Diffstat (limited to 'thirdparty/bullet/LinearMath/TaskScheduler')
-rw-r--r--  thirdparty/bullet/LinearMath/TaskScheduler/btTaskScheduler.cpp         | 1406
-rw-r--r--  thirdparty/bullet/LinearMath/TaskScheduler/btThreadSupportInterface.h  |   76
-rw-r--r--  thirdparty/bullet/LinearMath/TaskScheduler/btThreadSupportPosix.cpp    |  458
-rw-r--r--  thirdparty/bullet/LinearMath/TaskScheduler/btThreadSupportWin32.cpp    |  700
4 files changed, 1296 insertions(+), 1344 deletions(-)
diff --git a/thirdparty/bullet/LinearMath/TaskScheduler/btTaskScheduler.cpp b/thirdparty/bullet/LinearMath/TaskScheduler/btTaskScheduler.cpp
index 49510d1660..5f1115c402 100644
--- a/thirdparty/bullet/LinearMath/TaskScheduler/btTaskScheduler.cpp
+++ b/thirdparty/bullet/LinearMath/TaskScheduler/btTaskScheduler.cpp
@@ -6,13 +6,11 @@
#include <stdio.h>
#include <algorithm>
-
-
#if BT_THREADSAFE
#include "btThreadSupportInterface.h"
-#if defined( _WIN32 )
+#if defined(_WIN32)
#define WIN32_LEAN_AND_MEAN
@@ -20,404 +18,399 @@
#endif
-
typedef unsigned long long btU64;
static const int kCacheLineSize = 64;
void btSpinPause()
{
-#if defined( _WIN32 )
- YieldProcessor();
+#if defined(_WIN32)
+ YieldProcessor();
#endif
}
-
struct WorkerThreadStatus
{
- enum Type
- {
- kInvalid,
- kWaitingForWork,
- kWorking,
- kSleeping,
- };
+ enum Type
+ {
+ kInvalid,
+ kWaitingForWork,
+ kWorking,
+ kSleeping,
+ };
};
-
-ATTRIBUTE_ALIGNED64(class) WorkerThreadDirectives
+ATTRIBUTE_ALIGNED64(class)
+WorkerThreadDirectives
{
- static const int kMaxThreadCount = BT_MAX_THREAD_COUNT;
- // directives for all worker threads packed into a single cacheline
- char m_threadDirs[kMaxThreadCount];
+ static const int kMaxThreadCount = BT_MAX_THREAD_COUNT;
+ // directives for all worker threads packed into a single cacheline
+ char m_threadDirs[kMaxThreadCount];
public:
- enum Type
- {
- kInvalid,
- kGoToSleep, // go to sleep
- kStayAwakeButIdle, // wait for not checking job queue
- kScanForJobs, // actively scan job queue for jobs
- };
- WorkerThreadDirectives()
- {
- for ( int i = 0; i < kMaxThreadCount; ++i )
- {
- m_threadDirs[ i ] = 0;
- }
- }
-
- Type getDirective(int threadId)
- {
- btAssert(threadId < kMaxThreadCount);
- return static_cast<Type>(m_threadDirs[threadId]);
- }
-
- void setDirectiveByRange(int threadBegin, int threadEnd, Type dir)
- {
- btAssert( threadBegin < threadEnd );
- btAssert( threadEnd <= kMaxThreadCount );
- char dirChar = static_cast<char>(dir);
- for ( int i = threadBegin; i < threadEnd; ++i )
- {
- m_threadDirs[ i ] = dirChar;
- }
- }
+ enum Type
+ {
+ kInvalid,
+ kGoToSleep, // go to sleep
+ kStayAwakeButIdle, // wait for not checking job queue
+ kScanForJobs, // actively scan job queue for jobs
+ };
+ WorkerThreadDirectives()
+ {
+ for (int i = 0; i < kMaxThreadCount; ++i)
+ {
+ m_threadDirs[i] = 0;
+ }
+ }
+
+ Type getDirective(int threadId)
+ {
+ btAssert(threadId < kMaxThreadCount);
+ return static_cast<Type>(m_threadDirs[threadId]);
+ }
+
+ void setDirectiveByRange(int threadBegin, int threadEnd, Type dir)
+ {
+ btAssert(threadBegin < threadEnd);
+ btAssert(threadEnd <= kMaxThreadCount);
+ char dirChar = static_cast<char>(dir);
+ for (int i = threadBegin; i < threadEnd; ++i)
+ {
+ m_threadDirs[i] = dirChar;
+ }
+ }
};
class JobQueue;
-ATTRIBUTE_ALIGNED64(struct) ThreadLocalStorage
+ATTRIBUTE_ALIGNED64(struct)
+ThreadLocalStorage
{
- int m_threadId;
- WorkerThreadStatus::Type m_status;
- int m_numJobsFinished;
- btSpinMutex m_mutex;
- btScalar m_sumResult;
- WorkerThreadDirectives * m_directive;
- JobQueue* m_queue;
- btClock* m_clock;
- unsigned int m_cooldownTime;
+ int m_threadId;
+ WorkerThreadStatus::Type m_status;
+ int m_numJobsFinished;
+ btSpinMutex m_mutex;
+ btScalar m_sumResult;
+ WorkerThreadDirectives* m_directive;
+ JobQueue* m_queue;
+ btClock* m_clock;
+ unsigned int m_cooldownTime;
};
-
struct IJob
{
- virtual void executeJob(int threadId) = 0;
+ virtual void executeJob(int threadId) = 0;
};
class ParallelForJob : public IJob
{
- const btIParallelForBody* m_body;
- int m_begin;
- int m_end;
+ const btIParallelForBody* m_body;
+ int m_begin;
+ int m_end;
public:
- ParallelForJob( int iBegin, int iEnd, const btIParallelForBody& body )
- {
- m_body = &body;
- m_begin = iBegin;
- m_end = iEnd;
- }
- virtual void executeJob(int threadId) BT_OVERRIDE
- {
- BT_PROFILE( "executeJob" );
-
- // call the functor body to do the work
- m_body->forLoop( m_begin, m_end );
- }
-};
+ ParallelForJob(int iBegin, int iEnd, const btIParallelForBody& body)
+ {
+ m_body = &body;
+ m_begin = iBegin;
+ m_end = iEnd;
+ }
+ virtual void executeJob(int threadId) BT_OVERRIDE
+ {
+ BT_PROFILE("executeJob");
+ // call the functor body to do the work
+ m_body->forLoop(m_begin, m_end);
+ }
+};
class ParallelSumJob : public IJob
{
- const btIParallelSumBody* m_body;
- ThreadLocalStorage* m_threadLocalStoreArray;
- int m_begin;
- int m_end;
+ const btIParallelSumBody* m_body;
+ ThreadLocalStorage* m_threadLocalStoreArray;
+ int m_begin;
+ int m_end;
public:
- ParallelSumJob( int iBegin, int iEnd, const btIParallelSumBody& body, ThreadLocalStorage* tls )
- {
- m_body = &body;
- m_threadLocalStoreArray = tls;
- m_begin = iBegin;
- m_end = iEnd;
- }
- virtual void executeJob( int threadId ) BT_OVERRIDE
- {
- BT_PROFILE( "executeJob" );
-
- // call the functor body to do the work
- btScalar val = m_body->sumLoop( m_begin, m_end );
+ ParallelSumJob(int iBegin, int iEnd, const btIParallelSumBody& body, ThreadLocalStorage* tls)
+ {
+ m_body = &body;
+ m_threadLocalStoreArray = tls;
+ m_begin = iBegin;
+ m_end = iEnd;
+ }
+ virtual void executeJob(int threadId) BT_OVERRIDE
+ {
+ BT_PROFILE("executeJob");
+
+ // call the functor body to do the work
+ btScalar val = m_body->sumLoop(m_begin, m_end);
#if BT_PARALLEL_SUM_DETERMINISTISM
- // by truncating bits of the result, we can make the parallelSum deterministic (at the expense of precision)
- const float TRUNC_SCALE = float(1<<19);
- val = floor(val*TRUNC_SCALE+0.5f)/TRUNC_SCALE; // truncate some bits
+ // by truncating bits of the result, we can make the parallelSum deterministic (at the expense of precision)
+ const float TRUNC_SCALE = float(1 << 19);
+ val = floor(val * TRUNC_SCALE + 0.5f) / TRUNC_SCALE; // truncate some bits
#endif
- m_threadLocalStoreArray[threadId].m_sumResult += val;
- }
+ m_threadLocalStoreArray[threadId].m_sumResult += val;
+ }
};
-
-ATTRIBUTE_ALIGNED64(class) JobQueue
+ATTRIBUTE_ALIGNED64(class)
+JobQueue
{
- btThreadSupportInterface* m_threadSupport;
- btCriticalSection* m_queueLock;
- btSpinMutex m_mutex;
-
- btAlignedObjectArray<IJob*> m_jobQueue;
- char* m_jobMem;
- int m_jobMemSize;
- bool m_queueIsEmpty;
- int m_tailIndex;
- int m_headIndex;
- int m_allocSize;
- bool m_useSpinMutex;
- btAlignedObjectArray<JobQueue*> m_neighborContexts;
- char m_cachePadding[kCacheLineSize]; // prevent false sharing
-
- void freeJobMem()
- {
- if ( m_jobMem )
- {
- // free old
- btAlignedFree(m_jobMem);
- m_jobMem = NULL;
- }
- }
- void resizeJobMem(int newSize)
- {
- if (newSize > m_jobMemSize)
- {
- freeJobMem();
- m_jobMem = static_cast<char*>(btAlignedAlloc(newSize, kCacheLineSize));
- m_jobMemSize = newSize;
- }
- }
+ btThreadSupportInterface* m_threadSupport;
+ btCriticalSection* m_queueLock;
+ btSpinMutex m_mutex;
+
+ btAlignedObjectArray<IJob*> m_jobQueue;
+ char* m_jobMem;
+ int m_jobMemSize;
+ bool m_queueIsEmpty;
+ int m_tailIndex;
+ int m_headIndex;
+ int m_allocSize;
+ bool m_useSpinMutex;
+ btAlignedObjectArray<JobQueue*> m_neighborContexts;
+ char m_cachePadding[kCacheLineSize]; // prevent false sharing
+
+ void freeJobMem()
+ {
+ if (m_jobMem)
+ {
+ // free old
+ btAlignedFree(m_jobMem);
+ m_jobMem = NULL;
+ }
+ }
+ void resizeJobMem(int newSize)
+ {
+ if (newSize > m_jobMemSize)
+ {
+ freeJobMem();
+ m_jobMem = static_cast<char*>(btAlignedAlloc(newSize, kCacheLineSize));
+ m_jobMemSize = newSize;
+ }
+ }
public:
-
- JobQueue()
- {
- m_jobMem = NULL;
- m_jobMemSize = 0;
- m_threadSupport = NULL;
- m_queueLock = NULL;
- m_headIndex = 0;
- m_tailIndex = 0;
- m_useSpinMutex = false;
- }
- ~JobQueue()
- {
+ JobQueue()
+ {
+ m_jobMem = NULL;
+ m_jobMemSize = 0;
+ m_threadSupport = NULL;
+ m_queueLock = NULL;
+ m_headIndex = 0;
+ m_tailIndex = 0;
+ m_useSpinMutex = false;
+ }
+ ~JobQueue()
+ {
exit();
- }
+ }
void exit()
- {
+ {
freeJobMem();
- if (m_queueLock && m_threadSupport)
- {
- m_threadSupport->deleteCriticalSection(m_queueLock);
- m_queueLock = NULL;
+ if (m_queueLock && m_threadSupport)
+ {
+ m_threadSupport->deleteCriticalSection(m_queueLock);
+ m_queueLock = NULL;
m_threadSupport = 0;
- }
- }
-
- void init(btThreadSupportInterface* threadSup, btAlignedObjectArray<JobQueue>* contextArray)
- {
- m_threadSupport = threadSup;
- if (threadSup)
- {
- m_queueLock = m_threadSupport->createCriticalSection();
- }
- setupJobStealing(contextArray, contextArray->size());
- }
- void setupJobStealing(btAlignedObjectArray<JobQueue>* contextArray, int numActiveContexts)
- {
- btAlignedObjectArray<JobQueue>& contexts = *contextArray;
- int selfIndex = 0;
- for (int i = 0; i < contexts.size(); ++i)
- {
- if ( this == &contexts[ i ] )
- {
- selfIndex = i;
- break;
- }
- }
- int numNeighbors = btMin(2, contexts.size() - 1);
- int neighborOffsets[ ] = {-1, 1, -2, 2, -3, 3};
- int numOffsets = sizeof(neighborOffsets)/sizeof(neighborOffsets[0]);
- m_neighborContexts.reserve( numNeighbors );
- m_neighborContexts.resizeNoInitialize(0);
- for (int i = 0; i < numOffsets && m_neighborContexts.size() < numNeighbors; i++)
- {
- int neighborIndex = selfIndex + neighborOffsets[i];
- if ( neighborIndex >= 0 && neighborIndex < numActiveContexts)
- {
- m_neighborContexts.push_back( &contexts[ neighborIndex ] );
- }
- }
- }
-
- bool isQueueEmpty() const {return m_queueIsEmpty;}
- void lockQueue()
- {
- if ( m_useSpinMutex )
- {
- m_mutex.lock();
- }
- else
- {
- m_queueLock->lock();
- }
- }
- void unlockQueue()
- {
- if ( m_useSpinMutex )
- {
- m_mutex.unlock();
- }
- else
- {
- m_queueLock->unlock();
- }
- }
- void clearQueue(int jobCount, int jobSize)
- {
- lockQueue();
- m_headIndex = 0;
- m_tailIndex = 0;
- m_allocSize = 0;
- m_queueIsEmpty = true;
- int jobBufSize = jobSize * jobCount;
- // make sure we have enough memory allocated to store jobs
- if ( jobBufSize > m_jobMemSize )
- {
- resizeJobMem( jobBufSize );
- }
- // make sure job queue is big enough
- if ( jobCount > m_jobQueue.capacity() )
- {
- m_jobQueue.reserve( jobCount );
- }
- unlockQueue();
- m_jobQueue.resizeNoInitialize( 0 );
- }
- void* allocJobMem(int jobSize)
- {
- btAssert(m_jobMemSize >= (m_allocSize + jobSize));
- void* jobMem = &m_jobMem[m_allocSize];
- m_allocSize += jobSize;
- return jobMem;
- }
- void submitJob( IJob* job )
- {
- btAssert( reinterpret_cast<char*>( job ) >= &m_jobMem[ 0 ] && reinterpret_cast<char*>( job ) < &m_jobMem[ 0 ] + m_allocSize );
- m_jobQueue.push_back( job );
- lockQueue();
- m_tailIndex++;
- m_queueIsEmpty = false;
- unlockQueue();
- }
- IJob* consumeJobFromOwnQueue()
- {
- if ( m_queueIsEmpty )
- {
- // lock free path. even if this is taken erroneously it isn't harmful
- return NULL;
- }
- IJob* job = NULL;
- lockQueue();
- if ( !m_queueIsEmpty )
- {
- job = m_jobQueue[ m_headIndex++ ];
- btAssert( reinterpret_cast<char*>( job ) >= &m_jobMem[ 0 ] && reinterpret_cast<char*>( job ) < &m_jobMem[ 0 ] + m_allocSize );
- if ( m_headIndex == m_tailIndex )
- {
- m_queueIsEmpty = true;
- }
- }
- unlockQueue();
- return job;
- }
- IJob* consumeJob()
- {
- if (IJob* job = consumeJobFromOwnQueue())
- {
- return job;
- }
- // own queue is empty, try to steal from neighbor
- for (int i = 0; i < m_neighborContexts.size(); ++i)
- {
- JobQueue* otherContext = m_neighborContexts[ i ];
- if ( IJob* job = otherContext->consumeJobFromOwnQueue() )
- {
- return job;
- }
- }
- return NULL;
- }
-};
+ }
+ }
+ void init(btThreadSupportInterface * threadSup, btAlignedObjectArray<JobQueue> * contextArray)
+ {
+ m_threadSupport = threadSup;
+ if (threadSup)
+ {
+ m_queueLock = m_threadSupport->createCriticalSection();
+ }
+ setupJobStealing(contextArray, contextArray->size());
+ }
+ void setupJobStealing(btAlignedObjectArray<JobQueue> * contextArray, int numActiveContexts)
+ {
+ btAlignedObjectArray<JobQueue>& contexts = *contextArray;
+ int selfIndex = 0;
+ for (int i = 0; i < contexts.size(); ++i)
+ {
+ if (this == &contexts[i])
+ {
+ selfIndex = i;
+ break;
+ }
+ }
+ int numNeighbors = btMin(2, contexts.size() - 1);
+ int neighborOffsets[] = {-1, 1, -2, 2, -3, 3};
+ int numOffsets = sizeof(neighborOffsets) / sizeof(neighborOffsets[0]);
+ m_neighborContexts.reserve(numNeighbors);
+ m_neighborContexts.resizeNoInitialize(0);
+ for (int i = 0; i < numOffsets && m_neighborContexts.size() < numNeighbors; i++)
+ {
+ int neighborIndex = selfIndex + neighborOffsets[i];
+ if (neighborIndex >= 0 && neighborIndex < numActiveContexts)
+ {
+ m_neighborContexts.push_back(&contexts[neighborIndex]);
+ }
+ }
+ }
+
+ bool isQueueEmpty() const { return m_queueIsEmpty; }
+ void lockQueue()
+ {
+ if (m_useSpinMutex)
+ {
+ m_mutex.lock();
+ }
+ else
+ {
+ m_queueLock->lock();
+ }
+ }
+ void unlockQueue()
+ {
+ if (m_useSpinMutex)
+ {
+ m_mutex.unlock();
+ }
+ else
+ {
+ m_queueLock->unlock();
+ }
+ }
+ void clearQueue(int jobCount, int jobSize)
+ {
+ lockQueue();
+ m_headIndex = 0;
+ m_tailIndex = 0;
+ m_allocSize = 0;
+ m_queueIsEmpty = true;
+ int jobBufSize = jobSize * jobCount;
+ // make sure we have enough memory allocated to store jobs
+ if (jobBufSize > m_jobMemSize)
+ {
+ resizeJobMem(jobBufSize);
+ }
+ // make sure job queue is big enough
+ if (jobCount > m_jobQueue.capacity())
+ {
+ m_jobQueue.reserve(jobCount);
+ }
+ unlockQueue();
+ m_jobQueue.resizeNoInitialize(0);
+ }
+ void* allocJobMem(int jobSize)
+ {
+ btAssert(m_jobMemSize >= (m_allocSize + jobSize));
+ void* jobMem = &m_jobMem[m_allocSize];
+ m_allocSize += jobSize;
+ return jobMem;
+ }
+ void submitJob(IJob * job)
+ {
+ btAssert(reinterpret_cast<char*>(job) >= &m_jobMem[0] && reinterpret_cast<char*>(job) < &m_jobMem[0] + m_allocSize);
+ m_jobQueue.push_back(job);
+ lockQueue();
+ m_tailIndex++;
+ m_queueIsEmpty = false;
+ unlockQueue();
+ }
+ IJob* consumeJobFromOwnQueue()
+ {
+ if (m_queueIsEmpty)
+ {
+ // lock free path. even if this is taken erroneously it isn't harmful
+ return NULL;
+ }
+ IJob* job = NULL;
+ lockQueue();
+ if (!m_queueIsEmpty)
+ {
+ job = m_jobQueue[m_headIndex++];
+ btAssert(reinterpret_cast<char*>(job) >= &m_jobMem[0] && reinterpret_cast<char*>(job) < &m_jobMem[0] + m_allocSize);
+ if (m_headIndex == m_tailIndex)
+ {
+ m_queueIsEmpty = true;
+ }
+ }
+ unlockQueue();
+ return job;
+ }
+ IJob* consumeJob()
+ {
+ if (IJob* job = consumeJobFromOwnQueue())
+ {
+ return job;
+ }
+ // own queue is empty, try to steal from neighbor
+ for (int i = 0; i < m_neighborContexts.size(); ++i)
+ {
+ JobQueue* otherContext = m_neighborContexts[i];
+ if (IJob* job = otherContext->consumeJobFromOwnQueue())
+ {
+ return job;
+ }
+ }
+ return NULL;
+ }
+};
-static void WorkerThreadFunc( void* userPtr )
+static void WorkerThreadFunc(void* userPtr)
{
- BT_PROFILE( "WorkerThreadFunc" );
- ThreadLocalStorage* localStorage = (ThreadLocalStorage*) userPtr;
- JobQueue* jobQueue = localStorage->m_queue;
-
- bool shouldSleep = false;
- int threadId = localStorage->m_threadId;
- while (! shouldSleep)
- {
- // do work
- localStorage->m_mutex.lock();
- while ( IJob* job = jobQueue->consumeJob() )
- {
- localStorage->m_status = WorkerThreadStatus::kWorking;
- job->executeJob( threadId );
- localStorage->m_numJobsFinished++;
- }
- localStorage->m_status = WorkerThreadStatus::kWaitingForWork;
- localStorage->m_mutex.unlock();
- btU64 clockStart = localStorage->m_clock->getTimeMicroseconds();
- // while queue is empty,
- while (jobQueue->isQueueEmpty())
- {
- // todo: spin wait a bit to avoid hammering the empty queue
- btSpinPause();
- if ( localStorage->m_directive->getDirective(threadId) == WorkerThreadDirectives::kGoToSleep )
- {
- shouldSleep = true;
- break;
- }
- // if jobs are incoming,
- if ( localStorage->m_directive->getDirective( threadId ) == WorkerThreadDirectives::kScanForJobs )
- {
- clockStart = localStorage->m_clock->getTimeMicroseconds(); // reset clock
- }
- else
- {
- for ( int i = 0; i < 50; ++i )
- {
- btSpinPause();
- btSpinPause();
- btSpinPause();
- btSpinPause();
- if (localStorage->m_directive->getDirective( threadId ) == WorkerThreadDirectives::kScanForJobs || !jobQueue->isQueueEmpty())
- {
- break;
- }
- }
- // if no jobs incoming and queue has been empty for the cooldown time, sleep
- btU64 timeElapsed = localStorage->m_clock->getTimeMicroseconds() - clockStart;
- if (timeElapsed > localStorage->m_cooldownTime)
- {
- shouldSleep = true;
- break;
- }
- }
- }
- }
+ BT_PROFILE("WorkerThreadFunc");
+ ThreadLocalStorage* localStorage = (ThreadLocalStorage*)userPtr;
+ JobQueue* jobQueue = localStorage->m_queue;
+
+ bool shouldSleep = false;
+ int threadId = localStorage->m_threadId;
+ while (!shouldSleep)
+ {
+ // do work
+ localStorage->m_mutex.lock();
+ while (IJob* job = jobQueue->consumeJob())
+ {
+ localStorage->m_status = WorkerThreadStatus::kWorking;
+ job->executeJob(threadId);
+ localStorage->m_numJobsFinished++;
+ }
+ localStorage->m_status = WorkerThreadStatus::kWaitingForWork;
+ localStorage->m_mutex.unlock();
+ btU64 clockStart = localStorage->m_clock->getTimeMicroseconds();
+ // while queue is empty,
+ while (jobQueue->isQueueEmpty())
+ {
+ // todo: spin wait a bit to avoid hammering the empty queue
+ btSpinPause();
+ if (localStorage->m_directive->getDirective(threadId) == WorkerThreadDirectives::kGoToSleep)
+ {
+ shouldSleep = true;
+ break;
+ }
+ // if jobs are incoming,
+ if (localStorage->m_directive->getDirective(threadId) == WorkerThreadDirectives::kScanForJobs)
+ {
+ clockStart = localStorage->m_clock->getTimeMicroseconds(); // reset clock
+ }
+ else
+ {
+ for (int i = 0; i < 50; ++i)
+ {
+ btSpinPause();
+ btSpinPause();
+ btSpinPause();
+ btSpinPause();
+ if (localStorage->m_directive->getDirective(threadId) == WorkerThreadDirectives::kScanForJobs || !jobQueue->isQueueEmpty())
+ {
+ break;
+ }
+ }
+ // if no jobs incoming and queue has been empty for the cooldown time, sleep
+ btU64 timeElapsed = localStorage->m_clock->getTimeMicroseconds() - clockStart;
+ if (timeElapsed > localStorage->m_cooldownTime)
+ {
+ shouldSleep = true;
+ break;
+ }
+ }
+ }
+ }
{
BT_PROFILE("sleep");
// go sleep
@@ -427,376 +420,373 @@ static void WorkerThreadFunc( void* userPtr )
}
}
-
class btTaskSchedulerDefault : public btITaskScheduler
{
- btThreadSupportInterface* m_threadSupport;
- WorkerThreadDirectives* m_workerDirective;
- btAlignedObjectArray<JobQueue> m_jobQueues;
- btAlignedObjectArray<JobQueue*> m_perThreadJobQueues;
- btAlignedObjectArray<ThreadLocalStorage> m_threadLocalStorage;
- btSpinMutex m_antiNestingLock; // prevent nested parallel-for
- btClock m_clock;
- int m_numThreads;
- int m_numWorkerThreads;
- int m_numActiveJobQueues;
- int m_maxNumThreads;
- int m_numJobs;
- static const int kFirstWorkerThreadId = 1;
+ btThreadSupportInterface* m_threadSupport;
+ WorkerThreadDirectives* m_workerDirective;
+ btAlignedObjectArray<JobQueue> m_jobQueues;
+ btAlignedObjectArray<JobQueue*> m_perThreadJobQueues;
+ btAlignedObjectArray<ThreadLocalStorage> m_threadLocalStorage;
+ btSpinMutex m_antiNestingLock; // prevent nested parallel-for
+ btClock m_clock;
+ int m_numThreads;
+ int m_numWorkerThreads;
+ int m_numActiveJobQueues;
+ int m_maxNumThreads;
+ int m_numJobs;
+ static const int kFirstWorkerThreadId = 1;
+
public:
+ btTaskSchedulerDefault() : btITaskScheduler("ThreadSupport")
+ {
+ m_threadSupport = NULL;
+ m_workerDirective = NULL;
+ }
- btTaskSchedulerDefault() : btITaskScheduler("ThreadSupport")
- {
- m_threadSupport = NULL;
- m_workerDirective = NULL;
- }
-
- virtual ~btTaskSchedulerDefault()
- {
- waitForWorkersToSleep();
-
- for ( int i = 0; i < m_jobQueues.size(); ++i )
- {
- m_jobQueues[i].exit();
- }
-
- if (m_threadSupport)
- {
- delete m_threadSupport;
- m_threadSupport = NULL;
- }
- if (m_workerDirective)
- {
- btAlignedFree(m_workerDirective);
- m_workerDirective = NULL;
- }
- }
-
- void init()
- {
- btThreadSupportInterface::ConstructionInfo constructionInfo( "TaskScheduler", WorkerThreadFunc );
- m_threadSupport = btThreadSupportInterface::create( constructionInfo );
- m_workerDirective = static_cast<WorkerThreadDirectives*>(btAlignedAlloc(sizeof(*m_workerDirective), 64));
-
- m_numWorkerThreads = m_threadSupport->getNumWorkerThreads();
- m_maxNumThreads = m_threadSupport->getNumWorkerThreads() + 1;
- m_numThreads = m_maxNumThreads;
- // ideal to have one job queue for each physical processor (except for the main thread which needs no queue)
- int numThreadsPerQueue = m_threadSupport->getLogicalToPhysicalCoreRatio();
- int numJobQueues = (numThreadsPerQueue == 1) ? (m_maxNumThreads-1) : (m_maxNumThreads / numThreadsPerQueue);
- m_jobQueues.resize(numJobQueues);
- m_numActiveJobQueues = numJobQueues;
- for ( int i = 0; i < m_jobQueues.size(); ++i )
- {
- m_jobQueues[i].init( m_threadSupport, &m_jobQueues );
- }
- m_perThreadJobQueues.resize(m_numThreads);
- for ( int i = 0; i < m_numThreads; i++ )
- {
- JobQueue* jq = NULL;
- // only worker threads get a job queue
- if (i > 0)
- {
- if (numThreadsPerQueue == 1)
- {
- // one queue per worker thread
- jq = &m_jobQueues[ i - kFirstWorkerThreadId ];
- }
- else
- {
- // 2 threads share each queue
- jq = &m_jobQueues[ i / numThreadsPerQueue ];
- }
- }
- m_perThreadJobQueues[i] = jq;
- }
- m_threadLocalStorage.resize(m_numThreads);
- for ( int i = 0; i < m_numThreads; i++ )
- {
- ThreadLocalStorage& storage = m_threadLocalStorage[i];
- storage.m_threadId = i;
- storage.m_directive = m_workerDirective;
- storage.m_status = WorkerThreadStatus::kSleeping;
- storage.m_cooldownTime = 100; // 100 microseconds, threads go to sleep after this long if they have nothing to do
- storage.m_clock = &m_clock;
- storage.m_queue = m_perThreadJobQueues[i];
- }
- setWorkerDirectives( WorkerThreadDirectives::kGoToSleep ); // no work for them yet
- setNumThreads( m_threadSupport->getCacheFriendlyNumThreads() );
- }
-
- void setWorkerDirectives(WorkerThreadDirectives::Type dir)
- {
- m_workerDirective->setDirectiveByRange(kFirstWorkerThreadId, m_numThreads, dir);
- }
-
- virtual int getMaxNumThreads() const BT_OVERRIDE
- {
- return m_maxNumThreads;
- }
-
- virtual int getNumThreads() const BT_OVERRIDE
- {
- return m_numThreads;
- }
-
- virtual void setNumThreads( int numThreads ) BT_OVERRIDE
- {
- m_numThreads = btMax( btMin(numThreads, int(m_maxNumThreads)), 1 );
- m_numWorkerThreads = m_numThreads - 1;
- m_numActiveJobQueues = 0;
- // if there is at least 1 worker,
- if ( m_numWorkerThreads > 0 )
- {
- // re-setup job stealing between queues to avoid attempting to steal from an inactive job queue
- JobQueue* lastActiveContext = m_perThreadJobQueues[ m_numThreads - 1 ];
- int iLastActiveContext = lastActiveContext - &m_jobQueues[0];
- m_numActiveJobQueues = iLastActiveContext + 1;
- for ( int i = 0; i < m_jobQueues.size(); ++i )
- {
- m_jobQueues[ i ].setupJobStealing( &m_jobQueues, m_numActiveJobQueues );
- }
- }
- m_workerDirective->setDirectiveByRange(m_numThreads, BT_MAX_THREAD_COUNT, WorkerThreadDirectives::kGoToSleep);
- }
-
- void waitJobs()
- {
- BT_PROFILE( "waitJobs" );
- // have the main thread work until the job queues are empty
- int numMainThreadJobsFinished = 0;
- for ( int i = 0; i < m_numActiveJobQueues; ++i )
- {
- while ( IJob* job = m_jobQueues[i].consumeJob() )
- {
- job->executeJob( 0 );
- numMainThreadJobsFinished++;
- }
- }
-
- // done with jobs for now, tell workers to rest (but not sleep)
- setWorkerDirectives( WorkerThreadDirectives::kStayAwakeButIdle );
-
- btU64 clockStart = m_clock.getTimeMicroseconds();
- // wait for workers to finish any jobs in progress
- while ( true )
- {
- int numWorkerJobsFinished = 0;
- for ( int iThread = kFirstWorkerThreadId; iThread < m_numThreads; ++iThread )
- {
- ThreadLocalStorage* storage = &m_threadLocalStorage[iThread];
- storage->m_mutex.lock();
- numWorkerJobsFinished += storage->m_numJobsFinished;
- storage->m_mutex.unlock();
- }
- if (numWorkerJobsFinished + numMainThreadJobsFinished == m_numJobs)
- {
- break;
- }
- btU64 timeElapsed = m_clock.getTimeMicroseconds() - clockStart;
- btAssert(timeElapsed < 1000);
- if (timeElapsed > 100000)
- {
- break;
- }
- btSpinPause();
- }
- }
-
- void wakeWorkers(int numWorkersToWake)
- {
- BT_PROFILE( "wakeWorkers" );
- btAssert( m_workerDirective->getDirective(1) == WorkerThreadDirectives::kScanForJobs );
- int numDesiredWorkers = btMin(numWorkersToWake, m_numWorkerThreads);
- int numActiveWorkers = 0;
- for ( int iWorker = 0; iWorker < m_numWorkerThreads; ++iWorker )
- {
- // note this count of active workers is not necessarily totally reliable, because a worker thread could be
- // just about to put itself to sleep. So we may on occasion fail to wake up all the workers. It should be rare.
- ThreadLocalStorage& storage = m_threadLocalStorage[ kFirstWorkerThreadId + iWorker ];
- if (storage.m_status != WorkerThreadStatus::kSleeping)
- {
- numActiveWorkers++;
- }
- }
- for ( int iWorker = 0; iWorker < m_numWorkerThreads && numActiveWorkers < numDesiredWorkers; ++iWorker )
- {
- ThreadLocalStorage& storage = m_threadLocalStorage[ kFirstWorkerThreadId + iWorker ];
- if (storage.m_status == WorkerThreadStatus::kSleeping)
- {
- m_threadSupport->runTask( iWorker, &storage );
- numActiveWorkers++;
- }
- }
- }
-
- void waitForWorkersToSleep()
- {
- BT_PROFILE( "waitForWorkersToSleep" );
- setWorkerDirectives( WorkerThreadDirectives::kGoToSleep );
- m_threadSupport->waitForAllTasks();
- for ( int i = kFirstWorkerThreadId; i < m_numThreads; i++ )
- {
- ThreadLocalStorage& storage = m_threadLocalStorage[i];
- btAssert( storage.m_status == WorkerThreadStatus::kSleeping );
- }
- }
-
- virtual void sleepWorkerThreadsHint() BT_OVERRIDE
- {
- BT_PROFILE( "sleepWorkerThreadsHint" );
- // hint the task scheduler that we may not be using these threads for a little while
- setWorkerDirectives( WorkerThreadDirectives::kGoToSleep );
- }
-
- void prepareWorkerThreads()
- {
- for ( int i = kFirstWorkerThreadId; i < m_numThreads; ++i )
- {
- ThreadLocalStorage& storage = m_threadLocalStorage[i];
- storage.m_mutex.lock();
- storage.m_numJobsFinished = 0;
- storage.m_mutex.unlock();
- }
- setWorkerDirectives( WorkerThreadDirectives::kScanForJobs );
- }
-
- virtual void parallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body ) BT_OVERRIDE
- {
- BT_PROFILE( "parallelFor_ThreadSupport" );
- btAssert( iEnd >= iBegin );
- btAssert( grainSize >= 1 );
- int iterationCount = iEnd - iBegin;
- if ( iterationCount > grainSize && m_numWorkerThreads > 0 && m_antiNestingLock.tryLock() )
- {
- typedef ParallelForJob JobType;
- int jobCount = ( iterationCount + grainSize - 1 ) / grainSize;
- m_numJobs = jobCount;
- btAssert( jobCount >= 2 ); // need more than one job for multithreading
- int jobSize = sizeof( JobType );
-
- for (int i = 0; i < m_numActiveJobQueues; ++i)
- {
- m_jobQueues[i].clearQueue( jobCount, jobSize );
- }
- // prepare worker threads for incoming work
- prepareWorkerThreads();
- // submit all of the jobs
- int iJob = 0;
- int iThread = kFirstWorkerThreadId; // first worker thread
- for ( int i = iBegin; i < iEnd; i += grainSize )
- {
- btAssert( iJob < jobCount );
- int iE = btMin( i + grainSize, iEnd );
- JobQueue* jq = m_perThreadJobQueues[ iThread ];
- btAssert(jq);
- btAssert((jq - &m_jobQueues[0]) < m_numActiveJobQueues);
- void* jobMem = jq->allocJobMem(jobSize);
- JobType* job = new ( jobMem ) ParallelForJob( i, iE, body ); // placement new
- jq->submitJob( job );
- iJob++;
- iThread++;
- if ( iThread >= m_numThreads )
- {
- iThread = kFirstWorkerThreadId; // first worker thread
- }
- }
- wakeWorkers( jobCount - 1 );
-
- // put the main thread to work on emptying the job queue and then wait for all workers to finish
- waitJobs();
- m_antiNestingLock.unlock();
- }
- else
- {
- BT_PROFILE( "parallelFor_mainThread" );
- // just run on main thread
- body.forLoop( iBegin, iEnd );
- }
- }
- virtual btScalar parallelSum( int iBegin, int iEnd, int grainSize, const btIParallelSumBody& body ) BT_OVERRIDE
- {
- BT_PROFILE( "parallelSum_ThreadSupport" );
- btAssert( iEnd >= iBegin );
- btAssert( grainSize >= 1 );
- int iterationCount = iEnd - iBegin;
- if ( iterationCount > grainSize && m_numWorkerThreads > 0 && m_antiNestingLock.tryLock() )
- {
- typedef ParallelSumJob JobType;
- int jobCount = ( iterationCount + grainSize - 1 ) / grainSize;
- m_numJobs = jobCount;
- btAssert( jobCount >= 2 ); // need more than one job for multithreading
- int jobSize = sizeof( JobType );
- for (int i = 0; i < m_numActiveJobQueues; ++i)
- {
- m_jobQueues[i].clearQueue( jobCount, jobSize );
- }
-
- // initialize summation
- for ( int iThread = 0; iThread < m_numThreads; ++iThread )
- {
- m_threadLocalStorage[iThread].m_sumResult = btScalar(0);
- }
-
- // prepare worker threads for incoming work
- prepareWorkerThreads();
- // submit all of the jobs
- int iJob = 0;
- int iThread = kFirstWorkerThreadId; // first worker thread
- for ( int i = iBegin; i < iEnd; i += grainSize )
- {
- btAssert( iJob < jobCount );
- int iE = btMin( i + grainSize, iEnd );
- JobQueue* jq = m_perThreadJobQueues[ iThread ];
- btAssert(jq);
- btAssert((jq - &m_jobQueues[0]) < m_numActiveJobQueues);
- void* jobMem = jq->allocJobMem(jobSize);
- JobType* job = new ( jobMem ) ParallelSumJob( i, iE, body, &m_threadLocalStorage[0] ); // placement new
- jq->submitJob( job );
- iJob++;
- iThread++;
- if ( iThread >= m_numThreads )
- {
- iThread = kFirstWorkerThreadId; // first worker thread
- }
- }
- wakeWorkers( jobCount - 1 );
-
- // put the main thread to work on emptying the job queue and then wait for all workers to finish
- waitJobs();
-
- // add up all the thread sums
- btScalar sum = btScalar(0);
- for ( int iThread = 0; iThread < m_numThreads; ++iThread )
- {
- sum += m_threadLocalStorage[ iThread ].m_sumResult;
- }
- m_antiNestingLock.unlock();
- return sum;
- }
- else
- {
- BT_PROFILE( "parallelSum_mainThread" );
- // just run on main thread
- return body.sumLoop( iBegin, iEnd );
- }
- }
-};
+ virtual ~btTaskSchedulerDefault()
+ {
+ waitForWorkersToSleep();
+
+ for (int i = 0; i < m_jobQueues.size(); ++i)
+ {
+ m_jobQueues[i].exit();
+ }
+
+ if (m_threadSupport)
+ {
+ delete m_threadSupport;
+ m_threadSupport = NULL;
+ }
+ if (m_workerDirective)
+ {
+ btAlignedFree(m_workerDirective);
+ m_workerDirective = NULL;
+ }
+ }
+ void init()
+ {
+ btThreadSupportInterface::ConstructionInfo constructionInfo("TaskScheduler", WorkerThreadFunc);
+ m_threadSupport = btThreadSupportInterface::create(constructionInfo);
+ m_workerDirective = static_cast<WorkerThreadDirectives*>(btAlignedAlloc(sizeof(*m_workerDirective), 64));
+
+ m_numWorkerThreads = m_threadSupport->getNumWorkerThreads();
+ m_maxNumThreads = m_threadSupport->getNumWorkerThreads() + 1;
+ m_numThreads = m_maxNumThreads;
+ // ideal to have one job queue for each physical processor (except for the main thread which needs no queue)
+ int numThreadsPerQueue = m_threadSupport->getLogicalToPhysicalCoreRatio();
+ int numJobQueues = (numThreadsPerQueue == 1) ? (m_maxNumThreads - 1) : (m_maxNumThreads / numThreadsPerQueue);
+ m_jobQueues.resize(numJobQueues);
+ m_numActiveJobQueues = numJobQueues;
+ for (int i = 0; i < m_jobQueues.size(); ++i)
+ {
+ m_jobQueues[i].init(m_threadSupport, &m_jobQueues);
+ }
+ m_perThreadJobQueues.resize(m_numThreads);
+ for (int i = 0; i < m_numThreads; i++)
+ {
+ JobQueue* jq = NULL;
+ // only worker threads get a job queue
+ if (i > 0)
+ {
+ if (numThreadsPerQueue == 1)
+ {
+ // one queue per worker thread
+ jq = &m_jobQueues[i - kFirstWorkerThreadId];
+ }
+ else
+ {
+ // 2 threads share each queue
+ jq = &m_jobQueues[i / numThreadsPerQueue];
+ }
+ }
+ m_perThreadJobQueues[i] = jq;
+ }
+ m_threadLocalStorage.resize(m_numThreads);
+ for (int i = 0; i < m_numThreads; i++)
+ {
+ ThreadLocalStorage& storage = m_threadLocalStorage[i];
+ storage.m_threadId = i;
+ storage.m_directive = m_workerDirective;
+ storage.m_status = WorkerThreadStatus::kSleeping;
+ storage.m_cooldownTime = 100; // 100 microseconds, threads go to sleep after this long if they have nothing to do
+ storage.m_clock = &m_clock;
+ storage.m_queue = m_perThreadJobQueues[i];
+ }
+ setWorkerDirectives(WorkerThreadDirectives::kGoToSleep); // no work for them yet
+ setNumThreads(m_threadSupport->getCacheFriendlyNumThreads());
+ }
+
+ void setWorkerDirectives(WorkerThreadDirectives::Type dir)
+ {
+ m_workerDirective->setDirectiveByRange(kFirstWorkerThreadId, m_numThreads, dir);
+ }
+
+ virtual int getMaxNumThreads() const BT_OVERRIDE
+ {
+ return m_maxNumThreads;
+ }
+ virtual int getNumThreads() const BT_OVERRIDE
+ {
+ return m_numThreads;
+ }
+
+ virtual void setNumThreads(int numThreads) BT_OVERRIDE
+ {
+ m_numThreads = btMax(btMin(numThreads, int(m_maxNumThreads)), 1);
+ m_numWorkerThreads = m_numThreads - 1;
+ m_numActiveJobQueues = 0;
+ // if there is at least 1 worker,
+ if (m_numWorkerThreads > 0)
+ {
+ // re-setup job stealing between queues to avoid attempting to steal from an inactive job queue
+ JobQueue* lastActiveContext = m_perThreadJobQueues[m_numThreads - 1];
+ int iLastActiveContext = lastActiveContext - &m_jobQueues[0];
+ m_numActiveJobQueues = iLastActiveContext + 1;
+ for (int i = 0; i < m_jobQueues.size(); ++i)
+ {
+ m_jobQueues[i].setupJobStealing(&m_jobQueues, m_numActiveJobQueues);
+ }
+ }
+ m_workerDirective->setDirectiveByRange(m_numThreads, BT_MAX_THREAD_COUNT, WorkerThreadDirectives::kGoToSleep);
+ }
+
+ void waitJobs()
+ {
+ BT_PROFILE("waitJobs");
+ // have the main thread work until the job queues are empty
+ int numMainThreadJobsFinished = 0;
+ for (int i = 0; i < m_numActiveJobQueues; ++i)
+ {
+ while (IJob* job = m_jobQueues[i].consumeJob())
+ {
+ job->executeJob(0);
+ numMainThreadJobsFinished++;
+ }
+ }
+
+ // done with jobs for now, tell workers to rest (but not sleep)
+ setWorkerDirectives(WorkerThreadDirectives::kStayAwakeButIdle);
+
+ btU64 clockStart = m_clock.getTimeMicroseconds();
+ // wait for workers to finish any jobs in progress
+ while (true)
+ {
+ int numWorkerJobsFinished = 0;
+ for (int iThread = kFirstWorkerThreadId; iThread < m_numThreads; ++iThread)
+ {
+ ThreadLocalStorage* storage = &m_threadLocalStorage[iThread];
+ storage->m_mutex.lock();
+ numWorkerJobsFinished += storage->m_numJobsFinished;
+ storage->m_mutex.unlock();
+ }
+ if (numWorkerJobsFinished + numMainThreadJobsFinished == m_numJobs)
+ {
+ break;
+ }
+ btU64 timeElapsed = m_clock.getTimeMicroseconds() - clockStart;
+ btAssert(timeElapsed < 1000);
+ if (timeElapsed > 100000)
+ {
+ break;
+ }
+ btSpinPause();
+ }
+ }
+
+ void wakeWorkers(int numWorkersToWake)
+ {
+ BT_PROFILE("wakeWorkers");
+ btAssert(m_workerDirective->getDirective(1) == WorkerThreadDirectives::kScanForJobs);
+ int numDesiredWorkers = btMin(numWorkersToWake, m_numWorkerThreads);
+ int numActiveWorkers = 0;
+ for (int iWorker = 0; iWorker < m_numWorkerThreads; ++iWorker)
+ {
+ // note this count of active workers is not necessarily totally reliable, because a worker thread could be
+ // just about to put itself to sleep. So we may on occasion fail to wake up all the workers. It should be rare.
+ ThreadLocalStorage& storage = m_threadLocalStorage[kFirstWorkerThreadId + iWorker];
+ if (storage.m_status != WorkerThreadStatus::kSleeping)
+ {
+ numActiveWorkers++;
+ }
+ }
+ for (int iWorker = 0; iWorker < m_numWorkerThreads && numActiveWorkers < numDesiredWorkers; ++iWorker)
+ {
+ ThreadLocalStorage& storage = m_threadLocalStorage[kFirstWorkerThreadId + iWorker];
+ if (storage.m_status == WorkerThreadStatus::kSleeping)
+ {
+ m_threadSupport->runTask(iWorker, &storage);
+ numActiveWorkers++;
+ }
+ }
+ }
+
+ void waitForWorkersToSleep()
+ {
+ BT_PROFILE("waitForWorkersToSleep");
+ setWorkerDirectives(WorkerThreadDirectives::kGoToSleep);
+ m_threadSupport->waitForAllTasks();
+ for (int i = kFirstWorkerThreadId; i < m_numThreads; i++)
+ {
+ ThreadLocalStorage& storage = m_threadLocalStorage[i];
+ btAssert(storage.m_status == WorkerThreadStatus::kSleeping);
+ }
+ }
+
+ virtual void sleepWorkerThreadsHint() BT_OVERRIDE
+ {
+ BT_PROFILE("sleepWorkerThreadsHint");
+ // hint the task scheduler that we may not be using these threads for a little while
+ setWorkerDirectives(WorkerThreadDirectives::kGoToSleep);
+ }
+
+ void prepareWorkerThreads()
+ {
+ for (int i = kFirstWorkerThreadId; i < m_numThreads; ++i)
+ {
+ ThreadLocalStorage& storage = m_threadLocalStorage[i];
+ storage.m_mutex.lock();
+ storage.m_numJobsFinished = 0;
+ storage.m_mutex.unlock();
+ }
+ setWorkerDirectives(WorkerThreadDirectives::kScanForJobs);
+ }
+
+ virtual void parallelFor(int iBegin, int iEnd, int grainSize, const btIParallelForBody& body) BT_OVERRIDE
+ {
+ BT_PROFILE("parallelFor_ThreadSupport");
+ btAssert(iEnd >= iBegin);
+ btAssert(grainSize >= 1);
+ int iterationCount = iEnd - iBegin;
+ if (iterationCount > grainSize && m_numWorkerThreads > 0 && m_antiNestingLock.tryLock())
+ {
+ typedef ParallelForJob JobType;
+ int jobCount = (iterationCount + grainSize - 1) / grainSize;
+ m_numJobs = jobCount;
+ btAssert(jobCount >= 2); // need more than one job for multithreading
+ int jobSize = sizeof(JobType);
+
+ for (int i = 0; i < m_numActiveJobQueues; ++i)
+ {
+ m_jobQueues[i].clearQueue(jobCount, jobSize);
+ }
+ // prepare worker threads for incoming work
+ prepareWorkerThreads();
+ // submit all of the jobs
+ int iJob = 0;
+ int iThread = kFirstWorkerThreadId; // first worker thread
+ for (int i = iBegin; i < iEnd; i += grainSize)
+ {
+ btAssert(iJob < jobCount);
+ int iE = btMin(i + grainSize, iEnd);
+ JobQueue* jq = m_perThreadJobQueues[iThread];
+ btAssert(jq);
+ btAssert((jq - &m_jobQueues[0]) < m_numActiveJobQueues);
+ void* jobMem = jq->allocJobMem(jobSize);
+ JobType* job = new (jobMem) ParallelForJob(i, iE, body); // placement new
+ jq->submitJob(job);
+ iJob++;
+ iThread++;
+ if (iThread >= m_numThreads)
+ {
+ iThread = kFirstWorkerThreadId; // first worker thread
+ }
+ }
+ wakeWorkers(jobCount - 1);
+
+ // put the main thread to work on emptying the job queue and then wait for all workers to finish
+ waitJobs();
+ m_antiNestingLock.unlock();
+ }
+ else
+ {
+ BT_PROFILE("parallelFor_mainThread");
+ // just run on main thread
+ body.forLoop(iBegin, iEnd);
+ }
+ }
+ virtual btScalar parallelSum(int iBegin, int iEnd, int grainSize, const btIParallelSumBody& body) BT_OVERRIDE
+ {
+ BT_PROFILE("parallelSum_ThreadSupport");
+ btAssert(iEnd >= iBegin);
+ btAssert(grainSize >= 1);
+ int iterationCount = iEnd - iBegin;
+ if (iterationCount > grainSize && m_numWorkerThreads > 0 && m_antiNestingLock.tryLock())
+ {
+ typedef ParallelSumJob JobType;
+ int jobCount = (iterationCount + grainSize - 1) / grainSize;
+ m_numJobs = jobCount;
+ btAssert(jobCount >= 2); // need more than one job for multithreading
+ int jobSize = sizeof(JobType);
+ for (int i = 0; i < m_numActiveJobQueues; ++i)
+ {
+ m_jobQueues[i].clearQueue(jobCount, jobSize);
+ }
+
+ // initialize summation
+ for (int iThread = 0; iThread < m_numThreads; ++iThread)
+ {
+ m_threadLocalStorage[iThread].m_sumResult = btScalar(0);
+ }
+
+ // prepare worker threads for incoming work
+ prepareWorkerThreads();
+ // submit all of the jobs
+ int iJob = 0;
+ int iThread = kFirstWorkerThreadId; // first worker thread
+ for (int i = iBegin; i < iEnd; i += grainSize)
+ {
+ btAssert(iJob < jobCount);
+ int iE = btMin(i + grainSize, iEnd);
+ JobQueue* jq = m_perThreadJobQueues[iThread];
+ btAssert(jq);
+ btAssert((jq - &m_jobQueues[0]) < m_numActiveJobQueues);
+ void* jobMem = jq->allocJobMem(jobSize);
+ JobType* job = new (jobMem) ParallelSumJob(i, iE, body, &m_threadLocalStorage[0]); // placement new
+ jq->submitJob(job);
+ iJob++;
+ iThread++;
+ if (iThread >= m_numThreads)
+ {
+ iThread = kFirstWorkerThreadId; // first worker thread
+ }
+ }
+ wakeWorkers(jobCount - 1);
+
+ // put the main thread to work on emptying the job queue and then wait for all workers to finish
+ waitJobs();
+
+ // add up all the thread sums
+ btScalar sum = btScalar(0);
+ for (int iThread = 0; iThread < m_numThreads; ++iThread)
+ {
+ sum += m_threadLocalStorage[iThread].m_sumResult;
+ }
+ m_antiNestingLock.unlock();
+ return sum;
+ }
+ else
+ {
+ BT_PROFILE("parallelSum_mainThread");
+ // just run on main thread
+ return body.sumLoop(iBegin, iEnd);
+ }
+ }
+};
btITaskScheduler* btCreateDefaultTaskScheduler()
{
- btTaskSchedulerDefault* ts = new btTaskSchedulerDefault();
- ts->init();
- return ts;
+ btTaskSchedulerDefault* ts = new btTaskSchedulerDefault();
+ ts->init();
+ return ts;
}
-#else // #if BT_THREADSAFE
+#else // #if BT_THREADSAFE
btITaskScheduler* btCreateDefaultTaskScheduler()
{
- return NULL;
+ return NULL;
}
-#endif // #else // #if BT_THREADSAFE
+#endif // #else // #if BT_THREADSAFE
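
For context, here is a minimal sketch of how the scheduler defined above is driven from user code. It is illustrative only and not part of the commit: it assumes the btIParallelForBody interface and btCreateDefaultTaskScheduler() factory shown in this diff (declared in LinearMath/btThreads.h in upstream Bullet); the body class and function names are invented.

// Hypothetical usage sketch -- not part of this diff.
#include "LinearMath/btThreads.h"  // btITaskScheduler, btIParallelForBody (assumed header)

// A functor whose forLoop() is what ParallelForJob::executeJob() ends up calling
// for each [iBegin, iEnd) grain of the full iteration range.
class ScaleBody : public btIParallelForBody
{
	btScalar* m_data;
	btScalar m_factor;

public:
	ScaleBody(btScalar* data, btScalar factor) : m_data(data), m_factor(factor) {}
	virtual void forLoop(int iBegin, int iEnd) const BT_OVERRIDE
	{
		for (int i = iBegin; i < iEnd; ++i)
			m_data[i] *= m_factor;
	}
};

void scaleInParallel(btScalar* data, int count)
{
	// Returns NULL when compiled without BT_THREADSAFE (see the #else branch above).
	if (btITaskScheduler* ts = btCreateDefaultTaskScheduler())
	{
		ScaleBody body(data, btScalar(2));
		// With grainSize 64, ranges of 64 iterations or fewer run on the main thread;
		// larger ranges are split into jobs and spread across the per-thread job queues.
		ts->parallelFor(0, count, 64, body);
		delete ts;  // destructor puts the workers to sleep before tearing down
	}
}

parallelSum() is used the same way with a btIParallelSumBody whose sumLoop() returns a partial sum; the per-thread partials accumulate in ThreadLocalStorage::m_sumResult and are added up after waitJobs().
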
diff --git a/thirdparty/bullet/LinearMath/TaskScheduler/btThreadSupportInterface.h b/thirdparty/bullet/LinearMath/TaskScheduler/btThreadSupportInterface.h
index a0ad802b1e..1fe49335a1 100644
--- a/thirdparty/bullet/LinearMath/TaskScheduler/btThreadSupportInterface.h
+++ b/thirdparty/bullet/LinearMath/TaskScheduler/btThreadSupportInterface.h
@@ -16,55 +16,49 @@ subject to the following restrictions:
#ifndef BT_THREAD_SUPPORT_INTERFACE_H
#define BT_THREAD_SUPPORT_INTERFACE_H
-
-
class btCriticalSection
{
public:
- btCriticalSection() {}
- virtual ~btCriticalSection() {}
+ btCriticalSection() {}
+ virtual ~btCriticalSection() {}
- virtual void lock() = 0;
- virtual void unlock() = 0;
+ virtual void lock() = 0;
+ virtual void unlock() = 0;
};
-
class btThreadSupportInterface
{
public:
-
- virtual ~btThreadSupportInterface() {}
-
- virtual int getNumWorkerThreads() const = 0; // number of worker threads (total number of logical processors - 1)
- virtual int getCacheFriendlyNumThreads() const = 0; // the number of logical processors sharing a single L3 cache
- virtual int getLogicalToPhysicalCoreRatio() const = 0; // the number of logical processors per physical processor (usually 1 or 2)
- virtual void runTask( int threadIndex, void* userData ) = 0;
- virtual void waitForAllTasks() = 0;
-
- virtual btCriticalSection* createCriticalSection() = 0;
- virtual void deleteCriticalSection( btCriticalSection* criticalSection ) = 0;
-
- typedef void( *ThreadFunc )( void* userPtr );
-
- struct ConstructionInfo
- {
- ConstructionInfo( const char* uniqueName,
- ThreadFunc userThreadFunc,
- int threadStackSize = 65535
- )
- :m_uniqueName( uniqueName ),
- m_userThreadFunc( userThreadFunc ),
- m_threadStackSize( threadStackSize )
- {
- }
-
- const char* m_uniqueName;
- ThreadFunc m_userThreadFunc;
- int m_threadStackSize;
- };
-
- static btThreadSupportInterface* create( const ConstructionInfo& info );
+ virtual ~btThreadSupportInterface() {}
+
+ virtual int getNumWorkerThreads() const = 0; // number of worker threads (total number of logical processors - 1)
+ virtual int getCacheFriendlyNumThreads() const = 0; // the number of logical processors sharing a single L3 cache
+ virtual int getLogicalToPhysicalCoreRatio() const = 0; // the number of logical processors per physical processor (usually 1 or 2)
+ virtual void runTask(int threadIndex, void* userData) = 0;
+ virtual void waitForAllTasks() = 0;
+
+ virtual btCriticalSection* createCriticalSection() = 0;
+ virtual void deleteCriticalSection(btCriticalSection* criticalSection) = 0;
+
+ typedef void (*ThreadFunc)(void* userPtr);
+
+ struct ConstructionInfo
+ {
+ ConstructionInfo(const char* uniqueName,
+ ThreadFunc userThreadFunc,
+ int threadStackSize = 65535)
+ : m_uniqueName(uniqueName),
+ m_userThreadFunc(userThreadFunc),
+ m_threadStackSize(threadStackSize)
+ {
+ }
+
+ const char* m_uniqueName;
+ ThreadFunc m_userThreadFunc;
+ int m_threadStackSize;
+ };
+
+ static btThreadSupportInterface* create(const ConstructionInfo& info);
};
-#endif //BT_THREAD_SUPPORT_INTERFACE_H
-
+#endif //BT_THREAD_SUPPORT_INTERFACE_H
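
To make the contract above concrete, here is a hypothetical round-trip through the interface, mirroring what btTaskSchedulerDefault does in init(), wakeWorkers() and waitForWorkersToSleep(). The payload struct and function names are invented; only the members shown in the header above are assumed.

// Hypothetical usage sketch -- not part of this diff.
#include "btThreadSupportInterface.h"

struct Payload
{
	int m_value;
};

// Matches the ThreadFunc typedef: void (*)(void* userPtr).
static void DoubleIt(void* userPtr)
{
	Payload* p = static_cast<Payload*>(userPtr);
	p->m_value *= 2;
}

void runOnce()
{
	btThreadSupportInterface::ConstructionInfo info("Example", DoubleIt);  // default 65535-byte stack
	btThreadSupportInterface* threads = btThreadSupportInterface::create(info);

	Payload payloads[4] = {{1}, {2}, {3}, {4}};
	int n = threads->getNumWorkerThreads();
	if (n > 4) n = 4;
	for (int i = 0; i < n; ++i)
		threads->runTask(i, &payloads[i]);  // wake worker i and hand it its payload
	threads->waitForAllTasks();             // block until every started task has signalled back

	delete threads;  // virtual destructor lets each backend stop and join its workers
}
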
diff --git a/thirdparty/bullet/LinearMath/TaskScheduler/btThreadSupportPosix.cpp b/thirdparty/bullet/LinearMath/TaskScheduler/btThreadSupportPosix.cpp
index 50ca060dfe..02f4ed1631 100644
--- a/thirdparty/bullet/LinearMath/TaskScheduler/btThreadSupportPosix.cpp
+++ b/thirdparty/bullet/LinearMath/TaskScheduler/btThreadSupportPosix.cpp
@@ -1,3 +1,4 @@
+
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2018 Erwin Coumans http://bulletphysics.com
@@ -13,9 +14,7 @@ subject to the following restrictions:
3. This notice may not be removed or altered from any source distribution.
*/
-
-#if BT_THREADSAFE && !defined( _WIN32 )
-
+#if BT_THREADSAFE && !defined(_WIN32)
#include "LinearMath/btScalar.h"
#include "LinearMath/btAlignedObjectArray.h"
@@ -27,14 +26,12 @@ subject to the following restrictions:
#include <errno.h>
#include <unistd.h>
-
#ifndef _XOPEN_SOURCE
-#define _XOPEN_SOURCE 600 //for definition of pthread_barrier_t, see http://pages.cs.wisc.edu/~travitch/pthreads_primer.html
-#endif //_XOPEN_SOURCE
+#define _XOPEN_SOURCE 600 //for definition of pthread_barrier_t, see http://pages.cs.wisc.edu/~travitch/pthreads_primer.html
+#endif //_XOPEN_SOURCE
#include <pthread.h>
#include <semaphore.h>
-#include <unistd.h> //for sysconf
-
+#include <unistd.h> //for sysconf
///
/// getNumHardwareThreads()
@@ -48,318 +45,309 @@ subject to the following restrictions:
int btGetNumHardwareThreads()
{
- return btMin<int>(BT_MAX_THREAD_COUNT, std::thread::hardware_concurrency());
+ return btMin<int>(BT_MAX_THREAD_COUNT, std::thread::hardware_concurrency());
}
#else
int btGetNumHardwareThreads()
{
- return btMin<int>(BT_MAX_THREAD_COUNT, sysconf( _SC_NPROCESSORS_ONLN ));
+ return btMin<int>(BT_MAX_THREAD_COUNT, sysconf(_SC_NPROCESSORS_ONLN));
}
#endif
-
// btThreadSupportPosix helps to initialize/shutdown libspe2, start/stop SPU tasks and communication
class btThreadSupportPosix : public btThreadSupportInterface
{
public:
- struct btThreadStatus
- {
- int m_taskId;
- int m_commandId;
- int m_status;
-
- ThreadFunc m_userThreadFunc;
- void* m_userPtr; //for taskDesc etc
-
- pthread_t thread;
- //each tread will wait until this signal to start its work
- sem_t* startSemaphore;
-
- // this is a copy of m_mainSemaphore,
- //each tread will signal once it is finished with its work
- sem_t* m_mainSemaphore;
- unsigned long threadUsed;
- };
-private:
- typedef unsigned long long UINT64;
-
- btAlignedObjectArray<btThreadStatus> m_activeThreadStatus;
- // m_mainSemaphoresemaphore will signal, if and how many threads are finished with their work
- sem_t* m_mainSemaphore;
- int m_numThreads;
- UINT64 m_startedThreadsMask;
- void startThreads( const ConstructionInfo& threadInfo );
- void stopThreads();
- int waitForResponse();
+ struct btThreadStatus
+ {
+ int m_taskId;
+ int m_commandId;
+ int m_status;
+
+ ThreadFunc m_userThreadFunc;
+ void* m_userPtr; //for taskDesc etc
+
+ pthread_t thread;
+ //each tread will wait until this signal to start its work
+ sem_t* startSemaphore;
+ btCriticalSection* m_cs;
+ // this is a copy of m_mainSemaphore,
+ //each tread will signal once it is finished with its work
+ sem_t* m_mainSemaphore;
+ unsigned long threadUsed;
+ };
+private:
+ typedef unsigned long long UINT64;
+
+ btAlignedObjectArray<btThreadStatus> m_activeThreadStatus;
+ // m_mainSemaphoresemaphore will signal, if and how many threads are finished with their work
+ sem_t* m_mainSemaphore;
+ int m_numThreads;
+ UINT64 m_startedThreadsMask;
+ void startThreads(const ConstructionInfo& threadInfo);
+ void stopThreads();
+ int waitForResponse();
+ btCriticalSection* m_cs;
public:
- btThreadSupportPosix( const ConstructionInfo& threadConstructionInfo );
- virtual ~btThreadSupportPosix();
+ btThreadSupportPosix(const ConstructionInfo& threadConstructionInfo);
+ virtual ~btThreadSupportPosix();
- virtual int getNumWorkerThreads() const BT_OVERRIDE { return m_numThreads; }
- // TODO: return the number of logical processors sharing the first L3 cache
- virtual int getCacheFriendlyNumThreads() const BT_OVERRIDE { return m_numThreads + 1; }
- // TODO: detect if CPU has hyperthreading enabled
- virtual int getLogicalToPhysicalCoreRatio() const BT_OVERRIDE { return 1; }
+ virtual int getNumWorkerThreads() const BT_OVERRIDE { return m_numThreads; }
+ // TODO: return the number of logical processors sharing the first L3 cache
+ virtual int getCacheFriendlyNumThreads() const BT_OVERRIDE { return m_numThreads + 1; }
+ // TODO: detect if CPU has hyperthreading enabled
+ virtual int getLogicalToPhysicalCoreRatio() const BT_OVERRIDE { return 1; }
- virtual void runTask( int threadIndex, void* userData ) BT_OVERRIDE;
- virtual void waitForAllTasks() BT_OVERRIDE;
+ virtual void runTask(int threadIndex, void* userData) BT_OVERRIDE;
+ virtual void waitForAllTasks() BT_OVERRIDE;
- virtual btCriticalSection* createCriticalSection() BT_OVERRIDE;
- virtual void deleteCriticalSection( btCriticalSection* criticalSection ) BT_OVERRIDE;
+ virtual btCriticalSection* createCriticalSection() BT_OVERRIDE;
+ virtual void deleteCriticalSection(btCriticalSection* criticalSection) BT_OVERRIDE;
};
-
-#define checkPThreadFunction(returnValue) \
- if(0 != returnValue) { \
- printf("PThread problem at line %i in file %s: %i %d\n", __LINE__, __FILE__, returnValue, errno); \
- }
+#define checkPThreadFunction(returnValue) \
+ if (0 != returnValue) \
+ { \
+ printf("PThread problem at line %i in file %s: %i %d\n", __LINE__, __FILE__, returnValue, errno); \
+ }
// The number of threads should be equal to the number of available cores
// Todo: each worker should be linked to a single core, using SetThreadIdealProcessor.
-
-btThreadSupportPosix::btThreadSupportPosix( const ConstructionInfo& threadConstructionInfo )
+btThreadSupportPosix::btThreadSupportPosix(const ConstructionInfo& threadConstructionInfo)
{
- startThreads( threadConstructionInfo );
+ m_cs = createCriticalSection();
+ startThreads(threadConstructionInfo);
}
// cleanup/shutdown Libspe2
btThreadSupportPosix::~btThreadSupportPosix()
{
- stopThreads();
+ stopThreads();
+ deleteCriticalSection(m_cs);
+ m_cs = 0;
}
-#if (defined (__APPLE__))
+#if (defined(__APPLE__))
#define NAMED_SEMAPHORES
#endif
-
-static sem_t* createSem( const char* baseName )
+static sem_t* createSem(const char* baseName)
{
- static int semCount = 0;
+ static int semCount = 0;
#ifdef NAMED_SEMAPHORES
- /// Named semaphore begin
- char name[ 32 ];
- snprintf( name, 32, "/%8.s-%4.d-%4.4d", baseName, getpid(), semCount++ );
- sem_t* tempSem = sem_open( name, O_CREAT, 0600, 0 );
-
- if ( tempSem != reinterpret_cast<sem_t *>( SEM_FAILED ) )
- {
- // printf("Created \"%s\" Semaphore %p\n", name, tempSem);
- }
- else
- {
- //printf("Error creating Semaphore %d\n", errno);
- exit( -1 );
- }
- /// Named semaphore end
+ /// Named semaphore begin
+ char name[32];
+ snprintf(name, 32, "/%8.s-%4.d-%4.4d", baseName, getpid(), semCount++);
+ sem_t* tempSem = sem_open(name, O_CREAT, 0600, 0);
+
+ if (tempSem != reinterpret_cast<sem_t*>(SEM_FAILED))
+ {
+ // printf("Created \"%s\" Semaphore %p\n", name, tempSem);
+ }
+ else
+ {
+ //printf("Error creating Semaphore %d\n", errno);
+ exit(-1);
+ }
+ /// Named semaphore end
#else
- sem_t* tempSem = new sem_t;
- checkPThreadFunction( sem_init( tempSem, 0, 0 ) );
+ sem_t* tempSem = new sem_t;
+ checkPThreadFunction(sem_init(tempSem, 0, 0));
#endif
- return tempSem;
+ return tempSem;
}
-static void destroySem( sem_t* semaphore )
+static void destroySem(sem_t* semaphore)
{
#ifdef NAMED_SEMAPHORES
- checkPThreadFunction( sem_close( semaphore ) );
+ checkPThreadFunction(sem_close(semaphore));
#else
- checkPThreadFunction( sem_destroy( semaphore ) );
- delete semaphore;
+ checkPThreadFunction(sem_destroy(semaphore));
+ delete semaphore;
#endif
}
-static void *threadFunction( void *argument )
+static void* threadFunction(void* argument)
{
- btThreadSupportPosix::btThreadStatus* status = ( btThreadSupportPosix::btThreadStatus* )argument;
-
- while ( 1 )
- {
- checkPThreadFunction( sem_wait( status->startSemaphore ) );
- void* userPtr = status->m_userPtr;
-
- if ( userPtr )
- {
- btAssert( status->m_status );
- status->m_userThreadFunc( userPtr );
- status->m_status = 2;
- checkPThreadFunction( sem_post( status->m_mainSemaphore ) );
- status->threadUsed++;
- }
- else
- {
- //exit Thread
- status->m_status = 3;
- checkPThreadFunction( sem_post( status->m_mainSemaphore ) );
- printf( "Thread with taskId %i exiting\n", status->m_taskId );
- break;
- }
- }
-
- printf( "Thread TERMINATED\n" );
- return 0;
+ btThreadSupportPosix::btThreadStatus* status = (btThreadSupportPosix::btThreadStatus*)argument;
+
+ while (1)
+ {
+ checkPThreadFunction(sem_wait(status->startSemaphore));
+ void* userPtr = status->m_userPtr;
+
+ if (userPtr)
+ {
+ btAssert(status->m_status);
+ status->m_userThreadFunc(userPtr);
+ status->m_cs->lock();
+ status->m_status = 2;
+ status->m_cs->unlock();
+ checkPThreadFunction(sem_post(status->m_mainSemaphore));
+ status->threadUsed++;
+ }
+ else
+ {
+ //exit Thread
+ status->m_cs->lock();
+ status->m_status = 3;
+ status->m_cs->unlock();
+ checkPThreadFunction(sem_post(status->m_mainSemaphore));
+ break;
+ }
+ }
+
+ return 0;
}
///send messages to SPUs
-void btThreadSupportPosix::runTask( int threadIndex, void* userData )
+void btThreadSupportPosix::runTask(int threadIndex, void* userData)
{
- ///we should spawn an SPU task here, and in 'waitForResponse' it should wait for response of the (one of) the first tasks that finished
- btThreadStatus& threadStatus = m_activeThreadStatus[ threadIndex ];
- btAssert( threadIndex >= 0 );
- btAssert( threadIndex < m_activeThreadStatus.size() );
-
- threadStatus.m_commandId = 1;
- threadStatus.m_status = 1;
- threadStatus.m_userPtr = userData;
- m_startedThreadsMask |= UINT64( 1 ) << threadIndex;
-
- // fire event to start new task
- checkPThreadFunction( sem_post( threadStatus.startSemaphore ) );
+ ///we should spawn an SPU task here, and in 'waitForResponse' it should wait for response of the (one of) the first tasks that finished
+ btThreadStatus& threadStatus = m_activeThreadStatus[threadIndex];
+ btAssert(threadIndex >= 0);
+ btAssert(threadIndex < m_activeThreadStatus.size());
+ threadStatus.m_cs = m_cs;
+ threadStatus.m_commandId = 1;
+ threadStatus.m_status = 1;
+ threadStatus.m_userPtr = userData;
+ m_startedThreadsMask |= UINT64(1) << threadIndex;
+
+ // fire event to start new task
+ checkPThreadFunction(sem_post(threadStatus.startSemaphore));
}
-
///check for messages from SPUs
int btThreadSupportPosix::waitForResponse()
{
- ///We should wait for (one of) the first tasks to finish (or other SPU messages), and report its response
- ///A possible response can be 'yes, SPU handled it', or 'no, please do a PPU fallback'
-
- btAssert( m_activeThreadStatus.size() );
-
- // wait for any of the threads to finish
- checkPThreadFunction( sem_wait( m_mainSemaphore ) );
- // get at least one thread which has finished
- size_t last = -1;
-
- for ( size_t t = 0; t < size_t( m_activeThreadStatus.size() ); ++t )
- {
- if ( 2 == m_activeThreadStatus[ t ].m_status )
- {
- last = t;
- break;
- }
- }
-
- btThreadStatus& threadStatus = m_activeThreadStatus[ last ];
-
- btAssert( threadStatus.m_status > 1 );
- threadStatus.m_status = 0;
-
- // need to find an active spu
- btAssert( last >= 0 );
- m_startedThreadsMask &= ~( UINT64( 1 ) << last );
-
- return last;
+ ///We should wait for (one of) the first tasks to finish (or other SPU messages), and report its response
+ ///A possible response can be 'yes, SPU handled it', or 'no, please do a PPU fallback'
+
+ btAssert(m_activeThreadStatus.size());
+
+ // wait for any of the threads to finish
+ checkPThreadFunction(sem_wait(m_mainSemaphore));
+ // get at least one thread which has finished
+ size_t last = -1;
+
+ for (size_t t = 0; t < size_t(m_activeThreadStatus.size()); ++t)
+ {
+ m_cs->lock();
+ bool hasFinished = (2 == m_activeThreadStatus[t].m_status);
+ m_cs->unlock();
+ if (hasFinished)
+ {
+ last = t;
+ break;
+ }
+ }
+
+ btThreadStatus& threadStatus = m_activeThreadStatus[last];
+
+ btAssert(threadStatus.m_status > 1);
+ threadStatus.m_status = 0;
+
+ // need to find an active spu
+ btAssert(last >= 0);
+ m_startedThreadsMask &= ~(UINT64(1) << last);
+
+ return last;
}
-
void btThreadSupportPosix::waitForAllTasks()
{
- while ( m_startedThreadsMask )
- {
- waitForResponse();
- }
+ while (m_startedThreadsMask)
+ {
+ waitForResponse();
+ }
}
-
-void btThreadSupportPosix::startThreads( const ConstructionInfo& threadConstructionInfo )
+void btThreadSupportPosix::startThreads(const ConstructionInfo& threadConstructionInfo)
{
- m_numThreads = btGetNumHardwareThreads() - 1; // main thread exists already
- printf( "%s creating %i threads.\n", __FUNCTION__, m_numThreads );
- m_activeThreadStatus.resize( m_numThreads );
- m_startedThreadsMask = 0;
-
- m_mainSemaphore = createSem( "main" );
- //checkPThreadFunction(sem_wait(mainSemaphore));
-
- for ( int i = 0; i < m_numThreads; i++ )
- {
- printf( "starting thread %d\n", i );
- btThreadStatus& threadStatus = m_activeThreadStatus[ i ];
- threadStatus.startSemaphore = createSem( "threadLocal" );
- checkPThreadFunction( pthread_create( &threadStatus.thread, NULL, &threadFunction, (void*) &threadStatus ) );
-
- threadStatus.m_userPtr = 0;
- threadStatus.m_taskId = i;
- threadStatus.m_commandId = 0;
- threadStatus.m_status = 0;
- threadStatus.m_mainSemaphore = m_mainSemaphore;
- threadStatus.m_userThreadFunc = threadConstructionInfo.m_userThreadFunc;
- threadStatus.threadUsed = 0;
-
- printf( "started thread %d \n", i );
- }
+ m_numThreads = btGetNumHardwareThreads() - 1; // main thread exists already
+ m_activeThreadStatus.resize(m_numThreads);
+ m_startedThreadsMask = 0;
+
+ m_mainSemaphore = createSem("main");
+ //checkPThreadFunction(sem_wait(mainSemaphore));
+
+ for (int i = 0; i < m_numThreads; i++)
+ {
+ btThreadStatus& threadStatus = m_activeThreadStatus[i];
+ threadStatus.startSemaphore = createSem("threadLocal");
+ threadStatus.m_userPtr = 0;
+ threadStatus.m_cs = m_cs;
+ threadStatus.m_taskId = i;
+ threadStatus.m_commandId = 0;
+ threadStatus.m_status = 0;
+ threadStatus.m_mainSemaphore = m_mainSemaphore;
+ threadStatus.m_userThreadFunc = threadConstructionInfo.m_userThreadFunc;
+ threadStatus.threadUsed = 0;
+ checkPThreadFunction(pthread_create(&threadStatus.thread, NULL, &threadFunction, (void*)&threadStatus));
+	}
}
///tell the task scheduler we are done with the SPU tasks
void btThreadSupportPosix::stopThreads()
{
- for ( size_t t = 0; t < size_t( m_activeThreadStatus.size() ); ++t )
- {
- btThreadStatus& threadStatus = m_activeThreadStatus[ t ];
- printf( "%s: Thread %i used: %ld\n", __FUNCTION__, int( t ), threadStatus.threadUsed );
-
- threadStatus.m_userPtr = 0;
- checkPThreadFunction( sem_post( threadStatus.startSemaphore ) );
- checkPThreadFunction( sem_wait( m_mainSemaphore ) );
-
- printf( "destroy semaphore\n" );
- destroySem( threadStatus.startSemaphore );
- printf( "semaphore destroyed\n" );
- checkPThreadFunction( pthread_join( threadStatus.thread, 0 ) );
-
- }
- printf( "destroy main semaphore\n" );
- destroySem( m_mainSemaphore );
- printf( "main semaphore destroyed\n" );
- m_activeThreadStatus.clear();
+ for (size_t t = 0; t < size_t(m_activeThreadStatus.size()); ++t)
+ {
+ btThreadStatus& threadStatus = m_activeThreadStatus[t];
+
+ threadStatus.m_userPtr = 0;
+ checkPThreadFunction(sem_post(threadStatus.startSemaphore));
+ checkPThreadFunction(sem_wait(m_mainSemaphore));
+
+ destroySem(threadStatus.startSemaphore);
+ checkPThreadFunction(pthread_join(threadStatus.thread, 0));
+ }
+ destroySem(m_mainSemaphore);
+ m_activeThreadStatus.clear();
}
class btCriticalSectionPosix : public btCriticalSection
{
- pthread_mutex_t m_mutex;
+ pthread_mutex_t m_mutex;
public:
- btCriticalSectionPosix()
- {
- pthread_mutex_init( &m_mutex, NULL );
- }
- virtual ~btCriticalSectionPosix()
- {
- pthread_mutex_destroy( &m_mutex );
- }
-
- virtual void lock()
- {
- pthread_mutex_lock( &m_mutex );
- }
- virtual void unlock()
- {
- pthread_mutex_unlock( &m_mutex );
- }
+ btCriticalSectionPosix()
+ {
+ pthread_mutex_init(&m_mutex, NULL);
+ }
+ virtual ~btCriticalSectionPosix()
+ {
+ pthread_mutex_destroy(&m_mutex);
+ }
+
+ virtual void lock()
+ {
+ pthread_mutex_lock(&m_mutex);
+ }
+ virtual void unlock()
+ {
+ pthread_mutex_unlock(&m_mutex);
+ }
};
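The interface is bare lock()/unlock(); a caller-side RAII guard (hypothetical, not part of Bullet) keeps the pair balanced on every return path:

    // Hypothetical scope guard over the btCriticalSection interface above.
    class btScopedCriticalSection
    {
        btCriticalSection* m_cs;
    public:
        explicit btScopedCriticalSection(btCriticalSection* cs) : m_cs(cs) { m_cs->lock(); }
        ~btScopedCriticalSection() { m_cs->unlock(); }
    };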
-
btCriticalSection* btThreadSupportPosix::createCriticalSection()
{
- return new btCriticalSectionPosix();
+ return new btCriticalSectionPosix();
}
-void btThreadSupportPosix::deleteCriticalSection( btCriticalSection* cs )
+void btThreadSupportPosix::deleteCriticalSection(btCriticalSection* cs)
{
- delete cs;
+ delete cs;
}
-
-btThreadSupportInterface* btThreadSupportInterface::create( const ConstructionInfo& info )
+btThreadSupportInterface* btThreadSupportInterface::create(const ConstructionInfo& info)
{
- return new btThreadSupportPosix( info );
+ return new btThreadSupportPosix(info);
}
-#endif // BT_THREADSAFE && !defined( _WIN32 )
-
+#endif // BT_THREADSAFE && !defined( _WIN32 )
diff --git a/thirdparty/bullet/LinearMath/TaskScheduler/btThreadSupportWin32.cpp b/thirdparty/bullet/LinearMath/TaskScheduler/btThreadSupportWin32.cpp
index 00edac650b..922e449cce 100644
--- a/thirdparty/bullet/LinearMath/TaskScheduler/btThreadSupportWin32.cpp
+++ b/thirdparty/bullet/LinearMath/TaskScheduler/btThreadSupportWin32.cpp
@@ -13,7 +13,7 @@ subject to the following restrictions:
3. This notice may not be removed or altered from any source distribution.
*/
-#if defined( _WIN32 ) && BT_THREADSAFE
+#if defined(_WIN32) && BT_THREADSAFE
#include "LinearMath/btScalar.h"
#include "LinearMath/btMinMax.h"
@@ -23,450 +23,430 @@ subject to the following restrictions:
#include <windows.h>
#include <stdio.h>
-
struct btProcessorInfo
{
- int numLogicalProcessors;
- int numCores;
- int numNumaNodes;
- int numL1Cache;
- int numL2Cache;
- int numL3Cache;
- int numPhysicalPackages;
- static const int maxNumTeamMasks = 32;
- int numTeamMasks;
- UINT64 processorTeamMasks[ maxNumTeamMasks ];
+ int numLogicalProcessors;
+ int numCores;
+ int numNumaNodes;
+ int numL1Cache;
+ int numL2Cache;
+ int numL3Cache;
+ int numPhysicalPackages;
+ static const int maxNumTeamMasks = 32;
+ int numTeamMasks;
+ UINT64 processorTeamMasks[maxNumTeamMasks];
};
-UINT64 getProcessorTeamMask( const btProcessorInfo& procInfo, int procId )
+UINT64 getProcessorTeamMask(const btProcessorInfo& procInfo, int procId)
{
- UINT64 procMask = UINT64( 1 ) << procId;
- for ( int i = 0; i < procInfo.numTeamMasks; ++i )
- {
- if ( procMask & procInfo.processorTeamMasks[ i ] )
- {
- return procInfo.processorTeamMasks[ i ];
- }
- }
- return 0;
+ UINT64 procMask = UINT64(1) << procId;
+ for (int i = 0; i < procInfo.numTeamMasks; ++i)
+ {
+ if (procMask & procInfo.processorTeamMasks[i])
+ {
+ return procInfo.processorTeamMasks[i];
+ }
+ }
+ return 0;
}
-int getProcessorTeamIndex( const btProcessorInfo& procInfo, int procId )
+int getProcessorTeamIndex(const btProcessorInfo& procInfo, int procId)
{
- UINT64 procMask = UINT64( 1 ) << procId;
- for ( int i = 0; i < procInfo.numTeamMasks; ++i )
- {
- if ( procMask & procInfo.processorTeamMasks[ i ] )
- {
- return i;
- }
- }
- return -1;
+ UINT64 procMask = UINT64(1) << procId;
+ for (int i = 0; i < procInfo.numTeamMasks; ++i)
+ {
+ if (procMask & procInfo.processorTeamMasks[i])
+ {
+ return i;
+ }
+ }
+ return -1;
}
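As a worked example under an assumed topology: on an 8-thread Ryzen part with two 4-core CCXs, the two team masks would be 0x0F and 0xF0, so processor 5 belongs to team 1:

    btProcessorInfo info = {};
    info.numTeamMasks = 2;
    info.processorTeamMasks[0] = 0x0F; // CCX 0: processors 0-3
    info.processorTeamMasks[1] = 0xF0; // CCX 1: processors 4-7
    btAssert(getProcessorTeamMask(info, 5) == 0xF0);
    btAssert(getProcessorTeamIndex(info, 5) == 1);
    btAssert(getProcessorTeamIndex(info, 9) == -1); // not covered by any team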
-int countSetBits( ULONG64 bits )
+int countSetBits(ULONG64 bits)
{
- int count = 0;
- while ( bits )
- {
- if ( bits & 1 )
- {
- count++;
- }
- bits >>= 1;
- }
- return count;
+ int count = 0;
+ while (bits)
+ {
+ if (bits & 1)
+ {
+ count++;
+ }
+ bits >>= 1;
+ }
+ return count;
}
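countSetBits is a portable bit-at-a-time population count. On a C++20 toolchain the same contract could be met by std::popcount (an alternative sketch, not what this file ships):

    #include <bit> // C++20

    int countSetBitsStd(ULONG64 bits)
    {
        // Delegates to the compiler/hardware popcount where available.
        return std::popcount(static_cast<unsigned long long>(bits));
    }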
+typedef BOOL(WINAPI* Pfn_GetLogicalProcessorInformation)(PSYSTEM_LOGICAL_PROCESSOR_INFORMATION, PDWORD);
-typedef BOOL( WINAPI *Pfn_GetLogicalProcessorInformation )( PSYSTEM_LOGICAL_PROCESSOR_INFORMATION, PDWORD );
-
-
-void getProcessorInformation( btProcessorInfo* procInfo )
+void getProcessorInformation(btProcessorInfo* procInfo)
{
- memset( procInfo, 0, sizeof( *procInfo ) );
- Pfn_GetLogicalProcessorInformation getLogicalProcInfo =
- (Pfn_GetLogicalProcessorInformation) GetProcAddress( GetModuleHandle( TEXT( "kernel32" ) ), "GetLogicalProcessorInformation" );
- if ( getLogicalProcInfo == NULL )
- {
- // no info
- return;
- }
- PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buf = NULL;
- DWORD bufSize = 0;
- while ( true )
- {
- if ( getLogicalProcInfo( buf, &bufSize ) )
- {
- break;
- }
- else
- {
- if ( GetLastError() == ERROR_INSUFFICIENT_BUFFER )
- {
- if ( buf )
- {
- free( buf );
- }
- buf = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION) malloc( bufSize );
- }
- }
- }
-
- int len = bufSize / sizeof( *buf );
- for ( int i = 0; i < len; ++i )
- {
- PSYSTEM_LOGICAL_PROCESSOR_INFORMATION info = buf + i;
- switch ( info->Relationship )
- {
- case RelationNumaNode:
- procInfo->numNumaNodes++;
- break;
-
- case RelationProcessorCore:
- procInfo->numCores++;
- procInfo->numLogicalProcessors += countSetBits( info->ProcessorMask );
- break;
-
- case RelationCache:
- if ( info->Cache.Level == 1 )
- {
- procInfo->numL1Cache++;
- }
- else if ( info->Cache.Level == 2 )
- {
- procInfo->numL2Cache++;
- }
- else if ( info->Cache.Level == 3 )
- {
- procInfo->numL3Cache++;
- // processors that share L3 cache are considered to be on the same team
- // because they can more easily work together on the same data.
- // Large performance penalties will occur if 2 or more threads from different
- // teams attempt to frequently read and modify the same cache lines.
- //
- // On the AMD Ryzen 7 CPU for example, the 8 cores on the CPU are split into
- // 2 CCX units of 4 cores each. Each CCX has a separate L3 cache, so if both
- // CCXs are operating on the same data, many cycles will be spent keeping the
- // two caches coherent.
- if ( procInfo->numTeamMasks < btProcessorInfo::maxNumTeamMasks )
- {
- procInfo->processorTeamMasks[ procInfo->numTeamMasks ] = info->ProcessorMask;
- procInfo->numTeamMasks++;
- }
- }
- break;
-
- case RelationProcessorPackage:
- procInfo->numPhysicalPackages++;
- break;
- }
- }
- free( buf );
+ memset(procInfo, 0, sizeof(*procInfo));
+ Pfn_GetLogicalProcessorInformation getLogicalProcInfo =
+ (Pfn_GetLogicalProcessorInformation)GetProcAddress(GetModuleHandle(TEXT("kernel32")), "GetLogicalProcessorInformation");
+ if (getLogicalProcInfo == NULL)
+ {
+ // no info
+ return;
+ }
+ PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buf = NULL;
+ DWORD bufSize = 0;
+ while (true)
+ {
+ if (getLogicalProcInfo(buf, &bufSize))
+ {
+ break;
+ }
+ else
+ {
+ if (GetLastError() == ERROR_INSUFFICIENT_BUFFER)
+ {
+ if (buf)
+ {
+ free(buf);
+ }
+ buf = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION)malloc(bufSize);
+ }
+ }
+ }
+
+ int len = bufSize / sizeof(*buf);
+ for (int i = 0; i < len; ++i)
+ {
+ PSYSTEM_LOGICAL_PROCESSOR_INFORMATION info = buf + i;
+ switch (info->Relationship)
+ {
+ case RelationNumaNode:
+ procInfo->numNumaNodes++;
+ break;
+
+ case RelationProcessorCore:
+ procInfo->numCores++;
+ procInfo->numLogicalProcessors += countSetBits(info->ProcessorMask);
+ break;
+
+ case RelationCache:
+ if (info->Cache.Level == 1)
+ {
+ procInfo->numL1Cache++;
+ }
+ else if (info->Cache.Level == 2)
+ {
+ procInfo->numL2Cache++;
+ }
+ else if (info->Cache.Level == 3)
+ {
+ procInfo->numL3Cache++;
+ // processors that share L3 cache are considered to be on the same team
+ // because they can more easily work together on the same data.
+ // Large performance penalties will occur if 2 or more threads from different
+ // teams attempt to frequently read and modify the same cache lines.
+ //
+ // On the AMD Ryzen 7 CPU for example, the 8 cores on the CPU are split into
+ // 2 CCX units of 4 cores each. Each CCX has a separate L3 cache, so if both
+ // CCXs are operating on the same data, many cycles will be spent keeping the
+ // two caches coherent.
+ if (procInfo->numTeamMasks < btProcessorInfo::maxNumTeamMasks)
+ {
+ procInfo->processorTeamMasks[procInfo->numTeamMasks] = info->ProcessorMask;
+ procInfo->numTeamMasks++;
+ }
+ }
+ break;
+
+ case RelationProcessorPackage:
+ procInfo->numPhysicalPackages++;
+ break;
+ }
+ }
+ free(buf);
}
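A small smoke test of the query (illustrative; values depend on the machine, and on systems without GetLogicalProcessorInformation every count stays at the memset zero):

    btProcessorInfo procInfo;
    getProcessorInformation(&procInfo);
    printf("logical=%d cores=%d packages=%d numa=%d L3=%d teams=%d\n",
           procInfo.numLogicalProcessors, procInfo.numCores,
           procInfo.numPhysicalPackages, procInfo.numNumaNodes,
           procInfo.numL3Cache, procInfo.numTeamMasks);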
-
-
///btThreadSupportWin32 helps to initialize/shutdown libspe2, start/stop SPU tasks and communication
class btThreadSupportWin32 : public btThreadSupportInterface
{
public:
- struct btThreadStatus
- {
- int m_taskId;
- int m_commandId;
- int m_status;
+ struct btThreadStatus
+ {
+ int m_taskId;
+ int m_commandId;
+ int m_status;
- ThreadFunc m_userThreadFunc;
- void* m_userPtr; //for taskDesc etc
+ ThreadFunc m_userThreadFunc;
+ void* m_userPtr; //for taskDesc etc
- void* m_threadHandle; //this one is calling 'Win32ThreadFunc'
+ void* m_threadHandle; //this one is calling 'Win32ThreadFunc'
- void* m_eventStartHandle;
- char m_eventStartHandleName[ 32 ];
+ void* m_eventStartHandle;
+ char m_eventStartHandleName[32];
- void* m_eventCompleteHandle;
- char m_eventCompleteHandleName[ 32 ];
- };
+ void* m_eventCompleteHandle;
+ char m_eventCompleteHandleName[32];
+ };
private:
- btAlignedObjectArray<btThreadStatus> m_activeThreadStatus;
- btAlignedObjectArray<void*> m_completeHandles;
- int m_numThreads;
- DWORD_PTR m_startedThreadMask;
- btProcessorInfo m_processorInfo;
+ btAlignedObjectArray<btThreadStatus> m_activeThreadStatus;
+ btAlignedObjectArray<void*> m_completeHandles;
+ int m_numThreads;
+ DWORD_PTR m_startedThreadMask;
+ btProcessorInfo m_processorInfo;
- void startThreads( const ConstructionInfo& threadInfo );
- void stopThreads();
- int waitForResponse();
+ void startThreads(const ConstructionInfo& threadInfo);
+ void stopThreads();
+ int waitForResponse();
public:
+ btThreadSupportWin32(const ConstructionInfo& threadConstructionInfo);
+ virtual ~btThreadSupportWin32();
- btThreadSupportWin32( const ConstructionInfo& threadConstructionInfo );
- virtual ~btThreadSupportWin32();
+ virtual int getNumWorkerThreads() const BT_OVERRIDE { return m_numThreads; }
+ virtual int getCacheFriendlyNumThreads() const BT_OVERRIDE { return countSetBits(m_processorInfo.processorTeamMasks[0]); }
+ virtual int getLogicalToPhysicalCoreRatio() const BT_OVERRIDE { return m_processorInfo.numLogicalProcessors / m_processorInfo.numCores; }
- virtual int getNumWorkerThreads() const BT_OVERRIDE { return m_numThreads; }
- virtual int getCacheFriendlyNumThreads() const BT_OVERRIDE { return countSetBits(m_processorInfo.processorTeamMasks[0]); }
- virtual int getLogicalToPhysicalCoreRatio() const BT_OVERRIDE { return m_processorInfo.numLogicalProcessors / m_processorInfo.numCores; }
+ virtual void runTask(int threadIndex, void* userData) BT_OVERRIDE;
+ virtual void waitForAllTasks() BT_OVERRIDE;
- virtual void runTask( int threadIndex, void* userData ) BT_OVERRIDE;
- virtual void waitForAllTasks() BT_OVERRIDE;
-
- virtual btCriticalSection* createCriticalSection() BT_OVERRIDE;
- virtual void deleteCriticalSection( btCriticalSection* criticalSection ) BT_OVERRIDE;
+ virtual btCriticalSection* createCriticalSection() BT_OVERRIDE;
+ virtual void deleteCriticalSection(btCriticalSection* criticalSection) BT_OVERRIDE;
};
-
-btThreadSupportWin32::btThreadSupportWin32( const ConstructionInfo & threadConstructionInfo )
+btThreadSupportWin32::btThreadSupportWin32(const ConstructionInfo& threadConstructionInfo)
{
- startThreads( threadConstructionInfo );
+ startThreads(threadConstructionInfo);
}
-
btThreadSupportWin32::~btThreadSupportWin32()
{
- stopThreads();
+ stopThreads();
}
-
-DWORD WINAPI win32threadStartFunc( LPVOID lpParam )
+DWORD WINAPI win32threadStartFunc(LPVOID lpParam)
{
- btThreadSupportWin32::btThreadStatus* status = ( btThreadSupportWin32::btThreadStatus* )lpParam;
-
- while ( 1 )
- {
- WaitForSingleObject( status->m_eventStartHandle, INFINITE );
- void* userPtr = status->m_userPtr;
-
- if ( userPtr )
- {
- btAssert( status->m_status );
- status->m_userThreadFunc( userPtr );
- status->m_status = 2;
- SetEvent( status->m_eventCompleteHandle );
- }
- else
- {
- //exit Thread
- status->m_status = 3;
- printf( "Thread with taskId %i with handle %p exiting\n", status->m_taskId, status->m_threadHandle );
- SetEvent( status->m_eventCompleteHandle );
- break;
- }
- }
- printf( "Thread TERMINATED\n" );
- return 0;
+ btThreadSupportWin32::btThreadStatus* status = (btThreadSupportWin32::btThreadStatus*)lpParam;
+
+ while (1)
+ {
+ WaitForSingleObject(status->m_eventStartHandle, INFINITE);
+ void* userPtr = status->m_userPtr;
+
+ if (userPtr)
+ {
+ btAssert(status->m_status);
+ status->m_userThreadFunc(userPtr);
+ status->m_status = 2;
+ SetEvent(status->m_eventCompleteHandle);
+ }
+ else
+ {
+ //exit Thread
+ status->m_status = 3;
+			printf("Thread with taskId %i and handle %p exiting\n", status->m_taskId, status->m_threadHandle);
+ SetEvent(status->m_eventCompleteHandle);
+ break;
+ }
+ }
+ printf("Thread TERMINATED\n");
+ return 0;
}
-
-void btThreadSupportWin32::runTask( int threadIndex, void* userData )
+void btThreadSupportWin32::runTask(int threadIndex, void* userData)
{
- btThreadStatus& threadStatus = m_activeThreadStatus[ threadIndex ];
- btAssert( threadIndex >= 0 );
- btAssert( int( threadIndex ) < m_activeThreadStatus.size() );
+ btThreadStatus& threadStatus = m_activeThreadStatus[threadIndex];
+ btAssert(threadIndex >= 0);
+ btAssert(int(threadIndex) < m_activeThreadStatus.size());
- threadStatus.m_commandId = 1;
- threadStatus.m_status = 1;
- threadStatus.m_userPtr = userData;
- m_startedThreadMask |= DWORD_PTR( 1 ) << threadIndex;
+ threadStatus.m_commandId = 1;
+ threadStatus.m_status = 1;
+ threadStatus.m_userPtr = userData;
+ m_startedThreadMask |= DWORD_PTR(1) << threadIndex;
- ///fire event to start new task
- SetEvent( threadStatus.m_eventStartHandle );
+ ///fire event to start new task
+ SetEvent(threadStatus.m_eventStartHandle);
}
-
int btThreadSupportWin32::waitForResponse()
{
- btAssert( m_activeThreadStatus.size() );
+ btAssert(m_activeThreadStatus.size());
- int last = -1;
- DWORD res = WaitForMultipleObjects( m_completeHandles.size(), &m_completeHandles[ 0 ], FALSE, INFINITE );
- btAssert( res != WAIT_FAILED );
- last = res - WAIT_OBJECT_0;
+ int last = -1;
+ DWORD res = WaitForMultipleObjects(m_completeHandles.size(), &m_completeHandles[0], FALSE, INFINITE);
+ btAssert(res != WAIT_FAILED);
+ last = res - WAIT_OBJECT_0;
- btThreadStatus& threadStatus = m_activeThreadStatus[ last ];
- btAssert( threadStatus.m_threadHandle );
- btAssert( threadStatus.m_eventCompleteHandle );
+ btThreadStatus& threadStatus = m_activeThreadStatus[last];
+ btAssert(threadStatus.m_threadHandle);
+ btAssert(threadStatus.m_eventCompleteHandle);
- //WaitForSingleObject(threadStatus.m_eventCompleteHandle, INFINITE);
- btAssert( threadStatus.m_status > 1 );
- threadStatus.m_status = 0;
+ //WaitForSingleObject(threadStatus.m_eventCompleteHandle, INFINITE);
+ btAssert(threadStatus.m_status > 1);
+ threadStatus.m_status = 0;
- ///need to find an active spu
- btAssert( last >= 0 );
- m_startedThreadMask &= ~( DWORD_PTR( 1 ) << last );
+ ///need to find an active spu
+ btAssert(last >= 0);
+ m_startedThreadMask &= ~(DWORD_PTR(1) << last);
- return last;
+ return last;
}
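The subtraction works because WaitForMultipleObjects returns WAIT_OBJECT_0 plus the index of the first signaled handle; a standalone sketch of the same pattern (eventA/eventB assumed pre-created event handles):

    HANDLE handles[2] = {eventA, eventB};
    DWORD res = WaitForMultipleObjects(2, handles, FALSE /*wait for any*/, INFINITE);
    btAssert(res != WAIT_FAILED);
    int which = int(res - WAIT_OBJECT_0); // 0 => eventA, 1 => eventB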
-
void btThreadSupportWin32::waitForAllTasks()
{
- while ( m_startedThreadMask )
- {
- waitForResponse();
- }
+ while (m_startedThreadMask)
+ {
+ waitForResponse();
+ }
}
-
-void btThreadSupportWin32::startThreads( const ConstructionInfo& threadConstructionInfo )
+void btThreadSupportWin32::startThreads(const ConstructionInfo& threadConstructionInfo)
{
- static int uniqueId = 0;
- uniqueId++;
- btProcessorInfo& procInfo = m_processorInfo;
- getProcessorInformation( &procInfo );
- DWORD_PTR dwProcessAffinityMask = 0;
- DWORD_PTR dwSystemAffinityMask = 0;
- if ( !GetProcessAffinityMask( GetCurrentProcess(), &dwProcessAffinityMask, &dwSystemAffinityMask ) )
- {
- dwProcessAffinityMask = 0;
- }
- ///The number of threads should be equal to the number of available cores - 1
- m_numThreads = btMin(procInfo.numLogicalProcessors, int(BT_MAX_THREAD_COUNT)) - 1; // cap to max thread count (-1 because main thread already exists)
-
- m_activeThreadStatus.resize( m_numThreads );
- m_completeHandles.resize( m_numThreads );
- m_startedThreadMask = 0;
-
- // set main thread affinity
- if ( DWORD_PTR mask = dwProcessAffinityMask & getProcessorTeamMask( procInfo, 0 ))
- {
- SetThreadAffinityMask( GetCurrentThread(), mask );
- SetThreadIdealProcessor( GetCurrentThread(), 0 );
- }
-
- for ( int i = 0; i < m_numThreads; i++ )
- {
- printf( "starting thread %d\n", i );
-
- btThreadStatus& threadStatus = m_activeThreadStatus[ i ];
-
- LPSECURITY_ATTRIBUTES lpThreadAttributes = NULL;
- SIZE_T dwStackSize = threadConstructionInfo.m_threadStackSize;
- LPTHREAD_START_ROUTINE lpStartAddress = &win32threadStartFunc;
- LPVOID lpParameter = &threadStatus;
- DWORD dwCreationFlags = 0;
- LPDWORD lpThreadId = 0;
-
- threadStatus.m_userPtr = 0;
-
- sprintf( threadStatus.m_eventStartHandleName, "es%.8s%d%d", threadConstructionInfo.m_uniqueName, uniqueId, i );
- threadStatus.m_eventStartHandle = CreateEventA( 0, false, false, threadStatus.m_eventStartHandleName );
-
- sprintf( threadStatus.m_eventCompleteHandleName, "ec%.8s%d%d", threadConstructionInfo.m_uniqueName, uniqueId, i );
- threadStatus.m_eventCompleteHandle = CreateEventA( 0, false, false, threadStatus.m_eventCompleteHandleName );
-
- m_completeHandles[ i ] = threadStatus.m_eventCompleteHandle;
-
- HANDLE handle = CreateThread( lpThreadAttributes, dwStackSize, lpStartAddress, lpParameter, dwCreationFlags, lpThreadId );
- //SetThreadPriority( handle, THREAD_PRIORITY_HIGHEST );
- // highest priority -- can cause erratic performance when numThreads > numCores
- // we don't want worker threads to be higher priority than the main thread or the main thread could get
- // totally shut out and unable to tell the workers to stop
- //SetThreadPriority( handle, THREAD_PRIORITY_BELOW_NORMAL );
-
- {
- int processorId = i + 1; // leave processor 0 for main thread
- DWORD_PTR teamMask = getProcessorTeamMask( procInfo, processorId );
- if ( teamMask )
- {
- // bind each thread to only execute on processors of it's assigned team
- // - for single-socket Intel x86 CPUs this has no effect (only a single, shared L3 cache so there is only 1 team)
- // - for multi-socket Intel this will keep threads from migrating from one socket to another
- // - for AMD Ryzen this will keep threads from migrating from one CCX to another
- DWORD_PTR mask = teamMask & dwProcessAffinityMask;
- if ( mask )
- {
- SetThreadAffinityMask( handle, mask );
- }
- }
- SetThreadIdealProcessor( handle, processorId );
- }
-
- threadStatus.m_taskId = i;
- threadStatus.m_commandId = 0;
- threadStatus.m_status = 0;
- threadStatus.m_threadHandle = handle;
- threadStatus.m_userThreadFunc = threadConstructionInfo.m_userThreadFunc;
-
- printf( "started %s thread %d with threadHandle %p\n", threadConstructionInfo.m_uniqueName, i, handle );
- }
+ static int uniqueId = 0;
+ uniqueId++;
+ btProcessorInfo& procInfo = m_processorInfo;
+ getProcessorInformation(&procInfo);
+ DWORD_PTR dwProcessAffinityMask = 0;
+ DWORD_PTR dwSystemAffinityMask = 0;
+ if (!GetProcessAffinityMask(GetCurrentProcess(), &dwProcessAffinityMask, &dwSystemAffinityMask))
+ {
+ dwProcessAffinityMask = 0;
+ }
+ ///The number of threads should be equal to the number of available cores - 1
+ m_numThreads = btMin(procInfo.numLogicalProcessors, int(BT_MAX_THREAD_COUNT)) - 1; // cap to max thread count (-1 because main thread already exists)
+
+ m_activeThreadStatus.resize(m_numThreads);
+ m_completeHandles.resize(m_numThreads);
+ m_startedThreadMask = 0;
+
+ // set main thread affinity
+ if (DWORD_PTR mask = dwProcessAffinityMask & getProcessorTeamMask(procInfo, 0))
+ {
+ SetThreadAffinityMask(GetCurrentThread(), mask);
+ SetThreadIdealProcessor(GetCurrentThread(), 0);
+ }
+
+ for (int i = 0; i < m_numThreads; i++)
+ {
+ printf("starting thread %d\n", i);
+
+ btThreadStatus& threadStatus = m_activeThreadStatus[i];
+
+ LPSECURITY_ATTRIBUTES lpThreadAttributes = NULL;
+ SIZE_T dwStackSize = threadConstructionInfo.m_threadStackSize;
+ LPTHREAD_START_ROUTINE lpStartAddress = &win32threadStartFunc;
+ LPVOID lpParameter = &threadStatus;
+ DWORD dwCreationFlags = 0;
+ LPDWORD lpThreadId = 0;
+
+ threadStatus.m_userPtr = 0;
+
+ sprintf(threadStatus.m_eventStartHandleName, "es%.8s%d%d", threadConstructionInfo.m_uniqueName, uniqueId, i);
+ threadStatus.m_eventStartHandle = CreateEventA(0, false, false, threadStatus.m_eventStartHandleName);
+
+ sprintf(threadStatus.m_eventCompleteHandleName, "ec%.8s%d%d", threadConstructionInfo.m_uniqueName, uniqueId, i);
+ threadStatus.m_eventCompleteHandle = CreateEventA(0, false, false, threadStatus.m_eventCompleteHandleName);
+
+ m_completeHandles[i] = threadStatus.m_eventCompleteHandle;
+
+ HANDLE handle = CreateThread(lpThreadAttributes, dwStackSize, lpStartAddress, lpParameter, dwCreationFlags, lpThreadId);
+ //SetThreadPriority( handle, THREAD_PRIORITY_HIGHEST );
+ // highest priority -- can cause erratic performance when numThreads > numCores
+ // we don't want worker threads to be higher priority than the main thread or the main thread could get
+ // totally shut out and unable to tell the workers to stop
+ //SetThreadPriority( handle, THREAD_PRIORITY_BELOW_NORMAL );
+
+ {
+ int processorId = i + 1; // leave processor 0 for main thread
+ DWORD_PTR teamMask = getProcessorTeamMask(procInfo, processorId);
+ if (teamMask)
+ {
+				// bind each thread to execute only on processors of its assigned team
+ // - for single-socket Intel x86 CPUs this has no effect (only a single, shared L3 cache so there is only 1 team)
+ // - for multi-socket Intel this will keep threads from migrating from one socket to another
+ // - for AMD Ryzen this will keep threads from migrating from one CCX to another
+ DWORD_PTR mask = teamMask & dwProcessAffinityMask;
+ if (mask)
+ {
+ SetThreadAffinityMask(handle, mask);
+ }
+ }
+ SetThreadIdealProcessor(handle, processorId);
+ }
+
+ threadStatus.m_taskId = i;
+ threadStatus.m_commandId = 0;
+ threadStatus.m_status = 0;
+ threadStatus.m_threadHandle = handle;
+ threadStatus.m_userThreadFunc = threadConstructionInfo.m_userThreadFunc;
+
+ printf("started %s thread %d with threadHandle %p\n", threadConstructionInfo.m_uniqueName, i, handle);
+ }
}
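On the event names: %.8s clamps m_uniqueName to 8 characters, so "es"/"ec" plus 8 name characters plus the two decimal counters fit the 32-byte buffers for any realistic uniqueId and thread index. A defensive variant (an assumption-level tweak, not the shipped code) would bound the write explicitly:

    // Sketch: same name format, explicit bound instead of sprintf.
    char name[32];
    _snprintf(name, sizeof(name) - 1, "es%.8s%d%d", uniqueName, uniqueId, i);
    name[sizeof(name) - 1] = '\0'; // _snprintf may omit the terminator on truncation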
///tell the task scheduler we are done with the SPU tasks
void btThreadSupportWin32::stopThreads()
{
- for ( int i = 0; i < m_activeThreadStatus.size(); i++ )
- {
- btThreadStatus& threadStatus = m_activeThreadStatus[ i ];
- if ( threadStatus.m_status > 0 )
- {
- WaitForSingleObject( threadStatus.m_eventCompleteHandle, INFINITE );
- }
-
- threadStatus.m_userPtr = NULL;
- SetEvent( threadStatus.m_eventStartHandle );
- WaitForSingleObject( threadStatus.m_eventCompleteHandle, INFINITE );
-
- CloseHandle( threadStatus.m_eventCompleteHandle );
- CloseHandle( threadStatus.m_eventStartHandle );
- CloseHandle( threadStatus.m_threadHandle );
-
- }
-
- m_activeThreadStatus.clear();
- m_completeHandles.clear();
+ for (int i = 0; i < m_activeThreadStatus.size(); i++)
+ {
+ btThreadStatus& threadStatus = m_activeThreadStatus[i];
+ if (threadStatus.m_status > 0)
+ {
+ WaitForSingleObject(threadStatus.m_eventCompleteHandle, INFINITE);
+ }
+
+ threadStatus.m_userPtr = NULL;
+ SetEvent(threadStatus.m_eventStartHandle);
+ WaitForSingleObject(threadStatus.m_eventCompleteHandle, INFINITE);
+
+ CloseHandle(threadStatus.m_eventCompleteHandle);
+ CloseHandle(threadStatus.m_eventStartHandle);
+ CloseHandle(threadStatus.m_threadHandle);
+ }
+
+ m_activeThreadStatus.clear();
+ m_completeHandles.clear();
}
-
class btWin32CriticalSection : public btCriticalSection
{
private:
- CRITICAL_SECTION mCriticalSection;
+ CRITICAL_SECTION mCriticalSection;
public:
- btWin32CriticalSection()
- {
- InitializeCriticalSection( &mCriticalSection );
- }
-
- ~btWin32CriticalSection()
- {
- DeleteCriticalSection( &mCriticalSection );
- }
-
- void lock()
- {
- EnterCriticalSection( &mCriticalSection );
- }
-
- void unlock()
- {
- LeaveCriticalSection( &mCriticalSection );
- }
+ btWin32CriticalSection()
+ {
+ InitializeCriticalSection(&mCriticalSection);
+ }
+
+ ~btWin32CriticalSection()
+ {
+ DeleteCriticalSection(&mCriticalSection);
+ }
+
+ void lock()
+ {
+ EnterCriticalSection(&mCriticalSection);
+ }
+
+ void unlock()
+ {
+ LeaveCriticalSection(&mCriticalSection);
+ }
};
-
btCriticalSection* btThreadSupportWin32::createCriticalSection()
{
- unsigned char* mem = (unsigned char*) btAlignedAlloc( sizeof( btWin32CriticalSection ), 16 );
- btWin32CriticalSection* cs = new( mem ) btWin32CriticalSection();
- return cs;
+ unsigned char* mem = (unsigned char*)btAlignedAlloc(sizeof(btWin32CriticalSection), 16);
+ btWin32CriticalSection* cs = new (mem) btWin32CriticalSection();
+ return cs;
}
-void btThreadSupportWin32::deleteCriticalSection( btCriticalSection* criticalSection )
+void btThreadSupportWin32::deleteCriticalSection(btCriticalSection* criticalSection)
{
- criticalSection->~btCriticalSection();
- btAlignedFree( criticalSection );
+ criticalSection->~btCriticalSection();
+ btAlignedFree(criticalSection);
}
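createCriticalSection pairs btAlignedAlloc with placement new, and deleteCriticalSection mirrors it with an explicit destructor call before btAlignedFree. The generic shape of that pattern, sketched with hypothetical helpers:

    #include <new> // placement new

    template <typename T>
    T* btAlignedNew16()
    {
        void* mem = btAlignedAlloc(sizeof(T), 16);
        return new (mem) T(); // construct in the pre-aligned storage
    }

    template <typename T>
    void btAlignedDelete16(T* obj)
    {
        obj->~T();          // run the destructor by hand...
        btAlignedFree(obj); // ...then return the raw block
    }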
-
-btThreadSupportInterface* btThreadSupportInterface::create( const ConstructionInfo& info )
+btThreadSupportInterface* btThreadSupportInterface::create(const ConstructionInfo& info)
{
- return new btThreadSupportWin32( info );
+ return new btThreadSupportWin32(info);
}
-
-
-#endif //defined(_WIN32) && BT_THREADSAFE
-
+#endif //defined(_WIN32) && BT_THREADSAFE