Diffstat (limited to 'thirdparty/bullet/LinearMath/btThreads.cpp')
-rw-r--r--  thirdparty/bullet/LinearMath/btThreads.cpp  941
1 file changed, 457 insertions(+), 484 deletions(-)
diff --git a/thirdparty/bullet/LinearMath/btThreads.cpp b/thirdparty/bullet/LinearMath/btThreads.cpp
index c037626ffb..69a86799fa 100644
--- a/thirdparty/bullet/LinearMath/btThreads.cpp
+++ b/thirdparty/bullet/LinearMath/btThreads.cpp
@@ -12,18 +12,15 @@ subject to the following restrictions:
3. This notice may not be removed or altered from any source distribution.
*/
-
#include "btThreads.h"
#include "btQuickprof.h"
#include <algorithm> // for min and max
-
#if BT_USE_OPENMP && BT_THREADSAFE
#include <omp.h>
-#endif // #if BT_USE_OPENMP && BT_THREADSAFE
-
+#endif // #if BT_USE_OPENMP && BT_THREADSAFE
#if BT_USE_PPL && BT_THREADSAFE
@@ -32,8 +29,7 @@ subject to the following restrictions:
// Visual Studio 2010 and later should come with it
#include <concrtrm.h> // for GetProcessorCount()
-#endif // #if BT_USE_PPL && BT_THREADSAFE
-
+#endif // #if BT_USE_PPL && BT_THREADSAFE
#if BT_USE_TBB && BT_THREADSAFE
@@ -44,8 +40,7 @@ subject to the following restrictions:
#include <tbb/parallel_for.h>
#include <tbb/blocked_range.h>
-#endif // #if BT_USE_TBB && BT_THREADSAFE
-
+#endif // #if BT_USE_TBB && BT_THREADSAFE
#if BT_THREADSAFE
//
@@ -53,7 +48,7 @@ subject to the following restrictions:
// Using ordinary system-provided mutexes like Windows critical sections was noticeably slower
// presumably because when it fails to lock at first it would sleep the thread and trigger costly
// context switching.
-//
+//
#if __cplusplus >= 201103L
@@ -61,25 +56,24 @@ subject to the following restrictions:
// on GCC or Clang you need to compile with -std=c++11
#define USE_CPP11_ATOMICS 1
-#elif defined( _MSC_VER )
+#elif defined(_MSC_VER)
// on MSVC, use intrinsics instead
#define USE_MSVC_INTRINSICS 1
-#elif defined( __GNUC__ ) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7))
+#elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7))
// available since GCC 4.7 and some versions of clang
// todo: check for clang
#define USE_GCC_BUILTIN_ATOMICS 1
-#elif defined( __GNUC__ ) && (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)
+#elif defined(__GNUC__) && (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)
// available since GCC 4.1
#define USE_GCC_BUILTIN_ATOMICS_OLD 1
#endif
-
#if USE_CPP11_ATOMICS
#include <atomic>
@@ -89,27 +83,26 @@ subject to the following restrictions:
bool btSpinMutex::tryLock()
{
- std::atomic<int>* aDest = reinterpret_cast<std::atomic<int>*>(&mLock);
- int expected = 0;
- return std::atomic_compare_exchange_weak_explicit( aDest, &expected, int(1), std::memory_order_acq_rel, std::memory_order_acquire );
+ std::atomic<int>* aDest = reinterpret_cast<std::atomic<int>*>(&mLock);
+ int expected = 0;
+ return std::atomic_compare_exchange_weak_explicit(aDest, &expected, int(1), std::memory_order_acq_rel, std::memory_order_acquire);
}
void btSpinMutex::lock()
{
- // note: this lock does not sleep the thread.
- while (! tryLock())
- {
- // spin
- }
+ // note: this lock does not sleep the thread.
+ while (!tryLock())
+ {
+ // spin
+ }
}
void btSpinMutex::unlock()
{
- std::atomic<int>* aDest = reinterpret_cast<std::atomic<int>*>(&mLock);
- std::atomic_store_explicit( aDest, int(0), std::memory_order_release );
+ std::atomic<int>* aDest = reinterpret_cast<std::atomic<int>*>(&mLock);
+ std::atomic_store_explicit(aDest, int(0), std::memory_order_release);
}
-
#elif USE_MSVC_INTRINSICS
#define WIN32_LEAN_AND_MEAN
@@ -117,148 +110,142 @@ void btSpinMutex::unlock()
#include <windows.h>
#include <intrin.h>
-#define THREAD_LOCAL_STATIC __declspec( thread ) static
-
+#define THREAD_LOCAL_STATIC __declspec(thread) static
bool btSpinMutex::tryLock()
{
- volatile long* aDest = reinterpret_cast<long*>(&mLock);
- return ( 0 == _InterlockedCompareExchange( aDest, 1, 0) );
+ volatile long* aDest = reinterpret_cast<long*>(&mLock);
+ return (0 == _InterlockedCompareExchange(aDest, 1, 0));
}
void btSpinMutex::lock()
{
- // note: this lock does not sleep the thread
- while (! tryLock())
- {
- // spin
- }
+ // note: this lock does not sleep the thread
+ while (!tryLock())
+ {
+ // spin
+ }
}
void btSpinMutex::unlock()
{
- volatile long* aDest = reinterpret_cast<long*>( &mLock );
- _InterlockedExchange( aDest, 0 );
+ volatile long* aDest = reinterpret_cast<long*>(&mLock);
+ _InterlockedExchange(aDest, 0);
}
#elif USE_GCC_BUILTIN_ATOMICS
#define THREAD_LOCAL_STATIC static __thread
-
bool btSpinMutex::tryLock()
{
- int expected = 0;
- bool weak = false;
- const int memOrderSuccess = __ATOMIC_ACQ_REL;
- const int memOrderFail = __ATOMIC_ACQUIRE;
- return __atomic_compare_exchange_n(&mLock, &expected, int(1), weak, memOrderSuccess, memOrderFail);
+ int expected = 0;
+ bool weak = false;
+ const int memOrderSuccess = __ATOMIC_ACQ_REL;
+ const int memOrderFail = __ATOMIC_ACQUIRE;
+ return __atomic_compare_exchange_n(&mLock, &expected, int(1), weak, memOrderSuccess, memOrderFail);
}
void btSpinMutex::lock()
{
- // note: this lock does not sleep the thread
- while (! tryLock())
- {
- // spin
- }
+ // note: this lock does not sleep the thread
+ while (!tryLock())
+ {
+ // spin
+ }
}
void btSpinMutex::unlock()
{
- __atomic_store_n(&mLock, int(0), __ATOMIC_RELEASE);
+ __atomic_store_n(&mLock, int(0), __ATOMIC_RELEASE);
}
#elif USE_GCC_BUILTIN_ATOMICS_OLD
-
#define THREAD_LOCAL_STATIC static __thread
bool btSpinMutex::tryLock()
{
- return __sync_bool_compare_and_swap(&mLock, int(0), int(1));
+ return __sync_bool_compare_and_swap(&mLock, int(0), int(1));
}
void btSpinMutex::lock()
{
- // note: this lock does not sleep the thread
- while (! tryLock())
- {
- // spin
- }
+ // note: this lock does not sleep the thread
+ while (!tryLock())
+ {
+ // spin
+ }
}
void btSpinMutex::unlock()
{
- // write 0
- __sync_fetch_and_and(&mLock, int(0));
+ // write 0
+ __sync_fetch_and_and(&mLock, int(0));
}
-#else //#elif USE_MSVC_INTRINSICS
+#else //#elif USE_MSVC_INTRINSICS
#error "no threading primitives defined -- unknown platform"
#endif //#else //#elif USE_MSVC_INTRINSICS
-#else //#if BT_THREADSAFE
+#else //#if BT_THREADSAFE
// These should not be called ever
void btSpinMutex::lock()
{
- btAssert( !"unimplemented btSpinMutex::lock() called" );
+ btAssert(!"unimplemented btSpinMutex::lock() called");
}
void btSpinMutex::unlock()
{
- btAssert( !"unimplemented btSpinMutex::unlock() called" );
+ btAssert(!"unimplemented btSpinMutex::unlock() called");
}
bool btSpinMutex::tryLock()
{
- btAssert( !"unimplemented btSpinMutex::tryLock() called" );
- return true;
+ btAssert(!"unimplemented btSpinMutex::tryLock() called");
+ return true;
}
#define THREAD_LOCAL_STATIC static
-#endif // #else //#if BT_THREADSAFE
-
+#endif // #else //#if BT_THREADSAFE
struct ThreadsafeCounter
{
- unsigned int mCounter;
- btSpinMutex mMutex;
-
- ThreadsafeCounter()
- {
- mCounter = 0;
- --mCounter; // first count should come back 0
- }
-
- unsigned int getNext()
- {
- // no need to optimize this with atomics, it is only called ONCE per thread!
- mMutex.lock();
- mCounter++;
- if ( mCounter >= BT_MAX_THREAD_COUNT )
- {
- btAssert( !"thread counter exceeded" );
- // wrap back to the first worker index
- mCounter = 1;
- }
- unsigned int val = mCounter;
- mMutex.unlock();
- return val;
- }
+ unsigned int mCounter;
+ btSpinMutex mMutex;
+
+ ThreadsafeCounter()
+ {
+ mCounter = 0;
+ --mCounter; // first count should come back 0
+ }
+
+ unsigned int getNext()
+ {
+ // no need to optimize this with atomics, it is only called ONCE per thread!
+ mMutex.lock();
+ mCounter++;
+ if (mCounter >= BT_MAX_THREAD_COUNT)
+ {
+ btAssert(!"thread counter exceeded");
+ // wrap back to the first worker index
+ mCounter = 1;
+ }
+ unsigned int val = mCounter;
+ mMutex.unlock();
+ return val;
+ }
};
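A quick illustration of the constructor trick above (assuming ordinary unsigned wraparound): mCounter is initialized to 0 and then decremented to ~0U, so the first getNext() wraps it back to 0 for the main thread.

// hypothetical illustration -- not part of the patch
ThreadsafeCounter counter;
unsigned int first = counter.getNext();   // returns 0 (main thread)
unsigned int second = counter.getNext();  // returns 1 (first worker)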
-
-static btITaskScheduler* gBtTaskScheduler;
+static btITaskScheduler* gBtTaskScheduler = 0;
static int gThreadsRunningCounter = 0; // useful for detecting if we are trying to do nested parallel-for calls
static btSpinMutex gThreadsRunningCounterMutex;
static ThreadsafeCounter gThreadCounter;
-
//
// BT_DETECT_BAD_THREAD_INDEX tries to detect when there are multiple threads assigned the same thread index.
//
@@ -276,7 +263,7 @@ static ThreadsafeCounter gThreadCounter;
// We allocate thread-indexes as needed with a sequential global thread counter.
//
// Our simple thread-counting scheme falls apart if the task scheduler destroys some threads but
-// continues to re-use other threads and the application repeatedly resizes the thread pool of the
+// continues to re-use other threads and the application repeatedly resizes the thread pool of the
// task scheduler.
// In order to prevent the thread-counter from exceeding the global max (BT_MAX_THREAD_COUNT), we
// wrap the thread counter back to 1. This should only happen if the worker threads have all been
@@ -290,197 +277,192 @@ static ThreadsafeCounter gThreadCounter;
typedef DWORD ThreadId_t;
const static ThreadId_t kInvalidThreadId = 0;
-ThreadId_t gDebugThreadIds[ BT_MAX_THREAD_COUNT ];
+ThreadId_t gDebugThreadIds[BT_MAX_THREAD_COUNT];
static ThreadId_t getDebugThreadId()
{
- return GetCurrentThreadId();
+ return GetCurrentThreadId();
}
-#endif // #if BT_DETECT_BAD_THREAD_INDEX
-
+#endif // #if BT_DETECT_BAD_THREAD_INDEX
// return a unique index per thread, main thread is 0, worker threads are in [1, BT_MAX_THREAD_COUNT)
unsigned int btGetCurrentThreadIndex()
{
- const unsigned int kNullIndex = ~0U;
- THREAD_LOCAL_STATIC unsigned int sThreadIndex = kNullIndex;
- if ( sThreadIndex == kNullIndex )
- {
- sThreadIndex = gThreadCounter.getNext();
- btAssert( sThreadIndex < BT_MAX_THREAD_COUNT );
- }
+ const unsigned int kNullIndex = ~0U;
+ THREAD_LOCAL_STATIC unsigned int sThreadIndex = kNullIndex;
+ if (sThreadIndex == kNullIndex)
+ {
+ sThreadIndex = gThreadCounter.getNext();
+ btAssert(sThreadIndex < BT_MAX_THREAD_COUNT);
+ }
#if BT_DETECT_BAD_THREAD_INDEX
- if ( gBtTaskScheduler && sThreadIndex > 0 )
- {
- ThreadId_t tid = getDebugThreadId();
- // if not set
- if ( gDebugThreadIds[ sThreadIndex ] == kInvalidThreadId )
- {
- // set it
- gDebugThreadIds[ sThreadIndex ] = tid;
- }
- else
- {
- if ( gDebugThreadIds[ sThreadIndex ] != tid )
- {
- // this could indicate the task scheduler is breaking our assumptions about
- // how threads are managed when threadpool is resized
- btAssert( !"there are 2 or more threads with the same thread-index!" );
- __debugbreak();
- }
- }
- }
-#endif // #if BT_DETECT_BAD_THREAD_INDEX
- return sThreadIndex;
+ if (gBtTaskScheduler && sThreadIndex > 0)
+ {
+ ThreadId_t tid = getDebugThreadId();
+ // if not set
+ if (gDebugThreadIds[sThreadIndex] == kInvalidThreadId)
+ {
+ // set it
+ gDebugThreadIds[sThreadIndex] = tid;
+ }
+ else
+ {
+ if (gDebugThreadIds[sThreadIndex] != tid)
+ {
+ // this could indicate the task scheduler is breaking our assumptions about
+ // how threads are managed when threadpool is resized
+ btAssert(!"there are 2 or more threads with the same thread-index!");
+ __debugbreak();
+ }
+ }
+ }
+#endif // #if BT_DETECT_BAD_THREAD_INDEX
+ return sThreadIndex;
}
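A sketch of why per-thread indices matter (the array and function are hypothetical): every simultaneously running thread gets a distinct index below BT_MAX_THREAD_COUNT, so per-thread scratch storage can be written without any locking.

// hypothetical lock-free per-thread accumulation
static int gPerThreadWorkCount[BT_MAX_THREAD_COUNT];  // hypothetical

void countWorkItem()
{
	// distinct index per running thread => no write contention
	gPerThreadWorkCount[btGetCurrentThreadIndex()]++;
}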
bool btIsMainThread()
{
- return btGetCurrentThreadIndex() == 0;
+ return btGetCurrentThreadIndex() == 0;
}
void btResetThreadIndexCounter()
{
- // for when all current worker threads are destroyed
- btAssert( btIsMainThread() );
- gThreadCounter.mCounter = 0;
+ // for when all current worker threads are destroyed
+ btAssert(btIsMainThread());
+ gThreadCounter.mCounter = 0;
}
-btITaskScheduler::btITaskScheduler( const char* name )
+btITaskScheduler::btITaskScheduler(const char* name)
{
- m_name = name;
- m_savedThreadCounter = 0;
- m_isActive = false;
+ m_name = name;
+ m_savedThreadCounter = 0;
+ m_isActive = false;
}
void btITaskScheduler::activate()
{
- // gThreadCounter is used to assign a thread-index to each worker thread in a task scheduler.
- // The main thread is always thread-index 0, and worker threads are numbered from 1 to 63 (BT_MAX_THREAD_COUNT-1)
- // The thread-indexes need to be unique amongst the threads that can be running simultaneously.
- // Since only one task scheduler can be used at a time, it is OK for a pair of threads that belong to different
- // task schedulers to share the same thread index because they can't be running at the same time.
- // So each task scheduler needs to keep its own thread counter value
- if ( !m_isActive )
- {
- gThreadCounter.mCounter = m_savedThreadCounter; // restore saved thread counter
- m_isActive = true;
- }
+ // gThreadCounter is used to assign a thread-index to each worker thread in a task scheduler.
+ // The main thread is always thread-index 0, and worker threads are numbered from 1 to 63 (BT_MAX_THREAD_COUNT-1)
+ // The thread-indexes need to be unique amongst the threads that can be running simultaneously.
+ // Since only one task scheduler can be used at a time, it is OK for a pair of threads that belong to different
+ // task schedulers to share the same thread index because they can't be running at the same time.
+ // So each task scheduler needs to keep its own thread counter value
+ if (!m_isActive)
+ {
+ gThreadCounter.mCounter = m_savedThreadCounter; // restore saved thread counter
+ m_isActive = true;
+ }
}
void btITaskScheduler::deactivate()
{
- if ( m_isActive )
- {
- m_savedThreadCounter = gThreadCounter.mCounter; // save thread counter
- m_isActive = false;
- }
+ if (m_isActive)
+ {
+ m_savedThreadCounter = gThreadCounter.mCounter; // save thread counter
+ m_isActive = false;
+ }
}
void btPushThreadsAreRunning()
{
- gThreadsRunningCounterMutex.lock();
- gThreadsRunningCounter++;
- gThreadsRunningCounterMutex.unlock();
+ gThreadsRunningCounterMutex.lock();
+ gThreadsRunningCounter++;
+ gThreadsRunningCounterMutex.unlock();
}
void btPopThreadsAreRunning()
{
- gThreadsRunningCounterMutex.lock();
- gThreadsRunningCounter--;
- gThreadsRunningCounterMutex.unlock();
+ gThreadsRunningCounterMutex.lock();
+ gThreadsRunningCounter--;
+ gThreadsRunningCounterMutex.unlock();
}
bool btThreadsAreRunning()
{
- return gThreadsRunningCounter != 0;
+ return gThreadsRunningCounter != 0;
}
-
-void btSetTaskScheduler( btITaskScheduler* ts )
+void btSetTaskScheduler(btITaskScheduler* ts)
{
- int threadId = btGetCurrentThreadIndex(); // make sure we call this on main thread at least once before any workers run
- if ( threadId != 0 )
- {
- btAssert( !"btSetTaskScheduler must be called from the main thread!" );
- return;
- }
- if ( gBtTaskScheduler )
- {
- // deactivate old task scheduler
- gBtTaskScheduler->deactivate();
- }
- gBtTaskScheduler = ts;
- if ( ts )
- {
- // activate new task scheduler
- ts->activate();
- }
+ int threadId = btGetCurrentThreadIndex(); // make sure we call this on main thread at least once before any workers run
+ if (threadId != 0)
+ {
+ btAssert(!"btSetTaskScheduler must be called from the main thread!");
+ return;
+ }
+ if (gBtTaskScheduler)
+ {
+ // deactivate old task scheduler
+ gBtTaskScheduler->deactivate();
+ }
+ gBtTaskScheduler = ts;
+ if (ts)
+ {
+ // activate new task scheduler
+ ts->activate();
+ }
}
-
btITaskScheduler* btGetTaskScheduler()
{
- return gBtTaskScheduler;
+ return gBtTaskScheduler;
}
-
-void btParallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body )
+void btParallelFor(int iBegin, int iEnd, int grainSize, const btIParallelForBody& body)
{
#if BT_THREADSAFE
#if BT_DETECT_BAD_THREAD_INDEX
- if ( !btThreadsAreRunning() )
- {
- // clear out thread ids
- for ( int i = 0; i < BT_MAX_THREAD_COUNT; ++i )
- {
- gDebugThreadIds[ i ] = kInvalidThreadId;
- }
- }
-#endif // #if BT_DETECT_BAD_THREAD_INDEX
+ if (!btThreadsAreRunning())
+ {
+ // clear out thread ids
+ for (int i = 0; i < BT_MAX_THREAD_COUNT; ++i)
+ {
+ gDebugThreadIds[i] = kInvalidThreadId;
+ }
+ }
+#endif // #if BT_DETECT_BAD_THREAD_INDEX
- btAssert( gBtTaskScheduler != NULL ); // call btSetTaskScheduler() with a valid task scheduler first!
- gBtTaskScheduler->parallelFor( iBegin, iEnd, grainSize, body );
+ btAssert(gBtTaskScheduler != NULL); // call btSetTaskScheduler() with a valid task scheduler first!
+ gBtTaskScheduler->parallelFor(iBegin, iEnd, grainSize, body);
-#else // #if BT_THREADSAFE
+#else // #if BT_THREADSAFE
- // non-parallel version of btParallelFor
- btAssert( !"called btParallelFor in non-threadsafe build. enable BT_THREADSAFE" );
- body.forLoop( iBegin, iEnd );
+ // non-parallel version of btParallelFor
+ btAssert(!"called btParallelFor in non-threadsafe build. enable BT_THREADSAFE");
+ body.forLoop(iBegin, iEnd);
-#endif// #if BT_THREADSAFE
+#endif // #if BT_THREADSAFE
}
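A hedged caller-side sketch for btParallelFor (the body type and data are hypothetical): callers derive from btIParallelForBody and implement forLoop() over a sub-range, which the active scheduler dispatches in grains.

// hypothetical body that scales an array of scalars in parallel
struct ScaleBody : public btIParallelForBody
{
	btScalar* mData;  // hypothetical target array
	btScalar mFactor;
	ScaleBody(btScalar* data, btScalar factor) : mData(data), mFactor(factor) {}
	void forLoop(int iBegin, int iEnd) const BT_OVERRIDE
	{
		for (int i = iBegin; i < iEnd; ++i)
			mData[i] *= mFactor;
	}
};

// usage: scale 1024 elements in grains of at least 64 iterations
//   ScaleBody body(data, btScalar(2));
//   btParallelFor(0, 1024, 64, body);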
-btScalar btParallelSum( int iBegin, int iEnd, int grainSize, const btIParallelSumBody& body )
+btScalar btParallelSum(int iBegin, int iEnd, int grainSize, const btIParallelSumBody& body)
{
#if BT_THREADSAFE
#if BT_DETECT_BAD_THREAD_INDEX
- if ( !btThreadsAreRunning() )
- {
- // clear out thread ids
- for ( int i = 0; i < BT_MAX_THREAD_COUNT; ++i )
- {
- gDebugThreadIds[ i ] = kInvalidThreadId;
- }
- }
-#endif // #if BT_DETECT_BAD_THREAD_INDEX
+ if (!btThreadsAreRunning())
+ {
+ // clear out thread ids
+ for (int i = 0; i < BT_MAX_THREAD_COUNT; ++i)
+ {
+ gDebugThreadIds[i] = kInvalidThreadId;
+ }
+ }
+#endif // #if BT_DETECT_BAD_THREAD_INDEX
- btAssert( gBtTaskScheduler != NULL ); // call btSetTaskScheduler() with a valid task scheduler first!
- return gBtTaskScheduler->parallelSum( iBegin, iEnd, grainSize, body );
+ btAssert(gBtTaskScheduler != NULL); // call btSetTaskScheduler() with a valid task scheduler first!
+ return gBtTaskScheduler->parallelSum(iBegin, iEnd, grainSize, body);
-#else // #if BT_THREADSAFE
+#else // #if BT_THREADSAFE
- // non-parallel version of btParallelSum
- btAssert( !"called btParallelFor in non-threadsafe build. enable BT_THREADSAFE" );
- return body.sumLoop( iBegin, iEnd );
+ // non-parallel version of btParallelSum
+	btAssert(!"called btParallelSum in non-threadsafe build. enable BT_THREADSAFE");
+ return body.sumLoop(iBegin, iEnd);
-#endif //#else // #if BT_THREADSAFE
+#endif //#else // #if BT_THREADSAFE
}
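The companion sketch for btParallelSum (again with hypothetical names): the body returns a partial sum for its sub-range, and the scheduler combines the partials into the final result.

// hypothetical body that sums an array of scalars in parallel
struct ArraySumBody : public btIParallelSumBody
{
	const btScalar* mData;  // hypothetical source array
	ArraySumBody(const btScalar* data) : mData(data) {}
	btScalar sumLoop(int iBegin, int iEnd) const BT_OVERRIDE
	{
		btScalar sum = btScalar(0);
		for (int i = iBegin; i < iEnd; ++i)
			sum += mData[i];
		return sum;
	}
};

// usage: btScalar total = btParallelSum(0, 1024, 64, ArraySumBody(data));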
-
///
/// btTaskSchedulerSequential -- non-threaded implementation of task scheduler
/// (really just useful for testing performance of single threaded vs multi)
@@ -488,86 +470,86 @@ btScalar btParallelSum( int iBegin, int iEnd, int grainSize, const btIParallelSu
class btTaskSchedulerSequential : public btITaskScheduler
{
public:
- btTaskSchedulerSequential() : btITaskScheduler( "Sequential" ) {}
- virtual int getMaxNumThreads() const BT_OVERRIDE { return 1; }
- virtual int getNumThreads() const BT_OVERRIDE { return 1; }
- virtual void setNumThreads( int numThreads ) BT_OVERRIDE {}
- virtual void parallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body ) BT_OVERRIDE
- {
- BT_PROFILE( "parallelFor_sequential" );
- body.forLoop( iBegin, iEnd );
- }
- virtual btScalar parallelSum( int iBegin, int iEnd, int grainSize, const btIParallelSumBody& body ) BT_OVERRIDE
- {
- BT_PROFILE( "parallelSum_sequential" );
- return body.sumLoop( iBegin, iEnd );
- }
+ btTaskSchedulerSequential() : btITaskScheduler("Sequential") {}
+ virtual int getMaxNumThreads() const BT_OVERRIDE { return 1; }
+ virtual int getNumThreads() const BT_OVERRIDE { return 1; }
+ virtual void setNumThreads(int numThreads) BT_OVERRIDE {}
+ virtual void parallelFor(int iBegin, int iEnd, int grainSize, const btIParallelForBody& body) BT_OVERRIDE
+ {
+ BT_PROFILE("parallelFor_sequential");
+ body.forLoop(iBegin, iEnd);
+ }
+ virtual btScalar parallelSum(int iBegin, int iEnd, int grainSize, const btIParallelSumBody& body) BT_OVERRIDE
+ {
+ BT_PROFILE("parallelSum_sequential");
+ return body.sumLoop(iBegin, iEnd);
+ }
};
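As the class comment suggests, one use for the sequential scheduler is a single-threaded baseline when profiling:

// hypothetical profiling baseline -- run all parallel calls on one thread
btSetTaskScheduler(btGetSequentialTaskScheduler());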
-
#if BT_USE_OPENMP && BT_THREADSAFE
///
/// btTaskSchedulerOpenMP -- wrapper around OpenMP task scheduler
///
class btTaskSchedulerOpenMP : public btITaskScheduler
{
- int m_numThreads;
+ int m_numThreads;
+
public:
- btTaskSchedulerOpenMP() : btITaskScheduler( "OpenMP" )
- {
- m_numThreads = 0;
- }
- virtual int getMaxNumThreads() const BT_OVERRIDE
- {
- return omp_get_max_threads();
- }
- virtual int getNumThreads() const BT_OVERRIDE
- {
- return m_numThreads;
- }
- virtual void setNumThreads( int numThreads ) BT_OVERRIDE
- {
- // With OpenMP, because it is a standard with various implementations, we can't
- // know for sure if every implementation has the same behavior of destroying all
- // previous threads when resizing the threadpool
- m_numThreads = ( std::max )( 1, ( std::min )( int( BT_MAX_THREAD_COUNT ), numThreads ) );
- omp_set_num_threads( 1 ); // hopefully, all previous threads get destroyed here
- omp_set_num_threads( m_numThreads );
- m_savedThreadCounter = 0;
- if ( m_isActive )
- {
- btResetThreadIndexCounter();
- }
- }
- virtual void parallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body ) BT_OVERRIDE
- {
- BT_PROFILE( "parallelFor_OpenMP" );
- btPushThreadsAreRunning();
-#pragma omp parallel for schedule( static, 1 )
- for ( int i = iBegin; i < iEnd; i += grainSize )
- {
- BT_PROFILE( "OpenMP_forJob" );
- body.forLoop( i, ( std::min )( i + grainSize, iEnd ) );
- }
- btPopThreadsAreRunning();
- }
- virtual btScalar parallelSum( int iBegin, int iEnd, int grainSize, const btIParallelSumBody& body ) BT_OVERRIDE
- {
- BT_PROFILE( "parallelFor_OpenMP" );
- btPushThreadsAreRunning();
- btScalar sum = btScalar( 0 );
-#pragma omp parallel for schedule( static, 1 ) reduction(+:sum)
- for ( int i = iBegin; i < iEnd; i += grainSize )
- {
- BT_PROFILE( "OpenMP_sumJob" );
- sum += body.sumLoop( i, ( std::min )( i + grainSize, iEnd ) );
- }
- btPopThreadsAreRunning();
- return sum;
- }
+ btTaskSchedulerOpenMP() : btITaskScheduler("OpenMP")
+ {
+ m_numThreads = 0;
+ }
+ virtual int getMaxNumThreads() const BT_OVERRIDE
+ {
+ return omp_get_max_threads();
+ }
+ virtual int getNumThreads() const BT_OVERRIDE
+ {
+ return m_numThreads;
+ }
+ virtual void setNumThreads(int numThreads) BT_OVERRIDE
+ {
+ // With OpenMP, because it is a standard with various implementations, we can't
+ // know for sure if every implementation has the same behavior of destroying all
+ // previous threads when resizing the threadpool
+ m_numThreads = (std::max)(1, (std::min)(int(BT_MAX_THREAD_COUNT), numThreads));
+ omp_set_num_threads(1); // hopefully, all previous threads get destroyed here
+ omp_set_num_threads(m_numThreads);
+ m_savedThreadCounter = 0;
+ if (m_isActive)
+ {
+ btResetThreadIndexCounter();
+ }
+ }
+ virtual void parallelFor(int iBegin, int iEnd, int grainSize, const btIParallelForBody& body) BT_OVERRIDE
+ {
+ BT_PROFILE("parallelFor_OpenMP");
+ btPushThreadsAreRunning();
+#pragma omp parallel for schedule(static, 1)
+ for (int i = iBegin; i < iEnd; i += grainSize)
+ {
+ BT_PROFILE("OpenMP_forJob");
+ body.forLoop(i, (std::min)(i + grainSize, iEnd));
+ }
+ btPopThreadsAreRunning();
+ }
+ virtual btScalar parallelSum(int iBegin, int iEnd, int grainSize, const btIParallelSumBody& body) BT_OVERRIDE
+ {
+		BT_PROFILE("parallelSum_OpenMP");
+ btPushThreadsAreRunning();
+ btScalar sum = btScalar(0);
+#pragma omp parallel for schedule(static, 1) reduction(+ \
+ : sum)
+ for (int i = iBegin; i < iEnd; i += grainSize)
+ {
+ BT_PROFILE("OpenMP_sumJob");
+ sum += body.sumLoop(i, (std::min)(i + grainSize, iEnd));
+ }
+ btPopThreadsAreRunning();
+ return sum;
+ }
};
-#endif // #if BT_USE_OPENMP && BT_THREADSAFE
-
+#endif // #if BT_USE_OPENMP && BT_THREADSAFE
#if BT_USE_TBB && BT_THREADSAFE
///
@@ -575,96 +557,94 @@ public:
///
class btTaskSchedulerTBB : public btITaskScheduler
{
- int m_numThreads;
- tbb::task_scheduler_init* m_tbbSchedulerInit;
+ int m_numThreads;
+ tbb::task_scheduler_init* m_tbbSchedulerInit;
public:
- btTaskSchedulerTBB() : btITaskScheduler( "IntelTBB" )
- {
- m_numThreads = 0;
- m_tbbSchedulerInit = NULL;
- }
- ~btTaskSchedulerTBB()
- {
- if ( m_tbbSchedulerInit )
- {
- delete m_tbbSchedulerInit;
- m_tbbSchedulerInit = NULL;
- }
- }
-
- virtual int getMaxNumThreads() const BT_OVERRIDE
- {
- return tbb::task_scheduler_init::default_num_threads();
- }
- virtual int getNumThreads() const BT_OVERRIDE
- {
- return m_numThreads;
- }
- virtual void setNumThreads( int numThreads ) BT_OVERRIDE
- {
- m_numThreads = ( std::max )( 1, ( std::min )( int(BT_MAX_THREAD_COUNT), numThreads ) );
- if ( m_tbbSchedulerInit )
- {
- // destroys all previous threads
- delete m_tbbSchedulerInit;
- m_tbbSchedulerInit = NULL;
- }
- m_tbbSchedulerInit = new tbb::task_scheduler_init( m_numThreads );
- m_savedThreadCounter = 0;
- if ( m_isActive )
- {
- btResetThreadIndexCounter();
- }
- }
- struct ForBodyAdapter
- {
- const btIParallelForBody* mBody;
-
- ForBodyAdapter( const btIParallelForBody* body ) : mBody( body ) {}
- void operator()( const tbb::blocked_range<int>& range ) const
- {
- BT_PROFILE( "TBB_forJob" );
- mBody->forLoop( range.begin(), range.end() );
- }
- };
- virtual void parallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body ) BT_OVERRIDE
- {
- BT_PROFILE( "parallelFor_TBB" );
- ForBodyAdapter tbbBody( &body );
- btPushThreadsAreRunning();
- tbb::parallel_for( tbb::blocked_range<int>( iBegin, iEnd, grainSize ),
- tbbBody,
- tbb::simple_partitioner()
- );
- btPopThreadsAreRunning();
- }
- struct SumBodyAdapter
- {
- const btIParallelSumBody* mBody;
- btScalar mSum;
-
- SumBodyAdapter( const btIParallelSumBody* body ) : mBody( body ), mSum( btScalar( 0 ) ) {}
- SumBodyAdapter( const SumBodyAdapter& src, tbb::split ) : mBody( src.mBody ), mSum( btScalar( 0 ) ) {}
- void join( const SumBodyAdapter& src ) { mSum += src.mSum; }
- void operator()( const tbb::blocked_range<int>& range )
- {
- BT_PROFILE( "TBB_sumJob" );
- mSum += mBody->sumLoop( range.begin(), range.end() );
- }
- };
- virtual btScalar parallelSum( int iBegin, int iEnd, int grainSize, const btIParallelSumBody& body ) BT_OVERRIDE
- {
- BT_PROFILE( "parallelSum_TBB" );
- SumBodyAdapter tbbBody( &body );
- btPushThreadsAreRunning();
- tbb::parallel_deterministic_reduce( tbb::blocked_range<int>( iBegin, iEnd, grainSize ), tbbBody );
- btPopThreadsAreRunning();
- return tbbBody.mSum;
- }
+ btTaskSchedulerTBB() : btITaskScheduler("IntelTBB")
+ {
+ m_numThreads = 0;
+ m_tbbSchedulerInit = NULL;
+ }
+ ~btTaskSchedulerTBB()
+ {
+ if (m_tbbSchedulerInit)
+ {
+ delete m_tbbSchedulerInit;
+ m_tbbSchedulerInit = NULL;
+ }
+ }
+
+ virtual int getMaxNumThreads() const BT_OVERRIDE
+ {
+ return tbb::task_scheduler_init::default_num_threads();
+ }
+ virtual int getNumThreads() const BT_OVERRIDE
+ {
+ return m_numThreads;
+ }
+ virtual void setNumThreads(int numThreads) BT_OVERRIDE
+ {
+ m_numThreads = (std::max)(1, (std::min)(int(BT_MAX_THREAD_COUNT), numThreads));
+ if (m_tbbSchedulerInit)
+ {
+ // destroys all previous threads
+ delete m_tbbSchedulerInit;
+ m_tbbSchedulerInit = NULL;
+ }
+ m_tbbSchedulerInit = new tbb::task_scheduler_init(m_numThreads);
+ m_savedThreadCounter = 0;
+ if (m_isActive)
+ {
+ btResetThreadIndexCounter();
+ }
+ }
+ struct ForBodyAdapter
+ {
+ const btIParallelForBody* mBody;
+
+ ForBodyAdapter(const btIParallelForBody* body) : mBody(body) {}
+ void operator()(const tbb::blocked_range<int>& range) const
+ {
+ BT_PROFILE("TBB_forJob");
+ mBody->forLoop(range.begin(), range.end());
+ }
+ };
+ virtual void parallelFor(int iBegin, int iEnd, int grainSize, const btIParallelForBody& body) BT_OVERRIDE
+ {
+ BT_PROFILE("parallelFor_TBB");
+ ForBodyAdapter tbbBody(&body);
+ btPushThreadsAreRunning();
+ tbb::parallel_for(tbb::blocked_range<int>(iBegin, iEnd, grainSize),
+ tbbBody,
+ tbb::simple_partitioner());
+ btPopThreadsAreRunning();
+ }
+ struct SumBodyAdapter
+ {
+ const btIParallelSumBody* mBody;
+ btScalar mSum;
+
+ SumBodyAdapter(const btIParallelSumBody* body) : mBody(body), mSum(btScalar(0)) {}
+ SumBodyAdapter(const SumBodyAdapter& src, tbb::split) : mBody(src.mBody), mSum(btScalar(0)) {}
+ void join(const SumBodyAdapter& src) { mSum += src.mSum; }
+ void operator()(const tbb::blocked_range<int>& range)
+ {
+ BT_PROFILE("TBB_sumJob");
+ mSum += mBody->sumLoop(range.begin(), range.end());
+ }
+ };
+ virtual btScalar parallelSum(int iBegin, int iEnd, int grainSize, const btIParallelSumBody& body) BT_OVERRIDE
+ {
+ BT_PROFILE("parallelSum_TBB");
+ SumBodyAdapter tbbBody(&body);
+ btPushThreadsAreRunning();
+ tbb::parallel_deterministic_reduce(tbb::blocked_range<int>(iBegin, iEnd, grainSize), tbbBody);
+ btPopThreadsAreRunning();
+ return tbbBody.mSum;
+ }
};
-#endif // #if BT_USE_TBB && BT_THREADSAFE
-
+#endif // #if BT_USE_TBB && BT_THREADSAFE
#if BT_USE_PPL && BT_THREADSAFE
///
@@ -672,148 +652,141 @@ public:
///
class btTaskSchedulerPPL : public btITaskScheduler
{
- int m_numThreads;
- concurrency::combinable<btScalar> m_sum; // for parallelSum
+ int m_numThreads;
+ concurrency::combinable<btScalar> m_sum; // for parallelSum
public:
- btTaskSchedulerPPL() : btITaskScheduler( "PPL" )
- {
- m_numThreads = 0;
- }
- virtual int getMaxNumThreads() const BT_OVERRIDE
- {
- return concurrency::GetProcessorCount();
- }
- virtual int getNumThreads() const BT_OVERRIDE
- {
- return m_numThreads;
- }
- virtual void setNumThreads( int numThreads ) BT_OVERRIDE
- {
- // capping the thread count for PPL due to a thread-index issue
- const int maxThreadCount = (std::min)(int(BT_MAX_THREAD_COUNT), 31);
- m_numThreads = ( std::max )( 1, ( std::min )( maxThreadCount, numThreads ) );
- using namespace concurrency;
- if ( CurrentScheduler::Id() != -1 )
- {
- CurrentScheduler::Detach();
- }
- SchedulerPolicy policy;
- {
- // PPL seems to destroy threads when threadpool is shrunk, but keeps reusing old threads
- // force it to destroy old threads
- policy.SetConcurrencyLimits( 1, 1 );
- CurrentScheduler::Create( policy );
- CurrentScheduler::Detach();
- }
- policy.SetConcurrencyLimits( m_numThreads, m_numThreads );
- CurrentScheduler::Create( policy );
- m_savedThreadCounter = 0;
- if ( m_isActive )
- {
- btResetThreadIndexCounter();
- }
- }
- struct ForBodyAdapter
- {
- const btIParallelForBody* mBody;
- int mGrainSize;
- int mIndexEnd;
-
- ForBodyAdapter( const btIParallelForBody* body, int grainSize, int end ) : mBody( body ), mGrainSize( grainSize ), mIndexEnd( end ) {}
- void operator()( int i ) const
- {
- BT_PROFILE( "PPL_forJob" );
- mBody->forLoop( i, ( std::min )( i + mGrainSize, mIndexEnd ) );
- }
- };
- virtual void parallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body ) BT_OVERRIDE
- {
- BT_PROFILE( "parallelFor_PPL" );
- // PPL dispatch
- ForBodyAdapter pplBody( &body, grainSize, iEnd );
- btPushThreadsAreRunning();
- // note: MSVC 2010 doesn't support partitioner args, so avoid them
- concurrency::parallel_for( iBegin,
- iEnd,
- grainSize,
- pplBody
- );
- btPopThreadsAreRunning();
- }
- struct SumBodyAdapter
- {
- const btIParallelSumBody* mBody;
- concurrency::combinable<btScalar>* mSum;
- int mGrainSize;
- int mIndexEnd;
-
- SumBodyAdapter( const btIParallelSumBody* body, concurrency::combinable<btScalar>* sum, int grainSize, int end ) : mBody( body ), mSum(sum), mGrainSize( grainSize ), mIndexEnd( end ) {}
- void operator()( int i ) const
- {
- BT_PROFILE( "PPL_sumJob" );
- mSum->local() += mBody->sumLoop( i, ( std::min )( i + mGrainSize, mIndexEnd ) );
- }
- };
- static btScalar sumFunc( btScalar a, btScalar b ) { return a + b; }
- virtual btScalar parallelSum( int iBegin, int iEnd, int grainSize, const btIParallelSumBody& body ) BT_OVERRIDE
- {
- BT_PROFILE( "parallelSum_PPL" );
- m_sum.clear();
- SumBodyAdapter pplBody( &body, &m_sum, grainSize, iEnd );
- btPushThreadsAreRunning();
- // note: MSVC 2010 doesn't support partitioner args, so avoid them
- concurrency::parallel_for( iBegin,
- iEnd,
- grainSize,
- pplBody
- );
- btPopThreadsAreRunning();
- return m_sum.combine( sumFunc );
- }
+ btTaskSchedulerPPL() : btITaskScheduler("PPL")
+ {
+ m_numThreads = 0;
+ }
+ virtual int getMaxNumThreads() const BT_OVERRIDE
+ {
+ return concurrency::GetProcessorCount();
+ }
+ virtual int getNumThreads() const BT_OVERRIDE
+ {
+ return m_numThreads;
+ }
+ virtual void setNumThreads(int numThreads) BT_OVERRIDE
+ {
+ // capping the thread count for PPL due to a thread-index issue
+ const int maxThreadCount = (std::min)(int(BT_MAX_THREAD_COUNT), 31);
+ m_numThreads = (std::max)(1, (std::min)(maxThreadCount, numThreads));
+ using namespace concurrency;
+ if (CurrentScheduler::Id() != -1)
+ {
+ CurrentScheduler::Detach();
+ }
+ SchedulerPolicy policy;
+ {
+ // PPL seems to destroy threads when threadpool is shrunk, but keeps reusing old threads
+ // force it to destroy old threads
+ policy.SetConcurrencyLimits(1, 1);
+ CurrentScheduler::Create(policy);
+ CurrentScheduler::Detach();
+ }
+ policy.SetConcurrencyLimits(m_numThreads, m_numThreads);
+ CurrentScheduler::Create(policy);
+ m_savedThreadCounter = 0;
+ if (m_isActive)
+ {
+ btResetThreadIndexCounter();
+ }
+ }
+ struct ForBodyAdapter
+ {
+ const btIParallelForBody* mBody;
+ int mGrainSize;
+ int mIndexEnd;
+
+ ForBodyAdapter(const btIParallelForBody* body, int grainSize, int end) : mBody(body), mGrainSize(grainSize), mIndexEnd(end) {}
+ void operator()(int i) const
+ {
+ BT_PROFILE("PPL_forJob");
+ mBody->forLoop(i, (std::min)(i + mGrainSize, mIndexEnd));
+ }
+ };
+ virtual void parallelFor(int iBegin, int iEnd, int grainSize, const btIParallelForBody& body) BT_OVERRIDE
+ {
+ BT_PROFILE("parallelFor_PPL");
+ // PPL dispatch
+ ForBodyAdapter pplBody(&body, grainSize, iEnd);
+ btPushThreadsAreRunning();
+ // note: MSVC 2010 doesn't support partitioner args, so avoid them
+ concurrency::parallel_for(iBegin,
+ iEnd,
+ grainSize,
+ pplBody);
+ btPopThreadsAreRunning();
+ }
+ struct SumBodyAdapter
+ {
+ const btIParallelSumBody* mBody;
+ concurrency::combinable<btScalar>* mSum;
+ int mGrainSize;
+ int mIndexEnd;
+
+ SumBodyAdapter(const btIParallelSumBody* body, concurrency::combinable<btScalar>* sum, int grainSize, int end) : mBody(body), mSum(sum), mGrainSize(grainSize), mIndexEnd(end) {}
+ void operator()(int i) const
+ {
+ BT_PROFILE("PPL_sumJob");
+ mSum->local() += mBody->sumLoop(i, (std::min)(i + mGrainSize, mIndexEnd));
+ }
+ };
+ static btScalar sumFunc(btScalar a, btScalar b) { return a + b; }
+ virtual btScalar parallelSum(int iBegin, int iEnd, int grainSize, const btIParallelSumBody& body) BT_OVERRIDE
+ {
+ BT_PROFILE("parallelSum_PPL");
+ m_sum.clear();
+ SumBodyAdapter pplBody(&body, &m_sum, grainSize, iEnd);
+ btPushThreadsAreRunning();
+ // note: MSVC 2010 doesn't support partitioner args, so avoid them
+ concurrency::parallel_for(iBegin,
+ iEnd,
+ grainSize,
+ pplBody);
+ btPopThreadsAreRunning();
+ return m_sum.combine(sumFunc);
+ }
};
-#endif // #if BT_USE_PPL && BT_THREADSAFE
-
+#endif // #if BT_USE_PPL && BT_THREADSAFE
// create a non-threaded task scheduler (always available)
btITaskScheduler* btGetSequentialTaskScheduler()
{
- static btTaskSchedulerSequential sTaskScheduler;
- return &sTaskScheduler;
+ static btTaskSchedulerSequential sTaskScheduler;
+ return &sTaskScheduler;
}
-
// create an OpenMP task scheduler (if available, otherwise returns null)
btITaskScheduler* btGetOpenMPTaskScheduler()
{
#if BT_USE_OPENMP && BT_THREADSAFE
- static btTaskSchedulerOpenMP sTaskScheduler;
- return &sTaskScheduler;
+ static btTaskSchedulerOpenMP sTaskScheduler;
+ return &sTaskScheduler;
#else
- return NULL;
+ return NULL;
#endif
}
-
// create an Intel TBB task scheduler (if available, otherwise returns null)
btITaskScheduler* btGetTBBTaskScheduler()
{
#if BT_USE_TBB && BT_THREADSAFE
- static btTaskSchedulerTBB sTaskScheduler;
- return &sTaskScheduler;
+ static btTaskSchedulerTBB sTaskScheduler;
+ return &sTaskScheduler;
#else
- return NULL;
+ return NULL;
#endif
}
-
// create a PPL task scheduler (if available, otherwise returns null)
btITaskScheduler* btGetPPLTaskScheduler()
{
#if BT_USE_PPL && BT_THREADSAFE
- static btTaskSchedulerPPL sTaskScheduler;
- return &sTaskScheduler;
+ static btTaskSchedulerPPL sTaskScheduler;
+ return &sTaskScheduler;
#else
- return NULL;
+ return NULL;
#endif
}
-
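A hedged initialization sketch tying the factory functions above together (the helper name is hypothetical): try the optional backends in order and fall back to the always-available sequential scheduler.

// hypothetical startup helper -- pick the first scheduler that was compiled in
void exampleInitTaskScheduler()
{
	btITaskScheduler* ts = btGetOpenMPTaskScheduler();
	if (!ts) ts = btGetTBBTaskScheduler();
	if (!ts) ts = btGetPPLTaskScheduler();
	if (!ts) ts = btGetSequentialTaskScheduler();  // never NULL
	ts->setNumThreads(ts->getMaxNumThreads());
	btSetTaskScheduler(ts);  // must run on the main thread
}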