Diffstat (limited to 'thirdparty/bullet/LinearMath/btThreads.cpp')
-rw-r--r-- | thirdparty/bullet/LinearMath/btThreads.cpp | 941 |
1 file changed, 457 insertions, 484 deletions
diff --git a/thirdparty/bullet/LinearMath/btThreads.cpp b/thirdparty/bullet/LinearMath/btThreads.cpp index c037626ffb..69a86799fa 100644 --- a/thirdparty/bullet/LinearMath/btThreads.cpp +++ b/thirdparty/bullet/LinearMath/btThreads.cpp @@ -12,18 +12,15 @@ subject to the following restrictions: 3. This notice may not be removed or altered from any source distribution. */ - #include "btThreads.h" #include "btQuickprof.h" #include <algorithm> // for min and max - #if BT_USE_OPENMP && BT_THREADSAFE #include <omp.h> -#endif // #if BT_USE_OPENMP && BT_THREADSAFE - +#endif // #if BT_USE_OPENMP && BT_THREADSAFE #if BT_USE_PPL && BT_THREADSAFE @@ -32,8 +29,7 @@ subject to the following restrictions: // Visual Studio 2010 and later should come with it #include <concrtrm.h> // for GetProcessorCount() -#endif // #if BT_USE_PPL && BT_THREADSAFE - +#endif // #if BT_USE_PPL && BT_THREADSAFE #if BT_USE_TBB && BT_THREADSAFE @@ -44,8 +40,7 @@ subject to the following restrictions: #include <tbb/parallel_for.h> #include <tbb/blocked_range.h> -#endif // #if BT_USE_TBB && BT_THREADSAFE - +#endif // #if BT_USE_TBB && BT_THREADSAFE #if BT_THREADSAFE // @@ -53,7 +48,7 @@ subject to the following restrictions: // Using ordinary system-provided mutexes like Windows critical sections was noticeably slower // presumably because when it fails to lock at first it would sleep the thread and trigger costly // context switching. -// +// #if __cplusplus >= 201103L @@ -61,25 +56,24 @@ subject to the following restrictions: // on GCC or Clang you need to compile with -std=c++11 #define USE_CPP11_ATOMICS 1 -#elif defined( _MSC_VER ) +#elif defined(_MSC_VER) // on MSVC, use intrinsics instead #define USE_MSVC_INTRINSICS 1 -#elif defined( __GNUC__ ) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)) +#elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)) // available since GCC 4.7 and some versions of clang // todo: check for clang #define USE_GCC_BUILTIN_ATOMICS 1 -#elif defined( __GNUC__ ) && (__GNUC__ == 4 && __GNUC_MINOR__ >= 1) +#elif defined(__GNUC__) && (__GNUC__ == 4 && __GNUC_MINOR__ >= 1) // available since GCC 4.1 #define USE_GCC_BUILTIN_ATOMICS_OLD 1 #endif - #if USE_CPP11_ATOMICS #include <atomic> @@ -89,27 +83,26 @@ subject to the following restrictions: bool btSpinMutex::tryLock() { - std::atomic<int>* aDest = reinterpret_cast<std::atomic<int>*>(&mLock); - int expected = 0; - return std::atomic_compare_exchange_weak_explicit( aDest, &expected, int(1), std::memory_order_acq_rel, std::memory_order_acquire ); + std::atomic<int>* aDest = reinterpret_cast<std::atomic<int>*>(&mLock); + int expected = 0; + return std::atomic_compare_exchange_weak_explicit(aDest, &expected, int(1), std::memory_order_acq_rel, std::memory_order_acquire); } void btSpinMutex::lock() { - // note: this lock does not sleep the thread. - while (! tryLock()) - { - // spin - } + // note: this lock does not sleep the thread. 
+ while (!tryLock()) + { + // spin + } } void btSpinMutex::unlock() { - std::atomic<int>* aDest = reinterpret_cast<std::atomic<int>*>(&mLock); - std::atomic_store_explicit( aDest, int(0), std::memory_order_release ); + std::atomic<int>* aDest = reinterpret_cast<std::atomic<int>*>(&mLock); + std::atomic_store_explicit(aDest, int(0), std::memory_order_release); } - #elif USE_MSVC_INTRINSICS #define WIN32_LEAN_AND_MEAN @@ -117,148 +110,142 @@ void btSpinMutex::unlock() #include <windows.h> #include <intrin.h> -#define THREAD_LOCAL_STATIC __declspec( thread ) static - +#define THREAD_LOCAL_STATIC __declspec(thread) static bool btSpinMutex::tryLock() { - volatile long* aDest = reinterpret_cast<long*>(&mLock); - return ( 0 == _InterlockedCompareExchange( aDest, 1, 0) ); + volatile long* aDest = reinterpret_cast<long*>(&mLock); + return (0 == _InterlockedCompareExchange(aDest, 1, 0)); } void btSpinMutex::lock() { - // note: this lock does not sleep the thread - while (! tryLock()) - { - // spin - } + // note: this lock does not sleep the thread + while (!tryLock()) + { + // spin + } } void btSpinMutex::unlock() { - volatile long* aDest = reinterpret_cast<long*>( &mLock ); - _InterlockedExchange( aDest, 0 ); + volatile long* aDest = reinterpret_cast<long*>(&mLock); + _InterlockedExchange(aDest, 0); } #elif USE_GCC_BUILTIN_ATOMICS #define THREAD_LOCAL_STATIC static __thread - bool btSpinMutex::tryLock() { - int expected = 0; - bool weak = false; - const int memOrderSuccess = __ATOMIC_ACQ_REL; - const int memOrderFail = __ATOMIC_ACQUIRE; - return __atomic_compare_exchange_n(&mLock, &expected, int(1), weak, memOrderSuccess, memOrderFail); + int expected = 0; + bool weak = false; + const int memOrderSuccess = __ATOMIC_ACQ_REL; + const int memOrderFail = __ATOMIC_ACQUIRE; + return __atomic_compare_exchange_n(&mLock, &expected, int(1), weak, memOrderSuccess, memOrderFail); } void btSpinMutex::lock() { - // note: this lock does not sleep the thread - while (! tryLock()) - { - // spin - } + // note: this lock does not sleep the thread + while (!tryLock()) + { + // spin + } } void btSpinMutex::unlock() { - __atomic_store_n(&mLock, int(0), __ATOMIC_RELEASE); + __atomic_store_n(&mLock, int(0), __ATOMIC_RELEASE); } #elif USE_GCC_BUILTIN_ATOMICS_OLD - #define THREAD_LOCAL_STATIC static __thread bool btSpinMutex::tryLock() { - return __sync_bool_compare_and_swap(&mLock, int(0), int(1)); + return __sync_bool_compare_and_swap(&mLock, int(0), int(1)); } void btSpinMutex::lock() { - // note: this lock does not sleep the thread - while (! 
tryLock()) - { - // spin - } + // note: this lock does not sleep the thread + while (!tryLock()) + { + // spin + } } void btSpinMutex::unlock() { - // write 0 - __sync_fetch_and_and(&mLock, int(0)); + // write 0 + __sync_fetch_and_and(&mLock, int(0)); } -#else //#elif USE_MSVC_INTRINSICS +#else //#elif USE_MSVC_INTRINSICS #error "no threading primitives defined -- unknown platform" #endif //#else //#elif USE_MSVC_INTRINSICS -#else //#if BT_THREADSAFE +#else //#if BT_THREADSAFE // These should not be called ever void btSpinMutex::lock() { - btAssert( !"unimplemented btSpinMutex::lock() called" ); + btAssert(!"unimplemented btSpinMutex::lock() called"); } void btSpinMutex::unlock() { - btAssert( !"unimplemented btSpinMutex::unlock() called" ); + btAssert(!"unimplemented btSpinMutex::unlock() called"); } bool btSpinMutex::tryLock() { - btAssert( !"unimplemented btSpinMutex::tryLock() called" ); - return true; + btAssert(!"unimplemented btSpinMutex::tryLock() called"); + return true; } #define THREAD_LOCAL_STATIC static -#endif // #else //#if BT_THREADSAFE - +#endif // #else //#if BT_THREADSAFE struct ThreadsafeCounter { - unsigned int mCounter; - btSpinMutex mMutex; - - ThreadsafeCounter() - { - mCounter = 0; - --mCounter; // first count should come back 0 - } - - unsigned int getNext() - { - // no need to optimize this with atomics, it is only called ONCE per thread! - mMutex.lock(); - mCounter++; - if ( mCounter >= BT_MAX_THREAD_COUNT ) - { - btAssert( !"thread counter exceeded" ); - // wrap back to the first worker index - mCounter = 1; - } - unsigned int val = mCounter; - mMutex.unlock(); - return val; - } + unsigned int mCounter; + btSpinMutex mMutex; + + ThreadsafeCounter() + { + mCounter = 0; + --mCounter; // first count should come back 0 + } + + unsigned int getNext() + { + // no need to optimize this with atomics, it is only called ONCE per thread! + mMutex.lock(); + mCounter++; + if (mCounter >= BT_MAX_THREAD_COUNT) + { + btAssert(!"thread counter exceeded"); + // wrap back to the first worker index + mCounter = 1; + } + unsigned int val = mCounter; + mMutex.unlock(); + return val; + } }; - -static btITaskScheduler* gBtTaskScheduler; +static btITaskScheduler* gBtTaskScheduler=0; static int gThreadsRunningCounter = 0; // useful for detecting if we are trying to do nested parallel-for calls static btSpinMutex gThreadsRunningCounterMutex; static ThreadsafeCounter gThreadCounter; - // // BT_DETECT_BAD_THREAD_INDEX tries to detect when there are multiple threads assigned the same thread index. // @@ -276,7 +263,7 @@ static ThreadsafeCounter gThreadCounter; // We allocate thread-indexes as needed with a sequential global thread counter. // // Our simple thread-counting scheme falls apart if the task scheduler destroys some threads but -// continues to re-use other threads and the application repeatedly resizes the thread pool of the +// continues to re-use other threads and the application repeatedly resizes the thread pool of the // task scheduler. // In order to prevent the thread-counter from exceeding the global max (BT_MAX_THREAD_COUNT), we // wrap the thread counter back to 1. 
This should only happen if the worker threads have all been @@ -290,197 +277,192 @@ static ThreadsafeCounter gThreadCounter; typedef DWORD ThreadId_t; const static ThreadId_t kInvalidThreadId = 0; -ThreadId_t gDebugThreadIds[ BT_MAX_THREAD_COUNT ]; +ThreadId_t gDebugThreadIds[BT_MAX_THREAD_COUNT]; static ThreadId_t getDebugThreadId() { - return GetCurrentThreadId(); + return GetCurrentThreadId(); } -#endif // #if BT_DETECT_BAD_THREAD_INDEX - +#endif // #if BT_DETECT_BAD_THREAD_INDEX // return a unique index per thread, main thread is 0, worker threads are in [1, BT_MAX_THREAD_COUNT) unsigned int btGetCurrentThreadIndex() { - const unsigned int kNullIndex = ~0U; - THREAD_LOCAL_STATIC unsigned int sThreadIndex = kNullIndex; - if ( sThreadIndex == kNullIndex ) - { - sThreadIndex = gThreadCounter.getNext(); - btAssert( sThreadIndex < BT_MAX_THREAD_COUNT ); - } + const unsigned int kNullIndex = ~0U; + THREAD_LOCAL_STATIC unsigned int sThreadIndex = kNullIndex; + if (sThreadIndex == kNullIndex) + { + sThreadIndex = gThreadCounter.getNext(); + btAssert(sThreadIndex < BT_MAX_THREAD_COUNT); + } #if BT_DETECT_BAD_THREAD_INDEX - if ( gBtTaskScheduler && sThreadIndex > 0 ) - { - ThreadId_t tid = getDebugThreadId(); - // if not set - if ( gDebugThreadIds[ sThreadIndex ] == kInvalidThreadId ) - { - // set it - gDebugThreadIds[ sThreadIndex ] = tid; - } - else - { - if ( gDebugThreadIds[ sThreadIndex ] != tid ) - { - // this could indicate the task scheduler is breaking our assumptions about - // how threads are managed when threadpool is resized - btAssert( !"there are 2 or more threads with the same thread-index!" ); - __debugbreak(); - } - } - } -#endif // #if BT_DETECT_BAD_THREAD_INDEX - return sThreadIndex; + if (gBtTaskScheduler && sThreadIndex > 0) + { + ThreadId_t tid = getDebugThreadId(); + // if not set + if (gDebugThreadIds[sThreadIndex] == kInvalidThreadId) + { + // set it + gDebugThreadIds[sThreadIndex] = tid; + } + else + { + if (gDebugThreadIds[sThreadIndex] != tid) + { + // this could indicate the task scheduler is breaking our assumptions about + // how threads are managed when threadpool is resized + btAssert(!"there are 2 or more threads with the same thread-index!"); + __debugbreak(); + } + } + } +#endif // #if BT_DETECT_BAD_THREAD_INDEX + return sThreadIndex; } bool btIsMainThread() { - return btGetCurrentThreadIndex() == 0; + return btGetCurrentThreadIndex() == 0; } void btResetThreadIndexCounter() { - // for when all current worker threads are destroyed - btAssert( btIsMainThread() ); - gThreadCounter.mCounter = 0; + // for when all current worker threads are destroyed + btAssert(btIsMainThread()); + gThreadCounter.mCounter = 0; } -btITaskScheduler::btITaskScheduler( const char* name ) +btITaskScheduler::btITaskScheduler(const char* name) { - m_name = name; - m_savedThreadCounter = 0; - m_isActive = false; + m_name = name; + m_savedThreadCounter = 0; + m_isActive = false; } void btITaskScheduler::activate() { - // gThreadCounter is used to assign a thread-index to each worker thread in a task scheduler. - // The main thread is always thread-index 0, and worker threads are numbered from 1 to 63 (BT_MAX_THREAD_COUNT-1) - // The thread-indexes need to be unique amongst the threads that can be running simultaneously. - // Since only one task scheduler can be used at a time, it is OK for a pair of threads that belong to different - // task schedulers to share the same thread index because they can't be running at the same time. 
- // So each task scheduler needs to keep its own thread counter value - if ( !m_isActive ) - { - gThreadCounter.mCounter = m_savedThreadCounter; // restore saved thread counter - m_isActive = true; - } + // gThreadCounter is used to assign a thread-index to each worker thread in a task scheduler. + // The main thread is always thread-index 0, and worker threads are numbered from 1 to 63 (BT_MAX_THREAD_COUNT-1) + // The thread-indexes need to be unique amongst the threads that can be running simultaneously. + // Since only one task scheduler can be used at a time, it is OK for a pair of threads that belong to different + // task schedulers to share the same thread index because they can't be running at the same time. + // So each task scheduler needs to keep its own thread counter value + if (!m_isActive) + { + gThreadCounter.mCounter = m_savedThreadCounter; // restore saved thread counter + m_isActive = true; + } } void btITaskScheduler::deactivate() { - if ( m_isActive ) - { - m_savedThreadCounter = gThreadCounter.mCounter; // save thread counter - m_isActive = false; - } + if (m_isActive) + { + m_savedThreadCounter = gThreadCounter.mCounter; // save thread counter + m_isActive = false; + } } void btPushThreadsAreRunning() { - gThreadsRunningCounterMutex.lock(); - gThreadsRunningCounter++; - gThreadsRunningCounterMutex.unlock(); + gThreadsRunningCounterMutex.lock(); + gThreadsRunningCounter++; + gThreadsRunningCounterMutex.unlock(); } void btPopThreadsAreRunning() { - gThreadsRunningCounterMutex.lock(); - gThreadsRunningCounter--; - gThreadsRunningCounterMutex.unlock(); + gThreadsRunningCounterMutex.lock(); + gThreadsRunningCounter--; + gThreadsRunningCounterMutex.unlock(); } bool btThreadsAreRunning() { - return gThreadsRunningCounter != 0; + return gThreadsRunningCounter != 0; } - -void btSetTaskScheduler( btITaskScheduler* ts ) +void btSetTaskScheduler(btITaskScheduler* ts) { - int threadId = btGetCurrentThreadIndex(); // make sure we call this on main thread at least once before any workers run - if ( threadId != 0 ) - { - btAssert( !"btSetTaskScheduler must be called from the main thread!" 
); - return; - } - if ( gBtTaskScheduler ) - { - // deactivate old task scheduler - gBtTaskScheduler->deactivate(); - } - gBtTaskScheduler = ts; - if ( ts ) - { - // activate new task scheduler - ts->activate(); - } + int threadId = btGetCurrentThreadIndex(); // make sure we call this on main thread at least once before any workers run + if (threadId != 0) + { + btAssert(!"btSetTaskScheduler must be called from the main thread!"); + return; + } + if (gBtTaskScheduler) + { + // deactivate old task scheduler + gBtTaskScheduler->deactivate(); + } + gBtTaskScheduler = ts; + if (ts) + { + // activate new task scheduler + ts->activate(); + } } - btITaskScheduler* btGetTaskScheduler() { - return gBtTaskScheduler; + return gBtTaskScheduler; } - -void btParallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body ) +void btParallelFor(int iBegin, int iEnd, int grainSize, const btIParallelForBody& body) { #if BT_THREADSAFE #if BT_DETECT_BAD_THREAD_INDEX - if ( !btThreadsAreRunning() ) - { - // clear out thread ids - for ( int i = 0; i < BT_MAX_THREAD_COUNT; ++i ) - { - gDebugThreadIds[ i ] = kInvalidThreadId; - } - } -#endif // #if BT_DETECT_BAD_THREAD_INDEX + if (!btThreadsAreRunning()) + { + // clear out thread ids + for (int i = 0; i < BT_MAX_THREAD_COUNT; ++i) + { + gDebugThreadIds[i] = kInvalidThreadId; + } + } +#endif // #if BT_DETECT_BAD_THREAD_INDEX - btAssert( gBtTaskScheduler != NULL ); // call btSetTaskScheduler() with a valid task scheduler first! - gBtTaskScheduler->parallelFor( iBegin, iEnd, grainSize, body ); + btAssert(gBtTaskScheduler != NULL); // call btSetTaskScheduler() with a valid task scheduler first! + gBtTaskScheduler->parallelFor(iBegin, iEnd, grainSize, body); -#else // #if BT_THREADSAFE +#else // #if BT_THREADSAFE - // non-parallel version of btParallelFor - btAssert( !"called btParallelFor in non-threadsafe build. enable BT_THREADSAFE" ); - body.forLoop( iBegin, iEnd ); + // non-parallel version of btParallelFor + btAssert(!"called btParallelFor in non-threadsafe build. enable BT_THREADSAFE"); + body.forLoop(iBegin, iEnd); -#endif// #if BT_THREADSAFE +#endif // #if BT_THREADSAFE } -btScalar btParallelSum( int iBegin, int iEnd, int grainSize, const btIParallelSumBody& body ) +btScalar btParallelSum(int iBegin, int iEnd, int grainSize, const btIParallelSumBody& body) { #if BT_THREADSAFE #if BT_DETECT_BAD_THREAD_INDEX - if ( !btThreadsAreRunning() ) - { - // clear out thread ids - for ( int i = 0; i < BT_MAX_THREAD_COUNT; ++i ) - { - gDebugThreadIds[ i ] = kInvalidThreadId; - } - } -#endif // #if BT_DETECT_BAD_THREAD_INDEX + if (!btThreadsAreRunning()) + { + // clear out thread ids + for (int i = 0; i < BT_MAX_THREAD_COUNT; ++i) + { + gDebugThreadIds[i] = kInvalidThreadId; + } + } +#endif // #if BT_DETECT_BAD_THREAD_INDEX - btAssert( gBtTaskScheduler != NULL ); // call btSetTaskScheduler() with a valid task scheduler first! - return gBtTaskScheduler->parallelSum( iBegin, iEnd, grainSize, body ); + btAssert(gBtTaskScheduler != NULL); // call btSetTaskScheduler() with a valid task scheduler first! + return gBtTaskScheduler->parallelSum(iBegin, iEnd, grainSize, body); -#else // #if BT_THREADSAFE +#else // #if BT_THREADSAFE - // non-parallel version of btParallelSum - btAssert( !"called btParallelFor in non-threadsafe build. enable BT_THREADSAFE" ); - return body.sumLoop( iBegin, iEnd ); + // non-parallel version of btParallelSum + btAssert(!"called btParallelFor in non-threadsafe build. 
enable BT_THREADSAFE"); + return body.sumLoop(iBegin, iEnd); -#endif //#else // #if BT_THREADSAFE +#endif //#else // #if BT_THREADSAFE } - /// /// btTaskSchedulerSequential -- non-threaded implementation of task scheduler /// (really just useful for testing performance of single threaded vs multi) @@ -488,86 +470,86 @@ btScalar btParallelSum( int iBegin, int iEnd, int grainSize, const btIParallelSu class btTaskSchedulerSequential : public btITaskScheduler { public: - btTaskSchedulerSequential() : btITaskScheduler( "Sequential" ) {} - virtual int getMaxNumThreads() const BT_OVERRIDE { return 1; } - virtual int getNumThreads() const BT_OVERRIDE { return 1; } - virtual void setNumThreads( int numThreads ) BT_OVERRIDE {} - virtual void parallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body ) BT_OVERRIDE - { - BT_PROFILE( "parallelFor_sequential" ); - body.forLoop( iBegin, iEnd ); - } - virtual btScalar parallelSum( int iBegin, int iEnd, int grainSize, const btIParallelSumBody& body ) BT_OVERRIDE - { - BT_PROFILE( "parallelSum_sequential" ); - return body.sumLoop( iBegin, iEnd ); - } + btTaskSchedulerSequential() : btITaskScheduler("Sequential") {} + virtual int getMaxNumThreads() const BT_OVERRIDE { return 1; } + virtual int getNumThreads() const BT_OVERRIDE { return 1; } + virtual void setNumThreads(int numThreads) BT_OVERRIDE {} + virtual void parallelFor(int iBegin, int iEnd, int grainSize, const btIParallelForBody& body) BT_OVERRIDE + { + BT_PROFILE("parallelFor_sequential"); + body.forLoop(iBegin, iEnd); + } + virtual btScalar parallelSum(int iBegin, int iEnd, int grainSize, const btIParallelSumBody& body) BT_OVERRIDE + { + BT_PROFILE("parallelSum_sequential"); + return body.sumLoop(iBegin, iEnd); + } }; - #if BT_USE_OPENMP && BT_THREADSAFE /// /// btTaskSchedulerOpenMP -- wrapper around OpenMP task scheduler /// class btTaskSchedulerOpenMP : public btITaskScheduler { - int m_numThreads; + int m_numThreads; + public: - btTaskSchedulerOpenMP() : btITaskScheduler( "OpenMP" ) - { - m_numThreads = 0; - } - virtual int getMaxNumThreads() const BT_OVERRIDE - { - return omp_get_max_threads(); - } - virtual int getNumThreads() const BT_OVERRIDE - { - return m_numThreads; - } - virtual void setNumThreads( int numThreads ) BT_OVERRIDE - { - // With OpenMP, because it is a standard with various implementations, we can't - // know for sure if every implementation has the same behavior of destroying all - // previous threads when resizing the threadpool - m_numThreads = ( std::max )( 1, ( std::min )( int( BT_MAX_THREAD_COUNT ), numThreads ) ); - omp_set_num_threads( 1 ); // hopefully, all previous threads get destroyed here - omp_set_num_threads( m_numThreads ); - m_savedThreadCounter = 0; - if ( m_isActive ) - { - btResetThreadIndexCounter(); - } - } - virtual void parallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body ) BT_OVERRIDE - { - BT_PROFILE( "parallelFor_OpenMP" ); - btPushThreadsAreRunning(); -#pragma omp parallel for schedule( static, 1 ) - for ( int i = iBegin; i < iEnd; i += grainSize ) - { - BT_PROFILE( "OpenMP_forJob" ); - body.forLoop( i, ( std::min )( i + grainSize, iEnd ) ); - } - btPopThreadsAreRunning(); - } - virtual btScalar parallelSum( int iBegin, int iEnd, int grainSize, const btIParallelSumBody& body ) BT_OVERRIDE - { - BT_PROFILE( "parallelFor_OpenMP" ); - btPushThreadsAreRunning(); - btScalar sum = btScalar( 0 ); -#pragma omp parallel for schedule( static, 1 ) reduction(+:sum) - for ( int i = iBegin; i < iEnd; i += 
grainSize ) - { - BT_PROFILE( "OpenMP_sumJob" ); - sum += body.sumLoop( i, ( std::min )( i + grainSize, iEnd ) ); - } - btPopThreadsAreRunning(); - return sum; - } + btTaskSchedulerOpenMP() : btITaskScheduler("OpenMP") + { + m_numThreads = 0; + } + virtual int getMaxNumThreads() const BT_OVERRIDE + { + return omp_get_max_threads(); + } + virtual int getNumThreads() const BT_OVERRIDE + { + return m_numThreads; + } + virtual void setNumThreads(int numThreads) BT_OVERRIDE + { + // With OpenMP, because it is a standard with various implementations, we can't + // know for sure if every implementation has the same behavior of destroying all + // previous threads when resizing the threadpool + m_numThreads = (std::max)(1, (std::min)(int(BT_MAX_THREAD_COUNT), numThreads)); + omp_set_num_threads(1); // hopefully, all previous threads get destroyed here + omp_set_num_threads(m_numThreads); + m_savedThreadCounter = 0; + if (m_isActive) + { + btResetThreadIndexCounter(); + } + } + virtual void parallelFor(int iBegin, int iEnd, int grainSize, const btIParallelForBody& body) BT_OVERRIDE + { + BT_PROFILE("parallelFor_OpenMP"); + btPushThreadsAreRunning(); +#pragma omp parallel for schedule(static, 1) + for (int i = iBegin; i < iEnd; i += grainSize) + { + BT_PROFILE("OpenMP_forJob"); + body.forLoop(i, (std::min)(i + grainSize, iEnd)); + } + btPopThreadsAreRunning(); + } + virtual btScalar parallelSum(int iBegin, int iEnd, int grainSize, const btIParallelSumBody& body) BT_OVERRIDE + { + BT_PROFILE("parallelFor_OpenMP"); + btPushThreadsAreRunning(); + btScalar sum = btScalar(0); +#pragma omp parallel for schedule(static, 1) reduction(+ \ + : sum) + for (int i = iBegin; i < iEnd; i += grainSize) + { + BT_PROFILE("OpenMP_sumJob"); + sum += body.sumLoop(i, (std::min)(i + grainSize, iEnd)); + } + btPopThreadsAreRunning(); + return sum; + } }; -#endif // #if BT_USE_OPENMP && BT_THREADSAFE - +#endif // #if BT_USE_OPENMP && BT_THREADSAFE #if BT_USE_TBB && BT_THREADSAFE /// @@ -575,96 +557,94 @@ public: /// class btTaskSchedulerTBB : public btITaskScheduler { - int m_numThreads; - tbb::task_scheduler_init* m_tbbSchedulerInit; + int m_numThreads; + tbb::task_scheduler_init* m_tbbSchedulerInit; public: - btTaskSchedulerTBB() : btITaskScheduler( "IntelTBB" ) - { - m_numThreads = 0; - m_tbbSchedulerInit = NULL; - } - ~btTaskSchedulerTBB() - { - if ( m_tbbSchedulerInit ) - { - delete m_tbbSchedulerInit; - m_tbbSchedulerInit = NULL; - } - } - - virtual int getMaxNumThreads() const BT_OVERRIDE - { - return tbb::task_scheduler_init::default_num_threads(); - } - virtual int getNumThreads() const BT_OVERRIDE - { - return m_numThreads; - } - virtual void setNumThreads( int numThreads ) BT_OVERRIDE - { - m_numThreads = ( std::max )( 1, ( std::min )( int(BT_MAX_THREAD_COUNT), numThreads ) ); - if ( m_tbbSchedulerInit ) - { - // destroys all previous threads - delete m_tbbSchedulerInit; - m_tbbSchedulerInit = NULL; - } - m_tbbSchedulerInit = new tbb::task_scheduler_init( m_numThreads ); - m_savedThreadCounter = 0; - if ( m_isActive ) - { - btResetThreadIndexCounter(); - } - } - struct ForBodyAdapter - { - const btIParallelForBody* mBody; - - ForBodyAdapter( const btIParallelForBody* body ) : mBody( body ) {} - void operator()( const tbb::blocked_range<int>& range ) const - { - BT_PROFILE( "TBB_forJob" ); - mBody->forLoop( range.begin(), range.end() ); - } - }; - virtual void parallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body ) BT_OVERRIDE - { - BT_PROFILE( "parallelFor_TBB" ); - ForBodyAdapter 
tbbBody( &body ); - btPushThreadsAreRunning(); - tbb::parallel_for( tbb::blocked_range<int>( iBegin, iEnd, grainSize ), - tbbBody, - tbb::simple_partitioner() - ); - btPopThreadsAreRunning(); - } - struct SumBodyAdapter - { - const btIParallelSumBody* mBody; - btScalar mSum; - - SumBodyAdapter( const btIParallelSumBody* body ) : mBody( body ), mSum( btScalar( 0 ) ) {} - SumBodyAdapter( const SumBodyAdapter& src, tbb::split ) : mBody( src.mBody ), mSum( btScalar( 0 ) ) {} - void join( const SumBodyAdapter& src ) { mSum += src.mSum; } - void operator()( const tbb::blocked_range<int>& range ) - { - BT_PROFILE( "TBB_sumJob" ); - mSum += mBody->sumLoop( range.begin(), range.end() ); - } - }; - virtual btScalar parallelSum( int iBegin, int iEnd, int grainSize, const btIParallelSumBody& body ) BT_OVERRIDE - { - BT_PROFILE( "parallelSum_TBB" ); - SumBodyAdapter tbbBody( &body ); - btPushThreadsAreRunning(); - tbb::parallel_deterministic_reduce( tbb::blocked_range<int>( iBegin, iEnd, grainSize ), tbbBody ); - btPopThreadsAreRunning(); - return tbbBody.mSum; - } + btTaskSchedulerTBB() : btITaskScheduler("IntelTBB") + { + m_numThreads = 0; + m_tbbSchedulerInit = NULL; + } + ~btTaskSchedulerTBB() + { + if (m_tbbSchedulerInit) + { + delete m_tbbSchedulerInit; + m_tbbSchedulerInit = NULL; + } + } + + virtual int getMaxNumThreads() const BT_OVERRIDE + { + return tbb::task_scheduler_init::default_num_threads(); + } + virtual int getNumThreads() const BT_OVERRIDE + { + return m_numThreads; + } + virtual void setNumThreads(int numThreads) BT_OVERRIDE + { + m_numThreads = (std::max)(1, (std::min)(int(BT_MAX_THREAD_COUNT), numThreads)); + if (m_tbbSchedulerInit) + { + // destroys all previous threads + delete m_tbbSchedulerInit; + m_tbbSchedulerInit = NULL; + } + m_tbbSchedulerInit = new tbb::task_scheduler_init(m_numThreads); + m_savedThreadCounter = 0; + if (m_isActive) + { + btResetThreadIndexCounter(); + } + } + struct ForBodyAdapter + { + const btIParallelForBody* mBody; + + ForBodyAdapter(const btIParallelForBody* body) : mBody(body) {} + void operator()(const tbb::blocked_range<int>& range) const + { + BT_PROFILE("TBB_forJob"); + mBody->forLoop(range.begin(), range.end()); + } + }; + virtual void parallelFor(int iBegin, int iEnd, int grainSize, const btIParallelForBody& body) BT_OVERRIDE + { + BT_PROFILE("parallelFor_TBB"); + ForBodyAdapter tbbBody(&body); + btPushThreadsAreRunning(); + tbb::parallel_for(tbb::blocked_range<int>(iBegin, iEnd, grainSize), + tbbBody, + tbb::simple_partitioner()); + btPopThreadsAreRunning(); + } + struct SumBodyAdapter + { + const btIParallelSumBody* mBody; + btScalar mSum; + + SumBodyAdapter(const btIParallelSumBody* body) : mBody(body), mSum(btScalar(0)) {} + SumBodyAdapter(const SumBodyAdapter& src, tbb::split) : mBody(src.mBody), mSum(btScalar(0)) {} + void join(const SumBodyAdapter& src) { mSum += src.mSum; } + void operator()(const tbb::blocked_range<int>& range) + { + BT_PROFILE("TBB_sumJob"); + mSum += mBody->sumLoop(range.begin(), range.end()); + } + }; + virtual btScalar parallelSum(int iBegin, int iEnd, int grainSize, const btIParallelSumBody& body) BT_OVERRIDE + { + BT_PROFILE("parallelSum_TBB"); + SumBodyAdapter tbbBody(&body); + btPushThreadsAreRunning(); + tbb::parallel_deterministic_reduce(tbb::blocked_range<int>(iBegin, iEnd, grainSize), tbbBody); + btPopThreadsAreRunning(); + return tbbBody.mSum; + } }; -#endif // #if BT_USE_TBB && BT_THREADSAFE - +#endif // #if BT_USE_TBB && BT_THREADSAFE #if BT_USE_PPL && BT_THREADSAFE /// @@ -672,148 +652,141 @@ 
public: /// class btTaskSchedulerPPL : public btITaskScheduler { - int m_numThreads; - concurrency::combinable<btScalar> m_sum; // for parallelSum + int m_numThreads; + concurrency::combinable<btScalar> m_sum; // for parallelSum public: - btTaskSchedulerPPL() : btITaskScheduler( "PPL" ) - { - m_numThreads = 0; - } - virtual int getMaxNumThreads() const BT_OVERRIDE - { - return concurrency::GetProcessorCount(); - } - virtual int getNumThreads() const BT_OVERRIDE - { - return m_numThreads; - } - virtual void setNumThreads( int numThreads ) BT_OVERRIDE - { - // capping the thread count for PPL due to a thread-index issue - const int maxThreadCount = (std::min)(int(BT_MAX_THREAD_COUNT), 31); - m_numThreads = ( std::max )( 1, ( std::min )( maxThreadCount, numThreads ) ); - using namespace concurrency; - if ( CurrentScheduler::Id() != -1 ) - { - CurrentScheduler::Detach(); - } - SchedulerPolicy policy; - { - // PPL seems to destroy threads when threadpool is shrunk, but keeps reusing old threads - // force it to destroy old threads - policy.SetConcurrencyLimits( 1, 1 ); - CurrentScheduler::Create( policy ); - CurrentScheduler::Detach(); - } - policy.SetConcurrencyLimits( m_numThreads, m_numThreads ); - CurrentScheduler::Create( policy ); - m_savedThreadCounter = 0; - if ( m_isActive ) - { - btResetThreadIndexCounter(); - } - } - struct ForBodyAdapter - { - const btIParallelForBody* mBody; - int mGrainSize; - int mIndexEnd; - - ForBodyAdapter( const btIParallelForBody* body, int grainSize, int end ) : mBody( body ), mGrainSize( grainSize ), mIndexEnd( end ) {} - void operator()( int i ) const - { - BT_PROFILE( "PPL_forJob" ); - mBody->forLoop( i, ( std::min )( i + mGrainSize, mIndexEnd ) ); - } - }; - virtual void parallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body ) BT_OVERRIDE - { - BT_PROFILE( "parallelFor_PPL" ); - // PPL dispatch - ForBodyAdapter pplBody( &body, grainSize, iEnd ); - btPushThreadsAreRunning(); - // note: MSVC 2010 doesn't support partitioner args, so avoid them - concurrency::parallel_for( iBegin, - iEnd, - grainSize, - pplBody - ); - btPopThreadsAreRunning(); - } - struct SumBodyAdapter - { - const btIParallelSumBody* mBody; - concurrency::combinable<btScalar>* mSum; - int mGrainSize; - int mIndexEnd; - - SumBodyAdapter( const btIParallelSumBody* body, concurrency::combinable<btScalar>* sum, int grainSize, int end ) : mBody( body ), mSum(sum), mGrainSize( grainSize ), mIndexEnd( end ) {} - void operator()( int i ) const - { - BT_PROFILE( "PPL_sumJob" ); - mSum->local() += mBody->sumLoop( i, ( std::min )( i + mGrainSize, mIndexEnd ) ); - } - }; - static btScalar sumFunc( btScalar a, btScalar b ) { return a + b; } - virtual btScalar parallelSum( int iBegin, int iEnd, int grainSize, const btIParallelSumBody& body ) BT_OVERRIDE - { - BT_PROFILE( "parallelSum_PPL" ); - m_sum.clear(); - SumBodyAdapter pplBody( &body, &m_sum, grainSize, iEnd ); - btPushThreadsAreRunning(); - // note: MSVC 2010 doesn't support partitioner args, so avoid them - concurrency::parallel_for( iBegin, - iEnd, - grainSize, - pplBody - ); - btPopThreadsAreRunning(); - return m_sum.combine( sumFunc ); - } + btTaskSchedulerPPL() : btITaskScheduler("PPL") + { + m_numThreads = 0; + } + virtual int getMaxNumThreads() const BT_OVERRIDE + { + return concurrency::GetProcessorCount(); + } + virtual int getNumThreads() const BT_OVERRIDE + { + return m_numThreads; + } + virtual void setNumThreads(int numThreads) BT_OVERRIDE + { + // capping the thread count for PPL due to a thread-index 
issue + const int maxThreadCount = (std::min)(int(BT_MAX_THREAD_COUNT), 31); + m_numThreads = (std::max)(1, (std::min)(maxThreadCount, numThreads)); + using namespace concurrency; + if (CurrentScheduler::Id() != -1) + { + CurrentScheduler::Detach(); + } + SchedulerPolicy policy; + { + // PPL seems to destroy threads when threadpool is shrunk, but keeps reusing old threads + // force it to destroy old threads + policy.SetConcurrencyLimits(1, 1); + CurrentScheduler::Create(policy); + CurrentScheduler::Detach(); + } + policy.SetConcurrencyLimits(m_numThreads, m_numThreads); + CurrentScheduler::Create(policy); + m_savedThreadCounter = 0; + if (m_isActive) + { + btResetThreadIndexCounter(); + } + } + struct ForBodyAdapter + { + const btIParallelForBody* mBody; + int mGrainSize; + int mIndexEnd; + + ForBodyAdapter(const btIParallelForBody* body, int grainSize, int end) : mBody(body), mGrainSize(grainSize), mIndexEnd(end) {} + void operator()(int i) const + { + BT_PROFILE("PPL_forJob"); + mBody->forLoop(i, (std::min)(i + mGrainSize, mIndexEnd)); + } + }; + virtual void parallelFor(int iBegin, int iEnd, int grainSize, const btIParallelForBody& body) BT_OVERRIDE + { + BT_PROFILE("parallelFor_PPL"); + // PPL dispatch + ForBodyAdapter pplBody(&body, grainSize, iEnd); + btPushThreadsAreRunning(); + // note: MSVC 2010 doesn't support partitioner args, so avoid them + concurrency::parallel_for(iBegin, + iEnd, + grainSize, + pplBody); + btPopThreadsAreRunning(); + } + struct SumBodyAdapter + { + const btIParallelSumBody* mBody; + concurrency::combinable<btScalar>* mSum; + int mGrainSize; + int mIndexEnd; + + SumBodyAdapter(const btIParallelSumBody* body, concurrency::combinable<btScalar>* sum, int grainSize, int end) : mBody(body), mSum(sum), mGrainSize(grainSize), mIndexEnd(end) {} + void operator()(int i) const + { + BT_PROFILE("PPL_sumJob"); + mSum->local() += mBody->sumLoop(i, (std::min)(i + mGrainSize, mIndexEnd)); + } + }; + static btScalar sumFunc(btScalar a, btScalar b) { return a + b; } + virtual btScalar parallelSum(int iBegin, int iEnd, int grainSize, const btIParallelSumBody& body) BT_OVERRIDE + { + BT_PROFILE("parallelSum_PPL"); + m_sum.clear(); + SumBodyAdapter pplBody(&body, &m_sum, grainSize, iEnd); + btPushThreadsAreRunning(); + // note: MSVC 2010 doesn't support partitioner args, so avoid them + concurrency::parallel_for(iBegin, + iEnd, + grainSize, + pplBody); + btPopThreadsAreRunning(); + return m_sum.combine(sumFunc); + } }; -#endif // #if BT_USE_PPL && BT_THREADSAFE - +#endif // #if BT_USE_PPL && BT_THREADSAFE // create a non-threaded task scheduler (always available) btITaskScheduler* btGetSequentialTaskScheduler() { - static btTaskSchedulerSequential sTaskScheduler; - return &sTaskScheduler; + static btTaskSchedulerSequential sTaskScheduler; + return &sTaskScheduler; } - // create an OpenMP task scheduler (if available, otherwise returns null) btITaskScheduler* btGetOpenMPTaskScheduler() { #if BT_USE_OPENMP && BT_THREADSAFE - static btTaskSchedulerOpenMP sTaskScheduler; - return &sTaskScheduler; + static btTaskSchedulerOpenMP sTaskScheduler; + return &sTaskScheduler; #else - return NULL; + return NULL; #endif } - // create an Intel TBB task scheduler (if available, otherwise returns null) btITaskScheduler* btGetTBBTaskScheduler() { #if BT_USE_TBB && BT_THREADSAFE - static btTaskSchedulerTBB sTaskScheduler; - return &sTaskScheduler; + static btTaskSchedulerTBB sTaskScheduler; + return &sTaskScheduler; #else - return NULL; + return NULL; #endif } - // create a PPL task scheduler (if 
available, otherwise returns null) btITaskScheduler* btGetPPLTaskScheduler() { #if BT_USE_PPL && BT_THREADSAFE - static btTaskSchedulerPPL sTaskScheduler; - return &sTaskScheduler; + static btTaskSchedulerPPL sTaskScheduler; + return &sTaskScheduler; #else - return NULL; + return NULL; #endif } - |
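The comment near the top of the BT_THREADSAFE section explains why Bullet rolls its own spin mutex: an OS mutex that fails to lock puts the thread to sleep and pays for a context switch, whereas btSpinMutex simply retries a compare-exchange. Below is a minimal sketch of that technique using plain C++11 std::atomic; the SpinLock name is illustrative and, unlike Bullet's btSpinMutex, it does not store the lock word as a plain int behind a reinterpret_cast.

#include <atomic>

// Spin lock in the style of btSpinMutex's C++11 atomics path.
class SpinLock
{
	std::atomic<int> m_lock{0};  // 0 = unlocked, 1 = locked

public:
	bool tryLock()
	{
		int expected = 0;
		// acquire semantics on success so data protected by the lock is visible to the new owner
		return m_lock.compare_exchange_weak(expected, 1,
											std::memory_order_acq_rel,
											std::memory_order_acquire);
	}
	void lock()
	{
		// busy-wait instead of sleeping the thread; cheap when the hold time is short
		while (!tryLock())
		{
			// spin
		}
	}
	void unlock()
	{
		// release semantics publish writes made under the lock before the unlocking store
		m_lock.store(0, std::memory_order_release);
	}
};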
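The long BT_DETECT_BAD_THREAD_INDEX comment documents the thread-index scheme: the main thread is index 0, each worker lazily takes the next value from the global, mutex-guarded ThreadsafeCounter the first time it calls btGetCurrentThreadIndex(), and the counter wraps rather than exceed BT_MAX_THREAD_COUNT. A compressed sketch of that idea using C++11 thread_local instead of Bullet's THREAD_LOCAL_STATIC macro (which falls back to __declspec(thread) or __thread on older toolchains); the names and the modulo wrap-around are illustrative simplifications, not Bullet's exact policy.

#include <atomic>

static const unsigned int kMaxThreads = 64;      // stand-in for BT_MAX_THREAD_COUNT
static std::atomic<unsigned int> gNextIndex{0};  // first caller (the main thread) gets 0

// Returns a small, stable per-thread index in [0, kMaxThreads).
unsigned int getCurrentThreadIndex()
{
	static const unsigned int kNullIndex = ~0U;
	thread_local unsigned int sIndex = kNullIndex;
	if (sIndex == kNullIndex)
	{
		// assigned exactly once per thread, so an atomic increment is cheap enough here
		sIndex = gNextIndex.fetch_add(1) % kMaxThreads;
	}
	return sIndex;
}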
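btParallelFor() and btParallelSum() only forward to whatever scheduler was installed with btSetTaskScheduler(), so calling code supplies a body object and a grain size. A hedged usage sketch follows, assuming the btIParallelForBody interface declared in btThreads.h exposes the forLoop(iBegin, iEnd) method this file calls; SquareBody and squareAll are illustrative names, not part of Bullet.

#include "btThreads.h"

// A body the scheduler may invoke on sub-ranges from several worker threads at once.
struct SquareBody : public btIParallelForBody
{
	btScalar* m_data;
	explicit SquareBody(btScalar* data) : m_data(data) {}
	void forLoop(int iBegin, int iEnd) const BT_OVERRIDE
	{
		for (int i = iBegin; i < iEnd; ++i)
			m_data[i] = m_data[i] * m_data[i];
	}
};

void squareAll(btScalar* data, int count)
{
	// Install a scheduler once, on the main thread. The sequential one always exists;
	// the OpenMP/TBB/PPL getters return NULL when that backend was not compiled in.
	btSetTaskScheduler(btGetSequentialTaskScheduler());

	SquareBody body(data);
	// grainSize: smallest run of iterations worth handing to a single worker
	btParallelFor(0, count, 64, body);
}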
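The OpenMP backend does not hand individual iterations to the runtime; it strides the loop by grainSize so each OpenMP thread processes whole chunks, and parallelSum lets the reduction clause combine the per-thread partial sums. A self-contained sketch of the same pattern (compile with OpenMP enabled, e.g. -fopenmp; the function and parameter names are illustrative).

#include <algorithm>  // for std::min
#include <omp.h>

// Sum values[iBegin, iEnd), one grain per loop iteration; OpenMP merges the partial sums.
double parallelChunkSum(const double* values, int iBegin, int iEnd, int grainSize)
{
	double sum = 0.0;
#pragma omp parallel for schedule(static, 1) reduction(+ : sum)
	for (int i = iBegin; i < iEnd; i += grainSize)
	{
		// (std::min) is parenthesized, as in the patch, to dodge the Windows min() macro
		const int chunkEnd = (std::min)(i + grainSize, iEnd);
		for (int j = i; j < chunkEnd; ++j)
		{
			sum += values[j];  // accumulates into this thread's private copy of sum
		}
	}
	return sum;
}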