summaryrefslogtreecommitdiff
path: root/thirdparty/bullet/Bullet3Common
diff options
context:
space:
mode:
Diffstat (limited to 'thirdparty/bullet/Bullet3Common')
-rw-r--r--thirdparty/bullet/Bullet3Common/b3AlignedAllocator.cpp186
-rw-r--r--thirdparty/bullet/Bullet3Common/b3AlignedAllocator.h110
-rw-r--r--thirdparty/bullet/Bullet3Common/b3AlignedObjectArray.h522
-rw-r--r--thirdparty/bullet/Bullet3Common/b3CommandLineArgs.h106
-rw-r--r--thirdparty/bullet/Bullet3Common/b3FileUtils.h133
-rw-r--r--thirdparty/bullet/Bullet3Common/b3HashMap.h462
-rw-r--r--thirdparty/bullet/Bullet3Common/b3Logging.cpp145
-rw-r--r--thirdparty/bullet/Bullet3Common/b3Logging.h74
-rw-r--r--thirdparty/bullet/Bullet3Common/b3Matrix3x3.h1354
-rw-r--r--thirdparty/bullet/Bullet3Common/b3MinMax.h69
-rw-r--r--thirdparty/bullet/Bullet3Common/b3PoolAllocator.h121
-rw-r--r--thirdparty/bullet/Bullet3Common/b3QuadWord.h242
-rw-r--r--thirdparty/bullet/Bullet3Common/b3Quaternion.h908
-rw-r--r--thirdparty/bullet/Bullet3Common/b3Random.h46
-rw-r--r--thirdparty/bullet/Bullet3Common/b3ResizablePool.h171
-rw-r--r--thirdparty/bullet/Bullet3Common/b3Scalar.h689
-rw-r--r--thirdparty/bullet/Bullet3Common/b3StackAlloc.h118
-rw-r--r--thirdparty/bullet/Bullet3Common/b3Transform.h286
-rw-r--r--thirdparty/bullet/Bullet3Common/b3TransformUtil.h210
-rw-r--r--thirdparty/bullet/Bullet3Common/b3Vector3.cpp1637
-rw-r--r--thirdparty/bullet/Bullet3Common/b3Vector3.h1303
-rw-r--r--thirdparty/bullet/Bullet3Common/shared/b3Float4.h90
-rw-r--r--thirdparty/bullet/Bullet3Common/shared/b3Int2.h63
-rw-r--r--thirdparty/bullet/Bullet3Common/shared/b3Int4.h71
-rw-r--r--thirdparty/bullet/Bullet3Common/shared/b3Mat3x3.h157
-rw-r--r--thirdparty/bullet/Bullet3Common/shared/b3PlatformDefinitions.h41
-rw-r--r--thirdparty/bullet/Bullet3Common/shared/b3Quat.h100
27 files changed, 0 insertions, 9414 deletions
diff --git a/thirdparty/bullet/Bullet3Common/b3AlignedAllocator.cpp b/thirdparty/bullet/Bullet3Common/b3AlignedAllocator.cpp
deleted file mode 100644
index d546d5e066..0000000000
--- a/thirdparty/bullet/Bullet3Common/b3AlignedAllocator.cpp
+++ /dev/null
@@ -1,186 +0,0 @@
-/*
-Bullet Continuous Collision Detection and Physics Library
-Copyright (c) 2003-2013 Erwin Coumans http://bulletphysics.org
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose,
-including commercial applications, and to alter it and redistribute it freely,
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-#include "b3AlignedAllocator.h"
-
-#ifdef B3_ALLOCATOR_STATISTICS
-int b3g_numAlignedAllocs = 0;
-int b3g_numAlignedFree = 0;
-int b3g_totalBytesAlignedAllocs = 0; //detect memory leaks
-#endif
-
-static void *b3AllocDefault(size_t size)
-{
- return malloc(size);
-}
-
-static void b3FreeDefault(void *ptr)
-{
- free(ptr);
-}
-
-static b3AllocFunc *b3s_allocFunc = b3AllocDefault;
-static b3FreeFunc *b3s_freeFunc = b3FreeDefault;
-
-#if defined(B3_HAS_ALIGNED_ALLOCATOR)
-#include <malloc.h>
-static void *b3AlignedAllocDefault(size_t size, int alignment)
-{
- return _aligned_malloc(size, (size_t)alignment);
-}
-
-static void b3AlignedFreeDefault(void *ptr)
-{
- _aligned_free(ptr);
-}
-#elif defined(__CELLOS_LV2__)
-#include <stdlib.h>
-
-static inline void *b3AlignedAllocDefault(size_t size, int alignment)
-{
- return memalign(alignment, size);
-}
-
-static inline void b3AlignedFreeDefault(void *ptr)
-{
- free(ptr);
-}
-#else
-
-static inline void *b3AlignedAllocDefault(size_t size, int alignment)
-{
- void *ret;
- char *real;
- real = (char *)b3s_allocFunc(size + sizeof(void *) + (alignment - 1));
- if (real)
- {
- ret = b3AlignPointer(real + sizeof(void *), alignment);
- *((void **)(ret)-1) = (void *)(real);
- }
- else
- {
- ret = (void *)(real);
- }
- return (ret);
-}
-
-static inline void b3AlignedFreeDefault(void *ptr)
-{
- void *real;
-
- if (ptr)
- {
- real = *((void **)(ptr)-1);
- b3s_freeFunc(real);
- }
-}
-#endif
-
-static b3AlignedAllocFunc *b3s_alignedAllocFunc = b3AlignedAllocDefault;
-static b3AlignedFreeFunc *b3s_alignedFreeFunc = b3AlignedFreeDefault;
-
-void b3AlignedAllocSetCustomAligned(b3AlignedAllocFunc *allocFunc, b3AlignedFreeFunc *freeFunc)
-{
- b3s_alignedAllocFunc = allocFunc ? allocFunc : b3AlignedAllocDefault;
- b3s_alignedFreeFunc = freeFunc ? freeFunc : b3AlignedFreeDefault;
-}
-
-void b3AlignedAllocSetCustom(b3AllocFunc *allocFunc, b3FreeFunc *freeFunc)
-{
- b3s_allocFunc = allocFunc ? allocFunc : b3AllocDefault;
- b3s_freeFunc = freeFunc ? freeFunc : b3FreeDefault;
-}
-
-#ifdef B3_DEBUG_MEMORY_ALLOCATIONS
-//this generic allocator provides the total allocated number of bytes
-#include <stdio.h>
-
-void *b3AlignedAllocInternal(size_t size, int alignment, int line, char *filename)
-{
- void *ret;
- char *real;
-#ifdef B3_ALLOCATOR_STATISTICS
- b3g_totalBytesAlignedAllocs += size;
- b3g_numAlignedAllocs++;
-#endif
- real = (char *)b3s_allocFunc(size + 2 * sizeof(void *) + (alignment - 1));
- if (real)
- {
- ret = (void *)b3AlignPointer(real + 2 * sizeof(void *), alignment);
- *((void **)(ret)-1) = (void *)(real);
- *((int *)(ret)-2) = size;
- }
- else
- {
- ret = (void *)(real); //??
- }
-
- b3Printf("allocation#%d at address %x, from %s,line %d, size %d\n", b3g_numAlignedAllocs, real, filename, line, size);
-
- int *ptr = (int *)ret;
- *ptr = 12;
- return (ret);
-}
-
-void b3AlignedFreeInternal(void *ptr, int line, char *filename)
-{
- void *real;
-#ifdef B3_ALLOCATOR_STATISTICS
- b3g_numAlignedFree++;
-#endif
- if (ptr)
- {
- real = *((void **)(ptr)-1);
- int size = *((int *)(ptr)-2);
-#ifdef B3_ALLOCATOR_STATISTICS
- b3g_totalBytesAlignedAllocs -= size;
-#endif
- b3Printf("free #%d at address %x, from %s,line %d, size %d\n", b3g_numAlignedFree, real, filename, line, size);
-
- b3s_freeFunc(real);
- }
- else
- {
- b3Printf("NULL ptr\n");
- }
-}
-
-#else //B3_DEBUG_MEMORY_ALLOCATIONS
-
-void *b3AlignedAllocInternal(size_t size, int alignment)
-{
-#ifdef B3_ALLOCATOR_STATISTICS
- b3g_numAlignedAllocs++;
-#endif
- void *ptr;
- ptr = b3s_alignedAllocFunc(size, alignment);
- // b3Printf("b3AlignedAllocInternal %d, %x\n",size,ptr);
- return ptr;
-}
-
-void b3AlignedFreeInternal(void *ptr)
-{
- if (!ptr)
- {
- return;
- }
-#ifdef B3_ALLOCATOR_STATISTICS
- b3g_numAlignedFree++;
-#endif
- // b3Printf("b3AlignedFreeInternal %x\n",ptr);
- b3s_alignedFreeFunc(ptr);
-}
-
-#endif //B3_DEBUG_MEMORY_ALLOCATIONS
diff --git a/thirdparty/bullet/Bullet3Common/b3AlignedAllocator.h b/thirdparty/bullet/Bullet3Common/b3AlignedAllocator.h
deleted file mode 100644
index bcff9f128e..0000000000
--- a/thirdparty/bullet/Bullet3Common/b3AlignedAllocator.h
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
-Bullet Continuous Collision Detection and Physics Library
-Copyright (c) 2003-2013 Erwin Coumans http://bulletphysics.org
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose,
-including commercial applications, and to alter it and redistribute it freely,
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-#ifndef B3_ALIGNED_ALLOCATOR
-#define B3_ALIGNED_ALLOCATOR
-
-///we probably replace this with our own aligned memory allocator
-///so we replace _aligned_malloc and _aligned_free with our own
-///that is better portable and more predictable
-
-#include "b3Scalar.h"
-//#define B3_DEBUG_MEMORY_ALLOCATIONS 1
-#ifdef B3_DEBUG_MEMORY_ALLOCATIONS
-
-#define b3AlignedAlloc(a, b) \
- b3AlignedAllocInternal(a, b, __LINE__, __FILE__)
-
-#define b3AlignedFree(ptr) \
- b3AlignedFreeInternal(ptr, __LINE__, __FILE__)
-
-void* b3AlignedAllocInternal(size_t size, int alignment, int line, char* filename);
-
-void b3AlignedFreeInternal(void* ptr, int line, char* filename);
-
-#else
-void* b3AlignedAllocInternal(size_t size, int alignment);
-void b3AlignedFreeInternal(void* ptr);
-
-#define b3AlignedAlloc(size, alignment) b3AlignedAllocInternal(size, alignment)
-#define b3AlignedFree(ptr) b3AlignedFreeInternal(ptr)
-
-#endif
-typedef int btSizeType;
-
-typedef void*(b3AlignedAllocFunc)(size_t size, int alignment);
-typedef void(b3AlignedFreeFunc)(void* memblock);
-typedef void*(b3AllocFunc)(size_t size);
-typedef void(b3FreeFunc)(void* memblock);
-
-///The developer can let all Bullet memory allocations go through a custom memory allocator, using b3AlignedAllocSetCustom
-void b3AlignedAllocSetCustom(b3AllocFunc* allocFunc, b3FreeFunc* freeFunc);
-///If the developer has already an custom aligned allocator, then b3AlignedAllocSetCustomAligned can be used. The default aligned allocator pre-allocates extra memory using the non-aligned allocator, and instruments it.
-void b3AlignedAllocSetCustomAligned(b3AlignedAllocFunc* allocFunc, b3AlignedFreeFunc* freeFunc);
-
-///The b3AlignedAllocator is a portable class for aligned memory allocations.
-///Default implementations for unaligned and aligned allocations can be overridden by a custom allocator using b3AlignedAllocSetCustom and b3AlignedAllocSetCustomAligned.
-template <typename T, unsigned Alignment>
-class b3AlignedAllocator
-{
- typedef b3AlignedAllocator<T, Alignment> self_type;
-
-public:
- //just going down a list:
- b3AlignedAllocator() {}
- /*
- b3AlignedAllocator( const self_type & ) {}
- */
-
- template <typename Other>
- b3AlignedAllocator(const b3AlignedAllocator<Other, Alignment>&)
- {
- }
-
- typedef const T* const_pointer;
- typedef const T& const_reference;
- typedef T* pointer;
- typedef T& reference;
- typedef T value_type;
-
- pointer address(reference ref) const { return &ref; }
- const_pointer address(const_reference ref) const { return &ref; }
- pointer allocate(btSizeType n, const_pointer* hint = 0)
- {
- (void)hint;
- return reinterpret_cast<pointer>(b3AlignedAlloc(sizeof(value_type) * n, Alignment));
- }
- void construct(pointer ptr, const value_type& value) { new (ptr) value_type(value); }
- void deallocate(pointer ptr)
- {
- b3AlignedFree(reinterpret_cast<void*>(ptr));
- }
- void destroy(pointer ptr) { ptr->~value_type(); }
-
- template <typename O>
- struct rebind
- {
- typedef b3AlignedAllocator<O, Alignment> other;
- };
- template <typename O>
- self_type& operator=(const b3AlignedAllocator<O, Alignment>&)
- {
- return *this;
- }
-
- friend bool operator==(const self_type&, const self_type&) { return true; }
-};
-
-#endif //B3_ALIGNED_ALLOCATOR
diff --git a/thirdparty/bullet/Bullet3Common/b3AlignedObjectArray.h b/thirdparty/bullet/Bullet3Common/b3AlignedObjectArray.h
deleted file mode 100644
index 249e381bf1..0000000000
--- a/thirdparty/bullet/Bullet3Common/b3AlignedObjectArray.h
+++ /dev/null
@@ -1,522 +0,0 @@
-/*
-Bullet Continuous Collision Detection and Physics Library
-Copyright (c) 2003-2013 Erwin Coumans http://bulletphysics.org
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose,
-including commercial applications, and to alter it and redistribute it freely,
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-#ifndef B3_OBJECT_ARRAY__
-#define B3_OBJECT_ARRAY__
-
-#include "b3Scalar.h" // has definitions like B3_FORCE_INLINE
-#include "b3AlignedAllocator.h"
-
-///If the platform doesn't support placement new, you can disable B3_USE_PLACEMENT_NEW
-///then the b3AlignedObjectArray doesn't support objects with virtual methods, and non-trivial constructors/destructors
-///You can enable B3_USE_MEMCPY, then swapping elements in the array will use memcpy instead of operator=
-///see discussion here: http://continuousphysics.com/Bullet/phpBB2/viewtopic.php?t=1231 and
-///http://www.continuousphysics.com/Bullet/phpBB2/viewtopic.php?t=1240
-
-#define B3_USE_PLACEMENT_NEW 1
-//#define B3_USE_MEMCPY 1 //disable, because it is cumbersome to find out for each platform where memcpy is defined. It can be in <memory.h> or <string.h> or otherwise...
-#define B3_ALLOW_ARRAY_COPY_OPERATOR // enabling this can accidently perform deep copies of data if you are not careful
-
-#ifdef B3_USE_MEMCPY
-#include <memory.h>
-#include <string.h>
-#endif //B3_USE_MEMCPY
-
-#ifdef B3_USE_PLACEMENT_NEW
-#include <new> //for placement new
-#endif //B3_USE_PLACEMENT_NEW
-
-///The b3AlignedObjectArray template class uses a subset of the stl::vector interface for its methods
-///It is developed to replace stl::vector to avoid portability issues, including STL alignment issues to add SIMD/SSE data
-template <typename T>
-//template <class T>
-class b3AlignedObjectArray
-{
- b3AlignedAllocator<T, 16> m_allocator;
-
- int m_size;
- int m_capacity;
- T* m_data;
- //PCK: added this line
- bool m_ownsMemory;
-
-#ifdef B3_ALLOW_ARRAY_COPY_OPERATOR
-public:
- B3_FORCE_INLINE b3AlignedObjectArray<T>& operator=(const b3AlignedObjectArray<T>& other)
- {
- copyFromArray(other);
- return *this;
- }
-#else //B3_ALLOW_ARRAY_COPY_OPERATOR
-private:
- B3_FORCE_INLINE b3AlignedObjectArray<T>& operator=(const b3AlignedObjectArray<T>& other);
-#endif //B3_ALLOW_ARRAY_COPY_OPERATOR
-
-protected:
- B3_FORCE_INLINE int allocSize(int size)
- {
- return (size ? size * 2 : 1);
- }
- B3_FORCE_INLINE void copy(int start, int end, T* dest) const
- {
- int i;
- for (i = start; i < end; ++i)
-#ifdef B3_USE_PLACEMENT_NEW
- new (&dest[i]) T(m_data[i]);
-#else
- dest[i] = m_data[i];
-#endif //B3_USE_PLACEMENT_NEW
- }
-
- B3_FORCE_INLINE void init()
- {
- //PCK: added this line
- m_ownsMemory = true;
- m_data = 0;
- m_size = 0;
- m_capacity = 0;
- }
- B3_FORCE_INLINE void destroy(int first, int last)
- {
- int i;
- for (i = first; i < last; i++)
- {
- m_data[i].~T();
- }
- }
-
- B3_FORCE_INLINE void* allocate(int size)
- {
- if (size)
- return m_allocator.allocate(size);
- return 0;
- }
-
- B3_FORCE_INLINE void deallocate()
- {
- if (m_data)
- {
- //PCK: enclosed the deallocation in this block
- if (m_ownsMemory)
- {
- m_allocator.deallocate(m_data);
- }
- m_data = 0;
- }
- }
-
-public:
- b3AlignedObjectArray()
- {
- init();
- }
-
- ~b3AlignedObjectArray()
- {
- clear();
- }
-
- ///Generally it is best to avoid using the copy constructor of an b3AlignedObjectArray, and use a (const) reference to the array instead.
- b3AlignedObjectArray(const b3AlignedObjectArray& otherArray)
- {
- init();
-
- int otherSize = otherArray.size();
- resize(otherSize);
- otherArray.copy(0, otherSize, m_data);
- }
-
- /// return the number of elements in the array
- B3_FORCE_INLINE int size() const
- {
- return m_size;
- }
-
- B3_FORCE_INLINE const T& at(int n) const
- {
- b3Assert(n >= 0);
- b3Assert(n < size());
- return m_data[n];
- }
-
- B3_FORCE_INLINE T& at(int n)
- {
- b3Assert(n >= 0);
- b3Assert(n < size());
- return m_data[n];
- }
-
- B3_FORCE_INLINE const T& operator[](int n) const
- {
- b3Assert(n >= 0);
- b3Assert(n < size());
- return m_data[n];
- }
-
- B3_FORCE_INLINE T& operator[](int n)
- {
- b3Assert(n >= 0);
- b3Assert(n < size());
- return m_data[n];
- }
-
- ///clear the array, deallocated memory. Generally it is better to use array.resize(0), to reduce performance overhead of run-time memory (de)allocations.
- B3_FORCE_INLINE void clear()
- {
- destroy(0, size());
-
- deallocate();
-
- init();
- }
-
- B3_FORCE_INLINE void pop_back()
- {
- b3Assert(m_size > 0);
- m_size--;
- m_data[m_size].~T();
- }
-
- ///resize changes the number of elements in the array. If the new size is larger, the new elements will be constructed using the optional second argument.
- ///when the new number of elements is smaller, the destructor will be called, but memory will not be freed, to reduce performance overhead of run-time memory (de)allocations.
- B3_FORCE_INLINE void resizeNoInitialize(int newsize)
- {
- int curSize = size();
-
- if (newsize < curSize)
- {
- }
- else
- {
- if (newsize > size())
- {
- reserve(newsize);
- }
- //leave this uninitialized
- }
- m_size = newsize;
- }
-
- B3_FORCE_INLINE void resize(int newsize, const T& fillData = T())
- {
- int curSize = size();
-
- if (newsize < curSize)
- {
- for (int i = newsize; i < curSize; i++)
- {
- m_data[i].~T();
- }
- }
- else
- {
- if (newsize > size())
- {
- reserve(newsize);
- }
-#ifdef B3_USE_PLACEMENT_NEW
- for (int i = curSize; i < newsize; i++)
- {
- new (&m_data[i]) T(fillData);
- }
-#endif //B3_USE_PLACEMENT_NEW
- }
-
- m_size = newsize;
- }
- B3_FORCE_INLINE T& expandNonInitializing()
- {
- int sz = size();
- if (sz == capacity())
- {
- reserve(allocSize(size()));
- }
- m_size++;
-
- return m_data[sz];
- }
-
- B3_FORCE_INLINE T& expand(const T& fillValue = T())
- {
- int sz = size();
- if (sz == capacity())
- {
- reserve(allocSize(size()));
- }
- m_size++;
-#ifdef B3_USE_PLACEMENT_NEW
- new (&m_data[sz]) T(fillValue); //use the in-place new (not really allocating heap memory)
-#endif
-
- return m_data[sz];
- }
-
- B3_FORCE_INLINE void push_back(const T& _Val)
- {
- int sz = size();
- if (sz == capacity())
- {
- reserve(allocSize(size()));
- }
-
-#ifdef B3_USE_PLACEMENT_NEW
- new (&m_data[m_size]) T(_Val);
-#else
- m_data[size()] = _Val;
-#endif //B3_USE_PLACEMENT_NEW
-
- m_size++;
- }
-
- /// return the pre-allocated (reserved) elements, this is at least as large as the total number of elements,see size() and reserve()
- B3_FORCE_INLINE int capacity() const
- {
- return m_capacity;
- }
-
- B3_FORCE_INLINE void reserve(int _Count)
- { // determine new minimum length of allocated storage
- if (capacity() < _Count)
- { // not enough room, reallocate
- T* s = (T*)allocate(_Count);
- b3Assert(s);
- if (s == 0)
- {
- b3Error("b3AlignedObjectArray reserve out-of-memory\n");
- _Count = 0;
- m_size = 0;
- }
- copy(0, size(), s);
-
- destroy(0, size());
-
- deallocate();
-
- //PCK: added this line
- m_ownsMemory = true;
-
- m_data = s;
-
- m_capacity = _Count;
- }
- }
-
- class less
- {
- public:
- bool operator()(const T& a, const T& b)
- {
- return (a < b);
- }
- };
-
- template <typename L>
- void quickSortInternal(const L& CompareFunc, int lo, int hi)
- {
- // lo is the lower index, hi is the upper index
- // of the region of array a that is to be sorted
- int i = lo, j = hi;
- T x = m_data[(lo + hi) / 2];
-
- // partition
- do
- {
- while (CompareFunc(m_data[i], x))
- i++;
- while (CompareFunc(x, m_data[j]))
- j--;
- if (i <= j)
- {
- swap(i, j);
- i++;
- j--;
- }
- } while (i <= j);
-
- // recursion
- if (lo < j)
- quickSortInternal(CompareFunc, lo, j);
- if (i < hi)
- quickSortInternal(CompareFunc, i, hi);
- }
-
- template <typename L>
- void quickSort(const L& CompareFunc)
- {
- //don't sort 0 or 1 elements
- if (size() > 1)
- {
- quickSortInternal(CompareFunc, 0, size() - 1);
- }
- }
-
- ///heap sort from http://www.csse.monash.edu.au/~lloyd/tildeAlgDS/Sort/Heap/
- template <typename L>
- void downHeap(T* pArr, int k, int n, const L& CompareFunc)
- {
- /* PRE: a[k+1..N] is a heap */
- /* POST: a[k..N] is a heap */
-
- T temp = pArr[k - 1];
- /* k has child(s) */
- while (k <= n / 2)
- {
- int child = 2 * k;
-
- if ((child < n) && CompareFunc(pArr[child - 1], pArr[child]))
- {
- child++;
- }
- /* pick larger child */
- if (CompareFunc(temp, pArr[child - 1]))
- {
- /* move child up */
- pArr[k - 1] = pArr[child - 1];
- k = child;
- }
- else
- {
- break;
- }
- }
- pArr[k - 1] = temp;
- } /*downHeap*/
-
- void swap(int index0, int index1)
- {
-#ifdef B3_USE_MEMCPY
- char temp[sizeof(T)];
- memcpy(temp, &m_data[index0], sizeof(T));
- memcpy(&m_data[index0], &m_data[index1], sizeof(T));
- memcpy(&m_data[index1], temp, sizeof(T));
-#else
- T temp = m_data[index0];
- m_data[index0] = m_data[index1];
- m_data[index1] = temp;
-#endif //B3_USE_PLACEMENT_NEW
- }
-
- template <typename L>
- void heapSort(const L& CompareFunc)
- {
- /* sort a[0..N-1], N.B. 0 to N-1 */
- int k;
- int n = m_size;
- for (k = n / 2; k > 0; k--)
- {
- downHeap(m_data, k, n, CompareFunc);
- }
-
- /* a[1..N] is now a heap */
- while (n >= 1)
- {
- swap(0, n - 1); /* largest of a[0..n-1] */
-
- n = n - 1;
- /* restore a[1..i-1] heap */
- downHeap(m_data, 1, n, CompareFunc);
- }
- }
-
- ///non-recursive binary search, assumes sorted array
- int findBinarySearch(const T& key) const
- {
- int first = 0;
- int last = size() - 1;
-
- //assume sorted array
- while (first <= last)
- {
- int mid = (first + last) / 2; // compute mid point.
- if (key > m_data[mid])
- first = mid + 1; // repeat search in top half.
- else if (key < m_data[mid])
- last = mid - 1; // repeat search in bottom half.
- else
- return mid; // found it. return position /////
- }
- return size(); // failed to find key
- }
-
- int findLinearSearch(const T& key) const
- {
- int index = size();
- int i;
-
- for (i = 0; i < size(); i++)
- {
- if (m_data[i] == key)
- {
- index = i;
- break;
- }
- }
- return index;
- }
-
- int findLinearSearch2(const T& key) const
- {
- int index = -1;
- int i;
-
- for (i = 0; i < size(); i++)
- {
- if (m_data[i] == key)
- {
- index = i;
- break;
- }
- }
- return index;
- }
-
- void remove(const T& key)
- {
- int findIndex = findLinearSearch(key);
- if (findIndex < size())
- {
- swap(findIndex, size() - 1);
- pop_back();
- }
- }
-
- //PCK: whole function
- void initializeFromBuffer(void* buffer, int size, int capacity)
- {
- clear();
- m_ownsMemory = false;
- m_data = (T*)buffer;
- m_size = size;
- m_capacity = capacity;
- }
-
- void copyFromArray(const b3AlignedObjectArray& otherArray)
- {
- int otherSize = otherArray.size();
- resize(otherSize);
- otherArray.copy(0, otherSize, m_data);
- }
-
- void removeAtIndex(int index)
- {
- if (index < size())
- {
- swap(index, size() - 1);
- pop_back();
- }
- }
-};
-
-#endif //B3_OBJECT_ARRAY__
diff --git a/thirdparty/bullet/Bullet3Common/b3CommandLineArgs.h b/thirdparty/bullet/Bullet3Common/b3CommandLineArgs.h
deleted file mode 100644
index 5fe4f25f8d..0000000000
--- a/thirdparty/bullet/Bullet3Common/b3CommandLineArgs.h
+++ /dev/null
@@ -1,106 +0,0 @@
-#ifndef COMMAND_LINE_ARGS_H
-#define COMMAND_LINE_ARGS_H
-
-/******************************************************************************
- * Command-line parsing
- ******************************************************************************/
-#include <map>
-#include <algorithm>
-#include <string>
-#include <cstring>
-#include <sstream>
-class b3CommandLineArgs
-{
-protected:
- std::map<std::string, std::string> pairs;
-
-public:
- // Constructor
- b3CommandLineArgs(int argc, char **argv)
- {
- addArgs(argc, argv);
- }
-
- void addArgs(int argc, char **argv)
- {
- for (int i = 1; i < argc; i++)
- {
- std::string arg = argv[i];
-
- if ((arg.length() < 2) || (arg[0] != '-') || (arg[1] != '-'))
- {
- continue;
- }
-
- std::string::size_type pos;
- std::string key, val;
- if ((pos = arg.find('=')) == std::string::npos)
- {
- key = std::string(arg, 2, arg.length() - 2);
- val = "";
- }
- else
- {
- key = std::string(arg, 2, pos - 2);
- val = std::string(arg, pos + 1, arg.length() - 1);
- }
-
- //only add new keys, don't replace existing
- if (pairs.find(key) == pairs.end())
- {
- pairs[key] = val;
- }
- }
- }
-
- bool CheckCmdLineFlag(const char *arg_name)
- {
- std::map<std::string, std::string>::iterator itr;
- if ((itr = pairs.find(arg_name)) != pairs.end())
- {
- return true;
- }
- return false;
- }
-
- template <typename T>
- bool GetCmdLineArgument(const char *arg_name, T &val);
-
- int ParsedArgc()
- {
- return pairs.size();
- }
-};
-
-template <typename T>
-inline bool b3CommandLineArgs::GetCmdLineArgument(const char *arg_name, T &val)
-{
- std::map<std::string, std::string>::iterator itr;
- if ((itr = pairs.find(arg_name)) != pairs.end())
- {
- std::istringstream strstream(itr->second);
- strstream >> val;
- return true;
- }
- return false;
-}
-
-template <>
-inline bool b3CommandLineArgs::GetCmdLineArgument<char *>(const char *arg_name, char *&val)
-{
- std::map<std::string, std::string>::iterator itr;
- if ((itr = pairs.find(arg_name)) != pairs.end())
- {
- std::string s = itr->second;
- val = (char *)malloc(sizeof(char) * (s.length() + 1));
- std::strcpy(val, s.c_str());
- return true;
- }
- else
- {
- val = NULL;
- }
- return false;
-}
-
-#endif //COMMAND_LINE_ARGS_H
diff --git a/thirdparty/bullet/Bullet3Common/b3FileUtils.h b/thirdparty/bullet/Bullet3Common/b3FileUtils.h
deleted file mode 100644
index 9ded17eaaf..0000000000
--- a/thirdparty/bullet/Bullet3Common/b3FileUtils.h
+++ /dev/null
@@ -1,133 +0,0 @@
-#ifndef B3_FILE_UTILS_H
-#define B3_FILE_UTILS_H
-
-#include <stdio.h>
-#include "b3Scalar.h"
-#include <stddef.h> //ptrdiff_h
-#include <string.h>
-
-struct b3FileUtils
-{
- b3FileUtils()
- {
- }
- virtual ~b3FileUtils()
- {
- }
-
- static bool findFile(const char* orgFileName, char* relativeFileName, int maxRelativeFileNameMaxLen)
- {
- FILE* f = 0;
- f = fopen(orgFileName, "rb");
- if (f)
- {
- //printf("original file found: [%s]\n", orgFileName);
- sprintf(relativeFileName, "%s", orgFileName);
- fclose(f);
- return true;
- }
-
- //printf("Trying various directories, relative to current working directory\n");
- const char* prefix[] = {"./", "./data/", "../data/", "../../data/", "../../../data/", "../../../../data/"};
- int numPrefixes = sizeof(prefix) / sizeof(const char*);
-
- f = 0;
- bool fileFound = false;
-
- for (int i = 0; !f && i < numPrefixes; i++)
- {
-#ifdef _MSC_VER
- sprintf_s(relativeFileName, maxRelativeFileNameMaxLen, "%s%s", prefix[i], orgFileName);
-#else
- sprintf(relativeFileName, "%s%s", prefix[i], orgFileName);
-#endif
- f = fopen(relativeFileName, "rb");
- if (f)
- {
- fileFound = true;
- break;
- }
- }
- if (f)
- {
- fclose(f);
- }
-
- return fileFound;
- }
-
- static const char* strip2(const char* name, const char* pattern)
- {
- size_t const patlen = strlen(pattern);
- size_t patcnt = 0;
- const char* oriptr;
- const char* patloc;
- // find how many times the pattern occurs in the original string
- for (oriptr = name; (patloc = strstr(oriptr, pattern)); oriptr = patloc + patlen)
- {
- patcnt++;
- }
- return oriptr;
- }
-
- static int extractPath(const char* fileName, char* path, int maxPathLength)
- {
- const char* stripped = strip2(fileName, "/");
- stripped = strip2(stripped, "\\");
-
- ptrdiff_t len = stripped - fileName;
- b3Assert((len + 1) < maxPathLength);
-
- if (len && ((len + 1) < maxPathLength))
- {
- for (int i = 0; i < len; i++)
- {
- path[i] = fileName[i];
- }
- path[len] = 0;
- }
- else
- {
- len = 0;
- b3Assert(maxPathLength > 0);
- if (maxPathLength > 0)
- {
- path[len] = 0;
- }
- }
- return len;
- }
-
- static char toLowerChar(const char t)
- {
- if (t >= (char)'A' && t <= (char)'Z')
- return t + ((char)'a' - (char)'A');
- else
- return t;
- }
-
- static void toLower(char* str)
- {
- int len = strlen(str);
- for (int i = 0; i < len; i++)
- {
- str[i] = toLowerChar(str[i]);
- }
- }
-
- /*static const char* strip2(const char* name, const char* pattern)
- {
- size_t const patlen = strlen(pattern);
- size_t patcnt = 0;
- const char * oriptr;
- const char * patloc;
- // find how many times the pattern occurs in the original string
- for (oriptr = name; patloc = strstr(oriptr, pattern); oriptr = patloc + patlen)
- {
- patcnt++;
- }
- return oriptr;
- }
- */
-};
-#endif //B3_FILE_UTILS_H
diff --git a/thirdparty/bullet/Bullet3Common/b3HashMap.h b/thirdparty/bullet/Bullet3Common/b3HashMap.h
deleted file mode 100644
index 3009e2cf2f..0000000000
--- a/thirdparty/bullet/Bullet3Common/b3HashMap.h
+++ /dev/null
@@ -1,462 +0,0 @@
-/*
-Bullet Continuous Collision Detection and Physics Library
-Copyright (c) 2003-2013 Erwin Coumans http://bulletphysics.org
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose,
-including commercial applications, and to alter it and redistribute it freely,
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-#ifndef B3_HASH_MAP_H
-#define B3_HASH_MAP_H
-
-#include "b3AlignedObjectArray.h"
-
-#include <string>
-
-///very basic hashable string implementation, compatible with b3HashMap
-struct b3HashString
-{
- std::string m_string;
- unsigned int m_hash;
-
- B3_FORCE_INLINE unsigned int getHash() const
- {
- return m_hash;
- }
-
- b3HashString(const char* name)
- : m_string(name)
- {
- /* magic numbers from http://www.isthe.com/chongo/tech/comp/fnv/ */
- static const unsigned int InitialFNV = 2166136261u;
- static const unsigned int FNVMultiple = 16777619u;
-
- /* Fowler / Noll / Vo (FNV) Hash */
- unsigned int hash = InitialFNV;
- int len = m_string.length();
- for (int i = 0; i < len; i++)
- {
- hash = hash ^ (m_string[i]); /* xor the low 8 bits */
- hash = hash * FNVMultiple; /* multiply by the magic number */
- }
- m_hash = hash;
- }
-
- int portableStringCompare(const char* src, const char* dst) const
- {
- int ret = 0;
-
- while (!(ret = *(unsigned char*)src - *(unsigned char*)dst) && *dst)
- ++src, ++dst;
-
- if (ret < 0)
- ret = -1;
- else if (ret > 0)
- ret = 1;
-
- return (ret);
- }
-
- bool equals(const b3HashString& other) const
- {
- return (m_string == other.m_string);
- }
-};
-
-const int B3_HASH_NULL = 0xffffffff;
-
-class b3HashInt
-{
- int m_uid;
-
-public:
- b3HashInt(int uid) : m_uid(uid)
- {
- }
-
- int getUid1() const
- {
- return m_uid;
- }
-
- void setUid1(int uid)
- {
- m_uid = uid;
- }
-
- bool equals(const b3HashInt& other) const
- {
- return getUid1() == other.getUid1();
- }
- //to our success
- B3_FORCE_INLINE unsigned int getHash() const
- {
- int key = m_uid;
- // Thomas Wang's hash
- key += ~(key << 15);
- key ^= (key >> 10);
- key += (key << 3);
- key ^= (key >> 6);
- key += ~(key << 11);
- key ^= (key >> 16);
- return key;
- }
-};
-
-class b3HashPtr
-{
- union {
- const void* m_pointer;
- int m_hashValues[2];
- };
-
-public:
- b3HashPtr(const void* ptr)
- : m_pointer(ptr)
- {
- }
-
- const void* getPointer() const
- {
- return m_pointer;
- }
-
- bool equals(const b3HashPtr& other) const
- {
- return getPointer() == other.getPointer();
- }
-
- //to our success
- B3_FORCE_INLINE unsigned int getHash() const
- {
- const bool VOID_IS_8 = ((sizeof(void*) == 8));
-
- int key = VOID_IS_8 ? m_hashValues[0] + m_hashValues[1] : m_hashValues[0];
-
- // Thomas Wang's hash
- key += ~(key << 15);
- key ^= (key >> 10);
- key += (key << 3);
- key ^= (key >> 6);
- key += ~(key << 11);
- key ^= (key >> 16);
- return key;
- }
-};
-
-template <class Value>
-class b3HashKeyPtr
-{
- int m_uid;
-
-public:
- b3HashKeyPtr(int uid) : m_uid(uid)
- {
- }
-
- int getUid1() const
- {
- return m_uid;
- }
-
- bool equals(const b3HashKeyPtr<Value>& other) const
- {
- return getUid1() == other.getUid1();
- }
-
- //to our success
- B3_FORCE_INLINE unsigned int getHash() const
- {
- int key = m_uid;
- // Thomas Wang's hash
- key += ~(key << 15);
- key ^= (key >> 10);
- key += (key << 3);
- key ^= (key >> 6);
- key += ~(key << 11);
- key ^= (key >> 16);
- return key;
- }
-};
-
-template <class Value>
-class b3HashKey
-{
- int m_uid;
-
-public:
- b3HashKey(int uid) : m_uid(uid)
- {
- }
-
- int getUid1() const
- {
- return m_uid;
- }
-
- bool equals(const b3HashKey<Value>& other) const
- {
- return getUid1() == other.getUid1();
- }
- //to our success
- B3_FORCE_INLINE unsigned int getHash() const
- {
- int key = m_uid;
- // Thomas Wang's hash
- key += ~(key << 15);
- key ^= (key >> 10);
- key += (key << 3);
- key ^= (key >> 6);
- key += ~(key << 11);
- key ^= (key >> 16);
- return key;
- }
-};
-
-///The b3HashMap template class implements a generic and lightweight hashmap.
-///A basic sample of how to use b3HashMap is located in Demos\BasicDemo\main.cpp
-template <class Key, class Value>
-class b3HashMap
-{
-protected:
- b3AlignedObjectArray<int> m_hashTable;
- b3AlignedObjectArray<int> m_next;
-
- b3AlignedObjectArray<Value> m_valueArray;
- b3AlignedObjectArray<Key> m_keyArray;
-
- void growTables(const Key& /*key*/)
- {
- int newCapacity = m_valueArray.capacity();
-
- if (m_hashTable.size() < newCapacity)
- {
- //grow hashtable and next table
- int curHashtableSize = m_hashTable.size();
-
- m_hashTable.resize(newCapacity);
- m_next.resize(newCapacity);
-
- int i;
-
- for (i = 0; i < newCapacity; ++i)
- {
- m_hashTable[i] = B3_HASH_NULL;
- }
- for (i = 0; i < newCapacity; ++i)
- {
- m_next[i] = B3_HASH_NULL;
- }
-
- for (i = 0; i < curHashtableSize; i++)
- {
- //const Value& value = m_valueArray[i];
- //const Key& key = m_keyArray[i];
-
- int hashValue = m_keyArray[i].getHash() & (m_valueArray.capacity() - 1); // New hash value with new mask
- m_next[i] = m_hashTable[hashValue];
- m_hashTable[hashValue] = i;
- }
- }
- }
-
-public:
- void insert(const Key& key, const Value& value)
- {
- int hash = key.getHash() & (m_valueArray.capacity() - 1);
-
- //replace value if the key is already there
- int index = findIndex(key);
- if (index != B3_HASH_NULL)
- {
- m_valueArray[index] = value;
- return;
- }
-
- int count = m_valueArray.size();
- int oldCapacity = m_valueArray.capacity();
- m_valueArray.push_back(value);
- m_keyArray.push_back(key);
-
- int newCapacity = m_valueArray.capacity();
- if (oldCapacity < newCapacity)
- {
- growTables(key);
- //hash with new capacity
- hash = key.getHash() & (m_valueArray.capacity() - 1);
- }
- m_next[count] = m_hashTable[hash];
- m_hashTable[hash] = count;
- }
-
- void remove(const Key& key)
- {
- int hash = key.getHash() & (m_valueArray.capacity() - 1);
-
- int pairIndex = findIndex(key);
-
- if (pairIndex == B3_HASH_NULL)
- {
- return;
- }
-
- // Remove the pair from the hash table.
- int index = m_hashTable[hash];
- b3Assert(index != B3_HASH_NULL);
-
- int previous = B3_HASH_NULL;
- while (index != pairIndex)
- {
- previous = index;
- index = m_next[index];
- }
-
- if (previous != B3_HASH_NULL)
- {
- b3Assert(m_next[previous] == pairIndex);
- m_next[previous] = m_next[pairIndex];
- }
- else
- {
- m_hashTable[hash] = m_next[pairIndex];
- }
-
- // We now move the last pair into spot of the
- // pair being removed. We need to fix the hash
- // table indices to support the move.
-
- int lastPairIndex = m_valueArray.size() - 1;
-
- // If the removed pair is the last pair, we are done.
- if (lastPairIndex == pairIndex)
- {
- m_valueArray.pop_back();
- m_keyArray.pop_back();
- return;
- }
-
- // Remove the last pair from the hash table.
- int lastHash = m_keyArray[lastPairIndex].getHash() & (m_valueArray.capacity() - 1);
-
- index = m_hashTable[lastHash];
- b3Assert(index != B3_HASH_NULL);
-
- previous = B3_HASH_NULL;
- while (index != lastPairIndex)
- {
- previous = index;
- index = m_next[index];
- }
-
- if (previous != B3_HASH_NULL)
- {
- b3Assert(m_next[previous] == lastPairIndex);
- m_next[previous] = m_next[lastPairIndex];
- }
- else
- {
- m_hashTable[lastHash] = m_next[lastPairIndex];
- }
-
- // Copy the last pair into the remove pair's spot.
- m_valueArray[pairIndex] = m_valueArray[lastPairIndex];
- m_keyArray[pairIndex] = m_keyArray[lastPairIndex];
-
- // Insert the last pair into the hash table
- m_next[pairIndex] = m_hashTable[lastHash];
- m_hashTable[lastHash] = pairIndex;
-
- m_valueArray.pop_back();
- m_keyArray.pop_back();
- }
-
- int size() const
- {
- return m_valueArray.size();
- }
-
- const Value* getAtIndex(int index) const
- {
- b3Assert(index < m_valueArray.size());
-
- return &m_valueArray[index];
- }
-
- Value* getAtIndex(int index)
- {
- b3Assert(index < m_valueArray.size());
-
- return &m_valueArray[index];
- }
-
- Key getKeyAtIndex(int index)
- {
- b3Assert(index < m_keyArray.size());
- return m_keyArray[index];
- }
-
- const Key getKeyAtIndex(int index) const
- {
- b3Assert(index < m_keyArray.size());
- return m_keyArray[index];
- }
-
- Value* operator[](const Key& key)
- {
- return find(key);
- }
-
- const Value* find(const Key& key) const
- {
- int index = findIndex(key);
- if (index == B3_HASH_NULL)
- {
- return NULL;
- }
- return &m_valueArray[index];
- }
-
- Value* find(const Key& key)
- {
- int index = findIndex(key);
- if (index == B3_HASH_NULL)
- {
- return NULL;
- }
- return &m_valueArray[index];
- }
-
- int findIndex(const Key& key) const
- {
- unsigned int hash = key.getHash() & (m_valueArray.capacity() - 1);
-
- if (hash >= (unsigned int)m_hashTable.size())
- {
- return B3_HASH_NULL;
- }
-
- int index = m_hashTable[hash];
- while ((index != B3_HASH_NULL) && key.equals(m_keyArray[index]) == false)
- {
- index = m_next[index];
- }
- return index;
- }
-
- void clear()
- {
- m_hashTable.clear();
- m_next.clear();
- m_valueArray.clear();
- m_keyArray.clear();
- }
-};
-
-#endif //B3_HASH_MAP_H
diff --git a/thirdparty/bullet/Bullet3Common/b3Logging.cpp b/thirdparty/bullet/Bullet3Common/b3Logging.cpp
deleted file mode 100644
index 9c9f7c09ea..0000000000
--- a/thirdparty/bullet/Bullet3Common/b3Logging.cpp
+++ /dev/null
@@ -1,145 +0,0 @@
-/*
-Copyright (c) 2013 Advanced Micro Devices, Inc.
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose,
-including commercial applications, and to alter it and redistribute it freely,
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-//Originally written by Erwin Coumans
-
-#include "b3Logging.h"
-
-#include <stdio.h>
-#include <stdarg.h>
-
-#ifdef _WIN32
-#include <windows.h>
-#endif //_WIN32
-
-void b3PrintfFuncDefault(const char* msg)
-{
-#ifdef _WIN32
- OutputDebugStringA(msg);
-#endif
- printf("%s", msg);
- //is this portable?
- fflush(stdout);
-}
-
-void b3WarningMessageFuncDefault(const char* msg)
-{
-#ifdef _WIN32
- OutputDebugStringA(msg);
-#endif
- printf("%s", msg);
- //is this portable?
- fflush(stdout);
-}
-
-void b3ErrorMessageFuncDefault(const char* msg)
-{
-#ifdef _WIN32
- OutputDebugStringA(msg);
-#endif
- printf("%s", msg);
-
- //is this portable?
- fflush(stdout);
-}
-
-static b3PrintfFunc* b3s_printfFunc = b3PrintfFuncDefault;
-static b3WarningMessageFunc* b3s_warningMessageFunc = b3WarningMessageFuncDefault;
-static b3ErrorMessageFunc* b3s_errorMessageFunc = b3ErrorMessageFuncDefault;
-
-///The developer can route b3Printf output using their own implementation
-void b3SetCustomPrintfFunc(b3PrintfFunc* printfFunc)
-{
- b3s_printfFunc = printfFunc;
-}
-void b3SetCustomWarningMessageFunc(b3PrintfFunc* warningMessageFunc)
-{
- b3s_warningMessageFunc = warningMessageFunc;
-}
-void b3SetCustomErrorMessageFunc(b3PrintfFunc* errorMessageFunc)
-{
- b3s_errorMessageFunc = errorMessageFunc;
-}
-
-//#define B3_MAX_DEBUG_STRING_LENGTH 2048
-#define B3_MAX_DEBUG_STRING_LENGTH 32768
-
-void b3OutputPrintfVarArgsInternal(const char* str, ...)
-{
- char strDebug[B3_MAX_DEBUG_STRING_LENGTH] = {0};
- va_list argList;
- va_start(argList, str);
-#ifdef _MSC_VER
- vsprintf_s(strDebug, B3_MAX_DEBUG_STRING_LENGTH, str, argList);
-#else
- vsnprintf(strDebug, B3_MAX_DEBUG_STRING_LENGTH, str, argList);
-#endif
- (b3s_printfFunc)(strDebug);
- va_end(argList);
-}
-void b3OutputWarningMessageVarArgsInternal(const char* str, ...)
-{
- char strDebug[B3_MAX_DEBUG_STRING_LENGTH] = {0};
- va_list argList;
- va_start(argList, str);
-#ifdef _MSC_VER
- vsprintf_s(strDebug, B3_MAX_DEBUG_STRING_LENGTH, str, argList);
-#else
- vsnprintf(strDebug, B3_MAX_DEBUG_STRING_LENGTH, str, argList);
-#endif
- (b3s_warningMessageFunc)(strDebug);
- va_end(argList);
-}
-void b3OutputErrorMessageVarArgsInternal(const char* str, ...)
-{
- char strDebug[B3_MAX_DEBUG_STRING_LENGTH] = {0};
- va_list argList;
- va_start(argList, str);
-#ifdef _MSC_VER
- vsprintf_s(strDebug, B3_MAX_DEBUG_STRING_LENGTH, str, argList);
-#else
- vsnprintf(strDebug, B3_MAX_DEBUG_STRING_LENGTH, str, argList);
-#endif
- (b3s_errorMessageFunc)(strDebug);
- va_end(argList);
-}
-
-void b3EnterProfileZoneDefault(const char* name)
-{
-}
-void b3LeaveProfileZoneDefault()
-{
-}
-static b3EnterProfileZoneFunc* b3s_enterFunc = b3EnterProfileZoneDefault;
-static b3LeaveProfileZoneFunc* b3s_leaveFunc = b3LeaveProfileZoneDefault;
-void b3EnterProfileZone(const char* name)
-{
- (b3s_enterFunc)(name);
-}
-void b3LeaveProfileZone()
-{
- (b3s_leaveFunc)();
-}
-
-void b3SetCustomEnterProfileZoneFunc(b3EnterProfileZoneFunc* enterFunc)
-{
- b3s_enterFunc = enterFunc;
-}
-void b3SetCustomLeaveProfileZoneFunc(b3LeaveProfileZoneFunc* leaveFunc)
-{
- b3s_leaveFunc = leaveFunc;
-}
-
-#ifndef _MSC_VER
-#undef vsprintf_s
-#endif
diff --git a/thirdparty/bullet/Bullet3Common/b3Logging.h b/thirdparty/bullet/Bullet3Common/b3Logging.h
deleted file mode 100644
index f61149de77..0000000000
--- a/thirdparty/bullet/Bullet3Common/b3Logging.h
+++ /dev/null
@@ -1,74 +0,0 @@
-
-#ifndef B3_LOGGING_H
-#define B3_LOGGING_H
-
-#ifdef __cplusplus
-extern "C"
-{
-#endif
-
-///We add the do/while so that the statement "if (condition) b3Printf("test"); else {...}" would fail
-///You can also customize the message by uncommenting out a different line below
-#define b3Printf(...) b3OutputPrintfVarArgsInternal(__VA_ARGS__)
- //#define b3Printf(...) do {b3OutputPrintfVarArgsInternal("b3Printf[%s,%d]:",__FILE__,__LINE__);b3OutputPrintfVarArgsInternal(__VA_ARGS__); } while(0)
- //#define b3Printf b3OutputPrintfVarArgsInternal
- //#define b3Printf(...) printf(__VA_ARGS__)
- //#define b3Printf(...)
-#define b3Warning(...) do{ b3OutputWarningMessageVarArgsInternal("b3Warning[%s,%d]:\n", __FILE__, __LINE__);b3OutputWarningMessageVarArgsInternal(__VA_ARGS__);} while (0)
-#define b3Error(...)do {b3OutputErrorMessageVarArgsInternal("b3Error[%s,%d]:\n", __FILE__, __LINE__);b3OutputErrorMessageVarArgsInternal(__VA_ARGS__);} while (0)
-#ifndef B3_NO_PROFILE
-
- void b3EnterProfileZone(const char* name);
- void b3LeaveProfileZone();
-#ifdef __cplusplus
-
- class b3ProfileZone
- {
- public:
- b3ProfileZone(const char* name)
- {
- b3EnterProfileZone(name);
- }
-
- ~b3ProfileZone()
- {
- b3LeaveProfileZone();
- }
- };
-
-#define B3_PROFILE(name) b3ProfileZone __profile(name)
-#endif
-
-#else //B3_NO_PROFILE
-
-#define B3_PROFILE(name)
-#define b3StartProfile(a)
-#define b3StopProfile
-
-#endif //#ifndef B3_NO_PROFILE
-
- typedef void(b3PrintfFunc)(const char* msg);
- typedef void(b3WarningMessageFunc)(const char* msg);
- typedef void(b3ErrorMessageFunc)(const char* msg);
- typedef void(b3EnterProfileZoneFunc)(const char* msg);
- typedef void(b3LeaveProfileZoneFunc)();
-
- ///The developer can route b3Printf output using their own implementation
- void b3SetCustomPrintfFunc(b3PrintfFunc* printfFunc);
- void b3SetCustomWarningMessageFunc(b3WarningMessageFunc* warningMsgFunc);
- void b3SetCustomErrorMessageFunc(b3ErrorMessageFunc* errorMsgFunc);
-
- ///Set custom profile zone functions (zones can be nested)
- void b3SetCustomEnterProfileZoneFunc(b3EnterProfileZoneFunc* enterFunc);
- void b3SetCustomLeaveProfileZoneFunc(b3LeaveProfileZoneFunc* leaveFunc);
-
- ///Don't use those internal functions directly, use the b3Printf or b3SetCustomPrintfFunc instead (or warning/error version)
- void b3OutputPrintfVarArgsInternal(const char* str, ...);
- void b3OutputWarningMessageVarArgsInternal(const char* str, ...);
- void b3OutputErrorMessageVarArgsInternal(const char* str, ...);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif //B3_LOGGING_H \ No newline at end of file
diff --git a/thirdparty/bullet/Bullet3Common/b3Matrix3x3.h b/thirdparty/bullet/Bullet3Common/b3Matrix3x3.h
deleted file mode 100644
index 6c46536a81..0000000000
--- a/thirdparty/bullet/Bullet3Common/b3Matrix3x3.h
+++ /dev/null
@@ -1,1354 +0,0 @@
-/*
-Copyright (c) 2003-2013 Gino van den Bergen / Erwin Coumans http://bulletphysics.org
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose,
-including commercial applications, and to alter it and redistribute it freely,
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-#ifndef B3_MATRIX3x3_H
-#define B3_MATRIX3x3_H
-
-#include "b3Vector3.h"
-#include "b3Quaternion.h"
-#include <stdio.h>
-
-#ifdef B3_USE_SSE
-//const __m128 B3_ATTRIBUTE_ALIGNED16(b3v2220) = {2.0f, 2.0f, 2.0f, 0.0f};
-const __m128 B3_ATTRIBUTE_ALIGNED16(b3vMPPP) = {-0.0f, +0.0f, +0.0f, +0.0f};
-#endif
-
-#if defined(B3_USE_SSE) || defined(B3_USE_NEON)
-const b3SimdFloat4 B3_ATTRIBUTE_ALIGNED16(b3v1000) = {1.0f, 0.0f, 0.0f, 0.0f};
-const b3SimdFloat4 B3_ATTRIBUTE_ALIGNED16(b3v0100) = {0.0f, 1.0f, 0.0f, 0.0f};
-const b3SimdFloat4 B3_ATTRIBUTE_ALIGNED16(b3v0010) = {0.0f, 0.0f, 1.0f, 0.0f};
-#endif
-
-#ifdef B3_USE_DOUBLE_PRECISION
-#define b3Matrix3x3Data b3Matrix3x3DoubleData
-#else
-#define b3Matrix3x3Data b3Matrix3x3FloatData
-#endif //B3_USE_DOUBLE_PRECISION
-
-/**@brief The b3Matrix3x3 class implements a 3x3 rotation matrix, to perform linear algebra in combination with b3Quaternion, b3Transform and b3Vector3.
-* Make sure to only include a pure orthogonal matrix without scaling. */
-B3_ATTRIBUTE_ALIGNED16(class)
-b3Matrix3x3
-{
- ///Data storage for the matrix, each vector is a row of the matrix
- b3Vector3 m_el[3];
-
-public:
- /** @brief No initializaion constructor */
- b3Matrix3x3() {}
-
- // explicit b3Matrix3x3(const b3Scalar *m) { setFromOpenGLSubMatrix(m); }
-
- /**@brief Constructor from Quaternion */
- explicit b3Matrix3x3(const b3Quaternion& q) { setRotation(q); }
- /*
- template <typename b3Scalar>
- Matrix3x3(const b3Scalar& yaw, const b3Scalar& pitch, const b3Scalar& roll)
- {
- setEulerYPR(yaw, pitch, roll);
- }
- */
- /** @brief Constructor with row major formatting */
- b3Matrix3x3(const b3Scalar& xx, const b3Scalar& xy, const b3Scalar& xz,
- const b3Scalar& yx, const b3Scalar& yy, const b3Scalar& yz,
- const b3Scalar& zx, const b3Scalar& zy, const b3Scalar& zz)
- {
- setValue(xx, xy, xz,
- yx, yy, yz,
- zx, zy, zz);
- }
-
-#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) || defined(B3_USE_NEON)
- B3_FORCE_INLINE b3Matrix3x3(const b3SimdFloat4 v0, const b3SimdFloat4 v1, const b3SimdFloat4 v2)
- {
- m_el[0].mVec128 = v0;
- m_el[1].mVec128 = v1;
- m_el[2].mVec128 = v2;
- }
-
- B3_FORCE_INLINE b3Matrix3x3(const b3Vector3& v0, const b3Vector3& v1, const b3Vector3& v2)
- {
- m_el[0] = v0;
- m_el[1] = v1;
- m_el[2] = v2;
- }
-
- // Copy constructor
- B3_FORCE_INLINE b3Matrix3x3(const b3Matrix3x3& rhs)
- {
- m_el[0].mVec128 = rhs.m_el[0].mVec128;
- m_el[1].mVec128 = rhs.m_el[1].mVec128;
- m_el[2].mVec128 = rhs.m_el[2].mVec128;
- }
-
- // Assignment Operator
- B3_FORCE_INLINE b3Matrix3x3& operator=(const b3Matrix3x3& m)
- {
- m_el[0].mVec128 = m.m_el[0].mVec128;
- m_el[1].mVec128 = m.m_el[1].mVec128;
- m_el[2].mVec128 = m.m_el[2].mVec128;
-
- return *this;
- }
-
-#else
-
- /** @brief Copy constructor */
- B3_FORCE_INLINE b3Matrix3x3(const b3Matrix3x3& other)
- {
- m_el[0] = other.m_el[0];
- m_el[1] = other.m_el[1];
- m_el[2] = other.m_el[2];
- }
-
- /** @brief Assignment Operator */
- B3_FORCE_INLINE b3Matrix3x3& operator=(const b3Matrix3x3& other)
- {
- m_el[0] = other.m_el[0];
- m_el[1] = other.m_el[1];
- m_el[2] = other.m_el[2];
- return *this;
- }
-
-#endif
-
- /** @brief Get a column of the matrix as a vector
- * @param i Column number 0 indexed */
- B3_FORCE_INLINE b3Vector3 getColumn(int i) const
- {
- return b3MakeVector3(m_el[0][i], m_el[1][i], m_el[2][i]);
- }
-
- /** @brief Get a row of the matrix as a vector
- * @param i Row number 0 indexed */
- B3_FORCE_INLINE const b3Vector3& getRow(int i) const
- {
- b3FullAssert(0 <= i && i < 3);
- return m_el[i];
- }
-
- /** @brief Get a mutable reference to a row of the matrix as a vector
- * @param i Row number 0 indexed */
- B3_FORCE_INLINE b3Vector3& operator[](int i)
- {
- b3FullAssert(0 <= i && i < 3);
- return m_el[i];
- }
-
- /** @brief Get a const reference to a row of the matrix as a vector
- * @param i Row number 0 indexed */
- B3_FORCE_INLINE const b3Vector3& operator[](int i) const
- {
- b3FullAssert(0 <= i && i < 3);
- return m_el[i];
- }
-
- /** @brief Multiply by the target matrix on the right
- * @param m Rotation matrix to be applied
- * Equivilant to this = this * m */
- b3Matrix3x3& operator*=(const b3Matrix3x3& m);
-
- /** @brief Adds by the target matrix on the right
- * @param m matrix to be applied
- * Equivilant to this = this + m */
- b3Matrix3x3& operator+=(const b3Matrix3x3& m);
-
- /** @brief Substractss by the target matrix on the right
- * @param m matrix to be applied
- * Equivilant to this = this - m */
- b3Matrix3x3& operator-=(const b3Matrix3x3& m);
-
- /** @brief Set from the rotational part of a 4x4 OpenGL matrix
- * @param m A pointer to the beginning of the array of scalars*/
- void setFromOpenGLSubMatrix(const b3Scalar* m)
- {
- m_el[0].setValue(m[0], m[4], m[8]);
- m_el[1].setValue(m[1], m[5], m[9]);
- m_el[2].setValue(m[2], m[6], m[10]);
- }
- /** @brief Set the values of the matrix explicitly (row major)
- * @param xx Top left
- * @param xy Top Middle
- * @param xz Top Right
- * @param yx Middle Left
- * @param yy Middle Middle
- * @param yz Middle Right
- * @param zx Bottom Left
- * @param zy Bottom Middle
- * @param zz Bottom Right*/
- void setValue(const b3Scalar& xx, const b3Scalar& xy, const b3Scalar& xz,
- const b3Scalar& yx, const b3Scalar& yy, const b3Scalar& yz,
- const b3Scalar& zx, const b3Scalar& zy, const b3Scalar& zz)
- {
- m_el[0].setValue(xx, xy, xz);
- m_el[1].setValue(yx, yy, yz);
- m_el[2].setValue(zx, zy, zz);
- }
-
- /** @brief Set the matrix from a quaternion
- * @param q The Quaternion to match */
- void setRotation(const b3Quaternion& q)
- {
- b3Scalar d = q.length2();
- b3FullAssert(d != b3Scalar(0.0));
- b3Scalar s = b3Scalar(2.0) / d;
-
-#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
- __m128 vs, Q = q.get128();
- __m128i Qi = b3CastfTo128i(Q);
- __m128 Y, Z;
- __m128 V1, V2, V3;
- __m128 V11, V21, V31;
- __m128 NQ = _mm_xor_ps(Q, b3vMzeroMask);
- __m128i NQi = b3CastfTo128i(NQ);
-
- V1 = b3CastiTo128f(_mm_shuffle_epi32(Qi, B3_SHUFFLE(1, 0, 2, 3))); // Y X Z W
- V2 = _mm_shuffle_ps(NQ, Q, B3_SHUFFLE(0, 0, 1, 3)); // -X -X Y W
- V3 = b3CastiTo128f(_mm_shuffle_epi32(Qi, B3_SHUFFLE(2, 1, 0, 3))); // Z Y X W
- V1 = _mm_xor_ps(V1, b3vMPPP); // change the sign of the first element
-
- V11 = b3CastiTo128f(_mm_shuffle_epi32(Qi, B3_SHUFFLE(1, 1, 0, 3))); // Y Y X W
- V21 = _mm_unpackhi_ps(Q, Q); // Z Z W W
- V31 = _mm_shuffle_ps(Q, NQ, B3_SHUFFLE(0, 2, 0, 3)); // X Z -X -W
-
- V2 = V2 * V1; //
- V1 = V1 * V11; //
- V3 = V3 * V31; //
-
- V11 = _mm_shuffle_ps(NQ, Q, B3_SHUFFLE(2, 3, 1, 3)); // -Z -W Y W
- V11 = V11 * V21; //
- V21 = _mm_xor_ps(V21, b3vMPPP); // change the sign of the first element
- V31 = _mm_shuffle_ps(Q, NQ, B3_SHUFFLE(3, 3, 1, 3)); // W W -Y -W
- V31 = _mm_xor_ps(V31, b3vMPPP); // change the sign of the first element
- Y = b3CastiTo128f(_mm_shuffle_epi32(NQi, B3_SHUFFLE(3, 2, 0, 3))); // -W -Z -X -W
- Z = b3CastiTo128f(_mm_shuffle_epi32(Qi, B3_SHUFFLE(1, 0, 1, 3))); // Y X Y W
-
- vs = _mm_load_ss(&s);
- V21 = V21 * Y;
- V31 = V31 * Z;
-
- V1 = V1 + V11;
- V2 = V2 + V21;
- V3 = V3 + V31;
-
- vs = b3_splat3_ps(vs, 0);
- // s ready
- V1 = V1 * vs;
- V2 = V2 * vs;
- V3 = V3 * vs;
-
- V1 = V1 + b3v1000;
- V2 = V2 + b3v0100;
- V3 = V3 + b3v0010;
-
- m_el[0] = b3MakeVector3(V1);
- m_el[1] = b3MakeVector3(V2);
- m_el[2] = b3MakeVector3(V3);
-#else
- b3Scalar xs = q.getX() * s, ys = q.getY() * s, zs = q.getZ() * s;
- b3Scalar wx = q.getW() * xs, wy = q.getW() * ys, wz = q.getW() * zs;
- b3Scalar xx = q.getX() * xs, xy = q.getX() * ys, xz = q.getX() * zs;
- b3Scalar yy = q.getY() * ys, yz = q.getY() * zs, zz = q.getZ() * zs;
- setValue(
- b3Scalar(1.0) - (yy + zz), xy - wz, xz + wy,
- xy + wz, b3Scalar(1.0) - (xx + zz), yz - wx,
- xz - wy, yz + wx, b3Scalar(1.0) - (xx + yy));
-#endif
- }
-
- /** @brief Set the matrix from euler angles using YPR around YXZ respectively
- * @param yaw Yaw about Y axis
- * @param pitch Pitch about X axis
- * @param roll Roll about Z axis
- */
- void setEulerYPR(const b3Scalar& yaw, const b3Scalar& pitch, const b3Scalar& roll)
- {
- setEulerZYX(roll, pitch, yaw);
- }
-
- /** @brief Set the matrix from euler angles YPR around ZYX axes
- * @param eulerX Roll about X axis
- * @param eulerY Pitch around Y axis
- * @param eulerZ Yaw aboud Z axis
- *
- * These angles are used to produce a rotation matrix. The euler
- * angles are applied in ZYX order. I.e a vector is first rotated
- * about X then Y and then Z
- **/
- void setEulerZYX(b3Scalar eulerX, b3Scalar eulerY, b3Scalar eulerZ)
- {
- ///@todo proposed to reverse this since it's labeled zyx but takes arguments xyz and it will match all other parts of the code
- b3Scalar ci(b3Cos(eulerX));
- b3Scalar cj(b3Cos(eulerY));
- b3Scalar ch(b3Cos(eulerZ));
- b3Scalar si(b3Sin(eulerX));
- b3Scalar sj(b3Sin(eulerY));
- b3Scalar sh(b3Sin(eulerZ));
- b3Scalar cc = ci * ch;
- b3Scalar cs = ci * sh;
- b3Scalar sc = si * ch;
- b3Scalar ss = si * sh;
-
- setValue(cj * ch, sj * sc - cs, sj * cc + ss,
- cj * sh, sj * ss + cc, sj * cs - sc,
- -sj, cj * si, cj * ci);
- }
-
- /**@brief Set the matrix to the identity */
- void setIdentity()
- {
-#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) || defined(B3_USE_NEON)
- m_el[0] = b3MakeVector3(b3v1000);
- m_el[1] = b3MakeVector3(b3v0100);
- m_el[2] = b3MakeVector3(b3v0010);
-#else
- setValue(b3Scalar(1.0), b3Scalar(0.0), b3Scalar(0.0),
- b3Scalar(0.0), b3Scalar(1.0), b3Scalar(0.0),
- b3Scalar(0.0), b3Scalar(0.0), b3Scalar(1.0));
-#endif
- }
-
- static const b3Matrix3x3& getIdentity()
- {
-#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) || defined(B3_USE_NEON)
- static const b3Matrix3x3
- identityMatrix(b3v1000, b3v0100, b3v0010);
-#else
- static const b3Matrix3x3
- identityMatrix(
- b3Scalar(1.0), b3Scalar(0.0), b3Scalar(0.0),
- b3Scalar(0.0), b3Scalar(1.0), b3Scalar(0.0),
- b3Scalar(0.0), b3Scalar(0.0), b3Scalar(1.0));
-#endif
- return identityMatrix;
- }
-
- /**@brief Fill the rotational part of an OpenGL matrix and clear the shear/perspective
- * @param m The array to be filled */
- void getOpenGLSubMatrix(b3Scalar * m) const
- {
-#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
- __m128 v0 = m_el[0].mVec128;
- __m128 v1 = m_el[1].mVec128;
- __m128 v2 = m_el[2].mVec128; // x2 y2 z2 w2
- __m128* vm = (__m128*)m;
- __m128 vT;
-
- v2 = _mm_and_ps(v2, b3vFFF0fMask); // x2 y2 z2 0
-
- vT = _mm_unpackhi_ps(v0, v1); // z0 z1 * *
- v0 = _mm_unpacklo_ps(v0, v1); // x0 x1 y0 y1
-
- v1 = _mm_shuffle_ps(v0, v2, B3_SHUFFLE(2, 3, 1, 3)); // y0 y1 y2 0
- v0 = _mm_shuffle_ps(v0, v2, B3_SHUFFLE(0, 1, 0, 3)); // x0 x1 x2 0
- v2 = b3CastdTo128f(_mm_move_sd(b3CastfTo128d(v2), b3CastfTo128d(vT))); // z0 z1 z2 0
-
- vm[0] = v0;
- vm[1] = v1;
- vm[2] = v2;
-#elif defined(B3_USE_NEON)
- // note: zeros the w channel. We can preserve it at the cost of two more vtrn instructions.
- static const uint32x2_t zMask = (const uint32x2_t){-1, 0};
- float32x4_t* vm = (float32x4_t*)m;
- float32x4x2_t top = vtrnq_f32(m_el[0].mVec128, m_el[1].mVec128); // {x0 x1 z0 z1}, {y0 y1 w0 w1}
- float32x2x2_t bl = vtrn_f32(vget_low_f32(m_el[2].mVec128), vdup_n_f32(0.0f)); // {x2 0 }, {y2 0}
- float32x4_t v0 = vcombine_f32(vget_low_f32(top.val[0]), bl.val[0]);
- float32x4_t v1 = vcombine_f32(vget_low_f32(top.val[1]), bl.val[1]);
- float32x2_t q = (float32x2_t)vand_u32((uint32x2_t)vget_high_f32(m_el[2].mVec128), zMask);
- float32x4_t v2 = vcombine_f32(vget_high_f32(top.val[0]), q); // z0 z1 z2 0
-
- vm[0] = v0;
- vm[1] = v1;
- vm[2] = v2;
-#else
- m[0] = b3Scalar(m_el[0].getX());
- m[1] = b3Scalar(m_el[1].getX());
- m[2] = b3Scalar(m_el[2].getX());
- m[3] = b3Scalar(0.0);
- m[4] = b3Scalar(m_el[0].getY());
- m[5] = b3Scalar(m_el[1].getY());
- m[6] = b3Scalar(m_el[2].getY());
- m[7] = b3Scalar(0.0);
- m[8] = b3Scalar(m_el[0].getZ());
- m[9] = b3Scalar(m_el[1].getZ());
- m[10] = b3Scalar(m_el[2].getZ());
- m[11] = b3Scalar(0.0);
-#endif
- }
-
- /**@brief Get the matrix represented as a quaternion
- * @param q The quaternion which will be set */
- void getRotation(b3Quaternion & q) const
- {
-#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) || defined(B3_USE_NEON)
- b3Scalar trace = m_el[0].getX() + m_el[1].getY() + m_el[2].getZ();
- b3Scalar s, x;
-
- union {
- b3SimdFloat4 vec;
- b3Scalar f[4];
- } temp;
-
- if (trace > b3Scalar(0.0))
- {
- x = trace + b3Scalar(1.0);
-
- temp.f[0] = m_el[2].getY() - m_el[1].getZ();
- temp.f[1] = m_el[0].getZ() - m_el[2].getX();
- temp.f[2] = m_el[1].getX() - m_el[0].getY();
- temp.f[3] = x;
- //temp.f[3]= s * b3Scalar(0.5);
- }
- else
- {
- int i, j, k;
- if (m_el[0].getX() < m_el[1].getY())
- {
- if (m_el[1].getY() < m_el[2].getZ())
- {
- i = 2;
- j = 0;
- k = 1;
- }
- else
- {
- i = 1;
- j = 2;
- k = 0;
- }
- }
- else
- {
- if (m_el[0].getX() < m_el[2].getZ())
- {
- i = 2;
- j = 0;
- k = 1;
- }
- else
- {
- i = 0;
- j = 1;
- k = 2;
- }
- }
-
- x = m_el[i][i] - m_el[j][j] - m_el[k][k] + b3Scalar(1.0);
-
- temp.f[3] = (m_el[k][j] - m_el[j][k]);
- temp.f[j] = (m_el[j][i] + m_el[i][j]);
- temp.f[k] = (m_el[k][i] + m_el[i][k]);
- temp.f[i] = x;
- //temp.f[i] = s * b3Scalar(0.5);
- }
-
- s = b3Sqrt(x);
- q.set128(temp.vec);
- s = b3Scalar(0.5) / s;
-
- q *= s;
-#else
- b3Scalar trace = m_el[0].getX() + m_el[1].getY() + m_el[2].getZ();
-
- b3Scalar temp[4];
-
- if (trace > b3Scalar(0.0))
- {
- b3Scalar s = b3Sqrt(trace + b3Scalar(1.0));
- temp[3] = (s * b3Scalar(0.5));
- s = b3Scalar(0.5) / s;
-
- temp[0] = ((m_el[2].getY() - m_el[1].getZ()) * s);
- temp[1] = ((m_el[0].getZ() - m_el[2].getX()) * s);
- temp[2] = ((m_el[1].getX() - m_el[0].getY()) * s);
- }
- else
- {
- int i = m_el[0].getX() < m_el[1].getY() ? (m_el[1].getY() < m_el[2].getZ() ? 2 : 1) : (m_el[0].getX() < m_el[2].getZ() ? 2 : 0);
- int j = (i + 1) % 3;
- int k = (i + 2) % 3;
-
- b3Scalar s = b3Sqrt(m_el[i][i] - m_el[j][j] - m_el[k][k] + b3Scalar(1.0));
- temp[i] = s * b3Scalar(0.5);
- s = b3Scalar(0.5) / s;
-
- temp[3] = (m_el[k][j] - m_el[j][k]) * s;
- temp[j] = (m_el[j][i] + m_el[i][j]) * s;
- temp[k] = (m_el[k][i] + m_el[i][k]) * s;
- }
- q.setValue(temp[0], temp[1], temp[2], temp[3]);
-#endif
- }
-
- /**@brief Get the matrix represented as euler angles around YXZ, roundtrip with setEulerYPR
- * @param yaw Yaw around Y axis
- * @param pitch Pitch around X axis
- * @param roll around Z axis */
- void getEulerYPR(b3Scalar & yaw, b3Scalar & pitch, b3Scalar & roll) const
- {
- // first use the normal calculus
- yaw = b3Scalar(b3Atan2(m_el[1].getX(), m_el[0].getX()));
- pitch = b3Scalar(b3Asin(-m_el[2].getX()));
- roll = b3Scalar(b3Atan2(m_el[2].getY(), m_el[2].getZ()));
-
- // on pitch = +/-HalfPI
- if (b3Fabs(pitch) == B3_HALF_PI)
- {
- if (yaw > 0)
- yaw -= B3_PI;
- else
- yaw += B3_PI;
-
- if (roll > 0)
- roll -= B3_PI;
- else
- roll += B3_PI;
- }
- };
-
- /**@brief Get the matrix represented as euler angles around ZYX
- * @param yaw Yaw around X axis
- * @param pitch Pitch around Y axis
- * @param roll around X axis
- * @param solution_number Which solution of two possible solutions ( 1 or 2) are possible values*/
- void getEulerZYX(b3Scalar & yaw, b3Scalar & pitch, b3Scalar & roll, unsigned int solution_number = 1) const
- {
- struct Euler
- {
- b3Scalar yaw;
- b3Scalar pitch;
- b3Scalar roll;
- };
-
- Euler euler_out;
- Euler euler_out2; //second solution
- //get the pointer to the raw data
-
- // Check that pitch is not at a singularity
- if (b3Fabs(m_el[2].getX()) >= 1)
- {
- euler_out.yaw = 0;
- euler_out2.yaw = 0;
-
- // From difference of angles formula
- b3Scalar delta = b3Atan2(m_el[0].getX(), m_el[0].getZ());
- if (m_el[2].getX() > 0) //gimbal locked up
- {
- euler_out.pitch = B3_PI / b3Scalar(2.0);
- euler_out2.pitch = B3_PI / b3Scalar(2.0);
- euler_out.roll = euler_out.pitch + delta;
- euler_out2.roll = euler_out.pitch + delta;
- }
- else // gimbal locked down
- {
- euler_out.pitch = -B3_PI / b3Scalar(2.0);
- euler_out2.pitch = -B3_PI / b3Scalar(2.0);
- euler_out.roll = -euler_out.pitch + delta;
- euler_out2.roll = -euler_out.pitch + delta;
- }
- }
- else
- {
- euler_out.pitch = -b3Asin(m_el[2].getX());
- euler_out2.pitch = B3_PI - euler_out.pitch;
-
- euler_out.roll = b3Atan2(m_el[2].getY() / b3Cos(euler_out.pitch),
- m_el[2].getZ() / b3Cos(euler_out.pitch));
- euler_out2.roll = b3Atan2(m_el[2].getY() / b3Cos(euler_out2.pitch),
- m_el[2].getZ() / b3Cos(euler_out2.pitch));
-
- euler_out.yaw = b3Atan2(m_el[1].getX() / b3Cos(euler_out.pitch),
- m_el[0].getX() / b3Cos(euler_out.pitch));
- euler_out2.yaw = b3Atan2(m_el[1].getX() / b3Cos(euler_out2.pitch),
- m_el[0].getX() / b3Cos(euler_out2.pitch));
- }
-
- if (solution_number == 1)
- {
- yaw = euler_out.yaw;
- pitch = euler_out.pitch;
- roll = euler_out.roll;
- }
- else
- {
- yaw = euler_out2.yaw;
- pitch = euler_out2.pitch;
- roll = euler_out2.roll;
- }
- }
-
- /**@brief Create a scaled copy of the matrix
- * @param s Scaling vector The elements of the vector will scale each column */
-
- b3Matrix3x3 scaled(const b3Vector3& s) const
- {
-#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) || defined(B3_USE_NEON)
- return b3Matrix3x3(m_el[0] * s, m_el[1] * s, m_el[2] * s);
-#else
- return b3Matrix3x3(
- m_el[0].getX() * s.getX(), m_el[0].getY() * s.getY(), m_el[0].getZ() * s.getZ(),
- m_el[1].getX() * s.getX(), m_el[1].getY() * s.getY(), m_el[1].getZ() * s.getZ(),
- m_el[2].getX() * s.getX(), m_el[2].getY() * s.getY(), m_el[2].getZ() * s.getZ());
-#endif
- }
-
- /**@brief Return the determinant of the matrix */
- b3Scalar determinant() const;
- /**@brief Return the adjoint of the matrix */
- b3Matrix3x3 adjoint() const;
- /**@brief Return the matrix with all values non negative */
- b3Matrix3x3 absolute() const;
- /**@brief Return the transpose of the matrix */
- b3Matrix3x3 transpose() const;
- /**@brief Return the inverse of the matrix */
- b3Matrix3x3 inverse() const;
-
- b3Matrix3x3 transposeTimes(const b3Matrix3x3& m) const;
- b3Matrix3x3 timesTranspose(const b3Matrix3x3& m) const;
-
- B3_FORCE_INLINE b3Scalar tdotx(const b3Vector3& v) const
- {
- return m_el[0].getX() * v.getX() + m_el[1].getX() * v.getY() + m_el[2].getX() * v.getZ();
- }
- B3_FORCE_INLINE b3Scalar tdoty(const b3Vector3& v) const
- {
- return m_el[0].getY() * v.getX() + m_el[1].getY() * v.getY() + m_el[2].getY() * v.getZ();
- }
- B3_FORCE_INLINE b3Scalar tdotz(const b3Vector3& v) const
- {
- return m_el[0].getZ() * v.getX() + m_el[1].getZ() * v.getY() + m_el[2].getZ() * v.getZ();
- }
-
- /**@brief diagonalizes this matrix by the Jacobi method.
- * @param rot stores the rotation from the coordinate system in which the matrix is diagonal to the original
- * coordinate system, i.e., old_this = rot * new_this * rot^T.
- * @param threshold See iteration
- * @param iteration The iteration stops when all off-diagonal elements are less than the threshold multiplied
- * by the sum of the absolute values of the diagonal, or when maxSteps have been executed.
- *
- * Note that this matrix is assumed to be symmetric.
- */
- void diagonalize(b3Matrix3x3 & rot, b3Scalar threshold, int maxSteps)
- {
- rot.setIdentity();
- for (int step = maxSteps; step > 0; step--)
- {
- // find off-diagonal element [p][q] with largest magnitude
- int p = 0;
- int q = 1;
- int r = 2;
- b3Scalar max = b3Fabs(m_el[0][1]);
- b3Scalar v = b3Fabs(m_el[0][2]);
- if (v > max)
- {
- q = 2;
- r = 1;
- max = v;
- }
- v = b3Fabs(m_el[1][2]);
- if (v > max)
- {
- p = 1;
- q = 2;
- r = 0;
- max = v;
- }
-
- b3Scalar t = threshold * (b3Fabs(m_el[0][0]) + b3Fabs(m_el[1][1]) + b3Fabs(m_el[2][2]));
- if (max <= t)
- {
- if (max <= B3_EPSILON * t)
- {
- return;
- }
- step = 1;
- }
-
- // compute Jacobi rotation J which leads to a zero for element [p][q]
- b3Scalar mpq = m_el[p][q];
- b3Scalar theta = (m_el[q][q] - m_el[p][p]) / (2 * mpq);
- b3Scalar theta2 = theta * theta;
- b3Scalar cos;
- b3Scalar sin;
- if (theta2 * theta2 < b3Scalar(10 / B3_EPSILON))
- {
- t = (theta >= 0) ? 1 / (theta + b3Sqrt(1 + theta2))
- : 1 / (theta - b3Sqrt(1 + theta2));
- cos = 1 / b3Sqrt(1 + t * t);
- sin = cos * t;
- }
- else
- {
- // approximation for large theta-value, i.e., a nearly diagonal matrix
- t = 1 / (theta * (2 + b3Scalar(0.5) / theta2));
- cos = 1 - b3Scalar(0.5) * t * t;
- sin = cos * t;
- }
-
- // apply rotation to matrix (this = J^T * this * J)
- m_el[p][q] = m_el[q][p] = 0;
- m_el[p][p] -= t * mpq;
- m_el[q][q] += t * mpq;
- b3Scalar mrp = m_el[r][p];
- b3Scalar mrq = m_el[r][q];
- m_el[r][p] = m_el[p][r] = cos * mrp - sin * mrq;
- m_el[r][q] = m_el[q][r] = cos * mrq + sin * mrp;
-
- // apply rotation to rot (rot = rot * J)
- for (int i = 0; i < 3; i++)
- {
- b3Vector3& row = rot[i];
- mrp = row[p];
- mrq = row[q];
- row[p] = cos * mrp - sin * mrq;
- row[q] = cos * mrq + sin * mrp;
- }
- }
- }
-
- /**@brief Calculate the matrix cofactor
- * @param r1 The first row to use for calculating the cofactor
- * @param c1 The first column to use for calculating the cofactor
- * @param r1 The second row to use for calculating the cofactor
- * @param c1 The second column to use for calculating the cofactor
- * See http://en.wikipedia.org/wiki/Cofactor_(linear_algebra) for more details
- */
- b3Scalar cofac(int r1, int c1, int r2, int c2) const
- {
- return m_el[r1][c1] * m_el[r2][c2] - m_el[r1][c2] * m_el[r2][c1];
- }
-
- void serialize(struct b3Matrix3x3Data & dataOut) const;
-
- void serializeFloat(struct b3Matrix3x3FloatData & dataOut) const;
-
- void deSerialize(const struct b3Matrix3x3Data& dataIn);
-
- void deSerializeFloat(const struct b3Matrix3x3FloatData& dataIn);
-
- void deSerializeDouble(const struct b3Matrix3x3DoubleData& dataIn);
-};
-
-B3_FORCE_INLINE b3Matrix3x3&
-b3Matrix3x3::operator*=(const b3Matrix3x3& m)
-{
-#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
- __m128 rv00, rv01, rv02;
- __m128 rv10, rv11, rv12;
- __m128 rv20, rv21, rv22;
- __m128 mv0, mv1, mv2;
-
- rv02 = m_el[0].mVec128;
- rv12 = m_el[1].mVec128;
- rv22 = m_el[2].mVec128;
-
- mv0 = _mm_and_ps(m[0].mVec128, b3vFFF0fMask);
- mv1 = _mm_and_ps(m[1].mVec128, b3vFFF0fMask);
- mv2 = _mm_and_ps(m[2].mVec128, b3vFFF0fMask);
-
- // rv0
- rv00 = b3_splat_ps(rv02, 0);
- rv01 = b3_splat_ps(rv02, 1);
- rv02 = b3_splat_ps(rv02, 2);
-
- rv00 = _mm_mul_ps(rv00, mv0);
- rv01 = _mm_mul_ps(rv01, mv1);
- rv02 = _mm_mul_ps(rv02, mv2);
-
- // rv1
- rv10 = b3_splat_ps(rv12, 0);
- rv11 = b3_splat_ps(rv12, 1);
- rv12 = b3_splat_ps(rv12, 2);
-
- rv10 = _mm_mul_ps(rv10, mv0);
- rv11 = _mm_mul_ps(rv11, mv1);
- rv12 = _mm_mul_ps(rv12, mv2);
-
- // rv2
- rv20 = b3_splat_ps(rv22, 0);
- rv21 = b3_splat_ps(rv22, 1);
- rv22 = b3_splat_ps(rv22, 2);
-
- rv20 = _mm_mul_ps(rv20, mv0);
- rv21 = _mm_mul_ps(rv21, mv1);
- rv22 = _mm_mul_ps(rv22, mv2);
-
- rv00 = _mm_add_ps(rv00, rv01);
- rv10 = _mm_add_ps(rv10, rv11);
- rv20 = _mm_add_ps(rv20, rv21);
-
- m_el[0].mVec128 = _mm_add_ps(rv00, rv02);
- m_el[1].mVec128 = _mm_add_ps(rv10, rv12);
- m_el[2].mVec128 = _mm_add_ps(rv20, rv22);
-
-#elif defined(B3_USE_NEON)
-
- float32x4_t rv0, rv1, rv2;
- float32x4_t v0, v1, v2;
- float32x4_t mv0, mv1, mv2;
-
- v0 = m_el[0].mVec128;
- v1 = m_el[1].mVec128;
- v2 = m_el[2].mVec128;
-
- mv0 = (float32x4_t)vandq_s32((int32x4_t)m[0].mVec128, b3vFFF0Mask);
- mv1 = (float32x4_t)vandq_s32((int32x4_t)m[1].mVec128, b3vFFF0Mask);
- mv2 = (float32x4_t)vandq_s32((int32x4_t)m[2].mVec128, b3vFFF0Mask);
-
- rv0 = vmulq_lane_f32(mv0, vget_low_f32(v0), 0);
- rv1 = vmulq_lane_f32(mv0, vget_low_f32(v1), 0);
- rv2 = vmulq_lane_f32(mv0, vget_low_f32(v2), 0);
-
- rv0 = vmlaq_lane_f32(rv0, mv1, vget_low_f32(v0), 1);
- rv1 = vmlaq_lane_f32(rv1, mv1, vget_low_f32(v1), 1);
- rv2 = vmlaq_lane_f32(rv2, mv1, vget_low_f32(v2), 1);
-
- rv0 = vmlaq_lane_f32(rv0, mv2, vget_high_f32(v0), 0);
- rv1 = vmlaq_lane_f32(rv1, mv2, vget_high_f32(v1), 0);
- rv2 = vmlaq_lane_f32(rv2, mv2, vget_high_f32(v2), 0);
-
- m_el[0].mVec128 = rv0;
- m_el[1].mVec128 = rv1;
- m_el[2].mVec128 = rv2;
-#else
- setValue(
- m.tdotx(m_el[0]), m.tdoty(m_el[0]), m.tdotz(m_el[0]),
- m.tdotx(m_el[1]), m.tdoty(m_el[1]), m.tdotz(m_el[1]),
- m.tdotx(m_el[2]), m.tdoty(m_el[2]), m.tdotz(m_el[2]));
-#endif
- return *this;
-}
-
-B3_FORCE_INLINE b3Matrix3x3&
-b3Matrix3x3::operator+=(const b3Matrix3x3& m)
-{
-#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) || defined(B3_USE_NEON)
- m_el[0].mVec128 = m_el[0].mVec128 + m.m_el[0].mVec128;
- m_el[1].mVec128 = m_el[1].mVec128 + m.m_el[1].mVec128;
- m_el[2].mVec128 = m_el[2].mVec128 + m.m_el[2].mVec128;
-#else
- setValue(
- m_el[0][0] + m.m_el[0][0],
- m_el[0][1] + m.m_el[0][1],
- m_el[0][2] + m.m_el[0][2],
- m_el[1][0] + m.m_el[1][0],
- m_el[1][1] + m.m_el[1][1],
- m_el[1][2] + m.m_el[1][2],
- m_el[2][0] + m.m_el[2][0],
- m_el[2][1] + m.m_el[2][1],
- m_el[2][2] + m.m_el[2][2]);
-#endif
- return *this;
-}
-
-B3_FORCE_INLINE b3Matrix3x3
-operator*(const b3Matrix3x3& m, const b3Scalar& k)
-{
-#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE))
- __m128 vk = b3_splat_ps(_mm_load_ss((float*)&k), 0x80);
- return b3Matrix3x3(
- _mm_mul_ps(m[0].mVec128, vk),
- _mm_mul_ps(m[1].mVec128, vk),
- _mm_mul_ps(m[2].mVec128, vk));
-#elif defined(B3_USE_NEON)
- return b3Matrix3x3(
- vmulq_n_f32(m[0].mVec128, k),
- vmulq_n_f32(m[1].mVec128, k),
- vmulq_n_f32(m[2].mVec128, k));
-#else
- return b3Matrix3x3(
- m[0].getX() * k, m[0].getY() * k, m[0].getZ() * k,
- m[1].getX() * k, m[1].getY() * k, m[1].getZ() * k,
- m[2].getX() * k, m[2].getY() * k, m[2].getZ() * k);
-#endif
-}
-
-B3_FORCE_INLINE b3Matrix3x3
-operator+(const b3Matrix3x3& m1, const b3Matrix3x3& m2)
-{
-#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) || defined(B3_USE_NEON)
- return b3Matrix3x3(
- m1[0].mVec128 + m2[0].mVec128,
- m1[1].mVec128 + m2[1].mVec128,
- m1[2].mVec128 + m2[2].mVec128);
-#else
- return b3Matrix3x3(
- m1[0][0] + m2[0][0],
- m1[0][1] + m2[0][1],
- m1[0][2] + m2[0][2],
-
- m1[1][0] + m2[1][0],
- m1[1][1] + m2[1][1],
- m1[1][2] + m2[1][2],
-
- m1[2][0] + m2[2][0],
- m1[2][1] + m2[2][1],
- m1[2][2] + m2[2][2]);
-#endif
-}
-
-B3_FORCE_INLINE b3Matrix3x3
-operator-(const b3Matrix3x3& m1, const b3Matrix3x3& m2)
-{
-#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) || defined(B3_USE_NEON)
- return b3Matrix3x3(
- m1[0].mVec128 - m2[0].mVec128,
- m1[1].mVec128 - m2[1].mVec128,
- m1[2].mVec128 - m2[2].mVec128);
-#else
- return b3Matrix3x3(
- m1[0][0] - m2[0][0],
- m1[0][1] - m2[0][1],
- m1[0][2] - m2[0][2],
-
- m1[1][0] - m2[1][0],
- m1[1][1] - m2[1][1],
- m1[1][2] - m2[1][2],
-
- m1[2][0] - m2[2][0],
- m1[2][1] - m2[2][1],
- m1[2][2] - m2[2][2]);
-#endif
-}
-
-B3_FORCE_INLINE b3Matrix3x3&
-b3Matrix3x3::operator-=(const b3Matrix3x3& m)
-{
-#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) || defined(B3_USE_NEON)
- m_el[0].mVec128 = m_el[0].mVec128 - m.m_el[0].mVec128;
- m_el[1].mVec128 = m_el[1].mVec128 - m.m_el[1].mVec128;
- m_el[2].mVec128 = m_el[2].mVec128 - m.m_el[2].mVec128;
-#else
- setValue(
- m_el[0][0] - m.m_el[0][0],
- m_el[0][1] - m.m_el[0][1],
- m_el[0][2] - m.m_el[0][2],
- m_el[1][0] - m.m_el[1][0],
- m_el[1][1] - m.m_el[1][1],
- m_el[1][2] - m.m_el[1][2],
- m_el[2][0] - m.m_el[2][0],
- m_el[2][1] - m.m_el[2][1],
- m_el[2][2] - m.m_el[2][2]);
-#endif
- return *this;
-}
-
-B3_FORCE_INLINE b3Scalar
-b3Matrix3x3::determinant() const
-{
- return b3Triple((*this)[0], (*this)[1], (*this)[2]);
-}
-
-B3_FORCE_INLINE b3Matrix3x3
-b3Matrix3x3::absolute() const
-{
-#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE))
- return b3Matrix3x3(
- _mm_and_ps(m_el[0].mVec128, b3vAbsfMask),
- _mm_and_ps(m_el[1].mVec128, b3vAbsfMask),
- _mm_and_ps(m_el[2].mVec128, b3vAbsfMask));
-#elif defined(B3_USE_NEON)
- return b3Matrix3x3(
- (float32x4_t)vandq_s32((int32x4_t)m_el[0].mVec128, b3v3AbsMask),
- (float32x4_t)vandq_s32((int32x4_t)m_el[1].mVec128, b3v3AbsMask),
- (float32x4_t)vandq_s32((int32x4_t)m_el[2].mVec128, b3v3AbsMask));
-#else
- return b3Matrix3x3(
- b3Fabs(m_el[0].getX()), b3Fabs(m_el[0].getY()), b3Fabs(m_el[0].getZ()),
- b3Fabs(m_el[1].getX()), b3Fabs(m_el[1].getY()), b3Fabs(m_el[1].getZ()),
- b3Fabs(m_el[2].getX()), b3Fabs(m_el[2].getY()), b3Fabs(m_el[2].getZ()));
-#endif
-}
-
-B3_FORCE_INLINE b3Matrix3x3
-b3Matrix3x3::transpose() const
-{
-#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE))
- __m128 v0 = m_el[0].mVec128;
- __m128 v1 = m_el[1].mVec128;
- __m128 v2 = m_el[2].mVec128; // x2 y2 z2 w2
- __m128 vT;
-
- v2 = _mm_and_ps(v2, b3vFFF0fMask); // x2 y2 z2 0
-
- vT = _mm_unpackhi_ps(v0, v1); // z0 z1 * *
- v0 = _mm_unpacklo_ps(v0, v1); // x0 x1 y0 y1
-
- v1 = _mm_shuffle_ps(v0, v2, B3_SHUFFLE(2, 3, 1, 3)); // y0 y1 y2 0
- v0 = _mm_shuffle_ps(v0, v2, B3_SHUFFLE(0, 1, 0, 3)); // x0 x1 x2 0
- v2 = b3CastdTo128f(_mm_move_sd(b3CastfTo128d(v2), b3CastfTo128d(vT))); // z0 z1 z2 0
-
- return b3Matrix3x3(v0, v1, v2);
-#elif defined(B3_USE_NEON)
- // note: zeros the w channel. We can preserve it at the cost of two more vtrn instructions.
- static const uint32x2_t zMask = (const uint32x2_t){-1, 0};
- float32x4x2_t top = vtrnq_f32(m_el[0].mVec128, m_el[1].mVec128); // {x0 x1 z0 z1}, {y0 y1 w0 w1}
- float32x2x2_t bl = vtrn_f32(vget_low_f32(m_el[2].mVec128), vdup_n_f32(0.0f)); // {x2 0 }, {y2 0}
- float32x4_t v0 = vcombine_f32(vget_low_f32(top.val[0]), bl.val[0]);
- float32x4_t v1 = vcombine_f32(vget_low_f32(top.val[1]), bl.val[1]);
- float32x2_t q = (float32x2_t)vand_u32((uint32x2_t)vget_high_f32(m_el[2].mVec128), zMask);
- float32x4_t v2 = vcombine_f32(vget_high_f32(top.val[0]), q); // z0 z1 z2 0
- return b3Matrix3x3(v0, v1, v2);
-#else
- return b3Matrix3x3(m_el[0].getX(), m_el[1].getX(), m_el[2].getX(),
- m_el[0].getY(), m_el[1].getY(), m_el[2].getY(),
- m_el[0].getZ(), m_el[1].getZ(), m_el[2].getZ());
-#endif
-}
-
-B3_FORCE_INLINE b3Matrix3x3
-b3Matrix3x3::adjoint() const
-{
- return b3Matrix3x3(cofac(1, 1, 2, 2), cofac(0, 2, 2, 1), cofac(0, 1, 1, 2),
- cofac(1, 2, 2, 0), cofac(0, 0, 2, 2), cofac(0, 2, 1, 0),
- cofac(1, 0, 2, 1), cofac(0, 1, 2, 0), cofac(0, 0, 1, 1));
-}
-
-B3_FORCE_INLINE b3Matrix3x3
-b3Matrix3x3::inverse() const
-{
- b3Vector3 co = b3MakeVector3(cofac(1, 1, 2, 2), cofac(1, 2, 2, 0), cofac(1, 0, 2, 1));
- b3Scalar det = (*this)[0].dot(co);
- b3FullAssert(det != b3Scalar(0.0));
- b3Scalar s = b3Scalar(1.0) / det;
- return b3Matrix3x3(co.getX() * s, cofac(0, 2, 2, 1) * s, cofac(0, 1, 1, 2) * s,
- co.getY() * s, cofac(0, 0, 2, 2) * s, cofac(0, 2, 1, 0) * s,
- co.getZ() * s, cofac(0, 1, 2, 0) * s, cofac(0, 0, 1, 1) * s);
-}
-
-B3_FORCE_INLINE b3Matrix3x3
-b3Matrix3x3::transposeTimes(const b3Matrix3x3& m) const
-{
-#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE))
- // zeros w
- // static const __m128i xyzMask = (const __m128i){ -1ULL, 0xffffffffULL };
- __m128 row = m_el[0].mVec128;
- __m128 m0 = _mm_and_ps(m.getRow(0).mVec128, b3vFFF0fMask);
- __m128 m1 = _mm_and_ps(m.getRow(1).mVec128, b3vFFF0fMask);
- __m128 m2 = _mm_and_ps(m.getRow(2).mVec128, b3vFFF0fMask);
- __m128 r0 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0));
- __m128 r1 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0x55));
- __m128 r2 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0xaa));
- row = m_el[1].mVec128;
- r0 = _mm_add_ps(r0, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0)));
- r1 = _mm_add_ps(r1, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0x55)));
- r2 = _mm_add_ps(r2, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0xaa)));
- row = m_el[2].mVec128;
- r0 = _mm_add_ps(r0, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0)));
- r1 = _mm_add_ps(r1, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0x55)));
- r2 = _mm_add_ps(r2, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0xaa)));
- return b3Matrix3x3(r0, r1, r2);
-
-#elif defined B3_USE_NEON
- // zeros w
- static const uint32x4_t xyzMask = (const uint32x4_t){-1, -1, -1, 0};
- float32x4_t m0 = (float32x4_t)vandq_u32((uint32x4_t)m.getRow(0).mVec128, xyzMask);
- float32x4_t m1 = (float32x4_t)vandq_u32((uint32x4_t)m.getRow(1).mVec128, xyzMask);
- float32x4_t m2 = (float32x4_t)vandq_u32((uint32x4_t)m.getRow(2).mVec128, xyzMask);
- float32x4_t row = m_el[0].mVec128;
- float32x4_t r0 = vmulq_lane_f32(m0, vget_low_f32(row), 0);
- float32x4_t r1 = vmulq_lane_f32(m0, vget_low_f32(row), 1);
- float32x4_t r2 = vmulq_lane_f32(m0, vget_high_f32(row), 0);
- row = m_el[1].mVec128;
- r0 = vmlaq_lane_f32(r0, m1, vget_low_f32(row), 0);
- r1 = vmlaq_lane_f32(r1, m1, vget_low_f32(row), 1);
- r2 = vmlaq_lane_f32(r2, m1, vget_high_f32(row), 0);
- row = m_el[2].mVec128;
- r0 = vmlaq_lane_f32(r0, m2, vget_low_f32(row), 0);
- r1 = vmlaq_lane_f32(r1, m2, vget_low_f32(row), 1);
- r2 = vmlaq_lane_f32(r2, m2, vget_high_f32(row), 0);
- return b3Matrix3x3(r0, r1, r2);
-#else
- return b3Matrix3x3(
- m_el[0].getX() * m[0].getX() + m_el[1].getX() * m[1].getX() + m_el[2].getX() * m[2].getX(),
- m_el[0].getX() * m[0].getY() + m_el[1].getX() * m[1].getY() + m_el[2].getX() * m[2].getY(),
- m_el[0].getX() * m[0].getZ() + m_el[1].getX() * m[1].getZ() + m_el[2].getX() * m[2].getZ(),
- m_el[0].getY() * m[0].getX() + m_el[1].getY() * m[1].getX() + m_el[2].getY() * m[2].getX(),
- m_el[0].getY() * m[0].getY() + m_el[1].getY() * m[1].getY() + m_el[2].getY() * m[2].getY(),
- m_el[0].getY() * m[0].getZ() + m_el[1].getY() * m[1].getZ() + m_el[2].getY() * m[2].getZ(),
- m_el[0].getZ() * m[0].getX() + m_el[1].getZ() * m[1].getX() + m_el[2].getZ() * m[2].getX(),
- m_el[0].getZ() * m[0].getY() + m_el[1].getZ() * m[1].getY() + m_el[2].getZ() * m[2].getY(),
- m_el[0].getZ() * m[0].getZ() + m_el[1].getZ() * m[1].getZ() + m_el[2].getZ() * m[2].getZ());
-#endif
-}
-
-B3_FORCE_INLINE b3Matrix3x3
-b3Matrix3x3::timesTranspose(const b3Matrix3x3& m) const
-{
-#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE))
- __m128 a0 = m_el[0].mVec128;
- __m128 a1 = m_el[1].mVec128;
- __m128 a2 = m_el[2].mVec128;
-
- b3Matrix3x3 mT = m.transpose(); // we rely on transpose() zeroing w channel so that we don't have to do it here
- __m128 mx = mT[0].mVec128;
- __m128 my = mT[1].mVec128;
- __m128 mz = mT[2].mVec128;
-
- __m128 r0 = _mm_mul_ps(mx, _mm_shuffle_ps(a0, a0, 0x00));
- __m128 r1 = _mm_mul_ps(mx, _mm_shuffle_ps(a1, a1, 0x00));
- __m128 r2 = _mm_mul_ps(mx, _mm_shuffle_ps(a2, a2, 0x00));
- r0 = _mm_add_ps(r0, _mm_mul_ps(my, _mm_shuffle_ps(a0, a0, 0x55)));
- r1 = _mm_add_ps(r1, _mm_mul_ps(my, _mm_shuffle_ps(a1, a1, 0x55)));
- r2 = _mm_add_ps(r2, _mm_mul_ps(my, _mm_shuffle_ps(a2, a2, 0x55)));
- r0 = _mm_add_ps(r0, _mm_mul_ps(mz, _mm_shuffle_ps(a0, a0, 0xaa)));
- r1 = _mm_add_ps(r1, _mm_mul_ps(mz, _mm_shuffle_ps(a1, a1, 0xaa)));
- r2 = _mm_add_ps(r2, _mm_mul_ps(mz, _mm_shuffle_ps(a2, a2, 0xaa)));
- return b3Matrix3x3(r0, r1, r2);
-
-#elif defined B3_USE_NEON
- float32x4_t a0 = m_el[0].mVec128;
- float32x4_t a1 = m_el[1].mVec128;
- float32x4_t a2 = m_el[2].mVec128;
-
- b3Matrix3x3 mT = m.transpose(); // we rely on transpose() zeroing w channel so that we don't have to do it here
- float32x4_t mx = mT[0].mVec128;
- float32x4_t my = mT[1].mVec128;
- float32x4_t mz = mT[2].mVec128;
-
- float32x4_t r0 = vmulq_lane_f32(mx, vget_low_f32(a0), 0);
- float32x4_t r1 = vmulq_lane_f32(mx, vget_low_f32(a1), 0);
- float32x4_t r2 = vmulq_lane_f32(mx, vget_low_f32(a2), 0);
- r0 = vmlaq_lane_f32(r0, my, vget_low_f32(a0), 1);
- r1 = vmlaq_lane_f32(r1, my, vget_low_f32(a1), 1);
- r2 = vmlaq_lane_f32(r2, my, vget_low_f32(a2), 1);
- r0 = vmlaq_lane_f32(r0, mz, vget_high_f32(a0), 0);
- r1 = vmlaq_lane_f32(r1, mz, vget_high_f32(a1), 0);
- r2 = vmlaq_lane_f32(r2, mz, vget_high_f32(a2), 0);
- return b3Matrix3x3(r0, r1, r2);
-
-#else
- return b3Matrix3x3(
- m_el[0].dot(m[0]), m_el[0].dot(m[1]), m_el[0].dot(m[2]),
- m_el[1].dot(m[0]), m_el[1].dot(m[1]), m_el[1].dot(m[2]),
- m_el[2].dot(m[0]), m_el[2].dot(m[1]), m_el[2].dot(m[2]));
-#endif
-}
-
-B3_FORCE_INLINE b3Vector3
-operator*(const b3Matrix3x3& m, const b3Vector3& v)
-{
-#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) || defined(B3_USE_NEON)
- return v.dot3(m[0], m[1], m[2]);
-#else
- return b3MakeVector3(m[0].dot(v), m[1].dot(v), m[2].dot(v));
-#endif
-}
-
-B3_FORCE_INLINE b3Vector3
-operator*(const b3Vector3& v, const b3Matrix3x3& m)
-{
-#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE))
-
- const __m128 vv = v.mVec128;
-
- __m128 c0 = b3_splat_ps(vv, 0);
- __m128 c1 = b3_splat_ps(vv, 1);
- __m128 c2 = b3_splat_ps(vv, 2);
-
- c0 = _mm_mul_ps(c0, _mm_and_ps(m[0].mVec128, b3vFFF0fMask));
- c1 = _mm_mul_ps(c1, _mm_and_ps(m[1].mVec128, b3vFFF0fMask));
- c0 = _mm_add_ps(c0, c1);
- c2 = _mm_mul_ps(c2, _mm_and_ps(m[2].mVec128, b3vFFF0fMask));
-
- return b3MakeVector3(_mm_add_ps(c0, c2));
-#elif defined(B3_USE_NEON)
- const float32x4_t vv = v.mVec128;
- const float32x2_t vlo = vget_low_f32(vv);
- const float32x2_t vhi = vget_high_f32(vv);
-
- float32x4_t c0, c1, c2;
-
- c0 = (float32x4_t)vandq_s32((int32x4_t)m[0].mVec128, b3vFFF0Mask);
- c1 = (float32x4_t)vandq_s32((int32x4_t)m[1].mVec128, b3vFFF0Mask);
- c2 = (float32x4_t)vandq_s32((int32x4_t)m[2].mVec128, b3vFFF0Mask);
-
- c0 = vmulq_lane_f32(c0, vlo, 0);
- c1 = vmulq_lane_f32(c1, vlo, 1);
- c2 = vmulq_lane_f32(c2, vhi, 0);
- c0 = vaddq_f32(c0, c1);
- c0 = vaddq_f32(c0, c2);
-
- return b3MakeVector3(c0);
-#else
- return b3MakeVector3(m.tdotx(v), m.tdoty(v), m.tdotz(v));
-#endif
-}
-
-B3_FORCE_INLINE b3Matrix3x3
-operator*(const b3Matrix3x3& m1, const b3Matrix3x3& m2)
-{
-#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE))
-
- __m128 m10 = m1[0].mVec128;
- __m128 m11 = m1[1].mVec128;
- __m128 m12 = m1[2].mVec128;
-
- __m128 m2v = _mm_and_ps(m2[0].mVec128, b3vFFF0fMask);
-
- __m128 c0 = b3_splat_ps(m10, 0);
- __m128 c1 = b3_splat_ps(m11, 0);
- __m128 c2 = b3_splat_ps(m12, 0);
-
- c0 = _mm_mul_ps(c0, m2v);
- c1 = _mm_mul_ps(c1, m2v);
- c2 = _mm_mul_ps(c2, m2v);
-
- m2v = _mm_and_ps(m2[1].mVec128, b3vFFF0fMask);
-
- __m128 c0_1 = b3_splat_ps(m10, 1);
- __m128 c1_1 = b3_splat_ps(m11, 1);
- __m128 c2_1 = b3_splat_ps(m12, 1);
-
- c0_1 = _mm_mul_ps(c0_1, m2v);
- c1_1 = _mm_mul_ps(c1_1, m2v);
- c2_1 = _mm_mul_ps(c2_1, m2v);
-
- m2v = _mm_and_ps(m2[2].mVec128, b3vFFF0fMask);
-
- c0 = _mm_add_ps(c0, c0_1);
- c1 = _mm_add_ps(c1, c1_1);
- c2 = _mm_add_ps(c2, c2_1);
-
- m10 = b3_splat_ps(m10, 2);
- m11 = b3_splat_ps(m11, 2);
- m12 = b3_splat_ps(m12, 2);
-
- m10 = _mm_mul_ps(m10, m2v);
- m11 = _mm_mul_ps(m11, m2v);
- m12 = _mm_mul_ps(m12, m2v);
-
- c0 = _mm_add_ps(c0, m10);
- c1 = _mm_add_ps(c1, m11);
- c2 = _mm_add_ps(c2, m12);
-
- return b3Matrix3x3(c0, c1, c2);
-
-#elif defined(B3_USE_NEON)
-
- float32x4_t rv0, rv1, rv2;
- float32x4_t v0, v1, v2;
- float32x4_t mv0, mv1, mv2;
-
- v0 = m1[0].mVec128;
- v1 = m1[1].mVec128;
- v2 = m1[2].mVec128;
-
- mv0 = (float32x4_t)vandq_s32((int32x4_t)m2[0].mVec128, b3vFFF0Mask);
- mv1 = (float32x4_t)vandq_s32((int32x4_t)m2[1].mVec128, b3vFFF0Mask);
- mv2 = (float32x4_t)vandq_s32((int32x4_t)m2[2].mVec128, b3vFFF0Mask);
-
- rv0 = vmulq_lane_f32(mv0, vget_low_f32(v0), 0);
- rv1 = vmulq_lane_f32(mv0, vget_low_f32(v1), 0);
- rv2 = vmulq_lane_f32(mv0, vget_low_f32(v2), 0);
-
- rv0 = vmlaq_lane_f32(rv0, mv1, vget_low_f32(v0), 1);
- rv1 = vmlaq_lane_f32(rv1, mv1, vget_low_f32(v1), 1);
- rv2 = vmlaq_lane_f32(rv2, mv1, vget_low_f32(v2), 1);
-
- rv0 = vmlaq_lane_f32(rv0, mv2, vget_high_f32(v0), 0);
- rv1 = vmlaq_lane_f32(rv1, mv2, vget_high_f32(v1), 0);
- rv2 = vmlaq_lane_f32(rv2, mv2, vget_high_f32(v2), 0);
-
- return b3Matrix3x3(rv0, rv1, rv2);
-
-#else
- return b3Matrix3x3(
- m2.tdotx(m1[0]), m2.tdoty(m1[0]), m2.tdotz(m1[0]),
- m2.tdotx(m1[1]), m2.tdoty(m1[1]), m2.tdotz(m1[1]),
- m2.tdotx(m1[2]), m2.tdoty(m1[2]), m2.tdotz(m1[2]));
-#endif
-}
-
-/*
-B3_FORCE_INLINE b3Matrix3x3 b3MultTransposeLeft(const b3Matrix3x3& m1, const b3Matrix3x3& m2) {
-return b3Matrix3x3(
-m1[0][0] * m2[0][0] + m1[1][0] * m2[1][0] + m1[2][0] * m2[2][0],
-m1[0][0] * m2[0][1] + m1[1][0] * m2[1][1] + m1[2][0] * m2[2][1],
-m1[0][0] * m2[0][2] + m1[1][0] * m2[1][2] + m1[2][0] * m2[2][2],
-m1[0][1] * m2[0][0] + m1[1][1] * m2[1][0] + m1[2][1] * m2[2][0],
-m1[0][1] * m2[0][1] + m1[1][1] * m2[1][1] + m1[2][1] * m2[2][1],
-m1[0][1] * m2[0][2] + m1[1][1] * m2[1][2] + m1[2][1] * m2[2][2],
-m1[0][2] * m2[0][0] + m1[1][2] * m2[1][0] + m1[2][2] * m2[2][0],
-m1[0][2] * m2[0][1] + m1[1][2] * m2[1][1] + m1[2][2] * m2[2][1],
-m1[0][2] * m2[0][2] + m1[1][2] * m2[1][2] + m1[2][2] * m2[2][2]);
-}
-*/
-
-/**@brief Equality operator between two matrices
-* It will test all elements are equal. */
-B3_FORCE_INLINE bool operator==(const b3Matrix3x3& m1, const b3Matrix3x3& m2)
-{
-#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE))
-
- __m128 c0, c1, c2;
-
- c0 = _mm_cmpeq_ps(m1[0].mVec128, m2[0].mVec128);
- c1 = _mm_cmpeq_ps(m1[1].mVec128, m2[1].mVec128);
- c2 = _mm_cmpeq_ps(m1[2].mVec128, m2[2].mVec128);
-
- c0 = _mm_and_ps(c0, c1);
- c0 = _mm_and_ps(c0, c2);
-
- return (0x7 == _mm_movemask_ps((__m128)c0));
-#else
- return (m1[0][0] == m2[0][0] && m1[1][0] == m2[1][0] && m1[2][0] == m2[2][0] &&
- m1[0][1] == m2[0][1] && m1[1][1] == m2[1][1] && m1[2][1] == m2[2][1] &&
- m1[0][2] == m2[0][2] && m1[1][2] == m2[1][2] && m1[2][2] == m2[2][2]);
-#endif
-}
-
-///for serialization
-struct b3Matrix3x3FloatData
-{
- b3Vector3FloatData m_el[3];
-};
-
-///for serialization
-struct b3Matrix3x3DoubleData
-{
- b3Vector3DoubleData m_el[3];
-};
-
-B3_FORCE_INLINE void b3Matrix3x3::serialize(struct b3Matrix3x3Data& dataOut) const
-{
- for (int i = 0; i < 3; i++)
- m_el[i].serialize(dataOut.m_el[i]);
-}
-
-B3_FORCE_INLINE void b3Matrix3x3::serializeFloat(struct b3Matrix3x3FloatData& dataOut) const
-{
- for (int i = 0; i < 3; i++)
- m_el[i].serializeFloat(dataOut.m_el[i]);
-}
-
-B3_FORCE_INLINE void b3Matrix3x3::deSerialize(const struct b3Matrix3x3Data& dataIn)
-{
- for (int i = 0; i < 3; i++)
- m_el[i].deSerialize(dataIn.m_el[i]);
-}
-
-B3_FORCE_INLINE void b3Matrix3x3::deSerializeFloat(const struct b3Matrix3x3FloatData& dataIn)
-{
- for (int i = 0; i < 3; i++)
- m_el[i].deSerializeFloat(dataIn.m_el[i]);
-}
-
-B3_FORCE_INLINE void b3Matrix3x3::deSerializeDouble(const struct b3Matrix3x3DoubleData& dataIn)
-{
- for (int i = 0; i < 3; i++)
- m_el[i].deSerializeDouble(dataIn.m_el[i]);
-}
-
-#endif //B3_MATRIX3x3_H
diff --git a/thirdparty/bullet/Bullet3Common/b3MinMax.h b/thirdparty/bullet/Bullet3Common/b3MinMax.h
deleted file mode 100644
index c09c3db3f5..0000000000
--- a/thirdparty/bullet/Bullet3Common/b3MinMax.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
-Copyright (c) 2003-2013 Gino van den Bergen / Erwin Coumans http://bulletphysics.org
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose,
-including commercial applications, and to alter it and redistribute it freely,
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-#ifndef B3_GEN_MINMAX_H
-#define B3_GEN_MINMAX_H
-
-#include "b3Scalar.h"
-
-template <class T>
-B3_FORCE_INLINE const T& b3Min(const T& a, const T& b)
-{
- return a < b ? a : b;
-}
-
-template <class T>
-B3_FORCE_INLINE const T& b3Max(const T& a, const T& b)
-{
- return a > b ? a : b;
-}
-
-template <class T>
-B3_FORCE_INLINE const T& b3Clamped(const T& a, const T& lb, const T& ub)
-{
- return a < lb ? lb : (ub < a ? ub : a);
-}
-
-template <class T>
-B3_FORCE_INLINE void b3SetMin(T& a, const T& b)
-{
- if (b < a)
- {
- a = b;
- }
-}
-
-template <class T>
-B3_FORCE_INLINE void b3SetMax(T& a, const T& b)
-{
- if (a < b)
- {
- a = b;
- }
-}
-
-template <class T>
-B3_FORCE_INLINE void b3Clamp(T& a, const T& lb, const T& ub)
-{
- if (a < lb)
- {
- a = lb;
- }
- else if (ub < a)
- {
- a = ub;
- }
-}
-
-#endif //B3_GEN_MINMAX_H
diff --git a/thirdparty/bullet/Bullet3Common/b3PoolAllocator.h b/thirdparty/bullet/Bullet3Common/b3PoolAllocator.h
deleted file mode 100644
index ed56bc627d..0000000000
--- a/thirdparty/bullet/Bullet3Common/b3PoolAllocator.h
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
-Copyright (c) 2003-2013 Gino van den Bergen / Erwin Coumans http://bulletphysics.org
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose,
-including commercial applications, and to alter it and redistribute it freely,
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-#ifndef _BT_POOL_ALLOCATOR_H
-#define _BT_POOL_ALLOCATOR_H
-
-#include "b3Scalar.h"
-#include "b3AlignedAllocator.h"
-
-///The b3PoolAllocator class allows to efficiently allocate a large pool of objects, instead of dynamically allocating them separately.
-class b3PoolAllocator
-{
- int m_elemSize;
- int m_maxElements;
- int m_freeCount;
- void* m_firstFree;
- unsigned char* m_pool;
-
-public:
- b3PoolAllocator(int elemSize, int maxElements)
- : m_elemSize(elemSize),
- m_maxElements(maxElements)
- {
- m_pool = (unsigned char*)b3AlignedAlloc(static_cast<unsigned int>(m_elemSize * m_maxElements), 16);
-
- unsigned char* p = m_pool;
- m_firstFree = p;
- m_freeCount = m_maxElements;
- int count = m_maxElements;
- while (--count)
- {
- *(void**)p = (p + m_elemSize);
- p += m_elemSize;
- }
- *(void**)p = 0;
- }
-
- ~b3PoolAllocator()
- {
- b3AlignedFree(m_pool);
- }
-
- int getFreeCount() const
- {
- return m_freeCount;
- }
-
- int getUsedCount() const
- {
- return m_maxElements - m_freeCount;
- }
-
- int getMaxCount() const
- {
- return m_maxElements;
- }
-
- void* allocate(int size)
- {
- // release mode fix
- (void)size;
- b3Assert(!size || size <= m_elemSize);
- b3Assert(m_freeCount > 0);
- void* result = m_firstFree;
- m_firstFree = *(void**)m_firstFree;
- --m_freeCount;
- return result;
- }
-
- bool validPtr(void* ptr)
- {
- if (ptr)
- {
- if (((unsigned char*)ptr >= m_pool && (unsigned char*)ptr < m_pool + m_maxElements * m_elemSize))
- {
- return true;
- }
- }
- return false;
- }
-
- void freeMemory(void* ptr)
- {
- if (ptr)
- {
- b3Assert((unsigned char*)ptr >= m_pool && (unsigned char*)ptr < m_pool + m_maxElements * m_elemSize);
-
- *(void**)ptr = m_firstFree;
- m_firstFree = ptr;
- ++m_freeCount;
- }
- }
-
- int getElementSize() const
- {
- return m_elemSize;
- }
-
- unsigned char* getPoolAddress()
- {
- return m_pool;
- }
-
- const unsigned char* getPoolAddress() const
- {
- return m_pool;
- }
-};
-
-#endif //_BT_POOL_ALLOCATOR_H
diff --git a/thirdparty/bullet/Bullet3Common/b3QuadWord.h b/thirdparty/bullet/Bullet3Common/b3QuadWord.h
deleted file mode 100644
index 0def305fac..0000000000
--- a/thirdparty/bullet/Bullet3Common/b3QuadWord.h
+++ /dev/null
@@ -1,242 +0,0 @@
-/*
-Copyright (c) 2003-2013 Gino van den Bergen / Erwin Coumans http://bulletphysics.org
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose,
-including commercial applications, and to alter it and redistribute it freely,
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-#ifndef B3_SIMD_QUADWORD_H
-#define B3_SIMD_QUADWORD_H
-
-#include "b3Scalar.h"
-#include "b3MinMax.h"
-
-#if defined(__CELLOS_LV2) && defined(__SPU__)
-#include <altivec.h>
-#endif
-
-/**@brief The b3QuadWord class is base class for b3Vector3 and b3Quaternion.
- * Some issues under PS3 Linux with IBM 2.1 SDK, gcc compiler prevent from using aligned quadword.
- */
-#ifndef USE_LIBSPE2
-B3_ATTRIBUTE_ALIGNED16(class)
-b3QuadWord
-#else
-class b3QuadWord
-#endif
-{
-protected:
-#if defined(__SPU__) && defined(__CELLOS_LV2__)
- union {
- vec_float4 mVec128;
- b3Scalar m_floats[4];
- };
-
-public:
- vec_float4 get128() const
- {
- return mVec128;
- }
-
-#else //__CELLOS_LV2__ __SPU__
-
-#if defined(B3_USE_SSE) || defined(B3_USE_NEON)
-public:
- union {
- b3SimdFloat4 mVec128;
- b3Scalar m_floats[4];
- struct
- {
- b3Scalar x, y, z, w;
- };
- };
-
-public:
- B3_FORCE_INLINE b3SimdFloat4 get128() const
- {
- return mVec128;
- }
- B3_FORCE_INLINE void set128(b3SimdFloat4 v128)
- {
- mVec128 = v128;
- }
-#else
-public:
- union {
- b3Scalar m_floats[4];
- struct
- {
- b3Scalar x, y, z, w;
- };
- };
-#endif // B3_USE_SSE
-
-#endif //__CELLOS_LV2__ __SPU__
-
-public:
-#if defined(B3_USE_SSE) || defined(B3_USE_NEON)
-
- // Set Vector
- B3_FORCE_INLINE b3QuadWord(const b3SimdFloat4 vec)
- {
- mVec128 = vec;
- }
-
- // Copy constructor
- B3_FORCE_INLINE b3QuadWord(const b3QuadWord& rhs)
- {
- mVec128 = rhs.mVec128;
- }
-
- // Assignment Operator
- B3_FORCE_INLINE b3QuadWord&
- operator=(const b3QuadWord& v)
- {
- mVec128 = v.mVec128;
-
- return *this;
- }
-
-#endif
-
- /**@brief Return the x value */
- B3_FORCE_INLINE const b3Scalar& getX() const { return m_floats[0]; }
- /**@brief Return the y value */
- B3_FORCE_INLINE const b3Scalar& getY() const { return m_floats[1]; }
- /**@brief Return the z value */
- B3_FORCE_INLINE const b3Scalar& getZ() const { return m_floats[2]; }
- /**@brief Set the x value */
- B3_FORCE_INLINE void setX(b3Scalar _x) { m_floats[0] = _x; };
- /**@brief Set the y value */
- B3_FORCE_INLINE void setY(b3Scalar _y) { m_floats[1] = _y; };
- /**@brief Set the z value */
- B3_FORCE_INLINE void setZ(b3Scalar _z) { m_floats[2] = _z; };
- /**@brief Set the w value */
- B3_FORCE_INLINE void setW(b3Scalar _w) { m_floats[3] = _w; };
- /**@brief Return the x value */
-
- //B3_FORCE_INLINE b3Scalar& operator[](int i) { return (&m_floats[0])[i]; }
- //B3_FORCE_INLINE const b3Scalar& operator[](int i) const { return (&m_floats[0])[i]; }
- ///operator b3Scalar*() replaces operator[], using implicit conversion. We added operator != and operator == to avoid pointer comparisons.
- B3_FORCE_INLINE operator b3Scalar*() { return &m_floats[0]; }
- B3_FORCE_INLINE operator const b3Scalar*() const { return &m_floats[0]; }
-
- B3_FORCE_INLINE bool operator==(const b3QuadWord& other) const
- {
-#ifdef B3_USE_SSE
- return (0xf == _mm_movemask_ps((__m128)_mm_cmpeq_ps(mVec128, other.mVec128)));
-#else
- return ((m_floats[3] == other.m_floats[3]) &&
- (m_floats[2] == other.m_floats[2]) &&
- (m_floats[1] == other.m_floats[1]) &&
- (m_floats[0] == other.m_floats[0]));
-#endif
- }
-
- B3_FORCE_INLINE bool operator!=(const b3QuadWord& other) const
- {
- return !(*this == other);
- }
-
- /**@brief Set x,y,z and zero w
- * @param x Value of x
- * @param y Value of y
- * @param z Value of z
- */
- B3_FORCE_INLINE void setValue(const b3Scalar& _x, const b3Scalar& _y, const b3Scalar& _z)
- {
- m_floats[0] = _x;
- m_floats[1] = _y;
- m_floats[2] = _z;
- m_floats[3] = 0.f;
- }
-
- /* void getValue(b3Scalar *m) const
- {
- m[0] = m_floats[0];
- m[1] = m_floats[1];
- m[2] = m_floats[2];
- }
-*/
- /**@brief Set the values
- * @param x Value of x
- * @param y Value of y
- * @param z Value of z
- * @param w Value of w
- */
- B3_FORCE_INLINE void setValue(const b3Scalar& _x, const b3Scalar& _y, const b3Scalar& _z, const b3Scalar& _w)
- {
- m_floats[0] = _x;
- m_floats[1] = _y;
- m_floats[2] = _z;
- m_floats[3] = _w;
- }
- /**@brief No initialization constructor */
- B3_FORCE_INLINE b3QuadWord()
- // :m_floats[0](b3Scalar(0.)),m_floats[1](b3Scalar(0.)),m_floats[2](b3Scalar(0.)),m_floats[3](b3Scalar(0.))
- {
- }
-
- /**@brief Three argument constructor (zeros w)
- * @param x Value of x
- * @param y Value of y
- * @param z Value of z
- */
- B3_FORCE_INLINE b3QuadWord(const b3Scalar& _x, const b3Scalar& _y, const b3Scalar& _z)
- {
- m_floats[0] = _x, m_floats[1] = _y, m_floats[2] = _z, m_floats[3] = 0.0f;
- }
-
- /**@brief Initializing constructor
- * @param x Value of x
- * @param y Value of y
- * @param z Value of z
- * @param w Value of w
- */
- B3_FORCE_INLINE b3QuadWord(const b3Scalar& _x, const b3Scalar& _y, const b3Scalar& _z, const b3Scalar& _w)
- {
- m_floats[0] = _x, m_floats[1] = _y, m_floats[2] = _z, m_floats[3] = _w;
- }
-
- /**@brief Set each element to the max of the current values and the values of another b3QuadWord
- * @param other The other b3QuadWord to compare with
- */
- B3_FORCE_INLINE void setMax(const b3QuadWord& other)
- {
-#ifdef B3_USE_SSE
- mVec128 = _mm_max_ps(mVec128, other.mVec128);
-#elif defined(B3_USE_NEON)
- mVec128 = vmaxq_f32(mVec128, other.mVec128);
-#else
- b3SetMax(m_floats[0], other.m_floats[0]);
- b3SetMax(m_floats[1], other.m_floats[1]);
- b3SetMax(m_floats[2], other.m_floats[2]);
- b3SetMax(m_floats[3], other.m_floats[3]);
-#endif
- }
- /**@brief Set each element to the min of the current values and the values of another b3QuadWord
- * @param other The other b3QuadWord to compare with
- */
- B3_FORCE_INLINE void setMin(const b3QuadWord& other)
- {
-#ifdef B3_USE_SSE
- mVec128 = _mm_min_ps(mVec128, other.mVec128);
-#elif defined(B3_USE_NEON)
- mVec128 = vminq_f32(mVec128, other.mVec128);
-#else
- b3SetMin(m_floats[0], other.m_floats[0]);
- b3SetMin(m_floats[1], other.m_floats[1]);
- b3SetMin(m_floats[2], other.m_floats[2]);
- b3SetMin(m_floats[3], other.m_floats[3]);
-#endif
- }
-};
-
-#endif //B3_SIMD_QUADWORD_H
diff --git a/thirdparty/bullet/Bullet3Common/b3Quaternion.h b/thirdparty/bullet/Bullet3Common/b3Quaternion.h
deleted file mode 100644
index 4fdd72dcc4..0000000000
--- a/thirdparty/bullet/Bullet3Common/b3Quaternion.h
+++ /dev/null
@@ -1,908 +0,0 @@
-/*
-Copyright (c) 2003-2013 Gino van den Bergen / Erwin Coumans http://bulletphysics.org
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose,
-including commercial applications, and to alter it and redistribute it freely,
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-#ifndef B3_SIMD__QUATERNION_H_
-#define B3_SIMD__QUATERNION_H_
-
-#include "b3Vector3.h"
-#include "b3QuadWord.h"
-
-#ifdef B3_USE_SSE
-
-const __m128 B3_ATTRIBUTE_ALIGNED16(b3vOnes) = {1.0f, 1.0f, 1.0f, 1.0f};
-
-#endif
-
-#if defined(B3_USE_SSE) || defined(B3_USE_NEON)
-
-const b3SimdFloat4 B3_ATTRIBUTE_ALIGNED16(b3vQInv) = {-0.0f, -0.0f, -0.0f, +0.0f};
-const b3SimdFloat4 B3_ATTRIBUTE_ALIGNED16(b3vPPPM) = {+0.0f, +0.0f, +0.0f, -0.0f};
-
-#endif
-
-/**@brief The b3Quaternion implements quaternion to perform linear algebra rotations in combination with b3Matrix3x3, b3Vector3 and b3Transform. */
-class b3Quaternion : public b3QuadWord
-{
-public:
- /**@brief No initialization constructor */
- b3Quaternion() {}
-
-#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) || defined(B3_USE_NEON)
- // Set Vector
- B3_FORCE_INLINE b3Quaternion(const b3SimdFloat4 vec)
- {
- mVec128 = vec;
- }
-
- // Copy constructor
- B3_FORCE_INLINE b3Quaternion(const b3Quaternion& rhs)
- {
- mVec128 = rhs.mVec128;
- }
-
- // Assignment Operator
- B3_FORCE_INLINE b3Quaternion&
- operator=(const b3Quaternion& v)
- {
- mVec128 = v.mVec128;
-
- return *this;
- }
-
-#endif
-
- // template <typename b3Scalar>
- // explicit Quaternion(const b3Scalar *v) : Tuple4<b3Scalar>(v) {}
- /**@brief Constructor from scalars */
- b3Quaternion(const b3Scalar& _x, const b3Scalar& _y, const b3Scalar& _z, const b3Scalar& _w)
- : b3QuadWord(_x, _y, _z, _w)
- {
- //b3Assert(!((_x==1.f) && (_y==0.f) && (_z==0.f) && (_w==0.f)));
- }
- /**@brief Axis angle Constructor
- * @param axis The axis which the rotation is around
- * @param angle The magnitude of the rotation around the angle (Radians) */
- b3Quaternion(const b3Vector3& _axis, const b3Scalar& _angle)
- {
- setRotation(_axis, _angle);
- }
- /**@brief Constructor from Euler angles
- * @param yaw Angle around Y unless B3_EULER_DEFAULT_ZYX defined then Z
- * @param pitch Angle around X unless B3_EULER_DEFAULT_ZYX defined then Y
- * @param roll Angle around Z unless B3_EULER_DEFAULT_ZYX defined then X */
- b3Quaternion(const b3Scalar& yaw, const b3Scalar& pitch, const b3Scalar& roll)
- {
-#ifndef B3_EULER_DEFAULT_ZYX
- setEuler(yaw, pitch, roll);
-#else
- setEulerZYX(yaw, pitch, roll);
-#endif
- }
- /**@brief Set the rotation using axis angle notation
- * @param axis The axis around which to rotate
- * @param angle The magnitude of the rotation in Radians */
- void setRotation(const b3Vector3& axis1, const b3Scalar& _angle)
- {
- b3Vector3 axis = axis1;
- axis.safeNormalize();
-
- b3Scalar d = axis.length();
- b3Assert(d != b3Scalar(0.0));
- if (d < B3_EPSILON)
- {
- setValue(0, 0, 0, 1);
- }
- else
- {
- b3Scalar s = b3Sin(_angle * b3Scalar(0.5)) / d;
- setValue(axis.getX() * s, axis.getY() * s, axis.getZ() * s,
- b3Cos(_angle * b3Scalar(0.5)));
- }
- }
- /**@brief Set the quaternion using Euler angles
- * @param yaw Angle around Y
- * @param pitch Angle around X
- * @param roll Angle around Z */
- void setEuler(const b3Scalar& yaw, const b3Scalar& pitch, const b3Scalar& roll)
- {
- b3Scalar halfYaw = b3Scalar(yaw) * b3Scalar(0.5);
- b3Scalar halfPitch = b3Scalar(pitch) * b3Scalar(0.5);
- b3Scalar halfRoll = b3Scalar(roll) * b3Scalar(0.5);
- b3Scalar cosYaw = b3Cos(halfYaw);
- b3Scalar sinYaw = b3Sin(halfYaw);
- b3Scalar cosPitch = b3Cos(halfPitch);
- b3Scalar sinPitch = b3Sin(halfPitch);
- b3Scalar cosRoll = b3Cos(halfRoll);
- b3Scalar sinRoll = b3Sin(halfRoll);
- setValue(cosRoll * sinPitch * cosYaw + sinRoll * cosPitch * sinYaw,
- cosRoll * cosPitch * sinYaw - sinRoll * sinPitch * cosYaw,
- sinRoll * cosPitch * cosYaw - cosRoll * sinPitch * sinYaw,
- cosRoll * cosPitch * cosYaw + sinRoll * sinPitch * sinYaw);
- }
-
- /**@brief Set the quaternion using euler angles
- * @param yaw Angle around Z
- * @param pitch Angle around Y
- * @param roll Angle around X */
- void setEulerZYX(const b3Scalar& yawZ, const b3Scalar& pitchY, const b3Scalar& rollX)
- {
- b3Scalar halfYaw = b3Scalar(yawZ) * b3Scalar(0.5);
- b3Scalar halfPitch = b3Scalar(pitchY) * b3Scalar(0.5);
- b3Scalar halfRoll = b3Scalar(rollX) * b3Scalar(0.5);
- b3Scalar cosYaw = b3Cos(halfYaw);
- b3Scalar sinYaw = b3Sin(halfYaw);
- b3Scalar cosPitch = b3Cos(halfPitch);
- b3Scalar sinPitch = b3Sin(halfPitch);
- b3Scalar cosRoll = b3Cos(halfRoll);
- b3Scalar sinRoll = b3Sin(halfRoll);
- setValue(sinRoll * cosPitch * cosYaw - cosRoll * sinPitch * sinYaw, //x
- cosRoll * sinPitch * cosYaw + sinRoll * cosPitch * sinYaw, //y
- cosRoll * cosPitch * sinYaw - sinRoll * sinPitch * cosYaw, //z
- cosRoll * cosPitch * cosYaw + sinRoll * sinPitch * sinYaw); //formerly yzx
- normalize();
- }
-
- /**@brief Get the euler angles from this quaternion
- * @param yaw Angle around Z
- * @param pitch Angle around Y
- * @param roll Angle around X */
- void getEulerZYX(b3Scalar& yawZ, b3Scalar& pitchY, b3Scalar& rollX) const
- {
- b3Scalar squ;
- b3Scalar sqx;
- b3Scalar sqy;
- b3Scalar sqz;
- b3Scalar sarg;
- sqx = m_floats[0] * m_floats[0];
- sqy = m_floats[1] * m_floats[1];
- sqz = m_floats[2] * m_floats[2];
- squ = m_floats[3] * m_floats[3];
- rollX = b3Atan2(2 * (m_floats[1] * m_floats[2] + m_floats[3] * m_floats[0]), squ - sqx - sqy + sqz);
- sarg = b3Scalar(-2.) * (m_floats[0] * m_floats[2] - m_floats[3] * m_floats[1]);
- pitchY = sarg <= b3Scalar(-1.0) ? b3Scalar(-0.5) * B3_PI : (sarg >= b3Scalar(1.0) ? b3Scalar(0.5) * B3_PI : b3Asin(sarg));
- yawZ = b3Atan2(2 * (m_floats[0] * m_floats[1] + m_floats[3] * m_floats[2]), squ + sqx - sqy - sqz);
- }
-
- /**@brief Add two quaternions
- * @param q The quaternion to add to this one */
- B3_FORCE_INLINE b3Quaternion& operator+=(const b3Quaternion& q)
- {
-#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
- mVec128 = _mm_add_ps(mVec128, q.mVec128);
-#elif defined(B3_USE_NEON)
- mVec128 = vaddq_f32(mVec128, q.mVec128);
-#else
- m_floats[0] += q.getX();
- m_floats[1] += q.getY();
- m_floats[2] += q.getZ();
- m_floats[3] += q.m_floats[3];
-#endif
- return *this;
- }
-
- /**@brief Subtract out a quaternion
- * @param q The quaternion to subtract from this one */
- b3Quaternion& operator-=(const b3Quaternion& q)
- {
-#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
- mVec128 = _mm_sub_ps(mVec128, q.mVec128);
-#elif defined(B3_USE_NEON)
- mVec128 = vsubq_f32(mVec128, q.mVec128);
-#else
- m_floats[0] -= q.getX();
- m_floats[1] -= q.getY();
- m_floats[2] -= q.getZ();
- m_floats[3] -= q.m_floats[3];
-#endif
- return *this;
- }
-
- /**@brief Scale this quaternion
- * @param s The scalar to scale by */
- b3Quaternion& operator*=(const b3Scalar& s)
- {
-#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
- __m128 vs = _mm_load_ss(&s); // (S 0 0 0)
- vs = b3_pshufd_ps(vs, 0); // (S S S S)
- mVec128 = _mm_mul_ps(mVec128, vs);
-#elif defined(B3_USE_NEON)
- mVec128 = vmulq_n_f32(mVec128, s);
-#else
- m_floats[0] *= s;
- m_floats[1] *= s;
- m_floats[2] *= s;
- m_floats[3] *= s;
-#endif
- return *this;
- }
-
- /**@brief Multiply this quaternion by q on the right
- * @param q The other quaternion
- * Equivilant to this = this * q */
- b3Quaternion& operator*=(const b3Quaternion& q)
- {
-#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
- __m128 vQ2 = q.get128();
-
- __m128 A1 = b3_pshufd_ps(mVec128, B3_SHUFFLE(0, 1, 2, 0));
- __m128 B1 = b3_pshufd_ps(vQ2, B3_SHUFFLE(3, 3, 3, 0));
-
- A1 = A1 * B1;
-
- __m128 A2 = b3_pshufd_ps(mVec128, B3_SHUFFLE(1, 2, 0, 1));
- __m128 B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(2, 0, 1, 1));
-
- A2 = A2 * B2;
-
- B1 = b3_pshufd_ps(mVec128, B3_SHUFFLE(2, 0, 1, 2));
- B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(1, 2, 0, 2));
-
- B1 = B1 * B2; // A3 *= B3
-
- mVec128 = b3_splat_ps(mVec128, 3); // A0
- mVec128 = mVec128 * vQ2; // A0 * B0
-
- A1 = A1 + A2; // AB12
- mVec128 = mVec128 - B1; // AB03 = AB0 - AB3
- A1 = _mm_xor_ps(A1, b3vPPPM); // change sign of the last element
- mVec128 = mVec128 + A1; // AB03 + AB12
-
-#elif defined(B3_USE_NEON)
-
- float32x4_t vQ1 = mVec128;
- float32x4_t vQ2 = q.get128();
- float32x4_t A0, A1, B1, A2, B2, A3, B3;
- float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
-
- {
- float32x2x2_t tmp;
- tmp = vtrn_f32(vget_high_f32(vQ1), vget_low_f32(vQ1)); // {z x}, {w y}
- vQ1zx = tmp.val[0];
-
- tmp = vtrn_f32(vget_high_f32(vQ2), vget_low_f32(vQ2)); // {z x}, {w y}
- vQ2zx = tmp.val[0];
- }
- vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);
-
- vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
-
- vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
- vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
-
- A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx); // X Y z x
- B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W W X
-
- A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
- B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
-
- A3 = vcombine_f32(vQ1zx, vQ1yz); // Z X Y Z
- B3 = vcombine_f32(vQ2yz, vQ2xz); // Y Z x z
-
- A1 = vmulq_f32(A1, B1);
- A2 = vmulq_f32(A2, B2);
- A3 = vmulq_f32(A3, B3); // A3 *= B3
- A0 = vmulq_lane_f32(vQ2, vget_high_f32(vQ1), 1); // A0 * B0
-
- A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2
- A0 = vsubq_f32(A0, A3); // AB03 = AB0 - AB3
-
- // change the sign of the last element
- A1 = (b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)b3vPPPM);
- A0 = vaddq_f32(A0, A1); // AB03 + AB12
-
- mVec128 = A0;
-#else
- setValue(
- m_floats[3] * q.getX() + m_floats[0] * q.m_floats[3] + m_floats[1] * q.getZ() - m_floats[2] * q.getY(),
- m_floats[3] * q.getY() + m_floats[1] * q.m_floats[3] + m_floats[2] * q.getX() - m_floats[0] * q.getZ(),
- m_floats[3] * q.getZ() + m_floats[2] * q.m_floats[3] + m_floats[0] * q.getY() - m_floats[1] * q.getX(),
- m_floats[3] * q.m_floats[3] - m_floats[0] * q.getX() - m_floats[1] * q.getY() - m_floats[2] * q.getZ());
-#endif
- return *this;
- }
- /**@brief Return the dot product between this quaternion and another
- * @param q The other quaternion */
- b3Scalar dot(const b3Quaternion& q) const
- {
-#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
- __m128 vd;
-
- vd = _mm_mul_ps(mVec128, q.mVec128);
-
- __m128 t = _mm_movehl_ps(vd, vd);
- vd = _mm_add_ps(vd, t);
- t = _mm_shuffle_ps(vd, vd, 0x55);
- vd = _mm_add_ss(vd, t);
-
- return _mm_cvtss_f32(vd);
-#elif defined(B3_USE_NEON)
- float32x4_t vd = vmulq_f32(mVec128, q.mVec128);
- float32x2_t x = vpadd_f32(vget_low_f32(vd), vget_high_f32(vd));
- x = vpadd_f32(x, x);
- return vget_lane_f32(x, 0);
-#else
- return m_floats[0] * q.getX() +
- m_floats[1] * q.getY() +
- m_floats[2] * q.getZ() +
- m_floats[3] * q.m_floats[3];
-#endif
- }
-
- /**@brief Return the length squared of the quaternion */
- b3Scalar length2() const
- {
- return dot(*this);
- }
-
- /**@brief Return the length of the quaternion */
- b3Scalar length() const
- {
- return b3Sqrt(length2());
- }
-
- /**@brief Normalize the quaternion
- * Such that x^2 + y^2 + z^2 +w^2 = 1 */
- b3Quaternion& normalize()
- {
-#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
- __m128 vd;
-
- vd = _mm_mul_ps(mVec128, mVec128);
-
- __m128 t = _mm_movehl_ps(vd, vd);
- vd = _mm_add_ps(vd, t);
- t = _mm_shuffle_ps(vd, vd, 0x55);
- vd = _mm_add_ss(vd, t);
-
- vd = _mm_sqrt_ss(vd);
- vd = _mm_div_ss(b3vOnes, vd);
- vd = b3_pshufd_ps(vd, 0); // splat
- mVec128 = _mm_mul_ps(mVec128, vd);
-
- return *this;
-#else
- return *this /= length();
-#endif
- }
-
- /**@brief Return a scaled version of this quaternion
- * @param s The scale factor */
- B3_FORCE_INLINE b3Quaternion
- operator*(const b3Scalar& s) const
- {
-#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
- __m128 vs = _mm_load_ss(&s); // (S 0 0 0)
- vs = b3_pshufd_ps(vs, 0x00); // (S S S S)
-
- return b3Quaternion(_mm_mul_ps(mVec128, vs));
-#elif defined(B3_USE_NEON)
- return b3Quaternion(vmulq_n_f32(mVec128, s));
-#else
- return b3Quaternion(getX() * s, getY() * s, getZ() * s, m_floats[3] * s);
-#endif
- }
-
- /**@brief Return an inversely scaled versionof this quaternion
- * @param s The inverse scale factor */
- b3Quaternion operator/(const b3Scalar& s) const
- {
- b3Assert(s != b3Scalar(0.0));
- return *this * (b3Scalar(1.0) / s);
- }
-
- /**@brief Inversely scale this quaternion
- * @param s The scale factor */
- b3Quaternion& operator/=(const b3Scalar& s)
- {
- b3Assert(s != b3Scalar(0.0));
- return *this *= b3Scalar(1.0) / s;
- }
-
- /**@brief Return a normalized version of this quaternion */
- b3Quaternion normalized() const
- {
- return *this / length();
- }
- /**@brief Return the angle between this quaternion and the other
- * @param q The other quaternion */
- b3Scalar angle(const b3Quaternion& q) const
- {
- b3Scalar s = b3Sqrt(length2() * q.length2());
- b3Assert(s != b3Scalar(0.0));
- return b3Acos(dot(q) / s);
- }
- /**@brief Return the angle of rotation represented by this quaternion */
- b3Scalar getAngle() const
- {
- b3Scalar s = b3Scalar(2.) * b3Acos(m_floats[3]);
- return s;
- }
-
- /**@brief Return the axis of the rotation represented by this quaternion */
- b3Vector3 getAxis() const
- {
- b3Scalar s_squared = 1.f - m_floats[3] * m_floats[3];
-
- if (s_squared < b3Scalar(10.) * B3_EPSILON) //Check for divide by zero
- return b3MakeVector3(1.0, 0.0, 0.0); // Arbitrary
- b3Scalar s = 1.f / b3Sqrt(s_squared);
- return b3MakeVector3(m_floats[0] * s, m_floats[1] * s, m_floats[2] * s);
- }
-
- /**@brief Return the inverse of this quaternion */
- b3Quaternion inverse() const
- {
-#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
- return b3Quaternion(_mm_xor_ps(mVec128, b3vQInv));
-#elif defined(B3_USE_NEON)
- return b3Quaternion((b3SimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)b3vQInv));
-#else
- return b3Quaternion(-m_floats[0], -m_floats[1], -m_floats[2], m_floats[3]);
-#endif
- }
-
- /**@brief Return the sum of this quaternion and the other
- * @param q2 The other quaternion */
- B3_FORCE_INLINE b3Quaternion
- operator+(const b3Quaternion& q2) const
- {
-#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
- return b3Quaternion(_mm_add_ps(mVec128, q2.mVec128));
-#elif defined(B3_USE_NEON)
- return b3Quaternion(vaddq_f32(mVec128, q2.mVec128));
-#else
- const b3Quaternion& q1 = *this;
- return b3Quaternion(q1.getX() + q2.getX(), q1.getY() + q2.getY(), q1.getZ() + q2.getZ(), q1.m_floats[3] + q2.m_floats[3]);
-#endif
- }
-
- /**@brief Return the difference between this quaternion and the other
- * @param q2 The other quaternion */
- B3_FORCE_INLINE b3Quaternion
- operator-(const b3Quaternion& q2) const
- {
-#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
- return b3Quaternion(_mm_sub_ps(mVec128, q2.mVec128));
-#elif defined(B3_USE_NEON)
- return b3Quaternion(vsubq_f32(mVec128, q2.mVec128));
-#else
- const b3Quaternion& q1 = *this;
- return b3Quaternion(q1.getX() - q2.getX(), q1.getY() - q2.getY(), q1.getZ() - q2.getZ(), q1.m_floats[3] - q2.m_floats[3]);
-#endif
- }
-
- /**@brief Return the negative of this quaternion
- * This simply negates each element */
- B3_FORCE_INLINE b3Quaternion operator-() const
- {
-#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
- return b3Quaternion(_mm_xor_ps(mVec128, b3vMzeroMask));
-#elif defined(B3_USE_NEON)
- return b3Quaternion((b3SimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)b3vMzeroMask));
-#else
- const b3Quaternion& q2 = *this;
- return b3Quaternion(-q2.getX(), -q2.getY(), -q2.getZ(), -q2.m_floats[3]);
-#endif
- }
- /**@todo document this and it's use */
- B3_FORCE_INLINE b3Quaternion farthest(const b3Quaternion& qd) const
- {
- b3Quaternion diff, sum;
- diff = *this - qd;
- sum = *this + qd;
- if (diff.dot(diff) > sum.dot(sum))
- return qd;
- return (-qd);
- }
-
- /**@todo document this and it's use */
- B3_FORCE_INLINE b3Quaternion nearest(const b3Quaternion& qd) const
- {
- b3Quaternion diff, sum;
- diff = *this - qd;
- sum = *this + qd;
- if (diff.dot(diff) < sum.dot(sum))
- return qd;
- return (-qd);
- }
-
- /**@brief Return the quaternion which is the result of Spherical Linear Interpolation between this and the other quaternion
- * @param q The other quaternion to interpolate with
- * @param t The ratio between this and q to interpolate. If t = 0 the result is this, if t=1 the result is q.
- * Slerp interpolates assuming constant velocity. */
- b3Quaternion slerp(const b3Quaternion& q, const b3Scalar& t) const
- {
- b3Scalar magnitude = b3Sqrt(length2() * q.length2());
- b3Assert(magnitude > b3Scalar(0));
-
- b3Scalar product = dot(q) / magnitude;
- if (b3Fabs(product) < b3Scalar(1))
- {
- // Take care of long angle case see http://en.wikipedia.org/wiki/Slerp
- const b3Scalar sign = (product < 0) ? b3Scalar(-1) : b3Scalar(1);
-
- const b3Scalar theta = b3Acos(sign * product);
- const b3Scalar s1 = b3Sin(sign * t * theta);
- const b3Scalar d = b3Scalar(1.0) / b3Sin(theta);
- const b3Scalar s0 = b3Sin((b3Scalar(1.0) - t) * theta);
-
- return b3Quaternion(
- (m_floats[0] * s0 + q.getX() * s1) * d,
- (m_floats[1] * s0 + q.getY() * s1) * d,
- (m_floats[2] * s0 + q.getZ() * s1) * d,
- (m_floats[3] * s0 + q.m_floats[3] * s1) * d);
- }
- else
- {
- return *this;
- }
- }
-
- static const b3Quaternion& getIdentity()
- {
- static const b3Quaternion identityQuat(b3Scalar(0.), b3Scalar(0.), b3Scalar(0.), b3Scalar(1.));
- return identityQuat;
- }
-
- B3_FORCE_INLINE const b3Scalar& getW() const { return m_floats[3]; }
-};
-
-/**@brief Return the product of two quaternions */
-B3_FORCE_INLINE b3Quaternion
-operator*(const b3Quaternion& q1, const b3Quaternion& q2)
-{
-#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
- __m128 vQ1 = q1.get128();
- __m128 vQ2 = q2.get128();
- __m128 A0, A1, B1, A2, B2;
-
- A1 = b3_pshufd_ps(vQ1, B3_SHUFFLE(0, 1, 2, 0)); // X Y z x // vtrn
- B1 = b3_pshufd_ps(vQ2, B3_SHUFFLE(3, 3, 3, 0)); // W W W X // vdup vext
-
- A1 = A1 * B1;
-
- A2 = b3_pshufd_ps(vQ1, B3_SHUFFLE(1, 2, 0, 1)); // Y Z X Y // vext
- B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(2, 0, 1, 1)); // z x Y Y // vtrn vdup
-
- A2 = A2 * B2;
-
- B1 = b3_pshufd_ps(vQ1, B3_SHUFFLE(2, 0, 1, 2)); // z x Y Z // vtrn vext
- B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(1, 2, 0, 2)); // Y Z x z // vext vtrn
-
- B1 = B1 * B2; // A3 *= B3
-
- A0 = b3_splat_ps(vQ1, 3); // A0
- A0 = A0 * vQ2; // A0 * B0
-
- A1 = A1 + A2; // AB12
- A0 = A0 - B1; // AB03 = AB0 - AB3
-
- A1 = _mm_xor_ps(A1, b3vPPPM); // change sign of the last element
- A0 = A0 + A1; // AB03 + AB12
-
- return b3Quaternion(A0);
-
-#elif defined(B3_USE_NEON)
-
- float32x4_t vQ1 = q1.get128();
- float32x4_t vQ2 = q2.get128();
- float32x4_t A0, A1, B1, A2, B2, A3, B3;
- float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
-
- {
- float32x2x2_t tmp;
- tmp = vtrn_f32(vget_high_f32(vQ1), vget_low_f32(vQ1)); // {z x}, {w y}
- vQ1zx = tmp.val[0];
-
- tmp = vtrn_f32(vget_high_f32(vQ2), vget_low_f32(vQ2)); // {z x}, {w y}
- vQ2zx = tmp.val[0];
- }
- vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);
-
- vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
-
- vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
- vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
-
- A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx); // X Y z x
- B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W W X
-
- A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
- B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
-
- A3 = vcombine_f32(vQ1zx, vQ1yz); // Z X Y Z
- B3 = vcombine_f32(vQ2yz, vQ2xz); // Y Z x z
-
- A1 = vmulq_f32(A1, B1);
- A2 = vmulq_f32(A2, B2);
- A3 = vmulq_f32(A3, B3); // A3 *= B3
- A0 = vmulq_lane_f32(vQ2, vget_high_f32(vQ1), 1); // A0 * B0
-
- A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2
- A0 = vsubq_f32(A0, A3); // AB03 = AB0 - AB3
-
- // change the sign of the last element
- A1 = (b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)b3vPPPM);
- A0 = vaddq_f32(A0, A1); // AB03 + AB12
-
- return b3Quaternion(A0);
-
-#else
- return b3Quaternion(
- q1.getW() * q2.getX() + q1.getX() * q2.getW() + q1.getY() * q2.getZ() - q1.getZ() * q2.getY(),
- q1.getW() * q2.getY() + q1.getY() * q2.getW() + q1.getZ() * q2.getX() - q1.getX() * q2.getZ(),
- q1.getW() * q2.getZ() + q1.getZ() * q2.getW() + q1.getX() * q2.getY() - q1.getY() * q2.getX(),
- q1.getW() * q2.getW() - q1.getX() * q2.getX() - q1.getY() * q2.getY() - q1.getZ() * q2.getZ());
-#endif
-}
-
-B3_FORCE_INLINE b3Quaternion
-operator*(const b3Quaternion& q, const b3Vector3& w)
-{
-#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
- __m128 vQ1 = q.get128();
- __m128 vQ2 = w.get128();
- __m128 A1, B1, A2, B2, A3, B3;
-
- A1 = b3_pshufd_ps(vQ1, B3_SHUFFLE(3, 3, 3, 0));
- B1 = b3_pshufd_ps(vQ2, B3_SHUFFLE(0, 1, 2, 0));
-
- A1 = A1 * B1;
-
- A2 = b3_pshufd_ps(vQ1, B3_SHUFFLE(1, 2, 0, 1));
- B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(2, 0, 1, 1));
-
- A2 = A2 * B2;
-
- A3 = b3_pshufd_ps(vQ1, B3_SHUFFLE(2, 0, 1, 2));
- B3 = b3_pshufd_ps(vQ2, B3_SHUFFLE(1, 2, 0, 2));
-
- A3 = A3 * B3; // A3 *= B3
-
- A1 = A1 + A2; // AB12
- A1 = _mm_xor_ps(A1, b3vPPPM); // change sign of the last element
- A1 = A1 - A3; // AB123 = AB12 - AB3
-
- return b3Quaternion(A1);
-
-#elif defined(B3_USE_NEON)
-
- float32x4_t vQ1 = q.get128();
- float32x4_t vQ2 = w.get128();
- float32x4_t A1, B1, A2, B2, A3, B3;
- float32x2_t vQ1wx, vQ2zx, vQ1yz, vQ2yz, vQ1zx, vQ2xz;
-
- vQ1wx = vext_f32(vget_high_f32(vQ1), vget_low_f32(vQ1), 1);
- {
- float32x2x2_t tmp;
-
- tmp = vtrn_f32(vget_high_f32(vQ2), vget_low_f32(vQ2)); // {z x}, {w y}
- vQ2zx = tmp.val[0];
-
- tmp = vtrn_f32(vget_high_f32(vQ1), vget_low_f32(vQ1)); // {z x}, {w y}
- vQ1zx = tmp.val[0];
- }
-
- vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
-
- vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
- vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
-
- A1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ1), 1), vQ1wx); // W W W X
- B1 = vcombine_f32(vget_low_f32(vQ2), vQ2zx); // X Y z x
-
- A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
- B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
-
- A3 = vcombine_f32(vQ1zx, vQ1yz); // Z X Y Z
- B3 = vcombine_f32(vQ2yz, vQ2xz); // Y Z x z
-
- A1 = vmulq_f32(A1, B1);
- A2 = vmulq_f32(A2, B2);
- A3 = vmulq_f32(A3, B3); // A3 *= B3
-
- A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2
-
- // change the sign of the last element
- A1 = (b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)b3vPPPM);
-
- A1 = vsubq_f32(A1, A3); // AB123 = AB12 - AB3
-
- return b3Quaternion(A1);
-
-#else
- return b3Quaternion(
- q.getW() * w.getX() + q.getY() * w.getZ() - q.getZ() * w.getY(),
- q.getW() * w.getY() + q.getZ() * w.getX() - q.getX() * w.getZ(),
- q.getW() * w.getZ() + q.getX() * w.getY() - q.getY() * w.getX(),
- -q.getX() * w.getX() - q.getY() * w.getY() - q.getZ() * w.getZ());
-#endif
-}
-
-B3_FORCE_INLINE b3Quaternion
-operator*(const b3Vector3& w, const b3Quaternion& q)
-{
-#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
- __m128 vQ1 = w.get128();
- __m128 vQ2 = q.get128();
- __m128 A1, B1, A2, B2, A3, B3;
-
- A1 = b3_pshufd_ps(vQ1, B3_SHUFFLE(0, 1, 2, 0)); // X Y z x
- B1 = b3_pshufd_ps(vQ2, B3_SHUFFLE(3, 3, 3, 0)); // W W W X
-
- A1 = A1 * B1;
-
- A2 = b3_pshufd_ps(vQ1, B3_SHUFFLE(1, 2, 0, 1));
- B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(2, 0, 1, 1));
-
- A2 = A2 * B2;
-
- A3 = b3_pshufd_ps(vQ1, B3_SHUFFLE(2, 0, 1, 2));
- B3 = b3_pshufd_ps(vQ2, B3_SHUFFLE(1, 2, 0, 2));
-
- A3 = A3 * B3; // A3 *= B3
-
- A1 = A1 + A2; // AB12
- A1 = _mm_xor_ps(A1, b3vPPPM); // change sign of the last element
- A1 = A1 - A3; // AB123 = AB12 - AB3
-
- return b3Quaternion(A1);
-
-#elif defined(B3_USE_NEON)
-
- float32x4_t vQ1 = w.get128();
- float32x4_t vQ2 = q.get128();
- float32x4_t A1, B1, A2, B2, A3, B3;
- float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
-
- {
- float32x2x2_t tmp;
-
- tmp = vtrn_f32(vget_high_f32(vQ1), vget_low_f32(vQ1)); // {z x}, {w y}
- vQ1zx = tmp.val[0];
-
- tmp = vtrn_f32(vget_high_f32(vQ2), vget_low_f32(vQ2)); // {z x}, {w y}
- vQ2zx = tmp.val[0];
- }
- vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);
-
- vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
-
- vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
- vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
-
- A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx); // X Y z x
- B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W W X
-
- A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
- B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
-
- A3 = vcombine_f32(vQ1zx, vQ1yz); // Z X Y Z
- B3 = vcombine_f32(vQ2yz, vQ2xz); // Y Z x z
-
- A1 = vmulq_f32(A1, B1);
- A2 = vmulq_f32(A2, B2);
- A3 = vmulq_f32(A3, B3); // A3 *= B3
-
- A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2
-
- // change the sign of the last element
- A1 = (b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)b3vPPPM);
-
- A1 = vsubq_f32(A1, A3); // AB123 = AB12 - AB3
-
- return b3Quaternion(A1);
-
-#else
- return b3Quaternion(
- +w.getX() * q.getW() + w.getY() * q.getZ() - w.getZ() * q.getY(),
- +w.getY() * q.getW() + w.getZ() * q.getX() - w.getX() * q.getZ(),
- +w.getZ() * q.getW() + w.getX() * q.getY() - w.getY() * q.getX(),
- -w.getX() * q.getX() - w.getY() * q.getY() - w.getZ() * q.getZ());
-#endif
-}
-
-/**@brief Calculate the dot product between two quaternions */
-B3_FORCE_INLINE b3Scalar
-b3Dot(const b3Quaternion& q1, const b3Quaternion& q2)
-{
- return q1.dot(q2);
-}
-
-/**@brief Return the length of a quaternion */
-B3_FORCE_INLINE b3Scalar
-b3Length(const b3Quaternion& q)
-{
- return q.length();
-}
-
-/**@brief Return the angle between two quaternions*/
-B3_FORCE_INLINE b3Scalar
-b3Angle(const b3Quaternion& q1, const b3Quaternion& q2)
-{
- return q1.angle(q2);
-}
-
-/**@brief Return the inverse of a quaternion*/
-B3_FORCE_INLINE b3Quaternion
-b3Inverse(const b3Quaternion& q)
-{
- return q.inverse();
-}
-
-/**@brief Return the result of spherical linear interpolation betwen two quaternions
- * @param q1 The first quaternion
- * @param q2 The second quaternion
- * @param t The ration between q1 and q2. t = 0 return q1, t=1 returns q2
- * Slerp assumes constant velocity between positions. */
-B3_FORCE_INLINE b3Quaternion
-b3Slerp(const b3Quaternion& q1, const b3Quaternion& q2, const b3Scalar& t)
-{
- return q1.slerp(q2, t);
-}
-
-B3_FORCE_INLINE b3Quaternion
-b3QuatMul(const b3Quaternion& rot0, const b3Quaternion& rot1)
-{
- return rot0 * rot1;
-}
-
-B3_FORCE_INLINE b3Quaternion
-b3QuatNormalized(const b3Quaternion& orn)
-{
- return orn.normalized();
-}
-
-B3_FORCE_INLINE b3Vector3
-b3QuatRotate(const b3Quaternion& rotation, const b3Vector3& v)
-{
- b3Quaternion q = rotation * v;
- q *= rotation.inverse();
-#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
- return b3MakeVector3(_mm_and_ps(q.get128(), b3vFFF0fMask));
-#elif defined(B3_USE_NEON)
- return b3MakeVector3((float32x4_t)vandq_s32((int32x4_t)q.get128(), b3vFFF0Mask));
-#else
- return b3MakeVector3(q.getX(), q.getY(), q.getZ());
-#endif
-}
-
-B3_FORCE_INLINE b3Quaternion
-b3ShortestArcQuat(const b3Vector3& v0, const b3Vector3& v1) // Game Programming Gems 2.10. make sure v0,v1 are normalized
-{
- b3Vector3 c = v0.cross(v1);
- b3Scalar d = v0.dot(v1);
-
- if (d < -1.0 + B3_EPSILON)
- {
- b3Vector3 n, unused;
- b3PlaneSpace1(v0, n, unused);
- return b3Quaternion(n.getX(), n.getY(), n.getZ(), 0.0f); // just pick any vector that is orthogonal to v0
- }
-
- b3Scalar s = b3Sqrt((1.0f + d) * 2.0f);
- b3Scalar rs = 1.0f / s;
-
- return b3Quaternion(c.getX() * rs, c.getY() * rs, c.getZ() * rs, s * 0.5f);
-}
-
-B3_FORCE_INLINE b3Quaternion
-b3ShortestArcQuatNormalize2(b3Vector3& v0, b3Vector3& v1)
-{
- v0.normalize();
- v1.normalize();
- return b3ShortestArcQuat(v0, v1);
-}
-
-#endif //B3_SIMD__QUATERNION_H_
diff --git a/thirdparty/bullet/Bullet3Common/b3Random.h b/thirdparty/bullet/Bullet3Common/b3Random.h
deleted file mode 100644
index c2e21496c7..0000000000
--- a/thirdparty/bullet/Bullet3Common/b3Random.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
-Copyright (c) 2003-2013 Gino van den Bergen / Erwin Coumans http://bulletphysics.org
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose,
-including commercial applications, and to alter it and redistribute it freely,
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-#ifndef B3_GEN_RANDOM_H
-#define B3_GEN_RANDOM_H
-
-#include "b3Scalar.h"
-
-#ifdef MT19937
-
-#include <limits.h>
-#include <mt19937.h>
-
-#define B3_RAND_MAX UINT_MAX
-
-B3_FORCE_INLINE void b3Srand(unsigned int seed) { init_genrand(seed); }
-B3_FORCE_INLINE unsigned int b3rand() { return genrand_int32(); }
-
-#else
-
-#include <stdlib.h>
-
-#define B3_RAND_MAX RAND_MAX
-
-B3_FORCE_INLINE void b3Srand(unsigned int seed) { srand(seed); }
-B3_FORCE_INLINE unsigned int b3rand() { return rand(); }
-
-#endif
-
-inline b3Scalar b3RandRange(b3Scalar minRange, b3Scalar maxRange)
-{
- return (b3rand() / (b3Scalar(B3_RAND_MAX) + b3Scalar(1.0))) * (maxRange - minRange) + minRange;
-}
-
-#endif //B3_GEN_RANDOM_H
diff --git a/thirdparty/bullet/Bullet3Common/b3ResizablePool.h b/thirdparty/bullet/Bullet3Common/b3ResizablePool.h
deleted file mode 100644
index cafe3ff396..0000000000
--- a/thirdparty/bullet/Bullet3Common/b3ResizablePool.h
+++ /dev/null
@@ -1,171 +0,0 @@
-
-#ifndef B3_RESIZABLE_POOL_H
-#define B3_RESIZABLE_POOL_H
-
-#include "Bullet3Common/b3AlignedObjectArray.h"
-
-enum
-{
- B3_POOL_HANDLE_TERMINAL_FREE = -1,
- B3_POOL_HANDLE_TERMINAL_USED = -2
-};
-
-template <typename U>
-struct b3PoolBodyHandle : public U
-{
- B3_DECLARE_ALIGNED_ALLOCATOR();
-
- int m_nextFreeHandle;
- void setNextFree(int next)
- {
- m_nextFreeHandle = next;
- }
- int getNextFree() const
- {
- return m_nextFreeHandle;
- }
-};
-
-template <typename T>
-class b3ResizablePool
-{
-protected:
- b3AlignedObjectArray<T> m_bodyHandles;
- int m_numUsedHandles; // number of active handles
- int m_firstFreeHandle; // free handles list
-
- T* getHandleInternal(int handle)
- {
- return &m_bodyHandles[handle];
- }
- const T* getHandleInternal(int handle) const
- {
- return &m_bodyHandles[handle];
- }
-
-public:
- b3ResizablePool()
- {
- initHandles();
- }
-
- virtual ~b3ResizablePool()
- {
- exitHandles();
- }
- ///handle management
-
- int getNumHandles() const
- {
- return m_bodyHandles.size();
- }
-
- void getUsedHandles(b3AlignedObjectArray<int>& usedHandles) const
- {
- for (int i = 0; i < m_bodyHandles.size(); i++)
- {
- if (m_bodyHandles[i].getNextFree() == B3_POOL_HANDLE_TERMINAL_USED)
- {
- usedHandles.push_back(i);
- }
- }
- }
-
- T* getHandle(int handle)
- {
- b3Assert(handle >= 0);
- b3Assert(handle < m_bodyHandles.size());
- if ((handle < 0) || (handle >= m_bodyHandles.size()))
- {
- return 0;
- }
-
- if (m_bodyHandles[handle].getNextFree() == B3_POOL_HANDLE_TERMINAL_USED)
- {
- return &m_bodyHandles[handle];
- }
- return 0;
- }
- const T* getHandle(int handle) const
- {
- b3Assert(handle >= 0);
- b3Assert(handle < m_bodyHandles.size());
- if ((handle < 0) || (handle >= m_bodyHandles.size()))
- {
- return 0;
- }
-
- if (m_bodyHandles[handle].getNextFree() == B3_POOL_HANDLE_TERMINAL_USED)
- {
- return &m_bodyHandles[handle];
- }
- return 0;
- }
-
- void increaseHandleCapacity(int extraCapacity)
- {
- int curCapacity = m_bodyHandles.size();
- //b3Assert(curCapacity == m_numUsedHandles);
- int newCapacity = curCapacity + extraCapacity;
- m_bodyHandles.resize(newCapacity);
-
- {
- for (int i = curCapacity; i < newCapacity; i++)
- m_bodyHandles[i].setNextFree(i + 1);
-
- m_bodyHandles[newCapacity - 1].setNextFree(-1);
- }
- m_firstFreeHandle = curCapacity;
- }
- void initHandles()
- {
- m_numUsedHandles = 0;
- m_firstFreeHandle = -1;
-
- increaseHandleCapacity(1);
- }
-
- void exitHandles()
- {
- m_bodyHandles.resize(0);
- m_firstFreeHandle = -1;
- m_numUsedHandles = 0;
- }
-
- int allocHandle()
- {
- b3Assert(m_firstFreeHandle >= 0);
-
- int handle = m_firstFreeHandle;
- m_firstFreeHandle = getHandleInternal(handle)->getNextFree();
- m_numUsedHandles++;
-
- if (m_firstFreeHandle < 0)
- {
- //int curCapacity = m_bodyHandles.size();
- int additionalCapacity = m_bodyHandles.size();
- increaseHandleCapacity(additionalCapacity);
-
- getHandleInternal(handle)->setNextFree(m_firstFreeHandle);
- }
- getHandleInternal(handle)->setNextFree(B3_POOL_HANDLE_TERMINAL_USED);
- getHandleInternal(handle)->clear();
- return handle;
- }
-
- void freeHandle(int handle)
- {
- b3Assert(handle >= 0);
-
- if (m_bodyHandles[handle].getNextFree() == B3_POOL_HANDLE_TERMINAL_USED)
- {
- getHandleInternal(handle)->clear();
- getHandleInternal(handle)->setNextFree(m_firstFreeHandle);
- m_firstFreeHandle = handle;
- m_numUsedHandles--;
- }
- }
-};
-///end handle management
-
-#endif //B3_RESIZABLE_POOL_H
diff --git a/thirdparty/bullet/Bullet3Common/b3Scalar.h b/thirdparty/bullet/Bullet3Common/b3Scalar.h
deleted file mode 100644
index eeb70ed632..0000000000
--- a/thirdparty/bullet/Bullet3Common/b3Scalar.h
+++ /dev/null
@@ -1,689 +0,0 @@
-/*
-Copyright (c) 2003-2013 Gino van den Bergen / Erwin Coumans http://bulletphysics.org
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose,
-including commercial applications, and to alter it and redistribute it freely,
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-#ifndef B3_SCALAR_H
-#define B3_SCALAR_H
-
-#ifdef B3_MANAGED_CODE
-//Aligned data types not supported in managed code
-#pragma unmanaged
-#endif
-
-#include <math.h>
-#include <stdlib.h> //size_t for MSVC 6.0
-#include <float.h>
-
-//Original repository is at http://github.com/erwincoumans/bullet3
-#define B3_BULLET_VERSION 300
-
-inline int b3GetVersion()
-{
- return B3_BULLET_VERSION;
-}
-
-#if defined(DEBUG) || defined(_DEBUG)
-#define B3_DEBUG
-#endif
-
-#include "b3Logging.h" //for b3Error
-
-#ifdef _WIN32
-
-#if defined(__GNUC__) // it should handle both MINGW and CYGWIN
-#define B3_FORCE_INLINE __inline__ __attribute__((always_inline))
-#define B3_ATTRIBUTE_ALIGNED16(a) a __attribute__((aligned(16)))
-#define B3_ATTRIBUTE_ALIGNED64(a) a __attribute__((aligned(64)))
-#define B3_ATTRIBUTE_ALIGNED128(a) a __attribute__((aligned(128)))
-#elif ( defined(_MSC_VER) && _MSC_VER < 1300 )
-#define B3_FORCE_INLINE inline
-#define B3_ATTRIBUTE_ALIGNED16(a) a
-#define B3_ATTRIBUTE_ALIGNED64(a) a
-#define B3_ATTRIBUTE_ALIGNED128(a) a
-#else
-//#define B3_HAS_ALIGNED_ALLOCATOR
-#pragma warning(disable : 4324) // disable padding warning
-// #pragma warning(disable:4530) // Disable the exception disable but used in MSCV Stl warning.
-#pragma warning(disable : 4996) //Turn off warnings about deprecated C routines
-// #pragma warning(disable:4786) // Disable the "debug name too long" warning
-
-#define B3_FORCE_INLINE __forceinline
-#define B3_ATTRIBUTE_ALIGNED16(a) __declspec(align(16)) a
-#define B3_ATTRIBUTE_ALIGNED64(a) __declspec(align(64)) a
-#define B3_ATTRIBUTE_ALIGNED128(a) __declspec(align(128)) a
-#ifdef _XBOX
-#define B3_USE_VMX128
-
-#include <ppcintrinsics.h>
-#define B3_HAVE_NATIVE_FSEL
-#define b3Fsel(a, b, c) __fsel((a), (b), (c))
-#else
-
-#if (defined(_WIN32) && (_MSC_VER) && _MSC_VER >= 1400) && (!defined(B3_USE_DOUBLE_PRECISION))
-#if (defined(_M_IX86) || defined(_M_X64))
-
-
-#ifdef __clang__
-//#define B3_NO_SIMD_OPERATOR_OVERLOADS
-#define B3_DISABLE_SSE
-#endif //__clang__
-
-#ifndef B3_DISABLE_SSE
-#define B3_USE_SSE
-#endif //B3_DISABLE_SSE
-
-#ifdef B3_USE_SSE
-//B3_USE_SSE_IN_API is disabled under Windows by default, because
-//it makes it harder to integrate Bullet into your application under Windows
-//(structured embedding Bullet structs/classes need to be 16-byte aligned)
-//with relatively little performance gain
-//If you are not embedded Bullet data in your classes, or make sure that you align those classes on 16-byte boundaries
-//you can manually enable this line or set it in the build system for a bit of performance gain (a few percent, dependent on usage)
-//#define B3_USE_SSE_IN_API
-#endif //B3_USE_SSE
-#include <emmintrin.h>
-#endif
-#endif
-
-#endif //_XBOX
-
-#endif //__MINGW32__
-
-#ifdef B3_DEBUG
-#ifdef _MSC_VER
-#include <stdio.h>
-#define b3Assert(x) { if(!(x)){b3Error("Assert " __FILE__ ":%u (%s)\n", __LINE__, #x);__debugbreak(); }}
-#else //_MSC_VER
-#include <assert.h>
-#define b3Assert assert
-#endif //_MSC_VER
-#else
-#define b3Assert(x)
-#endif
-//b3FullAssert is optional, slows down a lot
-#define b3FullAssert(x)
-
-#define b3Likely(_c) _c
-#define b3Unlikely(_c) _c
-
-#else
-
-#if defined(__CELLOS_LV2__)
-#define B3_FORCE_INLINE inline __attribute__((always_inline))
-#define B3_ATTRIBUTE_ALIGNED16(a) a __attribute__((aligned(16)))
-#define B3_ATTRIBUTE_ALIGNED64(a) a __attribute__((aligned(64)))
-#define B3_ATTRIBUTE_ALIGNED128(a) a __attribute__((aligned(128)))
-#ifndef assert
-#include <assert.h>
-#endif
-#ifdef B3_DEBUG
-#ifdef __SPU__
-#include <spu_printf.h>
-#define printf spu_printf
-#define b3Assert(x) \
- { \
- if (!(x)) \
- { \
- b3Error( \
- "Assert "__FILE__ \
- ":%u (" #x ")\n", \
- __LINE__); \
- spu_hcmpeq(0, 0); \
- } \
- }
-#else
-#define b3Assert assert
-#endif
-
-#else
-#define b3Assert(x)
-#endif
-//b3FullAssert is optional, slows down a lot
-#define b3FullAssert(x)
-
-#define b3Likely(_c) _c
-#define b3Unlikely(_c) _c
-
-#else
-
-#ifdef USE_LIBSPE2
-
-#define B3_FORCE_INLINE __inline
-#define B3_ATTRIBUTE_ALIGNED16(a) a __attribute__((aligned(16)))
-#define B3_ATTRIBUTE_ALIGNED64(a) a __attribute__((aligned(64)))
-#define B3_ATTRIBUTE_ALIGNED128(a) a __attribute__((aligned(128)))
-#ifndef assert
-#include <assert.h>
-#endif
-#ifdef B3_DEBUG
-#define b3Assert assert
-#else
-#define b3Assert(x)
-#endif
-//b3FullAssert is optional, slows down a lot
-#define b3FullAssert(x)
-
-#define b3Likely(_c) __builtin_expect((_c), 1)
-#define b3Unlikely(_c) __builtin_expect((_c), 0)
-
-#else
-//non-windows systems
-
-#if (defined(__APPLE__) && (!defined(B3_USE_DOUBLE_PRECISION)))
-#if defined(__i386__) || defined(__x86_64__)
-#define B3_USE_SSE
-//B3_USE_SSE_IN_API is enabled on Mac OSX by default, because memory is automatically aligned on 16-byte boundaries
-//if apps run into issues, we will disable the next line
-#define B3_USE_SSE_IN_API
-#ifdef B3_USE_SSE
-// include appropriate SSE level
-#if defined(__SSE4_1__)
-#include <smmintrin.h>
-#elif defined(__SSSE3__)
-#include <tmmintrin.h>
-#elif defined(__SSE3__)
-#include <pmmintrin.h>
-#else
-#include <emmintrin.h>
-#endif
-#endif //B3_USE_SSE
-#elif defined(__armv7__)
-#ifdef __clang__
-#define B3_USE_NEON 1
-
-#if defined B3_USE_NEON && defined(__clang__)
-#include <arm_neon.h>
-#endif //B3_USE_NEON
-#endif //__clang__
-#endif //__arm__
-
-#define B3_FORCE_INLINE inline __attribute__((always_inline))
-///@todo: check out alignment methods for other platforms/compilers
-#define B3_ATTRIBUTE_ALIGNED16(a) a __attribute__((aligned(16)))
-#define B3_ATTRIBUTE_ALIGNED64(a) a __attribute__((aligned(64)))
-#define B3_ATTRIBUTE_ALIGNED128(a) a __attribute__((aligned(128)))
-#ifndef assert
-#include <assert.h>
-#endif
-
-#if defined(DEBUG) || defined(_DEBUG)
-#if defined(__i386__) || defined(__x86_64__)
-#include <stdio.h>
-#define b3Assert(x) \
- { \
- if (!(x)) \
- { \
- b3Error("Assert %s in line %d, file %s\n", #x, __LINE__, __FILE__); \
- asm volatile("int3"); \
- } \
- }
-#else //defined (__i386__) || defined (__x86_64__)
-#define b3Assert assert
-#endif //defined (__i386__) || defined (__x86_64__)
-#else //defined(DEBUG) || defined (_DEBUG)
-#define b3Assert(x)
-#endif //defined(DEBUG) || defined (_DEBUG)
-
-//b3FullAssert is optional, slows down a lot
-#define b3FullAssert(x)
-#define b3Likely(_c) _c
-#define b3Unlikely(_c) _c
-
-#else
-
-#define B3_FORCE_INLINE inline
-///@todo: check out alignment methods for other platforms/compilers
-#define B3_ATTRIBUTE_ALIGNED16(a) a __attribute__((aligned(16)))
-#define B3_ATTRIBUTE_ALIGNED64(a) a __attribute__((aligned(64)))
-#define B3_ATTRIBUTE_ALIGNED128(a) a __attribute__((aligned(128)))
-///#define B3_ATTRIBUTE_ALIGNED16(a) a
-///#define B3_ATTRIBUTE_ALIGNED64(a) a
-///#define B3_ATTRIBUTE_ALIGNED128(a) a
-#ifndef assert
-#include <assert.h>
-#endif
-
-#if defined(DEBUG) || defined(_DEBUG)
-#define b3Assert assert
-#else
-#define b3Assert(x)
-#endif
-
-//b3FullAssert is optional, slows down a lot
-#define b3FullAssert(x)
-#define b3Likely(_c) _c
-#define b3Unlikely(_c) _c
-#endif //__APPLE__
-
-#endif // LIBSPE2
-
-#endif //__CELLOS_LV2__
-#endif
-
-///The b3Scalar type abstracts floating point numbers, to easily switch between double and single floating point precision.
-#if defined(B3_USE_DOUBLE_PRECISION)
-typedef double b3Scalar;
-//this number could be bigger in double precision
-#define B3_LARGE_FLOAT 1e30
-#else
-typedef float b3Scalar;
-//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX
-#define B3_LARGE_FLOAT 1e18f
-#endif
-
-#ifdef B3_USE_SSE
-typedef __m128 b3SimdFloat4;
-#endif //B3_USE_SSE
-
-#if defined B3_USE_SSE_IN_API && defined(B3_USE_SSE)
-#ifdef _WIN32
-
-#ifndef B3_NAN
-static int b3NanMask = 0x7F800001;
-#define B3_NAN (*(float *)&b3NanMask)
-#endif
-
-#ifndef B3_INFINITY_MASK
-static int b3InfinityMask = 0x7F800000;
-#define B3_INFINITY_MASK (*(float *)&b3InfinityMask)
-#endif
-#ifndef B3_NO_SIMD_OPERATOR_OVERLOADS
-inline __m128 operator+(const __m128 A, const __m128 B)
-{
- return _mm_add_ps(A, B);
-}
-
-inline __m128 operator-(const __m128 A, const __m128 B)
-{
- return _mm_sub_ps(A, B);
-}
-
-inline __m128 operator*(const __m128 A, const __m128 B)
-{
- return _mm_mul_ps(A, B);
-}
-#endif //B3_NO_SIMD_OPERATOR_OVERLOADS
-#define b3CastfTo128i(a) (_mm_castps_si128(a))
-#define b3CastfTo128d(a) (_mm_castps_pd(a))
-#define b3CastiTo128f(a) (_mm_castsi128_ps(a))
-#define b3CastdTo128f(a) (_mm_castpd_ps(a))
-#define b3CastdTo128i(a) (_mm_castpd_si128(a))
-#define b3Assign128(r0, r1, r2, r3) _mm_setr_ps(r0, r1, r2, r3)
-
-#else //_WIN32
-
-#define b3CastfTo128i(a) ((__m128i)(a))
-#define b3CastfTo128d(a) ((__m128d)(a))
-#define b3CastiTo128f(a) ((__m128)(a))
-#define b3CastdTo128f(a) ((__m128)(a))
-#define b3CastdTo128i(a) ((__m128i)(a))
-#define b3Assign128(r0, r1, r2, r3) \
- (__m128) { r0, r1, r2, r3 }
-#endif //_WIN32
-#endif //B3_USE_SSE_IN_API
-
-#ifdef B3_USE_NEON
-#include <arm_neon.h>
-
-typedef float32x4_t b3SimdFloat4;
-#define B3_INFINITY INFINITY
-#define B3_NAN NAN
-#define b3Assign128(r0, r1, r2, r3) \
- (float32x4_t) { r0, r1, r2, r3 }
-#endif
-
-#define B3_DECLARE_ALIGNED_ALLOCATOR() \
- B3_FORCE_INLINE void *operator new(size_t sizeInBytes) { return b3AlignedAlloc(sizeInBytes, 16); } \
- B3_FORCE_INLINE void operator delete(void *ptr) { b3AlignedFree(ptr); } \
- B3_FORCE_INLINE void *operator new(size_t, void *ptr) { return ptr; } \
- B3_FORCE_INLINE void operator delete(void *, void *) {} \
- B3_FORCE_INLINE void *operator new[](size_t sizeInBytes) { return b3AlignedAlloc(sizeInBytes, 16); } \
- B3_FORCE_INLINE void operator delete[](void *ptr) { b3AlignedFree(ptr); } \
- B3_FORCE_INLINE void *operator new[](size_t, void *ptr) { return ptr; } \
- B3_FORCE_INLINE void operator delete[](void *, void *) {}
-
-#if defined(B3_USE_DOUBLE_PRECISION) || defined(B3_FORCE_DOUBLE_FUNCTIONS)
-
-B3_FORCE_INLINE b3Scalar b3Sqrt(b3Scalar x)
-{
- return sqrt(x);
-}
-B3_FORCE_INLINE b3Scalar b3Fabs(b3Scalar x) { return fabs(x); }
-B3_FORCE_INLINE b3Scalar b3Cos(b3Scalar x) { return cos(x); }
-B3_FORCE_INLINE b3Scalar b3Sin(b3Scalar x) { return sin(x); }
-B3_FORCE_INLINE b3Scalar b3Tan(b3Scalar x) { return tan(x); }
-B3_FORCE_INLINE b3Scalar b3Acos(b3Scalar x)
-{
- if (x < b3Scalar(-1)) x = b3Scalar(-1);
- if (x > b3Scalar(1)) x = b3Scalar(1);
- return acos(x);
-}
-B3_FORCE_INLINE b3Scalar b3Asin(b3Scalar x)
-{
- if (x < b3Scalar(-1)) x = b3Scalar(-1);
- if (x > b3Scalar(1)) x = b3Scalar(1);
- return asin(x);
-}
-B3_FORCE_INLINE b3Scalar b3Atan(b3Scalar x) { return atan(x); }
-B3_FORCE_INLINE b3Scalar b3Atan2(b3Scalar x, b3Scalar y) { return atan2(x, y); }
-B3_FORCE_INLINE b3Scalar b3Exp(b3Scalar x) { return exp(x); }
-B3_FORCE_INLINE b3Scalar b3Log(b3Scalar x) { return log(x); }
-B3_FORCE_INLINE b3Scalar b3Pow(b3Scalar x, b3Scalar y) { return pow(x, y); }
-B3_FORCE_INLINE b3Scalar b3Fmod(b3Scalar x, b3Scalar y) { return fmod(x, y); }
-
-#else
-
-B3_FORCE_INLINE b3Scalar b3Sqrt(b3Scalar y)
-{
-#ifdef USE_APPROXIMATION
- double x, z, tempf;
- unsigned long *tfptr = ((unsigned long *)&tempf) + 1;
-
- tempf = y;
- *tfptr = (0xbfcdd90a - *tfptr) >> 1; /* estimate of 1/sqrt(y) */
- x = tempf;
- z = y * b3Scalar(0.5);
- x = (b3Scalar(1.5) * x) - (x * x) * (x * z); /* iteration formula */
- x = (b3Scalar(1.5) * x) - (x * x) * (x * z);
- x = (b3Scalar(1.5) * x) - (x * x) * (x * z);
- x = (b3Scalar(1.5) * x) - (x * x) * (x * z);
- x = (b3Scalar(1.5) * x) - (x * x) * (x * z);
- return x * y;
-#else
- return sqrtf(y);
-#endif
-}
-B3_FORCE_INLINE b3Scalar b3Fabs(b3Scalar x) { return fabsf(x); }
-B3_FORCE_INLINE b3Scalar b3Cos(b3Scalar x) { return cosf(x); }
-B3_FORCE_INLINE b3Scalar b3Sin(b3Scalar x) { return sinf(x); }
-B3_FORCE_INLINE b3Scalar b3Tan(b3Scalar x) { return tanf(x); }
-B3_FORCE_INLINE b3Scalar b3Acos(b3Scalar x)
-{
- if (x < b3Scalar(-1))
- x = b3Scalar(-1);
- if (x > b3Scalar(1))
- x = b3Scalar(1);
- return acosf(x);
-}
-B3_FORCE_INLINE b3Scalar b3Asin(b3Scalar x)
-{
- if (x < b3Scalar(-1))
- x = b3Scalar(-1);
- if (x > b3Scalar(1))
- x = b3Scalar(1);
- return asinf(x);
-}
-B3_FORCE_INLINE b3Scalar b3Atan(b3Scalar x) { return atanf(x); }
-B3_FORCE_INLINE b3Scalar b3Atan2(b3Scalar x, b3Scalar y) { return atan2f(x, y); }
-B3_FORCE_INLINE b3Scalar b3Exp(b3Scalar x) { return expf(x); }
-B3_FORCE_INLINE b3Scalar b3Log(b3Scalar x) { return logf(x); }
-B3_FORCE_INLINE b3Scalar b3Pow(b3Scalar x, b3Scalar y) { return powf(x, y); }
-B3_FORCE_INLINE b3Scalar b3Fmod(b3Scalar x, b3Scalar y) { return fmodf(x, y); }
-
-#endif
-
-#define B3_2_PI b3Scalar(6.283185307179586232)
-#define B3_PI (B3_2_PI * b3Scalar(0.5))
-#define B3_HALF_PI (B3_2_PI * b3Scalar(0.25))
-#define B3_RADS_PER_DEG (B3_2_PI / b3Scalar(360.0))
-#define B3_DEGS_PER_RAD (b3Scalar(360.0) / B3_2_PI)
-#define B3_SQRT12 b3Scalar(0.7071067811865475244008443621048490)
-
-#define b3RecipSqrt(x) ((b3Scalar)(b3Scalar(1.0) / b3Sqrt(b3Scalar(x)))) /* reciprocal square root */
-
-#ifdef B3_USE_DOUBLE_PRECISION
-#define B3_EPSILON DBL_EPSILON
-#define B3_INFINITY DBL_MAX
-#else
-#define B3_EPSILON FLT_EPSILON
-#define B3_INFINITY FLT_MAX
-#endif
-
-B3_FORCE_INLINE b3Scalar b3Atan2Fast(b3Scalar y, b3Scalar x)
-{
- b3Scalar coeff_1 = B3_PI / 4.0f;
- b3Scalar coeff_2 = 3.0f * coeff_1;
- b3Scalar abs_y = b3Fabs(y);
- b3Scalar angle;
- if (x >= 0.0f)
- {
- b3Scalar r = (x - abs_y) / (x + abs_y);
- angle = coeff_1 - coeff_1 * r;
- }
- else
- {
- b3Scalar r = (x + abs_y) / (abs_y - x);
- angle = coeff_2 - coeff_1 * r;
- }
- return (y < 0.0f) ? -angle : angle;
-}
-
-B3_FORCE_INLINE bool b3FuzzyZero(b3Scalar x) { return b3Fabs(x) < B3_EPSILON; }
-
-B3_FORCE_INLINE bool b3Equal(b3Scalar a, b3Scalar eps)
-{
- return (((a) <= eps) && !((a) < -eps));
-}
-B3_FORCE_INLINE bool b3GreaterEqual(b3Scalar a, b3Scalar eps)
-{
- return (!((a) <= eps));
-}
-
-B3_FORCE_INLINE int b3IsNegative(b3Scalar x)
-{
- return x < b3Scalar(0.0) ? 1 : 0;
-}
-
-B3_FORCE_INLINE b3Scalar b3Radians(b3Scalar x) { return x * B3_RADS_PER_DEG; }
-B3_FORCE_INLINE b3Scalar b3Degrees(b3Scalar x) { return x * B3_DEGS_PER_RAD; }
-
-#define B3_DECLARE_HANDLE(name) \
- typedef struct name##__ \
- { \
- int unused; \
- } * name
-
-#ifndef b3Fsel
-B3_FORCE_INLINE b3Scalar b3Fsel(b3Scalar a, b3Scalar b, b3Scalar c)
-{
- return a >= 0 ? b : c;
-}
-#endif
-#define b3Fsels(a, b, c) (b3Scalar) b3Fsel(a, b, c)
-
-B3_FORCE_INLINE bool b3MachineIsLittleEndian()
-{
- long int i = 1;
- const char *p = (const char *)&i;
- if (p[0] == 1) // Lowest address contains the least significant byte
- return true;
- else
- return false;
-}
-
-///b3Select avoids branches, which makes performance much better for consoles like Playstation 3 and XBox 360
-///Thanks Phil Knight. See also http://www.cellperformance.com/articles/2006/04/more_techniques_for_eliminatin_1.html
-B3_FORCE_INLINE unsigned b3Select(unsigned condition, unsigned valueIfConditionNonZero, unsigned valueIfConditionZero)
-{
- // Set testNz to 0xFFFFFFFF if condition is nonzero, 0x00000000 if condition is zero
- // Rely on positive value or'ed with its negative having sign bit on
- // and zero value or'ed with its negative (which is still zero) having sign bit off
- // Use arithmetic shift right, shifting the sign bit through all 32 bits
- unsigned testNz = (unsigned)(((int)condition | -(int)condition) >> 31);
- unsigned testEqz = ~testNz;
- return ((valueIfConditionNonZero & testNz) | (valueIfConditionZero & testEqz));
-}
-B3_FORCE_INLINE int b3Select(unsigned condition, int valueIfConditionNonZero, int valueIfConditionZero)
-{
- unsigned testNz = (unsigned)(((int)condition | -(int)condition) >> 31);
- unsigned testEqz = ~testNz;
- return static_cast<int>((valueIfConditionNonZero & testNz) | (valueIfConditionZero & testEqz));
-}
-B3_FORCE_INLINE float b3Select(unsigned condition, float valueIfConditionNonZero, float valueIfConditionZero)
-{
-#ifdef B3_HAVE_NATIVE_FSEL
- return (float)b3Fsel((b3Scalar)condition - b3Scalar(1.0f), valueIfConditionNonZero, valueIfConditionZero);
-#else
- return (condition != 0) ? valueIfConditionNonZero : valueIfConditionZero;
-#endif
-}
-
-template <typename T>
-B3_FORCE_INLINE void b3Swap(T &a, T &b)
-{
- T tmp = a;
- a = b;
- b = tmp;
-}
-
-//PCK: endian swapping functions
-B3_FORCE_INLINE unsigned b3SwapEndian(unsigned val)
-{
- return (((val & 0xff000000) >> 24) | ((val & 0x00ff0000) >> 8) | ((val & 0x0000ff00) << 8) | ((val & 0x000000ff) << 24));
-}
-
-B3_FORCE_INLINE unsigned short b3SwapEndian(unsigned short val)
-{
- return static_cast<unsigned short>(((val & 0xff00) >> 8) | ((val & 0x00ff) << 8));
-}
-
-B3_FORCE_INLINE unsigned b3SwapEndian(int val)
-{
- return b3SwapEndian((unsigned)val);
-}
-
-B3_FORCE_INLINE unsigned short b3SwapEndian(short val)
-{
- return b3SwapEndian((unsigned short)val);
-}
-
-///b3SwapFloat uses using char pointers to swap the endianness
-////b3SwapFloat/b3SwapDouble will NOT return a float, because the machine might 'correct' invalid floating point values
-///Not all values of sign/exponent/mantissa are valid floating point numbers according to IEEE 754.
-///When a floating point unit is faced with an invalid value, it may actually change the value, or worse, throw an exception.
-///In most systems, running user mode code, you wouldn't get an exception, but instead the hardware/os/runtime will 'fix' the number for you.
-///so instead of returning a float/double, we return integer/long long integer
-B3_FORCE_INLINE unsigned int b3SwapEndianFloat(float d)
-{
- unsigned int a = 0;
- unsigned char *dst = (unsigned char *)&a;
- unsigned char *src = (unsigned char *)&d;
-
- dst[0] = src[3];
- dst[1] = src[2];
- dst[2] = src[1];
- dst[3] = src[0];
- return a;
-}
-
-// unswap using char pointers
-B3_FORCE_INLINE float b3UnswapEndianFloat(unsigned int a)
-{
- float d = 0.0f;
- unsigned char *src = (unsigned char *)&a;
- unsigned char *dst = (unsigned char *)&d;
-
- dst[0] = src[3];
- dst[1] = src[2];
- dst[2] = src[1];
- dst[3] = src[0];
-
- return d;
-}
-
-// swap using char pointers
-B3_FORCE_INLINE void b3SwapEndianDouble(double d, unsigned char *dst)
-{
- unsigned char *src = (unsigned char *)&d;
-
- dst[0] = src[7];
- dst[1] = src[6];
- dst[2] = src[5];
- dst[3] = src[4];
- dst[4] = src[3];
- dst[5] = src[2];
- dst[6] = src[1];
- dst[7] = src[0];
-}
-
-// unswap using char pointers
-B3_FORCE_INLINE double b3UnswapEndianDouble(const unsigned char *src)
-{
- double d = 0.0;
- unsigned char *dst = (unsigned char *)&d;
-
- dst[0] = src[7];
- dst[1] = src[6];
- dst[2] = src[5];
- dst[3] = src[4];
- dst[4] = src[3];
- dst[5] = src[2];
- dst[6] = src[1];
- dst[7] = src[0];
-
- return d;
-}
-
-// returns normalized value in range [-B3_PI, B3_PI]
-B3_FORCE_INLINE b3Scalar b3NormalizeAngle(b3Scalar angleInRadians)
-{
- angleInRadians = b3Fmod(angleInRadians, B3_2_PI);
- if (angleInRadians < -B3_PI)
- {
- return angleInRadians + B3_2_PI;
- }
- else if (angleInRadians > B3_PI)
- {
- return angleInRadians - B3_2_PI;
- }
- else
- {
- return angleInRadians;
- }
-}
-
-///rudimentary class to provide type info
-struct b3TypedObject
-{
- b3TypedObject(int objectType)
- : m_objectType(objectType)
- {
- }
- int m_objectType;
- inline int getObjectType() const
- {
- return m_objectType;
- }
-};
-
-///align a pointer to the provided alignment, upwards
-template <typename T>
-T *b3AlignPointer(T *unalignedPtr, size_t alignment)
-{
- struct b3ConvertPointerSizeT
- {
- union {
- T *ptr;
- size_t integer;
- };
- };
- b3ConvertPointerSizeT converter;
-
- const size_t bit_mask = ~(alignment - 1);
- converter.ptr = unalignedPtr;
- converter.integer += alignment - 1;
- converter.integer &= bit_mask;
- return converter.ptr;
-}
-
-#endif //B3_SCALAR_H
diff --git a/thirdparty/bullet/Bullet3Common/b3StackAlloc.h b/thirdparty/bullet/Bullet3Common/b3StackAlloc.h
deleted file mode 100644
index 4972236ac7..0000000000
--- a/thirdparty/bullet/Bullet3Common/b3StackAlloc.h
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
-Copyright (c) 2003-2013 Gino van den Bergen / Erwin Coumans http://bulletphysics.org
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose,
-including commercial applications, and to alter it and redistribute it freely,
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-/*
-StackAlloc extracted from GJK-EPA collision solver by Nathanael Presson
-Nov.2006
-*/
-
-#ifndef B3_STACK_ALLOC
-#define B3_STACK_ALLOC
-
-#include "b3Scalar.h" //for b3Assert
-#include "b3AlignedAllocator.h"
-
-///The b3Block class is an internal structure for the b3StackAlloc memory allocator.
-struct b3Block
-{
- b3Block* previous;
- unsigned char* address;
-};
-
-///The StackAlloc class provides some fast stack-based memory allocator (LIFO last-in first-out)
-class b3StackAlloc
-{
-public:
- b3StackAlloc(unsigned int size)
- {
- ctor();
- create(size);
- }
- ~b3StackAlloc() { destroy(); }
-
- inline void create(unsigned int size)
- {
- destroy();
- data = (unsigned char*)b3AlignedAlloc(size, 16);
- totalsize = size;
- }
- inline void destroy()
- {
- b3Assert(usedsize == 0);
- //Raise(L"StackAlloc is still in use");
-
- if (usedsize == 0)
- {
- if (!ischild && data)
- b3AlignedFree(data);
-
- data = 0;
- usedsize = 0;
- }
- }
-
- int getAvailableMemory() const
- {
- return static_cast<int>(totalsize - usedsize);
- }
-
- unsigned char* allocate(unsigned int size)
- {
- const unsigned int nus(usedsize + size);
- if (nus < totalsize)
- {
- usedsize = nus;
- return (data + (usedsize - size));
- }
- b3Assert(0);
- //&& (L"Not enough memory"));
-
- return (0);
- }
- B3_FORCE_INLINE b3Block* beginBlock()
- {
- b3Block* pb = (b3Block*)allocate(sizeof(b3Block));
- pb->previous = current;
- pb->address = data + usedsize;
- current = pb;
- return (pb);
- }
- B3_FORCE_INLINE void endBlock(b3Block* block)
- {
- b3Assert(block == current);
- //Raise(L"Unmatched blocks");
- if (block == current)
- {
- current = block->previous;
- usedsize = (unsigned int)((block->address - data) - sizeof(b3Block));
- }
- }
-
-private:
- void ctor()
- {
- data = 0;
- totalsize = 0;
- usedsize = 0;
- current = 0;
- ischild = false;
- }
- unsigned char* data;
- unsigned int totalsize;
- unsigned int usedsize;
- b3Block* current;
- bool ischild;
-};
-
-#endif //B3_STACK_ALLOC
diff --git a/thirdparty/bullet/Bullet3Common/b3Transform.h b/thirdparty/bullet/Bullet3Common/b3Transform.h
deleted file mode 100644
index 149da9d148..0000000000
--- a/thirdparty/bullet/Bullet3Common/b3Transform.h
+++ /dev/null
@@ -1,286 +0,0 @@
-/*
-Copyright (c) 2003-2013 Gino van den Bergen / Erwin Coumans http://bulletphysics.org
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose,
-including commercial applications, and to alter it and redistribute it freely,
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-#ifndef B3_TRANSFORM_H
-#define B3_TRANSFORM_H
-
-#include "b3Matrix3x3.h"
-
-#ifdef B3_USE_DOUBLE_PRECISION
-#define b3TransformData b3TransformDoubleData
-#else
-#define b3TransformData b3TransformFloatData
-#endif
-
-/**@brief The b3Transform class supports rigid transforms with only translation and rotation and no scaling/shear.
- *It can be used in combination with b3Vector3, b3Quaternion and b3Matrix3x3 linear algebra classes. */
-B3_ATTRIBUTE_ALIGNED16(class)
-b3Transform
-{
- ///Storage for the rotation
- b3Matrix3x3 m_basis;
- ///Storage for the translation
- b3Vector3 m_origin;
-
-public:
- /**@brief No initialization constructor */
- b3Transform() {}
- /**@brief Constructor from b3Quaternion (optional b3Vector3 )
- * @param q Rotation from quaternion
- * @param c Translation from Vector (default 0,0,0) */
- explicit B3_FORCE_INLINE b3Transform(const b3Quaternion& q,
- const b3Vector3& c = b3MakeVector3(b3Scalar(0), b3Scalar(0), b3Scalar(0)))
- : m_basis(q),
- m_origin(c)
- {
- }
-
- /**@brief Constructor from b3Matrix3x3 (optional b3Vector3)
- * @param b Rotation from Matrix
- * @param c Translation from Vector default (0,0,0)*/
- explicit B3_FORCE_INLINE b3Transform(const b3Matrix3x3& b,
- const b3Vector3& c = b3MakeVector3(b3Scalar(0), b3Scalar(0), b3Scalar(0)))
- : m_basis(b),
- m_origin(c)
- {
- }
- /**@brief Copy constructor */
- B3_FORCE_INLINE b3Transform(const b3Transform& other)
- : m_basis(other.m_basis),
- m_origin(other.m_origin)
- {
- }
- /**@brief Assignment Operator */
- B3_FORCE_INLINE b3Transform& operator=(const b3Transform& other)
- {
- m_basis = other.m_basis;
- m_origin = other.m_origin;
- return *this;
- }
-
- /**@brief Set the current transform as the value of the product of two transforms
- * @param t1 Transform 1
- * @param t2 Transform 2
- * This = Transform1 * Transform2 */
- B3_FORCE_INLINE void mult(const b3Transform& t1, const b3Transform& t2)
- {
- m_basis = t1.m_basis * t2.m_basis;
- m_origin = t1(t2.m_origin);
- }
-
- /* void multInverseLeft(const b3Transform& t1, const b3Transform& t2) {
- b3Vector3 v = t2.m_origin - t1.m_origin;
- m_basis = b3MultTransposeLeft(t1.m_basis, t2.m_basis);
- m_origin = v * t1.m_basis;
- }
- */
-
- /**@brief Return the transform of the vector */
- B3_FORCE_INLINE b3Vector3 operator()(const b3Vector3& x) const
- {
- return x.dot3(m_basis[0], m_basis[1], m_basis[2]) + m_origin;
- }
-
- /**@brief Return the transform of the vector */
- B3_FORCE_INLINE b3Vector3 operator*(const b3Vector3& x) const
- {
- return (*this)(x);
- }
-
- /**@brief Return the transform of the b3Quaternion */
- B3_FORCE_INLINE b3Quaternion operator*(const b3Quaternion& q) const
- {
- return getRotation() * q;
- }
-
- /**@brief Return the basis matrix for the rotation */
- B3_FORCE_INLINE b3Matrix3x3& getBasis() { return m_basis; }
- /**@brief Return the basis matrix for the rotation */
- B3_FORCE_INLINE const b3Matrix3x3& getBasis() const { return m_basis; }
-
- /**@brief Return the origin vector translation */
- B3_FORCE_INLINE b3Vector3& getOrigin() { return m_origin; }
- /**@brief Return the origin vector translation */
- B3_FORCE_INLINE const b3Vector3& getOrigin() const { return m_origin; }
-
- /**@brief Return a quaternion representing the rotation */
- b3Quaternion getRotation() const
- {
- b3Quaternion q;
- m_basis.getRotation(q);
- return q;
- }
-
- /**@brief Set from an array
- * @param m A pointer to a 15 element array (12 rotation(row major padded on the right by 1), and 3 translation */
- void setFromOpenGLMatrix(const b3Scalar* m)
- {
- m_basis.setFromOpenGLSubMatrix(m);
- m_origin.setValue(m[12], m[13], m[14]);
- }
-
- /**@brief Fill an array representation
- * @param m A pointer to a 15 element array (12 rotation(row major padded on the right by 1), and 3 translation */
- void getOpenGLMatrix(b3Scalar * m) const
- {
- m_basis.getOpenGLSubMatrix(m);
- m[12] = m_origin.getX();
- m[13] = m_origin.getY();
- m[14] = m_origin.getZ();
- m[15] = b3Scalar(1.0);
- }
-
- /**@brief Set the translational element
- * @param origin The vector to set the translation to */
- B3_FORCE_INLINE void setOrigin(const b3Vector3& origin)
- {
- m_origin = origin;
- }
-
- B3_FORCE_INLINE b3Vector3 invXform(const b3Vector3& inVec) const;
-
- /**@brief Set the rotational element by b3Matrix3x3 */
- B3_FORCE_INLINE void setBasis(const b3Matrix3x3& basis)
- {
- m_basis = basis;
- }
-
- /**@brief Set the rotational element by b3Quaternion */
- B3_FORCE_INLINE void setRotation(const b3Quaternion& q)
- {
- m_basis.setRotation(q);
- }
-
- /**@brief Set this transformation to the identity */
- void setIdentity()
- {
- m_basis.setIdentity();
- m_origin.setValue(b3Scalar(0.0), b3Scalar(0.0), b3Scalar(0.0));
- }
-
- /**@brief Multiply this Transform by another(this = this * another)
- * @param t The other transform */
- b3Transform& operator*=(const b3Transform& t)
- {
- m_origin += m_basis * t.m_origin;
- m_basis *= t.m_basis;
- return *this;
- }
-
- /**@brief Return the inverse of this transform */
- b3Transform inverse() const
- {
- b3Matrix3x3 inv = m_basis.transpose();
- return b3Transform(inv, inv * -m_origin);
- }
-
- /**@brief Return the inverse of this transform times the other transform
- * @param t The other transform
- * return this.inverse() * the other */
- b3Transform inverseTimes(const b3Transform& t) const;
-
- /**@brief Return the product of this transform and the other */
- b3Transform operator*(const b3Transform& t) const;
-
- /**@brief Return an identity transform */
- static const b3Transform& getIdentity()
- {
- static const b3Transform identityTransform(b3Matrix3x3::getIdentity());
- return identityTransform;
- }
-
- void serialize(struct b3TransformData & dataOut) const;
-
- void serializeFloat(struct b3TransformFloatData & dataOut) const;
-
- void deSerialize(const struct b3TransformData& dataIn);
-
- void deSerializeDouble(const struct b3TransformDoubleData& dataIn);
-
- void deSerializeFloat(const struct b3TransformFloatData& dataIn);
-};
-
-B3_FORCE_INLINE b3Vector3
-b3Transform::invXform(const b3Vector3& inVec) const
-{
- b3Vector3 v = inVec - m_origin;
- return (m_basis.transpose() * v);
-}
-
-B3_FORCE_INLINE b3Transform
-b3Transform::inverseTimes(const b3Transform& t) const
-{
- b3Vector3 v = t.getOrigin() - m_origin;
- return b3Transform(m_basis.transposeTimes(t.m_basis),
- v * m_basis);
-}
-
-B3_FORCE_INLINE b3Transform
- b3Transform::operator*(const b3Transform& t) const
-{
- return b3Transform(m_basis * t.m_basis,
- (*this)(t.m_origin));
-}
-
-/**@brief Test if two transforms have all elements equal */
-B3_FORCE_INLINE bool operator==(const b3Transform& t1, const b3Transform& t2)
-{
- return (t1.getBasis() == t2.getBasis() &&
- t1.getOrigin() == t2.getOrigin());
-}
-
-///for serialization
-struct b3TransformFloatData
-{
- b3Matrix3x3FloatData m_basis;
- b3Vector3FloatData m_origin;
-};
-
-struct b3TransformDoubleData
-{
- b3Matrix3x3DoubleData m_basis;
- b3Vector3DoubleData m_origin;
-};
-
-B3_FORCE_INLINE void b3Transform::serialize(b3TransformData& dataOut) const
-{
- m_basis.serialize(dataOut.m_basis);
- m_origin.serialize(dataOut.m_origin);
-}
-
-B3_FORCE_INLINE void b3Transform::serializeFloat(b3TransformFloatData& dataOut) const
-{
- m_basis.serializeFloat(dataOut.m_basis);
- m_origin.serializeFloat(dataOut.m_origin);
-}
-
-B3_FORCE_INLINE void b3Transform::deSerialize(const b3TransformData& dataIn)
-{
- m_basis.deSerialize(dataIn.m_basis);
- m_origin.deSerialize(dataIn.m_origin);
-}
-
-B3_FORCE_INLINE void b3Transform::deSerializeFloat(const b3TransformFloatData& dataIn)
-{
- m_basis.deSerializeFloat(dataIn.m_basis);
- m_origin.deSerializeFloat(dataIn.m_origin);
-}
-
-B3_FORCE_INLINE void b3Transform::deSerializeDouble(const b3TransformDoubleData& dataIn)
-{
- m_basis.deSerializeDouble(dataIn.m_basis);
- m_origin.deSerializeDouble(dataIn.m_origin);
-}
-
-#endif //B3_TRANSFORM_H
diff --git a/thirdparty/bullet/Bullet3Common/b3TransformUtil.h b/thirdparty/bullet/Bullet3Common/b3TransformUtil.h
deleted file mode 100644
index 1850a9be5f..0000000000
--- a/thirdparty/bullet/Bullet3Common/b3TransformUtil.h
+++ /dev/null
@@ -1,210 +0,0 @@
-/*
-Copyright (c) 2003-2013 Gino van den Bergen / Erwin Coumans http://bulletphysics.org
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose,
-including commercial applications, and to alter it and redistribute it freely,
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-#ifndef B3_TRANSFORM_UTIL_H
-#define B3_TRANSFORM_UTIL_H
-
-#include "b3Transform.h"
-#define B3_ANGULAR_MOTION_THRESHOLD b3Scalar(0.5) * B3_HALF_PI
-
-B3_FORCE_INLINE b3Vector3 b3AabbSupport(const b3Vector3& halfExtents, const b3Vector3& supportDir)
-{
- return b3MakeVector3(supportDir.getX() < b3Scalar(0.0) ? -halfExtents.getX() : halfExtents.getX(),
- supportDir.getY() < b3Scalar(0.0) ? -halfExtents.getY() : halfExtents.getY(),
- supportDir.getZ() < b3Scalar(0.0) ? -halfExtents.getZ() : halfExtents.getZ());
-}
-
-/// Utils related to temporal transforms
-class b3TransformUtil
-{
-public:
- static void integrateTransform(const b3Transform& curTrans, const b3Vector3& linvel, const b3Vector3& angvel, b3Scalar timeStep, b3Transform& predictedTransform)
- {
- predictedTransform.setOrigin(curTrans.getOrigin() + linvel * timeStep);
- // #define QUATERNION_DERIVATIVE
-#ifdef QUATERNION_DERIVATIVE
- b3Quaternion predictedOrn = curTrans.getRotation();
- predictedOrn += (angvel * predictedOrn) * (timeStep * b3Scalar(0.5));
- predictedOrn.normalize();
-#else
- //Exponential map
- //google for "Practical Parameterization of Rotations Using the Exponential Map", F. Sebastian Grassia
-
- b3Vector3 axis;
- b3Scalar fAngle = angvel.length();
- //limit the angular motion
- if (fAngle * timeStep > B3_ANGULAR_MOTION_THRESHOLD)
- {
- fAngle = B3_ANGULAR_MOTION_THRESHOLD / timeStep;
- }
-
- if (fAngle < b3Scalar(0.001))
- {
- // use Taylor's expansions of sync function
- axis = angvel * (b3Scalar(0.5) * timeStep - (timeStep * timeStep * timeStep) * (b3Scalar(0.020833333333)) * fAngle * fAngle);
- }
- else
- {
- // sync(fAngle) = sin(c*fAngle)/t
- axis = angvel * (b3Sin(b3Scalar(0.5) * fAngle * timeStep) / fAngle);
- }
- b3Quaternion dorn(axis.getX(), axis.getY(), axis.getZ(), b3Cos(fAngle * timeStep * b3Scalar(0.5)));
- b3Quaternion orn0 = curTrans.getRotation();
-
- b3Quaternion predictedOrn = dorn * orn0;
- predictedOrn.normalize();
-#endif
- predictedTransform.setRotation(predictedOrn);
- }
-
- static void calculateVelocityQuaternion(const b3Vector3& pos0, const b3Vector3& pos1, const b3Quaternion& orn0, const b3Quaternion& orn1, b3Scalar timeStep, b3Vector3& linVel, b3Vector3& angVel)
- {
- linVel = (pos1 - pos0) / timeStep;
- b3Vector3 axis;
- b3Scalar angle;
- if (orn0 != orn1)
- {
- calculateDiffAxisAngleQuaternion(orn0, orn1, axis, angle);
- angVel = axis * angle / timeStep;
- }
- else
- {
- angVel.setValue(0, 0, 0);
- }
- }
-
- static void calculateDiffAxisAngleQuaternion(const b3Quaternion& orn0, const b3Quaternion& orn1a, b3Vector3& axis, b3Scalar& angle)
- {
- b3Quaternion orn1 = orn0.nearest(orn1a);
- b3Quaternion dorn = orn1 * orn0.inverse();
- angle = dorn.getAngle();
- axis = b3MakeVector3(dorn.getX(), dorn.getY(), dorn.getZ());
- axis[3] = b3Scalar(0.);
- //check for axis length
- b3Scalar len = axis.length2();
- if (len < B3_EPSILON * B3_EPSILON)
- axis = b3MakeVector3(b3Scalar(1.), b3Scalar(0.), b3Scalar(0.));
- else
- axis /= b3Sqrt(len);
- }
-
- static void calculateVelocity(const b3Transform& transform0, const b3Transform& transform1, b3Scalar timeStep, b3Vector3& linVel, b3Vector3& angVel)
- {
- linVel = (transform1.getOrigin() - transform0.getOrigin()) / timeStep;
- b3Vector3 axis;
- b3Scalar angle;
- calculateDiffAxisAngle(transform0, transform1, axis, angle);
- angVel = axis * angle / timeStep;
- }
-
- static void calculateDiffAxisAngle(const b3Transform& transform0, const b3Transform& transform1, b3Vector3& axis, b3Scalar& angle)
- {
- b3Matrix3x3 dmat = transform1.getBasis() * transform0.getBasis().inverse();
- b3Quaternion dorn;
- dmat.getRotation(dorn);
-
- ///floating point inaccuracy can lead to w component > 1..., which breaks
- dorn.normalize();
-
- angle = dorn.getAngle();
- axis = b3MakeVector3(dorn.getX(), dorn.getY(), dorn.getZ());
- axis[3] = b3Scalar(0.);
- //check for axis length
- b3Scalar len = axis.length2();
- if (len < B3_EPSILON * B3_EPSILON)
- axis = b3MakeVector3(b3Scalar(1.), b3Scalar(0.), b3Scalar(0.));
- else
- axis /= b3Sqrt(len);
- }
-};
-
-///The b3ConvexSeparatingDistanceUtil can help speed up convex collision detection
-///by conservatively updating a cached separating distance/vector instead of re-calculating the closest distance
-class b3ConvexSeparatingDistanceUtil
-{
- b3Quaternion m_ornA;
- b3Quaternion m_ornB;
- b3Vector3 m_posA;
- b3Vector3 m_posB;
-
- b3Vector3 m_separatingNormal;
-
- b3Scalar m_boundingRadiusA;
- b3Scalar m_boundingRadiusB;
- b3Scalar m_separatingDistance;
-
-public:
- b3ConvexSeparatingDistanceUtil(b3Scalar boundingRadiusA, b3Scalar boundingRadiusB)
- : m_boundingRadiusA(boundingRadiusA),
- m_boundingRadiusB(boundingRadiusB),
- m_separatingDistance(0.f)
- {
- }
-
- b3Scalar getConservativeSeparatingDistance()
- {
- return m_separatingDistance;
- }
-
- void updateSeparatingDistance(const b3Transform& transA, const b3Transform& transB)
- {
- const b3Vector3& toPosA = transA.getOrigin();
- const b3Vector3& toPosB = transB.getOrigin();
- b3Quaternion toOrnA = transA.getRotation();
- b3Quaternion toOrnB = transB.getRotation();
-
- if (m_separatingDistance > 0.f)
- {
- b3Vector3 linVelA, angVelA, linVelB, angVelB;
- b3TransformUtil::calculateVelocityQuaternion(m_posA, toPosA, m_ornA, toOrnA, b3Scalar(1.), linVelA, angVelA);
- b3TransformUtil::calculateVelocityQuaternion(m_posB, toPosB, m_ornB, toOrnB, b3Scalar(1.), linVelB, angVelB);
- b3Scalar maxAngularProjectedVelocity = angVelA.length() * m_boundingRadiusA + angVelB.length() * m_boundingRadiusB;
- b3Vector3 relLinVel = (linVelB - linVelA);
- b3Scalar relLinVelocLength = relLinVel.dot(m_separatingNormal);
- if (relLinVelocLength < 0.f)
- {
- relLinVelocLength = 0.f;
- }
-
- b3Scalar projectedMotion = maxAngularProjectedVelocity + relLinVelocLength;
- m_separatingDistance -= projectedMotion;
- }
-
- m_posA = toPosA;
- m_posB = toPosB;
- m_ornA = toOrnA;
- m_ornB = toOrnB;
- }
-
- void initSeparatingDistance(const b3Vector3& separatingVector, b3Scalar separatingDistance, const b3Transform& transA, const b3Transform& transB)
- {
- m_separatingDistance = separatingDistance;
-
- if (m_separatingDistance > 0.f)
- {
- m_separatingNormal = separatingVector;
-
- const b3Vector3& toPosA = transA.getOrigin();
- const b3Vector3& toPosB = transB.getOrigin();
- b3Quaternion toOrnA = transA.getRotation();
- b3Quaternion toOrnB = transB.getRotation();
- m_posA = toPosA;
- m_posB = toPosB;
- m_ornA = toOrnA;
- m_ornB = toOrnB;
- }
- }
-};
-
-#endif //B3_TRANSFORM_UTIL_H
diff --git a/thirdparty/bullet/Bullet3Common/b3Vector3.cpp b/thirdparty/bullet/Bullet3Common/b3Vector3.cpp
deleted file mode 100644
index 100fb774c1..0000000000
--- a/thirdparty/bullet/Bullet3Common/b3Vector3.cpp
+++ /dev/null
@@ -1,1637 +0,0 @@
-/*
- Copyright (c) 2011-213 Apple Inc. http://bulletphysics.org
-
- This software is provided 'as-is', without any express or implied warranty.
- In no event will the authors be held liable for any damages arising from the use of this software.
- Permission is granted to anyone to use this software for any purpose,
- including commercial applications, and to alter it and redistribute it freely,
- subject to the following restrictions:
-
- 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
- 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
- 3. This notice may not be removed or altered from any source distribution.
-
- This source version has been altered.
- */
-
-#if defined(_WIN32) || defined(__i386__)
-#define B3_USE_SSE_IN_API
-#endif
-
-#include "b3Vector3.h"
-
-#if defined(B3_USE_SSE) || defined(B3_USE_NEON)
-
-#ifdef __APPLE__
-#include <stdint.h>
-typedef float float4 __attribute__((vector_size(16)));
-#else
-#define float4 __m128
-#endif
-//typedef uint32_t uint4 __attribute__ ((vector_size(16)));
-
-#if defined B3_USE_SSE || defined _WIN32
-
-#define LOG2_ARRAY_SIZE 6
-#define STACK_ARRAY_COUNT (1UL << LOG2_ARRAY_SIZE)
-
-#include <emmintrin.h>
-
-long b3_maxdot_large(const float *vv, const float *vec, unsigned long count, float *dotResult);
-long b3_maxdot_large(const float *vv, const float *vec, unsigned long count, float *dotResult)
-{
- const float4 *vertices = (const float4 *)vv;
- static const unsigned char indexTable[16] = {(unsigned char)-1, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0};
- float4 dotMax = b3Assign128(-B3_INFINITY, -B3_INFINITY, -B3_INFINITY, -B3_INFINITY);
- float4 vvec = _mm_loadu_ps(vec);
- float4 vHi = b3CastiTo128f(_mm_shuffle_epi32(b3CastfTo128i(vvec), 0xaa)); /// zzzz
- float4 vLo = _mm_movelh_ps(vvec, vvec); /// xyxy
-
- long maxIndex = -1L;
-
- size_t segment = 0;
- float4 stack_array[STACK_ARRAY_COUNT];
-
-#if DEBUG
- // memset( stack_array, -1, STACK_ARRAY_COUNT * sizeof(stack_array[0]) );
-#endif
-
- size_t index;
- float4 max;
- // Faster loop without cleanup code for full tiles
- for (segment = 0; segment + STACK_ARRAY_COUNT * 4 <= count; segment += STACK_ARRAY_COUNT * 4)
- {
- max = dotMax;
-
- for (index = 0; index < STACK_ARRAY_COUNT; index += 4)
- { // do four dot products at a time. Carefully avoid touching the w element.
- float4 v0 = vertices[0];
- float4 v1 = vertices[1];
- float4 v2 = vertices[2];
- float4 v3 = vertices[3];
- vertices += 4;
-
- float4 lo0 = _mm_movelh_ps(v0, v1); // x0y0x1y1
- float4 hi0 = _mm_movehl_ps(v1, v0); // z0?0z1?1
- float4 lo1 = _mm_movelh_ps(v2, v3); // x2y2x3y3
- float4 hi1 = _mm_movehl_ps(v3, v2); // z2?2z3?3
-
- lo0 = lo0 * vLo;
- lo1 = lo1 * vLo;
- float4 z = _mm_shuffle_ps(hi0, hi1, 0x88);
- float4 x = _mm_shuffle_ps(lo0, lo1, 0x88);
- float4 y = _mm_shuffle_ps(lo0, lo1, 0xdd);
- z = z * vHi;
- x = x + y;
- x = x + z;
- stack_array[index] = x;
- max = _mm_max_ps(x, max); // control the order here so that max is never NaN even if x is nan
-
- v0 = vertices[0];
- v1 = vertices[1];
- v2 = vertices[2];
- v3 = vertices[3];
- vertices += 4;
-
- lo0 = _mm_movelh_ps(v0, v1); // x0y0x1y1
- hi0 = _mm_movehl_ps(v1, v0); // z0?0z1?1
- lo1 = _mm_movelh_ps(v2, v3); // x2y2x3y3
- hi1 = _mm_movehl_ps(v3, v2); // z2?2z3?3
-
- lo0 = lo0 * vLo;
- lo1 = lo1 * vLo;
- z = _mm_shuffle_ps(hi0, hi1, 0x88);
- x = _mm_shuffle_ps(lo0, lo1, 0x88);
- y = _mm_shuffle_ps(lo0, lo1, 0xdd);
- z = z * vHi;
- x = x + y;
- x = x + z;
- stack_array[index + 1] = x;
- max = _mm_max_ps(x, max); // control the order here so that max is never NaN even if x is nan
-
- v0 = vertices[0];
- v1 = vertices[1];
- v2 = vertices[2];
- v3 = vertices[3];
- vertices += 4;
-
- lo0 = _mm_movelh_ps(v0, v1); // x0y0x1y1
- hi0 = _mm_movehl_ps(v1, v0); // z0?0z1?1
- lo1 = _mm_movelh_ps(v2, v3); // x2y2x3y3
- hi1 = _mm_movehl_ps(v3, v2); // z2?2z3?3
-
- lo0 = lo0 * vLo;
- lo1 = lo1 * vLo;
- z = _mm_shuffle_ps(hi0, hi1, 0x88);
- x = _mm_shuffle_ps(lo0, lo1, 0x88);
- y = _mm_shuffle_ps(lo0, lo1, 0xdd);
- z = z * vHi;
- x = x + y;
- x = x + z;
- stack_array[index + 2] = x;
- max = _mm_max_ps(x, max); // control the order here so that max is never NaN even if x is nan
-
- v0 = vertices[0];
- v1 = vertices[1];
- v2 = vertices[2];
- v3 = vertices[3];
- vertices += 4;
-
- lo0 = _mm_movelh_ps(v0, v1); // x0y0x1y1
- hi0 = _mm_movehl_ps(v1, v0); // z0?0z1?1
- lo1 = _mm_movelh_ps(v2, v3); // x2y2x3y3
- hi1 = _mm_movehl_ps(v3, v2); // z2?2z3?3
-
- lo0 = lo0 * vLo;
- lo1 = lo1 * vLo;
- z = _mm_shuffle_ps(hi0, hi1, 0x88);
- x = _mm_shuffle_ps(lo0, lo1, 0x88);
- y = _mm_shuffle_ps(lo0, lo1, 0xdd);
- z = z * vHi;
- x = x + y;
- x = x + z;
- stack_array[index + 3] = x;
- max = _mm_max_ps(x, max); // control the order here so that max is never NaN even if x is nan
-
- // It is too costly to keep the index of the max here. We will look for it again later. We save a lot of work this way.
- }
-
- // If we found a new max
- if (0xf != _mm_movemask_ps((float4)_mm_cmpeq_ps(max, dotMax)))
- {
- // copy the new max across all lanes of our max accumulator
- max = _mm_max_ps(max, (float4)_mm_shuffle_ps(max, max, 0x4e));
- max = _mm_max_ps(max, (float4)_mm_shuffle_ps(max, max, 0xb1));
-
- dotMax = max;
-
- // find first occurrence of that max
- size_t test;
- for (index = 0; 0 == (test = _mm_movemask_ps(_mm_cmpeq_ps(stack_array[index], max))); index++) // local_count must be a multiple of 4
- {
- }
- // record where it is.
- maxIndex = 4 * index + segment + indexTable[test];
- }
- }
-
- // account for work we've already done
- count -= segment;
-
- // Deal with the last < STACK_ARRAY_COUNT vectors
- max = dotMax;
- index = 0;
-
- if (b3Unlikely(count > 16))
- {
- for (; index + 4 <= count / 4; index += 4)
- { // do four dot products at a time. Carefully avoid touching the w element.
- float4 v0 = vertices[0];
- float4 v1 = vertices[1];
- float4 v2 = vertices[2];
- float4 v3 = vertices[3];
- vertices += 4;
-
- float4 lo0 = _mm_movelh_ps(v0, v1); // x0y0x1y1
- float4 hi0 = _mm_movehl_ps(v1, v0); // z0?0z1?1
- float4 lo1 = _mm_movelh_ps(v2, v3); // x2y2x3y3
- float4 hi1 = _mm_movehl_ps(v3, v2); // z2?2z3?3
-
- lo0 = lo0 * vLo;
- lo1 = lo1 * vLo;
- float4 z = _mm_shuffle_ps(hi0, hi1, 0x88);
- float4 x = _mm_shuffle_ps(lo0, lo1, 0x88);
- float4 y = _mm_shuffle_ps(lo0, lo1, 0xdd);
- z = z * vHi;
- x = x + y;
- x = x + z;
- stack_array[index] = x;
- max = _mm_max_ps(x, max); // control the order here so that max is never NaN even if x is nan
-
- v0 = vertices[0];
- v1 = vertices[1];
- v2 = vertices[2];
- v3 = vertices[3];
- vertices += 4;
-
- lo0 = _mm_movelh_ps(v0, v1); // x0y0x1y1
- hi0 = _mm_movehl_ps(v1, v0); // z0?0z1?1
- lo1 = _mm_movelh_ps(v2, v3); // x2y2x3y3
- hi1 = _mm_movehl_ps(v3, v2); // z2?2z3?3
-
- lo0 = lo0 * vLo;
- lo1 = lo1 * vLo;
- z = _mm_shuffle_ps(hi0, hi1, 0x88);
- x = _mm_shuffle_ps(lo0, lo1, 0x88);
- y = _mm_shuffle_ps(lo0, lo1, 0xdd);
- z = z * vHi;
- x = x + y;
- x = x + z;
- stack_array[index + 1] = x;
- max = _mm_max_ps(x, max); // control the order here so that max is never NaN even if x is nan
-
- v0 = vertices[0];
- v1 = vertices[1];
- v2 = vertices[2];
- v3 = vertices[3];
- vertices += 4;
-
- lo0 = _mm_movelh_ps(v0, v1); // x0y0x1y1
- hi0 = _mm_movehl_ps(v1, v0); // z0?0z1?1
- lo1 = _mm_movelh_ps(v2, v3); // x2y2x3y3
- hi1 = _mm_movehl_ps(v3, v2); // z2?2z3?3
-
- lo0 = lo0 * vLo;
- lo1 = lo1 * vLo;
- z = _mm_shuffle_ps(hi0, hi1, 0x88);
- x = _mm_shuffle_ps(lo0, lo1, 0x88);
- y = _mm_shuffle_ps(lo0, lo1, 0xdd);
- z = z * vHi;
- x = x + y;
- x = x + z;
- stack_array[index + 2] = x;
- max = _mm_max_ps(x, max); // control the order here so that max is never NaN even if x is nan
-
- v0 = vertices[0];
- v1 = vertices[1];
- v2 = vertices[2];
- v3 = vertices[3];
- vertices += 4;
-
- lo0 = _mm_movelh_ps(v0, v1); // x0y0x1y1
- hi0 = _mm_movehl_ps(v1, v0); // z0?0z1?1
- lo1 = _mm_movelh_ps(v2, v3); // x2y2x3y3
- hi1 = _mm_movehl_ps(v3, v2); // z2?2z3?3
-
- lo0 = lo0 * vLo;
- lo1 = lo1 * vLo;
- z = _mm_shuffle_ps(hi0, hi1, 0x88);
- x = _mm_shuffle_ps(lo0, lo1, 0x88);
- y = _mm_shuffle_ps(lo0, lo1, 0xdd);
- z = z * vHi;
- x = x + y;
- x = x + z;
- stack_array[index + 3] = x;
- max = _mm_max_ps(x, max); // control the order here so that max is never NaN even if x is nan
-
- // It is too costly to keep the index of the max here. We will look for it again later. We save a lot of work this way.
- }
- }
-
- size_t localCount = (count & -4L) - 4 * index;
- if (localCount)
- {
-#ifdef __APPLE__
- float4 t0, t1, t2, t3, t4;
- float4 *sap = &stack_array[index + localCount / 4];
- vertices += localCount; // counter the offset
- size_t byteIndex = -(localCount) * sizeof(float);
- //AT&T Code style assembly
- asm volatile(
- ".align 4 \n\
- 0: movaps %[max], %[t2] // move max out of the way to avoid propagating NaNs in max \n\
- movaps (%[vertices], %[byteIndex], 4), %[t0] // vertices[0] \n\
- movaps 16(%[vertices], %[byteIndex], 4), %[t1] // vertices[1] \n\
- movaps %[t0], %[max] // vertices[0] \n\
- movlhps %[t1], %[max] // x0y0x1y1 \n\
- movaps 32(%[vertices], %[byteIndex], 4), %[t3] // vertices[2] \n\
- movaps 48(%[vertices], %[byteIndex], 4), %[t4] // vertices[3] \n\
- mulps %[vLo], %[max] // x0y0x1y1 * vLo \n\
- movhlps %[t0], %[t1] // z0w0z1w1 \n\
- movaps %[t3], %[t0] // vertices[2] \n\
- movlhps %[t4], %[t0] // x2y2x3y3 \n\
- mulps %[vLo], %[t0] // x2y2x3y3 * vLo \n\
- movhlps %[t3], %[t4] // z2w2z3w3 \n\
- shufps $0x88, %[t4], %[t1] // z0z1z2z3 \n\
- mulps %[vHi], %[t1] // z0z1z2z3 * vHi \n\
- movaps %[max], %[t3] // x0y0x1y1 * vLo \n\
- shufps $0x88, %[t0], %[max] // x0x1x2x3 * vLo.x \n\
- shufps $0xdd, %[t0], %[t3] // y0y1y2y3 * vLo.y \n\
- addps %[t3], %[max] // x + y \n\
- addps %[t1], %[max] // x + y + z \n\
- movaps %[max], (%[sap], %[byteIndex]) // record result for later scrutiny \n\
- maxps %[t2], %[max] // record max, restore max \n\
- add $16, %[byteIndex] // advance loop counter\n\
- jnz 0b \n\
- "
- : [max] "+x"(max), [t0] "=&x"(t0), [t1] "=&x"(t1), [t2] "=&x"(t2), [t3] "=&x"(t3), [t4] "=&x"(t4), [byteIndex] "+r"(byteIndex)
- : [vLo] "x"(vLo), [vHi] "x"(vHi), [vertices] "r"(vertices), [sap] "r"(sap)
- : "memory", "cc");
- index += localCount / 4;
-#else
- {
- for (unsigned int i = 0; i < localCount / 4; i++, index++)
- { // do four dot products at a time. Carefully avoid touching the w element.
- float4 v0 = vertices[0];
- float4 v1 = vertices[1];
- float4 v2 = vertices[2];
- float4 v3 = vertices[3];
- vertices += 4;
-
- float4 lo0 = _mm_movelh_ps(v0, v1); // x0y0x1y1
- float4 hi0 = _mm_movehl_ps(v1, v0); // z0?0z1?1
- float4 lo1 = _mm_movelh_ps(v2, v3); // x2y2x3y3
- float4 hi1 = _mm_movehl_ps(v3, v2); // z2?2z3?3
-
- lo0 = lo0 * vLo;
- lo1 = lo1 * vLo;
- float4 z = _mm_shuffle_ps(hi0, hi1, 0x88);
- float4 x = _mm_shuffle_ps(lo0, lo1, 0x88);
- float4 y = _mm_shuffle_ps(lo0, lo1, 0xdd);
- z = z * vHi;
- x = x + y;
- x = x + z;
- stack_array[index] = x;
- max = _mm_max_ps(x, max); // control the order here so that max is never NaN even if x is nan
- }
- }
-#endif //__APPLE__
- }
-
- // process the last few points
- if (count & 3)
- {
- float4 v0, v1, v2, x, y, z;
- switch (count & 3)
- {
- case 3:
- {
- v0 = vertices[0];
- v1 = vertices[1];
- v2 = vertices[2];
-
- // Calculate 3 dot products, transpose, duplicate v2
- float4 lo0 = _mm_movelh_ps(v0, v1); // xyxy.lo
- float4 hi0 = _mm_movehl_ps(v1, v0); // z?z?.lo
- lo0 = lo0 * vLo;
- z = _mm_shuffle_ps(hi0, v2, 0xa8); // z0z1z2z2
- z = z * vHi;
- float4 lo1 = _mm_movelh_ps(v2, v2); // xyxy
- lo1 = lo1 * vLo;
- x = _mm_shuffle_ps(lo0, lo1, 0x88);
- y = _mm_shuffle_ps(lo0, lo1, 0xdd);
- }
- break;
- case 2:
- {
- v0 = vertices[0];
- v1 = vertices[1];
- float4 xy = _mm_movelh_ps(v0, v1);
- z = _mm_movehl_ps(v1, v0);
- xy = xy * vLo;
- z = _mm_shuffle_ps(z, z, 0xa8);
- x = _mm_shuffle_ps(xy, xy, 0xa8);
- y = _mm_shuffle_ps(xy, xy, 0xfd);
- z = z * vHi;
- }
- break;
- case 1:
- {
- float4 xy = vertices[0];
- z = _mm_shuffle_ps(xy, xy, 0xaa);
- xy = xy * vLo;
- z = z * vHi;
- x = _mm_shuffle_ps(xy, xy, 0);
- y = _mm_shuffle_ps(xy, xy, 0x55);
- }
- break;
- }
- x = x + y;
- x = x + z;
- stack_array[index] = x;
- max = _mm_max_ps(x, max); // control the order here so that max is never NaN even if x is nan
- index++;
- }
-
- // if we found a new max.
- if (0 == segment || 0xf != _mm_movemask_ps((float4)_mm_cmpeq_ps(max, dotMax)))
- { // we found a new max. Search for it
- // find max across the max vector, place in all elements of max -- big latency hit here
- max = _mm_max_ps(max, (float4)_mm_shuffle_ps(max, max, 0x4e));
- max = _mm_max_ps(max, (float4)_mm_shuffle_ps(max, max, 0xb1));
-
- // It is slightly faster to do this part in scalar code when count < 8. However, the common case for
- // this where it actually makes a difference is handled in the early out at the top of the function,
- // so it is less than a 1% difference here. I opted for improved code size, fewer branches and reduced
- // complexity, and removed it.
-
- dotMax = max;
-
- // scan for the first occurence of max in the array
- size_t test;
- for (index = 0; 0 == (test = _mm_movemask_ps(_mm_cmpeq_ps(stack_array[index], max))); index++) // local_count must be a multiple of 4
- {
- }
- maxIndex = 4 * index + segment + indexTable[test];
- }
-
- _mm_store_ss(dotResult, dotMax);
- return maxIndex;
-}
-
-long b3_mindot_large(const float *vv, const float *vec, unsigned long count, float *dotResult);
-
-long b3_mindot_large(const float *vv, const float *vec, unsigned long count, float *dotResult)
-{
- const float4 *vertices = (const float4 *)vv;
- static const unsigned char indexTable[16] = {(unsigned char)-1, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0};
-
- float4 dotmin = b3Assign128(B3_INFINITY, B3_INFINITY, B3_INFINITY, B3_INFINITY);
- float4 vvec = _mm_loadu_ps(vec);
- float4 vHi = b3CastiTo128f(_mm_shuffle_epi32(b3CastfTo128i(vvec), 0xaa)); /// zzzz
- float4 vLo = _mm_movelh_ps(vvec, vvec); /// xyxy
-
- long minIndex = -1L;
-
- size_t segment = 0;
- float4 stack_array[STACK_ARRAY_COUNT];
-
-#if DEBUG
- // memset( stack_array, -1, STACK_ARRAY_COUNT * sizeof(stack_array[0]) );
-#endif
-
- size_t index;
- float4 min;
- // Faster loop without cleanup code for full tiles
- for (segment = 0; segment + STACK_ARRAY_COUNT * 4 <= count; segment += STACK_ARRAY_COUNT * 4)
- {
- min = dotmin;
-
- for (index = 0; index < STACK_ARRAY_COUNT; index += 4)
- { // do four dot products at a time. Carefully avoid touching the w element.
- float4 v0 = vertices[0];
- float4 v1 = vertices[1];
- float4 v2 = vertices[2];
- float4 v3 = vertices[3];
- vertices += 4;
-
- float4 lo0 = _mm_movelh_ps(v0, v1); // x0y0x1y1
- float4 hi0 = _mm_movehl_ps(v1, v0); // z0?0z1?1
- float4 lo1 = _mm_movelh_ps(v2, v3); // x2y2x3y3
- float4 hi1 = _mm_movehl_ps(v3, v2); // z2?2z3?3
-
- lo0 = lo0 * vLo;
- lo1 = lo1 * vLo;
- float4 z = _mm_shuffle_ps(hi0, hi1, 0x88);
- float4 x = _mm_shuffle_ps(lo0, lo1, 0x88);
- float4 y = _mm_shuffle_ps(lo0, lo1, 0xdd);
- z = z * vHi;
- x = x + y;
- x = x + z;
- stack_array[index] = x;
- min = _mm_min_ps(x, min); // control the order here so that min is never NaN even if x is nan
-
- v0 = vertices[0];
- v1 = vertices[1];
- v2 = vertices[2];
- v3 = vertices[3];
- vertices += 4;
-
- lo0 = _mm_movelh_ps(v0, v1); // x0y0x1y1
- hi0 = _mm_movehl_ps(v1, v0); // z0?0z1?1
- lo1 = _mm_movelh_ps(v2, v3); // x2y2x3y3
- hi1 = _mm_movehl_ps(v3, v2); // z2?2z3?3
-
- lo0 = lo0 * vLo;
- lo1 = lo1 * vLo;
- z = _mm_shuffle_ps(hi0, hi1, 0x88);
- x = _mm_shuffle_ps(lo0, lo1, 0x88);
- y = _mm_shuffle_ps(lo0, lo1, 0xdd);
- z = z * vHi;
- x = x + y;
- x = x + z;
- stack_array[index + 1] = x;
- min = _mm_min_ps(x, min); // control the order here so that min is never NaN even if x is nan
-
- v0 = vertices[0];
- v1 = vertices[1];
- v2 = vertices[2];
- v3 = vertices[3];
- vertices += 4;
-
- lo0 = _mm_movelh_ps(v0, v1); // x0y0x1y1
- hi0 = _mm_movehl_ps(v1, v0); // z0?0z1?1
- lo1 = _mm_movelh_ps(v2, v3); // x2y2x3y3
- hi1 = _mm_movehl_ps(v3, v2); // z2?2z3?3
-
- lo0 = lo0 * vLo;
- lo1 = lo1 * vLo;
- z = _mm_shuffle_ps(hi0, hi1, 0x88);
- x = _mm_shuffle_ps(lo0, lo1, 0x88);
- y = _mm_shuffle_ps(lo0, lo1, 0xdd);
- z = z * vHi;
- x = x + y;
- x = x + z;
- stack_array[index + 2] = x;
- min = _mm_min_ps(x, min); // control the order here so that min is never NaN even if x is nan
-
- v0 = vertices[0];
- v1 = vertices[1];
- v2 = vertices[2];
- v3 = vertices[3];
- vertices += 4;
-
- lo0 = _mm_movelh_ps(v0, v1); // x0y0x1y1
- hi0 = _mm_movehl_ps(v1, v0); // z0?0z1?1
- lo1 = _mm_movelh_ps(v2, v3); // x2y2x3y3
- hi1 = _mm_movehl_ps(v3, v2); // z2?2z3?3
-
- lo0 = lo0 * vLo;
- lo1 = lo1 * vLo;
- z = _mm_shuffle_ps(hi0, hi1, 0x88);
- x = _mm_shuffle_ps(lo0, lo1, 0x88);
- y = _mm_shuffle_ps(lo0, lo1, 0xdd);
- z = z * vHi;
- x = x + y;
- x = x + z;
- stack_array[index + 3] = x;
- min = _mm_min_ps(x, min); // control the order here so that min is never NaN even if x is nan
-
- // It is too costly to keep the index of the min here. We will look for it again later. We save a lot of work this way.
- }
-
- // If we found a new min
- if (0xf != _mm_movemask_ps((float4)_mm_cmpeq_ps(min, dotmin)))
- {
- // copy the new min across all lanes of our min accumulator
- min = _mm_min_ps(min, (float4)_mm_shuffle_ps(min, min, 0x4e));
- min = _mm_min_ps(min, (float4)_mm_shuffle_ps(min, min, 0xb1));
-
- dotmin = min;
-
- // find first occurrence of that min
- size_t test;
- for (index = 0; 0 == (test = _mm_movemask_ps(_mm_cmpeq_ps(stack_array[index], min))); index++) // local_count must be a multiple of 4
- {
- }
- // record where it is.
- minIndex = 4 * index + segment + indexTable[test];
- }
- }
-
- // account for work we've already done
- count -= segment;
-
- // Deal with the last < STACK_ARRAY_COUNT vectors
- min = dotmin;
- index = 0;
-
- if (b3Unlikely(count > 16))
- {
- for (; index + 4 <= count / 4; index += 4)
- { // do four dot products at a time. Carefully avoid touching the w element.
- float4 v0 = vertices[0];
- float4 v1 = vertices[1];
- float4 v2 = vertices[2];
- float4 v3 = vertices[3];
- vertices += 4;
-
- float4 lo0 = _mm_movelh_ps(v0, v1); // x0y0x1y1
- float4 hi0 = _mm_movehl_ps(v1, v0); // z0?0z1?1
- float4 lo1 = _mm_movelh_ps(v2, v3); // x2y2x3y3
- float4 hi1 = _mm_movehl_ps(v3, v2); // z2?2z3?3
-
- lo0 = lo0 * vLo;
- lo1 = lo1 * vLo;
- float4 z = _mm_shuffle_ps(hi0, hi1, 0x88);
- float4 x = _mm_shuffle_ps(lo0, lo1, 0x88);
- float4 y = _mm_shuffle_ps(lo0, lo1, 0xdd);
- z = z * vHi;
- x = x + y;
- x = x + z;
- stack_array[index] = x;
- min = _mm_min_ps(x, min); // control the order here so that min is never NaN even if x is nan
-
- v0 = vertices[0];
- v1 = vertices[1];
- v2 = vertices[2];
- v3 = vertices[3];
- vertices += 4;
-
- lo0 = _mm_movelh_ps(v0, v1); // x0y0x1y1
- hi0 = _mm_movehl_ps(v1, v0); // z0?0z1?1
- lo1 = _mm_movelh_ps(v2, v3); // x2y2x3y3
- hi1 = _mm_movehl_ps(v3, v2); // z2?2z3?3
-
- lo0 = lo0 * vLo;
- lo1 = lo1 * vLo;
- z = _mm_shuffle_ps(hi0, hi1, 0x88);
- x = _mm_shuffle_ps(lo0, lo1, 0x88);
- y = _mm_shuffle_ps(lo0, lo1, 0xdd);
- z = z * vHi;
- x = x + y;
- x = x + z;
- stack_array[index + 1] = x;
- min = _mm_min_ps(x, min); // control the order here so that min is never NaN even if x is nan
-
- v0 = vertices[0];
- v1 = vertices[1];
- v2 = vertices[2];
- v3 = vertices[3];
- vertices += 4;
-
- lo0 = _mm_movelh_ps(v0, v1); // x0y0x1y1
- hi0 = _mm_movehl_ps(v1, v0); // z0?0z1?1
- lo1 = _mm_movelh_ps(v2, v3); // x2y2x3y3
- hi1 = _mm_movehl_ps(v3, v2); // z2?2z3?3
-
- lo0 = lo0 * vLo;
- lo1 = lo1 * vLo;
- z = _mm_shuffle_ps(hi0, hi1, 0x88);
- x = _mm_shuffle_ps(lo0, lo1, 0x88);
- y = _mm_shuffle_ps(lo0, lo1, 0xdd);
- z = z * vHi;
- x = x + y;
- x = x + z;
- stack_array[index + 2] = x;
- min = _mm_min_ps(x, min); // control the order here so that min is never NaN even if x is nan
-
- v0 = vertices[0];
- v1 = vertices[1];
- v2 = vertices[2];
- v3 = vertices[3];
- vertices += 4;
-
- lo0 = _mm_movelh_ps(v0, v1); // x0y0x1y1
- hi0 = _mm_movehl_ps(v1, v0); // z0?0z1?1
- lo1 = _mm_movelh_ps(v2, v3); // x2y2x3y3
- hi1 = _mm_movehl_ps(v3, v2); // z2?2z3?3
-
- lo0 = lo0 * vLo;
- lo1 = lo1 * vLo;
- z = _mm_shuffle_ps(hi0, hi1, 0x88);
- x = _mm_shuffle_ps(lo0, lo1, 0x88);
- y = _mm_shuffle_ps(lo0, lo1, 0xdd);
- z = z * vHi;
- x = x + y;
- x = x + z;
- stack_array[index + 3] = x;
- min = _mm_min_ps(x, min); // control the order here so that min is never NaN even if x is nan
-
- // It is too costly to keep the index of the min here. We will look for it again later. We save a lot of work this way.
- }
- }
-
- size_t localCount = (count & -4L) - 4 * index;
- if (localCount)
- {
-#ifdef __APPLE__
- vertices += localCount; // counter the offset
- float4 t0, t1, t2, t3, t4;
- size_t byteIndex = -(localCount) * sizeof(float);
- float4 *sap = &stack_array[index + localCount / 4];
-
- asm volatile(
- ".align 4 \n\
- 0: movaps %[min], %[t2] // move min out of the way to avoid propagating NaNs in min \n\
- movaps (%[vertices], %[byteIndex], 4), %[t0] // vertices[0] \n\
- movaps 16(%[vertices], %[byteIndex], 4), %[t1] // vertices[1] \n\
- movaps %[t0], %[min] // vertices[0] \n\
- movlhps %[t1], %[min] // x0y0x1y1 \n\
- movaps 32(%[vertices], %[byteIndex], 4), %[t3] // vertices[2] \n\
- movaps 48(%[vertices], %[byteIndex], 4), %[t4] // vertices[3] \n\
- mulps %[vLo], %[min] // x0y0x1y1 * vLo \n\
- movhlps %[t0], %[t1] // z0w0z1w1 \n\
- movaps %[t3], %[t0] // vertices[2] \n\
- movlhps %[t4], %[t0] // x2y2x3y3 \n\
- movhlps %[t3], %[t4] // z2w2z3w3 \n\
- mulps %[vLo], %[t0] // x2y2x3y3 * vLo \n\
- shufps $0x88, %[t4], %[t1] // z0z1z2z3 \n\
- mulps %[vHi], %[t1] // z0z1z2z3 * vHi \n\
- movaps %[min], %[t3] // x0y0x1y1 * vLo \n\
- shufps $0x88, %[t0], %[min] // x0x1x2x3 * vLo.x \n\
- shufps $0xdd, %[t0], %[t3] // y0y1y2y3 * vLo.y \n\
- addps %[t3], %[min] // x + y \n\
- addps %[t1], %[min] // x + y + z \n\
- movaps %[min], (%[sap], %[byteIndex]) // record result for later scrutiny \n\
- minps %[t2], %[min] // record min, restore min \n\
- add $16, %[byteIndex] // advance loop counter\n\
- jnz 0b \n\
- "
- : [min] "+x"(min), [t0] "=&x"(t0), [t1] "=&x"(t1), [t2] "=&x"(t2), [t3] "=&x"(t3), [t4] "=&x"(t4), [byteIndex] "+r"(byteIndex)
- : [vLo] "x"(vLo), [vHi] "x"(vHi), [vertices] "r"(vertices), [sap] "r"(sap)
- : "memory", "cc");
- index += localCount / 4;
-#else
- {
- for (unsigned int i = 0; i < localCount / 4; i++, index++)
- { // do four dot products at a time. Carefully avoid touching the w element.
- float4 v0 = vertices[0];
- float4 v1 = vertices[1];
- float4 v2 = vertices[2];
- float4 v3 = vertices[3];
- vertices += 4;
-
- float4 lo0 = _mm_movelh_ps(v0, v1); // x0y0x1y1
- float4 hi0 = _mm_movehl_ps(v1, v0); // z0?0z1?1
- float4 lo1 = _mm_movelh_ps(v2, v3); // x2y2x3y3
- float4 hi1 = _mm_movehl_ps(v3, v2); // z2?2z3?3
-
- lo0 = lo0 * vLo;
- lo1 = lo1 * vLo;
- float4 z = _mm_shuffle_ps(hi0, hi1, 0x88);
- float4 x = _mm_shuffle_ps(lo0, lo1, 0x88);
- float4 y = _mm_shuffle_ps(lo0, lo1, 0xdd);
- z = z * vHi;
- x = x + y;
- x = x + z;
- stack_array[index] = x;
- min = _mm_min_ps(x, min); // control the order here so that max is never NaN even if x is nan
- }
- }
-
-#endif
- }
-
- // process the last few points
- if (count & 3)
- {
- float4 v0, v1, v2, x, y, z;
- switch (count & 3)
- {
- case 3:
- {
- v0 = vertices[0];
- v1 = vertices[1];
- v2 = vertices[2];
-
- // Calculate 3 dot products, transpose, duplicate v2
- float4 lo0 = _mm_movelh_ps(v0, v1); // xyxy.lo
- float4 hi0 = _mm_movehl_ps(v1, v0); // z?z?.lo
- lo0 = lo0 * vLo;
- z = _mm_shuffle_ps(hi0, v2, 0xa8); // z0z1z2z2
- z = z * vHi;
- float4 lo1 = _mm_movelh_ps(v2, v2); // xyxy
- lo1 = lo1 * vLo;
- x = _mm_shuffle_ps(lo0, lo1, 0x88);
- y = _mm_shuffle_ps(lo0, lo1, 0xdd);
- }
- break;
- case 2:
- {
- v0 = vertices[0];
- v1 = vertices[1];
- float4 xy = _mm_movelh_ps(v0, v1);
- z = _mm_movehl_ps(v1, v0);
- xy = xy * vLo;
- z = _mm_shuffle_ps(z, z, 0xa8);
- x = _mm_shuffle_ps(xy, xy, 0xa8);
- y = _mm_shuffle_ps(xy, xy, 0xfd);
- z = z * vHi;
- }
- break;
- case 1:
- {
- float4 xy = vertices[0];
- z = _mm_shuffle_ps(xy, xy, 0xaa);
- xy = xy * vLo;
- z = z * vHi;
- x = _mm_shuffle_ps(xy, xy, 0);
- y = _mm_shuffle_ps(xy, xy, 0x55);
- }
- break;
- }
- x = x + y;
- x = x + z;
- stack_array[index] = x;
- min = _mm_min_ps(x, min); // control the order here so that min is never NaN even if x is nan
- index++;
- }
-
- // if we found a new min.
- if (0 == segment || 0xf != _mm_movemask_ps((float4)_mm_cmpeq_ps(min, dotmin)))
- { // we found a new min. Search for it
- // find min across the min vector, place in all elements of min -- big latency hit here
- min = _mm_min_ps(min, (float4)_mm_shuffle_ps(min, min, 0x4e));
- min = _mm_min_ps(min, (float4)_mm_shuffle_ps(min, min, 0xb1));
-
- // It is slightly faster to do this part in scalar code when count < 8. However, the common case for
- // this where it actually makes a difference is handled in the early out at the top of the function,
- // so it is less than a 1% difference here. I opted for improved code size, fewer branches and reduced
- // complexity, and removed it.
-
- dotmin = min;
-
- // scan for the first occurence of min in the array
- size_t test;
- for (index = 0; 0 == (test = _mm_movemask_ps(_mm_cmpeq_ps(stack_array[index], min))); index++) // local_count must be a multiple of 4
- {
- }
- minIndex = 4 * index + segment + indexTable[test];
- }
-
- _mm_store_ss(dotResult, dotmin);
- return minIndex;
-}
-
-#elif defined B3_USE_NEON
-#define ARM_NEON_GCC_COMPATIBILITY 1
-#include <arm_neon.h>
-
-static long b3_maxdot_large_v0(const float *vv, const float *vec, unsigned long count, float *dotResult);
-static long b3_maxdot_large_v1(const float *vv, const float *vec, unsigned long count, float *dotResult);
-static long b3_maxdot_large_sel(const float *vv, const float *vec, unsigned long count, float *dotResult);
-static long b3_mindot_large_v0(const float *vv, const float *vec, unsigned long count, float *dotResult);
-static long b3_mindot_large_v1(const float *vv, const float *vec, unsigned long count, float *dotResult);
-static long b3_mindot_large_sel(const float *vv, const float *vec, unsigned long count, float *dotResult);
-
-long (*b3_maxdot_large)(const float *vv, const float *vec, unsigned long count, float *dotResult) = b3_maxdot_large_sel;
-long (*b3_mindot_large)(const float *vv, const float *vec, unsigned long count, float *dotResult) = b3_mindot_large_sel;
-
-extern "C"
-{
- int _get_cpu_capabilities(void);
-}
-
-static long b3_maxdot_large_sel(const float *vv, const float *vec, unsigned long count, float *dotResult)
-{
- if (_get_cpu_capabilities() & 0x2000)
- b3_maxdot_large = _maxdot_large_v1;
- else
- b3_maxdot_large = _maxdot_large_v0;
-
- return b3_maxdot_large(vv, vec, count, dotResult);
-}
-
-static long b3_mindot_large_sel(const float *vv, const float *vec, unsigned long count, float *dotResult)
-{
- if (_get_cpu_capabilities() & 0x2000)
- b3_mindot_large = _mindot_large_v1;
- else
- b3_mindot_large = _mindot_large_v0;
-
- return b3_mindot_large(vv, vec, count, dotResult);
-}
-
-#define vld1q_f32_aligned_postincrement(_ptr) ({ float32x4_t _r; asm( "vld1.f32 {%0}, [%1, :128]!\n" : "=w" (_r), "+r" (_ptr) ); /*return*/ _r; })
-
-long b3_maxdot_large_v0(const float *vv, const float *vec, unsigned long count, float *dotResult)
-{
- unsigned long i = 0;
- float32x4_t vvec = vld1q_f32_aligned_postincrement(vec);
- float32x2_t vLo = vget_low_f32(vvec);
- float32x2_t vHi = vdup_lane_f32(vget_high_f32(vvec), 0);
- float32x2_t dotMaxLo = (float32x2_t){-B3_INFINITY, -B3_INFINITY};
- float32x2_t dotMaxHi = (float32x2_t){-B3_INFINITY, -B3_INFINITY};
- uint32x2_t indexLo = (uint32x2_t){0, 1};
- uint32x2_t indexHi = (uint32x2_t){2, 3};
- uint32x2_t iLo = (uint32x2_t){-1, -1};
- uint32x2_t iHi = (uint32x2_t){-1, -1};
- const uint32x2_t four = (uint32x2_t){4, 4};
-
- for (; i + 8 <= count; i += 8)
- {
- float32x4_t v0 = vld1q_f32_aligned_postincrement(vv);
- float32x4_t v1 = vld1q_f32_aligned_postincrement(vv);
- float32x4_t v2 = vld1q_f32_aligned_postincrement(vv);
- float32x4_t v3 = vld1q_f32_aligned_postincrement(vv);
-
- float32x2_t xy0 = vmul_f32(vget_low_f32(v0), vLo);
- float32x2_t xy1 = vmul_f32(vget_low_f32(v1), vLo);
- float32x2_t xy2 = vmul_f32(vget_low_f32(v2), vLo);
- float32x2_t xy3 = vmul_f32(vget_low_f32(v3), vLo);
-
- float32x2x2_t z0 = vtrn_f32(vget_high_f32(v0), vget_high_f32(v1));
- float32x2x2_t z1 = vtrn_f32(vget_high_f32(v2), vget_high_f32(v3));
- float32x2_t zLo = vmul_f32(z0.val[0], vHi);
- float32x2_t zHi = vmul_f32(z1.val[0], vHi);
-
- float32x2_t rLo = vpadd_f32(xy0, xy1);
- float32x2_t rHi = vpadd_f32(xy2, xy3);
- rLo = vadd_f32(rLo, zLo);
- rHi = vadd_f32(rHi, zHi);
-
- uint32x2_t maskLo = vcgt_f32(rLo, dotMaxLo);
- uint32x2_t maskHi = vcgt_f32(rHi, dotMaxHi);
- dotMaxLo = vbsl_f32(maskLo, rLo, dotMaxLo);
- dotMaxHi = vbsl_f32(maskHi, rHi, dotMaxHi);
- iLo = vbsl_u32(maskLo, indexLo, iLo);
- iHi = vbsl_u32(maskHi, indexHi, iHi);
- indexLo = vadd_u32(indexLo, four);
- indexHi = vadd_u32(indexHi, four);
-
- v0 = vld1q_f32_aligned_postincrement(vv);
- v1 = vld1q_f32_aligned_postincrement(vv);
- v2 = vld1q_f32_aligned_postincrement(vv);
- v3 = vld1q_f32_aligned_postincrement(vv);
-
- xy0 = vmul_f32(vget_low_f32(v0), vLo);
- xy1 = vmul_f32(vget_low_f32(v1), vLo);
- xy2 = vmul_f32(vget_low_f32(v2), vLo);
- xy3 = vmul_f32(vget_low_f32(v3), vLo);
-
- z0 = vtrn_f32(vget_high_f32(v0), vget_high_f32(v1));
- z1 = vtrn_f32(vget_high_f32(v2), vget_high_f32(v3));
- zLo = vmul_f32(z0.val[0], vHi);
- zHi = vmul_f32(z1.val[0], vHi);
-
- rLo = vpadd_f32(xy0, xy1);
- rHi = vpadd_f32(xy2, xy3);
- rLo = vadd_f32(rLo, zLo);
- rHi = vadd_f32(rHi, zHi);
-
- maskLo = vcgt_f32(rLo, dotMaxLo);
- maskHi = vcgt_f32(rHi, dotMaxHi);
- dotMaxLo = vbsl_f32(maskLo, rLo, dotMaxLo);
- dotMaxHi = vbsl_f32(maskHi, rHi, dotMaxHi);
- iLo = vbsl_u32(maskLo, indexLo, iLo);
- iHi = vbsl_u32(maskHi, indexHi, iHi);
- indexLo = vadd_u32(indexLo, four);
- indexHi = vadd_u32(indexHi, four);
- }
-
- for (; i + 4 <= count; i += 4)
- {
- float32x4_t v0 = vld1q_f32_aligned_postincrement(vv);
- float32x4_t v1 = vld1q_f32_aligned_postincrement(vv);
- float32x4_t v2 = vld1q_f32_aligned_postincrement(vv);
- float32x4_t v3 = vld1q_f32_aligned_postincrement(vv);
-
- float32x2_t xy0 = vmul_f32(vget_low_f32(v0), vLo);
- float32x2_t xy1 = vmul_f32(vget_low_f32(v1), vLo);
- float32x2_t xy2 = vmul_f32(vget_low_f32(v2), vLo);
- float32x2_t xy3 = vmul_f32(vget_low_f32(v3), vLo);
-
- float32x2x2_t z0 = vtrn_f32(vget_high_f32(v0), vget_high_f32(v1));
- float32x2x2_t z1 = vtrn_f32(vget_high_f32(v2), vget_high_f32(v3));
- float32x2_t zLo = vmul_f32(z0.val[0], vHi);
- float32x2_t zHi = vmul_f32(z1.val[0], vHi);
-
- float32x2_t rLo = vpadd_f32(xy0, xy1);
- float32x2_t rHi = vpadd_f32(xy2, xy3);
- rLo = vadd_f32(rLo, zLo);
- rHi = vadd_f32(rHi, zHi);
-
- uint32x2_t maskLo = vcgt_f32(rLo, dotMaxLo);
- uint32x2_t maskHi = vcgt_f32(rHi, dotMaxHi);
- dotMaxLo = vbsl_f32(maskLo, rLo, dotMaxLo);
- dotMaxHi = vbsl_f32(maskHi, rHi, dotMaxHi);
- iLo = vbsl_u32(maskLo, indexLo, iLo);
- iHi = vbsl_u32(maskHi, indexHi, iHi);
- indexLo = vadd_u32(indexLo, four);
- indexHi = vadd_u32(indexHi, four);
- }
-
- switch (count & 3)
- {
- case 3:
- {
- float32x4_t v0 = vld1q_f32_aligned_postincrement(vv);
- float32x4_t v1 = vld1q_f32_aligned_postincrement(vv);
- float32x4_t v2 = vld1q_f32_aligned_postincrement(vv);
-
- float32x2_t xy0 = vmul_f32(vget_low_f32(v0), vLo);
- float32x2_t xy1 = vmul_f32(vget_low_f32(v1), vLo);
- float32x2_t xy2 = vmul_f32(vget_low_f32(v2), vLo);
-
- float32x2x2_t z0 = vtrn_f32(vget_high_f32(v0), vget_high_f32(v1));
- float32x2_t zLo = vmul_f32(z0.val[0], vHi);
- float32x2_t zHi = vmul_f32(vdup_lane_f32(vget_high_f32(v2), 0), vHi);
-
- float32x2_t rLo = vpadd_f32(xy0, xy1);
- float32x2_t rHi = vpadd_f32(xy2, xy2);
- rLo = vadd_f32(rLo, zLo);
- rHi = vadd_f32(rHi, zHi);
-
- uint32x2_t maskLo = vcgt_f32(rLo, dotMaxLo);
- uint32x2_t maskHi = vcgt_f32(rHi, dotMaxHi);
- dotMaxLo = vbsl_f32(maskLo, rLo, dotMaxLo);
- dotMaxHi = vbsl_f32(maskHi, rHi, dotMaxHi);
- iLo = vbsl_u32(maskLo, indexLo, iLo);
- iHi = vbsl_u32(maskHi, indexHi, iHi);
- }
- break;
- case 2:
- {
- float32x4_t v0 = vld1q_f32_aligned_postincrement(vv);
- float32x4_t v1 = vld1q_f32_aligned_postincrement(vv);
-
- float32x2_t xy0 = vmul_f32(vget_low_f32(v0), vLo);
- float32x2_t xy1 = vmul_f32(vget_low_f32(v1), vLo);
-
- float32x2x2_t z0 = vtrn_f32(vget_high_f32(v0), vget_high_f32(v1));
- float32x2_t zLo = vmul_f32(z0.val[0], vHi);
-
- float32x2_t rLo = vpadd_f32(xy0, xy1);
- rLo = vadd_f32(rLo, zLo);
-
- uint32x2_t maskLo = vcgt_f32(rLo, dotMaxLo);
- dotMaxLo = vbsl_f32(maskLo, rLo, dotMaxLo);
- iLo = vbsl_u32(maskLo, indexLo, iLo);
- }
- break;
- case 1:
- {
- float32x4_t v0 = vld1q_f32_aligned_postincrement(vv);
- float32x2_t xy0 = vmul_f32(vget_low_f32(v0), vLo);
- float32x2_t z0 = vdup_lane_f32(vget_high_f32(v0), 0);
- float32x2_t zLo = vmul_f32(z0, vHi);
- float32x2_t rLo = vpadd_f32(xy0, xy0);
- rLo = vadd_f32(rLo, zLo);
- uint32x2_t maskLo = vcgt_f32(rLo, dotMaxLo);
- dotMaxLo = vbsl_f32(maskLo, rLo, dotMaxLo);
- iLo = vbsl_u32(maskLo, indexLo, iLo);
- }
- break;
-
- default:
- break;
- }
-
- // select best answer between hi and lo results
- uint32x2_t mask = vcgt_f32(dotMaxHi, dotMaxLo);
- dotMaxLo = vbsl_f32(mask, dotMaxHi, dotMaxLo);
- iLo = vbsl_u32(mask, iHi, iLo);
-
- // select best answer between even and odd results
- dotMaxHi = vdup_lane_f32(dotMaxLo, 1);
- iHi = vdup_lane_u32(iLo, 1);
- mask = vcgt_f32(dotMaxHi, dotMaxLo);
- dotMaxLo = vbsl_f32(mask, dotMaxHi, dotMaxLo);
- iLo = vbsl_u32(mask, iHi, iLo);
-
- *dotResult = vget_lane_f32(dotMaxLo, 0);
- return vget_lane_u32(iLo, 0);
-}
-
-long b3_maxdot_large_v1(const float *vv, const float *vec, unsigned long count, float *dotResult)
-{
- float32x4_t vvec = vld1q_f32_aligned_postincrement(vec);
- float32x4_t vLo = vcombine_f32(vget_low_f32(vvec), vget_low_f32(vvec));
- float32x4_t vHi = vdupq_lane_f32(vget_high_f32(vvec), 0);
- const uint32x4_t four = (uint32x4_t){4, 4, 4, 4};
- uint32x4_t local_index = (uint32x4_t){0, 1, 2, 3};
- uint32x4_t index = (uint32x4_t){-1, -1, -1, -1};
- float32x4_t maxDot = (float32x4_t){-B3_INFINITY, -B3_INFINITY, -B3_INFINITY, -B3_INFINITY};
-
- unsigned long i = 0;
- for (; i + 8 <= count; i += 8)
- {
- float32x4_t v0 = vld1q_f32_aligned_postincrement(vv);
- float32x4_t v1 = vld1q_f32_aligned_postincrement(vv);
- float32x4_t v2 = vld1q_f32_aligned_postincrement(vv);
- float32x4_t v3 = vld1q_f32_aligned_postincrement(vv);
-
- // the next two lines should resolve to a single vswp d, d
- float32x4_t xy0 = vcombine_f32(vget_low_f32(v0), vget_low_f32(v1));
- float32x4_t xy1 = vcombine_f32(vget_low_f32(v2), vget_low_f32(v3));
- // the next two lines should resolve to a single vswp d, d
- float32x4_t z0 = vcombine_f32(vget_high_f32(v0), vget_high_f32(v1));
- float32x4_t z1 = vcombine_f32(vget_high_f32(v2), vget_high_f32(v3));
-
- xy0 = vmulq_f32(xy0, vLo);
- xy1 = vmulq_f32(xy1, vLo);
-
- float32x4x2_t zb = vuzpq_f32(z0, z1);
- float32x4_t z = vmulq_f32(zb.val[0], vHi);
- float32x4x2_t xy = vuzpq_f32(xy0, xy1);
- float32x4_t x = vaddq_f32(xy.val[0], xy.val[1]);
- x = vaddq_f32(x, z);
-
- uint32x4_t mask = vcgtq_f32(x, maxDot);
- maxDot = vbslq_f32(mask, x, maxDot);
- index = vbslq_u32(mask, local_index, index);
- local_index = vaddq_u32(local_index, four);
-
- v0 = vld1q_f32_aligned_postincrement(vv);
- v1 = vld1q_f32_aligned_postincrement(vv);
- v2 = vld1q_f32_aligned_postincrement(vv);
- v3 = vld1q_f32_aligned_postincrement(vv);
-
- // the next two lines should resolve to a single vswp d, d
- xy0 = vcombine_f32(vget_low_f32(v0), vget_low_f32(v1));
- xy1 = vcombine_f32(vget_low_f32(v2), vget_low_f32(v3));
- // the next two lines should resolve to a single vswp d, d
- z0 = vcombine_f32(vget_high_f32(v0), vget_high_f32(v1));
- z1 = vcombine_f32(vget_high_f32(v2), vget_high_f32(v3));
-
- xy0 = vmulq_f32(xy0, vLo);
- xy1 = vmulq_f32(xy1, vLo);
-
- zb = vuzpq_f32(z0, z1);
- z = vmulq_f32(zb.val[0], vHi);
- xy = vuzpq_f32(xy0, xy1);
- x = vaddq_f32(xy.val[0], xy.val[1]);
- x = vaddq_f32(x, z);
-
- mask = vcgtq_f32(x, maxDot);
- maxDot = vbslq_f32(mask, x, maxDot);
- index = vbslq_u32(mask, local_index, index);
- local_index = vaddq_u32(local_index, four);
- }
-
- for (; i + 4 <= count; i += 4)
- {
- float32x4_t v0 = vld1q_f32_aligned_postincrement(vv);
- float32x4_t v1 = vld1q_f32_aligned_postincrement(vv);
- float32x4_t v2 = vld1q_f32_aligned_postincrement(vv);
- float32x4_t v3 = vld1q_f32_aligned_postincrement(vv);
-
- // the next two lines should resolve to a single vswp d, d
- float32x4_t xy0 = vcombine_f32(vget_low_f32(v0), vget_low_f32(v1));
- float32x4_t xy1 = vcombine_f32(vget_low_f32(v2), vget_low_f32(v3));
- // the next two lines should resolve to a single vswp d, d
- float32x4_t z0 = vcombine_f32(vget_high_f32(v0), vget_high_f32(v1));
- float32x4_t z1 = vcombine_f32(vget_high_f32(v2), vget_high_f32(v3));
-
- xy0 = vmulq_f32(xy0, vLo);
- xy1 = vmulq_f32(xy1, vLo);
-
- float32x4x2_t zb = vuzpq_f32(z0, z1);
- float32x4_t z = vmulq_f32(zb.val[0], vHi);
- float32x4x2_t xy = vuzpq_f32(xy0, xy1);
- float32x4_t x = vaddq_f32(xy.val[0], xy.val[1]);
- x = vaddq_f32(x, z);
-
- uint32x4_t mask = vcgtq_f32(x, maxDot);
- maxDot = vbslq_f32(mask, x, maxDot);
- index = vbslq_u32(mask, local_index, index);
- local_index = vaddq_u32(local_index, four);
- }
-
- switch (count & 3)
- {
- case 3:
- {
- float32x4_t v0 = vld1q_f32_aligned_postincrement(vv);
- float32x4_t v1 = vld1q_f32_aligned_postincrement(vv);
- float32x4_t v2 = vld1q_f32_aligned_postincrement(vv);
-
- // the next two lines should resolve to a single vswp d, d
- float32x4_t xy0 = vcombine_f32(vget_low_f32(v0), vget_low_f32(v1));
- float32x4_t xy1 = vcombine_f32(vget_low_f32(v2), vget_low_f32(v2));
- // the next two lines should resolve to a single vswp d, d
- float32x4_t z0 = vcombine_f32(vget_high_f32(v0), vget_high_f32(v1));
- float32x4_t z1 = vcombine_f32(vget_high_f32(v2), vget_high_f32(v2));
-
- xy0 = vmulq_f32(xy0, vLo);
- xy1 = vmulq_f32(xy1, vLo);
-
- float32x4x2_t zb = vuzpq_f32(z0, z1);
- float32x4_t z = vmulq_f32(zb.val[0], vHi);
- float32x4x2_t xy = vuzpq_f32(xy0, xy1);
- float32x4_t x = vaddq_f32(xy.val[0], xy.val[1]);
- x = vaddq_f32(x, z);
-
- uint32x4_t mask = vcgtq_f32(x, maxDot);
- maxDot = vbslq_f32(mask, x, maxDot);
- index = vbslq_u32(mask, local_index, index);
- local_index = vaddq_u32(local_index, four);
- }
- break;
-
- case 2:
- {
- float32x4_t v0 = vld1q_f32_aligned_postincrement(vv);
- float32x4_t v1 = vld1q_f32_aligned_postincrement(vv);
-
- // the next two lines should resolve to a single vswp d, d
- float32x4_t xy0 = vcombine_f32(vget_low_f32(v0), vget_low_f32(v1));
- // the next two lines should resolve to a single vswp d, d
- float32x4_t z0 = vcombine_f32(vget_high_f32(v0), vget_high_f32(v1));
-
- xy0 = vmulq_f32(xy0, vLo);
-
- float32x4x2_t zb = vuzpq_f32(z0, z0);
- float32x4_t z = vmulq_f32(zb.val[0], vHi);
- float32x4x2_t xy = vuzpq_f32(xy0, xy0);
- float32x4_t x = vaddq_f32(xy.val[0], xy.val[1]);
- x = vaddq_f32(x, z);
-
- uint32x4_t mask = vcgtq_f32(x, maxDot);
- maxDot = vbslq_f32(mask, x, maxDot);
- index = vbslq_u32(mask, local_index, index);
- local_index = vaddq_u32(local_index, four);
- }
- break;
-
- case 1:
- {
- float32x4_t v0 = vld1q_f32_aligned_postincrement(vv);
-
- // the next two lines should resolve to a single vswp d, d
- float32x4_t xy0 = vcombine_f32(vget_low_f32(v0), vget_low_f32(v0));
- // the next two lines should resolve to a single vswp d, d
- float32x4_t z = vdupq_lane_f32(vget_high_f32(v0), 0);
-
- xy0 = vmulq_f32(xy0, vLo);
-
- z = vmulq_f32(z, vHi);
- float32x4x2_t xy = vuzpq_f32(xy0, xy0);
- float32x4_t x = vaddq_f32(xy.val[0], xy.val[1]);
- x = vaddq_f32(x, z);
-
- uint32x4_t mask = vcgtq_f32(x, maxDot);
- maxDot = vbslq_f32(mask, x, maxDot);
- index = vbslq_u32(mask, local_index, index);
- local_index = vaddq_u32(local_index, four);
- }
- break;
-
- default:
- break;
- }
-
- // select best answer between hi and lo results
- uint32x2_t mask = vcgt_f32(vget_high_f32(maxDot), vget_low_f32(maxDot));
- float32x2_t maxDot2 = vbsl_f32(mask, vget_high_f32(maxDot), vget_low_f32(maxDot));
- uint32x2_t index2 = vbsl_u32(mask, vget_high_u32(index), vget_low_u32(index));
-
- // select best answer between even and odd results
- float32x2_t maxDotO = vdup_lane_f32(maxDot2, 1);
- uint32x2_t indexHi = vdup_lane_u32(index2, 1);
- mask = vcgt_f32(maxDotO, maxDot2);
- maxDot2 = vbsl_f32(mask, maxDotO, maxDot2);
- index2 = vbsl_u32(mask, indexHi, index2);
-
- *dotResult = vget_lane_f32(maxDot2, 0);
- return vget_lane_u32(index2, 0);
-}
-
-long b3_mindot_large_v0(const float *vv, const float *vec, unsigned long count, float *dotResult)
-{
- unsigned long i = 0;
- float32x4_t vvec = vld1q_f32_aligned_postincrement(vec);
- float32x2_t vLo = vget_low_f32(vvec);
- float32x2_t vHi = vdup_lane_f32(vget_high_f32(vvec), 0);
- float32x2_t dotMinLo = (float32x2_t){B3_INFINITY, B3_INFINITY};
- float32x2_t dotMinHi = (float32x2_t){B3_INFINITY, B3_INFINITY};
- uint32x2_t indexLo = (uint32x2_t){0, 1};
- uint32x2_t indexHi = (uint32x2_t){2, 3};
- uint32x2_t iLo = (uint32x2_t){-1, -1};
- uint32x2_t iHi = (uint32x2_t){-1, -1};
- const uint32x2_t four = (uint32x2_t){4, 4};
-
- for (; i + 8 <= count; i += 8)
- {
- float32x4_t v0 = vld1q_f32_aligned_postincrement(vv);
- float32x4_t v1 = vld1q_f32_aligned_postincrement(vv);
- float32x4_t v2 = vld1q_f32_aligned_postincrement(vv);
- float32x4_t v3 = vld1q_f32_aligned_postincrement(vv);
-
- float32x2_t xy0 = vmul_f32(vget_low_f32(v0), vLo);
- float32x2_t xy1 = vmul_f32(vget_low_f32(v1), vLo);
- float32x2_t xy2 = vmul_f32(vget_low_f32(v2), vLo);
- float32x2_t xy3 = vmul_f32(vget_low_f32(v3), vLo);
-
- float32x2x2_t z0 = vtrn_f32(vget_high_f32(v0), vget_high_f32(v1));
- float32x2x2_t z1 = vtrn_f32(vget_high_f32(v2), vget_high_f32(v3));
- float32x2_t zLo = vmul_f32(z0.val[0], vHi);
- float32x2_t zHi = vmul_f32(z1.val[0], vHi);
-
- float32x2_t rLo = vpadd_f32(xy0, xy1);
- float32x2_t rHi = vpadd_f32(xy2, xy3);
- rLo = vadd_f32(rLo, zLo);
- rHi = vadd_f32(rHi, zHi);
-
- uint32x2_t maskLo = vclt_f32(rLo, dotMinLo);
- uint32x2_t maskHi = vclt_f32(rHi, dotMinHi);
- dotMinLo = vbsl_f32(maskLo, rLo, dotMinLo);
- dotMinHi = vbsl_f32(maskHi, rHi, dotMinHi);
- iLo = vbsl_u32(maskLo, indexLo, iLo);
- iHi = vbsl_u32(maskHi, indexHi, iHi);
- indexLo = vadd_u32(indexLo, four);
- indexHi = vadd_u32(indexHi, four);
-
- v0 = vld1q_f32_aligned_postincrement(vv);
- v1 = vld1q_f32_aligned_postincrement(vv);
- v2 = vld1q_f32_aligned_postincrement(vv);
- v3 = vld1q_f32_aligned_postincrement(vv);
-
- xy0 = vmul_f32(vget_low_f32(v0), vLo);
- xy1 = vmul_f32(vget_low_f32(v1), vLo);
- xy2 = vmul_f32(vget_low_f32(v2), vLo);
- xy3 = vmul_f32(vget_low_f32(v3), vLo);
-
- z0 = vtrn_f32(vget_high_f32(v0), vget_high_f32(v1));
- z1 = vtrn_f32(vget_high_f32(v2), vget_high_f32(v3));
- zLo = vmul_f32(z0.val[0], vHi);
- zHi = vmul_f32(z1.val[0], vHi);
-
- rLo = vpadd_f32(xy0, xy1);
- rHi = vpadd_f32(xy2, xy3);
- rLo = vadd_f32(rLo, zLo);
- rHi = vadd_f32(rHi, zHi);
-
- maskLo = vclt_f32(rLo, dotMinLo);
- maskHi = vclt_f32(rHi, dotMinHi);
- dotMinLo = vbsl_f32(maskLo, rLo, dotMinLo);
- dotMinHi = vbsl_f32(maskHi, rHi, dotMinHi);
- iLo = vbsl_u32(maskLo, indexLo, iLo);
- iHi = vbsl_u32(maskHi, indexHi, iHi);
- indexLo = vadd_u32(indexLo, four);
- indexHi = vadd_u32(indexHi, four);
- }
-
- for (; i + 4 <= count; i += 4)
- {
- float32x4_t v0 = vld1q_f32_aligned_postincrement(vv);
- float32x4_t v1 = vld1q_f32_aligned_postincrement(vv);
- float32x4_t v2 = vld1q_f32_aligned_postincrement(vv);
- float32x4_t v3 = vld1q_f32_aligned_postincrement(vv);
-
- float32x2_t xy0 = vmul_f32(vget_low_f32(v0), vLo);
- float32x2_t xy1 = vmul_f32(vget_low_f32(v1), vLo);
- float32x2_t xy2 = vmul_f32(vget_low_f32(v2), vLo);
- float32x2_t xy3 = vmul_f32(vget_low_f32(v3), vLo);
-
- float32x2x2_t z0 = vtrn_f32(vget_high_f32(v0), vget_high_f32(v1));
- float32x2x2_t z1 = vtrn_f32(vget_high_f32(v2), vget_high_f32(v3));
- float32x2_t zLo = vmul_f32(z0.val[0], vHi);
- float32x2_t zHi = vmul_f32(z1.val[0], vHi);
-
- float32x2_t rLo = vpadd_f32(xy0, xy1);
- float32x2_t rHi = vpadd_f32(xy2, xy3);
- rLo = vadd_f32(rLo, zLo);
- rHi = vadd_f32(rHi, zHi);
-
- uint32x2_t maskLo = vclt_f32(rLo, dotMinLo);
- uint32x2_t maskHi = vclt_f32(rHi, dotMinHi);
- dotMinLo = vbsl_f32(maskLo, rLo, dotMinLo);
- dotMinHi = vbsl_f32(maskHi, rHi, dotMinHi);
- iLo = vbsl_u32(maskLo, indexLo, iLo);
- iHi = vbsl_u32(maskHi, indexHi, iHi);
- indexLo = vadd_u32(indexLo, four);
- indexHi = vadd_u32(indexHi, four);
- }
- switch (count & 3)
- {
- case 3:
- {
- float32x4_t v0 = vld1q_f32_aligned_postincrement(vv);
- float32x4_t v1 = vld1q_f32_aligned_postincrement(vv);
- float32x4_t v2 = vld1q_f32_aligned_postincrement(vv);
-
- float32x2_t xy0 = vmul_f32(vget_low_f32(v0), vLo);
- float32x2_t xy1 = vmul_f32(vget_low_f32(v1), vLo);
- float32x2_t xy2 = vmul_f32(vget_low_f32(v2), vLo);
-
- float32x2x2_t z0 = vtrn_f32(vget_high_f32(v0), vget_high_f32(v1));
- float32x2_t zLo = vmul_f32(z0.val[0], vHi);
- float32x2_t zHi = vmul_f32(vdup_lane_f32(vget_high_f32(v2), 0), vHi);
-
- float32x2_t rLo = vpadd_f32(xy0, xy1);
- float32x2_t rHi = vpadd_f32(xy2, xy2);
- rLo = vadd_f32(rLo, zLo);
- rHi = vadd_f32(rHi, zHi);
-
- uint32x2_t maskLo = vclt_f32(rLo, dotMinLo);
- uint32x2_t maskHi = vclt_f32(rHi, dotMinHi);
- dotMinLo = vbsl_f32(maskLo, rLo, dotMinLo);
- dotMinHi = vbsl_f32(maskHi, rHi, dotMinHi);
- iLo = vbsl_u32(maskLo, indexLo, iLo);
- iHi = vbsl_u32(maskHi, indexHi, iHi);
- }
- break;
- case 2:
- {
- float32x4_t v0 = vld1q_f32_aligned_postincrement(vv);
- float32x4_t v1 = vld1q_f32_aligned_postincrement(vv);
-
- float32x2_t xy0 = vmul_f32(vget_low_f32(v0), vLo);
- float32x2_t xy1 = vmul_f32(vget_low_f32(v1), vLo);
-
- float32x2x2_t z0 = vtrn_f32(vget_high_f32(v0), vget_high_f32(v1));
- float32x2_t zLo = vmul_f32(z0.val[0], vHi);
-
- float32x2_t rLo = vpadd_f32(xy0, xy1);
- rLo = vadd_f32(rLo, zLo);
-
- uint32x2_t maskLo = vclt_f32(rLo, dotMinLo);
- dotMinLo = vbsl_f32(maskLo, rLo, dotMinLo);
- iLo = vbsl_u32(maskLo, indexLo, iLo);
- }
- break;
- case 1:
- {
- float32x4_t v0 = vld1q_f32_aligned_postincrement(vv);
- float32x2_t xy0 = vmul_f32(vget_low_f32(v0), vLo);
- float32x2_t z0 = vdup_lane_f32(vget_high_f32(v0), 0);
- float32x2_t zLo = vmul_f32(z0, vHi);
- float32x2_t rLo = vpadd_f32(xy0, xy0);
- rLo = vadd_f32(rLo, zLo);
- uint32x2_t maskLo = vclt_f32(rLo, dotMinLo);
- dotMinLo = vbsl_f32(maskLo, rLo, dotMinLo);
- iLo = vbsl_u32(maskLo, indexLo, iLo);
- }
- break;
-
- default:
- break;
- }
-
- // select best answer between hi and lo results
- uint32x2_t mask = vclt_f32(dotMinHi, dotMinLo);
- dotMinLo = vbsl_f32(mask, dotMinHi, dotMinLo);
- iLo = vbsl_u32(mask, iHi, iLo);
-
- // select best answer between even and odd results
- dotMinHi = vdup_lane_f32(dotMinLo, 1);
- iHi = vdup_lane_u32(iLo, 1);
- mask = vclt_f32(dotMinHi, dotMinLo);
- dotMinLo = vbsl_f32(mask, dotMinHi, dotMinLo);
- iLo = vbsl_u32(mask, iHi, iLo);
-
- *dotResult = vget_lane_f32(dotMinLo, 0);
- return vget_lane_u32(iLo, 0);
-}
-
-long b3_mindot_large_v1(const float *vv, const float *vec, unsigned long count, float *dotResult)
-{
- float32x4_t vvec = vld1q_f32_aligned_postincrement(vec);
- float32x4_t vLo = vcombine_f32(vget_low_f32(vvec), vget_low_f32(vvec));
- float32x4_t vHi = vdupq_lane_f32(vget_high_f32(vvec), 0);
- const uint32x4_t four = (uint32x4_t){4, 4, 4, 4};
- uint32x4_t local_index = (uint32x4_t){0, 1, 2, 3};
- uint32x4_t index = (uint32x4_t){-1, -1, -1, -1};
- float32x4_t minDot = (float32x4_t){B3_INFINITY, B3_INFINITY, B3_INFINITY, B3_INFINITY};
-
- unsigned long i = 0;
- for (; i + 8 <= count; i += 8)
- {
- float32x4_t v0 = vld1q_f32_aligned_postincrement(vv);
- float32x4_t v1 = vld1q_f32_aligned_postincrement(vv);
- float32x4_t v2 = vld1q_f32_aligned_postincrement(vv);
- float32x4_t v3 = vld1q_f32_aligned_postincrement(vv);
-
- // the next two lines should resolve to a single vswp d, d
- float32x4_t xy0 = vcombine_f32(vget_low_f32(v0), vget_low_f32(v1));
- float32x4_t xy1 = vcombine_f32(vget_low_f32(v2), vget_low_f32(v3));
- // the next two lines should resolve to a single vswp d, d
- float32x4_t z0 = vcombine_f32(vget_high_f32(v0), vget_high_f32(v1));
- float32x4_t z1 = vcombine_f32(vget_high_f32(v2), vget_high_f32(v3));
-
- xy0 = vmulq_f32(xy0, vLo);
- xy1 = vmulq_f32(xy1, vLo);
-
- float32x4x2_t zb = vuzpq_f32(z0, z1);
- float32x4_t z = vmulq_f32(zb.val[0], vHi);
- float32x4x2_t xy = vuzpq_f32(xy0, xy1);
- float32x4_t x = vaddq_f32(xy.val[0], xy.val[1]);
- x = vaddq_f32(x, z);
-
- uint32x4_t mask = vcltq_f32(x, minDot);
- minDot = vbslq_f32(mask, x, minDot);
- index = vbslq_u32(mask, local_index, index);
- local_index = vaddq_u32(local_index, four);
-
- v0 = vld1q_f32_aligned_postincrement(vv);
- v1 = vld1q_f32_aligned_postincrement(vv);
- v2 = vld1q_f32_aligned_postincrement(vv);
- v3 = vld1q_f32_aligned_postincrement(vv);
-
- // the next two lines should resolve to a single vswp d, d
- xy0 = vcombine_f32(vget_low_f32(v0), vget_low_f32(v1));
- xy1 = vcombine_f32(vget_low_f32(v2), vget_low_f32(v3));
- // the next two lines should resolve to a single vswp d, d
- z0 = vcombine_f32(vget_high_f32(v0), vget_high_f32(v1));
- z1 = vcombine_f32(vget_high_f32(v2), vget_high_f32(v3));
-
- xy0 = vmulq_f32(xy0, vLo);
- xy1 = vmulq_f32(xy1, vLo);
-
- zb = vuzpq_f32(z0, z1);
- z = vmulq_f32(zb.val[0], vHi);
- xy = vuzpq_f32(xy0, xy1);
- x = vaddq_f32(xy.val[0], xy.val[1]);
- x = vaddq_f32(x, z);
-
- mask = vcltq_f32(x, minDot);
- minDot = vbslq_f32(mask, x, minDot);
- index = vbslq_u32(mask, local_index, index);
- local_index = vaddq_u32(local_index, four);
- }
-
- for (; i + 4 <= count; i += 4)
- {
- float32x4_t v0 = vld1q_f32_aligned_postincrement(vv);
- float32x4_t v1 = vld1q_f32_aligned_postincrement(vv);
- float32x4_t v2 = vld1q_f32_aligned_postincrement(vv);
- float32x4_t v3 = vld1q_f32_aligned_postincrement(vv);
-
- // the next two lines should resolve to a single vswp d, d
- float32x4_t xy0 = vcombine_f32(vget_low_f32(v0), vget_low_f32(v1));
- float32x4_t xy1 = vcombine_f32(vget_low_f32(v2), vget_low_f32(v3));
- // the next two lines should resolve to a single vswp d, d
- float32x4_t z0 = vcombine_f32(vget_high_f32(v0), vget_high_f32(v1));
- float32x4_t z1 = vcombine_f32(vget_high_f32(v2), vget_high_f32(v3));
-
- xy0 = vmulq_f32(xy0, vLo);
- xy1 = vmulq_f32(xy1, vLo);
-
- float32x4x2_t zb = vuzpq_f32(z0, z1);
- float32x4_t z = vmulq_f32(zb.val[0], vHi);
- float32x4x2_t xy = vuzpq_f32(xy0, xy1);
- float32x4_t x = vaddq_f32(xy.val[0], xy.val[1]);
- x = vaddq_f32(x, z);
-
- uint32x4_t mask = vcltq_f32(x, minDot);
- minDot = vbslq_f32(mask, x, minDot);
- index = vbslq_u32(mask, local_index, index);
- local_index = vaddq_u32(local_index, four);
- }
-
- switch (count & 3)
- {
- case 3:
- {
- float32x4_t v0 = vld1q_f32_aligned_postincrement(vv);
- float32x4_t v1 = vld1q_f32_aligned_postincrement(vv);
- float32x4_t v2 = vld1q_f32_aligned_postincrement(vv);
-
- // the next two lines should resolve to a single vswp d, d
- float32x4_t xy0 = vcombine_f32(vget_low_f32(v0), vget_low_f32(v1));
- float32x4_t xy1 = vcombine_f32(vget_low_f32(v2), vget_low_f32(v2));
- // the next two lines should resolve to a single vswp d, d
- float32x4_t z0 = vcombine_f32(vget_high_f32(v0), vget_high_f32(v1));
- float32x4_t z1 = vcombine_f32(vget_high_f32(v2), vget_high_f32(v2));
-
- xy0 = vmulq_f32(xy0, vLo);
- xy1 = vmulq_f32(xy1, vLo);
-
- float32x4x2_t zb = vuzpq_f32(z0, z1);
- float32x4_t z = vmulq_f32(zb.val[0], vHi);
- float32x4x2_t xy = vuzpq_f32(xy0, xy1);
- float32x4_t x = vaddq_f32(xy.val[0], xy.val[1]);
- x = vaddq_f32(x, z);
-
- uint32x4_t mask = vcltq_f32(x, minDot);
- minDot = vbslq_f32(mask, x, minDot);
- index = vbslq_u32(mask, local_index, index);
- local_index = vaddq_u32(local_index, four);
- }
- break;
-
- case 2:
- {
- float32x4_t v0 = vld1q_f32_aligned_postincrement(vv);
- float32x4_t v1 = vld1q_f32_aligned_postincrement(vv);
-
- // the next two lines should resolve to a single vswp d, d
- float32x4_t xy0 = vcombine_f32(vget_low_f32(v0), vget_low_f32(v1));
- // the next two lines should resolve to a single vswp d, d
- float32x4_t z0 = vcombine_f32(vget_high_f32(v0), vget_high_f32(v1));
-
- xy0 = vmulq_f32(xy0, vLo);
-
- float32x4x2_t zb = vuzpq_f32(z0, z0);
- float32x4_t z = vmulq_f32(zb.val[0], vHi);
- float32x4x2_t xy = vuzpq_f32(xy0, xy0);
- float32x4_t x = vaddq_f32(xy.val[0], xy.val[1]);
- x = vaddq_f32(x, z);
-
- uint32x4_t mask = vcltq_f32(x, minDot);
- minDot = vbslq_f32(mask, x, minDot);
- index = vbslq_u32(mask, local_index, index);
- local_index = vaddq_u32(local_index, four);
- }
- break;
-
- case 1:
- {
- float32x4_t v0 = vld1q_f32_aligned_postincrement(vv);
-
- // the next two lines should resolve to a single vswp d, d
- float32x4_t xy0 = vcombine_f32(vget_low_f32(v0), vget_low_f32(v0));
- // the next two lines should resolve to a single vswp d, d
- float32x4_t z = vdupq_lane_f32(vget_high_f32(v0), 0);
-
- xy0 = vmulq_f32(xy0, vLo);
-
- z = vmulq_f32(z, vHi);
- float32x4x2_t xy = vuzpq_f32(xy0, xy0);
- float32x4_t x = vaddq_f32(xy.val[0], xy.val[1]);
- x = vaddq_f32(x, z);
-
- uint32x4_t mask = vcltq_f32(x, minDot);
- minDot = vbslq_f32(mask, x, minDot);
- index = vbslq_u32(mask, local_index, index);
- local_index = vaddq_u32(local_index, four);
- }
- break;
-
- default:
- break;
- }
-
- // select best answer between hi and lo results
- uint32x2_t mask = vclt_f32(vget_high_f32(minDot), vget_low_f32(minDot));
- float32x2_t minDot2 = vbsl_f32(mask, vget_high_f32(minDot), vget_low_f32(minDot));
- uint32x2_t index2 = vbsl_u32(mask, vget_high_u32(index), vget_low_u32(index));
-
- // select best answer between even and odd results
- float32x2_t minDotO = vdup_lane_f32(minDot2, 1);
- uint32x2_t indexHi = vdup_lane_u32(index2, 1);
- mask = vclt_f32(minDotO, minDot2);
- minDot2 = vbsl_f32(mask, minDotO, minDot2);
- index2 = vbsl_u32(mask, indexHi, index2);
-
- *dotResult = vget_lane_f32(minDot2, 0);
- return vget_lane_u32(index2, 0);
-}
-
-#else
-#error Unhandled __APPLE__ arch
-#endif
-
-#endif /* __APPLE__ */
diff --git a/thirdparty/bullet/Bullet3Common/b3Vector3.h b/thirdparty/bullet/Bullet3Common/b3Vector3.h
deleted file mode 100644
index a70d68d6e1..0000000000
--- a/thirdparty/bullet/Bullet3Common/b3Vector3.h
+++ /dev/null
@@ -1,1303 +0,0 @@
-/*
-Copyright (c) 2003-2013 Gino van den Bergen / Erwin Coumans http://bulletphysics.org
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose,
-including commercial applications, and to alter it and redistribute it freely,
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-#ifndef B3_VECTOR3_H
-#define B3_VECTOR3_H
-
-//#include <stdint.h>
-#include "b3Scalar.h"
-#include "b3MinMax.h"
-#include "b3AlignedAllocator.h"
-
-#ifdef B3_USE_DOUBLE_PRECISION
-#define b3Vector3Data b3Vector3DoubleData
-#define b3Vector3DataName "b3Vector3DoubleData"
-#else
-#define b3Vector3Data b3Vector3FloatData
-#define b3Vector3DataName "b3Vector3FloatData"
-#endif //B3_USE_DOUBLE_PRECISION
-
-#if defined B3_USE_SSE
-
-//typedef uint32_t __m128i __attribute__ ((vector_size(16)));
-
-#ifdef _MSC_VER
-#pragma warning(disable : 4556) // value of intrinsic immediate argument '4294967239' is out of range '0 - 255'
-#endif
-
-#define B3_SHUFFLE(x, y, z, w) (((w) << 6 | (z) << 4 | (y) << 2 | (x)) & 0xff)
-//#define b3_pshufd_ps( _a, _mask ) (__m128) _mm_shuffle_epi32((__m128i)(_a), (_mask) )
-#define b3_pshufd_ps(_a, _mask) _mm_shuffle_ps((_a), (_a), (_mask))
-#define b3_splat3_ps(_a, _i) b3_pshufd_ps((_a), B3_SHUFFLE(_i, _i, _i, 3))
-#define b3_splat_ps(_a, _i) b3_pshufd_ps((_a), B3_SHUFFLE(_i, _i, _i, _i))
-
-#define b3v3AbsiMask (_mm_set_epi32(0x00000000, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF))
-#define b3vAbsMask (_mm_set_epi32(0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF))
-#define b3vFFF0Mask (_mm_set_epi32(0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF))
-#define b3v3AbsfMask b3CastiTo128f(b3v3AbsiMask)
-#define b3vFFF0fMask b3CastiTo128f(b3vFFF0Mask)
-#define b3vxyzMaskf b3vFFF0fMask
-#define b3vAbsfMask b3CastiTo128f(b3vAbsMask)
-
-const __m128 B3_ATTRIBUTE_ALIGNED16(b3vMzeroMask) = {-0.0f, -0.0f, -0.0f, -0.0f};
-const __m128 B3_ATTRIBUTE_ALIGNED16(b3v1110) = {1.0f, 1.0f, 1.0f, 0.0f};
-const __m128 B3_ATTRIBUTE_ALIGNED16(b3vHalf) = {0.5f, 0.5f, 0.5f, 0.5f};
-const __m128 B3_ATTRIBUTE_ALIGNED16(b3v1_5) = {1.5f, 1.5f, 1.5f, 1.5f};
-
-#endif
-
-#ifdef B3_USE_NEON
-
-const float32x4_t B3_ATTRIBUTE_ALIGNED16(b3vMzeroMask) = (float32x4_t){-0.0f, -0.0f, -0.0f, -0.0f};
-const int32x4_t B3_ATTRIBUTE_ALIGNED16(b3vFFF0Mask) = (int32x4_t){0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x0};
-const int32x4_t B3_ATTRIBUTE_ALIGNED16(b3vAbsMask) = (int32x4_t){0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF};
-const int32x4_t B3_ATTRIBUTE_ALIGNED16(b3v3AbsMask) = (int32x4_t){0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x0};
-
-#endif
-
-class b3Vector3;
-class b3Vector4;
-
-#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
-//#if defined (B3_USE_SSE) || defined (B3_USE_NEON)
-inline b3Vector3 b3MakeVector3(b3SimdFloat4 v);
-inline b3Vector4 b3MakeVector4(b3SimdFloat4 vec);
-#endif
-
-inline b3Vector3 b3MakeVector3(b3Scalar x, b3Scalar y, b3Scalar z);
-inline b3Vector3 b3MakeVector3(b3Scalar x, b3Scalar y, b3Scalar z, b3Scalar w);
-inline b3Vector4 b3MakeVector4(b3Scalar x, b3Scalar y, b3Scalar z, b3Scalar w);
-
-/**@brief b3Vector3 can be used to represent 3D points and vectors.
- * It has an un-used w component to suit 16-byte alignment when b3Vector3 is stored in containers. This extra component can be used by derived classes (Quaternion?) or by user
- * Ideally, this class should be replaced by a platform optimized SIMD version that keeps the data in registers
- */
-B3_ATTRIBUTE_ALIGNED16(class)
-b3Vector3
-{
-public:
-#if defined(B3_USE_SSE) || defined(B3_USE_NEON) // _WIN32 || ARM
- union {
- b3SimdFloat4 mVec128;
- float m_floats[4];
- struct
- {
- float x, y, z, w;
- };
- };
-#else
- union {
- float m_floats[4];
- struct
- {
- float x, y, z, w;
- };
- };
-#endif
-
-public:
- B3_DECLARE_ALIGNED_ALLOCATOR();
-
-#if defined(B3_USE_SSE) || defined(B3_USE_NEON) // _WIN32 || ARM
-
- /*B3_FORCE_INLINE b3Vector3()
- {
- }
- */
-
- B3_FORCE_INLINE b3SimdFloat4 get128() const
- {
- return mVec128;
- }
- B3_FORCE_INLINE void set128(b3SimdFloat4 v128)
- {
- mVec128 = v128;
- }
-#endif
-
-public:
- /**@brief Add a vector to this one
- * @param The vector to add to this one */
- B3_FORCE_INLINE b3Vector3& operator+=(const b3Vector3& v)
- {
-#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
- mVec128 = _mm_add_ps(mVec128, v.mVec128);
-#elif defined(B3_USE_NEON)
- mVec128 = vaddq_f32(mVec128, v.mVec128);
-#else
- m_floats[0] += v.m_floats[0];
- m_floats[1] += v.m_floats[1];
- m_floats[2] += v.m_floats[2];
-#endif
- return *this;
- }
-
- /**@brief Subtract a vector from this one
- * @param The vector to subtract */
- B3_FORCE_INLINE b3Vector3& operator-=(const b3Vector3& v)
- {
-#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
- mVec128 = _mm_sub_ps(mVec128, v.mVec128);
-#elif defined(B3_USE_NEON)
- mVec128 = vsubq_f32(mVec128, v.mVec128);
-#else
- m_floats[0] -= v.m_floats[0];
- m_floats[1] -= v.m_floats[1];
- m_floats[2] -= v.m_floats[2];
-#endif
- return *this;
- }
-
- /**@brief Scale the vector
- * @param s Scale factor */
- B3_FORCE_INLINE b3Vector3& operator*=(const b3Scalar& s)
- {
-#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
- __m128 vs = _mm_load_ss(&s); // (S 0 0 0)
- vs = b3_pshufd_ps(vs, 0x80); // (S S S 0.0)
- mVec128 = _mm_mul_ps(mVec128, vs);
-#elif defined(B3_USE_NEON)
- mVec128 = vmulq_n_f32(mVec128, s);
-#else
- m_floats[0] *= s;
- m_floats[1] *= s;
- m_floats[2] *= s;
-#endif
- return *this;
- }
-
- /**@brief Inversely scale the vector
- * @param s Scale factor to divide by */
- B3_FORCE_INLINE b3Vector3& operator/=(const b3Scalar& s)
- {
- b3FullAssert(s != b3Scalar(0.0));
-
-#if 0 //defined(B3_USE_SSE_IN_API)
-// this code is not faster !
- __m128 vs = _mm_load_ss(&s);
- vs = _mm_div_ss(b3v1110, vs);
- vs = b3_pshufd_ps(vs, 0x00); // (S S S S)
-
- mVec128 = _mm_mul_ps(mVec128, vs);
-
- return *this;
-#else
- return *this *= b3Scalar(1.0) / s;
-#endif
- }
-
- /**@brief Return the dot product
- * @param v The other vector in the dot product */
- B3_FORCE_INLINE b3Scalar dot(const b3Vector3& v) const
- {
-#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
- __m128 vd = _mm_mul_ps(mVec128, v.mVec128);
- __m128 z = _mm_movehl_ps(vd, vd);
- __m128 y = _mm_shuffle_ps(vd, vd, 0x55);
- vd = _mm_add_ss(vd, y);
- vd = _mm_add_ss(vd, z);
- return _mm_cvtss_f32(vd);
-#elif defined(B3_USE_NEON)
- float32x4_t vd = vmulq_f32(mVec128, v.mVec128);
- float32x2_t x = vpadd_f32(vget_low_f32(vd), vget_low_f32(vd));
- x = vadd_f32(x, vget_high_f32(vd));
- return vget_lane_f32(x, 0);
-#else
- return m_floats[0] * v.m_floats[0] +
- m_floats[1] * v.m_floats[1] +
- m_floats[2] * v.m_floats[2];
-#endif
- }
-
- /**@brief Return the length of the vector squared */
- B3_FORCE_INLINE b3Scalar length2() const
- {
- return dot(*this);
- }
-
- /**@brief Return the length of the vector */
- B3_FORCE_INLINE b3Scalar length() const
- {
- return b3Sqrt(length2());
- }
-
- /**@brief Return the distance squared between the ends of this and another vector
- * This is symantically treating the vector like a point */
- B3_FORCE_INLINE b3Scalar distance2(const b3Vector3& v) const;
-
- /**@brief Return the distance between the ends of this and another vector
- * This is symantically treating the vector like a point */
- B3_FORCE_INLINE b3Scalar distance(const b3Vector3& v) const;
-
- B3_FORCE_INLINE b3Vector3& safeNormalize()
- {
- b3Scalar l2 = length2();
- //triNormal.normalize();
- if (l2 >= B3_EPSILON * B3_EPSILON)
- {
- (*this) /= b3Sqrt(l2);
- }
- else
- {
- setValue(1, 0, 0);
- }
- return *this;
- }
-
- /**@brief Normalize this vector
- * x^2 + y^2 + z^2 = 1 */
- B3_FORCE_INLINE b3Vector3& normalize()
- {
-#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
- // dot product first
- __m128 vd = _mm_mul_ps(mVec128, mVec128);
- __m128 z = _mm_movehl_ps(vd, vd);
- __m128 y = _mm_shuffle_ps(vd, vd, 0x55);
- vd = _mm_add_ss(vd, y);
- vd = _mm_add_ss(vd, z);
-
-#if 0
- vd = _mm_sqrt_ss(vd);
- vd = _mm_div_ss(b3v1110, vd);
- vd = b3_splat_ps(vd, 0x80);
- mVec128 = _mm_mul_ps(mVec128, vd);
-#else
-
- // NR step 1/sqrt(x) - vd is x, y is output
- y = _mm_rsqrt_ss(vd); // estimate
-
- // one step NR
- z = b3v1_5;
- vd = _mm_mul_ss(vd, b3vHalf); // vd * 0.5
- //x2 = vd;
- vd = _mm_mul_ss(vd, y); // vd * 0.5 * y0
- vd = _mm_mul_ss(vd, y); // vd * 0.5 * y0 * y0
- z = _mm_sub_ss(z, vd); // 1.5 - vd * 0.5 * y0 * y0
-
- y = _mm_mul_ss(y, z); // y0 * (1.5 - vd * 0.5 * y0 * y0)
-
- y = b3_splat_ps(y, 0x80);
- mVec128 = _mm_mul_ps(mVec128, y);
-
-#endif
-
- return *this;
-#else
- return *this /= length();
-#endif
- }
-
- /**@brief Return a normalized version of this vector */
- B3_FORCE_INLINE b3Vector3 normalized() const;
-
- /**@brief Return a rotated version of this vector
- * @param wAxis The axis to rotate about
- * @param angle The angle to rotate by */
- B3_FORCE_INLINE b3Vector3 rotate(const b3Vector3& wAxis, const b3Scalar angle) const;
-
- /**@brief Return the angle between this and another vector
- * @param v The other vector */
- B3_FORCE_INLINE b3Scalar angle(const b3Vector3& v) const
- {
- b3Scalar s = b3Sqrt(length2() * v.length2());
- b3FullAssert(s != b3Scalar(0.0));
- return b3Acos(dot(v) / s);
- }
-
- /**@brief Return a vector will the absolute values of each element */
- B3_FORCE_INLINE b3Vector3 absolute() const
- {
-#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
- return b3MakeVector3(_mm_and_ps(mVec128, b3v3AbsfMask));
-#elif defined(B3_USE_NEON)
- return b3Vector3(vabsq_f32(mVec128));
-#else
- return b3MakeVector3(
- b3Fabs(m_floats[0]),
- b3Fabs(m_floats[1]),
- b3Fabs(m_floats[2]));
-#endif
- }
-
- /**@brief Return the cross product between this and another vector
- * @param v The other vector */
- B3_FORCE_INLINE b3Vector3 cross(const b3Vector3& v) const
- {
-#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
- __m128 T, V;
-
- T = b3_pshufd_ps(mVec128, B3_SHUFFLE(1, 2, 0, 3)); // (Y Z X 0)
- V = b3_pshufd_ps(v.mVec128, B3_SHUFFLE(1, 2, 0, 3)); // (Y Z X 0)
-
- V = _mm_mul_ps(V, mVec128);
- T = _mm_mul_ps(T, v.mVec128);
- V = _mm_sub_ps(V, T);
-
- V = b3_pshufd_ps(V, B3_SHUFFLE(1, 2, 0, 3));
- return b3MakeVector3(V);
-#elif defined(B3_USE_NEON)
- float32x4_t T, V;
- // form (Y, Z, X, _) of mVec128 and v.mVec128
- float32x2_t Tlow = vget_low_f32(mVec128);
- float32x2_t Vlow = vget_low_f32(v.mVec128);
- T = vcombine_f32(vext_f32(Tlow, vget_high_f32(mVec128), 1), Tlow);
- V = vcombine_f32(vext_f32(Vlow, vget_high_f32(v.mVec128), 1), Vlow);
-
- V = vmulq_f32(V, mVec128);
- T = vmulq_f32(T, v.mVec128);
- V = vsubq_f32(V, T);
- Vlow = vget_low_f32(V);
- // form (Y, Z, X, _);
- V = vcombine_f32(vext_f32(Vlow, vget_high_f32(V), 1), Vlow);
- V = (float32x4_t)vandq_s32((int32x4_t)V, b3vFFF0Mask);
-
- return b3Vector3(V);
-#else
- return b3MakeVector3(
- m_floats[1] * v.m_floats[2] - m_floats[2] * v.m_floats[1],
- m_floats[2] * v.m_floats[0] - m_floats[0] * v.m_floats[2],
- m_floats[0] * v.m_floats[1] - m_floats[1] * v.m_floats[0]);
-#endif
- }
-
- B3_FORCE_INLINE b3Scalar triple(const b3Vector3& v1, const b3Vector3& v2) const
- {
-#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
- // cross:
- __m128 T = _mm_shuffle_ps(v1.mVec128, v1.mVec128, B3_SHUFFLE(1, 2, 0, 3)); // (Y Z X 0)
- __m128 V = _mm_shuffle_ps(v2.mVec128, v2.mVec128, B3_SHUFFLE(1, 2, 0, 3)); // (Y Z X 0)
-
- V = _mm_mul_ps(V, v1.mVec128);
- T = _mm_mul_ps(T, v2.mVec128);
- V = _mm_sub_ps(V, T);
-
- V = _mm_shuffle_ps(V, V, B3_SHUFFLE(1, 2, 0, 3));
-
- // dot:
- V = _mm_mul_ps(V, mVec128);
- __m128 z = _mm_movehl_ps(V, V);
- __m128 y = _mm_shuffle_ps(V, V, 0x55);
- V = _mm_add_ss(V, y);
- V = _mm_add_ss(V, z);
- return _mm_cvtss_f32(V);
-
-#elif defined(B3_USE_NEON)
- // cross:
- float32x4_t T, V;
- // form (Y, Z, X, _) of mVec128 and v.mVec128
- float32x2_t Tlow = vget_low_f32(v1.mVec128);
- float32x2_t Vlow = vget_low_f32(v2.mVec128);
- T = vcombine_f32(vext_f32(Tlow, vget_high_f32(v1.mVec128), 1), Tlow);
- V = vcombine_f32(vext_f32(Vlow, vget_high_f32(v2.mVec128), 1), Vlow);
-
- V = vmulq_f32(V, v1.mVec128);
- T = vmulq_f32(T, v2.mVec128);
- V = vsubq_f32(V, T);
- Vlow = vget_low_f32(V);
- // form (Y, Z, X, _);
- V = vcombine_f32(vext_f32(Vlow, vget_high_f32(V), 1), Vlow);
-
- // dot:
- V = vmulq_f32(mVec128, V);
- float32x2_t x = vpadd_f32(vget_low_f32(V), vget_low_f32(V));
- x = vadd_f32(x, vget_high_f32(V));
- return vget_lane_f32(x, 0);
-#else
- return m_floats[0] * (v1.m_floats[1] * v2.m_floats[2] - v1.m_floats[2] * v2.m_floats[1]) +
- m_floats[1] * (v1.m_floats[2] * v2.m_floats[0] - v1.m_floats[0] * v2.m_floats[2]) +
- m_floats[2] * (v1.m_floats[0] * v2.m_floats[1] - v1.m_floats[1] * v2.m_floats[0]);
-#endif
- }
-
- /**@brief Return the axis with the smallest value
- * Note return values are 0,1,2 for x, y, or z */
- B3_FORCE_INLINE int minAxis() const
- {
- return m_floats[0] < m_floats[1] ? (m_floats[0] < m_floats[2] ? 0 : 2) : (m_floats[1] < m_floats[2] ? 1 : 2);
- }
-
- /**@brief Return the axis with the largest value
- * Note return values are 0,1,2 for x, y, or z */
- B3_FORCE_INLINE int maxAxis() const
- {
- return m_floats[0] < m_floats[1] ? (m_floats[1] < m_floats[2] ? 2 : 1) : (m_floats[0] < m_floats[2] ? 2 : 0);
- }
-
- B3_FORCE_INLINE int furthestAxis() const
- {
- return absolute().minAxis();
- }
-
- B3_FORCE_INLINE int closestAxis() const
- {
- return absolute().maxAxis();
- }
-
- B3_FORCE_INLINE void setInterpolate3(const b3Vector3& v0, const b3Vector3& v1, b3Scalar rt)
- {
-#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
- __m128 vrt = _mm_load_ss(&rt); // (rt 0 0 0)
- b3Scalar s = b3Scalar(1.0) - rt;
- __m128 vs = _mm_load_ss(&s); // (S 0 0 0)
- vs = b3_pshufd_ps(vs, 0x80); // (S S S 0.0)
- __m128 r0 = _mm_mul_ps(v0.mVec128, vs);
- vrt = b3_pshufd_ps(vrt, 0x80); // (rt rt rt 0.0)
- __m128 r1 = _mm_mul_ps(v1.mVec128, vrt);
- __m128 tmp3 = _mm_add_ps(r0, r1);
- mVec128 = tmp3;
-#elif defined(B3_USE_NEON)
- float32x4_t vl = vsubq_f32(v1.mVec128, v0.mVec128);
- vl = vmulq_n_f32(vl, rt);
- mVec128 = vaddq_f32(vl, v0.mVec128);
-#else
- b3Scalar s = b3Scalar(1.0) - rt;
- m_floats[0] = s * v0.m_floats[0] + rt * v1.m_floats[0];
- m_floats[1] = s * v0.m_floats[1] + rt * v1.m_floats[1];
- m_floats[2] = s * v0.m_floats[2] + rt * v1.m_floats[2];
- //don't do the unused w component
- // m_co[3] = s * v0[3] + rt * v1[3];
-#endif
- }
-
- /**@brief Return the linear interpolation between this and another vector
- * @param v The other vector
- * @param t The ration of this to v (t = 0 => return this, t=1 => return other) */
- B3_FORCE_INLINE b3Vector3 lerp(const b3Vector3& v, const b3Scalar& t) const
- {
-#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
- __m128 vt = _mm_load_ss(&t); // (t 0 0 0)
- vt = b3_pshufd_ps(vt, 0x80); // (rt rt rt 0.0)
- __m128 vl = _mm_sub_ps(v.mVec128, mVec128);
- vl = _mm_mul_ps(vl, vt);
- vl = _mm_add_ps(vl, mVec128);
-
- return b3MakeVector3(vl);
-#elif defined(B3_USE_NEON)
- float32x4_t vl = vsubq_f32(v.mVec128, mVec128);
- vl = vmulq_n_f32(vl, t);
- vl = vaddq_f32(vl, mVec128);
-
- return b3Vector3(vl);
-#else
- return b3MakeVector3(m_floats[0] + (v.m_floats[0] - m_floats[0]) * t,
- m_floats[1] + (v.m_floats[1] - m_floats[1]) * t,
- m_floats[2] + (v.m_floats[2] - m_floats[2]) * t);
-#endif
- }
-
- /**@brief Elementwise multiply this vector by the other
- * @param v The other vector */
- B3_FORCE_INLINE b3Vector3& operator*=(const b3Vector3& v)
- {
-#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
- mVec128 = _mm_mul_ps(mVec128, v.mVec128);
-#elif defined(B3_USE_NEON)
- mVec128 = vmulq_f32(mVec128, v.mVec128);
-#else
- m_floats[0] *= v.m_floats[0];
- m_floats[1] *= v.m_floats[1];
- m_floats[2] *= v.m_floats[2];
-#endif
- return *this;
- }
-
- /**@brief Return the x value */
- B3_FORCE_INLINE const b3Scalar& getX() const { return m_floats[0]; }
- /**@brief Return the y value */
- B3_FORCE_INLINE const b3Scalar& getY() const { return m_floats[1]; }
- /**@brief Return the z value */
- B3_FORCE_INLINE const b3Scalar& getZ() const { return m_floats[2]; }
- /**@brief Return the w value */
- B3_FORCE_INLINE const b3Scalar& getW() const { return m_floats[3]; }
-
- /**@brief Set the x value */
- B3_FORCE_INLINE void setX(b3Scalar _x) { m_floats[0] = _x; };
- /**@brief Set the y value */
- B3_FORCE_INLINE void setY(b3Scalar _y) { m_floats[1] = _y; };
- /**@brief Set the z value */
- B3_FORCE_INLINE void setZ(b3Scalar _z) { m_floats[2] = _z; };
- /**@brief Set the w value */
- B3_FORCE_INLINE void setW(b3Scalar _w) { m_floats[3] = _w; };
-
- //B3_FORCE_INLINE b3Scalar& operator[](int i) { return (&m_floats[0])[i]; }
- //B3_FORCE_INLINE const b3Scalar& operator[](int i) const { return (&m_floats[0])[i]; }
- ///operator b3Scalar*() replaces operator[], using implicit conversion. We added operator != and operator == to avoid pointer comparisons.
- B3_FORCE_INLINE operator b3Scalar*() { return &m_floats[0]; }
- B3_FORCE_INLINE operator const b3Scalar*() const { return &m_floats[0]; }
-
- B3_FORCE_INLINE bool operator==(const b3Vector3& other) const
- {
-#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
- return (0xf == _mm_movemask_ps((__m128)_mm_cmpeq_ps(mVec128, other.mVec128)));
-#else
- return ((m_floats[3] == other.m_floats[3]) &&
- (m_floats[2] == other.m_floats[2]) &&
- (m_floats[1] == other.m_floats[1]) &&
- (m_floats[0] == other.m_floats[0]));
-#endif
- }
-
- B3_FORCE_INLINE bool operator!=(const b3Vector3& other) const
- {
- return !(*this == other);
- }
-
- /**@brief Set each element to the max of the current values and the values of another b3Vector3
- * @param other The other b3Vector3 to compare with
- */
- B3_FORCE_INLINE void setMax(const b3Vector3& other)
- {
-#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
- mVec128 = _mm_max_ps(mVec128, other.mVec128);
-#elif defined(B3_USE_NEON)
- mVec128 = vmaxq_f32(mVec128, other.mVec128);
-#else
- b3SetMax(m_floats[0], other.m_floats[0]);
- b3SetMax(m_floats[1], other.m_floats[1]);
- b3SetMax(m_floats[2], other.m_floats[2]);
- b3SetMax(m_floats[3], other.m_floats[3]);
-#endif
- }
-
- /**@brief Set each element to the min of the current values and the values of another b3Vector3
- * @param other The other b3Vector3 to compare with
- */
- B3_FORCE_INLINE void setMin(const b3Vector3& other)
- {
-#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
- mVec128 = _mm_min_ps(mVec128, other.mVec128);
-#elif defined(B3_USE_NEON)
- mVec128 = vminq_f32(mVec128, other.mVec128);
-#else
- b3SetMin(m_floats[0], other.m_floats[0]);
- b3SetMin(m_floats[1], other.m_floats[1]);
- b3SetMin(m_floats[2], other.m_floats[2]);
- b3SetMin(m_floats[3], other.m_floats[3]);
-#endif
- }
-
- B3_FORCE_INLINE void setValue(const b3Scalar& _x, const b3Scalar& _y, const b3Scalar& _z)
- {
- m_floats[0] = _x;
- m_floats[1] = _y;
- m_floats[2] = _z;
- m_floats[3] = b3Scalar(0.f);
- }
-
- void getSkewSymmetricMatrix(b3Vector3 * v0, b3Vector3 * v1, b3Vector3 * v2) const
- {
-#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
-
- __m128 V = _mm_and_ps(mVec128, b3vFFF0fMask);
- __m128 V0 = _mm_xor_ps(b3vMzeroMask, V);
- __m128 V2 = _mm_movelh_ps(V0, V);
-
- __m128 V1 = _mm_shuffle_ps(V, V0, 0xCE);
-
- V0 = _mm_shuffle_ps(V0, V, 0xDB);
- V2 = _mm_shuffle_ps(V2, V, 0xF9);
-
- v0->mVec128 = V0;
- v1->mVec128 = V1;
- v2->mVec128 = V2;
-#else
- v0->setValue(0., -getZ(), getY());
- v1->setValue(getZ(), 0., -getX());
- v2->setValue(-getY(), getX(), 0.);
-#endif
- }
-
- void setZero()
- {
-#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
- mVec128 = (__m128)_mm_xor_ps(mVec128, mVec128);
-#elif defined(B3_USE_NEON)
- int32x4_t vi = vdupq_n_s32(0);
- mVec128 = vreinterpretq_f32_s32(vi);
-#else
- setValue(b3Scalar(0.), b3Scalar(0.), b3Scalar(0.));
-#endif
- }
-
- B3_FORCE_INLINE bool isZero() const
- {
- return m_floats[0] == b3Scalar(0) && m_floats[1] == b3Scalar(0) && m_floats[2] == b3Scalar(0);
- }
-
- B3_FORCE_INLINE bool fuzzyZero() const
- {
- return length2() < B3_EPSILON;
- }
-
- B3_FORCE_INLINE void serialize(struct b3Vector3Data & dataOut) const;
-
- B3_FORCE_INLINE void deSerialize(const struct b3Vector3Data& dataIn);
-
- B3_FORCE_INLINE void serializeFloat(struct b3Vector3FloatData & dataOut) const;
-
- B3_FORCE_INLINE void deSerializeFloat(const struct b3Vector3FloatData& dataIn);
-
- B3_FORCE_INLINE void serializeDouble(struct b3Vector3DoubleData & dataOut) const;
-
- B3_FORCE_INLINE void deSerializeDouble(const struct b3Vector3DoubleData& dataIn);
-
- /**@brief returns index of maximum dot product between this and vectors in array[]
- * @param array The other vectors
- * @param array_count The number of other vectors
- * @param dotOut The maximum dot product */
- B3_FORCE_INLINE long maxDot(const b3Vector3* array, long array_count, b3Scalar& dotOut) const;
-
- /**@brief returns index of minimum dot product between this and vectors in array[]
- * @param array The other vectors
- * @param array_count The number of other vectors
- * @param dotOut The minimum dot product */
- B3_FORCE_INLINE long minDot(const b3Vector3* array, long array_count, b3Scalar& dotOut) const;
-
- /* create a vector as b3Vector3( this->dot( b3Vector3 v0 ), this->dot( b3Vector3 v1), this->dot( b3Vector3 v2 )) */
- B3_FORCE_INLINE b3Vector3 dot3(const b3Vector3& v0, const b3Vector3& v1, const b3Vector3& v2) const
- {
-#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
-
- __m128 a0 = _mm_mul_ps(v0.mVec128, this->mVec128);
- __m128 a1 = _mm_mul_ps(v1.mVec128, this->mVec128);
- __m128 a2 = _mm_mul_ps(v2.mVec128, this->mVec128);
- __m128 b0 = _mm_unpacklo_ps(a0, a1);
- __m128 b1 = _mm_unpackhi_ps(a0, a1);
- __m128 b2 = _mm_unpacklo_ps(a2, _mm_setzero_ps());
- __m128 r = _mm_movelh_ps(b0, b2);
- r = _mm_add_ps(r, _mm_movehl_ps(b2, b0));
- a2 = _mm_and_ps(a2, b3vxyzMaskf);
- r = _mm_add_ps(r, b3CastdTo128f(_mm_move_sd(b3CastfTo128d(a2), b3CastfTo128d(b1))));
- return b3MakeVector3(r);
-
-#elif defined(B3_USE_NEON)
- static const uint32x4_t xyzMask = (const uint32x4_t){-1, -1, -1, 0};
- float32x4_t a0 = vmulq_f32(v0.mVec128, this->mVec128);
- float32x4_t a1 = vmulq_f32(v1.mVec128, this->mVec128);
- float32x4_t a2 = vmulq_f32(v2.mVec128, this->mVec128);
- float32x2x2_t zLo = vtrn_f32(vget_high_f32(a0), vget_high_f32(a1));
- a2 = (float32x4_t)vandq_u32((uint32x4_t)a2, xyzMask);
- float32x2_t b0 = vadd_f32(vpadd_f32(vget_low_f32(a0), vget_low_f32(a1)), zLo.val[0]);
- float32x2_t b1 = vpadd_f32(vpadd_f32(vget_low_f32(a2), vget_high_f32(a2)), vdup_n_f32(0.0f));
- return b3Vector3(vcombine_f32(b0, b1));
-#else
- return b3MakeVector3(dot(v0), dot(v1), dot(v2));
-#endif
- }
-};
-
-/**@brief Return the sum of two vectors (Point symantics)*/
-B3_FORCE_INLINE b3Vector3
-operator+(const b3Vector3& v1, const b3Vector3& v2)
-{
-#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
- return b3MakeVector3(_mm_add_ps(v1.mVec128, v2.mVec128));
-#elif defined(B3_USE_NEON)
- return b3MakeVector3(vaddq_f32(v1.mVec128, v2.mVec128));
-#else
- return b3MakeVector3(
- v1.m_floats[0] + v2.m_floats[0],
- v1.m_floats[1] + v2.m_floats[1],
- v1.m_floats[2] + v2.m_floats[2]);
-#endif
-}
-
-/**@brief Return the elementwise product of two vectors */
-B3_FORCE_INLINE b3Vector3
-operator*(const b3Vector3& v1, const b3Vector3& v2)
-{
-#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
- return b3MakeVector3(_mm_mul_ps(v1.mVec128, v2.mVec128));
-#elif defined(B3_USE_NEON)
- return b3MakeVector3(vmulq_f32(v1.mVec128, v2.mVec128));
-#else
- return b3MakeVector3(
- v1.m_floats[0] * v2.m_floats[0],
- v1.m_floats[1] * v2.m_floats[1],
- v1.m_floats[2] * v2.m_floats[2]);
-#endif
-}
-
-/**@brief Return the difference between two vectors */
-B3_FORCE_INLINE b3Vector3
-operator-(const b3Vector3& v1, const b3Vector3& v2)
-{
-#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE))
-
- // without _mm_and_ps this code causes slowdown in Concave moving
- __m128 r = _mm_sub_ps(v1.mVec128, v2.mVec128);
- return b3MakeVector3(_mm_and_ps(r, b3vFFF0fMask));
-#elif defined(B3_USE_NEON)
- float32x4_t r = vsubq_f32(v1.mVec128, v2.mVec128);
- return b3MakeVector3((float32x4_t)vandq_s32((int32x4_t)r, b3vFFF0Mask));
-#else
- return b3MakeVector3(
- v1.m_floats[0] - v2.m_floats[0],
- v1.m_floats[1] - v2.m_floats[1],
- v1.m_floats[2] - v2.m_floats[2]);
-#endif
-}
-
-/**@brief Return the negative of the vector */
-B3_FORCE_INLINE b3Vector3
-operator-(const b3Vector3& v)
-{
-#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE))
- __m128 r = _mm_xor_ps(v.mVec128, b3vMzeroMask);
- return b3MakeVector3(_mm_and_ps(r, b3vFFF0fMask));
-#elif defined(B3_USE_NEON)
- return b3MakeVector3((b3SimdFloat4)veorq_s32((int32x4_t)v.mVec128, (int32x4_t)b3vMzeroMask));
-#else
- return b3MakeVector3(-v.m_floats[0], -v.m_floats[1], -v.m_floats[2]);
-#endif
-}
-
-/**@brief Return the vector scaled by s */
-B3_FORCE_INLINE b3Vector3
-operator*(const b3Vector3& v, const b3Scalar& s)
-{
-#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
- __m128 vs = _mm_load_ss(&s); // (S 0 0 0)
- vs = b3_pshufd_ps(vs, 0x80); // (S S S 0.0)
- return b3MakeVector3(_mm_mul_ps(v.mVec128, vs));
-#elif defined(B3_USE_NEON)
- float32x4_t r = vmulq_n_f32(v.mVec128, s);
- return b3MakeVector3((float32x4_t)vandq_s32((int32x4_t)r, b3vFFF0Mask));
-#else
- return b3MakeVector3(v.m_floats[0] * s, v.m_floats[1] * s, v.m_floats[2] * s);
-#endif
-}
-
-/**@brief Return the vector scaled by s */
-B3_FORCE_INLINE b3Vector3
-operator*(const b3Scalar& s, const b3Vector3& v)
-{
- return v * s;
-}
-
-/**@brief Return the vector inversely scaled by s */
-B3_FORCE_INLINE b3Vector3
-operator/(const b3Vector3& v, const b3Scalar& s)
-{
- b3FullAssert(s != b3Scalar(0.0));
-#if 0 //defined(B3_USE_SSE_IN_API)
-// this code is not faster !
- __m128 vs = _mm_load_ss(&s);
- vs = _mm_div_ss(b3v1110, vs);
- vs = b3_pshufd_ps(vs, 0x00); // (S S S S)
-
- return b3Vector3(_mm_mul_ps(v.mVec128, vs));
-#else
- return v * (b3Scalar(1.0) / s);
-#endif
-}
-
-/**@brief Return the vector inversely scaled by s */
-B3_FORCE_INLINE b3Vector3
-operator/(const b3Vector3& v1, const b3Vector3& v2)
-{
-#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE))
- __m128 vec = _mm_div_ps(v1.mVec128, v2.mVec128);
- vec = _mm_and_ps(vec, b3vFFF0fMask);
- return b3MakeVector3(vec);
-#elif defined(B3_USE_NEON)
- float32x4_t x, y, v, m;
-
- x = v1.mVec128;
- y = v2.mVec128;
-
- v = vrecpeq_f32(y); // v ~ 1/y
- m = vrecpsq_f32(y, v); // m = (2-v*y)
- v = vmulq_f32(v, m); // vv = v*m ~~ 1/y
- m = vrecpsq_f32(y, v); // mm = (2-vv*y)
- v = vmulq_f32(v, x); // x*vv
- v = vmulq_f32(v, m); // (x*vv)*(2-vv*y) = x*(vv(2-vv*y)) ~~~ x/y
-
- return b3Vector3(v);
-#else
- return b3MakeVector3(
- v1.m_floats[0] / v2.m_floats[0],
- v1.m_floats[1] / v2.m_floats[1],
- v1.m_floats[2] / v2.m_floats[2]);
-#endif
-}
-
-/**@brief Return the dot product between two vectors */
-B3_FORCE_INLINE b3Scalar
-b3Dot(const b3Vector3& v1, const b3Vector3& v2)
-{
- return v1.dot(v2);
-}
-
-/**@brief Return the distance squared between two vectors */
-B3_FORCE_INLINE b3Scalar
-b3Distance2(const b3Vector3& v1, const b3Vector3& v2)
-{
- return v1.distance2(v2);
-}
-
-/**@brief Return the distance between two vectors */
-B3_FORCE_INLINE b3Scalar
-b3Distance(const b3Vector3& v1, const b3Vector3& v2)
-{
- return v1.distance(v2);
-}
-
-/**@brief Return the angle between two vectors */
-B3_FORCE_INLINE b3Scalar
-b3Angle(const b3Vector3& v1, const b3Vector3& v2)
-{
- return v1.angle(v2);
-}
-
-/**@brief Return the cross product of two vectors */
-B3_FORCE_INLINE b3Vector3
-b3Cross(const b3Vector3& v1, const b3Vector3& v2)
-{
- return v1.cross(v2);
-}
-
-B3_FORCE_INLINE b3Scalar
-b3Triple(const b3Vector3& v1, const b3Vector3& v2, const b3Vector3& v3)
-{
- return v1.triple(v2, v3);
-}
-
-/**@brief Return the linear interpolation between two vectors
- * @param v1 One vector
- * @param v2 The other vector
- * @param t The ration of this to v (t = 0 => return v1, t=1 => return v2) */
-B3_FORCE_INLINE b3Vector3
-b3Lerp(const b3Vector3& v1, const b3Vector3& v2, const b3Scalar& t)
-{
- return v1.lerp(v2, t);
-}
-
-B3_FORCE_INLINE b3Scalar b3Vector3::distance2(const b3Vector3& v) const
-{
- return (v - *this).length2();
-}
-
-B3_FORCE_INLINE b3Scalar b3Vector3::distance(const b3Vector3& v) const
-{
- return (v - *this).length();
-}
-
-B3_FORCE_INLINE b3Vector3 b3Vector3::normalized() const
-{
-#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
- b3Vector3 norm = *this;
-
- return norm.normalize();
-#else
- return *this / length();
-#endif
-}
-
-B3_FORCE_INLINE b3Vector3 b3Vector3::rotate(const b3Vector3& wAxis, const b3Scalar _angle) const
-{
- // wAxis must be a unit lenght vector
-
-#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
-
- __m128 O = _mm_mul_ps(wAxis.mVec128, mVec128);
- b3Scalar ssin = b3Sin(_angle);
- __m128 C = wAxis.cross(b3MakeVector3(mVec128)).mVec128;
- O = _mm_and_ps(O, b3vFFF0fMask);
- b3Scalar scos = b3Cos(_angle);
-
- __m128 vsin = _mm_load_ss(&ssin); // (S 0 0 0)
- __m128 vcos = _mm_load_ss(&scos); // (S 0 0 0)
-
- __m128 Y = b3_pshufd_ps(O, 0xC9); // (Y Z X 0)
- __m128 Z = b3_pshufd_ps(O, 0xD2); // (Z X Y 0)
- O = _mm_add_ps(O, Y);
- vsin = b3_pshufd_ps(vsin, 0x80); // (S S S 0)
- O = _mm_add_ps(O, Z);
- vcos = b3_pshufd_ps(vcos, 0x80); // (S S S 0)
-
- vsin = vsin * C;
- O = O * wAxis.mVec128;
- __m128 X = mVec128 - O;
-
- O = O + vsin;
- vcos = vcos * X;
- O = O + vcos;
-
- return b3MakeVector3(O);
-#else
- b3Vector3 o = wAxis * wAxis.dot(*this);
- b3Vector3 _x = *this - o;
- b3Vector3 _y;
-
- _y = wAxis.cross(*this);
-
- return (o + _x * b3Cos(_angle) + _y * b3Sin(_angle));
-#endif
-}
-
-B3_FORCE_INLINE long b3Vector3::maxDot(const b3Vector3* array, long array_count, b3Scalar& dotOut) const
-{
-#if defined(B3_USE_SSE) || defined(B3_USE_NEON)
-#if defined _WIN32 || defined(B3_USE_SSE)
- const long scalar_cutoff = 10;
- long b3_maxdot_large(const float* array, const float* vec, unsigned long array_count, float* dotOut);
-#elif defined B3_USE_NEON
- const long scalar_cutoff = 4;
- extern long (*_maxdot_large)(const float* array, const float* vec, unsigned long array_count, float* dotOut);
-#endif
- if (array_count < scalar_cutoff)
-#else
-
-#endif //B3_USE_SSE || B3_USE_NEON
- {
- b3Scalar maxDot = -B3_INFINITY;
- int i = 0;
- int ptIndex = -1;
- for (i = 0; i < array_count; i++)
- {
- b3Scalar dot = array[i].dot(*this);
-
- if (dot > maxDot)
- {
- maxDot = dot;
- ptIndex = i;
- }
- }
-
- b3Assert(ptIndex >= 0);
- if (ptIndex < 0)
- {
- ptIndex = 0;
- }
- dotOut = maxDot;
- return ptIndex;
- }
-#if defined(B3_USE_SSE) || defined(B3_USE_NEON)
- return b3_maxdot_large((float*)array, (float*)&m_floats[0], array_count, &dotOut);
-#endif
-}
-
-B3_FORCE_INLINE long b3Vector3::minDot(const b3Vector3* array, long array_count, b3Scalar& dotOut) const
-{
-#if defined(B3_USE_SSE) || defined(B3_USE_NEON)
-#if defined B3_USE_SSE
- const long scalar_cutoff = 10;
- long b3_mindot_large(const float* array, const float* vec, unsigned long array_count, float* dotOut);
-#elif defined B3_USE_NEON
- const long scalar_cutoff = 4;
- extern long (*b3_mindot_large)(const float* array, const float* vec, unsigned long array_count, float* dotOut);
-#else
-#error unhandled arch!
-#endif
-
- if (array_count < scalar_cutoff)
-#endif //B3_USE_SSE || B3_USE_NEON
- {
- b3Scalar minDot = B3_INFINITY;
- int i = 0;
- int ptIndex = -1;
-
- for (i = 0; i < array_count; i++)
- {
- b3Scalar dot = array[i].dot(*this);
-
- if (dot < minDot)
- {
- minDot = dot;
- ptIndex = i;
- }
- }
-
- dotOut = minDot;
-
- return ptIndex;
- }
-#if defined(B3_USE_SSE) || defined(B3_USE_NEON)
- return b3_mindot_large((float*)array, (float*)&m_floats[0], array_count, &dotOut);
-#endif
-}
-
-class b3Vector4 : public b3Vector3
-{
-public:
- B3_FORCE_INLINE b3Vector4 absolute4() const
- {
-#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
- return b3MakeVector4(_mm_and_ps(mVec128, b3vAbsfMask));
-#elif defined(B3_USE_NEON)
- return b3Vector4(vabsq_f32(mVec128));
-#else
- return b3MakeVector4(
- b3Fabs(m_floats[0]),
- b3Fabs(m_floats[1]),
- b3Fabs(m_floats[2]),
- b3Fabs(m_floats[3]));
-#endif
- }
-
- b3Scalar getW() const { return m_floats[3]; }
-
- B3_FORCE_INLINE int maxAxis4() const
- {
- int maxIndex = -1;
- b3Scalar maxVal = b3Scalar(-B3_LARGE_FLOAT);
- if (m_floats[0] > maxVal)
- {
- maxIndex = 0;
- maxVal = m_floats[0];
- }
- if (m_floats[1] > maxVal)
- {
- maxIndex = 1;
- maxVal = m_floats[1];
- }
- if (m_floats[2] > maxVal)
- {
- maxIndex = 2;
- maxVal = m_floats[2];
- }
- if (m_floats[3] > maxVal)
- {
- maxIndex = 3;
- }
-
- return maxIndex;
- }
-
- B3_FORCE_INLINE int minAxis4() const
- {
- int minIndex = -1;
- b3Scalar minVal = b3Scalar(B3_LARGE_FLOAT);
- if (m_floats[0] < minVal)
- {
- minIndex = 0;
- minVal = m_floats[0];
- }
- if (m_floats[1] < minVal)
- {
- minIndex = 1;
- minVal = m_floats[1];
- }
- if (m_floats[2] < minVal)
- {
- minIndex = 2;
- minVal = m_floats[2];
- }
- if (m_floats[3] < minVal)
- {
- minIndex = 3;
- minVal = m_floats[3];
- }
-
- return minIndex;
- }
-
- B3_FORCE_INLINE int closestAxis4() const
- {
- return absolute4().maxAxis4();
- }
-
- /**@brief Set x,y,z and zero w
- * @param x Value of x
- * @param y Value of y
- * @param z Value of z
- */
-
- /* void getValue(b3Scalar *m) const
- {
- m[0] = m_floats[0];
- m[1] = m_floats[1];
- m[2] =m_floats[2];
- }
-*/
- /**@brief Set the values
- * @param x Value of x
- * @param y Value of y
- * @param z Value of z
- * @param w Value of w
- */
- B3_FORCE_INLINE void setValue(const b3Scalar& _x, const b3Scalar& _y, const b3Scalar& _z, const b3Scalar& _w)
- {
- m_floats[0] = _x;
- m_floats[1] = _y;
- m_floats[2] = _z;
- m_floats[3] = _w;
- }
-};
-
-///b3SwapVector3Endian swaps vector endianness, useful for network and cross-platform serialization
-B3_FORCE_INLINE void b3SwapScalarEndian(const b3Scalar& sourceVal, b3Scalar& destVal)
-{
-#ifdef B3_USE_DOUBLE_PRECISION
- unsigned char* dest = (unsigned char*)&destVal;
- unsigned char* src = (unsigned char*)&sourceVal;
- dest[0] = src[7];
- dest[1] = src[6];
- dest[2] = src[5];
- dest[3] = src[4];
- dest[4] = src[3];
- dest[5] = src[2];
- dest[6] = src[1];
- dest[7] = src[0];
-#else
- unsigned char* dest = (unsigned char*)&destVal;
- unsigned char* src = (unsigned char*)&sourceVal;
- dest[0] = src[3];
- dest[1] = src[2];
- dest[2] = src[1];
- dest[3] = src[0];
-#endif //B3_USE_DOUBLE_PRECISION
-}
-///b3SwapVector3Endian swaps vector endianness, useful for network and cross-platform serialization
-B3_FORCE_INLINE void b3SwapVector3Endian(const b3Vector3& sourceVec, b3Vector3& destVec)
-{
- for (int i = 0; i < 4; i++)
- {
- b3SwapScalarEndian(sourceVec[i], destVec[i]);
- }
-}
-
-///b3UnSwapVector3Endian swaps vector endianness, useful for network and cross-platform serialization
-B3_FORCE_INLINE void b3UnSwapVector3Endian(b3Vector3& vector)
-{
- b3Vector3 swappedVec;
- for (int i = 0; i < 4; i++)
- {
- b3SwapScalarEndian(vector[i], swappedVec[i]);
- }
- vector = swappedVec;
-}
-
-template <class T>
-B3_FORCE_INLINE void b3PlaneSpace1(const T& n, T& p, T& q)
-{
- if (b3Fabs(n[2]) > B3_SQRT12)
- {
- // choose p in y-z plane
- b3Scalar a = n[1] * n[1] + n[2] * n[2];
- b3Scalar k = b3RecipSqrt(a);
- p[0] = 0;
- p[1] = -n[2] * k;
- p[2] = n[1] * k;
- // set q = n x p
- q[0] = a * k;
- q[1] = -n[0] * p[2];
- q[2] = n[0] * p[1];
- }
- else
- {
- // choose p in x-y plane
- b3Scalar a = n[0] * n[0] + n[1] * n[1];
- b3Scalar k = b3RecipSqrt(a);
- p[0] = -n[1] * k;
- p[1] = n[0] * k;
- p[2] = 0;
- // set q = n x p
- q[0] = -n[2] * p[1];
- q[1] = n[2] * p[0];
- q[2] = a * k;
- }
-}
-
-struct b3Vector3FloatData
-{
- float m_floats[4];
-};
-
-struct b3Vector3DoubleData
-{
- double m_floats[4];
-};
-
-B3_FORCE_INLINE void b3Vector3::serializeFloat(struct b3Vector3FloatData& dataOut) const
-{
- ///could also do a memcpy, check if it is worth it
- for (int i = 0; i < 4; i++)
- dataOut.m_floats[i] = float(m_floats[i]);
-}
-
-B3_FORCE_INLINE void b3Vector3::deSerializeFloat(const struct b3Vector3FloatData& dataIn)
-{
- for (int i = 0; i < 4; i++)
- m_floats[i] = b3Scalar(dataIn.m_floats[i]);
-}
-
-B3_FORCE_INLINE void b3Vector3::serializeDouble(struct b3Vector3DoubleData& dataOut) const
-{
- ///could also do a memcpy, check if it is worth it
- for (int i = 0; i < 4; i++)
- dataOut.m_floats[i] = double(m_floats[i]);
-}
-
-B3_FORCE_INLINE void b3Vector3::deSerializeDouble(const struct b3Vector3DoubleData& dataIn)
-{
- for (int i = 0; i < 4; i++)
- m_floats[i] = b3Scalar(dataIn.m_floats[i]);
-}
-
-B3_FORCE_INLINE void b3Vector3::serialize(struct b3Vector3Data& dataOut) const
-{
- ///could also do a memcpy, check if it is worth it
- for (int i = 0; i < 4; i++)
- dataOut.m_floats[i] = m_floats[i];
-}
-
-B3_FORCE_INLINE void b3Vector3::deSerialize(const struct b3Vector3Data& dataIn)
-{
- for (int i = 0; i < 4; i++)
- m_floats[i] = dataIn.m_floats[i];
-}
-
-inline b3Vector3 b3MakeVector3(b3Scalar x, b3Scalar y, b3Scalar z)
-{
- b3Vector3 tmp;
- tmp.setValue(x, y, z);
- return tmp;
-}
-
-inline b3Vector3 b3MakeVector3(b3Scalar x, b3Scalar y, b3Scalar z, b3Scalar w)
-{
- b3Vector3 tmp;
- tmp.setValue(x, y, z);
- tmp.w = w;
- return tmp;
-}
-
-inline b3Vector4 b3MakeVector4(b3Scalar x, b3Scalar y, b3Scalar z, b3Scalar w)
-{
- b3Vector4 tmp;
- tmp.setValue(x, y, z, w);
- return tmp;
-}
-
-#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
-
-inline b3Vector3 b3MakeVector3(b3SimdFloat4 v)
-{
- b3Vector3 tmp;
- tmp.set128(v);
- return tmp;
-}
-
-inline b3Vector4 b3MakeVector4(b3SimdFloat4 vec)
-{
- b3Vector4 tmp;
- tmp.set128(vec);
- return tmp;
-}
-
-#endif
-
-#endif //B3_VECTOR3_H
diff --git a/thirdparty/bullet/Bullet3Common/shared/b3Float4.h b/thirdparty/bullet/Bullet3Common/shared/b3Float4.h
deleted file mode 100644
index d8a9f47411..0000000000
--- a/thirdparty/bullet/Bullet3Common/shared/b3Float4.h
+++ /dev/null
@@ -1,90 +0,0 @@
-#ifndef B3_FLOAT4_H
-#define B3_FLOAT4_H
-
-#include "Bullet3Common/shared/b3PlatformDefinitions.h"
-
-#ifdef __cplusplus
-#include "Bullet3Common/b3Vector3.h"
-#define b3Float4 b3Vector3
-#define b3Float4ConstArg const b3Vector3&
-#define b3Dot3F4 b3Dot
-#define b3Cross3 b3Cross
-#define b3MakeFloat4 b3MakeVector3
-inline b3Vector3 b3Normalized(const b3Vector3& vec)
-{
- return vec.normalized();
-}
-
-inline b3Float4 b3FastNormalized3(b3Float4ConstArg v)
-{
- return v.normalized();
-}
-
-inline b3Float4 b3MaxFloat4(const b3Float4& a, const b3Float4& b)
-{
- b3Float4 tmp = a;
- tmp.setMax(b);
- return tmp;
-}
-inline b3Float4 b3MinFloat4(const b3Float4& a, const b3Float4& b)
-{
- b3Float4 tmp = a;
- tmp.setMin(b);
- return tmp;
-}
-
-#else
-typedef float4 b3Float4;
-#define b3Float4ConstArg const b3Float4
-#define b3MakeFloat4 (float4)
-float b3Dot3F4(b3Float4ConstArg v0, b3Float4ConstArg v1)
-{
- float4 a1 = b3MakeFloat4(v0.xyz, 0.f);
- float4 b1 = b3MakeFloat4(v1.xyz, 0.f);
- return dot(a1, b1);
-}
-b3Float4 b3Cross3(b3Float4ConstArg v0, b3Float4ConstArg v1)
-{
- float4 a1 = b3MakeFloat4(v0.xyz, 0.f);
- float4 b1 = b3MakeFloat4(v1.xyz, 0.f);
- return cross(a1, b1);
-}
-#define b3MinFloat4 min
-#define b3MaxFloat4 max
-
-#define b3Normalized(a) normalize(a)
-
-#endif
-
-inline bool b3IsAlmostZero(b3Float4ConstArg v)
-{
- if (b3Fabs(v.x) > 1e-6 || b3Fabs(v.y) > 1e-6 || b3Fabs(v.z) > 1e-6)
- return false;
- return true;
-}
-
-inline int b3MaxDot(b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut)
-{
- float maxDot = -B3_INFINITY;
- int i = 0;
- int ptIndex = -1;
- for (i = 0; i < vecLen; i++)
- {
- float dot = b3Dot3F4(vecArray[i], vec);
-
- if (dot > maxDot)
- {
- maxDot = dot;
- ptIndex = i;
- }
- }
- b3Assert(ptIndex >= 0);
- if (ptIndex < 0)
- {
- ptIndex = 0;
- }
- *dotOut = maxDot;
- return ptIndex;
-}
-
-#endif //B3_FLOAT4_H
diff --git a/thirdparty/bullet/Bullet3Common/shared/b3Int2.h b/thirdparty/bullet/Bullet3Common/shared/b3Int2.h
deleted file mode 100644
index 7b84de4436..0000000000
--- a/thirdparty/bullet/Bullet3Common/shared/b3Int2.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
-Bullet Continuous Collision Detection and Physics Library
-Copyright (c) 2003-2013 Erwin Coumans http://bulletphysics.org
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose,
-including commercial applications, and to alter it and redistribute it freely,
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-#ifndef B3_INT2_H
-#define B3_INT2_H
-
-#ifdef __cplusplus
-
-struct b3UnsignedInt2
-{
- union {
- struct
- {
- unsigned int x, y;
- };
- struct
- {
- unsigned int s[2];
- };
- };
-};
-
-struct b3Int2
-{
- union {
- struct
- {
- int x, y;
- };
- struct
- {
- int s[2];
- };
- };
-};
-
-inline b3Int2 b3MakeInt2(int x, int y)
-{
- b3Int2 v;
- v.s[0] = x;
- v.s[1] = y;
- return v;
-}
-#else
-
-#define b3UnsignedInt2 uint2
-#define b3Int2 int2
-#define b3MakeInt2 (int2)
-
-#endif //__cplusplus
-#endif \ No newline at end of file
diff --git a/thirdparty/bullet/Bullet3Common/shared/b3Int4.h b/thirdparty/bullet/Bullet3Common/shared/b3Int4.h
deleted file mode 100644
index f6a1754245..0000000000
--- a/thirdparty/bullet/Bullet3Common/shared/b3Int4.h
+++ /dev/null
@@ -1,71 +0,0 @@
-#ifndef B3_INT4_H
-#define B3_INT4_H
-
-#ifdef __cplusplus
-
-#include "Bullet3Common/b3Scalar.h"
-
-B3_ATTRIBUTE_ALIGNED16(struct)
-b3UnsignedInt4
-{
- B3_DECLARE_ALIGNED_ALLOCATOR();
-
- union {
- struct
- {
- unsigned int x, y, z, w;
- };
- struct
- {
- unsigned int s[4];
- };
- };
-};
-
-B3_ATTRIBUTE_ALIGNED16(struct)
-b3Int4
-{
- B3_DECLARE_ALIGNED_ALLOCATOR();
-
- union {
- struct
- {
- int x, y, z, w;
- };
- struct
- {
- int s[4];
- };
- };
-};
-
-B3_FORCE_INLINE b3Int4 b3MakeInt4(int x, int y, int z, int w = 0)
-{
- b3Int4 v;
- v.s[0] = x;
- v.s[1] = y;
- v.s[2] = z;
- v.s[3] = w;
- return v;
-}
-
-B3_FORCE_INLINE b3UnsignedInt4 b3MakeUnsignedInt4(unsigned int x, unsigned int y, unsigned int z, unsigned int w = 0)
-{
- b3UnsignedInt4 v;
- v.s[0] = x;
- v.s[1] = y;
- v.s[2] = z;
- v.s[3] = w;
- return v;
-}
-
-#else
-
-#define b3UnsignedInt4 uint4
-#define b3Int4 int4
-#define b3MakeInt4 (int4)
-#define b3MakeUnsignedInt4 (uint4)
-
-#endif //__cplusplus
-
-#endif //B3_INT4_H
diff --git a/thirdparty/bullet/Bullet3Common/shared/b3Mat3x3.h b/thirdparty/bullet/Bullet3Common/shared/b3Mat3x3.h
deleted file mode 100644
index ce6482b5a6..0000000000
--- a/thirdparty/bullet/Bullet3Common/shared/b3Mat3x3.h
+++ /dev/null
@@ -1,157 +0,0 @@
-
-#ifndef B3_MAT3x3_H
-#define B3_MAT3x3_H
-
-#include "Bullet3Common/shared/b3Quat.h"
-
-#ifdef __cplusplus
-
-#include "Bullet3Common/b3Matrix3x3.h"
-
-#define b3Mat3x3 b3Matrix3x3
-#define b3Mat3x3ConstArg const b3Matrix3x3&
-
-inline b3Mat3x3 b3QuatGetRotationMatrix(b3QuatConstArg quat)
-{
- return b3Mat3x3(quat);
-}
-
-inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg mat)
-{
- return mat.absolute();
-}
-
-#define b3GetRow(m, row) m.getRow(row)
-
-__inline b3Float4 mtMul3(b3Float4ConstArg a, b3Mat3x3ConstArg b)
-{
- return b * a;
-}
-
-#else
-
-typedef struct
-{
- b3Float4 m_row[3];
-} b3Mat3x3;
-
-#define b3Mat3x3ConstArg const b3Mat3x3
-#define b3GetRow(m, row) (m.m_row[row])
-
-inline b3Mat3x3 b3QuatGetRotationMatrix(b3Quat quat)
-{
- b3Float4 quat2 = (b3Float4)(quat.x * quat.x, quat.y * quat.y, quat.z * quat.z, 0.f);
- b3Mat3x3 out;
-
- out.m_row[0].x = 1 - 2 * quat2.y - 2 * quat2.z;
- out.m_row[0].y = 2 * quat.x * quat.y - 2 * quat.w * quat.z;
- out.m_row[0].z = 2 * quat.x * quat.z + 2 * quat.w * quat.y;
- out.m_row[0].w = 0.f;
-
- out.m_row[1].x = 2 * quat.x * quat.y + 2 * quat.w * quat.z;
- out.m_row[1].y = 1 - 2 * quat2.x - 2 * quat2.z;
- out.m_row[1].z = 2 * quat.y * quat.z - 2 * quat.w * quat.x;
- out.m_row[1].w = 0.f;
-
- out.m_row[2].x = 2 * quat.x * quat.z - 2 * quat.w * quat.y;
- out.m_row[2].y = 2 * quat.y * quat.z + 2 * quat.w * quat.x;
- out.m_row[2].z = 1 - 2 * quat2.x - 2 * quat2.y;
- out.m_row[2].w = 0.f;
-
- return out;
-}
-
-inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg matIn)
-{
- b3Mat3x3 out;
- out.m_row[0] = fabs(matIn.m_row[0]);
- out.m_row[1] = fabs(matIn.m_row[1]);
- out.m_row[2] = fabs(matIn.m_row[2]);
- return out;
-}
-
-__inline b3Mat3x3 mtZero();
-
-__inline b3Mat3x3 mtIdentity();
-
-__inline b3Mat3x3 mtTranspose(b3Mat3x3 m);
-
-__inline b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b);
-
-__inline b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b);
-
-__inline b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b);
-
-__inline b3Mat3x3 mtZero()
-{
- b3Mat3x3 m;
- m.m_row[0] = (b3Float4)(0.f);
- m.m_row[1] = (b3Float4)(0.f);
- m.m_row[2] = (b3Float4)(0.f);
- return m;
-}
-
-__inline b3Mat3x3 mtIdentity()
-{
- b3Mat3x3 m;
- m.m_row[0] = (b3Float4)(1, 0, 0, 0);
- m.m_row[1] = (b3Float4)(0, 1, 0, 0);
- m.m_row[2] = (b3Float4)(0, 0, 1, 0);
- return m;
-}
-
-__inline b3Mat3x3 mtTranspose(b3Mat3x3 m)
-{
- b3Mat3x3 out;
- out.m_row[0] = (b3Float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);
- out.m_row[1] = (b3Float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);
- out.m_row[2] = (b3Float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);
- return out;
-}
-
-__inline b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b)
-{
- b3Mat3x3 transB;
- transB = mtTranspose(b);
- b3Mat3x3 ans;
- // why this doesn't run when 0ing in the for{}
- a.m_row[0].w = 0.f;
- a.m_row[1].w = 0.f;
- a.m_row[2].w = 0.f;
- for (int i = 0; i < 3; i++)
- {
- // a.m_row[i].w = 0.f;
- ans.m_row[i].x = b3Dot3F4(a.m_row[i], transB.m_row[0]);
- ans.m_row[i].y = b3Dot3F4(a.m_row[i], transB.m_row[1]);
- ans.m_row[i].z = b3Dot3F4(a.m_row[i], transB.m_row[2]);
- ans.m_row[i].w = 0.f;
- }
- return ans;
-}
-
-__inline b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b)
-{
- b3Float4 ans;
- ans.x = b3Dot3F4(a.m_row[0], b);
- ans.y = b3Dot3F4(a.m_row[1], b);
- ans.z = b3Dot3F4(a.m_row[2], b);
- ans.w = 0.f;
- return ans;
-}
-
-__inline b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b)
-{
- b3Float4 colx = b3MakeFloat4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);
- b3Float4 coly = b3MakeFloat4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);
- b3Float4 colz = b3MakeFloat4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);
-
- b3Float4 ans;
- ans.x = b3Dot3F4(a, colx);
- ans.y = b3Dot3F4(a, coly);
- ans.z = b3Dot3F4(a, colz);
- return ans;
-}
-
-#endif
-
-#endif //B3_MAT3x3_H
diff --git a/thirdparty/bullet/Bullet3Common/shared/b3PlatformDefinitions.h b/thirdparty/bullet/Bullet3Common/shared/b3PlatformDefinitions.h
deleted file mode 100644
index b72bee9310..0000000000
--- a/thirdparty/bullet/Bullet3Common/shared/b3PlatformDefinitions.h
+++ /dev/null
@@ -1,41 +0,0 @@
-#ifndef B3_PLATFORM_DEFINITIONS_H
-#define B3_PLATFORM_DEFINITIONS_H
-
-struct MyTest
-{
- int bla;
-};
-
-#ifdef __cplusplus
-//#define b3ConstArray(a) const b3AlignedObjectArray<a>&
-#define b3ConstArray(a) const a *
-#define b3AtomicInc(a) ((*a)++)
-
-inline int b3AtomicAdd(volatile int *p, int val)
-{
- int oldValue = *p;
- int newValue = oldValue + val;
- *p = newValue;
- return oldValue;
-}
-
-#define __global
-
-#define B3_STATIC static
-#else
-//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX
-#define B3_LARGE_FLOAT 1e18f
-#define B3_INFINITY 1e18f
-#define b3Assert(a)
-#define b3ConstArray(a) __global const a *
-#define b3AtomicInc atomic_inc
-#define b3AtomicAdd atomic_add
-#define b3Fabs fabs
-#define b3Sqrt native_sqrt
-#define b3Sin native_sin
-#define b3Cos native_cos
-
-#define B3_STATIC
-#endif
-
-#endif
diff --git a/thirdparty/bullet/Bullet3Common/shared/b3Quat.h b/thirdparty/bullet/Bullet3Common/shared/b3Quat.h
deleted file mode 100644
index 940610c77b..0000000000
--- a/thirdparty/bullet/Bullet3Common/shared/b3Quat.h
+++ /dev/null
@@ -1,100 +0,0 @@
-#ifndef B3_QUAT_H
-#define B3_QUAT_H
-
-#include "Bullet3Common/shared/b3PlatformDefinitions.h"
-#include "Bullet3Common/shared/b3Float4.h"
-
-#ifdef __cplusplus
-#include "Bullet3Common/b3Quaternion.h"
-#include "Bullet3Common/b3Transform.h"
-
-#define b3Quat b3Quaternion
-#define b3QuatConstArg const b3Quaternion&
-inline b3Quat b3QuatInverse(b3QuatConstArg orn)
-{
- return orn.inverse();
-}
-
-inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg orientation)
-{
- b3Transform tr;
- tr.setOrigin(translation);
- tr.setRotation(orientation);
- return tr(point);
-}
-
-#else
-typedef float4 b3Quat;
-#define b3QuatConstArg const b3Quat
-
-inline float4 b3FastNormalize4(float4 v)
-{
- v = (float4)(v.xyz, 0.f);
- return fast_normalize(v);
-}
-
-inline b3Quat b3QuatMul(b3Quat a, b3Quat b);
-inline b3Quat b3QuatNormalized(b3QuatConstArg in);
-inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);
-inline b3Quat b3QuatInvert(b3QuatConstArg q);
-inline b3Quat b3QuatInverse(b3QuatConstArg q);
-
-inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)
-{
- b3Quat ans;
- ans = b3Cross3(a, b);
- ans += a.w * b + b.w * a;
- // ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);
- ans.w = a.w * b.w - b3Dot3F4(a, b);
- return ans;
-}
-
-inline b3Quat b3QuatNormalized(b3QuatConstArg in)
-{
- b3Quat q;
- q = in;
- //return b3FastNormalize4(in);
- float len = native_sqrt(dot(q, q));
- if (len > 0.f)
- {
- q *= 1.f / len;
- }
- else
- {
- q.x = q.y = q.z = 0.f;
- q.w = 1.f;
- }
- return q;
-}
-inline float4 b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec)
-{
- b3Quat qInv = b3QuatInvert(q);
- float4 vcpy = vec;
- vcpy.w = 0.f;
- float4 out = b3QuatMul(b3QuatMul(q, vcpy), qInv);
- return out;
-}
-
-inline b3Quat b3QuatInverse(b3QuatConstArg q)
-{
- return (b3Quat)(-q.xyz, q.w);
-}
-
-inline b3Quat b3QuatInvert(b3QuatConstArg q)
-{
- return (b3Quat)(-q.xyz, q.w);
-}
-
-inline float4 b3QuatInvRotate(b3QuatConstArg q, b3QuatConstArg vec)
-{
- return b3QuatRotate(b3QuatInvert(q), vec);
-}
-
-inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg orientation)
-{
- return b3QuatRotate(orientation, point) + (translation);
-}
-
-#endif
-
-#endif //B3_QUAT_H