diff options
author | Rémi Verschelde <rverschelde@gmail.com> | 2022-03-09 21:15:53 +0100 |
---|---|---|
committer | Rémi Verschelde <rverschelde@gmail.com> | 2022-03-09 21:45:47 +0100 |
commit | 3d7f1555865a981b7144becfc58d3f3f34362f5f (patch) | |
tree | d92912c6d700468b3330148b9179026b9f4efcb4 /thirdparty/bullet/Bullet3Common | |
parent | 33c907f9f5b3ec1a43d0251d7cac80da49b5b658 (diff) |
Remove unused Bullet module and thirdparty code
It has been disabled in `master` for a year (#45852) and our plan
is for Bullet, and possibly other thirdparty physics engines, to be
implemented via GDExtension so that they can be selected by the users
who need them.
Diffstat (limited to 'thirdparty/bullet/Bullet3Common')
27 files changed, 0 insertions, 9414 deletions
diff --git a/thirdparty/bullet/Bullet3Common/b3AlignedAllocator.cpp b/thirdparty/bullet/Bullet3Common/b3AlignedAllocator.cpp deleted file mode 100644 index d546d5e066..0000000000 --- a/thirdparty/bullet/Bullet3Common/b3AlignedAllocator.cpp +++ /dev/null @@ -1,186 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2013 Erwin Coumans http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. 
-*/ - -#include "b3AlignedAllocator.h" - -#ifdef B3_ALLOCATOR_STATISTICS -int b3g_numAlignedAllocs = 0; -int b3g_numAlignedFree = 0; -int b3g_totalBytesAlignedAllocs = 0; //detect memory leaks -#endif - -static void *b3AllocDefault(size_t size) -{ - return malloc(size); -} - -static void b3FreeDefault(void *ptr) -{ - free(ptr); -} - -static b3AllocFunc *b3s_allocFunc = b3AllocDefault; -static b3FreeFunc *b3s_freeFunc = b3FreeDefault; - -#if defined(B3_HAS_ALIGNED_ALLOCATOR) -#include <malloc.h> -static void *b3AlignedAllocDefault(size_t size, int alignment) -{ - return _aligned_malloc(size, (size_t)alignment); -} - -static void b3AlignedFreeDefault(void *ptr) -{ - _aligned_free(ptr); -} -#elif defined(__CELLOS_LV2__) -#include <stdlib.h> - -static inline void *b3AlignedAllocDefault(size_t size, int alignment) -{ - return memalign(alignment, size); -} - -static inline void b3AlignedFreeDefault(void *ptr) -{ - free(ptr); -} -#else - -static inline void *b3AlignedAllocDefault(size_t size, int alignment) -{ - void *ret; - char *real; - real = (char *)b3s_allocFunc(size + sizeof(void *) + (alignment - 1)); - if (real) - { - ret = b3AlignPointer(real + sizeof(void *), alignment); - *((void **)(ret)-1) = (void *)(real); - } - else - { - ret = (void *)(real); - } - return (ret); -} - -static inline void b3AlignedFreeDefault(void *ptr) -{ - void *real; - - if (ptr) - { - real = *((void **)(ptr)-1); - b3s_freeFunc(real); - } -} -#endif - -static b3AlignedAllocFunc *b3s_alignedAllocFunc = b3AlignedAllocDefault; -static b3AlignedFreeFunc *b3s_alignedFreeFunc = b3AlignedFreeDefault; - -void b3AlignedAllocSetCustomAligned(b3AlignedAllocFunc *allocFunc, b3AlignedFreeFunc *freeFunc) -{ - b3s_alignedAllocFunc = allocFunc ? allocFunc : b3AlignedAllocDefault; - b3s_alignedFreeFunc = freeFunc ? freeFunc : b3AlignedFreeDefault; -} - -void b3AlignedAllocSetCustom(b3AllocFunc *allocFunc, b3FreeFunc *freeFunc) -{ - b3s_allocFunc = allocFunc ? 
allocFunc : b3AllocDefault; - b3s_freeFunc = freeFunc ? freeFunc : b3FreeDefault; -} - -#ifdef B3_DEBUG_MEMORY_ALLOCATIONS -//this generic allocator provides the total allocated number of bytes -#include <stdio.h> - -void *b3AlignedAllocInternal(size_t size, int alignment, int line, char *filename) -{ - void *ret; - char *real; -#ifdef B3_ALLOCATOR_STATISTICS - b3g_totalBytesAlignedAllocs += size; - b3g_numAlignedAllocs++; -#endif - real = (char *)b3s_allocFunc(size + 2 * sizeof(void *) + (alignment - 1)); - if (real) - { - ret = (void *)b3AlignPointer(real + 2 * sizeof(void *), alignment); - *((void **)(ret)-1) = (void *)(real); - *((int *)(ret)-2) = size; - } - else - { - ret = (void *)(real); //?? - } - - b3Printf("allocation#%d at address %x, from %s,line %d, size %d\n", b3g_numAlignedAllocs, real, filename, line, size); - - int *ptr = (int *)ret; - *ptr = 12; - return (ret); -} - -void b3AlignedFreeInternal(void *ptr, int line, char *filename) -{ - void *real; -#ifdef B3_ALLOCATOR_STATISTICS - b3g_numAlignedFree++; -#endif - if (ptr) - { - real = *((void **)(ptr)-1); - int size = *((int *)(ptr)-2); -#ifdef B3_ALLOCATOR_STATISTICS - b3g_totalBytesAlignedAllocs -= size; -#endif - b3Printf("free #%d at address %x, from %s,line %d, size %d\n", b3g_numAlignedFree, real, filename, line, size); - - b3s_freeFunc(real); - } - else - { - b3Printf("NULL ptr\n"); - } -} - -#else //B3_DEBUG_MEMORY_ALLOCATIONS - -void *b3AlignedAllocInternal(size_t size, int alignment) -{ -#ifdef B3_ALLOCATOR_STATISTICS - b3g_numAlignedAllocs++; -#endif - void *ptr; - ptr = b3s_alignedAllocFunc(size, alignment); - // b3Printf("b3AlignedAllocInternal %d, %x\n",size,ptr); - return ptr; -} - -void b3AlignedFreeInternal(void *ptr) -{ - if (!ptr) - { - return; - } -#ifdef B3_ALLOCATOR_STATISTICS - b3g_numAlignedFree++; -#endif - // b3Printf("b3AlignedFreeInternal %x\n",ptr); - b3s_alignedFreeFunc(ptr); -} - -#endif //B3_DEBUG_MEMORY_ALLOCATIONS diff --git 
a/thirdparty/bullet/Bullet3Common/b3AlignedAllocator.h b/thirdparty/bullet/Bullet3Common/b3AlignedAllocator.h deleted file mode 100644 index bcff9f128e..0000000000 --- a/thirdparty/bullet/Bullet3Common/b3AlignedAllocator.h +++ /dev/null @@ -1,110 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2013 Erwin Coumans http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. 
-*/ - -#ifndef B3_ALIGNED_ALLOCATOR -#define B3_ALIGNED_ALLOCATOR - -///we probably replace this with our own aligned memory allocator -///so we replace _aligned_malloc and _aligned_free with our own -///that is better portable and more predictable - -#include "b3Scalar.h" -//#define B3_DEBUG_MEMORY_ALLOCATIONS 1 -#ifdef B3_DEBUG_MEMORY_ALLOCATIONS - -#define b3AlignedAlloc(a, b) \ - b3AlignedAllocInternal(a, b, __LINE__, __FILE__) - -#define b3AlignedFree(ptr) \ - b3AlignedFreeInternal(ptr, __LINE__, __FILE__) - -void* b3AlignedAllocInternal(size_t size, int alignment, int line, char* filename); - -void b3AlignedFreeInternal(void* ptr, int line, char* filename); - -#else -void* b3AlignedAllocInternal(size_t size, int alignment); -void b3AlignedFreeInternal(void* ptr); - -#define b3AlignedAlloc(size, alignment) b3AlignedAllocInternal(size, alignment) -#define b3AlignedFree(ptr) b3AlignedFreeInternal(ptr) - -#endif -typedef int btSizeType; - -typedef void*(b3AlignedAllocFunc)(size_t size, int alignment); -typedef void(b3AlignedFreeFunc)(void* memblock); -typedef void*(b3AllocFunc)(size_t size); -typedef void(b3FreeFunc)(void* memblock); - -///The developer can let all Bullet memory allocations go through a custom memory allocator, using b3AlignedAllocSetCustom -void b3AlignedAllocSetCustom(b3AllocFunc* allocFunc, b3FreeFunc* freeFunc); -///If the developer has already an custom aligned allocator, then b3AlignedAllocSetCustomAligned can be used. The default aligned allocator pre-allocates extra memory using the non-aligned allocator, and instruments it. -void b3AlignedAllocSetCustomAligned(b3AlignedAllocFunc* allocFunc, b3AlignedFreeFunc* freeFunc); - -///The b3AlignedAllocator is a portable class for aligned memory allocations. -///Default implementations for unaligned and aligned allocations can be overridden by a custom allocator using b3AlignedAllocSetCustom and b3AlignedAllocSetCustomAligned. 
-template <typename T, unsigned Alignment> -class b3AlignedAllocator -{ - typedef b3AlignedAllocator<T, Alignment> self_type; - -public: - //just going down a list: - b3AlignedAllocator() {} - /* - b3AlignedAllocator( const self_type & ) {} - */ - - template <typename Other> - b3AlignedAllocator(const b3AlignedAllocator<Other, Alignment>&) - { - } - - typedef const T* const_pointer; - typedef const T& const_reference; - typedef T* pointer; - typedef T& reference; - typedef T value_type; - - pointer address(reference ref) const { return &ref; } - const_pointer address(const_reference ref) const { return &ref; } - pointer allocate(btSizeType n, const_pointer* hint = 0) - { - (void)hint; - return reinterpret_cast<pointer>(b3AlignedAlloc(sizeof(value_type) * n, Alignment)); - } - void construct(pointer ptr, const value_type& value) { new (ptr) value_type(value); } - void deallocate(pointer ptr) - { - b3AlignedFree(reinterpret_cast<void*>(ptr)); - } - void destroy(pointer ptr) { ptr->~value_type(); } - - template <typename O> - struct rebind - { - typedef b3AlignedAllocator<O, Alignment> other; - }; - template <typename O> - self_type& operator=(const b3AlignedAllocator<O, Alignment>&) - { - return *this; - } - - friend bool operator==(const self_type&, const self_type&) { return true; } -}; - -#endif //B3_ALIGNED_ALLOCATOR diff --git a/thirdparty/bullet/Bullet3Common/b3AlignedObjectArray.h b/thirdparty/bullet/Bullet3Common/b3AlignedObjectArray.h deleted file mode 100644 index 249e381bf1..0000000000 --- a/thirdparty/bullet/Bullet3Common/b3AlignedObjectArray.h +++ /dev/null @@ -1,522 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2013 Erwin Coumans http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. 
-Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#ifndef B3_OBJECT_ARRAY__ -#define B3_OBJECT_ARRAY__ - -#include "b3Scalar.h" // has definitions like B3_FORCE_INLINE -#include "b3AlignedAllocator.h" - -///If the platform doesn't support placement new, you can disable B3_USE_PLACEMENT_NEW -///then the b3AlignedObjectArray doesn't support objects with virtual methods, and non-trivial constructors/destructors -///You can enable B3_USE_MEMCPY, then swapping elements in the array will use memcpy instead of operator= -///see discussion here: http://continuousphysics.com/Bullet/phpBB2/viewtopic.php?t=1231 and -///http://www.continuousphysics.com/Bullet/phpBB2/viewtopic.php?t=1240 - -#define B3_USE_PLACEMENT_NEW 1 -//#define B3_USE_MEMCPY 1 //disable, because it is cumbersome to find out for each platform where memcpy is defined. It can be in <memory.h> or <string.h> or otherwise... 
-#define B3_ALLOW_ARRAY_COPY_OPERATOR // enabling this can accidently perform deep copies of data if you are not careful - -#ifdef B3_USE_MEMCPY -#include <memory.h> -#include <string.h> -#endif //B3_USE_MEMCPY - -#ifdef B3_USE_PLACEMENT_NEW -#include <new> //for placement new -#endif //B3_USE_PLACEMENT_NEW - -///The b3AlignedObjectArray template class uses a subset of the stl::vector interface for its methods -///It is developed to replace stl::vector to avoid portability issues, including STL alignment issues to add SIMD/SSE data -template <typename T> -//template <class T> -class b3AlignedObjectArray -{ - b3AlignedAllocator<T, 16> m_allocator; - - int m_size; - int m_capacity; - T* m_data; - //PCK: added this line - bool m_ownsMemory; - -#ifdef B3_ALLOW_ARRAY_COPY_OPERATOR -public: - B3_FORCE_INLINE b3AlignedObjectArray<T>& operator=(const b3AlignedObjectArray<T>& other) - { - copyFromArray(other); - return *this; - } -#else //B3_ALLOW_ARRAY_COPY_OPERATOR -private: - B3_FORCE_INLINE b3AlignedObjectArray<T>& operator=(const b3AlignedObjectArray<T>& other); -#endif //B3_ALLOW_ARRAY_COPY_OPERATOR - -protected: - B3_FORCE_INLINE int allocSize(int size) - { - return (size ? 
size * 2 : 1); - } - B3_FORCE_INLINE void copy(int start, int end, T* dest) const - { - int i; - for (i = start; i < end; ++i) -#ifdef B3_USE_PLACEMENT_NEW - new (&dest[i]) T(m_data[i]); -#else - dest[i] = m_data[i]; -#endif //B3_USE_PLACEMENT_NEW - } - - B3_FORCE_INLINE void init() - { - //PCK: added this line - m_ownsMemory = true; - m_data = 0; - m_size = 0; - m_capacity = 0; - } - B3_FORCE_INLINE void destroy(int first, int last) - { - int i; - for (i = first; i < last; i++) - { - m_data[i].~T(); - } - } - - B3_FORCE_INLINE void* allocate(int size) - { - if (size) - return m_allocator.allocate(size); - return 0; - } - - B3_FORCE_INLINE void deallocate() - { - if (m_data) - { - //PCK: enclosed the deallocation in this block - if (m_ownsMemory) - { - m_allocator.deallocate(m_data); - } - m_data = 0; - } - } - -public: - b3AlignedObjectArray() - { - init(); - } - - ~b3AlignedObjectArray() - { - clear(); - } - - ///Generally it is best to avoid using the copy constructor of an b3AlignedObjectArray, and use a (const) reference to the array instead. - b3AlignedObjectArray(const b3AlignedObjectArray& otherArray) - { - init(); - - int otherSize = otherArray.size(); - resize(otherSize); - otherArray.copy(0, otherSize, m_data); - } - - /// return the number of elements in the array - B3_FORCE_INLINE int size() const - { - return m_size; - } - - B3_FORCE_INLINE const T& at(int n) const - { - b3Assert(n >= 0); - b3Assert(n < size()); - return m_data[n]; - } - - B3_FORCE_INLINE T& at(int n) - { - b3Assert(n >= 0); - b3Assert(n < size()); - return m_data[n]; - } - - B3_FORCE_INLINE const T& operator[](int n) const - { - b3Assert(n >= 0); - b3Assert(n < size()); - return m_data[n]; - } - - B3_FORCE_INLINE T& operator[](int n) - { - b3Assert(n >= 0); - b3Assert(n < size()); - return m_data[n]; - } - - ///clear the array, deallocated memory. Generally it is better to use array.resize(0), to reduce performance overhead of run-time memory (de)allocations. 
- B3_FORCE_INLINE void clear() - { - destroy(0, size()); - - deallocate(); - - init(); - } - - B3_FORCE_INLINE void pop_back() - { - b3Assert(m_size > 0); - m_size--; - m_data[m_size].~T(); - } - - ///resize changes the number of elements in the array. If the new size is larger, the new elements will be constructed using the optional second argument. - ///when the new number of elements is smaller, the destructor will be called, but memory will not be freed, to reduce performance overhead of run-time memory (de)allocations. - B3_FORCE_INLINE void resizeNoInitialize(int newsize) - { - int curSize = size(); - - if (newsize < curSize) - { - } - else - { - if (newsize > size()) - { - reserve(newsize); - } - //leave this uninitialized - } - m_size = newsize; - } - - B3_FORCE_INLINE void resize(int newsize, const T& fillData = T()) - { - int curSize = size(); - - if (newsize < curSize) - { - for (int i = newsize; i < curSize; i++) - { - m_data[i].~T(); - } - } - else - { - if (newsize > size()) - { - reserve(newsize); - } -#ifdef B3_USE_PLACEMENT_NEW - for (int i = curSize; i < newsize; i++) - { - new (&m_data[i]) T(fillData); - } -#endif //B3_USE_PLACEMENT_NEW - } - - m_size = newsize; - } - B3_FORCE_INLINE T& expandNonInitializing() - { - int sz = size(); - if (sz == capacity()) - { - reserve(allocSize(size())); - } - m_size++; - - return m_data[sz]; - } - - B3_FORCE_INLINE T& expand(const T& fillValue = T()) - { - int sz = size(); - if (sz == capacity()) - { - reserve(allocSize(size())); - } - m_size++; -#ifdef B3_USE_PLACEMENT_NEW - new (&m_data[sz]) T(fillValue); //use the in-place new (not really allocating heap memory) -#endif - - return m_data[sz]; - } - - B3_FORCE_INLINE void push_back(const T& _Val) - { - int sz = size(); - if (sz == capacity()) - { - reserve(allocSize(size())); - } - -#ifdef B3_USE_PLACEMENT_NEW - new (&m_data[m_size]) T(_Val); -#else - m_data[size()] = _Val; -#endif //B3_USE_PLACEMENT_NEW - - m_size++; - } - - /// return the pre-allocated 
(reserved) elements, this is at least as large as the total number of elements,see size() and reserve() - B3_FORCE_INLINE int capacity() const - { - return m_capacity; - } - - B3_FORCE_INLINE void reserve(int _Count) - { // determine new minimum length of allocated storage - if (capacity() < _Count) - { // not enough room, reallocate - T* s = (T*)allocate(_Count); - b3Assert(s); - if (s == 0) - { - b3Error("b3AlignedObjectArray reserve out-of-memory\n"); - _Count = 0; - m_size = 0; - } - copy(0, size(), s); - - destroy(0, size()); - - deallocate(); - - //PCK: added this line - m_ownsMemory = true; - - m_data = s; - - m_capacity = _Count; - } - } - - class less - { - public: - bool operator()(const T& a, const T& b) - { - return (a < b); - } - }; - - template <typename L> - void quickSortInternal(const L& CompareFunc, int lo, int hi) - { - // lo is the lower index, hi is the upper index - // of the region of array a that is to be sorted - int i = lo, j = hi; - T x = m_data[(lo + hi) / 2]; - - // partition - do - { - while (CompareFunc(m_data[i], x)) - i++; - while (CompareFunc(x, m_data[j])) - j--; - if (i <= j) - { - swap(i, j); - i++; - j--; - } - } while (i <= j); - - // recursion - if (lo < j) - quickSortInternal(CompareFunc, lo, j); - if (i < hi) - quickSortInternal(CompareFunc, i, hi); - } - - template <typename L> - void quickSort(const L& CompareFunc) - { - //don't sort 0 or 1 elements - if (size() > 1) - { - quickSortInternal(CompareFunc, 0, size() - 1); - } - } - - ///heap sort from http://www.csse.monash.edu.au/~lloyd/tildeAlgDS/Sort/Heap/ - template <typename L> - void downHeap(T* pArr, int k, int n, const L& CompareFunc) - { - /* PRE: a[k+1..N] is a heap */ - /* POST: a[k..N] is a heap */ - - T temp = pArr[k - 1]; - /* k has child(s) */ - while (k <= n / 2) - { - int child = 2 * k; - - if ((child < n) && CompareFunc(pArr[child - 1], pArr[child])) - { - child++; - } - /* pick larger child */ - if (CompareFunc(temp, pArr[child - 1])) - { - /* move child 
up */ - pArr[k - 1] = pArr[child - 1]; - k = child; - } - else - { - break; - } - } - pArr[k - 1] = temp; - } /*downHeap*/ - - void swap(int index0, int index1) - { -#ifdef B3_USE_MEMCPY - char temp[sizeof(T)]; - memcpy(temp, &m_data[index0], sizeof(T)); - memcpy(&m_data[index0], &m_data[index1], sizeof(T)); - memcpy(&m_data[index1], temp, sizeof(T)); -#else - T temp = m_data[index0]; - m_data[index0] = m_data[index1]; - m_data[index1] = temp; -#endif //B3_USE_PLACEMENT_NEW - } - - template <typename L> - void heapSort(const L& CompareFunc) - { - /* sort a[0..N-1], N.B. 0 to N-1 */ - int k; - int n = m_size; - for (k = n / 2; k > 0; k--) - { - downHeap(m_data, k, n, CompareFunc); - } - - /* a[1..N] is now a heap */ - while (n >= 1) - { - swap(0, n - 1); /* largest of a[0..n-1] */ - - n = n - 1; - /* restore a[1..i-1] heap */ - downHeap(m_data, 1, n, CompareFunc); - } - } - - ///non-recursive binary search, assumes sorted array - int findBinarySearch(const T& key) const - { - int first = 0; - int last = size() - 1; - - //assume sorted array - while (first <= last) - { - int mid = (first + last) / 2; // compute mid point. - if (key > m_data[mid]) - first = mid + 1; // repeat search in top half. - else if (key < m_data[mid]) - last = mid - 1; // repeat search in bottom half. - else - return mid; // found it. 
return position ///// - } - return size(); // failed to find key - } - - int findLinearSearch(const T& key) const - { - int index = size(); - int i; - - for (i = 0; i < size(); i++) - { - if (m_data[i] == key) - { - index = i; - break; - } - } - return index; - } - - int findLinearSearch2(const T& key) const - { - int index = -1; - int i; - - for (i = 0; i < size(); i++) - { - if (m_data[i] == key) - { - index = i; - break; - } - } - return index; - } - - void remove(const T& key) - { - int findIndex = findLinearSearch(key); - if (findIndex < size()) - { - swap(findIndex, size() - 1); - pop_back(); - } - } - - //PCK: whole function - void initializeFromBuffer(void* buffer, int size, int capacity) - { - clear(); - m_ownsMemory = false; - m_data = (T*)buffer; - m_size = size; - m_capacity = capacity; - } - - void copyFromArray(const b3AlignedObjectArray& otherArray) - { - int otherSize = otherArray.size(); - resize(otherSize); - otherArray.copy(0, otherSize, m_data); - } - - void removeAtIndex(int index) - { - if (index < size()) - { - swap(index, size() - 1); - pop_back(); - } - } -}; - -#endif //B3_OBJECT_ARRAY__ diff --git a/thirdparty/bullet/Bullet3Common/b3CommandLineArgs.h b/thirdparty/bullet/Bullet3Common/b3CommandLineArgs.h deleted file mode 100644 index 5fe4f25f8d..0000000000 --- a/thirdparty/bullet/Bullet3Common/b3CommandLineArgs.h +++ /dev/null @@ -1,106 +0,0 @@ -#ifndef COMMAND_LINE_ARGS_H -#define COMMAND_LINE_ARGS_H - -/****************************************************************************** - * Command-line parsing - ******************************************************************************/ -#include <map> -#include <algorithm> -#include <string> -#include <cstring> -#include <sstream> -class b3CommandLineArgs -{ -protected: - std::map<std::string, std::string> pairs; - -public: - // Constructor - b3CommandLineArgs(int argc, char **argv) - { - addArgs(argc, argv); - } - - void addArgs(int argc, char **argv) - { - for (int i = 1; i < argc; 
i++) - { - std::string arg = argv[i]; - - if ((arg.length() < 2) || (arg[0] != '-') || (arg[1] != '-')) - { - continue; - } - - std::string::size_type pos; - std::string key, val; - if ((pos = arg.find('=')) == std::string::npos) - { - key = std::string(arg, 2, arg.length() - 2); - val = ""; - } - else - { - key = std::string(arg, 2, pos - 2); - val = std::string(arg, pos + 1, arg.length() - 1); - } - - //only add new keys, don't replace existing - if (pairs.find(key) == pairs.end()) - { - pairs[key] = val; - } - } - } - - bool CheckCmdLineFlag(const char *arg_name) - { - std::map<std::string, std::string>::iterator itr; - if ((itr = pairs.find(arg_name)) != pairs.end()) - { - return true; - } - return false; - } - - template <typename T> - bool GetCmdLineArgument(const char *arg_name, T &val); - - int ParsedArgc() - { - return pairs.size(); - } -}; - -template <typename T> -inline bool b3CommandLineArgs::GetCmdLineArgument(const char *arg_name, T &val) -{ - std::map<std::string, std::string>::iterator itr; - if ((itr = pairs.find(arg_name)) != pairs.end()) - { - std::istringstream strstream(itr->second); - strstream >> val; - return true; - } - return false; -} - -template <> -inline bool b3CommandLineArgs::GetCmdLineArgument<char *>(const char *arg_name, char *&val) -{ - std::map<std::string, std::string>::iterator itr; - if ((itr = pairs.find(arg_name)) != pairs.end()) - { - std::string s = itr->second; - val = (char *)malloc(sizeof(char) * (s.length() + 1)); - std::strcpy(val, s.c_str()); - return true; - } - else - { - val = NULL; - } - return false; -} - -#endif //COMMAND_LINE_ARGS_H diff --git a/thirdparty/bullet/Bullet3Common/b3FileUtils.h b/thirdparty/bullet/Bullet3Common/b3FileUtils.h deleted file mode 100644 index 9ded17eaaf..0000000000 --- a/thirdparty/bullet/Bullet3Common/b3FileUtils.h +++ /dev/null @@ -1,133 +0,0 @@ -#ifndef B3_FILE_UTILS_H -#define B3_FILE_UTILS_H - -#include <stdio.h> -#include "b3Scalar.h" -#include <stddef.h> //ptrdiff_h -#include 
<string.h> - -struct b3FileUtils -{ - b3FileUtils() - { - } - virtual ~b3FileUtils() - { - } - - static bool findFile(const char* orgFileName, char* relativeFileName, int maxRelativeFileNameMaxLen) - { - FILE* f = 0; - f = fopen(orgFileName, "rb"); - if (f) - { - //printf("original file found: [%s]\n", orgFileName); - sprintf(relativeFileName, "%s", orgFileName); - fclose(f); - return true; - } - - //printf("Trying various directories, relative to current working directory\n"); - const char* prefix[] = {"./", "./data/", "../data/", "../../data/", "../../../data/", "../../../../data/"}; - int numPrefixes = sizeof(prefix) / sizeof(const char*); - - f = 0; - bool fileFound = false; - - for (int i = 0; !f && i < numPrefixes; i++) - { -#ifdef _MSC_VER - sprintf_s(relativeFileName, maxRelativeFileNameMaxLen, "%s%s", prefix[i], orgFileName); -#else - sprintf(relativeFileName, "%s%s", prefix[i], orgFileName); -#endif - f = fopen(relativeFileName, "rb"); - if (f) - { - fileFound = true; - break; - } - } - if (f) - { - fclose(f); - } - - return fileFound; - } - - static const char* strip2(const char* name, const char* pattern) - { - size_t const patlen = strlen(pattern); - size_t patcnt = 0; - const char* oriptr; - const char* patloc; - // find how many times the pattern occurs in the original string - for (oriptr = name; (patloc = strstr(oriptr, pattern)); oriptr = patloc + patlen) - { - patcnt++; - } - return oriptr; - } - - static int extractPath(const char* fileName, char* path, int maxPathLength) - { - const char* stripped = strip2(fileName, "/"); - stripped = strip2(stripped, "\\"); - - ptrdiff_t len = stripped - fileName; - b3Assert((len + 1) < maxPathLength); - - if (len && ((len + 1) < maxPathLength)) - { - for (int i = 0; i < len; i++) - { - path[i] = fileName[i]; - } - path[len] = 0; - } - else - { - len = 0; - b3Assert(maxPathLength > 0); - if (maxPathLength > 0) - { - path[len] = 0; - } - } - return len; - } - - static char toLowerChar(const char t) - { - if (t 
>= (char)'A' && t <= (char)'Z') - return t + ((char)'a' - (char)'A'); - else - return t; - } - - static void toLower(char* str) - { - int len = strlen(str); - for (int i = 0; i < len; i++) - { - str[i] = toLowerChar(str[i]); - } - } - - /*static const char* strip2(const char* name, const char* pattern) - { - size_t const patlen = strlen(pattern); - size_t patcnt = 0; - const char * oriptr; - const char * patloc; - // find how many times the pattern occurs in the original string - for (oriptr = name; patloc = strstr(oriptr, pattern); oriptr = patloc + patlen) - { - patcnt++; - } - return oriptr; - } - */ -}; -#endif //B3_FILE_UTILS_H diff --git a/thirdparty/bullet/Bullet3Common/b3HashMap.h b/thirdparty/bullet/Bullet3Common/b3HashMap.h deleted file mode 100644 index 3009e2cf2f..0000000000 --- a/thirdparty/bullet/Bullet3Common/b3HashMap.h +++ /dev/null @@ -1,462 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2013 Erwin Coumans http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. 
-*/ - -#ifndef B3_HASH_MAP_H -#define B3_HASH_MAP_H - -#include "b3AlignedObjectArray.h" - -#include <string> - -///very basic hashable string implementation, compatible with b3HashMap -struct b3HashString -{ - std::string m_string; - unsigned int m_hash; - - B3_FORCE_INLINE unsigned int getHash() const - { - return m_hash; - } - - b3HashString(const char* name) - : m_string(name) - { - /* magic numbers from http://www.isthe.com/chongo/tech/comp/fnv/ */ - static const unsigned int InitialFNV = 2166136261u; - static const unsigned int FNVMultiple = 16777619u; - - /* Fowler / Noll / Vo (FNV) Hash */ - unsigned int hash = InitialFNV; - int len = m_string.length(); - for (int i = 0; i < len; i++) - { - hash = hash ^ (m_string[i]); /* xor the low 8 bits */ - hash = hash * FNVMultiple; /* multiply by the magic number */ - } - m_hash = hash; - } - - int portableStringCompare(const char* src, const char* dst) const - { - int ret = 0; - - while (!(ret = *(unsigned char*)src - *(unsigned char*)dst) && *dst) - ++src, ++dst; - - if (ret < 0) - ret = -1; - else if (ret > 0) - ret = 1; - - return (ret); - } - - bool equals(const b3HashString& other) const - { - return (m_string == other.m_string); - } -}; - -const int B3_HASH_NULL = 0xffffffff; - -class b3HashInt -{ - int m_uid; - -public: - b3HashInt(int uid) : m_uid(uid) - { - } - - int getUid1() const - { - return m_uid; - } - - void setUid1(int uid) - { - m_uid = uid; - } - - bool equals(const b3HashInt& other) const - { - return getUid1() == other.getUid1(); - } - //to our success - B3_FORCE_INLINE unsigned int getHash() const - { - int key = m_uid; - // Thomas Wang's hash - key += ~(key << 15); - key ^= (key >> 10); - key += (key << 3); - key ^= (key >> 6); - key += ~(key << 11); - key ^= (key >> 16); - return key; - } -}; - -class b3HashPtr -{ - union { - const void* m_pointer; - int m_hashValues[2]; - }; - -public: - b3HashPtr(const void* ptr) - : m_pointer(ptr) - { - } - - const void* getPointer() const - { - return 
m_pointer; - } - - bool equals(const b3HashPtr& other) const - { - return getPointer() == other.getPointer(); - } - - //to our success - B3_FORCE_INLINE unsigned int getHash() const - { - const bool VOID_IS_8 = ((sizeof(void*) == 8)); - - int key = VOID_IS_8 ? m_hashValues[0] + m_hashValues[1] : m_hashValues[0]; - - // Thomas Wang's hash - key += ~(key << 15); - key ^= (key >> 10); - key += (key << 3); - key ^= (key >> 6); - key += ~(key << 11); - key ^= (key >> 16); - return key; - } -}; - -template <class Value> -class b3HashKeyPtr -{ - int m_uid; - -public: - b3HashKeyPtr(int uid) : m_uid(uid) - { - } - - int getUid1() const - { - return m_uid; - } - - bool equals(const b3HashKeyPtr<Value>& other) const - { - return getUid1() == other.getUid1(); - } - - //to our success - B3_FORCE_INLINE unsigned int getHash() const - { - int key = m_uid; - // Thomas Wang's hash - key += ~(key << 15); - key ^= (key >> 10); - key += (key << 3); - key ^= (key >> 6); - key += ~(key << 11); - key ^= (key >> 16); - return key; - } -}; - -template <class Value> -class b3HashKey -{ - int m_uid; - -public: - b3HashKey(int uid) : m_uid(uid) - { - } - - int getUid1() const - { - return m_uid; - } - - bool equals(const b3HashKey<Value>& other) const - { - return getUid1() == other.getUid1(); - } - //to our success - B3_FORCE_INLINE unsigned int getHash() const - { - int key = m_uid; - // Thomas Wang's hash - key += ~(key << 15); - key ^= (key >> 10); - key += (key << 3); - key ^= (key >> 6); - key += ~(key << 11); - key ^= (key >> 16); - return key; - } -}; - -///The b3HashMap template class implements a generic and lightweight hashmap. 
-///A basic sample of how to use b3HashMap is located in Demos\BasicDemo\main.cpp -template <class Key, class Value> -class b3HashMap -{ -protected: - b3AlignedObjectArray<int> m_hashTable; - b3AlignedObjectArray<int> m_next; - - b3AlignedObjectArray<Value> m_valueArray; - b3AlignedObjectArray<Key> m_keyArray; - - void growTables(const Key& /*key*/) - { - int newCapacity = m_valueArray.capacity(); - - if (m_hashTable.size() < newCapacity) - { - //grow hashtable and next table - int curHashtableSize = m_hashTable.size(); - - m_hashTable.resize(newCapacity); - m_next.resize(newCapacity); - - int i; - - for (i = 0; i < newCapacity; ++i) - { - m_hashTable[i] = B3_HASH_NULL; - } - for (i = 0; i < newCapacity; ++i) - { - m_next[i] = B3_HASH_NULL; - } - - for (i = 0; i < curHashtableSize; i++) - { - //const Value& value = m_valueArray[i]; - //const Key& key = m_keyArray[i]; - - int hashValue = m_keyArray[i].getHash() & (m_valueArray.capacity() - 1); // New hash value with new mask - m_next[i] = m_hashTable[hashValue]; - m_hashTable[hashValue] = i; - } - } - } - -public: - void insert(const Key& key, const Value& value) - { - int hash = key.getHash() & (m_valueArray.capacity() - 1); - - //replace value if the key is already there - int index = findIndex(key); - if (index != B3_HASH_NULL) - { - m_valueArray[index] = value; - return; - } - - int count = m_valueArray.size(); - int oldCapacity = m_valueArray.capacity(); - m_valueArray.push_back(value); - m_keyArray.push_back(key); - - int newCapacity = m_valueArray.capacity(); - if (oldCapacity < newCapacity) - { - growTables(key); - //hash with new capacity - hash = key.getHash() & (m_valueArray.capacity() - 1); - } - m_next[count] = m_hashTable[hash]; - m_hashTable[hash] = count; - } - - void remove(const Key& key) - { - int hash = key.getHash() & (m_valueArray.capacity() - 1); - - int pairIndex = findIndex(key); - - if (pairIndex == B3_HASH_NULL) - { - return; - } - - // Remove the pair from the hash table. 
- int index = m_hashTable[hash]; - b3Assert(index != B3_HASH_NULL); - - int previous = B3_HASH_NULL; - while (index != pairIndex) - { - previous = index; - index = m_next[index]; - } - - if (previous != B3_HASH_NULL) - { - b3Assert(m_next[previous] == pairIndex); - m_next[previous] = m_next[pairIndex]; - } - else - { - m_hashTable[hash] = m_next[pairIndex]; - } - - // We now move the last pair into spot of the - // pair being removed. We need to fix the hash - // table indices to support the move. - - int lastPairIndex = m_valueArray.size() - 1; - - // If the removed pair is the last pair, we are done. - if (lastPairIndex == pairIndex) - { - m_valueArray.pop_back(); - m_keyArray.pop_back(); - return; - } - - // Remove the last pair from the hash table. - int lastHash = m_keyArray[lastPairIndex].getHash() & (m_valueArray.capacity() - 1); - - index = m_hashTable[lastHash]; - b3Assert(index != B3_HASH_NULL); - - previous = B3_HASH_NULL; - while (index != lastPairIndex) - { - previous = index; - index = m_next[index]; - } - - if (previous != B3_HASH_NULL) - { - b3Assert(m_next[previous] == lastPairIndex); - m_next[previous] = m_next[lastPairIndex]; - } - else - { - m_hashTable[lastHash] = m_next[lastPairIndex]; - } - - // Copy the last pair into the remove pair's spot. 
- m_valueArray[pairIndex] = m_valueArray[lastPairIndex]; - m_keyArray[pairIndex] = m_keyArray[lastPairIndex]; - - // Insert the last pair into the hash table - m_next[pairIndex] = m_hashTable[lastHash]; - m_hashTable[lastHash] = pairIndex; - - m_valueArray.pop_back(); - m_keyArray.pop_back(); - } - - int size() const - { - return m_valueArray.size(); - } - - const Value* getAtIndex(int index) const - { - b3Assert(index < m_valueArray.size()); - - return &m_valueArray[index]; - } - - Value* getAtIndex(int index) - { - b3Assert(index < m_valueArray.size()); - - return &m_valueArray[index]; - } - - Key getKeyAtIndex(int index) - { - b3Assert(index < m_keyArray.size()); - return m_keyArray[index]; - } - - const Key getKeyAtIndex(int index) const - { - b3Assert(index < m_keyArray.size()); - return m_keyArray[index]; - } - - Value* operator[](const Key& key) - { - return find(key); - } - - const Value* find(const Key& key) const - { - int index = findIndex(key); - if (index == B3_HASH_NULL) - { - return NULL; - } - return &m_valueArray[index]; - } - - Value* find(const Key& key) - { - int index = findIndex(key); - if (index == B3_HASH_NULL) - { - return NULL; - } - return &m_valueArray[index]; - } - - int findIndex(const Key& key) const - { - unsigned int hash = key.getHash() & (m_valueArray.capacity() - 1); - - if (hash >= (unsigned int)m_hashTable.size()) - { - return B3_HASH_NULL; - } - - int index = m_hashTable[hash]; - while ((index != B3_HASH_NULL) && key.equals(m_keyArray[index]) == false) - { - index = m_next[index]; - } - return index; - } - - void clear() - { - m_hashTable.clear(); - m_next.clear(); - m_valueArray.clear(); - m_keyArray.clear(); - } -}; - -#endif //B3_HASH_MAP_H diff --git a/thirdparty/bullet/Bullet3Common/b3Logging.cpp b/thirdparty/bullet/Bullet3Common/b3Logging.cpp deleted file mode 100644 index 9c9f7c09ea..0000000000 --- a/thirdparty/bullet/Bullet3Common/b3Logging.cpp +++ /dev/null @@ -1,145 +0,0 @@ -/* -Copyright (c) 2013 Advanced Micro 
Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Originally written by Erwin Coumans - -#include "b3Logging.h" - -#include <stdio.h> -#include <stdarg.h> - -#ifdef _WIN32 -#include <windows.h> -#endif //_WIN32 - -void b3PrintfFuncDefault(const char* msg) -{ -#ifdef _WIN32 - OutputDebugStringA(msg); -#endif - printf("%s", msg); - //is this portable? - fflush(stdout); -} - -void b3WarningMessageFuncDefault(const char* msg) -{ -#ifdef _WIN32 - OutputDebugStringA(msg); -#endif - printf("%s", msg); - //is this portable? - fflush(stdout); -} - -void b3ErrorMessageFuncDefault(const char* msg) -{ -#ifdef _WIN32 - OutputDebugStringA(msg); -#endif - printf("%s", msg); - - //is this portable? 
- fflush(stdout); -} - -static b3PrintfFunc* b3s_printfFunc = b3PrintfFuncDefault; -static b3WarningMessageFunc* b3s_warningMessageFunc = b3WarningMessageFuncDefault; -static b3ErrorMessageFunc* b3s_errorMessageFunc = b3ErrorMessageFuncDefault; - -///The developer can route b3Printf output using their own implementation -void b3SetCustomPrintfFunc(b3PrintfFunc* printfFunc) -{ - b3s_printfFunc = printfFunc; -} -void b3SetCustomWarningMessageFunc(b3PrintfFunc* warningMessageFunc) -{ - b3s_warningMessageFunc = warningMessageFunc; -} -void b3SetCustomErrorMessageFunc(b3PrintfFunc* errorMessageFunc) -{ - b3s_errorMessageFunc = errorMessageFunc; -} - -//#define B3_MAX_DEBUG_STRING_LENGTH 2048 -#define B3_MAX_DEBUG_STRING_LENGTH 32768 - -void b3OutputPrintfVarArgsInternal(const char* str, ...) -{ - char strDebug[B3_MAX_DEBUG_STRING_LENGTH] = {0}; - va_list argList; - va_start(argList, str); -#ifdef _MSC_VER - vsprintf_s(strDebug, B3_MAX_DEBUG_STRING_LENGTH, str, argList); -#else - vsnprintf(strDebug, B3_MAX_DEBUG_STRING_LENGTH, str, argList); -#endif - (b3s_printfFunc)(strDebug); - va_end(argList); -} -void b3OutputWarningMessageVarArgsInternal(const char* str, ...) -{ - char strDebug[B3_MAX_DEBUG_STRING_LENGTH] = {0}; - va_list argList; - va_start(argList, str); -#ifdef _MSC_VER - vsprintf_s(strDebug, B3_MAX_DEBUG_STRING_LENGTH, str, argList); -#else - vsnprintf(strDebug, B3_MAX_DEBUG_STRING_LENGTH, str, argList); -#endif - (b3s_warningMessageFunc)(strDebug); - va_end(argList); -} -void b3OutputErrorMessageVarArgsInternal(const char* str, ...) 
-{ - char strDebug[B3_MAX_DEBUG_STRING_LENGTH] = {0}; - va_list argList; - va_start(argList, str); -#ifdef _MSC_VER - vsprintf_s(strDebug, B3_MAX_DEBUG_STRING_LENGTH, str, argList); -#else - vsnprintf(strDebug, B3_MAX_DEBUG_STRING_LENGTH, str, argList); -#endif - (b3s_errorMessageFunc)(strDebug); - va_end(argList); -} - -void b3EnterProfileZoneDefault(const char* name) -{ -} -void b3LeaveProfileZoneDefault() -{ -} -static b3EnterProfileZoneFunc* b3s_enterFunc = b3EnterProfileZoneDefault; -static b3LeaveProfileZoneFunc* b3s_leaveFunc = b3LeaveProfileZoneDefault; -void b3EnterProfileZone(const char* name) -{ - (b3s_enterFunc)(name); -} -void b3LeaveProfileZone() -{ - (b3s_leaveFunc)(); -} - -void b3SetCustomEnterProfileZoneFunc(b3EnterProfileZoneFunc* enterFunc) -{ - b3s_enterFunc = enterFunc; -} -void b3SetCustomLeaveProfileZoneFunc(b3LeaveProfileZoneFunc* leaveFunc) -{ - b3s_leaveFunc = leaveFunc; -} - -#ifndef _MSC_VER -#undef vsprintf_s -#endif diff --git a/thirdparty/bullet/Bullet3Common/b3Logging.h b/thirdparty/bullet/Bullet3Common/b3Logging.h deleted file mode 100644 index f61149de77..0000000000 --- a/thirdparty/bullet/Bullet3Common/b3Logging.h +++ /dev/null @@ -1,74 +0,0 @@ - -#ifndef B3_LOGGING_H -#define B3_LOGGING_H - -#ifdef __cplusplus -extern "C" -{ -#endif - -///We add the do/while so that the statement "if (condition) b3Printf("test"); else {...}" would fail -///You can also customize the message by uncommenting out a different line below -#define b3Printf(...) b3OutputPrintfVarArgsInternal(__VA_ARGS__) - //#define b3Printf(...) do {b3OutputPrintfVarArgsInternal("b3Printf[%s,%d]:",__FILE__,__LINE__);b3OutputPrintfVarArgsInternal(__VA_ARGS__); } while(0) - //#define b3Printf b3OutputPrintfVarArgsInternal - //#define b3Printf(...) printf(__VA_ARGS__) - //#define b3Printf(...) -#define b3Warning(...) 
do{ b3OutputWarningMessageVarArgsInternal("b3Warning[%s,%d]:\n", __FILE__, __LINE__);b3OutputWarningMessageVarArgsInternal(__VA_ARGS__);} while (0) -#define b3Error(...)do {b3OutputErrorMessageVarArgsInternal("b3Error[%s,%d]:\n", __FILE__, __LINE__);b3OutputErrorMessageVarArgsInternal(__VA_ARGS__);} while (0) -#ifndef B3_NO_PROFILE - - void b3EnterProfileZone(const char* name); - void b3LeaveProfileZone(); -#ifdef __cplusplus - - class b3ProfileZone - { - public: - b3ProfileZone(const char* name) - { - b3EnterProfileZone(name); - } - - ~b3ProfileZone() - { - b3LeaveProfileZone(); - } - }; - -#define B3_PROFILE(name) b3ProfileZone __profile(name) -#endif - -#else //B3_NO_PROFILE - -#define B3_PROFILE(name) -#define b3StartProfile(a) -#define b3StopProfile - -#endif //#ifndef B3_NO_PROFILE - - typedef void(b3PrintfFunc)(const char* msg); - typedef void(b3WarningMessageFunc)(const char* msg); - typedef void(b3ErrorMessageFunc)(const char* msg); - typedef void(b3EnterProfileZoneFunc)(const char* msg); - typedef void(b3LeaveProfileZoneFunc)(); - - ///The developer can route b3Printf output using their own implementation - void b3SetCustomPrintfFunc(b3PrintfFunc* printfFunc); - void b3SetCustomWarningMessageFunc(b3WarningMessageFunc* warningMsgFunc); - void b3SetCustomErrorMessageFunc(b3ErrorMessageFunc* errorMsgFunc); - - ///Set custom profile zone functions (zones can be nested) - void b3SetCustomEnterProfileZoneFunc(b3EnterProfileZoneFunc* enterFunc); - void b3SetCustomLeaveProfileZoneFunc(b3LeaveProfileZoneFunc* leaveFunc); - - ///Don't use those internal functions directly, use the b3Printf or b3SetCustomPrintfFunc instead (or warning/error version) - void b3OutputPrintfVarArgsInternal(const char* str, ...); - void b3OutputWarningMessageVarArgsInternal(const char* str, ...); - void b3OutputErrorMessageVarArgsInternal(const char* str, ...); - -#ifdef __cplusplus -} -#endif - -#endif //B3_LOGGING_H
\ No newline at end of file diff --git a/thirdparty/bullet/Bullet3Common/b3Matrix3x3.h b/thirdparty/bullet/Bullet3Common/b3Matrix3x3.h deleted file mode 100644 index 6c46536a81..0000000000 --- a/thirdparty/bullet/Bullet3Common/b3Matrix3x3.h +++ /dev/null @@ -1,1354 +0,0 @@ -/* -Copyright (c) 2003-2013 Gino van den Bergen / Erwin Coumans http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. 
-*/ - -#ifndef B3_MATRIX3x3_H -#define B3_MATRIX3x3_H - -#include "b3Vector3.h" -#include "b3Quaternion.h" -#include <stdio.h> - -#ifdef B3_USE_SSE -//const __m128 B3_ATTRIBUTE_ALIGNED16(b3v2220) = {2.0f, 2.0f, 2.0f, 0.0f}; -const __m128 B3_ATTRIBUTE_ALIGNED16(b3vMPPP) = {-0.0f, +0.0f, +0.0f, +0.0f}; -#endif - -#if defined(B3_USE_SSE) || defined(B3_USE_NEON) -const b3SimdFloat4 B3_ATTRIBUTE_ALIGNED16(b3v1000) = {1.0f, 0.0f, 0.0f, 0.0f}; -const b3SimdFloat4 B3_ATTRIBUTE_ALIGNED16(b3v0100) = {0.0f, 1.0f, 0.0f, 0.0f}; -const b3SimdFloat4 B3_ATTRIBUTE_ALIGNED16(b3v0010) = {0.0f, 0.0f, 1.0f, 0.0f}; -#endif - -#ifdef B3_USE_DOUBLE_PRECISION -#define b3Matrix3x3Data b3Matrix3x3DoubleData -#else -#define b3Matrix3x3Data b3Matrix3x3FloatData -#endif //B3_USE_DOUBLE_PRECISION - -/**@brief The b3Matrix3x3 class implements a 3x3 rotation matrix, to perform linear algebra in combination with b3Quaternion, b3Transform and b3Vector3. -* Make sure to only include a pure orthogonal matrix without scaling. 
*/ -B3_ATTRIBUTE_ALIGNED16(class) -b3Matrix3x3 -{ - ///Data storage for the matrix, each vector is a row of the matrix - b3Vector3 m_el[3]; - -public: - /** @brief No initializaion constructor */ - b3Matrix3x3() {} - - // explicit b3Matrix3x3(const b3Scalar *m) { setFromOpenGLSubMatrix(m); } - - /**@brief Constructor from Quaternion */ - explicit b3Matrix3x3(const b3Quaternion& q) { setRotation(q); } - /* - template <typename b3Scalar> - Matrix3x3(const b3Scalar& yaw, const b3Scalar& pitch, const b3Scalar& roll) - { - setEulerYPR(yaw, pitch, roll); - } - */ - /** @brief Constructor with row major formatting */ - b3Matrix3x3(const b3Scalar& xx, const b3Scalar& xy, const b3Scalar& xz, - const b3Scalar& yx, const b3Scalar& yy, const b3Scalar& yz, - const b3Scalar& zx, const b3Scalar& zy, const b3Scalar& zz) - { - setValue(xx, xy, xz, - yx, yy, yz, - zx, zy, zz); - } - -#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) || defined(B3_USE_NEON) - B3_FORCE_INLINE b3Matrix3x3(const b3SimdFloat4 v0, const b3SimdFloat4 v1, const b3SimdFloat4 v2) - { - m_el[0].mVec128 = v0; - m_el[1].mVec128 = v1; - m_el[2].mVec128 = v2; - } - - B3_FORCE_INLINE b3Matrix3x3(const b3Vector3& v0, const b3Vector3& v1, const b3Vector3& v2) - { - m_el[0] = v0; - m_el[1] = v1; - m_el[2] = v2; - } - - // Copy constructor - B3_FORCE_INLINE b3Matrix3x3(const b3Matrix3x3& rhs) - { - m_el[0].mVec128 = rhs.m_el[0].mVec128; - m_el[1].mVec128 = rhs.m_el[1].mVec128; - m_el[2].mVec128 = rhs.m_el[2].mVec128; - } - - // Assignment Operator - B3_FORCE_INLINE b3Matrix3x3& operator=(const b3Matrix3x3& m) - { - m_el[0].mVec128 = m.m_el[0].mVec128; - m_el[1].mVec128 = m.m_el[1].mVec128; - m_el[2].mVec128 = m.m_el[2].mVec128; - - return *this; - } - -#else - - /** @brief Copy constructor */ - B3_FORCE_INLINE b3Matrix3x3(const b3Matrix3x3& other) - { - m_el[0] = other.m_el[0]; - m_el[1] = other.m_el[1]; - m_el[2] = other.m_el[2]; - } - - /** @brief Assignment Operator */ - B3_FORCE_INLINE b3Matrix3x3& 
operator=(const b3Matrix3x3& other) - { - m_el[0] = other.m_el[0]; - m_el[1] = other.m_el[1]; - m_el[2] = other.m_el[2]; - return *this; - } - -#endif - - /** @brief Get a column of the matrix as a vector - * @param i Column number 0 indexed */ - B3_FORCE_INLINE b3Vector3 getColumn(int i) const - { - return b3MakeVector3(m_el[0][i], m_el[1][i], m_el[2][i]); - } - - /** @brief Get a row of the matrix as a vector - * @param i Row number 0 indexed */ - B3_FORCE_INLINE const b3Vector3& getRow(int i) const - { - b3FullAssert(0 <= i && i < 3); - return m_el[i]; - } - - /** @brief Get a mutable reference to a row of the matrix as a vector - * @param i Row number 0 indexed */ - B3_FORCE_INLINE b3Vector3& operator[](int i) - { - b3FullAssert(0 <= i && i < 3); - return m_el[i]; - } - - /** @brief Get a const reference to a row of the matrix as a vector - * @param i Row number 0 indexed */ - B3_FORCE_INLINE const b3Vector3& operator[](int i) const - { - b3FullAssert(0 <= i && i < 3); - return m_el[i]; - } - - /** @brief Multiply by the target matrix on the right - * @param m Rotation matrix to be applied - * Equivilant to this = this * m */ - b3Matrix3x3& operator*=(const b3Matrix3x3& m); - - /** @brief Adds by the target matrix on the right - * @param m matrix to be applied - * Equivilant to this = this + m */ - b3Matrix3x3& operator+=(const b3Matrix3x3& m); - - /** @brief Substractss by the target matrix on the right - * @param m matrix to be applied - * Equivilant to this = this - m */ - b3Matrix3x3& operator-=(const b3Matrix3x3& m); - - /** @brief Set from the rotational part of a 4x4 OpenGL matrix - * @param m A pointer to the beginning of the array of scalars*/ - void setFromOpenGLSubMatrix(const b3Scalar* m) - { - m_el[0].setValue(m[0], m[4], m[8]); - m_el[1].setValue(m[1], m[5], m[9]); - m_el[2].setValue(m[2], m[6], m[10]); - } - /** @brief Set the values of the matrix explicitly (row major) - * @param xx Top left - * @param xy Top Middle - * @param xz Top Right - * 
@param yx Middle Left - * @param yy Middle Middle - * @param yz Middle Right - * @param zx Bottom Left - * @param zy Bottom Middle - * @param zz Bottom Right*/ - void setValue(const b3Scalar& xx, const b3Scalar& xy, const b3Scalar& xz, - const b3Scalar& yx, const b3Scalar& yy, const b3Scalar& yz, - const b3Scalar& zx, const b3Scalar& zy, const b3Scalar& zz) - { - m_el[0].setValue(xx, xy, xz); - m_el[1].setValue(yx, yy, yz); - m_el[2].setValue(zx, zy, zz); - } - - /** @brief Set the matrix from a quaternion - * @param q The Quaternion to match */ - void setRotation(const b3Quaternion& q) - { - b3Scalar d = q.length2(); - b3FullAssert(d != b3Scalar(0.0)); - b3Scalar s = b3Scalar(2.0) / d; - -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - __m128 vs, Q = q.get128(); - __m128i Qi = b3CastfTo128i(Q); - __m128 Y, Z; - __m128 V1, V2, V3; - __m128 V11, V21, V31; - __m128 NQ = _mm_xor_ps(Q, b3vMzeroMask); - __m128i NQi = b3CastfTo128i(NQ); - - V1 = b3CastiTo128f(_mm_shuffle_epi32(Qi, B3_SHUFFLE(1, 0, 2, 3))); // Y X Z W - V2 = _mm_shuffle_ps(NQ, Q, B3_SHUFFLE(0, 0, 1, 3)); // -X -X Y W - V3 = b3CastiTo128f(_mm_shuffle_epi32(Qi, B3_SHUFFLE(2, 1, 0, 3))); // Z Y X W - V1 = _mm_xor_ps(V1, b3vMPPP); // change the sign of the first element - - V11 = b3CastiTo128f(_mm_shuffle_epi32(Qi, B3_SHUFFLE(1, 1, 0, 3))); // Y Y X W - V21 = _mm_unpackhi_ps(Q, Q); // Z Z W W - V31 = _mm_shuffle_ps(Q, NQ, B3_SHUFFLE(0, 2, 0, 3)); // X Z -X -W - - V2 = V2 * V1; // - V1 = V1 * V11; // - V3 = V3 * V31; // - - V11 = _mm_shuffle_ps(NQ, Q, B3_SHUFFLE(2, 3, 1, 3)); // -Z -W Y W - V11 = V11 * V21; // - V21 = _mm_xor_ps(V21, b3vMPPP); // change the sign of the first element - V31 = _mm_shuffle_ps(Q, NQ, B3_SHUFFLE(3, 3, 1, 3)); // W W -Y -W - V31 = _mm_xor_ps(V31, b3vMPPP); // change the sign of the first element - Y = b3CastiTo128f(_mm_shuffle_epi32(NQi, B3_SHUFFLE(3, 2, 0, 3))); // -W -Z -X -W - Z = b3CastiTo128f(_mm_shuffle_epi32(Qi, B3_SHUFFLE(1, 0, 1, 3))); // Y X Y W - - vs = 
_mm_load_ss(&s); - V21 = V21 * Y; - V31 = V31 * Z; - - V1 = V1 + V11; - V2 = V2 + V21; - V3 = V3 + V31; - - vs = b3_splat3_ps(vs, 0); - // s ready - V1 = V1 * vs; - V2 = V2 * vs; - V3 = V3 * vs; - - V1 = V1 + b3v1000; - V2 = V2 + b3v0100; - V3 = V3 + b3v0010; - - m_el[0] = b3MakeVector3(V1); - m_el[1] = b3MakeVector3(V2); - m_el[2] = b3MakeVector3(V3); -#else - b3Scalar xs = q.getX() * s, ys = q.getY() * s, zs = q.getZ() * s; - b3Scalar wx = q.getW() * xs, wy = q.getW() * ys, wz = q.getW() * zs; - b3Scalar xx = q.getX() * xs, xy = q.getX() * ys, xz = q.getX() * zs; - b3Scalar yy = q.getY() * ys, yz = q.getY() * zs, zz = q.getZ() * zs; - setValue( - b3Scalar(1.0) - (yy + zz), xy - wz, xz + wy, - xy + wz, b3Scalar(1.0) - (xx + zz), yz - wx, - xz - wy, yz + wx, b3Scalar(1.0) - (xx + yy)); -#endif - } - - /** @brief Set the matrix from euler angles using YPR around YXZ respectively - * @param yaw Yaw about Y axis - * @param pitch Pitch about X axis - * @param roll Roll about Z axis - */ - void setEulerYPR(const b3Scalar& yaw, const b3Scalar& pitch, const b3Scalar& roll) - { - setEulerZYX(roll, pitch, yaw); - } - - /** @brief Set the matrix from euler angles YPR around ZYX axes - * @param eulerX Roll about X axis - * @param eulerY Pitch around Y axis - * @param eulerZ Yaw aboud Z axis - * - * These angles are used to produce a rotation matrix. The euler - * angles are applied in ZYX order. 
I.e a vector is first rotated - * about X then Y and then Z - **/ - void setEulerZYX(b3Scalar eulerX, b3Scalar eulerY, b3Scalar eulerZ) - { - ///@todo proposed to reverse this since it's labeled zyx but takes arguments xyz and it will match all other parts of the code - b3Scalar ci(b3Cos(eulerX)); - b3Scalar cj(b3Cos(eulerY)); - b3Scalar ch(b3Cos(eulerZ)); - b3Scalar si(b3Sin(eulerX)); - b3Scalar sj(b3Sin(eulerY)); - b3Scalar sh(b3Sin(eulerZ)); - b3Scalar cc = ci * ch; - b3Scalar cs = ci * sh; - b3Scalar sc = si * ch; - b3Scalar ss = si * sh; - - setValue(cj * ch, sj * sc - cs, sj * cc + ss, - cj * sh, sj * ss + cc, sj * cs - sc, - -sj, cj * si, cj * ci); - } - - /**@brief Set the matrix to the identity */ - void setIdentity() - { -#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) || defined(B3_USE_NEON) - m_el[0] = b3MakeVector3(b3v1000); - m_el[1] = b3MakeVector3(b3v0100); - m_el[2] = b3MakeVector3(b3v0010); -#else - setValue(b3Scalar(1.0), b3Scalar(0.0), b3Scalar(0.0), - b3Scalar(0.0), b3Scalar(1.0), b3Scalar(0.0), - b3Scalar(0.0), b3Scalar(0.0), b3Scalar(1.0)); -#endif - } - - static const b3Matrix3x3& getIdentity() - { -#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) || defined(B3_USE_NEON) - static const b3Matrix3x3 - identityMatrix(b3v1000, b3v0100, b3v0010); -#else - static const b3Matrix3x3 - identityMatrix( - b3Scalar(1.0), b3Scalar(0.0), b3Scalar(0.0), - b3Scalar(0.0), b3Scalar(1.0), b3Scalar(0.0), - b3Scalar(0.0), b3Scalar(0.0), b3Scalar(1.0)); -#endif - return identityMatrix; - } - - /**@brief Fill the rotational part of an OpenGL matrix and clear the shear/perspective - * @param m The array to be filled */ - void getOpenGLSubMatrix(b3Scalar * m) const - { -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - __m128 v0 = m_el[0].mVec128; - __m128 v1 = m_el[1].mVec128; - __m128 v2 = m_el[2].mVec128; // x2 y2 z2 w2 - __m128* vm = (__m128*)m; - __m128 vT; - - v2 = _mm_and_ps(v2, b3vFFF0fMask); // x2 y2 z2 0 - - vT = _mm_unpackhi_ps(v0, 
v1); // z0 z1 * * - v0 = _mm_unpacklo_ps(v0, v1); // x0 x1 y0 y1 - - v1 = _mm_shuffle_ps(v0, v2, B3_SHUFFLE(2, 3, 1, 3)); // y0 y1 y2 0 - v0 = _mm_shuffle_ps(v0, v2, B3_SHUFFLE(0, 1, 0, 3)); // x0 x1 x2 0 - v2 = b3CastdTo128f(_mm_move_sd(b3CastfTo128d(v2), b3CastfTo128d(vT))); // z0 z1 z2 0 - - vm[0] = v0; - vm[1] = v1; - vm[2] = v2; -#elif defined(B3_USE_NEON) - // note: zeros the w channel. We can preserve it at the cost of two more vtrn instructions. - static const uint32x2_t zMask = (const uint32x2_t){-1, 0}; - float32x4_t* vm = (float32x4_t*)m; - float32x4x2_t top = vtrnq_f32(m_el[0].mVec128, m_el[1].mVec128); // {x0 x1 z0 z1}, {y0 y1 w0 w1} - float32x2x2_t bl = vtrn_f32(vget_low_f32(m_el[2].mVec128), vdup_n_f32(0.0f)); // {x2 0 }, {y2 0} - float32x4_t v0 = vcombine_f32(vget_low_f32(top.val[0]), bl.val[0]); - float32x4_t v1 = vcombine_f32(vget_low_f32(top.val[1]), bl.val[1]); - float32x2_t q = (float32x2_t)vand_u32((uint32x2_t)vget_high_f32(m_el[2].mVec128), zMask); - float32x4_t v2 = vcombine_f32(vget_high_f32(top.val[0]), q); // z0 z1 z2 0 - - vm[0] = v0; - vm[1] = v1; - vm[2] = v2; -#else - m[0] = b3Scalar(m_el[0].getX()); - m[1] = b3Scalar(m_el[1].getX()); - m[2] = b3Scalar(m_el[2].getX()); - m[3] = b3Scalar(0.0); - m[4] = b3Scalar(m_el[0].getY()); - m[5] = b3Scalar(m_el[1].getY()); - m[6] = b3Scalar(m_el[2].getY()); - m[7] = b3Scalar(0.0); - m[8] = b3Scalar(m_el[0].getZ()); - m[9] = b3Scalar(m_el[1].getZ()); - m[10] = b3Scalar(m_el[2].getZ()); - m[11] = b3Scalar(0.0); -#endif - } - - /**@brief Get the matrix represented as a quaternion - * @param q The quaternion which will be set */ - void getRotation(b3Quaternion & q) const - { -#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) || defined(B3_USE_NEON) - b3Scalar trace = m_el[0].getX() + m_el[1].getY() + m_el[2].getZ(); - b3Scalar s, x; - - union { - b3SimdFloat4 vec; - b3Scalar f[4]; - } temp; - - if (trace > b3Scalar(0.0)) - { - x = trace + b3Scalar(1.0); - - temp.f[0] = m_el[2].getY() - 
m_el[1].getZ(); - temp.f[1] = m_el[0].getZ() - m_el[2].getX(); - temp.f[2] = m_el[1].getX() - m_el[0].getY(); - temp.f[3] = x; - //temp.f[3]= s * b3Scalar(0.5); - } - else - { - int i, j, k; - if (m_el[0].getX() < m_el[1].getY()) - { - if (m_el[1].getY() < m_el[2].getZ()) - { - i = 2; - j = 0; - k = 1; - } - else - { - i = 1; - j = 2; - k = 0; - } - } - else - { - if (m_el[0].getX() < m_el[2].getZ()) - { - i = 2; - j = 0; - k = 1; - } - else - { - i = 0; - j = 1; - k = 2; - } - } - - x = m_el[i][i] - m_el[j][j] - m_el[k][k] + b3Scalar(1.0); - - temp.f[3] = (m_el[k][j] - m_el[j][k]); - temp.f[j] = (m_el[j][i] + m_el[i][j]); - temp.f[k] = (m_el[k][i] + m_el[i][k]); - temp.f[i] = x; - //temp.f[i] = s * b3Scalar(0.5); - } - - s = b3Sqrt(x); - q.set128(temp.vec); - s = b3Scalar(0.5) / s; - - q *= s; -#else - b3Scalar trace = m_el[0].getX() + m_el[1].getY() + m_el[2].getZ(); - - b3Scalar temp[4]; - - if (trace > b3Scalar(0.0)) - { - b3Scalar s = b3Sqrt(trace + b3Scalar(1.0)); - temp[3] = (s * b3Scalar(0.5)); - s = b3Scalar(0.5) / s; - - temp[0] = ((m_el[2].getY() - m_el[1].getZ()) * s); - temp[1] = ((m_el[0].getZ() - m_el[2].getX()) * s); - temp[2] = ((m_el[1].getX() - m_el[0].getY()) * s); - } - else - { - int i = m_el[0].getX() < m_el[1].getY() ? (m_el[1].getY() < m_el[2].getZ() ? 2 : 1) : (m_el[0].getX() < m_el[2].getZ() ? 
2 : 0); - int j = (i + 1) % 3; - int k = (i + 2) % 3; - - b3Scalar s = b3Sqrt(m_el[i][i] - m_el[j][j] - m_el[k][k] + b3Scalar(1.0)); - temp[i] = s * b3Scalar(0.5); - s = b3Scalar(0.5) / s; - - temp[3] = (m_el[k][j] - m_el[j][k]) * s; - temp[j] = (m_el[j][i] + m_el[i][j]) * s; - temp[k] = (m_el[k][i] + m_el[i][k]) * s; - } - q.setValue(temp[0], temp[1], temp[2], temp[3]); -#endif - } - - /**@brief Get the matrix represented as euler angles around YXZ, roundtrip with setEulerYPR - * @param yaw Yaw around Y axis - * @param pitch Pitch around X axis - * @param roll around Z axis */ - void getEulerYPR(b3Scalar & yaw, b3Scalar & pitch, b3Scalar & roll) const - { - // first use the normal calculus - yaw = b3Scalar(b3Atan2(m_el[1].getX(), m_el[0].getX())); - pitch = b3Scalar(b3Asin(-m_el[2].getX())); - roll = b3Scalar(b3Atan2(m_el[2].getY(), m_el[2].getZ())); - - // on pitch = +/-HalfPI - if (b3Fabs(pitch) == B3_HALF_PI) - { - if (yaw > 0) - yaw -= B3_PI; - else - yaw += B3_PI; - - if (roll > 0) - roll -= B3_PI; - else - roll += B3_PI; - } - }; - - /**@brief Get the matrix represented as euler angles around ZYX - * @param yaw Yaw around X axis - * @param pitch Pitch around Y axis - * @param roll around X axis - * @param solution_number Which solution of two possible solutions ( 1 or 2) are possible values*/ - void getEulerZYX(b3Scalar & yaw, b3Scalar & pitch, b3Scalar & roll, unsigned int solution_number = 1) const - { - struct Euler - { - b3Scalar yaw; - b3Scalar pitch; - b3Scalar roll; - }; - - Euler euler_out; - Euler euler_out2; //second solution - //get the pointer to the raw data - - // Check that pitch is not at a singularity - if (b3Fabs(m_el[2].getX()) >= 1) - { - euler_out.yaw = 0; - euler_out2.yaw = 0; - - // From difference of angles formula - b3Scalar delta = b3Atan2(m_el[0].getX(), m_el[0].getZ()); - if (m_el[2].getX() > 0) //gimbal locked up - { - euler_out.pitch = B3_PI / b3Scalar(2.0); - euler_out2.pitch = B3_PI / b3Scalar(2.0); - euler_out.roll = 
euler_out.pitch + delta; - euler_out2.roll = euler_out.pitch + delta; - } - else // gimbal locked down - { - euler_out.pitch = -B3_PI / b3Scalar(2.0); - euler_out2.pitch = -B3_PI / b3Scalar(2.0); - euler_out.roll = -euler_out.pitch + delta; - euler_out2.roll = -euler_out.pitch + delta; - } - } - else - { - euler_out.pitch = -b3Asin(m_el[2].getX()); - euler_out2.pitch = B3_PI - euler_out.pitch; - - euler_out.roll = b3Atan2(m_el[2].getY() / b3Cos(euler_out.pitch), - m_el[2].getZ() / b3Cos(euler_out.pitch)); - euler_out2.roll = b3Atan2(m_el[2].getY() / b3Cos(euler_out2.pitch), - m_el[2].getZ() / b3Cos(euler_out2.pitch)); - - euler_out.yaw = b3Atan2(m_el[1].getX() / b3Cos(euler_out.pitch), - m_el[0].getX() / b3Cos(euler_out.pitch)); - euler_out2.yaw = b3Atan2(m_el[1].getX() / b3Cos(euler_out2.pitch), - m_el[0].getX() / b3Cos(euler_out2.pitch)); - } - - if (solution_number == 1) - { - yaw = euler_out.yaw; - pitch = euler_out.pitch; - roll = euler_out.roll; - } - else - { - yaw = euler_out2.yaw; - pitch = euler_out2.pitch; - roll = euler_out2.roll; - } - } - - /**@brief Create a scaled copy of the matrix - * @param s Scaling vector The elements of the vector will scale each column */ - - b3Matrix3x3 scaled(const b3Vector3& s) const - { -#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) || defined(B3_USE_NEON) - return b3Matrix3x3(m_el[0] * s, m_el[1] * s, m_el[2] * s); -#else - return b3Matrix3x3( - m_el[0].getX() * s.getX(), m_el[0].getY() * s.getY(), m_el[0].getZ() * s.getZ(), - m_el[1].getX() * s.getX(), m_el[1].getY() * s.getY(), m_el[1].getZ() * s.getZ(), - m_el[2].getX() * s.getX(), m_el[2].getY() * s.getY(), m_el[2].getZ() * s.getZ()); -#endif - } - - /**@brief Return the determinant of the matrix */ - b3Scalar determinant() const; - /**@brief Return the adjoint of the matrix */ - b3Matrix3x3 adjoint() const; - /**@brief Return the matrix with all values non negative */ - b3Matrix3x3 absolute() const; - /**@brief Return the transpose of the matrix */ - 
b3Matrix3x3 transpose() const; - /**@brief Return the inverse of the matrix */ - b3Matrix3x3 inverse() const; - - b3Matrix3x3 transposeTimes(const b3Matrix3x3& m) const; - b3Matrix3x3 timesTranspose(const b3Matrix3x3& m) const; - - B3_FORCE_INLINE b3Scalar tdotx(const b3Vector3& v) const - { - return m_el[0].getX() * v.getX() + m_el[1].getX() * v.getY() + m_el[2].getX() * v.getZ(); - } - B3_FORCE_INLINE b3Scalar tdoty(const b3Vector3& v) const - { - return m_el[0].getY() * v.getX() + m_el[1].getY() * v.getY() + m_el[2].getY() * v.getZ(); - } - B3_FORCE_INLINE b3Scalar tdotz(const b3Vector3& v) const - { - return m_el[0].getZ() * v.getX() + m_el[1].getZ() * v.getY() + m_el[2].getZ() * v.getZ(); - } - - /**@brief diagonalizes this matrix by the Jacobi method. - * @param rot stores the rotation from the coordinate system in which the matrix is diagonal to the original - * coordinate system, i.e., old_this = rot * new_this * rot^T. - * @param threshold See iteration - * @param iteration The iteration stops when all off-diagonal elements are less than the threshold multiplied - * by the sum of the absolute values of the diagonal, or when maxSteps have been executed. - * - * Note that this matrix is assumed to be symmetric. 
- */ - void diagonalize(b3Matrix3x3 & rot, b3Scalar threshold, int maxSteps) - { - rot.setIdentity(); - for (int step = maxSteps; step > 0; step--) - { - // find off-diagonal element [p][q] with largest magnitude - int p = 0; - int q = 1; - int r = 2; - b3Scalar max = b3Fabs(m_el[0][1]); - b3Scalar v = b3Fabs(m_el[0][2]); - if (v > max) - { - q = 2; - r = 1; - max = v; - } - v = b3Fabs(m_el[1][2]); - if (v > max) - { - p = 1; - q = 2; - r = 0; - max = v; - } - - b3Scalar t = threshold * (b3Fabs(m_el[0][0]) + b3Fabs(m_el[1][1]) + b3Fabs(m_el[2][2])); - if (max <= t) - { - if (max <= B3_EPSILON * t) - { - return; - } - step = 1; - } - - // compute Jacobi rotation J which leads to a zero for element [p][q] - b3Scalar mpq = m_el[p][q]; - b3Scalar theta = (m_el[q][q] - m_el[p][p]) / (2 * mpq); - b3Scalar theta2 = theta * theta; - b3Scalar cos; - b3Scalar sin; - if (theta2 * theta2 < b3Scalar(10 / B3_EPSILON)) - { - t = (theta >= 0) ? 1 / (theta + b3Sqrt(1 + theta2)) - : 1 / (theta - b3Sqrt(1 + theta2)); - cos = 1 / b3Sqrt(1 + t * t); - sin = cos * t; - } - else - { - // approximation for large theta-value, i.e., a nearly diagonal matrix - t = 1 / (theta * (2 + b3Scalar(0.5) / theta2)); - cos = 1 - b3Scalar(0.5) * t * t; - sin = cos * t; - } - - // apply rotation to matrix (this = J^T * this * J) - m_el[p][q] = m_el[q][p] = 0; - m_el[p][p] -= t * mpq; - m_el[q][q] += t * mpq; - b3Scalar mrp = m_el[r][p]; - b3Scalar mrq = m_el[r][q]; - m_el[r][p] = m_el[p][r] = cos * mrp - sin * mrq; - m_el[r][q] = m_el[q][r] = cos * mrq + sin * mrp; - - // apply rotation to rot (rot = rot * J) - for (int i = 0; i < 3; i++) - { - b3Vector3& row = rot[i]; - mrp = row[p]; - mrq = row[q]; - row[p] = cos * mrp - sin * mrq; - row[q] = cos * mrq + sin * mrp; - } - } - } - - /**@brief Calculate the matrix cofactor - * @param r1 The first row to use for calculating the cofactor - * @param c1 The first column to use for calculating the cofactor - * @param r1 The second row to use for calculating 
the cofactor - * @param c1 The second column to use for calculating the cofactor - * See http://en.wikipedia.org/wiki/Cofactor_(linear_algebra) for more details - */ - b3Scalar cofac(int r1, int c1, int r2, int c2) const - { - return m_el[r1][c1] * m_el[r2][c2] - m_el[r1][c2] * m_el[r2][c1]; - } - - void serialize(struct b3Matrix3x3Data & dataOut) const; - - void serializeFloat(struct b3Matrix3x3FloatData & dataOut) const; - - void deSerialize(const struct b3Matrix3x3Data& dataIn); - - void deSerializeFloat(const struct b3Matrix3x3FloatData& dataIn); - - void deSerializeDouble(const struct b3Matrix3x3DoubleData& dataIn); -}; - -B3_FORCE_INLINE b3Matrix3x3& -b3Matrix3x3::operator*=(const b3Matrix3x3& m) -{ -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - __m128 rv00, rv01, rv02; - __m128 rv10, rv11, rv12; - __m128 rv20, rv21, rv22; - __m128 mv0, mv1, mv2; - - rv02 = m_el[0].mVec128; - rv12 = m_el[1].mVec128; - rv22 = m_el[2].mVec128; - - mv0 = _mm_and_ps(m[0].mVec128, b3vFFF0fMask); - mv1 = _mm_and_ps(m[1].mVec128, b3vFFF0fMask); - mv2 = _mm_and_ps(m[2].mVec128, b3vFFF0fMask); - - // rv0 - rv00 = b3_splat_ps(rv02, 0); - rv01 = b3_splat_ps(rv02, 1); - rv02 = b3_splat_ps(rv02, 2); - - rv00 = _mm_mul_ps(rv00, mv0); - rv01 = _mm_mul_ps(rv01, mv1); - rv02 = _mm_mul_ps(rv02, mv2); - - // rv1 - rv10 = b3_splat_ps(rv12, 0); - rv11 = b3_splat_ps(rv12, 1); - rv12 = b3_splat_ps(rv12, 2); - - rv10 = _mm_mul_ps(rv10, mv0); - rv11 = _mm_mul_ps(rv11, mv1); - rv12 = _mm_mul_ps(rv12, mv2); - - // rv2 - rv20 = b3_splat_ps(rv22, 0); - rv21 = b3_splat_ps(rv22, 1); - rv22 = b3_splat_ps(rv22, 2); - - rv20 = _mm_mul_ps(rv20, mv0); - rv21 = _mm_mul_ps(rv21, mv1); - rv22 = _mm_mul_ps(rv22, mv2); - - rv00 = _mm_add_ps(rv00, rv01); - rv10 = _mm_add_ps(rv10, rv11); - rv20 = _mm_add_ps(rv20, rv21); - - m_el[0].mVec128 = _mm_add_ps(rv00, rv02); - m_el[1].mVec128 = _mm_add_ps(rv10, rv12); - m_el[2].mVec128 = _mm_add_ps(rv20, rv22); - -#elif defined(B3_USE_NEON) - - float32x4_t rv0, rv1, 
rv2; - float32x4_t v0, v1, v2; - float32x4_t mv0, mv1, mv2; - - v0 = m_el[0].mVec128; - v1 = m_el[1].mVec128; - v2 = m_el[2].mVec128; - - mv0 = (float32x4_t)vandq_s32((int32x4_t)m[0].mVec128, b3vFFF0Mask); - mv1 = (float32x4_t)vandq_s32((int32x4_t)m[1].mVec128, b3vFFF0Mask); - mv2 = (float32x4_t)vandq_s32((int32x4_t)m[2].mVec128, b3vFFF0Mask); - - rv0 = vmulq_lane_f32(mv0, vget_low_f32(v0), 0); - rv1 = vmulq_lane_f32(mv0, vget_low_f32(v1), 0); - rv2 = vmulq_lane_f32(mv0, vget_low_f32(v2), 0); - - rv0 = vmlaq_lane_f32(rv0, mv1, vget_low_f32(v0), 1); - rv1 = vmlaq_lane_f32(rv1, mv1, vget_low_f32(v1), 1); - rv2 = vmlaq_lane_f32(rv2, mv1, vget_low_f32(v2), 1); - - rv0 = vmlaq_lane_f32(rv0, mv2, vget_high_f32(v0), 0); - rv1 = vmlaq_lane_f32(rv1, mv2, vget_high_f32(v1), 0); - rv2 = vmlaq_lane_f32(rv2, mv2, vget_high_f32(v2), 0); - - m_el[0].mVec128 = rv0; - m_el[1].mVec128 = rv1; - m_el[2].mVec128 = rv2; -#else - setValue( - m.tdotx(m_el[0]), m.tdoty(m_el[0]), m.tdotz(m_el[0]), - m.tdotx(m_el[1]), m.tdoty(m_el[1]), m.tdotz(m_el[1]), - m.tdotx(m_el[2]), m.tdoty(m_el[2]), m.tdotz(m_el[2])); -#endif - return *this; -} - -B3_FORCE_INLINE b3Matrix3x3& -b3Matrix3x3::operator+=(const b3Matrix3x3& m) -{ -#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) || defined(B3_USE_NEON) - m_el[0].mVec128 = m_el[0].mVec128 + m.m_el[0].mVec128; - m_el[1].mVec128 = m_el[1].mVec128 + m.m_el[1].mVec128; - m_el[2].mVec128 = m_el[2].mVec128 + m.m_el[2].mVec128; -#else - setValue( - m_el[0][0] + m.m_el[0][0], - m_el[0][1] + m.m_el[0][1], - m_el[0][2] + m.m_el[0][2], - m_el[1][0] + m.m_el[1][0], - m_el[1][1] + m.m_el[1][1], - m_el[1][2] + m.m_el[1][2], - m_el[2][0] + m.m_el[2][0], - m_el[2][1] + m.m_el[2][1], - m_el[2][2] + m.m_el[2][2]); -#endif - return *this; -} - -B3_FORCE_INLINE b3Matrix3x3 -operator*(const b3Matrix3x3& m, const b3Scalar& k) -{ -#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) - __m128 vk = b3_splat_ps(_mm_load_ss((float*)&k), 0x80); - return b3Matrix3x3( - 
_mm_mul_ps(m[0].mVec128, vk), - _mm_mul_ps(m[1].mVec128, vk), - _mm_mul_ps(m[2].mVec128, vk)); -#elif defined(B3_USE_NEON) - return b3Matrix3x3( - vmulq_n_f32(m[0].mVec128, k), - vmulq_n_f32(m[1].mVec128, k), - vmulq_n_f32(m[2].mVec128, k)); -#else - return b3Matrix3x3( - m[0].getX() * k, m[0].getY() * k, m[0].getZ() * k, - m[1].getX() * k, m[1].getY() * k, m[1].getZ() * k, - m[2].getX() * k, m[2].getY() * k, m[2].getZ() * k); -#endif -} - -B3_FORCE_INLINE b3Matrix3x3 -operator+(const b3Matrix3x3& m1, const b3Matrix3x3& m2) -{ -#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) || defined(B3_USE_NEON) - return b3Matrix3x3( - m1[0].mVec128 + m2[0].mVec128, - m1[1].mVec128 + m2[1].mVec128, - m1[2].mVec128 + m2[2].mVec128); -#else - return b3Matrix3x3( - m1[0][0] + m2[0][0], - m1[0][1] + m2[0][1], - m1[0][2] + m2[0][2], - - m1[1][0] + m2[1][0], - m1[1][1] + m2[1][1], - m1[1][2] + m2[1][2], - - m1[2][0] + m2[2][0], - m1[2][1] + m2[2][1], - m1[2][2] + m2[2][2]); -#endif -} - -B3_FORCE_INLINE b3Matrix3x3 -operator-(const b3Matrix3x3& m1, const b3Matrix3x3& m2) -{ -#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) || defined(B3_USE_NEON) - return b3Matrix3x3( - m1[0].mVec128 - m2[0].mVec128, - m1[1].mVec128 - m2[1].mVec128, - m1[2].mVec128 - m2[2].mVec128); -#else - return b3Matrix3x3( - m1[0][0] - m2[0][0], - m1[0][1] - m2[0][1], - m1[0][2] - m2[0][2], - - m1[1][0] - m2[1][0], - m1[1][1] - m2[1][1], - m1[1][2] - m2[1][2], - - m1[2][0] - m2[2][0], - m1[2][1] - m2[2][1], - m1[2][2] - m2[2][2]); -#endif -} - -B3_FORCE_INLINE b3Matrix3x3& -b3Matrix3x3::operator-=(const b3Matrix3x3& m) -{ -#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) || defined(B3_USE_NEON) - m_el[0].mVec128 = m_el[0].mVec128 - m.m_el[0].mVec128; - m_el[1].mVec128 = m_el[1].mVec128 - m.m_el[1].mVec128; - m_el[2].mVec128 = m_el[2].mVec128 - m.m_el[2].mVec128; -#else - setValue( - m_el[0][0] - m.m_el[0][0], - m_el[0][1] - m.m_el[0][1], - m_el[0][2] - m.m_el[0][2], - m_el[1][0] - 
m.m_el[1][0], - m_el[1][1] - m.m_el[1][1], - m_el[1][2] - m.m_el[1][2], - m_el[2][0] - m.m_el[2][0], - m_el[2][1] - m.m_el[2][1], - m_el[2][2] - m.m_el[2][2]); -#endif - return *this; -} - -B3_FORCE_INLINE b3Scalar -b3Matrix3x3::determinant() const -{ - return b3Triple((*this)[0], (*this)[1], (*this)[2]); -} - -B3_FORCE_INLINE b3Matrix3x3 -b3Matrix3x3::absolute() const -{ -#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) - return b3Matrix3x3( - _mm_and_ps(m_el[0].mVec128, b3vAbsfMask), - _mm_and_ps(m_el[1].mVec128, b3vAbsfMask), - _mm_and_ps(m_el[2].mVec128, b3vAbsfMask)); -#elif defined(B3_USE_NEON) - return b3Matrix3x3( - (float32x4_t)vandq_s32((int32x4_t)m_el[0].mVec128, b3v3AbsMask), - (float32x4_t)vandq_s32((int32x4_t)m_el[1].mVec128, b3v3AbsMask), - (float32x4_t)vandq_s32((int32x4_t)m_el[2].mVec128, b3v3AbsMask)); -#else - return b3Matrix3x3( - b3Fabs(m_el[0].getX()), b3Fabs(m_el[0].getY()), b3Fabs(m_el[0].getZ()), - b3Fabs(m_el[1].getX()), b3Fabs(m_el[1].getY()), b3Fabs(m_el[1].getZ()), - b3Fabs(m_el[2].getX()), b3Fabs(m_el[2].getY()), b3Fabs(m_el[2].getZ())); -#endif -} - -B3_FORCE_INLINE b3Matrix3x3 -b3Matrix3x3::transpose() const -{ -#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) - __m128 v0 = m_el[0].mVec128; - __m128 v1 = m_el[1].mVec128; - __m128 v2 = m_el[2].mVec128; // x2 y2 z2 w2 - __m128 vT; - - v2 = _mm_and_ps(v2, b3vFFF0fMask); // x2 y2 z2 0 - - vT = _mm_unpackhi_ps(v0, v1); // z0 z1 * * - v0 = _mm_unpacklo_ps(v0, v1); // x0 x1 y0 y1 - - v1 = _mm_shuffle_ps(v0, v2, B3_SHUFFLE(2, 3, 1, 3)); // y0 y1 y2 0 - v0 = _mm_shuffle_ps(v0, v2, B3_SHUFFLE(0, 1, 0, 3)); // x0 x1 x2 0 - v2 = b3CastdTo128f(_mm_move_sd(b3CastfTo128d(v2), b3CastfTo128d(vT))); // z0 z1 z2 0 - - return b3Matrix3x3(v0, v1, v2); -#elif defined(B3_USE_NEON) - // note: zeros the w channel. We can preserve it at the cost of two more vtrn instructions. 
- static const uint32x2_t zMask = (const uint32x2_t){-1, 0}; - float32x4x2_t top = vtrnq_f32(m_el[0].mVec128, m_el[1].mVec128); // {x0 x1 z0 z1}, {y0 y1 w0 w1} - float32x2x2_t bl = vtrn_f32(vget_low_f32(m_el[2].mVec128), vdup_n_f32(0.0f)); // {x2 0 }, {y2 0} - float32x4_t v0 = vcombine_f32(vget_low_f32(top.val[0]), bl.val[0]); - float32x4_t v1 = vcombine_f32(vget_low_f32(top.val[1]), bl.val[1]); - float32x2_t q = (float32x2_t)vand_u32((uint32x2_t)vget_high_f32(m_el[2].mVec128), zMask); - float32x4_t v2 = vcombine_f32(vget_high_f32(top.val[0]), q); // z0 z1 z2 0 - return b3Matrix3x3(v0, v1, v2); -#else - return b3Matrix3x3(m_el[0].getX(), m_el[1].getX(), m_el[2].getX(), - m_el[0].getY(), m_el[1].getY(), m_el[2].getY(), - m_el[0].getZ(), m_el[1].getZ(), m_el[2].getZ()); -#endif -} - -B3_FORCE_INLINE b3Matrix3x3 -b3Matrix3x3::adjoint() const -{ - return b3Matrix3x3(cofac(1, 1, 2, 2), cofac(0, 2, 2, 1), cofac(0, 1, 1, 2), - cofac(1, 2, 2, 0), cofac(0, 0, 2, 2), cofac(0, 2, 1, 0), - cofac(1, 0, 2, 1), cofac(0, 1, 2, 0), cofac(0, 0, 1, 1)); -} - -B3_FORCE_INLINE b3Matrix3x3 -b3Matrix3x3::inverse() const -{ - b3Vector3 co = b3MakeVector3(cofac(1, 1, 2, 2), cofac(1, 2, 2, 0), cofac(1, 0, 2, 1)); - b3Scalar det = (*this)[0].dot(co); - b3FullAssert(det != b3Scalar(0.0)); - b3Scalar s = b3Scalar(1.0) / det; - return b3Matrix3x3(co.getX() * s, cofac(0, 2, 2, 1) * s, cofac(0, 1, 1, 2) * s, - co.getY() * s, cofac(0, 0, 2, 2) * s, cofac(0, 2, 1, 0) * s, - co.getZ() * s, cofac(0, 1, 2, 0) * s, cofac(0, 0, 1, 1) * s); -} - -B3_FORCE_INLINE b3Matrix3x3 -b3Matrix3x3::transposeTimes(const b3Matrix3x3& m) const -{ -#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) - // zeros w - // static const __m128i xyzMask = (const __m128i){ -1ULL, 0xffffffffULL }; - __m128 row = m_el[0].mVec128; - __m128 m0 = _mm_and_ps(m.getRow(0).mVec128, b3vFFF0fMask); - __m128 m1 = _mm_and_ps(m.getRow(1).mVec128, b3vFFF0fMask); - __m128 m2 = _mm_and_ps(m.getRow(2).mVec128, b3vFFF0fMask); - __m128 r0 = 
_mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0)); - __m128 r1 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0x55)); - __m128 r2 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0xaa)); - row = m_el[1].mVec128; - r0 = _mm_add_ps(r0, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0))); - r1 = _mm_add_ps(r1, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0x55))); - r2 = _mm_add_ps(r2, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0xaa))); - row = m_el[2].mVec128; - r0 = _mm_add_ps(r0, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0))); - r1 = _mm_add_ps(r1, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0x55))); - r2 = _mm_add_ps(r2, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0xaa))); - return b3Matrix3x3(r0, r1, r2); - -#elif defined B3_USE_NEON - // zeros w - static const uint32x4_t xyzMask = (const uint32x4_t){-1, -1, -1, 0}; - float32x4_t m0 = (float32x4_t)vandq_u32((uint32x4_t)m.getRow(0).mVec128, xyzMask); - float32x4_t m1 = (float32x4_t)vandq_u32((uint32x4_t)m.getRow(1).mVec128, xyzMask); - float32x4_t m2 = (float32x4_t)vandq_u32((uint32x4_t)m.getRow(2).mVec128, xyzMask); - float32x4_t row = m_el[0].mVec128; - float32x4_t r0 = vmulq_lane_f32(m0, vget_low_f32(row), 0); - float32x4_t r1 = vmulq_lane_f32(m0, vget_low_f32(row), 1); - float32x4_t r2 = vmulq_lane_f32(m0, vget_high_f32(row), 0); - row = m_el[1].mVec128; - r0 = vmlaq_lane_f32(r0, m1, vget_low_f32(row), 0); - r1 = vmlaq_lane_f32(r1, m1, vget_low_f32(row), 1); - r2 = vmlaq_lane_f32(r2, m1, vget_high_f32(row), 0); - row = m_el[2].mVec128; - r0 = vmlaq_lane_f32(r0, m2, vget_low_f32(row), 0); - r1 = vmlaq_lane_f32(r1, m2, vget_low_f32(row), 1); - r2 = vmlaq_lane_f32(r2, m2, vget_high_f32(row), 0); - return b3Matrix3x3(r0, r1, r2); -#else - return b3Matrix3x3( - m_el[0].getX() * m[0].getX() + m_el[1].getX() * m[1].getX() + m_el[2].getX() * m[2].getX(), - m_el[0].getX() * m[0].getY() + m_el[1].getX() * m[1].getY() + m_el[2].getX() * m[2].getY(), - m_el[0].getX() * m[0].getZ() + m_el[1].getX() * m[1].getZ() + m_el[2].getX() * m[2].getZ(), - m_el[0].getY() * 
m[0].getX() + m_el[1].getY() * m[1].getX() + m_el[2].getY() * m[2].getX(), - m_el[0].getY() * m[0].getY() + m_el[1].getY() * m[1].getY() + m_el[2].getY() * m[2].getY(), - m_el[0].getY() * m[0].getZ() + m_el[1].getY() * m[1].getZ() + m_el[2].getY() * m[2].getZ(), - m_el[0].getZ() * m[0].getX() + m_el[1].getZ() * m[1].getX() + m_el[2].getZ() * m[2].getX(), - m_el[0].getZ() * m[0].getY() + m_el[1].getZ() * m[1].getY() + m_el[2].getZ() * m[2].getY(), - m_el[0].getZ() * m[0].getZ() + m_el[1].getZ() * m[1].getZ() + m_el[2].getZ() * m[2].getZ()); -#endif -} - -B3_FORCE_INLINE b3Matrix3x3 -b3Matrix3x3::timesTranspose(const b3Matrix3x3& m) const -{ -#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) - __m128 a0 = m_el[0].mVec128; - __m128 a1 = m_el[1].mVec128; - __m128 a2 = m_el[2].mVec128; - - b3Matrix3x3 mT = m.transpose(); // we rely on transpose() zeroing w channel so that we don't have to do it here - __m128 mx = mT[0].mVec128; - __m128 my = mT[1].mVec128; - __m128 mz = mT[2].mVec128; - - __m128 r0 = _mm_mul_ps(mx, _mm_shuffle_ps(a0, a0, 0x00)); - __m128 r1 = _mm_mul_ps(mx, _mm_shuffle_ps(a1, a1, 0x00)); - __m128 r2 = _mm_mul_ps(mx, _mm_shuffle_ps(a2, a2, 0x00)); - r0 = _mm_add_ps(r0, _mm_mul_ps(my, _mm_shuffle_ps(a0, a0, 0x55))); - r1 = _mm_add_ps(r1, _mm_mul_ps(my, _mm_shuffle_ps(a1, a1, 0x55))); - r2 = _mm_add_ps(r2, _mm_mul_ps(my, _mm_shuffle_ps(a2, a2, 0x55))); - r0 = _mm_add_ps(r0, _mm_mul_ps(mz, _mm_shuffle_ps(a0, a0, 0xaa))); - r1 = _mm_add_ps(r1, _mm_mul_ps(mz, _mm_shuffle_ps(a1, a1, 0xaa))); - r2 = _mm_add_ps(r2, _mm_mul_ps(mz, _mm_shuffle_ps(a2, a2, 0xaa))); - return b3Matrix3x3(r0, r1, r2); - -#elif defined B3_USE_NEON - float32x4_t a0 = m_el[0].mVec128; - float32x4_t a1 = m_el[1].mVec128; - float32x4_t a2 = m_el[2].mVec128; - - b3Matrix3x3 mT = m.transpose(); // we rely on transpose() zeroing w channel so that we don't have to do it here - float32x4_t mx = mT[0].mVec128; - float32x4_t my = mT[1].mVec128; - float32x4_t mz = mT[2].mVec128; - - 
float32x4_t r0 = vmulq_lane_f32(mx, vget_low_f32(a0), 0); - float32x4_t r1 = vmulq_lane_f32(mx, vget_low_f32(a1), 0); - float32x4_t r2 = vmulq_lane_f32(mx, vget_low_f32(a2), 0); - r0 = vmlaq_lane_f32(r0, my, vget_low_f32(a0), 1); - r1 = vmlaq_lane_f32(r1, my, vget_low_f32(a1), 1); - r2 = vmlaq_lane_f32(r2, my, vget_low_f32(a2), 1); - r0 = vmlaq_lane_f32(r0, mz, vget_high_f32(a0), 0); - r1 = vmlaq_lane_f32(r1, mz, vget_high_f32(a1), 0); - r2 = vmlaq_lane_f32(r2, mz, vget_high_f32(a2), 0); - return b3Matrix3x3(r0, r1, r2); - -#else - return b3Matrix3x3( - m_el[0].dot(m[0]), m_el[0].dot(m[1]), m_el[0].dot(m[2]), - m_el[1].dot(m[0]), m_el[1].dot(m[1]), m_el[1].dot(m[2]), - m_el[2].dot(m[0]), m_el[2].dot(m[1]), m_el[2].dot(m[2])); -#endif -} - -B3_FORCE_INLINE b3Vector3 -operator*(const b3Matrix3x3& m, const b3Vector3& v) -{ -#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) || defined(B3_USE_NEON) - return v.dot3(m[0], m[1], m[2]); -#else - return b3MakeVector3(m[0].dot(v), m[1].dot(v), m[2].dot(v)); -#endif -} - -B3_FORCE_INLINE b3Vector3 -operator*(const b3Vector3& v, const b3Matrix3x3& m) -{ -#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) - - const __m128 vv = v.mVec128; - - __m128 c0 = b3_splat_ps(vv, 0); - __m128 c1 = b3_splat_ps(vv, 1); - __m128 c2 = b3_splat_ps(vv, 2); - - c0 = _mm_mul_ps(c0, _mm_and_ps(m[0].mVec128, b3vFFF0fMask)); - c1 = _mm_mul_ps(c1, _mm_and_ps(m[1].mVec128, b3vFFF0fMask)); - c0 = _mm_add_ps(c0, c1); - c2 = _mm_mul_ps(c2, _mm_and_ps(m[2].mVec128, b3vFFF0fMask)); - - return b3MakeVector3(_mm_add_ps(c0, c2)); -#elif defined(B3_USE_NEON) - const float32x4_t vv = v.mVec128; - const float32x2_t vlo = vget_low_f32(vv); - const float32x2_t vhi = vget_high_f32(vv); - - float32x4_t c0, c1, c2; - - c0 = (float32x4_t)vandq_s32((int32x4_t)m[0].mVec128, b3vFFF0Mask); - c1 = (float32x4_t)vandq_s32((int32x4_t)m[1].mVec128, b3vFFF0Mask); - c2 = (float32x4_t)vandq_s32((int32x4_t)m[2].mVec128, b3vFFF0Mask); - - c0 = vmulq_lane_f32(c0, vlo, 0); 
- c1 = vmulq_lane_f32(c1, vlo, 1); - c2 = vmulq_lane_f32(c2, vhi, 0); - c0 = vaddq_f32(c0, c1); - c0 = vaddq_f32(c0, c2); - - return b3MakeVector3(c0); -#else - return b3MakeVector3(m.tdotx(v), m.tdoty(v), m.tdotz(v)); -#endif -} - -B3_FORCE_INLINE b3Matrix3x3 -operator*(const b3Matrix3x3& m1, const b3Matrix3x3& m2) -{ -#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) - - __m128 m10 = m1[0].mVec128; - __m128 m11 = m1[1].mVec128; - __m128 m12 = m1[2].mVec128; - - __m128 m2v = _mm_and_ps(m2[0].mVec128, b3vFFF0fMask); - - __m128 c0 = b3_splat_ps(m10, 0); - __m128 c1 = b3_splat_ps(m11, 0); - __m128 c2 = b3_splat_ps(m12, 0); - - c0 = _mm_mul_ps(c0, m2v); - c1 = _mm_mul_ps(c1, m2v); - c2 = _mm_mul_ps(c2, m2v); - - m2v = _mm_and_ps(m2[1].mVec128, b3vFFF0fMask); - - __m128 c0_1 = b3_splat_ps(m10, 1); - __m128 c1_1 = b3_splat_ps(m11, 1); - __m128 c2_1 = b3_splat_ps(m12, 1); - - c0_1 = _mm_mul_ps(c0_1, m2v); - c1_1 = _mm_mul_ps(c1_1, m2v); - c2_1 = _mm_mul_ps(c2_1, m2v); - - m2v = _mm_and_ps(m2[2].mVec128, b3vFFF0fMask); - - c0 = _mm_add_ps(c0, c0_1); - c1 = _mm_add_ps(c1, c1_1); - c2 = _mm_add_ps(c2, c2_1); - - m10 = b3_splat_ps(m10, 2); - m11 = b3_splat_ps(m11, 2); - m12 = b3_splat_ps(m12, 2); - - m10 = _mm_mul_ps(m10, m2v); - m11 = _mm_mul_ps(m11, m2v); - m12 = _mm_mul_ps(m12, m2v); - - c0 = _mm_add_ps(c0, m10); - c1 = _mm_add_ps(c1, m11); - c2 = _mm_add_ps(c2, m12); - - return b3Matrix3x3(c0, c1, c2); - -#elif defined(B3_USE_NEON) - - float32x4_t rv0, rv1, rv2; - float32x4_t v0, v1, v2; - float32x4_t mv0, mv1, mv2; - - v0 = m1[0].mVec128; - v1 = m1[1].mVec128; - v2 = m1[2].mVec128; - - mv0 = (float32x4_t)vandq_s32((int32x4_t)m2[0].mVec128, b3vFFF0Mask); - mv1 = (float32x4_t)vandq_s32((int32x4_t)m2[1].mVec128, b3vFFF0Mask); - mv2 = (float32x4_t)vandq_s32((int32x4_t)m2[2].mVec128, b3vFFF0Mask); - - rv0 = vmulq_lane_f32(mv0, vget_low_f32(v0), 0); - rv1 = vmulq_lane_f32(mv0, vget_low_f32(v1), 0); - rv2 = vmulq_lane_f32(mv0, vget_low_f32(v2), 0); - - rv0 = 
vmlaq_lane_f32(rv0, mv1, vget_low_f32(v0), 1); - rv1 = vmlaq_lane_f32(rv1, mv1, vget_low_f32(v1), 1); - rv2 = vmlaq_lane_f32(rv2, mv1, vget_low_f32(v2), 1); - - rv0 = vmlaq_lane_f32(rv0, mv2, vget_high_f32(v0), 0); - rv1 = vmlaq_lane_f32(rv1, mv2, vget_high_f32(v1), 0); - rv2 = vmlaq_lane_f32(rv2, mv2, vget_high_f32(v2), 0); - - return b3Matrix3x3(rv0, rv1, rv2); - -#else - return b3Matrix3x3( - m2.tdotx(m1[0]), m2.tdoty(m1[0]), m2.tdotz(m1[0]), - m2.tdotx(m1[1]), m2.tdoty(m1[1]), m2.tdotz(m1[1]), - m2.tdotx(m1[2]), m2.tdoty(m1[2]), m2.tdotz(m1[2])); -#endif -} - -/* -B3_FORCE_INLINE b3Matrix3x3 b3MultTransposeLeft(const b3Matrix3x3& m1, const b3Matrix3x3& m2) { -return b3Matrix3x3( -m1[0][0] * m2[0][0] + m1[1][0] * m2[1][0] + m1[2][0] * m2[2][0], -m1[0][0] * m2[0][1] + m1[1][0] * m2[1][1] + m1[2][0] * m2[2][1], -m1[0][0] * m2[0][2] + m1[1][0] * m2[1][2] + m1[2][0] * m2[2][2], -m1[0][1] * m2[0][0] + m1[1][1] * m2[1][0] + m1[2][1] * m2[2][0], -m1[0][1] * m2[0][1] + m1[1][1] * m2[1][1] + m1[2][1] * m2[2][1], -m1[0][1] * m2[0][2] + m1[1][1] * m2[1][2] + m1[2][1] * m2[2][2], -m1[0][2] * m2[0][0] + m1[1][2] * m2[1][0] + m1[2][2] * m2[2][0], -m1[0][2] * m2[0][1] + m1[1][2] * m2[1][1] + m1[2][2] * m2[2][1], -m1[0][2] * m2[0][2] + m1[1][2] * m2[1][2] + m1[2][2] * m2[2][2]); -} -*/ - -/**@brief Equality operator between two matrices -* It will test all elements are equal. 
*/ -B3_FORCE_INLINE bool operator==(const b3Matrix3x3& m1, const b3Matrix3x3& m2) -{ -#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) - - __m128 c0, c1, c2; - - c0 = _mm_cmpeq_ps(m1[0].mVec128, m2[0].mVec128); - c1 = _mm_cmpeq_ps(m1[1].mVec128, m2[1].mVec128); - c2 = _mm_cmpeq_ps(m1[2].mVec128, m2[2].mVec128); - - c0 = _mm_and_ps(c0, c1); - c0 = _mm_and_ps(c0, c2); - - return (0x7 == _mm_movemask_ps((__m128)c0)); -#else - return (m1[0][0] == m2[0][0] && m1[1][0] == m2[1][0] && m1[2][0] == m2[2][0] && - m1[0][1] == m2[0][1] && m1[1][1] == m2[1][1] && m1[2][1] == m2[2][1] && - m1[0][2] == m2[0][2] && m1[1][2] == m2[1][2] && m1[2][2] == m2[2][2]); -#endif -} - -///for serialization -struct b3Matrix3x3FloatData -{ - b3Vector3FloatData m_el[3]; -}; - -///for serialization -struct b3Matrix3x3DoubleData -{ - b3Vector3DoubleData m_el[3]; -}; - -B3_FORCE_INLINE void b3Matrix3x3::serialize(struct b3Matrix3x3Data& dataOut) const -{ - for (int i = 0; i < 3; i++) - m_el[i].serialize(dataOut.m_el[i]); -} - -B3_FORCE_INLINE void b3Matrix3x3::serializeFloat(struct b3Matrix3x3FloatData& dataOut) const -{ - for (int i = 0; i < 3; i++) - m_el[i].serializeFloat(dataOut.m_el[i]); -} - -B3_FORCE_INLINE void b3Matrix3x3::deSerialize(const struct b3Matrix3x3Data& dataIn) -{ - for (int i = 0; i < 3; i++) - m_el[i].deSerialize(dataIn.m_el[i]); -} - -B3_FORCE_INLINE void b3Matrix3x3::deSerializeFloat(const struct b3Matrix3x3FloatData& dataIn) -{ - for (int i = 0; i < 3; i++) - m_el[i].deSerializeFloat(dataIn.m_el[i]); -} - -B3_FORCE_INLINE void b3Matrix3x3::deSerializeDouble(const struct b3Matrix3x3DoubleData& dataIn) -{ - for (int i = 0; i < 3; i++) - m_el[i].deSerializeDouble(dataIn.m_el[i]); -} - -#endif //B3_MATRIX3x3_H diff --git a/thirdparty/bullet/Bullet3Common/b3MinMax.h b/thirdparty/bullet/Bullet3Common/b3MinMax.h deleted file mode 100644 index c09c3db3f5..0000000000 --- a/thirdparty/bullet/Bullet3Common/b3MinMax.h +++ /dev/null @@ -1,69 +0,0 @@ -/* -Copyright (c) 2003-2013 
Gino van den Bergen / Erwin Coumans http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#ifndef B3_GEN_MINMAX_H -#define B3_GEN_MINMAX_H - -#include "b3Scalar.h" - -template <class T> -B3_FORCE_INLINE const T& b3Min(const T& a, const T& b) -{ - return a < b ? a : b; -} - -template <class T> -B3_FORCE_INLINE const T& b3Max(const T& a, const T& b) -{ - return a > b ? a : b; -} - -template <class T> -B3_FORCE_INLINE const T& b3Clamped(const T& a, const T& lb, const T& ub) -{ - return a < lb ? lb : (ub < a ? 
ub : a); -} - -template <class T> -B3_FORCE_INLINE void b3SetMin(T& a, const T& b) -{ - if (b < a) - { - a = b; - } -} - -template <class T> -B3_FORCE_INLINE void b3SetMax(T& a, const T& b) -{ - if (a < b) - { - a = b; - } -} - -template <class T> -B3_FORCE_INLINE void b3Clamp(T& a, const T& lb, const T& ub) -{ - if (a < lb) - { - a = lb; - } - else if (ub < a) - { - a = ub; - } -} - -#endif //B3_GEN_MINMAX_H diff --git a/thirdparty/bullet/Bullet3Common/b3PoolAllocator.h b/thirdparty/bullet/Bullet3Common/b3PoolAllocator.h deleted file mode 100644 index ed56bc627d..0000000000 --- a/thirdparty/bullet/Bullet3Common/b3PoolAllocator.h +++ /dev/null @@ -1,121 +0,0 @@ -/* -Copyright (c) 2003-2013 Gino van den Bergen / Erwin Coumans http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#ifndef _BT_POOL_ALLOCATOR_H -#define _BT_POOL_ALLOCATOR_H - -#include "b3Scalar.h" -#include "b3AlignedAllocator.h" - -///The b3PoolAllocator class allows to efficiently allocate a large pool of objects, instead of dynamically allocating them separately. 
-class b3PoolAllocator -{ - int m_elemSize; - int m_maxElements; - int m_freeCount; - void* m_firstFree; - unsigned char* m_pool; - -public: - b3PoolAllocator(int elemSize, int maxElements) - : m_elemSize(elemSize), - m_maxElements(maxElements) - { - m_pool = (unsigned char*)b3AlignedAlloc(static_cast<unsigned int>(m_elemSize * m_maxElements), 16); - - unsigned char* p = m_pool; - m_firstFree = p; - m_freeCount = m_maxElements; - int count = m_maxElements; - while (--count) - { - *(void**)p = (p + m_elemSize); - p += m_elemSize; - } - *(void**)p = 0; - } - - ~b3PoolAllocator() - { - b3AlignedFree(m_pool); - } - - int getFreeCount() const - { - return m_freeCount; - } - - int getUsedCount() const - { - return m_maxElements - m_freeCount; - } - - int getMaxCount() const - { - return m_maxElements; - } - - void* allocate(int size) - { - // release mode fix - (void)size; - b3Assert(!size || size <= m_elemSize); - b3Assert(m_freeCount > 0); - void* result = m_firstFree; - m_firstFree = *(void**)m_firstFree; - --m_freeCount; - return result; - } - - bool validPtr(void* ptr) - { - if (ptr) - { - if (((unsigned char*)ptr >= m_pool && (unsigned char*)ptr < m_pool + m_maxElements * m_elemSize)) - { - return true; - } - } - return false; - } - - void freeMemory(void* ptr) - { - if (ptr) - { - b3Assert((unsigned char*)ptr >= m_pool && (unsigned char*)ptr < m_pool + m_maxElements * m_elemSize); - - *(void**)ptr = m_firstFree; - m_firstFree = ptr; - ++m_freeCount; - } - } - - int getElementSize() const - { - return m_elemSize; - } - - unsigned char* getPoolAddress() - { - return m_pool; - } - - const unsigned char* getPoolAddress() const - { - return m_pool; - } -}; - -#endif //_BT_POOL_ALLOCATOR_H diff --git a/thirdparty/bullet/Bullet3Common/b3QuadWord.h b/thirdparty/bullet/Bullet3Common/b3QuadWord.h deleted file mode 100644 index 0def305fac..0000000000 --- a/thirdparty/bullet/Bullet3Common/b3QuadWord.h +++ /dev/null @@ -1,242 +0,0 @@ -/* -Copyright (c) 2003-2013 Gino van den 
Bergen / Erwin Coumans http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#ifndef B3_SIMD_QUADWORD_H -#define B3_SIMD_QUADWORD_H - -#include "b3Scalar.h" -#include "b3MinMax.h" - -#if defined(__CELLOS_LV2) && defined(__SPU__) -#include <altivec.h> -#endif - -/**@brief The b3QuadWord class is base class for b3Vector3 and b3Quaternion. - * Some issues under PS3 Linux with IBM 2.1 SDK, gcc compiler prevent from using aligned quadword. 
- */ -#ifndef USE_LIBSPE2 -B3_ATTRIBUTE_ALIGNED16(class) -b3QuadWord -#else -class b3QuadWord -#endif -{ -protected: -#if defined(__SPU__) && defined(__CELLOS_LV2__) - union { - vec_float4 mVec128; - b3Scalar m_floats[4]; - }; - -public: - vec_float4 get128() const - { - return mVec128; - } - -#else //__CELLOS_LV2__ __SPU__ - -#if defined(B3_USE_SSE) || defined(B3_USE_NEON) -public: - union { - b3SimdFloat4 mVec128; - b3Scalar m_floats[4]; - struct - { - b3Scalar x, y, z, w; - }; - }; - -public: - B3_FORCE_INLINE b3SimdFloat4 get128() const - { - return mVec128; - } - B3_FORCE_INLINE void set128(b3SimdFloat4 v128) - { - mVec128 = v128; - } -#else -public: - union { - b3Scalar m_floats[4]; - struct - { - b3Scalar x, y, z, w; - }; - }; -#endif // B3_USE_SSE - -#endif //__CELLOS_LV2__ __SPU__ - -public: -#if defined(B3_USE_SSE) || defined(B3_USE_NEON) - - // Set Vector - B3_FORCE_INLINE b3QuadWord(const b3SimdFloat4 vec) - { - mVec128 = vec; - } - - // Copy constructor - B3_FORCE_INLINE b3QuadWord(const b3QuadWord& rhs) - { - mVec128 = rhs.mVec128; - } - - // Assignment Operator - B3_FORCE_INLINE b3QuadWord& - operator=(const b3QuadWord& v) - { - mVec128 = v.mVec128; - - return *this; - } - -#endif - - /**@brief Return the x value */ - B3_FORCE_INLINE const b3Scalar& getX() const { return m_floats[0]; } - /**@brief Return the y value */ - B3_FORCE_INLINE const b3Scalar& getY() const { return m_floats[1]; } - /**@brief Return the z value */ - B3_FORCE_INLINE const b3Scalar& getZ() const { return m_floats[2]; } - /**@brief Set the x value */ - B3_FORCE_INLINE void setX(b3Scalar _x) { m_floats[0] = _x; }; - /**@brief Set the y value */ - B3_FORCE_INLINE void setY(b3Scalar _y) { m_floats[1] = _y; }; - /**@brief Set the z value */ - B3_FORCE_INLINE void setZ(b3Scalar _z) { m_floats[2] = _z; }; - /**@brief Set the w value */ - B3_FORCE_INLINE void setW(b3Scalar _w) { m_floats[3] = _w; }; - /**@brief Return the x value */ - - //B3_FORCE_INLINE b3Scalar& operator[](int i) { 
return (&m_floats[0])[i]; } - //B3_FORCE_INLINE const b3Scalar& operator[](int i) const { return (&m_floats[0])[i]; } - ///operator b3Scalar*() replaces operator[], using implicit conversion. We added operator != and operator == to avoid pointer comparisons. - B3_FORCE_INLINE operator b3Scalar*() { return &m_floats[0]; } - B3_FORCE_INLINE operator const b3Scalar*() const { return &m_floats[0]; } - - B3_FORCE_INLINE bool operator==(const b3QuadWord& other) const - { -#ifdef B3_USE_SSE - return (0xf == _mm_movemask_ps((__m128)_mm_cmpeq_ps(mVec128, other.mVec128))); -#else - return ((m_floats[3] == other.m_floats[3]) && - (m_floats[2] == other.m_floats[2]) && - (m_floats[1] == other.m_floats[1]) && - (m_floats[0] == other.m_floats[0])); -#endif - } - - B3_FORCE_INLINE bool operator!=(const b3QuadWord& other) const - { - return !(*this == other); - } - - /**@brief Set x,y,z and zero w - * @param x Value of x - * @param y Value of y - * @param z Value of z - */ - B3_FORCE_INLINE void setValue(const b3Scalar& _x, const b3Scalar& _y, const b3Scalar& _z) - { - m_floats[0] = _x; - m_floats[1] = _y; - m_floats[2] = _z; - m_floats[3] = 0.f; - } - - /* void getValue(b3Scalar *m) const - { - m[0] = m_floats[0]; - m[1] = m_floats[1]; - m[2] = m_floats[2]; - } -*/ - /**@brief Set the values - * @param x Value of x - * @param y Value of y - * @param z Value of z - * @param w Value of w - */ - B3_FORCE_INLINE void setValue(const b3Scalar& _x, const b3Scalar& _y, const b3Scalar& _z, const b3Scalar& _w) - { - m_floats[0] = _x; - m_floats[1] = _y; - m_floats[2] = _z; - m_floats[3] = _w; - } - /**@brief No initialization constructor */ - B3_FORCE_INLINE b3QuadWord() - // :m_floats[0](b3Scalar(0.)),m_floats[1](b3Scalar(0.)),m_floats[2](b3Scalar(0.)),m_floats[3](b3Scalar(0.)) - { - } - - /**@brief Three argument constructor (zeros w) - * @param x Value of x - * @param y Value of y - * @param z Value of z - */ - B3_FORCE_INLINE b3QuadWord(const b3Scalar& _x, const b3Scalar& _y, const 
b3Scalar& _z) - { - m_floats[0] = _x, m_floats[1] = _y, m_floats[2] = _z, m_floats[3] = 0.0f; - } - - /**@brief Initializing constructor - * @param x Value of x - * @param y Value of y - * @param z Value of z - * @param w Value of w - */ - B3_FORCE_INLINE b3QuadWord(const b3Scalar& _x, const b3Scalar& _y, const b3Scalar& _z, const b3Scalar& _w) - { - m_floats[0] = _x, m_floats[1] = _y, m_floats[2] = _z, m_floats[3] = _w; - } - - /**@brief Set each element to the max of the current values and the values of another b3QuadWord - * @param other The other b3QuadWord to compare with - */ - B3_FORCE_INLINE void setMax(const b3QuadWord& other) - { -#ifdef B3_USE_SSE - mVec128 = _mm_max_ps(mVec128, other.mVec128); -#elif defined(B3_USE_NEON) - mVec128 = vmaxq_f32(mVec128, other.mVec128); -#else - b3SetMax(m_floats[0], other.m_floats[0]); - b3SetMax(m_floats[1], other.m_floats[1]); - b3SetMax(m_floats[2], other.m_floats[2]); - b3SetMax(m_floats[3], other.m_floats[3]); -#endif - } - /**@brief Set each element to the min of the current values and the values of another b3QuadWord - * @param other The other b3QuadWord to compare with - */ - B3_FORCE_INLINE void setMin(const b3QuadWord& other) - { -#ifdef B3_USE_SSE - mVec128 = _mm_min_ps(mVec128, other.mVec128); -#elif defined(B3_USE_NEON) - mVec128 = vminq_f32(mVec128, other.mVec128); -#else - b3SetMin(m_floats[0], other.m_floats[0]); - b3SetMin(m_floats[1], other.m_floats[1]); - b3SetMin(m_floats[2], other.m_floats[2]); - b3SetMin(m_floats[3], other.m_floats[3]); -#endif - } -}; - -#endif //B3_SIMD_QUADWORD_H diff --git a/thirdparty/bullet/Bullet3Common/b3Quaternion.h b/thirdparty/bullet/Bullet3Common/b3Quaternion.h deleted file mode 100644 index 4fdd72dcc4..0000000000 --- a/thirdparty/bullet/Bullet3Common/b3Quaternion.h +++ /dev/null @@ -1,908 +0,0 @@ -/* -Copyright (c) 2003-2013 Gino van den Bergen / Erwin Coumans http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. 
-In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#ifndef B3_SIMD__QUATERNION_H_ -#define B3_SIMD__QUATERNION_H_ - -#include "b3Vector3.h" -#include "b3QuadWord.h" - -#ifdef B3_USE_SSE - -const __m128 B3_ATTRIBUTE_ALIGNED16(b3vOnes) = {1.0f, 1.0f, 1.0f, 1.0f}; - -#endif - -#if defined(B3_USE_SSE) || defined(B3_USE_NEON) - -const b3SimdFloat4 B3_ATTRIBUTE_ALIGNED16(b3vQInv) = {-0.0f, -0.0f, -0.0f, +0.0f}; -const b3SimdFloat4 B3_ATTRIBUTE_ALIGNED16(b3vPPPM) = {+0.0f, +0.0f, +0.0f, -0.0f}; - -#endif - -/**@brief The b3Quaternion implements quaternion to perform linear algebra rotations in combination with b3Matrix3x3, b3Vector3 and b3Transform. 
*/ -class b3Quaternion : public b3QuadWord -{ -public: - /**@brief No initialization constructor */ - b3Quaternion() {} - -#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) || defined(B3_USE_NEON) - // Set Vector - B3_FORCE_INLINE b3Quaternion(const b3SimdFloat4 vec) - { - mVec128 = vec; - } - - // Copy constructor - B3_FORCE_INLINE b3Quaternion(const b3Quaternion& rhs) - { - mVec128 = rhs.mVec128; - } - - // Assignment Operator - B3_FORCE_INLINE b3Quaternion& - operator=(const b3Quaternion& v) - { - mVec128 = v.mVec128; - - return *this; - } - -#endif - - // template <typename b3Scalar> - // explicit Quaternion(const b3Scalar *v) : Tuple4<b3Scalar>(v) {} - /**@brief Constructor from scalars */ - b3Quaternion(const b3Scalar& _x, const b3Scalar& _y, const b3Scalar& _z, const b3Scalar& _w) - : b3QuadWord(_x, _y, _z, _w) - { - //b3Assert(!((_x==1.f) && (_y==0.f) && (_z==0.f) && (_w==0.f))); - } - /**@brief Axis angle Constructor - * @param axis The axis which the rotation is around - * @param angle The magnitude of the rotation around the angle (Radians) */ - b3Quaternion(const b3Vector3& _axis, const b3Scalar& _angle) - { - setRotation(_axis, _angle); - } - /**@brief Constructor from Euler angles - * @param yaw Angle around Y unless B3_EULER_DEFAULT_ZYX defined then Z - * @param pitch Angle around X unless B3_EULER_DEFAULT_ZYX defined then Y - * @param roll Angle around Z unless B3_EULER_DEFAULT_ZYX defined then X */ - b3Quaternion(const b3Scalar& yaw, const b3Scalar& pitch, const b3Scalar& roll) - { -#ifndef B3_EULER_DEFAULT_ZYX - setEuler(yaw, pitch, roll); -#else - setEulerZYX(yaw, pitch, roll); -#endif - } - /**@brief Set the rotation using axis angle notation - * @param axis The axis around which to rotate - * @param angle The magnitude of the rotation in Radians */ - void setRotation(const b3Vector3& axis1, const b3Scalar& _angle) - { - b3Vector3 axis = axis1; - axis.safeNormalize(); - - b3Scalar d = axis.length(); - b3Assert(d != b3Scalar(0.0)); - if (d 
< B3_EPSILON) - { - setValue(0, 0, 0, 1); - } - else - { - b3Scalar s = b3Sin(_angle * b3Scalar(0.5)) / d; - setValue(axis.getX() * s, axis.getY() * s, axis.getZ() * s, - b3Cos(_angle * b3Scalar(0.5))); - } - } - /**@brief Set the quaternion using Euler angles - * @param yaw Angle around Y - * @param pitch Angle around X - * @param roll Angle around Z */ - void setEuler(const b3Scalar& yaw, const b3Scalar& pitch, const b3Scalar& roll) - { - b3Scalar halfYaw = b3Scalar(yaw) * b3Scalar(0.5); - b3Scalar halfPitch = b3Scalar(pitch) * b3Scalar(0.5); - b3Scalar halfRoll = b3Scalar(roll) * b3Scalar(0.5); - b3Scalar cosYaw = b3Cos(halfYaw); - b3Scalar sinYaw = b3Sin(halfYaw); - b3Scalar cosPitch = b3Cos(halfPitch); - b3Scalar sinPitch = b3Sin(halfPitch); - b3Scalar cosRoll = b3Cos(halfRoll); - b3Scalar sinRoll = b3Sin(halfRoll); - setValue(cosRoll * sinPitch * cosYaw + sinRoll * cosPitch * sinYaw, - cosRoll * cosPitch * sinYaw - sinRoll * sinPitch * cosYaw, - sinRoll * cosPitch * cosYaw - cosRoll * sinPitch * sinYaw, - cosRoll * cosPitch * cosYaw + sinRoll * sinPitch * sinYaw); - } - - /**@brief Set the quaternion using euler angles - * @param yaw Angle around Z - * @param pitch Angle around Y - * @param roll Angle around X */ - void setEulerZYX(const b3Scalar& yawZ, const b3Scalar& pitchY, const b3Scalar& rollX) - { - b3Scalar halfYaw = b3Scalar(yawZ) * b3Scalar(0.5); - b3Scalar halfPitch = b3Scalar(pitchY) * b3Scalar(0.5); - b3Scalar halfRoll = b3Scalar(rollX) * b3Scalar(0.5); - b3Scalar cosYaw = b3Cos(halfYaw); - b3Scalar sinYaw = b3Sin(halfYaw); - b3Scalar cosPitch = b3Cos(halfPitch); - b3Scalar sinPitch = b3Sin(halfPitch); - b3Scalar cosRoll = b3Cos(halfRoll); - b3Scalar sinRoll = b3Sin(halfRoll); - setValue(sinRoll * cosPitch * cosYaw - cosRoll * sinPitch * sinYaw, //x - cosRoll * sinPitch * cosYaw + sinRoll * cosPitch * sinYaw, //y - cosRoll * cosPitch * sinYaw - sinRoll * sinPitch * cosYaw, //z - cosRoll * cosPitch * cosYaw + sinRoll * sinPitch * sinYaw); 
//formerly yzx - normalize(); - } - - /**@brief Get the euler angles from this quaternion - * @param yaw Angle around Z - * @param pitch Angle around Y - * @param roll Angle around X */ - void getEulerZYX(b3Scalar& yawZ, b3Scalar& pitchY, b3Scalar& rollX) const - { - b3Scalar squ; - b3Scalar sqx; - b3Scalar sqy; - b3Scalar sqz; - b3Scalar sarg; - sqx = m_floats[0] * m_floats[0]; - sqy = m_floats[1] * m_floats[1]; - sqz = m_floats[2] * m_floats[2]; - squ = m_floats[3] * m_floats[3]; - rollX = b3Atan2(2 * (m_floats[1] * m_floats[2] + m_floats[3] * m_floats[0]), squ - sqx - sqy + sqz); - sarg = b3Scalar(-2.) * (m_floats[0] * m_floats[2] - m_floats[3] * m_floats[1]); - pitchY = sarg <= b3Scalar(-1.0) ? b3Scalar(-0.5) * B3_PI : (sarg >= b3Scalar(1.0) ? b3Scalar(0.5) * B3_PI : b3Asin(sarg)); - yawZ = b3Atan2(2 * (m_floats[0] * m_floats[1] + m_floats[3] * m_floats[2]), squ + sqx - sqy - sqz); - } - - /**@brief Add two quaternions - * @param q The quaternion to add to this one */ - B3_FORCE_INLINE b3Quaternion& operator+=(const b3Quaternion& q) - { -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - mVec128 = _mm_add_ps(mVec128, q.mVec128); -#elif defined(B3_USE_NEON) - mVec128 = vaddq_f32(mVec128, q.mVec128); -#else - m_floats[0] += q.getX(); - m_floats[1] += q.getY(); - m_floats[2] += q.getZ(); - m_floats[3] += q.m_floats[3]; -#endif - return *this; - } - - /**@brief Subtract out a quaternion - * @param q The quaternion to subtract from this one */ - b3Quaternion& operator-=(const b3Quaternion& q) - { -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - mVec128 = _mm_sub_ps(mVec128, q.mVec128); -#elif defined(B3_USE_NEON) - mVec128 = vsubq_f32(mVec128, q.mVec128); -#else - m_floats[0] -= q.getX(); - m_floats[1] -= q.getY(); - m_floats[2] -= q.getZ(); - m_floats[3] -= q.m_floats[3]; -#endif - return *this; - } - - /**@brief Scale this quaternion - * @param s The scalar to scale by */ - b3Quaternion& operator*=(const b3Scalar& s) - { -#if 
defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - __m128 vs = _mm_load_ss(&s); // (S 0 0 0) - vs = b3_pshufd_ps(vs, 0); // (S S S S) - mVec128 = _mm_mul_ps(mVec128, vs); -#elif defined(B3_USE_NEON) - mVec128 = vmulq_n_f32(mVec128, s); -#else - m_floats[0] *= s; - m_floats[1] *= s; - m_floats[2] *= s; - m_floats[3] *= s; -#endif - return *this; - } - - /**@brief Multiply this quaternion by q on the right - * @param q The other quaternion - * Equivilant to this = this * q */ - b3Quaternion& operator*=(const b3Quaternion& q) - { -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - __m128 vQ2 = q.get128(); - - __m128 A1 = b3_pshufd_ps(mVec128, B3_SHUFFLE(0, 1, 2, 0)); - __m128 B1 = b3_pshufd_ps(vQ2, B3_SHUFFLE(3, 3, 3, 0)); - - A1 = A1 * B1; - - __m128 A2 = b3_pshufd_ps(mVec128, B3_SHUFFLE(1, 2, 0, 1)); - __m128 B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(2, 0, 1, 1)); - - A2 = A2 * B2; - - B1 = b3_pshufd_ps(mVec128, B3_SHUFFLE(2, 0, 1, 2)); - B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(1, 2, 0, 2)); - - B1 = B1 * B2; // A3 *= B3 - - mVec128 = b3_splat_ps(mVec128, 3); // A0 - mVec128 = mVec128 * vQ2; // A0 * B0 - - A1 = A1 + A2; // AB12 - mVec128 = mVec128 - B1; // AB03 = AB0 - AB3 - A1 = _mm_xor_ps(A1, b3vPPPM); // change sign of the last element - mVec128 = mVec128 + A1; // AB03 + AB12 - -#elif defined(B3_USE_NEON) - - float32x4_t vQ1 = mVec128; - float32x4_t vQ2 = q.get128(); - float32x4_t A0, A1, B1, A2, B2, A3, B3; - float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz; - - { - float32x2x2_t tmp; - tmp = vtrn_f32(vget_high_f32(vQ1), vget_low_f32(vQ1)); // {z x}, {w y} - vQ1zx = tmp.val[0]; - - tmp = vtrn_f32(vget_high_f32(vQ2), vget_low_f32(vQ2)); // {z x}, {w y} - vQ2zx = tmp.val[0]; - } - vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1); - - vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1); - - vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1); - vQ2xz = vext_f32(vQ2zx, vQ2zx, 1); - - A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx); // X Y z x - B1 = 
vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W W X - - A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1)); - B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1)); - - A3 = vcombine_f32(vQ1zx, vQ1yz); // Z X Y Z - B3 = vcombine_f32(vQ2yz, vQ2xz); // Y Z x z - - A1 = vmulq_f32(A1, B1); - A2 = vmulq_f32(A2, B2); - A3 = vmulq_f32(A3, B3); // A3 *= B3 - A0 = vmulq_lane_f32(vQ2, vget_high_f32(vQ1), 1); // A0 * B0 - - A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2 - A0 = vsubq_f32(A0, A3); // AB03 = AB0 - AB3 - - // change the sign of the last element - A1 = (b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)b3vPPPM); - A0 = vaddq_f32(A0, A1); // AB03 + AB12 - - mVec128 = A0; -#else - setValue( - m_floats[3] * q.getX() + m_floats[0] * q.m_floats[3] + m_floats[1] * q.getZ() - m_floats[2] * q.getY(), - m_floats[3] * q.getY() + m_floats[1] * q.m_floats[3] + m_floats[2] * q.getX() - m_floats[0] * q.getZ(), - m_floats[3] * q.getZ() + m_floats[2] * q.m_floats[3] + m_floats[0] * q.getY() - m_floats[1] * q.getX(), - m_floats[3] * q.m_floats[3] - m_floats[0] * q.getX() - m_floats[1] * q.getY() - m_floats[2] * q.getZ()); -#endif - return *this; - } - /**@brief Return the dot product between this quaternion and another - * @param q The other quaternion */ - b3Scalar dot(const b3Quaternion& q) const - { -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - __m128 vd; - - vd = _mm_mul_ps(mVec128, q.mVec128); - - __m128 t = _mm_movehl_ps(vd, vd); - vd = _mm_add_ps(vd, t); - t = _mm_shuffle_ps(vd, vd, 0x55); - vd = _mm_add_ss(vd, t); - - return _mm_cvtss_f32(vd); -#elif defined(B3_USE_NEON) - float32x4_t vd = vmulq_f32(mVec128, q.mVec128); - float32x2_t x = vpadd_f32(vget_low_f32(vd), vget_high_f32(vd)); - x = vpadd_f32(x, x); - return vget_lane_f32(x, 0); -#else - return m_floats[0] * q.getX() + - m_floats[1] * q.getY() + - m_floats[2] * q.getZ() + - m_floats[3] * q.m_floats[3]; -#endif - } - - /**@brief Return the length squared of the quaternion */ - b3Scalar 
length2() const - { - return dot(*this); - } - - /**@brief Return the length of the quaternion */ - b3Scalar length() const - { - return b3Sqrt(length2()); - } - - /**@brief Normalize the quaternion - * Such that x^2 + y^2 + z^2 +w^2 = 1 */ - b3Quaternion& normalize() - { -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - __m128 vd; - - vd = _mm_mul_ps(mVec128, mVec128); - - __m128 t = _mm_movehl_ps(vd, vd); - vd = _mm_add_ps(vd, t); - t = _mm_shuffle_ps(vd, vd, 0x55); - vd = _mm_add_ss(vd, t); - - vd = _mm_sqrt_ss(vd); - vd = _mm_div_ss(b3vOnes, vd); - vd = b3_pshufd_ps(vd, 0); // splat - mVec128 = _mm_mul_ps(mVec128, vd); - - return *this; -#else - return *this /= length(); -#endif - } - - /**@brief Return a scaled version of this quaternion - * @param s The scale factor */ - B3_FORCE_INLINE b3Quaternion - operator*(const b3Scalar& s) const - { -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - __m128 vs = _mm_load_ss(&s); // (S 0 0 0) - vs = b3_pshufd_ps(vs, 0x00); // (S S S S) - - return b3Quaternion(_mm_mul_ps(mVec128, vs)); -#elif defined(B3_USE_NEON) - return b3Quaternion(vmulq_n_f32(mVec128, s)); -#else - return b3Quaternion(getX() * s, getY() * s, getZ() * s, m_floats[3] * s); -#endif - } - - /**@brief Return an inversely scaled versionof this quaternion - * @param s The inverse scale factor */ - b3Quaternion operator/(const b3Scalar& s) const - { - b3Assert(s != b3Scalar(0.0)); - return *this * (b3Scalar(1.0) / s); - } - - /**@brief Inversely scale this quaternion - * @param s The scale factor */ - b3Quaternion& operator/=(const b3Scalar& s) - { - b3Assert(s != b3Scalar(0.0)); - return *this *= b3Scalar(1.0) / s; - } - - /**@brief Return a normalized version of this quaternion */ - b3Quaternion normalized() const - { - return *this / length(); - } - /**@brief Return the angle between this quaternion and the other - * @param q The other quaternion */ - b3Scalar angle(const b3Quaternion& q) const - { - b3Scalar s = b3Sqrt(length2() * 
q.length2()); - b3Assert(s != b3Scalar(0.0)); - return b3Acos(dot(q) / s); - } - /**@brief Return the angle of rotation represented by this quaternion */ - b3Scalar getAngle() const - { - b3Scalar s = b3Scalar(2.) * b3Acos(m_floats[3]); - return s; - } - - /**@brief Return the axis of the rotation represented by this quaternion */ - b3Vector3 getAxis() const - { - b3Scalar s_squared = 1.f - m_floats[3] * m_floats[3]; - - if (s_squared < b3Scalar(10.) * B3_EPSILON) //Check for divide by zero - return b3MakeVector3(1.0, 0.0, 0.0); // Arbitrary - b3Scalar s = 1.f / b3Sqrt(s_squared); - return b3MakeVector3(m_floats[0] * s, m_floats[1] * s, m_floats[2] * s); - } - - /**@brief Return the inverse of this quaternion */ - b3Quaternion inverse() const - { -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - return b3Quaternion(_mm_xor_ps(mVec128, b3vQInv)); -#elif defined(B3_USE_NEON) - return b3Quaternion((b3SimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)b3vQInv)); -#else - return b3Quaternion(-m_floats[0], -m_floats[1], -m_floats[2], m_floats[3]); -#endif - } - - /**@brief Return the sum of this quaternion and the other - * @param q2 The other quaternion */ - B3_FORCE_INLINE b3Quaternion - operator+(const b3Quaternion& q2) const - { -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - return b3Quaternion(_mm_add_ps(mVec128, q2.mVec128)); -#elif defined(B3_USE_NEON) - return b3Quaternion(vaddq_f32(mVec128, q2.mVec128)); -#else - const b3Quaternion& q1 = *this; - return b3Quaternion(q1.getX() + q2.getX(), q1.getY() + q2.getY(), q1.getZ() + q2.getZ(), q1.m_floats[3] + q2.m_floats[3]); -#endif - } - - /**@brief Return the difference between this quaternion and the other - * @param q2 The other quaternion */ - B3_FORCE_INLINE b3Quaternion - operator-(const b3Quaternion& q2) const - { -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - return b3Quaternion(_mm_sub_ps(mVec128, q2.mVec128)); -#elif defined(B3_USE_NEON) - return b3Quaternion(vsubq_f32(mVec128, 
q2.mVec128)); -#else - const b3Quaternion& q1 = *this; - return b3Quaternion(q1.getX() - q2.getX(), q1.getY() - q2.getY(), q1.getZ() - q2.getZ(), q1.m_floats[3] - q2.m_floats[3]); -#endif - } - - /**@brief Return the negative of this quaternion - * This simply negates each element */ - B3_FORCE_INLINE b3Quaternion operator-() const - { -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - return b3Quaternion(_mm_xor_ps(mVec128, b3vMzeroMask)); -#elif defined(B3_USE_NEON) - return b3Quaternion((b3SimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)b3vMzeroMask)); -#else - const b3Quaternion& q2 = *this; - return b3Quaternion(-q2.getX(), -q2.getY(), -q2.getZ(), -q2.m_floats[3]); -#endif - } - /**@todo document this and it's use */ - B3_FORCE_INLINE b3Quaternion farthest(const b3Quaternion& qd) const - { - b3Quaternion diff, sum; - diff = *this - qd; - sum = *this + qd; - if (diff.dot(diff) > sum.dot(sum)) - return qd; - return (-qd); - } - - /**@todo document this and it's use */ - B3_FORCE_INLINE b3Quaternion nearest(const b3Quaternion& qd) const - { - b3Quaternion diff, sum; - diff = *this - qd; - sum = *this + qd; - if (diff.dot(diff) < sum.dot(sum)) - return qd; - return (-qd); - } - - /**@brief Return the quaternion which is the result of Spherical Linear Interpolation between this and the other quaternion - * @param q The other quaternion to interpolate with - * @param t The ratio between this and q to interpolate. If t = 0 the result is this, if t=1 the result is q. - * Slerp interpolates assuming constant velocity. */ - b3Quaternion slerp(const b3Quaternion& q, const b3Scalar& t) const - { - b3Scalar magnitude = b3Sqrt(length2() * q.length2()); - b3Assert(magnitude > b3Scalar(0)); - - b3Scalar product = dot(q) / magnitude; - if (b3Fabs(product) < b3Scalar(1)) - { - // Take care of long angle case see http://en.wikipedia.org/wiki/Slerp - const b3Scalar sign = (product < 0) ? 
b3Scalar(-1) : b3Scalar(1); - - const b3Scalar theta = b3Acos(sign * product); - const b3Scalar s1 = b3Sin(sign * t * theta); - const b3Scalar d = b3Scalar(1.0) / b3Sin(theta); - const b3Scalar s0 = b3Sin((b3Scalar(1.0) - t) * theta); - - return b3Quaternion( - (m_floats[0] * s0 + q.getX() * s1) * d, - (m_floats[1] * s0 + q.getY() * s1) * d, - (m_floats[2] * s0 + q.getZ() * s1) * d, - (m_floats[3] * s0 + q.m_floats[3] * s1) * d); - } - else - { - return *this; - } - } - - static const b3Quaternion& getIdentity() - { - static const b3Quaternion identityQuat(b3Scalar(0.), b3Scalar(0.), b3Scalar(0.), b3Scalar(1.)); - return identityQuat; - } - - B3_FORCE_INLINE const b3Scalar& getW() const { return m_floats[3]; } -}; - -/**@brief Return the product of two quaternions */ -B3_FORCE_INLINE b3Quaternion -operator*(const b3Quaternion& q1, const b3Quaternion& q2) -{ -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - __m128 vQ1 = q1.get128(); - __m128 vQ2 = q2.get128(); - __m128 A0, A1, B1, A2, B2; - - A1 = b3_pshufd_ps(vQ1, B3_SHUFFLE(0, 1, 2, 0)); // X Y z x // vtrn - B1 = b3_pshufd_ps(vQ2, B3_SHUFFLE(3, 3, 3, 0)); // W W W X // vdup vext - - A1 = A1 * B1; - - A2 = b3_pshufd_ps(vQ1, B3_SHUFFLE(1, 2, 0, 1)); // Y Z X Y // vext - B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(2, 0, 1, 1)); // z x Y Y // vtrn vdup - - A2 = A2 * B2; - - B1 = b3_pshufd_ps(vQ1, B3_SHUFFLE(2, 0, 1, 2)); // z x Y Z // vtrn vext - B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(1, 2, 0, 2)); // Y Z x z // vext vtrn - - B1 = B1 * B2; // A3 *= B3 - - A0 = b3_splat_ps(vQ1, 3); // A0 - A0 = A0 * vQ2; // A0 * B0 - - A1 = A1 + A2; // AB12 - A0 = A0 - B1; // AB03 = AB0 - AB3 - - A1 = _mm_xor_ps(A1, b3vPPPM); // change sign of the last element - A0 = A0 + A1; // AB03 + AB12 - - return b3Quaternion(A0); - -#elif defined(B3_USE_NEON) - - float32x4_t vQ1 = q1.get128(); - float32x4_t vQ2 = q2.get128(); - float32x4_t A0, A1, B1, A2, B2, A3, B3; - float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz; - - { - float32x2x2_t tmp; - tmp 
= vtrn_f32(vget_high_f32(vQ1), vget_low_f32(vQ1)); // {z x}, {w y} - vQ1zx = tmp.val[0]; - - tmp = vtrn_f32(vget_high_f32(vQ2), vget_low_f32(vQ2)); // {z x}, {w y} - vQ2zx = tmp.val[0]; - } - vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1); - - vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1); - - vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1); - vQ2xz = vext_f32(vQ2zx, vQ2zx, 1); - - A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx); // X Y z x - B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W W X - - A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1)); - B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1)); - - A3 = vcombine_f32(vQ1zx, vQ1yz); // Z X Y Z - B3 = vcombine_f32(vQ2yz, vQ2xz); // Y Z x z - - A1 = vmulq_f32(A1, B1); - A2 = vmulq_f32(A2, B2); - A3 = vmulq_f32(A3, B3); // A3 *= B3 - A0 = vmulq_lane_f32(vQ2, vget_high_f32(vQ1), 1); // A0 * B0 - - A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2 - A0 = vsubq_f32(A0, A3); // AB03 = AB0 - AB3 - - // change the sign of the last element - A1 = (b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)b3vPPPM); - A0 = vaddq_f32(A0, A1); // AB03 + AB12 - - return b3Quaternion(A0); - -#else - return b3Quaternion( - q1.getW() * q2.getX() + q1.getX() * q2.getW() + q1.getY() * q2.getZ() - q1.getZ() * q2.getY(), - q1.getW() * q2.getY() + q1.getY() * q2.getW() + q1.getZ() * q2.getX() - q1.getX() * q2.getZ(), - q1.getW() * q2.getZ() + q1.getZ() * q2.getW() + q1.getX() * q2.getY() - q1.getY() * q2.getX(), - q1.getW() * q2.getW() - q1.getX() * q2.getX() - q1.getY() * q2.getY() - q1.getZ() * q2.getZ()); -#endif -} - -B3_FORCE_INLINE b3Quaternion -operator*(const b3Quaternion& q, const b3Vector3& w) -{ -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - __m128 vQ1 = q.get128(); - __m128 vQ2 = w.get128(); - __m128 A1, B1, A2, B2, A3, B3; - - A1 = b3_pshufd_ps(vQ1, B3_SHUFFLE(3, 3, 3, 0)); - B1 = b3_pshufd_ps(vQ2, B3_SHUFFLE(0, 1, 2, 0)); - - A1 = A1 * B1; - - A2 = b3_pshufd_ps(vQ1, 
B3_SHUFFLE(1, 2, 0, 1)); - B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(2, 0, 1, 1)); - - A2 = A2 * B2; - - A3 = b3_pshufd_ps(vQ1, B3_SHUFFLE(2, 0, 1, 2)); - B3 = b3_pshufd_ps(vQ2, B3_SHUFFLE(1, 2, 0, 2)); - - A3 = A3 * B3; // A3 *= B3 - - A1 = A1 + A2; // AB12 - A1 = _mm_xor_ps(A1, b3vPPPM); // change sign of the last element - A1 = A1 - A3; // AB123 = AB12 - AB3 - - return b3Quaternion(A1); - -#elif defined(B3_USE_NEON) - - float32x4_t vQ1 = q.get128(); - float32x4_t vQ2 = w.get128(); - float32x4_t A1, B1, A2, B2, A3, B3; - float32x2_t vQ1wx, vQ2zx, vQ1yz, vQ2yz, vQ1zx, vQ2xz; - - vQ1wx = vext_f32(vget_high_f32(vQ1), vget_low_f32(vQ1), 1); - { - float32x2x2_t tmp; - - tmp = vtrn_f32(vget_high_f32(vQ2), vget_low_f32(vQ2)); // {z x}, {w y} - vQ2zx = tmp.val[0]; - - tmp = vtrn_f32(vget_high_f32(vQ1), vget_low_f32(vQ1)); // {z x}, {w y} - vQ1zx = tmp.val[0]; - } - - vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1); - - vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1); - vQ2xz = vext_f32(vQ2zx, vQ2zx, 1); - - A1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ1), 1), vQ1wx); // W W W X - B1 = vcombine_f32(vget_low_f32(vQ2), vQ2zx); // X Y z x - - A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1)); - B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1)); - - A3 = vcombine_f32(vQ1zx, vQ1yz); // Z X Y Z - B3 = vcombine_f32(vQ2yz, vQ2xz); // Y Z x z - - A1 = vmulq_f32(A1, B1); - A2 = vmulq_f32(A2, B2); - A3 = vmulq_f32(A3, B3); // A3 *= B3 - - A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2 - - // change the sign of the last element - A1 = (b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)b3vPPPM); - - A1 = vsubq_f32(A1, A3); // AB123 = AB12 - AB3 - - return b3Quaternion(A1); - -#else - return b3Quaternion( - q.getW() * w.getX() + q.getY() * w.getZ() - q.getZ() * w.getY(), - q.getW() * w.getY() + q.getZ() * w.getX() - q.getX() * w.getZ(), - q.getW() * w.getZ() + q.getX() * w.getY() - q.getY() * w.getX(), - -q.getX() * w.getX() - q.getY() * w.getY() - q.getZ() * 
w.getZ()); -#endif -} - -B3_FORCE_INLINE b3Quaternion -operator*(const b3Vector3& w, const b3Quaternion& q) -{ -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - __m128 vQ1 = w.get128(); - __m128 vQ2 = q.get128(); - __m128 A1, B1, A2, B2, A3, B3; - - A1 = b3_pshufd_ps(vQ1, B3_SHUFFLE(0, 1, 2, 0)); // X Y z x - B1 = b3_pshufd_ps(vQ2, B3_SHUFFLE(3, 3, 3, 0)); // W W W X - - A1 = A1 * B1; - - A2 = b3_pshufd_ps(vQ1, B3_SHUFFLE(1, 2, 0, 1)); - B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(2, 0, 1, 1)); - - A2 = A2 * B2; - - A3 = b3_pshufd_ps(vQ1, B3_SHUFFLE(2, 0, 1, 2)); - B3 = b3_pshufd_ps(vQ2, B3_SHUFFLE(1, 2, 0, 2)); - - A3 = A3 * B3; // A3 *= B3 - - A1 = A1 + A2; // AB12 - A1 = _mm_xor_ps(A1, b3vPPPM); // change sign of the last element - A1 = A1 - A3; // AB123 = AB12 - AB3 - - return b3Quaternion(A1); - -#elif defined(B3_USE_NEON) - - float32x4_t vQ1 = w.get128(); - float32x4_t vQ2 = q.get128(); - float32x4_t A1, B1, A2, B2, A3, B3; - float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz; - - { - float32x2x2_t tmp; - - tmp = vtrn_f32(vget_high_f32(vQ1), vget_low_f32(vQ1)); // {z x}, {w y} - vQ1zx = tmp.val[0]; - - tmp = vtrn_f32(vget_high_f32(vQ2), vget_low_f32(vQ2)); // {z x}, {w y} - vQ2zx = tmp.val[0]; - } - vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1); - - vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1); - - vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1); - vQ2xz = vext_f32(vQ2zx, vQ2zx, 1); - - A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx); // X Y z x - B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W W X - - A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1)); - B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1)); - - A3 = vcombine_f32(vQ1zx, vQ1yz); // Z X Y Z - B3 = vcombine_f32(vQ2yz, vQ2xz); // Y Z x z - - A1 = vmulq_f32(A1, B1); - A2 = vmulq_f32(A2, B2); - A3 = vmulq_f32(A3, B3); // A3 *= B3 - - A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2 - - // change the sign of the last element - A1 = 
(b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)b3vPPPM); - - A1 = vsubq_f32(A1, A3); // AB123 = AB12 - AB3 - - return b3Quaternion(A1); - -#else - return b3Quaternion( - +w.getX() * q.getW() + w.getY() * q.getZ() - w.getZ() * q.getY(), - +w.getY() * q.getW() + w.getZ() * q.getX() - w.getX() * q.getZ(), - +w.getZ() * q.getW() + w.getX() * q.getY() - w.getY() * q.getX(), - -w.getX() * q.getX() - w.getY() * q.getY() - w.getZ() * q.getZ()); -#endif -} - -/**@brief Calculate the dot product between two quaternions */ -B3_FORCE_INLINE b3Scalar -b3Dot(const b3Quaternion& q1, const b3Quaternion& q2) -{ - return q1.dot(q2); -} - -/**@brief Return the length of a quaternion */ -B3_FORCE_INLINE b3Scalar -b3Length(const b3Quaternion& q) -{ - return q.length(); -} - -/**@brief Return the angle between two quaternions*/ -B3_FORCE_INLINE b3Scalar -b3Angle(const b3Quaternion& q1, const b3Quaternion& q2) -{ - return q1.angle(q2); -} - -/**@brief Return the inverse of a quaternion*/ -B3_FORCE_INLINE b3Quaternion -b3Inverse(const b3Quaternion& q) -{ - return q.inverse(); -} - -/**@brief Return the result of spherical linear interpolation betwen two quaternions - * @param q1 The first quaternion - * @param q2 The second quaternion - * @param t The ration between q1 and q2. t = 0 return q1, t=1 returns q2 - * Slerp assumes constant velocity between positions. 
*/ -B3_FORCE_INLINE b3Quaternion -b3Slerp(const b3Quaternion& q1, const b3Quaternion& q2, const b3Scalar& t) -{ - return q1.slerp(q2, t); -} - -B3_FORCE_INLINE b3Quaternion -b3QuatMul(const b3Quaternion& rot0, const b3Quaternion& rot1) -{ - return rot0 * rot1; -} - -B3_FORCE_INLINE b3Quaternion -b3QuatNormalized(const b3Quaternion& orn) -{ - return orn.normalized(); -} - -B3_FORCE_INLINE b3Vector3 -b3QuatRotate(const b3Quaternion& rotation, const b3Vector3& v) -{ - b3Quaternion q = rotation * v; - q *= rotation.inverse(); -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - return b3MakeVector3(_mm_and_ps(q.get128(), b3vFFF0fMask)); -#elif defined(B3_USE_NEON) - return b3MakeVector3((float32x4_t)vandq_s32((int32x4_t)q.get128(), b3vFFF0Mask)); -#else - return b3MakeVector3(q.getX(), q.getY(), q.getZ()); -#endif -} - -B3_FORCE_INLINE b3Quaternion -b3ShortestArcQuat(const b3Vector3& v0, const b3Vector3& v1) // Game Programming Gems 2.10. make sure v0,v1 are normalized -{ - b3Vector3 c = v0.cross(v1); - b3Scalar d = v0.dot(v1); - - if (d < -1.0 + B3_EPSILON) - { - b3Vector3 n, unused; - b3PlaneSpace1(v0, n, unused); - return b3Quaternion(n.getX(), n.getY(), n.getZ(), 0.0f); // just pick any vector that is orthogonal to v0 - } - - b3Scalar s = b3Sqrt((1.0f + d) * 2.0f); - b3Scalar rs = 1.0f / s; - - return b3Quaternion(c.getX() * rs, c.getY() * rs, c.getZ() * rs, s * 0.5f); -} - -B3_FORCE_INLINE b3Quaternion -b3ShortestArcQuatNormalize2(b3Vector3& v0, b3Vector3& v1) -{ - v0.normalize(); - v1.normalize(); - return b3ShortestArcQuat(v0, v1); -} - -#endif //B3_SIMD__QUATERNION_H_ diff --git a/thirdparty/bullet/Bullet3Common/b3Random.h b/thirdparty/bullet/Bullet3Common/b3Random.h deleted file mode 100644 index c2e21496c7..0000000000 --- a/thirdparty/bullet/Bullet3Common/b3Random.h +++ /dev/null @@ -1,46 +0,0 @@ -/* -Copyright (c) 2003-2013 Gino van den Bergen / Erwin Coumans http://bulletphysics.org - -This software is provided 'as-is', without any express or implied 
warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#ifndef B3_GEN_RANDOM_H -#define B3_GEN_RANDOM_H - -#include "b3Scalar.h" - -#ifdef MT19937 - -#include <limits.h> -#include <mt19937.h> - -#define B3_RAND_MAX UINT_MAX - -B3_FORCE_INLINE void b3Srand(unsigned int seed) { init_genrand(seed); } -B3_FORCE_INLINE unsigned int b3rand() { return genrand_int32(); } - -#else - -#include <stdlib.h> - -#define B3_RAND_MAX RAND_MAX - -B3_FORCE_INLINE void b3Srand(unsigned int seed) { srand(seed); } -B3_FORCE_INLINE unsigned int b3rand() { return rand(); } - -#endif - -inline b3Scalar b3RandRange(b3Scalar minRange, b3Scalar maxRange) -{ - return (b3rand() / (b3Scalar(B3_RAND_MAX) + b3Scalar(1.0))) * (maxRange - minRange) + minRange; -} - -#endif //B3_GEN_RANDOM_H diff --git a/thirdparty/bullet/Bullet3Common/b3ResizablePool.h b/thirdparty/bullet/Bullet3Common/b3ResizablePool.h deleted file mode 100644 index cafe3ff396..0000000000 --- a/thirdparty/bullet/Bullet3Common/b3ResizablePool.h +++ /dev/null @@ -1,171 +0,0 @@ - -#ifndef B3_RESIZABLE_POOL_H -#define B3_RESIZABLE_POOL_H - -#include "Bullet3Common/b3AlignedObjectArray.h" - -enum -{ - B3_POOL_HANDLE_TERMINAL_FREE = -1, - B3_POOL_HANDLE_TERMINAL_USED = -2 -}; - -template <typename U> -struct b3PoolBodyHandle : public U -{ - 
B3_DECLARE_ALIGNED_ALLOCATOR(); - - int m_nextFreeHandle; - void setNextFree(int next) - { - m_nextFreeHandle = next; - } - int getNextFree() const - { - return m_nextFreeHandle; - } -}; - -template <typename T> -class b3ResizablePool -{ -protected: - b3AlignedObjectArray<T> m_bodyHandles; - int m_numUsedHandles; // number of active handles - int m_firstFreeHandle; // free handles list - - T* getHandleInternal(int handle) - { - return &m_bodyHandles[handle]; - } - const T* getHandleInternal(int handle) const - { - return &m_bodyHandles[handle]; - } - -public: - b3ResizablePool() - { - initHandles(); - } - - virtual ~b3ResizablePool() - { - exitHandles(); - } - ///handle management - - int getNumHandles() const - { - return m_bodyHandles.size(); - } - - void getUsedHandles(b3AlignedObjectArray<int>& usedHandles) const - { - for (int i = 0; i < m_bodyHandles.size(); i++) - { - if (m_bodyHandles[i].getNextFree() == B3_POOL_HANDLE_TERMINAL_USED) - { - usedHandles.push_back(i); - } - } - } - - T* getHandle(int handle) - { - b3Assert(handle >= 0); - b3Assert(handle < m_bodyHandles.size()); - if ((handle < 0) || (handle >= m_bodyHandles.size())) - { - return 0; - } - - if (m_bodyHandles[handle].getNextFree() == B3_POOL_HANDLE_TERMINAL_USED) - { - return &m_bodyHandles[handle]; - } - return 0; - } - const T* getHandle(int handle) const - { - b3Assert(handle >= 0); - b3Assert(handle < m_bodyHandles.size()); - if ((handle < 0) || (handle >= m_bodyHandles.size())) - { - return 0; - } - - if (m_bodyHandles[handle].getNextFree() == B3_POOL_HANDLE_TERMINAL_USED) - { - return &m_bodyHandles[handle]; - } - return 0; - } - - void increaseHandleCapacity(int extraCapacity) - { - int curCapacity = m_bodyHandles.size(); - //b3Assert(curCapacity == m_numUsedHandles); - int newCapacity = curCapacity + extraCapacity; - m_bodyHandles.resize(newCapacity); - - { - for (int i = curCapacity; i < newCapacity; i++) - m_bodyHandles[i].setNextFree(i + 1); - - m_bodyHandles[newCapacity - 
1].setNextFree(-1); - } - m_firstFreeHandle = curCapacity; - } - void initHandles() - { - m_numUsedHandles = 0; - m_firstFreeHandle = -1; - - increaseHandleCapacity(1); - } - - void exitHandles() - { - m_bodyHandles.resize(0); - m_firstFreeHandle = -1; - m_numUsedHandles = 0; - } - - int allocHandle() - { - b3Assert(m_firstFreeHandle >= 0); - - int handle = m_firstFreeHandle; - m_firstFreeHandle = getHandleInternal(handle)->getNextFree(); - m_numUsedHandles++; - - if (m_firstFreeHandle < 0) - { - //int curCapacity = m_bodyHandles.size(); - int additionalCapacity = m_bodyHandles.size(); - increaseHandleCapacity(additionalCapacity); - - getHandleInternal(handle)->setNextFree(m_firstFreeHandle); - } - getHandleInternal(handle)->setNextFree(B3_POOL_HANDLE_TERMINAL_USED); - getHandleInternal(handle)->clear(); - return handle; - } - - void freeHandle(int handle) - { - b3Assert(handle >= 0); - - if (m_bodyHandles[handle].getNextFree() == B3_POOL_HANDLE_TERMINAL_USED) - { - getHandleInternal(handle)->clear(); - getHandleInternal(handle)->setNextFree(m_firstFreeHandle); - m_firstFreeHandle = handle; - m_numUsedHandles--; - } - } -}; -///end handle management - -#endif //B3_RESIZABLE_POOL_H diff --git a/thirdparty/bullet/Bullet3Common/b3Scalar.h b/thirdparty/bullet/Bullet3Common/b3Scalar.h deleted file mode 100644 index eeb70ed632..0000000000 --- a/thirdparty/bullet/Bullet3Common/b3Scalar.h +++ /dev/null @@ -1,689 +0,0 @@ -/* -Copyright (c) 2003-2013 Gino van den Bergen / Erwin Coumans http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. 
The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#ifndef B3_SCALAR_H -#define B3_SCALAR_H - -#ifdef B3_MANAGED_CODE -//Aligned data types not supported in managed code -#pragma unmanaged -#endif - -#include <math.h> -#include <stdlib.h> //size_t for MSVC 6.0 -#include <float.h> - -//Original repository is at http://github.com/erwincoumans/bullet3 -#define B3_BULLET_VERSION 300 - -inline int b3GetVersion() -{ - return B3_BULLET_VERSION; -} - -#if defined(DEBUG) || defined(_DEBUG) -#define B3_DEBUG -#endif - -#include "b3Logging.h" //for b3Error - -#ifdef _WIN32 - -#if defined(__GNUC__) // it should handle both MINGW and CYGWIN -#define B3_FORCE_INLINE __inline__ __attribute__((always_inline)) -#define B3_ATTRIBUTE_ALIGNED16(a) a __attribute__((aligned(16))) -#define B3_ATTRIBUTE_ALIGNED64(a) a __attribute__((aligned(64))) -#define B3_ATTRIBUTE_ALIGNED128(a) a __attribute__((aligned(128))) -#elif ( defined(_MSC_VER) && _MSC_VER < 1300 ) -#define B3_FORCE_INLINE inline -#define B3_ATTRIBUTE_ALIGNED16(a) a -#define B3_ATTRIBUTE_ALIGNED64(a) a -#define B3_ATTRIBUTE_ALIGNED128(a) a -#else -//#define B3_HAS_ALIGNED_ALLOCATOR -#pragma warning(disable : 4324) // disable padding warning -// #pragma warning(disable:4530) // Disable the exception disable but used in MSCV Stl warning. 
-#pragma warning(disable : 4996) //Turn off warnings about deprecated C routines -// #pragma warning(disable:4786) // Disable the "debug name too long" warning - -#define B3_FORCE_INLINE __forceinline -#define B3_ATTRIBUTE_ALIGNED16(a) __declspec(align(16)) a -#define B3_ATTRIBUTE_ALIGNED64(a) __declspec(align(64)) a -#define B3_ATTRIBUTE_ALIGNED128(a) __declspec(align(128)) a -#ifdef _XBOX -#define B3_USE_VMX128 - -#include <ppcintrinsics.h> -#define B3_HAVE_NATIVE_FSEL -#define b3Fsel(a, b, c) __fsel((a), (b), (c)) -#else - -#if (defined(_WIN32) && (_MSC_VER) && _MSC_VER >= 1400) && (!defined(B3_USE_DOUBLE_PRECISION)) -#if (defined(_M_IX86) || defined(_M_X64)) - - -#ifdef __clang__ -//#define B3_NO_SIMD_OPERATOR_OVERLOADS -#define B3_DISABLE_SSE -#endif //__clang__ - -#ifndef B3_DISABLE_SSE -#define B3_USE_SSE -#endif //B3_DISABLE_SSE - -#ifdef B3_USE_SSE -//B3_USE_SSE_IN_API is disabled under Windows by default, because -//it makes it harder to integrate Bullet into your application under Windows -//(structured embedding Bullet structs/classes need to be 16-byte aligned) -//with relatively little performance gain -//If you are not embedded Bullet data in your classes, or make sure that you align those classes on 16-byte boundaries -//you can manually enable this line or set it in the build system for a bit of performance gain (a few percent, dependent on usage) -//#define B3_USE_SSE_IN_API -#endif //B3_USE_SSE -#include <emmintrin.h> -#endif -#endif - -#endif //_XBOX - -#endif //__MINGW32__ - -#ifdef B3_DEBUG -#ifdef _MSC_VER -#include <stdio.h> -#define b3Assert(x) { if(!(x)){b3Error("Assert " __FILE__ ":%u (%s)\n", __LINE__, #x);__debugbreak(); }} -#else //_MSC_VER -#include <assert.h> -#define b3Assert assert -#endif //_MSC_VER -#else -#define b3Assert(x) -#endif -//b3FullAssert is optional, slows down a lot -#define b3FullAssert(x) - -#define b3Likely(_c) _c -#define b3Unlikely(_c) _c - -#else - -#if defined(__CELLOS_LV2__) -#define B3_FORCE_INLINE inline 
__attribute__((always_inline)) -#define B3_ATTRIBUTE_ALIGNED16(a) a __attribute__((aligned(16))) -#define B3_ATTRIBUTE_ALIGNED64(a) a __attribute__((aligned(64))) -#define B3_ATTRIBUTE_ALIGNED128(a) a __attribute__((aligned(128))) -#ifndef assert -#include <assert.h> -#endif -#ifdef B3_DEBUG -#ifdef __SPU__ -#include <spu_printf.h> -#define printf spu_printf -#define b3Assert(x) \ - { \ - if (!(x)) \ - { \ - b3Error( \ - "Assert "__FILE__ \ - ":%u (" #x ")\n", \ - __LINE__); \ - spu_hcmpeq(0, 0); \ - } \ - } -#else -#define b3Assert assert -#endif - -#else -#define b3Assert(x) -#endif -//b3FullAssert is optional, slows down a lot -#define b3FullAssert(x) - -#define b3Likely(_c) _c -#define b3Unlikely(_c) _c - -#else - -#ifdef USE_LIBSPE2 - -#define B3_FORCE_INLINE __inline -#define B3_ATTRIBUTE_ALIGNED16(a) a __attribute__((aligned(16))) -#define B3_ATTRIBUTE_ALIGNED64(a) a __attribute__((aligned(64))) -#define B3_ATTRIBUTE_ALIGNED128(a) a __attribute__((aligned(128))) -#ifndef assert -#include <assert.h> -#endif -#ifdef B3_DEBUG -#define b3Assert assert -#else -#define b3Assert(x) -#endif -//b3FullAssert is optional, slows down a lot -#define b3FullAssert(x) - -#define b3Likely(_c) __builtin_expect((_c), 1) -#define b3Unlikely(_c) __builtin_expect((_c), 0) - -#else -//non-windows systems - -#if (defined(__APPLE__) && (!defined(B3_USE_DOUBLE_PRECISION))) -#if defined(__i386__) || defined(__x86_64__) -#define B3_USE_SSE -//B3_USE_SSE_IN_API is enabled on Mac OSX by default, because memory is automatically aligned on 16-byte boundaries -//if apps run into issues, we will disable the next line -#define B3_USE_SSE_IN_API -#ifdef B3_USE_SSE -// include appropriate SSE level -#if defined(__SSE4_1__) -#include <smmintrin.h> -#elif defined(__SSSE3__) -#include <tmmintrin.h> -#elif defined(__SSE3__) -#include <pmmintrin.h> -#else -#include <emmintrin.h> -#endif -#endif //B3_USE_SSE -#elif defined(__armv7__) -#ifdef __clang__ -#define B3_USE_NEON 1 - -#if defined B3_USE_NEON 
&& defined(__clang__) -#include <arm_neon.h> -#endif //B3_USE_NEON -#endif //__clang__ -#endif //__arm__ - -#define B3_FORCE_INLINE inline __attribute__((always_inline)) -///@todo: check out alignment methods for other platforms/compilers -#define B3_ATTRIBUTE_ALIGNED16(a) a __attribute__((aligned(16))) -#define B3_ATTRIBUTE_ALIGNED64(a) a __attribute__((aligned(64))) -#define B3_ATTRIBUTE_ALIGNED128(a) a __attribute__((aligned(128))) -#ifndef assert -#include <assert.h> -#endif - -#if defined(DEBUG) || defined(_DEBUG) -#if defined(__i386__) || defined(__x86_64__) -#include <stdio.h> -#define b3Assert(x) \ - { \ - if (!(x)) \ - { \ - b3Error("Assert %s in line %d, file %s\n", #x, __LINE__, __FILE__); \ - asm volatile("int3"); \ - } \ - } -#else //defined (__i386__) || defined (__x86_64__) -#define b3Assert assert -#endif //defined (__i386__) || defined (__x86_64__) -#else //defined(DEBUG) || defined (_DEBUG) -#define b3Assert(x) -#endif //defined(DEBUG) || defined (_DEBUG) - -//b3FullAssert is optional, slows down a lot -#define b3FullAssert(x) -#define b3Likely(_c) _c -#define b3Unlikely(_c) _c - -#else - -#define B3_FORCE_INLINE inline -///@todo: check out alignment methods for other platforms/compilers -#define B3_ATTRIBUTE_ALIGNED16(a) a __attribute__((aligned(16))) -#define B3_ATTRIBUTE_ALIGNED64(a) a __attribute__((aligned(64))) -#define B3_ATTRIBUTE_ALIGNED128(a) a __attribute__((aligned(128))) -///#define B3_ATTRIBUTE_ALIGNED16(a) a -///#define B3_ATTRIBUTE_ALIGNED64(a) a -///#define B3_ATTRIBUTE_ALIGNED128(a) a -#ifndef assert -#include <assert.h> -#endif - -#if defined(DEBUG) || defined(_DEBUG) -#define b3Assert assert -#else -#define b3Assert(x) -#endif - -//b3FullAssert is optional, slows down a lot -#define b3FullAssert(x) -#define b3Likely(_c) _c -#define b3Unlikely(_c) _c -#endif //__APPLE__ - -#endif // LIBSPE2 - -#endif //__CELLOS_LV2__ -#endif - -///The b3Scalar type abstracts floating point numbers, to easily switch between double and single 
floating point precision. -#if defined(B3_USE_DOUBLE_PRECISION) -typedef double b3Scalar; -//this number could be bigger in double precision -#define B3_LARGE_FLOAT 1e30 -#else -typedef float b3Scalar; -//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX -#define B3_LARGE_FLOAT 1e18f -#endif - -#ifdef B3_USE_SSE -typedef __m128 b3SimdFloat4; -#endif //B3_USE_SSE - -#if defined B3_USE_SSE_IN_API && defined(B3_USE_SSE) -#ifdef _WIN32 - -#ifndef B3_NAN -static int b3NanMask = 0x7F800001; -#define B3_NAN (*(float *)&b3NanMask) -#endif - -#ifndef B3_INFINITY_MASK -static int b3InfinityMask = 0x7F800000; -#define B3_INFINITY_MASK (*(float *)&b3InfinityMask) -#endif -#ifndef B3_NO_SIMD_OPERATOR_OVERLOADS -inline __m128 operator+(const __m128 A, const __m128 B) -{ - return _mm_add_ps(A, B); -} - -inline __m128 operator-(const __m128 A, const __m128 B) -{ - return _mm_sub_ps(A, B); -} - -inline __m128 operator*(const __m128 A, const __m128 B) -{ - return _mm_mul_ps(A, B); -} -#endif //B3_NO_SIMD_OPERATOR_OVERLOADS -#define b3CastfTo128i(a) (_mm_castps_si128(a)) -#define b3CastfTo128d(a) (_mm_castps_pd(a)) -#define b3CastiTo128f(a) (_mm_castsi128_ps(a)) -#define b3CastdTo128f(a) (_mm_castpd_ps(a)) -#define b3CastdTo128i(a) (_mm_castpd_si128(a)) -#define b3Assign128(r0, r1, r2, r3) _mm_setr_ps(r0, r1, r2, r3) - -#else //_WIN32 - -#define b3CastfTo128i(a) ((__m128i)(a)) -#define b3CastfTo128d(a) ((__m128d)(a)) -#define b3CastiTo128f(a) ((__m128)(a)) -#define b3CastdTo128f(a) ((__m128)(a)) -#define b3CastdTo128i(a) ((__m128i)(a)) -#define b3Assign128(r0, r1, r2, r3) \ - (__m128) { r0, r1, r2, r3 } -#endif //_WIN32 -#endif //B3_USE_SSE_IN_API - -#ifdef B3_USE_NEON -#include <arm_neon.h> - -typedef float32x4_t b3SimdFloat4; -#define B3_INFINITY INFINITY -#define B3_NAN NAN -#define b3Assign128(r0, r1, r2, r3) \ - (float32x4_t) { r0, r1, r2, r3 } -#endif - -#define B3_DECLARE_ALIGNED_ALLOCATOR() \ - B3_FORCE_INLINE void *operator new(size_t sizeInBytes) { return 
b3AlignedAlloc(sizeInBytes, 16); } \ - B3_FORCE_INLINE void operator delete(void *ptr) { b3AlignedFree(ptr); } \ - B3_FORCE_INLINE void *operator new(size_t, void *ptr) { return ptr; } \ - B3_FORCE_INLINE void operator delete(void *, void *) {} \ - B3_FORCE_INLINE void *operator new[](size_t sizeInBytes) { return b3AlignedAlloc(sizeInBytes, 16); } \ - B3_FORCE_INLINE void operator delete[](void *ptr) { b3AlignedFree(ptr); } \ - B3_FORCE_INLINE void *operator new[](size_t, void *ptr) { return ptr; } \ - B3_FORCE_INLINE void operator delete[](void *, void *) {} - -#if defined(B3_USE_DOUBLE_PRECISION) || defined(B3_FORCE_DOUBLE_FUNCTIONS) - -B3_FORCE_INLINE b3Scalar b3Sqrt(b3Scalar x) -{ - return sqrt(x); -} -B3_FORCE_INLINE b3Scalar b3Fabs(b3Scalar x) { return fabs(x); } -B3_FORCE_INLINE b3Scalar b3Cos(b3Scalar x) { return cos(x); } -B3_FORCE_INLINE b3Scalar b3Sin(b3Scalar x) { return sin(x); } -B3_FORCE_INLINE b3Scalar b3Tan(b3Scalar x) { return tan(x); } -B3_FORCE_INLINE b3Scalar b3Acos(b3Scalar x) -{ - if (x < b3Scalar(-1)) x = b3Scalar(-1); - if (x > b3Scalar(1)) x = b3Scalar(1); - return acos(x); -} -B3_FORCE_INLINE b3Scalar b3Asin(b3Scalar x) -{ - if (x < b3Scalar(-1)) x = b3Scalar(-1); - if (x > b3Scalar(1)) x = b3Scalar(1); - return asin(x); -} -B3_FORCE_INLINE b3Scalar b3Atan(b3Scalar x) { return atan(x); } -B3_FORCE_INLINE b3Scalar b3Atan2(b3Scalar x, b3Scalar y) { return atan2(x, y); } -B3_FORCE_INLINE b3Scalar b3Exp(b3Scalar x) { return exp(x); } -B3_FORCE_INLINE b3Scalar b3Log(b3Scalar x) { return log(x); } -B3_FORCE_INLINE b3Scalar b3Pow(b3Scalar x, b3Scalar y) { return pow(x, y); } -B3_FORCE_INLINE b3Scalar b3Fmod(b3Scalar x, b3Scalar y) { return fmod(x, y); } - -#else - -B3_FORCE_INLINE b3Scalar b3Sqrt(b3Scalar y) -{ -#ifdef USE_APPROXIMATION - double x, z, tempf; - unsigned long *tfptr = ((unsigned long *)&tempf) + 1; - - tempf = y; - *tfptr = (0xbfcdd90a - *tfptr) >> 1; /* estimate of 1/sqrt(y) */ - x = tempf; - z = y * b3Scalar(0.5); - x = 
(b3Scalar(1.5) * x) - (x * x) * (x * z); /* iteration formula */ - x = (b3Scalar(1.5) * x) - (x * x) * (x * z); - x = (b3Scalar(1.5) * x) - (x * x) * (x * z); - x = (b3Scalar(1.5) * x) - (x * x) * (x * z); - x = (b3Scalar(1.5) * x) - (x * x) * (x * z); - return x * y; -#else - return sqrtf(y); -#endif -} -B3_FORCE_INLINE b3Scalar b3Fabs(b3Scalar x) { return fabsf(x); } -B3_FORCE_INLINE b3Scalar b3Cos(b3Scalar x) { return cosf(x); } -B3_FORCE_INLINE b3Scalar b3Sin(b3Scalar x) { return sinf(x); } -B3_FORCE_INLINE b3Scalar b3Tan(b3Scalar x) { return tanf(x); } -B3_FORCE_INLINE b3Scalar b3Acos(b3Scalar x) -{ - if (x < b3Scalar(-1)) - x = b3Scalar(-1); - if (x > b3Scalar(1)) - x = b3Scalar(1); - return acosf(x); -} -B3_FORCE_INLINE b3Scalar b3Asin(b3Scalar x) -{ - if (x < b3Scalar(-1)) - x = b3Scalar(-1); - if (x > b3Scalar(1)) - x = b3Scalar(1); - return asinf(x); -} -B3_FORCE_INLINE b3Scalar b3Atan(b3Scalar x) { return atanf(x); } -B3_FORCE_INLINE b3Scalar b3Atan2(b3Scalar x, b3Scalar y) { return atan2f(x, y); } -B3_FORCE_INLINE b3Scalar b3Exp(b3Scalar x) { return expf(x); } -B3_FORCE_INLINE b3Scalar b3Log(b3Scalar x) { return logf(x); } -B3_FORCE_INLINE b3Scalar b3Pow(b3Scalar x, b3Scalar y) { return powf(x, y); } -B3_FORCE_INLINE b3Scalar b3Fmod(b3Scalar x, b3Scalar y) { return fmodf(x, y); } - -#endif - -#define B3_2_PI b3Scalar(6.283185307179586232) -#define B3_PI (B3_2_PI * b3Scalar(0.5)) -#define B3_HALF_PI (B3_2_PI * b3Scalar(0.25)) -#define B3_RADS_PER_DEG (B3_2_PI / b3Scalar(360.0)) -#define B3_DEGS_PER_RAD (b3Scalar(360.0) / B3_2_PI) -#define B3_SQRT12 b3Scalar(0.7071067811865475244008443621048490) - -#define b3RecipSqrt(x) ((b3Scalar)(b3Scalar(1.0) / b3Sqrt(b3Scalar(x)))) /* reciprocal square root */ - -#ifdef B3_USE_DOUBLE_PRECISION -#define B3_EPSILON DBL_EPSILON -#define B3_INFINITY DBL_MAX -#else -#define B3_EPSILON FLT_EPSILON -#define B3_INFINITY FLT_MAX -#endif - -B3_FORCE_INLINE b3Scalar b3Atan2Fast(b3Scalar y, b3Scalar x) -{ - b3Scalar coeff_1 = 
B3_PI / 4.0f; - b3Scalar coeff_2 = 3.0f * coeff_1; - b3Scalar abs_y = b3Fabs(y); - b3Scalar angle; - if (x >= 0.0f) - { - b3Scalar r = (x - abs_y) / (x + abs_y); - angle = coeff_1 - coeff_1 * r; - } - else - { - b3Scalar r = (x + abs_y) / (abs_y - x); - angle = coeff_2 - coeff_1 * r; - } - return (y < 0.0f) ? -angle : angle; -} - -B3_FORCE_INLINE bool b3FuzzyZero(b3Scalar x) { return b3Fabs(x) < B3_EPSILON; } - -B3_FORCE_INLINE bool b3Equal(b3Scalar a, b3Scalar eps) -{ - return (((a) <= eps) && !((a) < -eps)); -} -B3_FORCE_INLINE bool b3GreaterEqual(b3Scalar a, b3Scalar eps) -{ - return (!((a) <= eps)); -} - -B3_FORCE_INLINE int b3IsNegative(b3Scalar x) -{ - return x < b3Scalar(0.0) ? 1 : 0; -} - -B3_FORCE_INLINE b3Scalar b3Radians(b3Scalar x) { return x * B3_RADS_PER_DEG; } -B3_FORCE_INLINE b3Scalar b3Degrees(b3Scalar x) { return x * B3_DEGS_PER_RAD; } - -#define B3_DECLARE_HANDLE(name) \ - typedef struct name##__ \ - { \ - int unused; \ - } * name - -#ifndef b3Fsel -B3_FORCE_INLINE b3Scalar b3Fsel(b3Scalar a, b3Scalar b, b3Scalar c) -{ - return a >= 0 ? b : c; -} -#endif -#define b3Fsels(a, b, c) (b3Scalar) b3Fsel(a, b, c) - -B3_FORCE_INLINE bool b3MachineIsLittleEndian() -{ - long int i = 1; - const char *p = (const char *)&i; - if (p[0] == 1) // Lowest address contains the least significant byte - return true; - else - return false; -} - -///b3Select avoids branches, which makes performance much better for consoles like Playstation 3 and XBox 360 -///Thanks Phil Knight. 
See also http://www.cellperformance.com/articles/2006/04/more_techniques_for_eliminatin_1.html -B3_FORCE_INLINE unsigned b3Select(unsigned condition, unsigned valueIfConditionNonZero, unsigned valueIfConditionZero) -{ - // Set testNz to 0xFFFFFFFF if condition is nonzero, 0x00000000 if condition is zero - // Rely on positive value or'ed with its negative having sign bit on - // and zero value or'ed with its negative (which is still zero) having sign bit off - // Use arithmetic shift right, shifting the sign bit through all 32 bits - unsigned testNz = (unsigned)(((int)condition | -(int)condition) >> 31); - unsigned testEqz = ~testNz; - return ((valueIfConditionNonZero & testNz) | (valueIfConditionZero & testEqz)); -} -B3_FORCE_INLINE int b3Select(unsigned condition, int valueIfConditionNonZero, int valueIfConditionZero) -{ - unsigned testNz = (unsigned)(((int)condition | -(int)condition) >> 31); - unsigned testEqz = ~testNz; - return static_cast<int>((valueIfConditionNonZero & testNz) | (valueIfConditionZero & testEqz)); -} -B3_FORCE_INLINE float b3Select(unsigned condition, float valueIfConditionNonZero, float valueIfConditionZero) -{ -#ifdef B3_HAVE_NATIVE_FSEL - return (float)b3Fsel((b3Scalar)condition - b3Scalar(1.0f), valueIfConditionNonZero, valueIfConditionZero); -#else - return (condition != 0) ? 
valueIfConditionNonZero : valueIfConditionZero; -#endif -} - -template <typename T> -B3_FORCE_INLINE void b3Swap(T &a, T &b) -{ - T tmp = a; - a = b; - b = tmp; -} - -//PCK: endian swapping functions -B3_FORCE_INLINE unsigned b3SwapEndian(unsigned val) -{ - return (((val & 0xff000000) >> 24) | ((val & 0x00ff0000) >> 8) | ((val & 0x0000ff00) << 8) | ((val & 0x000000ff) << 24)); -} - -B3_FORCE_INLINE unsigned short b3SwapEndian(unsigned short val) -{ - return static_cast<unsigned short>(((val & 0xff00) >> 8) | ((val & 0x00ff) << 8)); -} - -B3_FORCE_INLINE unsigned b3SwapEndian(int val) -{ - return b3SwapEndian((unsigned)val); -} - -B3_FORCE_INLINE unsigned short b3SwapEndian(short val) -{ - return b3SwapEndian((unsigned short)val); -} - -///b3SwapFloat uses using char pointers to swap the endianness -////b3SwapFloat/b3SwapDouble will NOT return a float, because the machine might 'correct' invalid floating point values -///Not all values of sign/exponent/mantissa are valid floating point numbers according to IEEE 754. -///When a floating point unit is faced with an invalid value, it may actually change the value, or worse, throw an exception. -///In most systems, running user mode code, you wouldn't get an exception, but instead the hardware/os/runtime will 'fix' the number for you. 
-///so instead of returning a float/double, we return integer/long long integer -B3_FORCE_INLINE unsigned int b3SwapEndianFloat(float d) -{ - unsigned int a = 0; - unsigned char *dst = (unsigned char *)&a; - unsigned char *src = (unsigned char *)&d; - - dst[0] = src[3]; - dst[1] = src[2]; - dst[2] = src[1]; - dst[3] = src[0]; - return a; -} - -// unswap using char pointers -B3_FORCE_INLINE float b3UnswapEndianFloat(unsigned int a) -{ - float d = 0.0f; - unsigned char *src = (unsigned char *)&a; - unsigned char *dst = (unsigned char *)&d; - - dst[0] = src[3]; - dst[1] = src[2]; - dst[2] = src[1]; - dst[3] = src[0]; - - return d; -} - -// swap using char pointers -B3_FORCE_INLINE void b3SwapEndianDouble(double d, unsigned char *dst) -{ - unsigned char *src = (unsigned char *)&d; - - dst[0] = src[7]; - dst[1] = src[6]; - dst[2] = src[5]; - dst[3] = src[4]; - dst[4] = src[3]; - dst[5] = src[2]; - dst[6] = src[1]; - dst[7] = src[0]; -} - -// unswap using char pointers -B3_FORCE_INLINE double b3UnswapEndianDouble(const unsigned char *src) -{ - double d = 0.0; - unsigned char *dst = (unsigned char *)&d; - - dst[0] = src[7]; - dst[1] = src[6]; - dst[2] = src[5]; - dst[3] = src[4]; - dst[4] = src[3]; - dst[5] = src[2]; - dst[6] = src[1]; - dst[7] = src[0]; - - return d; -} - -// returns normalized value in range [-B3_PI, B3_PI] -B3_FORCE_INLINE b3Scalar b3NormalizeAngle(b3Scalar angleInRadians) -{ - angleInRadians = b3Fmod(angleInRadians, B3_2_PI); - if (angleInRadians < -B3_PI) - { - return angleInRadians + B3_2_PI; - } - else if (angleInRadians > B3_PI) - { - return angleInRadians - B3_2_PI; - } - else - { - return angleInRadians; - } -} - -///rudimentary class to provide type info -struct b3TypedObject -{ - b3TypedObject(int objectType) - : m_objectType(objectType) - { - } - int m_objectType; - inline int getObjectType() const - { - return m_objectType; - } -}; - -///align a pointer to the provided alignment, upwards -template <typename T> -T *b3AlignPointer(T 
*unalignedPtr, size_t alignment) -{ - struct b3ConvertPointerSizeT - { - union { - T *ptr; - size_t integer; - }; - }; - b3ConvertPointerSizeT converter; - - const size_t bit_mask = ~(alignment - 1); - converter.ptr = unalignedPtr; - converter.integer += alignment - 1; - converter.integer &= bit_mask; - return converter.ptr; -} - -#endif //B3_SCALAR_H diff --git a/thirdparty/bullet/Bullet3Common/b3StackAlloc.h b/thirdparty/bullet/Bullet3Common/b3StackAlloc.h deleted file mode 100644 index 4972236ac7..0000000000 --- a/thirdparty/bullet/Bullet3Common/b3StackAlloc.h +++ /dev/null @@ -1,118 +0,0 @@ -/* -Copyright (c) 2003-2013 Gino van den Bergen / Erwin Coumans http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -/* -StackAlloc extracted from GJK-EPA collision solver by Nathanael Presson -Nov.2006 -*/ - -#ifndef B3_STACK_ALLOC -#define B3_STACK_ALLOC - -#include "b3Scalar.h" //for b3Assert -#include "b3AlignedAllocator.h" - -///The b3Block class is an internal structure for the b3StackAlloc memory allocator. 
-struct b3Block -{ - b3Block* previous; - unsigned char* address; -}; - -///The StackAlloc class provides some fast stack-based memory allocator (LIFO last-in first-out) -class b3StackAlloc -{ -public: - b3StackAlloc(unsigned int size) - { - ctor(); - create(size); - } - ~b3StackAlloc() { destroy(); } - - inline void create(unsigned int size) - { - destroy(); - data = (unsigned char*)b3AlignedAlloc(size, 16); - totalsize = size; - } - inline void destroy() - { - b3Assert(usedsize == 0); - //Raise(L"StackAlloc is still in use"); - - if (usedsize == 0) - { - if (!ischild && data) - b3AlignedFree(data); - - data = 0; - usedsize = 0; - } - } - - int getAvailableMemory() const - { - return static_cast<int>(totalsize - usedsize); - } - - unsigned char* allocate(unsigned int size) - { - const unsigned int nus(usedsize + size); - if (nus < totalsize) - { - usedsize = nus; - return (data + (usedsize - size)); - } - b3Assert(0); - //&& (L"Not enough memory")); - - return (0); - } - B3_FORCE_INLINE b3Block* beginBlock() - { - b3Block* pb = (b3Block*)allocate(sizeof(b3Block)); - pb->previous = current; - pb->address = data + usedsize; - current = pb; - return (pb); - } - B3_FORCE_INLINE void endBlock(b3Block* block) - { - b3Assert(block == current); - //Raise(L"Unmatched blocks"); - if (block == current) - { - current = block->previous; - usedsize = (unsigned int)((block->address - data) - sizeof(b3Block)); - } - } - -private: - void ctor() - { - data = 0; - totalsize = 0; - usedsize = 0; - current = 0; - ischild = false; - } - unsigned char* data; - unsigned int totalsize; - unsigned int usedsize; - b3Block* current; - bool ischild; -}; - -#endif //B3_STACK_ALLOC diff --git a/thirdparty/bullet/Bullet3Common/b3Transform.h b/thirdparty/bullet/Bullet3Common/b3Transform.h deleted file mode 100644 index 149da9d148..0000000000 --- a/thirdparty/bullet/Bullet3Common/b3Transform.h +++ /dev/null @@ -1,286 +0,0 @@ -/* -Copyright (c) 2003-2013 Gino van den Bergen / Erwin Coumans 
http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#ifndef B3_TRANSFORM_H -#define B3_TRANSFORM_H - -#include "b3Matrix3x3.h" - -#ifdef B3_USE_DOUBLE_PRECISION -#define b3TransformData b3TransformDoubleData -#else -#define b3TransformData b3TransformFloatData -#endif - -/**@brief The b3Transform class supports rigid transforms with only translation and rotation and no scaling/shear. - *It can be used in combination with b3Vector3, b3Quaternion and b3Matrix3x3 linear algebra classes. 
*/ -B3_ATTRIBUTE_ALIGNED16(class) -b3Transform -{ - ///Storage for the rotation - b3Matrix3x3 m_basis; - ///Storage for the translation - b3Vector3 m_origin; - -public: - /**@brief No initialization constructor */ - b3Transform() {} - /**@brief Constructor from b3Quaternion (optional b3Vector3 ) - * @param q Rotation from quaternion - * @param c Translation from Vector (default 0,0,0) */ - explicit B3_FORCE_INLINE b3Transform(const b3Quaternion& q, - const b3Vector3& c = b3MakeVector3(b3Scalar(0), b3Scalar(0), b3Scalar(0))) - : m_basis(q), - m_origin(c) - { - } - - /**@brief Constructor from b3Matrix3x3 (optional b3Vector3) - * @param b Rotation from Matrix - * @param c Translation from Vector default (0,0,0)*/ - explicit B3_FORCE_INLINE b3Transform(const b3Matrix3x3& b, - const b3Vector3& c = b3MakeVector3(b3Scalar(0), b3Scalar(0), b3Scalar(0))) - : m_basis(b), - m_origin(c) - { - } - /**@brief Copy constructor */ - B3_FORCE_INLINE b3Transform(const b3Transform& other) - : m_basis(other.m_basis), - m_origin(other.m_origin) - { - } - /**@brief Assignment Operator */ - B3_FORCE_INLINE b3Transform& operator=(const b3Transform& other) - { - m_basis = other.m_basis; - m_origin = other.m_origin; - return *this; - } - - /**@brief Set the current transform as the value of the product of two transforms - * @param t1 Transform 1 - * @param t2 Transform 2 - * This = Transform1 * Transform2 */ - B3_FORCE_INLINE void mult(const b3Transform& t1, const b3Transform& t2) - { - m_basis = t1.m_basis * t2.m_basis; - m_origin = t1(t2.m_origin); - } - - /* void multInverseLeft(const b3Transform& t1, const b3Transform& t2) { - b3Vector3 v = t2.m_origin - t1.m_origin; - m_basis = b3MultTransposeLeft(t1.m_basis, t2.m_basis); - m_origin = v * t1.m_basis; - } - */ - - /**@brief Return the transform of the vector */ - B3_FORCE_INLINE b3Vector3 operator()(const b3Vector3& x) const - { - return x.dot3(m_basis[0], m_basis[1], m_basis[2]) + m_origin; - } - - /**@brief Return the transform of the 
vector */ - B3_FORCE_INLINE b3Vector3 operator*(const b3Vector3& x) const - { - return (*this)(x); - } - - /**@brief Return the transform of the b3Quaternion */ - B3_FORCE_INLINE b3Quaternion operator*(const b3Quaternion& q) const - { - return getRotation() * q; - } - - /**@brief Return the basis matrix for the rotation */ - B3_FORCE_INLINE b3Matrix3x3& getBasis() { return m_basis; } - /**@brief Return the basis matrix for the rotation */ - B3_FORCE_INLINE const b3Matrix3x3& getBasis() const { return m_basis; } - - /**@brief Return the origin vector translation */ - B3_FORCE_INLINE b3Vector3& getOrigin() { return m_origin; } - /**@brief Return the origin vector translation */ - B3_FORCE_INLINE const b3Vector3& getOrigin() const { return m_origin; } - - /**@brief Return a quaternion representing the rotation */ - b3Quaternion getRotation() const - { - b3Quaternion q; - m_basis.getRotation(q); - return q; - } - - /**@brief Set from an array - * @param m A pointer to a 15 element array (12 rotation(row major padded on the right by 1), and 3 translation */ - void setFromOpenGLMatrix(const b3Scalar* m) - { - m_basis.setFromOpenGLSubMatrix(m); - m_origin.setValue(m[12], m[13], m[14]); - } - - /**@brief Fill an array representation - * @param m A pointer to a 15 element array (12 rotation(row major padded on the right by 1), and 3 translation */ - void getOpenGLMatrix(b3Scalar * m) const - { - m_basis.getOpenGLSubMatrix(m); - m[12] = m_origin.getX(); - m[13] = m_origin.getY(); - m[14] = m_origin.getZ(); - m[15] = b3Scalar(1.0); - } - - /**@brief Set the translational element - * @param origin The vector to set the translation to */ - B3_FORCE_INLINE void setOrigin(const b3Vector3& origin) - { - m_origin = origin; - } - - B3_FORCE_INLINE b3Vector3 invXform(const b3Vector3& inVec) const; - - /**@brief Set the rotational element by b3Matrix3x3 */ - B3_FORCE_INLINE void setBasis(const b3Matrix3x3& basis) - { - m_basis = basis; - } - - /**@brief Set the rotational element by 
b3Quaternion */ - B3_FORCE_INLINE void setRotation(const b3Quaternion& q) - { - m_basis.setRotation(q); - } - - /**@brief Set this transformation to the identity */ - void setIdentity() - { - m_basis.setIdentity(); - m_origin.setValue(b3Scalar(0.0), b3Scalar(0.0), b3Scalar(0.0)); - } - - /**@brief Multiply this Transform by another(this = this * another) - * @param t The other transform */ - b3Transform& operator*=(const b3Transform& t) - { - m_origin += m_basis * t.m_origin; - m_basis *= t.m_basis; - return *this; - } - - /**@brief Return the inverse of this transform */ - b3Transform inverse() const - { - b3Matrix3x3 inv = m_basis.transpose(); - return b3Transform(inv, inv * -m_origin); - } - - /**@brief Return the inverse of this transform times the other transform - * @param t The other transform - * return this.inverse() * the other */ - b3Transform inverseTimes(const b3Transform& t) const; - - /**@brief Return the product of this transform and the other */ - b3Transform operator*(const b3Transform& t) const; - - /**@brief Return an identity transform */ - static const b3Transform& getIdentity() - { - static const b3Transform identityTransform(b3Matrix3x3::getIdentity()); - return identityTransform; - } - - void serialize(struct b3TransformData & dataOut) const; - - void serializeFloat(struct b3TransformFloatData & dataOut) const; - - void deSerialize(const struct b3TransformData& dataIn); - - void deSerializeDouble(const struct b3TransformDoubleData& dataIn); - - void deSerializeFloat(const struct b3TransformFloatData& dataIn); -}; - -B3_FORCE_INLINE b3Vector3 -b3Transform::invXform(const b3Vector3& inVec) const -{ - b3Vector3 v = inVec - m_origin; - return (m_basis.transpose() * v); -} - -B3_FORCE_INLINE b3Transform -b3Transform::inverseTimes(const b3Transform& t) const -{ - b3Vector3 v = t.getOrigin() - m_origin; - return b3Transform(m_basis.transposeTimes(t.m_basis), - v * m_basis); -} - -B3_FORCE_INLINE b3Transform - b3Transform::operator*(const 
b3Transform& t) const -{ - return b3Transform(m_basis * t.m_basis, - (*this)(t.m_origin)); -} - -/**@brief Test if two transforms have all elements equal */ -B3_FORCE_INLINE bool operator==(const b3Transform& t1, const b3Transform& t2) -{ - return (t1.getBasis() == t2.getBasis() && - t1.getOrigin() == t2.getOrigin()); -} - -///for serialization -struct b3TransformFloatData -{ - b3Matrix3x3FloatData m_basis; - b3Vector3FloatData m_origin; -}; - -struct b3TransformDoubleData -{ - b3Matrix3x3DoubleData m_basis; - b3Vector3DoubleData m_origin; -}; - -B3_FORCE_INLINE void b3Transform::serialize(b3TransformData& dataOut) const -{ - m_basis.serialize(dataOut.m_basis); - m_origin.serialize(dataOut.m_origin); -} - -B3_FORCE_INLINE void b3Transform::serializeFloat(b3TransformFloatData& dataOut) const -{ - m_basis.serializeFloat(dataOut.m_basis); - m_origin.serializeFloat(dataOut.m_origin); -} - -B3_FORCE_INLINE void b3Transform::deSerialize(const b3TransformData& dataIn) -{ - m_basis.deSerialize(dataIn.m_basis); - m_origin.deSerialize(dataIn.m_origin); -} - -B3_FORCE_INLINE void b3Transform::deSerializeFloat(const b3TransformFloatData& dataIn) -{ - m_basis.deSerializeFloat(dataIn.m_basis); - m_origin.deSerializeFloat(dataIn.m_origin); -} - -B3_FORCE_INLINE void b3Transform::deSerializeDouble(const b3TransformDoubleData& dataIn) -{ - m_basis.deSerializeDouble(dataIn.m_basis); - m_origin.deSerializeDouble(dataIn.m_origin); -} - -#endif //B3_TRANSFORM_H diff --git a/thirdparty/bullet/Bullet3Common/b3TransformUtil.h b/thirdparty/bullet/Bullet3Common/b3TransformUtil.h deleted file mode 100644 index 1850a9be5f..0000000000 --- a/thirdparty/bullet/Bullet3Common/b3TransformUtil.h +++ /dev/null @@ -1,210 +0,0 @@ -/* -Copyright (c) 2003-2013 Gino van den Bergen / Erwin Coumans http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. 
-Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#ifndef B3_TRANSFORM_UTIL_H -#define B3_TRANSFORM_UTIL_H - -#include "b3Transform.h" -#define B3_ANGULAR_MOTION_THRESHOLD b3Scalar(0.5) * B3_HALF_PI - -B3_FORCE_INLINE b3Vector3 b3AabbSupport(const b3Vector3& halfExtents, const b3Vector3& supportDir) -{ - return b3MakeVector3(supportDir.getX() < b3Scalar(0.0) ? -halfExtents.getX() : halfExtents.getX(), - supportDir.getY() < b3Scalar(0.0) ? -halfExtents.getY() : halfExtents.getY(), - supportDir.getZ() < b3Scalar(0.0) ? -halfExtents.getZ() : halfExtents.getZ()); -} - -/// Utils related to temporal transforms -class b3TransformUtil -{ -public: - static void integrateTransform(const b3Transform& curTrans, const b3Vector3& linvel, const b3Vector3& angvel, b3Scalar timeStep, b3Transform& predictedTransform) - { - predictedTransform.setOrigin(curTrans.getOrigin() + linvel * timeStep); - // #define QUATERNION_DERIVATIVE -#ifdef QUATERNION_DERIVATIVE - b3Quaternion predictedOrn = curTrans.getRotation(); - predictedOrn += (angvel * predictedOrn) * (timeStep * b3Scalar(0.5)); - predictedOrn.normalize(); -#else - //Exponential map - //google for "Practical Parameterization of Rotations Using the Exponential Map", F. 
Sebastian Grassia - - b3Vector3 axis; - b3Scalar fAngle = angvel.length(); - //limit the angular motion - if (fAngle * timeStep > B3_ANGULAR_MOTION_THRESHOLD) - { - fAngle = B3_ANGULAR_MOTION_THRESHOLD / timeStep; - } - - if (fAngle < b3Scalar(0.001)) - { - // use Taylor's expansions of sync function - axis = angvel * (b3Scalar(0.5) * timeStep - (timeStep * timeStep * timeStep) * (b3Scalar(0.020833333333)) * fAngle * fAngle); - } - else - { - // sync(fAngle) = sin(c*fAngle)/t - axis = angvel * (b3Sin(b3Scalar(0.5) * fAngle * timeStep) / fAngle); - } - b3Quaternion dorn(axis.getX(), axis.getY(), axis.getZ(), b3Cos(fAngle * timeStep * b3Scalar(0.5))); - b3Quaternion orn0 = curTrans.getRotation(); - - b3Quaternion predictedOrn = dorn * orn0; - predictedOrn.normalize(); -#endif - predictedTransform.setRotation(predictedOrn); - } - - static void calculateVelocityQuaternion(const b3Vector3& pos0, const b3Vector3& pos1, const b3Quaternion& orn0, const b3Quaternion& orn1, b3Scalar timeStep, b3Vector3& linVel, b3Vector3& angVel) - { - linVel = (pos1 - pos0) / timeStep; - b3Vector3 axis; - b3Scalar angle; - if (orn0 != orn1) - { - calculateDiffAxisAngleQuaternion(orn0, orn1, axis, angle); - angVel = axis * angle / timeStep; - } - else - { - angVel.setValue(0, 0, 0); - } - } - - static void calculateDiffAxisAngleQuaternion(const b3Quaternion& orn0, const b3Quaternion& orn1a, b3Vector3& axis, b3Scalar& angle) - { - b3Quaternion orn1 = orn0.nearest(orn1a); - b3Quaternion dorn = orn1 * orn0.inverse(); - angle = dorn.getAngle(); - axis = b3MakeVector3(dorn.getX(), dorn.getY(), dorn.getZ()); - axis[3] = b3Scalar(0.); - //check for axis length - b3Scalar len = axis.length2(); - if (len < B3_EPSILON * B3_EPSILON) - axis = b3MakeVector3(b3Scalar(1.), b3Scalar(0.), b3Scalar(0.)); - else - axis /= b3Sqrt(len); - } - - static void calculateVelocity(const b3Transform& transform0, const b3Transform& transform1, b3Scalar timeStep, b3Vector3& linVel, b3Vector3& angVel) - { - linVel = 
(transform1.getOrigin() - transform0.getOrigin()) / timeStep; - b3Vector3 axis; - b3Scalar angle; - calculateDiffAxisAngle(transform0, transform1, axis, angle); - angVel = axis * angle / timeStep; - } - - static void calculateDiffAxisAngle(const b3Transform& transform0, const b3Transform& transform1, b3Vector3& axis, b3Scalar& angle) - { - b3Matrix3x3 dmat = transform1.getBasis() * transform0.getBasis().inverse(); - b3Quaternion dorn; - dmat.getRotation(dorn); - - ///floating point inaccuracy can lead to w component > 1..., which breaks - dorn.normalize(); - - angle = dorn.getAngle(); - axis = b3MakeVector3(dorn.getX(), dorn.getY(), dorn.getZ()); - axis[3] = b3Scalar(0.); - //check for axis length - b3Scalar len = axis.length2(); - if (len < B3_EPSILON * B3_EPSILON) - axis = b3MakeVector3(b3Scalar(1.), b3Scalar(0.), b3Scalar(0.)); - else - axis /= b3Sqrt(len); - } -}; - -///The b3ConvexSeparatingDistanceUtil can help speed up convex collision detection -///by conservatively updating a cached separating distance/vector instead of re-calculating the closest distance -class b3ConvexSeparatingDistanceUtil -{ - b3Quaternion m_ornA; - b3Quaternion m_ornB; - b3Vector3 m_posA; - b3Vector3 m_posB; - - b3Vector3 m_separatingNormal; - - b3Scalar m_boundingRadiusA; - b3Scalar m_boundingRadiusB; - b3Scalar m_separatingDistance; - -public: - b3ConvexSeparatingDistanceUtil(b3Scalar boundingRadiusA, b3Scalar boundingRadiusB) - : m_boundingRadiusA(boundingRadiusA), - m_boundingRadiusB(boundingRadiusB), - m_separatingDistance(0.f) - { - } - - b3Scalar getConservativeSeparatingDistance() - { - return m_separatingDistance; - } - - void updateSeparatingDistance(const b3Transform& transA, const b3Transform& transB) - { - const b3Vector3& toPosA = transA.getOrigin(); - const b3Vector3& toPosB = transB.getOrigin(); - b3Quaternion toOrnA = transA.getRotation(); - b3Quaternion toOrnB = transB.getRotation(); - - if (m_separatingDistance > 0.f) - { - b3Vector3 linVelA, angVelA, linVelB, 
angVelB; - b3TransformUtil::calculateVelocityQuaternion(m_posA, toPosA, m_ornA, toOrnA, b3Scalar(1.), linVelA, angVelA); - b3TransformUtil::calculateVelocityQuaternion(m_posB, toPosB, m_ornB, toOrnB, b3Scalar(1.), linVelB, angVelB); - b3Scalar maxAngularProjectedVelocity = angVelA.length() * m_boundingRadiusA + angVelB.length() * m_boundingRadiusB; - b3Vector3 relLinVel = (linVelB - linVelA); - b3Scalar relLinVelocLength = relLinVel.dot(m_separatingNormal); - if (relLinVelocLength < 0.f) - { - relLinVelocLength = 0.f; - } - - b3Scalar projectedMotion = maxAngularProjectedVelocity + relLinVelocLength; - m_separatingDistance -= projectedMotion; - } - - m_posA = toPosA; - m_posB = toPosB; - m_ornA = toOrnA; - m_ornB = toOrnB; - } - - void initSeparatingDistance(const b3Vector3& separatingVector, b3Scalar separatingDistance, const b3Transform& transA, const b3Transform& transB) - { - m_separatingDistance = separatingDistance; - - if (m_separatingDistance > 0.f) - { - m_separatingNormal = separatingVector; - - const b3Vector3& toPosA = transA.getOrigin(); - const b3Vector3& toPosB = transB.getOrigin(); - b3Quaternion toOrnA = transA.getRotation(); - b3Quaternion toOrnB = transB.getRotation(); - m_posA = toPosA; - m_posB = toPosB; - m_ornA = toOrnA; - m_ornB = toOrnB; - } - } -}; - -#endif //B3_TRANSFORM_UTIL_H diff --git a/thirdparty/bullet/Bullet3Common/b3Vector3.cpp b/thirdparty/bullet/Bullet3Common/b3Vector3.cpp deleted file mode 100644 index 100fb774c1..0000000000 --- a/thirdparty/bullet/Bullet3Common/b3Vector3.cpp +++ /dev/null @@ -1,1637 +0,0 @@ -/* - Copyright (c) 2011-213 Apple Inc. http://bulletphysics.org - - This software is provided 'as-is', without any express or implied warranty. - In no event will the authors be held liable for any damages arising from the use of this software. 
- Permission is granted to anyone to use this software for any purpose, - including commercial applications, and to alter it and redistribute it freely, - subject to the following restrictions: - - 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. - 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. - 3. This notice may not be removed or altered from any source distribution. - - This source version has been altered. - */ - -#if defined(_WIN32) || defined(__i386__) -#define B3_USE_SSE_IN_API -#endif - -#include "b3Vector3.h" - -#if defined(B3_USE_SSE) || defined(B3_USE_NEON) - -#ifdef __APPLE__ -#include <stdint.h> -typedef float float4 __attribute__((vector_size(16))); -#else -#define float4 __m128 -#endif -//typedef uint32_t uint4 __attribute__ ((vector_size(16))); - -#if defined B3_USE_SSE || defined _WIN32 - -#define LOG2_ARRAY_SIZE 6 -#define STACK_ARRAY_COUNT (1UL << LOG2_ARRAY_SIZE) - -#include <emmintrin.h> - -long b3_maxdot_large(const float *vv, const float *vec, unsigned long count, float *dotResult); -long b3_maxdot_large(const float *vv, const float *vec, unsigned long count, float *dotResult) -{ - const float4 *vertices = (const float4 *)vv; - static const unsigned char indexTable[16] = {(unsigned char)-1, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0}; - float4 dotMax = b3Assign128(-B3_INFINITY, -B3_INFINITY, -B3_INFINITY, -B3_INFINITY); - float4 vvec = _mm_loadu_ps(vec); - float4 vHi = b3CastiTo128f(_mm_shuffle_epi32(b3CastfTo128i(vvec), 0xaa)); /// zzzz - float4 vLo = _mm_movelh_ps(vvec, vvec); /// xyxy - - long maxIndex = -1L; - - size_t segment = 0; - float4 stack_array[STACK_ARRAY_COUNT]; - -#if DEBUG - // memset( stack_array, -1, STACK_ARRAY_COUNT * sizeof(stack_array[0]) ); -#endif - - 
size_t index; - float4 max; - // Faster loop without cleanup code for full tiles - for (segment = 0; segment + STACK_ARRAY_COUNT * 4 <= count; segment += STACK_ARRAY_COUNT * 4) - { - max = dotMax; - - for (index = 0; index < STACK_ARRAY_COUNT; index += 4) - { // do four dot products at a time. Carefully avoid touching the w element. - float4 v0 = vertices[0]; - float4 v1 = vertices[1]; - float4 v2 = vertices[2]; - float4 v3 = vertices[3]; - vertices += 4; - - float4 lo0 = _mm_movelh_ps(v0, v1); // x0y0x1y1 - float4 hi0 = _mm_movehl_ps(v1, v0); // z0?0z1?1 - float4 lo1 = _mm_movelh_ps(v2, v3); // x2y2x3y3 - float4 hi1 = _mm_movehl_ps(v3, v2); // z2?2z3?3 - - lo0 = lo0 * vLo; - lo1 = lo1 * vLo; - float4 z = _mm_shuffle_ps(hi0, hi1, 0x88); - float4 x = _mm_shuffle_ps(lo0, lo1, 0x88); - float4 y = _mm_shuffle_ps(lo0, lo1, 0xdd); - z = z * vHi; - x = x + y; - x = x + z; - stack_array[index] = x; - max = _mm_max_ps(x, max); // control the order here so that max is never NaN even if x is nan - - v0 = vertices[0]; - v1 = vertices[1]; - v2 = vertices[2]; - v3 = vertices[3]; - vertices += 4; - - lo0 = _mm_movelh_ps(v0, v1); // x0y0x1y1 - hi0 = _mm_movehl_ps(v1, v0); // z0?0z1?1 - lo1 = _mm_movelh_ps(v2, v3); // x2y2x3y3 - hi1 = _mm_movehl_ps(v3, v2); // z2?2z3?3 - - lo0 = lo0 * vLo; - lo1 = lo1 * vLo; - z = _mm_shuffle_ps(hi0, hi1, 0x88); - x = _mm_shuffle_ps(lo0, lo1, 0x88); - y = _mm_shuffle_ps(lo0, lo1, 0xdd); - z = z * vHi; - x = x + y; - x = x + z; - stack_array[index + 1] = x; - max = _mm_max_ps(x, max); // control the order here so that max is never NaN even if x is nan - - v0 = vertices[0]; - v1 = vertices[1]; - v2 = vertices[2]; - v3 = vertices[3]; - vertices += 4; - - lo0 = _mm_movelh_ps(v0, v1); // x0y0x1y1 - hi0 = _mm_movehl_ps(v1, v0); // z0?0z1?1 - lo1 = _mm_movelh_ps(v2, v3); // x2y2x3y3 - hi1 = _mm_movehl_ps(v3, v2); // z2?2z3?3 - - lo0 = lo0 * vLo; - lo1 = lo1 * vLo; - z = _mm_shuffle_ps(hi0, hi1, 0x88); - x = _mm_shuffle_ps(lo0, lo1, 0x88); - y = 
_mm_shuffle_ps(lo0, lo1, 0xdd); - z = z * vHi; - x = x + y; - x = x + z; - stack_array[index + 2] = x; - max = _mm_max_ps(x, max); // control the order here so that max is never NaN even if x is nan - - v0 = vertices[0]; - v1 = vertices[1]; - v2 = vertices[2]; - v3 = vertices[3]; - vertices += 4; - - lo0 = _mm_movelh_ps(v0, v1); // x0y0x1y1 - hi0 = _mm_movehl_ps(v1, v0); // z0?0z1?1 - lo1 = _mm_movelh_ps(v2, v3); // x2y2x3y3 - hi1 = _mm_movehl_ps(v3, v2); // z2?2z3?3 - - lo0 = lo0 * vLo; - lo1 = lo1 * vLo; - z = _mm_shuffle_ps(hi0, hi1, 0x88); - x = _mm_shuffle_ps(lo0, lo1, 0x88); - y = _mm_shuffle_ps(lo0, lo1, 0xdd); - z = z * vHi; - x = x + y; - x = x + z; - stack_array[index + 3] = x; - max = _mm_max_ps(x, max); // control the order here so that max is never NaN even if x is nan - - // It is too costly to keep the index of the max here. We will look for it again later. We save a lot of work this way. - } - - // If we found a new max - if (0xf != _mm_movemask_ps((float4)_mm_cmpeq_ps(max, dotMax))) - { - // copy the new max across all lanes of our max accumulator - max = _mm_max_ps(max, (float4)_mm_shuffle_ps(max, max, 0x4e)); - max = _mm_max_ps(max, (float4)_mm_shuffle_ps(max, max, 0xb1)); - - dotMax = max; - - // find first occurrence of that max - size_t test; - for (index = 0; 0 == (test = _mm_movemask_ps(_mm_cmpeq_ps(stack_array[index], max))); index++) // local_count must be a multiple of 4 - { - } - // record where it is. - maxIndex = 4 * index + segment + indexTable[test]; - } - } - - // account for work we've already done - count -= segment; - - // Deal with the last < STACK_ARRAY_COUNT vectors - max = dotMax; - index = 0; - - if (b3Unlikely(count > 16)) - { - for (; index + 4 <= count / 4; index += 4) - { // do four dot products at a time. Carefully avoid touching the w element. 
- float4 v0 = vertices[0]; - float4 v1 = vertices[1]; - float4 v2 = vertices[2]; - float4 v3 = vertices[3]; - vertices += 4; - - float4 lo0 = _mm_movelh_ps(v0, v1); // x0y0x1y1 - float4 hi0 = _mm_movehl_ps(v1, v0); // z0?0z1?1 - float4 lo1 = _mm_movelh_ps(v2, v3); // x2y2x3y3 - float4 hi1 = _mm_movehl_ps(v3, v2); // z2?2z3?3 - - lo0 = lo0 * vLo; - lo1 = lo1 * vLo; - float4 z = _mm_shuffle_ps(hi0, hi1, 0x88); - float4 x = _mm_shuffle_ps(lo0, lo1, 0x88); - float4 y = _mm_shuffle_ps(lo0, lo1, 0xdd); - z = z * vHi; - x = x + y; - x = x + z; - stack_array[index] = x; - max = _mm_max_ps(x, max); // control the order here so that max is never NaN even if x is nan - - v0 = vertices[0]; - v1 = vertices[1]; - v2 = vertices[2]; - v3 = vertices[3]; - vertices += 4; - - lo0 = _mm_movelh_ps(v0, v1); // x0y0x1y1 - hi0 = _mm_movehl_ps(v1, v0); // z0?0z1?1 - lo1 = _mm_movelh_ps(v2, v3); // x2y2x3y3 - hi1 = _mm_movehl_ps(v3, v2); // z2?2z3?3 - - lo0 = lo0 * vLo; - lo1 = lo1 * vLo; - z = _mm_shuffle_ps(hi0, hi1, 0x88); - x = _mm_shuffle_ps(lo0, lo1, 0x88); - y = _mm_shuffle_ps(lo0, lo1, 0xdd); - z = z * vHi; - x = x + y; - x = x + z; - stack_array[index + 1] = x; - max = _mm_max_ps(x, max); // control the order here so that max is never NaN even if x is nan - - v0 = vertices[0]; - v1 = vertices[1]; - v2 = vertices[2]; - v3 = vertices[3]; - vertices += 4; - - lo0 = _mm_movelh_ps(v0, v1); // x0y0x1y1 - hi0 = _mm_movehl_ps(v1, v0); // z0?0z1?1 - lo1 = _mm_movelh_ps(v2, v3); // x2y2x3y3 - hi1 = _mm_movehl_ps(v3, v2); // z2?2z3?3 - - lo0 = lo0 * vLo; - lo1 = lo1 * vLo; - z = _mm_shuffle_ps(hi0, hi1, 0x88); - x = _mm_shuffle_ps(lo0, lo1, 0x88); - y = _mm_shuffle_ps(lo0, lo1, 0xdd); - z = z * vHi; - x = x + y; - x = x + z; - stack_array[index + 2] = x; - max = _mm_max_ps(x, max); // control the order here so that max is never NaN even if x is nan - - v0 = vertices[0]; - v1 = vertices[1]; - v2 = vertices[2]; - v3 = vertices[3]; - vertices += 4; - - lo0 = _mm_movelh_ps(v0, v1); // x0y0x1y1 - 
hi0 = _mm_movehl_ps(v1, v0); // z0?0z1?1 - lo1 = _mm_movelh_ps(v2, v3); // x2y2x3y3 - hi1 = _mm_movehl_ps(v3, v2); // z2?2z3?3 - - lo0 = lo0 * vLo; - lo1 = lo1 * vLo; - z = _mm_shuffle_ps(hi0, hi1, 0x88); - x = _mm_shuffle_ps(lo0, lo1, 0x88); - y = _mm_shuffle_ps(lo0, lo1, 0xdd); - z = z * vHi; - x = x + y; - x = x + z; - stack_array[index + 3] = x; - max = _mm_max_ps(x, max); // control the order here so that max is never NaN even if x is nan - - // It is too costly to keep the index of the max here. We will look for it again later. We save a lot of work this way. - } - } - - size_t localCount = (count & -4L) - 4 * index; - if (localCount) - { -#ifdef __APPLE__ - float4 t0, t1, t2, t3, t4; - float4 *sap = &stack_array[index + localCount / 4]; - vertices += localCount; // counter the offset - size_t byteIndex = -(localCount) * sizeof(float); - //AT&T Code style assembly - asm volatile( - ".align 4 \n\ - 0: movaps %[max], %[t2] // move max out of the way to avoid propagating NaNs in max \n\ - movaps (%[vertices], %[byteIndex], 4), %[t0] // vertices[0] \n\ - movaps 16(%[vertices], %[byteIndex], 4), %[t1] // vertices[1] \n\ - movaps %[t0], %[max] // vertices[0] \n\ - movlhps %[t1], %[max] // x0y0x1y1 \n\ - movaps 32(%[vertices], %[byteIndex], 4), %[t3] // vertices[2] \n\ - movaps 48(%[vertices], %[byteIndex], 4), %[t4] // vertices[3] \n\ - mulps %[vLo], %[max] // x0y0x1y1 * vLo \n\ - movhlps %[t0], %[t1] // z0w0z1w1 \n\ - movaps %[t3], %[t0] // vertices[2] \n\ - movlhps %[t4], %[t0] // x2y2x3y3 \n\ - mulps %[vLo], %[t0] // x2y2x3y3 * vLo \n\ - movhlps %[t3], %[t4] // z2w2z3w3 \n\ - shufps $0x88, %[t4], %[t1] // z0z1z2z3 \n\ - mulps %[vHi], %[t1] // z0z1z2z3 * vHi \n\ - movaps %[max], %[t3] // x0y0x1y1 * vLo \n\ - shufps $0x88, %[t0], %[max] // x0x1x2x3 * vLo.x \n\ - shufps $0xdd, %[t0], %[t3] // y0y1y2y3 * vLo.y \n\ - addps %[t3], %[max] // x + y \n\ - addps %[t1], %[max] // x + y + z \n\ - movaps %[max], (%[sap], %[byteIndex]) // record result for later scrutiny \n\ 
- maxps %[t2], %[max] // record max, restore max \n\ - add $16, %[byteIndex] // advance loop counter\n\ - jnz 0b \n\ - " - : [max] "+x"(max), [t0] "=&x"(t0), [t1] "=&x"(t1), [t2] "=&x"(t2), [t3] "=&x"(t3), [t4] "=&x"(t4), [byteIndex] "+r"(byteIndex) - : [vLo] "x"(vLo), [vHi] "x"(vHi), [vertices] "r"(vertices), [sap] "r"(sap) - : "memory", "cc"); - index += localCount / 4; -#else - { - for (unsigned int i = 0; i < localCount / 4; i++, index++) - { // do four dot products at a time. Carefully avoid touching the w element. - float4 v0 = vertices[0]; - float4 v1 = vertices[1]; - float4 v2 = vertices[2]; - float4 v3 = vertices[3]; - vertices += 4; - - float4 lo0 = _mm_movelh_ps(v0, v1); // x0y0x1y1 - float4 hi0 = _mm_movehl_ps(v1, v0); // z0?0z1?1 - float4 lo1 = _mm_movelh_ps(v2, v3); // x2y2x3y3 - float4 hi1 = _mm_movehl_ps(v3, v2); // z2?2z3?3 - - lo0 = lo0 * vLo; - lo1 = lo1 * vLo; - float4 z = _mm_shuffle_ps(hi0, hi1, 0x88); - float4 x = _mm_shuffle_ps(lo0, lo1, 0x88); - float4 y = _mm_shuffle_ps(lo0, lo1, 0xdd); - z = z * vHi; - x = x + y; - x = x + z; - stack_array[index] = x; - max = _mm_max_ps(x, max); // control the order here so that max is never NaN even if x is nan - } - } -#endif //__APPLE__ - } - - // process the last few points - if (count & 3) - { - float4 v0, v1, v2, x, y, z; - switch (count & 3) - { - case 3: - { - v0 = vertices[0]; - v1 = vertices[1]; - v2 = vertices[2]; - - // Calculate 3 dot products, transpose, duplicate v2 - float4 lo0 = _mm_movelh_ps(v0, v1); // xyxy.lo - float4 hi0 = _mm_movehl_ps(v1, v0); // z?z?.lo - lo0 = lo0 * vLo; - z = _mm_shuffle_ps(hi0, v2, 0xa8); // z0z1z2z2 - z = z * vHi; - float4 lo1 = _mm_movelh_ps(v2, v2); // xyxy - lo1 = lo1 * vLo; - x = _mm_shuffle_ps(lo0, lo1, 0x88); - y = _mm_shuffle_ps(lo0, lo1, 0xdd); - } - break; - case 2: - { - v0 = vertices[0]; - v1 = vertices[1]; - float4 xy = _mm_movelh_ps(v0, v1); - z = _mm_movehl_ps(v1, v0); - xy = xy * vLo; - z = _mm_shuffle_ps(z, z, 0xa8); - x = _mm_shuffle_ps(xy, xy, 
0xa8); - y = _mm_shuffle_ps(xy, xy, 0xfd); - z = z * vHi; - } - break; - case 1: - { - float4 xy = vertices[0]; - z = _mm_shuffle_ps(xy, xy, 0xaa); - xy = xy * vLo; - z = z * vHi; - x = _mm_shuffle_ps(xy, xy, 0); - y = _mm_shuffle_ps(xy, xy, 0x55); - } - break; - } - x = x + y; - x = x + z; - stack_array[index] = x; - max = _mm_max_ps(x, max); // control the order here so that max is never NaN even if x is nan - index++; - } - - // if we found a new max. - if (0 == segment || 0xf != _mm_movemask_ps((float4)_mm_cmpeq_ps(max, dotMax))) - { // we found a new max. Search for it - // find max across the max vector, place in all elements of max -- big latency hit here - max = _mm_max_ps(max, (float4)_mm_shuffle_ps(max, max, 0x4e)); - max = _mm_max_ps(max, (float4)_mm_shuffle_ps(max, max, 0xb1)); - - // It is slightly faster to do this part in scalar code when count < 8. However, the common case for - // this where it actually makes a difference is handled in the early out at the top of the function, - // so it is less than a 1% difference here. I opted for improved code size, fewer branches and reduced - // complexity, and removed it. 
- - dotMax = max; - - // scan for the first occurence of max in the array - size_t test; - for (index = 0; 0 == (test = _mm_movemask_ps(_mm_cmpeq_ps(stack_array[index], max))); index++) // local_count must be a multiple of 4 - { - } - maxIndex = 4 * index + segment + indexTable[test]; - } - - _mm_store_ss(dotResult, dotMax); - return maxIndex; -} - -long b3_mindot_large(const float *vv, const float *vec, unsigned long count, float *dotResult); - -long b3_mindot_large(const float *vv, const float *vec, unsigned long count, float *dotResult) -{ - const float4 *vertices = (const float4 *)vv; - static const unsigned char indexTable[16] = {(unsigned char)-1, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0}; - - float4 dotmin = b3Assign128(B3_INFINITY, B3_INFINITY, B3_INFINITY, B3_INFINITY); - float4 vvec = _mm_loadu_ps(vec); - float4 vHi = b3CastiTo128f(_mm_shuffle_epi32(b3CastfTo128i(vvec), 0xaa)); /// zzzz - float4 vLo = _mm_movelh_ps(vvec, vvec); /// xyxy - - long minIndex = -1L; - - size_t segment = 0; - float4 stack_array[STACK_ARRAY_COUNT]; - -#if DEBUG - // memset( stack_array, -1, STACK_ARRAY_COUNT * sizeof(stack_array[0]) ); -#endif - - size_t index; - float4 min; - // Faster loop without cleanup code for full tiles - for (segment = 0; segment + STACK_ARRAY_COUNT * 4 <= count; segment += STACK_ARRAY_COUNT * 4) - { - min = dotmin; - - for (index = 0; index < STACK_ARRAY_COUNT; index += 4) - { // do four dot products at a time. Carefully avoid touching the w element. 
- float4 v0 = vertices[0]; - float4 v1 = vertices[1]; - float4 v2 = vertices[2]; - float4 v3 = vertices[3]; - vertices += 4; - - float4 lo0 = _mm_movelh_ps(v0, v1); // x0y0x1y1 - float4 hi0 = _mm_movehl_ps(v1, v0); // z0?0z1?1 - float4 lo1 = _mm_movelh_ps(v2, v3); // x2y2x3y3 - float4 hi1 = _mm_movehl_ps(v3, v2); // z2?2z3?3 - - lo0 = lo0 * vLo; - lo1 = lo1 * vLo; - float4 z = _mm_shuffle_ps(hi0, hi1, 0x88); - float4 x = _mm_shuffle_ps(lo0, lo1, 0x88); - float4 y = _mm_shuffle_ps(lo0, lo1, 0xdd); - z = z * vHi; - x = x + y; - x = x + z; - stack_array[index] = x; - min = _mm_min_ps(x, min); // control the order here so that min is never NaN even if x is nan - - v0 = vertices[0]; - v1 = vertices[1]; - v2 = vertices[2]; - v3 = vertices[3]; - vertices += 4; - - lo0 = _mm_movelh_ps(v0, v1); // x0y0x1y1 - hi0 = _mm_movehl_ps(v1, v0); // z0?0z1?1 - lo1 = _mm_movelh_ps(v2, v3); // x2y2x3y3 - hi1 = _mm_movehl_ps(v3, v2); // z2?2z3?3 - - lo0 = lo0 * vLo; - lo1 = lo1 * vLo; - z = _mm_shuffle_ps(hi0, hi1, 0x88); - x = _mm_shuffle_ps(lo0, lo1, 0x88); - y = _mm_shuffle_ps(lo0, lo1, 0xdd); - z = z * vHi; - x = x + y; - x = x + z; - stack_array[index + 1] = x; - min = _mm_min_ps(x, min); // control the order here so that min is never NaN even if x is nan - - v0 = vertices[0]; - v1 = vertices[1]; - v2 = vertices[2]; - v3 = vertices[3]; - vertices += 4; - - lo0 = _mm_movelh_ps(v0, v1); // x0y0x1y1 - hi0 = _mm_movehl_ps(v1, v0); // z0?0z1?1 - lo1 = _mm_movelh_ps(v2, v3); // x2y2x3y3 - hi1 = _mm_movehl_ps(v3, v2); // z2?2z3?3 - - lo0 = lo0 * vLo; - lo1 = lo1 * vLo; - z = _mm_shuffle_ps(hi0, hi1, 0x88); - x = _mm_shuffle_ps(lo0, lo1, 0x88); - y = _mm_shuffle_ps(lo0, lo1, 0xdd); - z = z * vHi; - x = x + y; - x = x + z; - stack_array[index + 2] = x; - min = _mm_min_ps(x, min); // control the order here so that min is never NaN even if x is nan - - v0 = vertices[0]; - v1 = vertices[1]; - v2 = vertices[2]; - v3 = vertices[3]; - vertices += 4; - - lo0 = _mm_movelh_ps(v0, v1); // x0y0x1y1 - 
hi0 = _mm_movehl_ps(v1, v0); // z0?0z1?1 - lo1 = _mm_movelh_ps(v2, v3); // x2y2x3y3 - hi1 = _mm_movehl_ps(v3, v2); // z2?2z3?3 - - lo0 = lo0 * vLo; - lo1 = lo1 * vLo; - z = _mm_shuffle_ps(hi0, hi1, 0x88); - x = _mm_shuffle_ps(lo0, lo1, 0x88); - y = _mm_shuffle_ps(lo0, lo1, 0xdd); - z = z * vHi; - x = x + y; - x = x + z; - stack_array[index + 3] = x; - min = _mm_min_ps(x, min); // control the order here so that min is never NaN even if x is nan - - // It is too costly to keep the index of the min here. We will look for it again later. We save a lot of work this way. - } - - // If we found a new min - if (0xf != _mm_movemask_ps((float4)_mm_cmpeq_ps(min, dotmin))) - { - // copy the new min across all lanes of our min accumulator - min = _mm_min_ps(min, (float4)_mm_shuffle_ps(min, min, 0x4e)); - min = _mm_min_ps(min, (float4)_mm_shuffle_ps(min, min, 0xb1)); - - dotmin = min; - - // find first occurrence of that min - size_t test; - for (index = 0; 0 == (test = _mm_movemask_ps(_mm_cmpeq_ps(stack_array[index], min))); index++) // local_count must be a multiple of 4 - { - } - // record where it is. - minIndex = 4 * index + segment + indexTable[test]; - } - } - - // account for work we've already done - count -= segment; - - // Deal with the last < STACK_ARRAY_COUNT vectors - min = dotmin; - index = 0; - - if (b3Unlikely(count > 16)) - { - for (; index + 4 <= count / 4; index += 4) - { // do four dot products at a time. Carefully avoid touching the w element. 
- float4 v0 = vertices[0]; - float4 v1 = vertices[1]; - float4 v2 = vertices[2]; - float4 v3 = vertices[3]; - vertices += 4; - - float4 lo0 = _mm_movelh_ps(v0, v1); // x0y0x1y1 - float4 hi0 = _mm_movehl_ps(v1, v0); // z0?0z1?1 - float4 lo1 = _mm_movelh_ps(v2, v3); // x2y2x3y3 - float4 hi1 = _mm_movehl_ps(v3, v2); // z2?2z3?3 - - lo0 = lo0 * vLo; - lo1 = lo1 * vLo; - float4 z = _mm_shuffle_ps(hi0, hi1, 0x88); - float4 x = _mm_shuffle_ps(lo0, lo1, 0x88); - float4 y = _mm_shuffle_ps(lo0, lo1, 0xdd); - z = z * vHi; - x = x + y; - x = x + z; - stack_array[index] = x; - min = _mm_min_ps(x, min); // control the order here so that min is never NaN even if x is nan - - v0 = vertices[0]; - v1 = vertices[1]; - v2 = vertices[2]; - v3 = vertices[3]; - vertices += 4; - - lo0 = _mm_movelh_ps(v0, v1); // x0y0x1y1 - hi0 = _mm_movehl_ps(v1, v0); // z0?0z1?1 - lo1 = _mm_movelh_ps(v2, v3); // x2y2x3y3 - hi1 = _mm_movehl_ps(v3, v2); // z2?2z3?3 - - lo0 = lo0 * vLo; - lo1 = lo1 * vLo; - z = _mm_shuffle_ps(hi0, hi1, 0x88); - x = _mm_shuffle_ps(lo0, lo1, 0x88); - y = _mm_shuffle_ps(lo0, lo1, 0xdd); - z = z * vHi; - x = x + y; - x = x + z; - stack_array[index + 1] = x; - min = _mm_min_ps(x, min); // control the order here so that min is never NaN even if x is nan - - v0 = vertices[0]; - v1 = vertices[1]; - v2 = vertices[2]; - v3 = vertices[3]; - vertices += 4; - - lo0 = _mm_movelh_ps(v0, v1); // x0y0x1y1 - hi0 = _mm_movehl_ps(v1, v0); // z0?0z1?1 - lo1 = _mm_movelh_ps(v2, v3); // x2y2x3y3 - hi1 = _mm_movehl_ps(v3, v2); // z2?2z3?3 - - lo0 = lo0 * vLo; - lo1 = lo1 * vLo; - z = _mm_shuffle_ps(hi0, hi1, 0x88); - x = _mm_shuffle_ps(lo0, lo1, 0x88); - y = _mm_shuffle_ps(lo0, lo1, 0xdd); - z = z * vHi; - x = x + y; - x = x + z; - stack_array[index + 2] = x; - min = _mm_min_ps(x, min); // control the order here so that min is never NaN even if x is nan - - v0 = vertices[0]; - v1 = vertices[1]; - v2 = vertices[2]; - v3 = vertices[3]; - vertices += 4; - - lo0 = _mm_movelh_ps(v0, v1); // x0y0x1y1 - 
hi0 = _mm_movehl_ps(v1, v0); // z0?0z1?1 - lo1 = _mm_movelh_ps(v2, v3); // x2y2x3y3 - hi1 = _mm_movehl_ps(v3, v2); // z2?2z3?3 - - lo0 = lo0 * vLo; - lo1 = lo1 * vLo; - z = _mm_shuffle_ps(hi0, hi1, 0x88); - x = _mm_shuffle_ps(lo0, lo1, 0x88); - y = _mm_shuffle_ps(lo0, lo1, 0xdd); - z = z * vHi; - x = x + y; - x = x + z; - stack_array[index + 3] = x; - min = _mm_min_ps(x, min); // control the order here so that min is never NaN even if x is nan - - // It is too costly to keep the index of the min here. We will look for it again later. We save a lot of work this way. - } - } - - size_t localCount = (count & -4L) - 4 * index; - if (localCount) - { -#ifdef __APPLE__ - vertices += localCount; // counter the offset - float4 t0, t1, t2, t3, t4; - size_t byteIndex = -(localCount) * sizeof(float); - float4 *sap = &stack_array[index + localCount / 4]; - - asm volatile( - ".align 4 \n\ - 0: movaps %[min], %[t2] // move min out of the way to avoid propagating NaNs in min \n\ - movaps (%[vertices], %[byteIndex], 4), %[t0] // vertices[0] \n\ - movaps 16(%[vertices], %[byteIndex], 4), %[t1] // vertices[1] \n\ - movaps %[t0], %[min] // vertices[0] \n\ - movlhps %[t1], %[min] // x0y0x1y1 \n\ - movaps 32(%[vertices], %[byteIndex], 4), %[t3] // vertices[2] \n\ - movaps 48(%[vertices], %[byteIndex], 4), %[t4] // vertices[3] \n\ - mulps %[vLo], %[min] // x0y0x1y1 * vLo \n\ - movhlps %[t0], %[t1] // z0w0z1w1 \n\ - movaps %[t3], %[t0] // vertices[2] \n\ - movlhps %[t4], %[t0] // x2y2x3y3 \n\ - movhlps %[t3], %[t4] // z2w2z3w3 \n\ - mulps %[vLo], %[t0] // x2y2x3y3 * vLo \n\ - shufps $0x88, %[t4], %[t1] // z0z1z2z3 \n\ - mulps %[vHi], %[t1] // z0z1z2z3 * vHi \n\ - movaps %[min], %[t3] // x0y0x1y1 * vLo \n\ - shufps $0x88, %[t0], %[min] // x0x1x2x3 * vLo.x \n\ - shufps $0xdd, %[t0], %[t3] // y0y1y2y3 * vLo.y \n\ - addps %[t3], %[min] // x + y \n\ - addps %[t1], %[min] // x + y + z \n\ - movaps %[min], (%[sap], %[byteIndex]) // record result for later scrutiny \n\ - minps %[t2], %[min] // 
record min, restore min \n\ - add $16, %[byteIndex] // advance loop counter\n\ - jnz 0b \n\ - " - : [min] "+x"(min), [t0] "=&x"(t0), [t1] "=&x"(t1), [t2] "=&x"(t2), [t3] "=&x"(t3), [t4] "=&x"(t4), [byteIndex] "+r"(byteIndex) - : [vLo] "x"(vLo), [vHi] "x"(vHi), [vertices] "r"(vertices), [sap] "r"(sap) - : "memory", "cc"); - index += localCount / 4; -#else - { - for (unsigned int i = 0; i < localCount / 4; i++, index++) - { // do four dot products at a time. Carefully avoid touching the w element. - float4 v0 = vertices[0]; - float4 v1 = vertices[1]; - float4 v2 = vertices[2]; - float4 v3 = vertices[3]; - vertices += 4; - - float4 lo0 = _mm_movelh_ps(v0, v1); // x0y0x1y1 - float4 hi0 = _mm_movehl_ps(v1, v0); // z0?0z1?1 - float4 lo1 = _mm_movelh_ps(v2, v3); // x2y2x3y3 - float4 hi1 = _mm_movehl_ps(v3, v2); // z2?2z3?3 - - lo0 = lo0 * vLo; - lo1 = lo1 * vLo; - float4 z = _mm_shuffle_ps(hi0, hi1, 0x88); - float4 x = _mm_shuffle_ps(lo0, lo1, 0x88); - float4 y = _mm_shuffle_ps(lo0, lo1, 0xdd); - z = z * vHi; - x = x + y; - x = x + z; - stack_array[index] = x; - min = _mm_min_ps(x, min); // control the order here so that max is never NaN even if x is nan - } - } - -#endif - } - - // process the last few points - if (count & 3) - { - float4 v0, v1, v2, x, y, z; - switch (count & 3) - { - case 3: - { - v0 = vertices[0]; - v1 = vertices[1]; - v2 = vertices[2]; - - // Calculate 3 dot products, transpose, duplicate v2 - float4 lo0 = _mm_movelh_ps(v0, v1); // xyxy.lo - float4 hi0 = _mm_movehl_ps(v1, v0); // z?z?.lo - lo0 = lo0 * vLo; - z = _mm_shuffle_ps(hi0, v2, 0xa8); // z0z1z2z2 - z = z * vHi; - float4 lo1 = _mm_movelh_ps(v2, v2); // xyxy - lo1 = lo1 * vLo; - x = _mm_shuffle_ps(lo0, lo1, 0x88); - y = _mm_shuffle_ps(lo0, lo1, 0xdd); - } - break; - case 2: - { - v0 = vertices[0]; - v1 = vertices[1]; - float4 xy = _mm_movelh_ps(v0, v1); - z = _mm_movehl_ps(v1, v0); - xy = xy * vLo; - z = _mm_shuffle_ps(z, z, 0xa8); - x = _mm_shuffle_ps(xy, xy, 0xa8); - y = _mm_shuffle_ps(xy, 
xy, 0xfd); - z = z * vHi; - } - break; - case 1: - { - float4 xy = vertices[0]; - z = _mm_shuffle_ps(xy, xy, 0xaa); - xy = xy * vLo; - z = z * vHi; - x = _mm_shuffle_ps(xy, xy, 0); - y = _mm_shuffle_ps(xy, xy, 0x55); - } - break; - } - x = x + y; - x = x + z; - stack_array[index] = x; - min = _mm_min_ps(x, min); // control the order here so that min is never NaN even if x is nan - index++; - } - - // if we found a new min. - if (0 == segment || 0xf != _mm_movemask_ps((float4)_mm_cmpeq_ps(min, dotmin))) - { // we found a new min. Search for it - // find min across the min vector, place in all elements of min -- big latency hit here - min = _mm_min_ps(min, (float4)_mm_shuffle_ps(min, min, 0x4e)); - min = _mm_min_ps(min, (float4)_mm_shuffle_ps(min, min, 0xb1)); - - // It is slightly faster to do this part in scalar code when count < 8. However, the common case for - // this where it actually makes a difference is handled in the early out at the top of the function, - // so it is less than a 1% difference here. I opted for improved code size, fewer branches and reduced - // complexity, and removed it. 
- - dotmin = min; - - // scan for the first occurence of min in the array - size_t test; - for (index = 0; 0 == (test = _mm_movemask_ps(_mm_cmpeq_ps(stack_array[index], min))); index++) // local_count must be a multiple of 4 - { - } - minIndex = 4 * index + segment + indexTable[test]; - } - - _mm_store_ss(dotResult, dotmin); - return minIndex; -} - -#elif defined B3_USE_NEON -#define ARM_NEON_GCC_COMPATIBILITY 1 -#include <arm_neon.h> - -static long b3_maxdot_large_v0(const float *vv, const float *vec, unsigned long count, float *dotResult); -static long b3_maxdot_large_v1(const float *vv, const float *vec, unsigned long count, float *dotResult); -static long b3_maxdot_large_sel(const float *vv, const float *vec, unsigned long count, float *dotResult); -static long b3_mindot_large_v0(const float *vv, const float *vec, unsigned long count, float *dotResult); -static long b3_mindot_large_v1(const float *vv, const float *vec, unsigned long count, float *dotResult); -static long b3_mindot_large_sel(const float *vv, const float *vec, unsigned long count, float *dotResult); - -long (*b3_maxdot_large)(const float *vv, const float *vec, unsigned long count, float *dotResult) = b3_maxdot_large_sel; -long (*b3_mindot_large)(const float *vv, const float *vec, unsigned long count, float *dotResult) = b3_mindot_large_sel; - -extern "C" -{ - int _get_cpu_capabilities(void); -} - -static long b3_maxdot_large_sel(const float *vv, const float *vec, unsigned long count, float *dotResult) -{ - if (_get_cpu_capabilities() & 0x2000) - b3_maxdot_large = _maxdot_large_v1; - else - b3_maxdot_large = _maxdot_large_v0; - - return b3_maxdot_large(vv, vec, count, dotResult); -} - -static long b3_mindot_large_sel(const float *vv, const float *vec, unsigned long count, float *dotResult) -{ - if (_get_cpu_capabilities() & 0x2000) - b3_mindot_large = _mindot_large_v1; - else - b3_mindot_large = _mindot_large_v0; - - return b3_mindot_large(vv, vec, count, dotResult); -} - -#define 
vld1q_f32_aligned_postincrement(_ptr) ({ float32x4_t _r; asm( "vld1.f32 {%0}, [%1, :128]!\n" : "=w" (_r), "+r" (_ptr) ); /*return*/ _r; }) - -long b3_maxdot_large_v0(const float *vv, const float *vec, unsigned long count, float *dotResult) -{ - unsigned long i = 0; - float32x4_t vvec = vld1q_f32_aligned_postincrement(vec); - float32x2_t vLo = vget_low_f32(vvec); - float32x2_t vHi = vdup_lane_f32(vget_high_f32(vvec), 0); - float32x2_t dotMaxLo = (float32x2_t){-B3_INFINITY, -B3_INFINITY}; - float32x2_t dotMaxHi = (float32x2_t){-B3_INFINITY, -B3_INFINITY}; - uint32x2_t indexLo = (uint32x2_t){0, 1}; - uint32x2_t indexHi = (uint32x2_t){2, 3}; - uint32x2_t iLo = (uint32x2_t){-1, -1}; - uint32x2_t iHi = (uint32x2_t){-1, -1}; - const uint32x2_t four = (uint32x2_t){4, 4}; - - for (; i + 8 <= count; i += 8) - { - float32x4_t v0 = vld1q_f32_aligned_postincrement(vv); - float32x4_t v1 = vld1q_f32_aligned_postincrement(vv); - float32x4_t v2 = vld1q_f32_aligned_postincrement(vv); - float32x4_t v3 = vld1q_f32_aligned_postincrement(vv); - - float32x2_t xy0 = vmul_f32(vget_low_f32(v0), vLo); - float32x2_t xy1 = vmul_f32(vget_low_f32(v1), vLo); - float32x2_t xy2 = vmul_f32(vget_low_f32(v2), vLo); - float32x2_t xy3 = vmul_f32(vget_low_f32(v3), vLo); - - float32x2x2_t z0 = vtrn_f32(vget_high_f32(v0), vget_high_f32(v1)); - float32x2x2_t z1 = vtrn_f32(vget_high_f32(v2), vget_high_f32(v3)); - float32x2_t zLo = vmul_f32(z0.val[0], vHi); - float32x2_t zHi = vmul_f32(z1.val[0], vHi); - - float32x2_t rLo = vpadd_f32(xy0, xy1); - float32x2_t rHi = vpadd_f32(xy2, xy3); - rLo = vadd_f32(rLo, zLo); - rHi = vadd_f32(rHi, zHi); - - uint32x2_t maskLo = vcgt_f32(rLo, dotMaxLo); - uint32x2_t maskHi = vcgt_f32(rHi, dotMaxHi); - dotMaxLo = vbsl_f32(maskLo, rLo, dotMaxLo); - dotMaxHi = vbsl_f32(maskHi, rHi, dotMaxHi); - iLo = vbsl_u32(maskLo, indexLo, iLo); - iHi = vbsl_u32(maskHi, indexHi, iHi); - indexLo = vadd_u32(indexLo, four); - indexHi = vadd_u32(indexHi, four); - - v0 = 
vld1q_f32_aligned_postincrement(vv); - v1 = vld1q_f32_aligned_postincrement(vv); - v2 = vld1q_f32_aligned_postincrement(vv); - v3 = vld1q_f32_aligned_postincrement(vv); - - xy0 = vmul_f32(vget_low_f32(v0), vLo); - xy1 = vmul_f32(vget_low_f32(v1), vLo); - xy2 = vmul_f32(vget_low_f32(v2), vLo); - xy3 = vmul_f32(vget_low_f32(v3), vLo); - - z0 = vtrn_f32(vget_high_f32(v0), vget_high_f32(v1)); - z1 = vtrn_f32(vget_high_f32(v2), vget_high_f32(v3)); - zLo = vmul_f32(z0.val[0], vHi); - zHi = vmul_f32(z1.val[0], vHi); - - rLo = vpadd_f32(xy0, xy1); - rHi = vpadd_f32(xy2, xy3); - rLo = vadd_f32(rLo, zLo); - rHi = vadd_f32(rHi, zHi); - - maskLo = vcgt_f32(rLo, dotMaxLo); - maskHi = vcgt_f32(rHi, dotMaxHi); - dotMaxLo = vbsl_f32(maskLo, rLo, dotMaxLo); - dotMaxHi = vbsl_f32(maskHi, rHi, dotMaxHi); - iLo = vbsl_u32(maskLo, indexLo, iLo); - iHi = vbsl_u32(maskHi, indexHi, iHi); - indexLo = vadd_u32(indexLo, four); - indexHi = vadd_u32(indexHi, four); - } - - for (; i + 4 <= count; i += 4) - { - float32x4_t v0 = vld1q_f32_aligned_postincrement(vv); - float32x4_t v1 = vld1q_f32_aligned_postincrement(vv); - float32x4_t v2 = vld1q_f32_aligned_postincrement(vv); - float32x4_t v3 = vld1q_f32_aligned_postincrement(vv); - - float32x2_t xy0 = vmul_f32(vget_low_f32(v0), vLo); - float32x2_t xy1 = vmul_f32(vget_low_f32(v1), vLo); - float32x2_t xy2 = vmul_f32(vget_low_f32(v2), vLo); - float32x2_t xy3 = vmul_f32(vget_low_f32(v3), vLo); - - float32x2x2_t z0 = vtrn_f32(vget_high_f32(v0), vget_high_f32(v1)); - float32x2x2_t z1 = vtrn_f32(vget_high_f32(v2), vget_high_f32(v3)); - float32x2_t zLo = vmul_f32(z0.val[0], vHi); - float32x2_t zHi = vmul_f32(z1.val[0], vHi); - - float32x2_t rLo = vpadd_f32(xy0, xy1); - float32x2_t rHi = vpadd_f32(xy2, xy3); - rLo = vadd_f32(rLo, zLo); - rHi = vadd_f32(rHi, zHi); - - uint32x2_t maskLo = vcgt_f32(rLo, dotMaxLo); - uint32x2_t maskHi = vcgt_f32(rHi, dotMaxHi); - dotMaxLo = vbsl_f32(maskLo, rLo, dotMaxLo); - dotMaxHi = vbsl_f32(maskHi, rHi, dotMaxHi); - iLo = 
vbsl_u32(maskLo, indexLo, iLo); - iHi = vbsl_u32(maskHi, indexHi, iHi); - indexLo = vadd_u32(indexLo, four); - indexHi = vadd_u32(indexHi, four); - } - - switch (count & 3) - { - case 3: - { - float32x4_t v0 = vld1q_f32_aligned_postincrement(vv); - float32x4_t v1 = vld1q_f32_aligned_postincrement(vv); - float32x4_t v2 = vld1q_f32_aligned_postincrement(vv); - - float32x2_t xy0 = vmul_f32(vget_low_f32(v0), vLo); - float32x2_t xy1 = vmul_f32(vget_low_f32(v1), vLo); - float32x2_t xy2 = vmul_f32(vget_low_f32(v2), vLo); - - float32x2x2_t z0 = vtrn_f32(vget_high_f32(v0), vget_high_f32(v1)); - float32x2_t zLo = vmul_f32(z0.val[0], vHi); - float32x2_t zHi = vmul_f32(vdup_lane_f32(vget_high_f32(v2), 0), vHi); - - float32x2_t rLo = vpadd_f32(xy0, xy1); - float32x2_t rHi = vpadd_f32(xy2, xy2); - rLo = vadd_f32(rLo, zLo); - rHi = vadd_f32(rHi, zHi); - - uint32x2_t maskLo = vcgt_f32(rLo, dotMaxLo); - uint32x2_t maskHi = vcgt_f32(rHi, dotMaxHi); - dotMaxLo = vbsl_f32(maskLo, rLo, dotMaxLo); - dotMaxHi = vbsl_f32(maskHi, rHi, dotMaxHi); - iLo = vbsl_u32(maskLo, indexLo, iLo); - iHi = vbsl_u32(maskHi, indexHi, iHi); - } - break; - case 2: - { - float32x4_t v0 = vld1q_f32_aligned_postincrement(vv); - float32x4_t v1 = vld1q_f32_aligned_postincrement(vv); - - float32x2_t xy0 = vmul_f32(vget_low_f32(v0), vLo); - float32x2_t xy1 = vmul_f32(vget_low_f32(v1), vLo); - - float32x2x2_t z0 = vtrn_f32(vget_high_f32(v0), vget_high_f32(v1)); - float32x2_t zLo = vmul_f32(z0.val[0], vHi); - - float32x2_t rLo = vpadd_f32(xy0, xy1); - rLo = vadd_f32(rLo, zLo); - - uint32x2_t maskLo = vcgt_f32(rLo, dotMaxLo); - dotMaxLo = vbsl_f32(maskLo, rLo, dotMaxLo); - iLo = vbsl_u32(maskLo, indexLo, iLo); - } - break; - case 1: - { - float32x4_t v0 = vld1q_f32_aligned_postincrement(vv); - float32x2_t xy0 = vmul_f32(vget_low_f32(v0), vLo); - float32x2_t z0 = vdup_lane_f32(vget_high_f32(v0), 0); - float32x2_t zLo = vmul_f32(z0, vHi); - float32x2_t rLo = vpadd_f32(xy0, xy0); - rLo = vadd_f32(rLo, zLo); - uint32x2_t 
maskLo = vcgt_f32(rLo, dotMaxLo); - dotMaxLo = vbsl_f32(maskLo, rLo, dotMaxLo); - iLo = vbsl_u32(maskLo, indexLo, iLo); - } - break; - - default: - break; - } - - // select best answer between hi and lo results - uint32x2_t mask = vcgt_f32(dotMaxHi, dotMaxLo); - dotMaxLo = vbsl_f32(mask, dotMaxHi, dotMaxLo); - iLo = vbsl_u32(mask, iHi, iLo); - - // select best answer between even and odd results - dotMaxHi = vdup_lane_f32(dotMaxLo, 1); - iHi = vdup_lane_u32(iLo, 1); - mask = vcgt_f32(dotMaxHi, dotMaxLo); - dotMaxLo = vbsl_f32(mask, dotMaxHi, dotMaxLo); - iLo = vbsl_u32(mask, iHi, iLo); - - *dotResult = vget_lane_f32(dotMaxLo, 0); - return vget_lane_u32(iLo, 0); -} - -long b3_maxdot_large_v1(const float *vv, const float *vec, unsigned long count, float *dotResult) -{ - float32x4_t vvec = vld1q_f32_aligned_postincrement(vec); - float32x4_t vLo = vcombine_f32(vget_low_f32(vvec), vget_low_f32(vvec)); - float32x4_t vHi = vdupq_lane_f32(vget_high_f32(vvec), 0); - const uint32x4_t four = (uint32x4_t){4, 4, 4, 4}; - uint32x4_t local_index = (uint32x4_t){0, 1, 2, 3}; - uint32x4_t index = (uint32x4_t){-1, -1, -1, -1}; - float32x4_t maxDot = (float32x4_t){-B3_INFINITY, -B3_INFINITY, -B3_INFINITY, -B3_INFINITY}; - - unsigned long i = 0; - for (; i + 8 <= count; i += 8) - { - float32x4_t v0 = vld1q_f32_aligned_postincrement(vv); - float32x4_t v1 = vld1q_f32_aligned_postincrement(vv); - float32x4_t v2 = vld1q_f32_aligned_postincrement(vv); - float32x4_t v3 = vld1q_f32_aligned_postincrement(vv); - - // the next two lines should resolve to a single vswp d, d - float32x4_t xy0 = vcombine_f32(vget_low_f32(v0), vget_low_f32(v1)); - float32x4_t xy1 = vcombine_f32(vget_low_f32(v2), vget_low_f32(v3)); - // the next two lines should resolve to a single vswp d, d - float32x4_t z0 = vcombine_f32(vget_high_f32(v0), vget_high_f32(v1)); - float32x4_t z1 = vcombine_f32(vget_high_f32(v2), vget_high_f32(v3)); - - xy0 = vmulq_f32(xy0, vLo); - xy1 = vmulq_f32(xy1, vLo); - - float32x4x2_t zb = 
vuzpq_f32(z0, z1); - float32x4_t z = vmulq_f32(zb.val[0], vHi); - float32x4x2_t xy = vuzpq_f32(xy0, xy1); - float32x4_t x = vaddq_f32(xy.val[0], xy.val[1]); - x = vaddq_f32(x, z); - - uint32x4_t mask = vcgtq_f32(x, maxDot); - maxDot = vbslq_f32(mask, x, maxDot); - index = vbslq_u32(mask, local_index, index); - local_index = vaddq_u32(local_index, four); - - v0 = vld1q_f32_aligned_postincrement(vv); - v1 = vld1q_f32_aligned_postincrement(vv); - v2 = vld1q_f32_aligned_postincrement(vv); - v3 = vld1q_f32_aligned_postincrement(vv); - - // the next two lines should resolve to a single vswp d, d - xy0 = vcombine_f32(vget_low_f32(v0), vget_low_f32(v1)); - xy1 = vcombine_f32(vget_low_f32(v2), vget_low_f32(v3)); - // the next two lines should resolve to a single vswp d, d - z0 = vcombine_f32(vget_high_f32(v0), vget_high_f32(v1)); - z1 = vcombine_f32(vget_high_f32(v2), vget_high_f32(v3)); - - xy0 = vmulq_f32(xy0, vLo); - xy1 = vmulq_f32(xy1, vLo); - - zb = vuzpq_f32(z0, z1); - z = vmulq_f32(zb.val[0], vHi); - xy = vuzpq_f32(xy0, xy1); - x = vaddq_f32(xy.val[0], xy.val[1]); - x = vaddq_f32(x, z); - - mask = vcgtq_f32(x, maxDot); - maxDot = vbslq_f32(mask, x, maxDot); - index = vbslq_u32(mask, local_index, index); - local_index = vaddq_u32(local_index, four); - } - - for (; i + 4 <= count; i += 4) - { - float32x4_t v0 = vld1q_f32_aligned_postincrement(vv); - float32x4_t v1 = vld1q_f32_aligned_postincrement(vv); - float32x4_t v2 = vld1q_f32_aligned_postincrement(vv); - float32x4_t v3 = vld1q_f32_aligned_postincrement(vv); - - // the next two lines should resolve to a single vswp d, d - float32x4_t xy0 = vcombine_f32(vget_low_f32(v0), vget_low_f32(v1)); - float32x4_t xy1 = vcombine_f32(vget_low_f32(v2), vget_low_f32(v3)); - // the next two lines should resolve to a single vswp d, d - float32x4_t z0 = vcombine_f32(vget_high_f32(v0), vget_high_f32(v1)); - float32x4_t z1 = vcombine_f32(vget_high_f32(v2), vget_high_f32(v3)); - - xy0 = vmulq_f32(xy0, vLo); - xy1 = vmulq_f32(xy1, 
vLo); - - float32x4x2_t zb = vuzpq_f32(z0, z1); - float32x4_t z = vmulq_f32(zb.val[0], vHi); - float32x4x2_t xy = vuzpq_f32(xy0, xy1); - float32x4_t x = vaddq_f32(xy.val[0], xy.val[1]); - x = vaddq_f32(x, z); - - uint32x4_t mask = vcgtq_f32(x, maxDot); - maxDot = vbslq_f32(mask, x, maxDot); - index = vbslq_u32(mask, local_index, index); - local_index = vaddq_u32(local_index, four); - } - - switch (count & 3) - { - case 3: - { - float32x4_t v0 = vld1q_f32_aligned_postincrement(vv); - float32x4_t v1 = vld1q_f32_aligned_postincrement(vv); - float32x4_t v2 = vld1q_f32_aligned_postincrement(vv); - - // the next two lines should resolve to a single vswp d, d - float32x4_t xy0 = vcombine_f32(vget_low_f32(v0), vget_low_f32(v1)); - float32x4_t xy1 = vcombine_f32(vget_low_f32(v2), vget_low_f32(v2)); - // the next two lines should resolve to a single vswp d, d - float32x4_t z0 = vcombine_f32(vget_high_f32(v0), vget_high_f32(v1)); - float32x4_t z1 = vcombine_f32(vget_high_f32(v2), vget_high_f32(v2)); - - xy0 = vmulq_f32(xy0, vLo); - xy1 = vmulq_f32(xy1, vLo); - - float32x4x2_t zb = vuzpq_f32(z0, z1); - float32x4_t z = vmulq_f32(zb.val[0], vHi); - float32x4x2_t xy = vuzpq_f32(xy0, xy1); - float32x4_t x = vaddq_f32(xy.val[0], xy.val[1]); - x = vaddq_f32(x, z); - - uint32x4_t mask = vcgtq_f32(x, maxDot); - maxDot = vbslq_f32(mask, x, maxDot); - index = vbslq_u32(mask, local_index, index); - local_index = vaddq_u32(local_index, four); - } - break; - - case 2: - { - float32x4_t v0 = vld1q_f32_aligned_postincrement(vv); - float32x4_t v1 = vld1q_f32_aligned_postincrement(vv); - - // the next two lines should resolve to a single vswp d, d - float32x4_t xy0 = vcombine_f32(vget_low_f32(v0), vget_low_f32(v1)); - // the next two lines should resolve to a single vswp d, d - float32x4_t z0 = vcombine_f32(vget_high_f32(v0), vget_high_f32(v1)); - - xy0 = vmulq_f32(xy0, vLo); - - float32x4x2_t zb = vuzpq_f32(z0, z0); - float32x4_t z = vmulq_f32(zb.val[0], vHi); - float32x4x2_t xy = 
vuzpq_f32(xy0, xy0); - float32x4_t x = vaddq_f32(xy.val[0], xy.val[1]); - x = vaddq_f32(x, z); - - uint32x4_t mask = vcgtq_f32(x, maxDot); - maxDot = vbslq_f32(mask, x, maxDot); - index = vbslq_u32(mask, local_index, index); - local_index = vaddq_u32(local_index, four); - } - break; - - case 1: - { - float32x4_t v0 = vld1q_f32_aligned_postincrement(vv); - - // the next two lines should resolve to a single vswp d, d - float32x4_t xy0 = vcombine_f32(vget_low_f32(v0), vget_low_f32(v0)); - // the next two lines should resolve to a single vswp d, d - float32x4_t z = vdupq_lane_f32(vget_high_f32(v0), 0); - - xy0 = vmulq_f32(xy0, vLo); - - z = vmulq_f32(z, vHi); - float32x4x2_t xy = vuzpq_f32(xy0, xy0); - float32x4_t x = vaddq_f32(xy.val[0], xy.val[1]); - x = vaddq_f32(x, z); - - uint32x4_t mask = vcgtq_f32(x, maxDot); - maxDot = vbslq_f32(mask, x, maxDot); - index = vbslq_u32(mask, local_index, index); - local_index = vaddq_u32(local_index, four); - } - break; - - default: - break; - } - - // select best answer between hi and lo results - uint32x2_t mask = vcgt_f32(vget_high_f32(maxDot), vget_low_f32(maxDot)); - float32x2_t maxDot2 = vbsl_f32(mask, vget_high_f32(maxDot), vget_low_f32(maxDot)); - uint32x2_t index2 = vbsl_u32(mask, vget_high_u32(index), vget_low_u32(index)); - - // select best answer between even and odd results - float32x2_t maxDotO = vdup_lane_f32(maxDot2, 1); - uint32x2_t indexHi = vdup_lane_u32(index2, 1); - mask = vcgt_f32(maxDotO, maxDot2); - maxDot2 = vbsl_f32(mask, maxDotO, maxDot2); - index2 = vbsl_u32(mask, indexHi, index2); - - *dotResult = vget_lane_f32(maxDot2, 0); - return vget_lane_u32(index2, 0); -} - -long b3_mindot_large_v0(const float *vv, const float *vec, unsigned long count, float *dotResult) -{ - unsigned long i = 0; - float32x4_t vvec = vld1q_f32_aligned_postincrement(vec); - float32x2_t vLo = vget_low_f32(vvec); - float32x2_t vHi = vdup_lane_f32(vget_high_f32(vvec), 0); - float32x2_t dotMinLo = (float32x2_t){B3_INFINITY, 
B3_INFINITY}; - float32x2_t dotMinHi = (float32x2_t){B3_INFINITY, B3_INFINITY}; - uint32x2_t indexLo = (uint32x2_t){0, 1}; - uint32x2_t indexHi = (uint32x2_t){2, 3}; - uint32x2_t iLo = (uint32x2_t){-1, -1}; - uint32x2_t iHi = (uint32x2_t){-1, -1}; - const uint32x2_t four = (uint32x2_t){4, 4}; - - for (; i + 8 <= count; i += 8) - { - float32x4_t v0 = vld1q_f32_aligned_postincrement(vv); - float32x4_t v1 = vld1q_f32_aligned_postincrement(vv); - float32x4_t v2 = vld1q_f32_aligned_postincrement(vv); - float32x4_t v3 = vld1q_f32_aligned_postincrement(vv); - - float32x2_t xy0 = vmul_f32(vget_low_f32(v0), vLo); - float32x2_t xy1 = vmul_f32(vget_low_f32(v1), vLo); - float32x2_t xy2 = vmul_f32(vget_low_f32(v2), vLo); - float32x2_t xy3 = vmul_f32(vget_low_f32(v3), vLo); - - float32x2x2_t z0 = vtrn_f32(vget_high_f32(v0), vget_high_f32(v1)); - float32x2x2_t z1 = vtrn_f32(vget_high_f32(v2), vget_high_f32(v3)); - float32x2_t zLo = vmul_f32(z0.val[0], vHi); - float32x2_t zHi = vmul_f32(z1.val[0], vHi); - - float32x2_t rLo = vpadd_f32(xy0, xy1); - float32x2_t rHi = vpadd_f32(xy2, xy3); - rLo = vadd_f32(rLo, zLo); - rHi = vadd_f32(rHi, zHi); - - uint32x2_t maskLo = vclt_f32(rLo, dotMinLo); - uint32x2_t maskHi = vclt_f32(rHi, dotMinHi); - dotMinLo = vbsl_f32(maskLo, rLo, dotMinLo); - dotMinHi = vbsl_f32(maskHi, rHi, dotMinHi); - iLo = vbsl_u32(maskLo, indexLo, iLo); - iHi = vbsl_u32(maskHi, indexHi, iHi); - indexLo = vadd_u32(indexLo, four); - indexHi = vadd_u32(indexHi, four); - - v0 = vld1q_f32_aligned_postincrement(vv); - v1 = vld1q_f32_aligned_postincrement(vv); - v2 = vld1q_f32_aligned_postincrement(vv); - v3 = vld1q_f32_aligned_postincrement(vv); - - xy0 = vmul_f32(vget_low_f32(v0), vLo); - xy1 = vmul_f32(vget_low_f32(v1), vLo); - xy2 = vmul_f32(vget_low_f32(v2), vLo); - xy3 = vmul_f32(vget_low_f32(v3), vLo); - - z0 = vtrn_f32(vget_high_f32(v0), vget_high_f32(v1)); - z1 = vtrn_f32(vget_high_f32(v2), vget_high_f32(v3)); - zLo = vmul_f32(z0.val[0], vHi); - zHi = 
vmul_f32(z1.val[0], vHi); - - rLo = vpadd_f32(xy0, xy1); - rHi = vpadd_f32(xy2, xy3); - rLo = vadd_f32(rLo, zLo); - rHi = vadd_f32(rHi, zHi); - - maskLo = vclt_f32(rLo, dotMinLo); - maskHi = vclt_f32(rHi, dotMinHi); - dotMinLo = vbsl_f32(maskLo, rLo, dotMinLo); - dotMinHi = vbsl_f32(maskHi, rHi, dotMinHi); - iLo = vbsl_u32(maskLo, indexLo, iLo); - iHi = vbsl_u32(maskHi, indexHi, iHi); - indexLo = vadd_u32(indexLo, four); - indexHi = vadd_u32(indexHi, four); - } - - for (; i + 4 <= count; i += 4) - { - float32x4_t v0 = vld1q_f32_aligned_postincrement(vv); - float32x4_t v1 = vld1q_f32_aligned_postincrement(vv); - float32x4_t v2 = vld1q_f32_aligned_postincrement(vv); - float32x4_t v3 = vld1q_f32_aligned_postincrement(vv); - - float32x2_t xy0 = vmul_f32(vget_low_f32(v0), vLo); - float32x2_t xy1 = vmul_f32(vget_low_f32(v1), vLo); - float32x2_t xy2 = vmul_f32(vget_low_f32(v2), vLo); - float32x2_t xy3 = vmul_f32(vget_low_f32(v3), vLo); - - float32x2x2_t z0 = vtrn_f32(vget_high_f32(v0), vget_high_f32(v1)); - float32x2x2_t z1 = vtrn_f32(vget_high_f32(v2), vget_high_f32(v3)); - float32x2_t zLo = vmul_f32(z0.val[0], vHi); - float32x2_t zHi = vmul_f32(z1.val[0], vHi); - - float32x2_t rLo = vpadd_f32(xy0, xy1); - float32x2_t rHi = vpadd_f32(xy2, xy3); - rLo = vadd_f32(rLo, zLo); - rHi = vadd_f32(rHi, zHi); - - uint32x2_t maskLo = vclt_f32(rLo, dotMinLo); - uint32x2_t maskHi = vclt_f32(rHi, dotMinHi); - dotMinLo = vbsl_f32(maskLo, rLo, dotMinLo); - dotMinHi = vbsl_f32(maskHi, rHi, dotMinHi); - iLo = vbsl_u32(maskLo, indexLo, iLo); - iHi = vbsl_u32(maskHi, indexHi, iHi); - indexLo = vadd_u32(indexLo, four); - indexHi = vadd_u32(indexHi, four); - } - switch (count & 3) - { - case 3: - { - float32x4_t v0 = vld1q_f32_aligned_postincrement(vv); - float32x4_t v1 = vld1q_f32_aligned_postincrement(vv); - float32x4_t v2 = vld1q_f32_aligned_postincrement(vv); - - float32x2_t xy0 = vmul_f32(vget_low_f32(v0), vLo); - float32x2_t xy1 = vmul_f32(vget_low_f32(v1), vLo); - float32x2_t xy2 = 
vmul_f32(vget_low_f32(v2), vLo); - - float32x2x2_t z0 = vtrn_f32(vget_high_f32(v0), vget_high_f32(v1)); - float32x2_t zLo = vmul_f32(z0.val[0], vHi); - float32x2_t zHi = vmul_f32(vdup_lane_f32(vget_high_f32(v2), 0), vHi); - - float32x2_t rLo = vpadd_f32(xy0, xy1); - float32x2_t rHi = vpadd_f32(xy2, xy2); - rLo = vadd_f32(rLo, zLo); - rHi = vadd_f32(rHi, zHi); - - uint32x2_t maskLo = vclt_f32(rLo, dotMinLo); - uint32x2_t maskHi = vclt_f32(rHi, dotMinHi); - dotMinLo = vbsl_f32(maskLo, rLo, dotMinLo); - dotMinHi = vbsl_f32(maskHi, rHi, dotMinHi); - iLo = vbsl_u32(maskLo, indexLo, iLo); - iHi = vbsl_u32(maskHi, indexHi, iHi); - } - break; - case 2: - { - float32x4_t v0 = vld1q_f32_aligned_postincrement(vv); - float32x4_t v1 = vld1q_f32_aligned_postincrement(vv); - - float32x2_t xy0 = vmul_f32(vget_low_f32(v0), vLo); - float32x2_t xy1 = vmul_f32(vget_low_f32(v1), vLo); - - float32x2x2_t z0 = vtrn_f32(vget_high_f32(v0), vget_high_f32(v1)); - float32x2_t zLo = vmul_f32(z0.val[0], vHi); - - float32x2_t rLo = vpadd_f32(xy0, xy1); - rLo = vadd_f32(rLo, zLo); - - uint32x2_t maskLo = vclt_f32(rLo, dotMinLo); - dotMinLo = vbsl_f32(maskLo, rLo, dotMinLo); - iLo = vbsl_u32(maskLo, indexLo, iLo); - } - break; - case 1: - { - float32x4_t v0 = vld1q_f32_aligned_postincrement(vv); - float32x2_t xy0 = vmul_f32(vget_low_f32(v0), vLo); - float32x2_t z0 = vdup_lane_f32(vget_high_f32(v0), 0); - float32x2_t zLo = vmul_f32(z0, vHi); - float32x2_t rLo = vpadd_f32(xy0, xy0); - rLo = vadd_f32(rLo, zLo); - uint32x2_t maskLo = vclt_f32(rLo, dotMinLo); - dotMinLo = vbsl_f32(maskLo, rLo, dotMinLo); - iLo = vbsl_u32(maskLo, indexLo, iLo); - } - break; - - default: - break; - } - - // select best answer between hi and lo results - uint32x2_t mask = vclt_f32(dotMinHi, dotMinLo); - dotMinLo = vbsl_f32(mask, dotMinHi, dotMinLo); - iLo = vbsl_u32(mask, iHi, iLo); - - // select best answer between even and odd results - dotMinHi = vdup_lane_f32(dotMinLo, 1); - iHi = vdup_lane_u32(iLo, 1); - mask = 
vclt_f32(dotMinHi, dotMinLo); - dotMinLo = vbsl_f32(mask, dotMinHi, dotMinLo); - iLo = vbsl_u32(mask, iHi, iLo); - - *dotResult = vget_lane_f32(dotMinLo, 0); - return vget_lane_u32(iLo, 0); -} - -long b3_mindot_large_v1(const float *vv, const float *vec, unsigned long count, float *dotResult) -{ - float32x4_t vvec = vld1q_f32_aligned_postincrement(vec); - float32x4_t vLo = vcombine_f32(vget_low_f32(vvec), vget_low_f32(vvec)); - float32x4_t vHi = vdupq_lane_f32(vget_high_f32(vvec), 0); - const uint32x4_t four = (uint32x4_t){4, 4, 4, 4}; - uint32x4_t local_index = (uint32x4_t){0, 1, 2, 3}; - uint32x4_t index = (uint32x4_t){-1, -1, -1, -1}; - float32x4_t minDot = (float32x4_t){B3_INFINITY, B3_INFINITY, B3_INFINITY, B3_INFINITY}; - - unsigned long i = 0; - for (; i + 8 <= count; i += 8) - { - float32x4_t v0 = vld1q_f32_aligned_postincrement(vv); - float32x4_t v1 = vld1q_f32_aligned_postincrement(vv); - float32x4_t v2 = vld1q_f32_aligned_postincrement(vv); - float32x4_t v3 = vld1q_f32_aligned_postincrement(vv); - - // the next two lines should resolve to a single vswp d, d - float32x4_t xy0 = vcombine_f32(vget_low_f32(v0), vget_low_f32(v1)); - float32x4_t xy1 = vcombine_f32(vget_low_f32(v2), vget_low_f32(v3)); - // the next two lines should resolve to a single vswp d, d - float32x4_t z0 = vcombine_f32(vget_high_f32(v0), vget_high_f32(v1)); - float32x4_t z1 = vcombine_f32(vget_high_f32(v2), vget_high_f32(v3)); - - xy0 = vmulq_f32(xy0, vLo); - xy1 = vmulq_f32(xy1, vLo); - - float32x4x2_t zb = vuzpq_f32(z0, z1); - float32x4_t z = vmulq_f32(zb.val[0], vHi); - float32x4x2_t xy = vuzpq_f32(xy0, xy1); - float32x4_t x = vaddq_f32(xy.val[0], xy.val[1]); - x = vaddq_f32(x, z); - - uint32x4_t mask = vcltq_f32(x, minDot); - minDot = vbslq_f32(mask, x, minDot); - index = vbslq_u32(mask, local_index, index); - local_index = vaddq_u32(local_index, four); - - v0 = vld1q_f32_aligned_postincrement(vv); - v1 = vld1q_f32_aligned_postincrement(vv); - v2 = 
vld1q_f32_aligned_postincrement(vv); - v3 = vld1q_f32_aligned_postincrement(vv); - - // the next two lines should resolve to a single vswp d, d - xy0 = vcombine_f32(vget_low_f32(v0), vget_low_f32(v1)); - xy1 = vcombine_f32(vget_low_f32(v2), vget_low_f32(v3)); - // the next two lines should resolve to a single vswp d, d - z0 = vcombine_f32(vget_high_f32(v0), vget_high_f32(v1)); - z1 = vcombine_f32(vget_high_f32(v2), vget_high_f32(v3)); - - xy0 = vmulq_f32(xy0, vLo); - xy1 = vmulq_f32(xy1, vLo); - - zb = vuzpq_f32(z0, z1); - z = vmulq_f32(zb.val[0], vHi); - xy = vuzpq_f32(xy0, xy1); - x = vaddq_f32(xy.val[0], xy.val[1]); - x = vaddq_f32(x, z); - - mask = vcltq_f32(x, minDot); - minDot = vbslq_f32(mask, x, minDot); - index = vbslq_u32(mask, local_index, index); - local_index = vaddq_u32(local_index, four); - } - - for (; i + 4 <= count; i += 4) - { - float32x4_t v0 = vld1q_f32_aligned_postincrement(vv); - float32x4_t v1 = vld1q_f32_aligned_postincrement(vv); - float32x4_t v2 = vld1q_f32_aligned_postincrement(vv); - float32x4_t v3 = vld1q_f32_aligned_postincrement(vv); - - // the next two lines should resolve to a single vswp d, d - float32x4_t xy0 = vcombine_f32(vget_low_f32(v0), vget_low_f32(v1)); - float32x4_t xy1 = vcombine_f32(vget_low_f32(v2), vget_low_f32(v3)); - // the next two lines should resolve to a single vswp d, d - float32x4_t z0 = vcombine_f32(vget_high_f32(v0), vget_high_f32(v1)); - float32x4_t z1 = vcombine_f32(vget_high_f32(v2), vget_high_f32(v3)); - - xy0 = vmulq_f32(xy0, vLo); - xy1 = vmulq_f32(xy1, vLo); - - float32x4x2_t zb = vuzpq_f32(z0, z1); - float32x4_t z = vmulq_f32(zb.val[0], vHi); - float32x4x2_t xy = vuzpq_f32(xy0, xy1); - float32x4_t x = vaddq_f32(xy.val[0], xy.val[1]); - x = vaddq_f32(x, z); - - uint32x4_t mask = vcltq_f32(x, minDot); - minDot = vbslq_f32(mask, x, minDot); - index = vbslq_u32(mask, local_index, index); - local_index = vaddq_u32(local_index, four); - } - - switch (count & 3) - { - case 3: - { - float32x4_t v0 = 
vld1q_f32_aligned_postincrement(vv); - float32x4_t v1 = vld1q_f32_aligned_postincrement(vv); - float32x4_t v2 = vld1q_f32_aligned_postincrement(vv); - - // the next two lines should resolve to a single vswp d, d - float32x4_t xy0 = vcombine_f32(vget_low_f32(v0), vget_low_f32(v1)); - float32x4_t xy1 = vcombine_f32(vget_low_f32(v2), vget_low_f32(v2)); - // the next two lines should resolve to a single vswp d, d - float32x4_t z0 = vcombine_f32(vget_high_f32(v0), vget_high_f32(v1)); - float32x4_t z1 = vcombine_f32(vget_high_f32(v2), vget_high_f32(v2)); - - xy0 = vmulq_f32(xy0, vLo); - xy1 = vmulq_f32(xy1, vLo); - - float32x4x2_t zb = vuzpq_f32(z0, z1); - float32x4_t z = vmulq_f32(zb.val[0], vHi); - float32x4x2_t xy = vuzpq_f32(xy0, xy1); - float32x4_t x = vaddq_f32(xy.val[0], xy.val[1]); - x = vaddq_f32(x, z); - - uint32x4_t mask = vcltq_f32(x, minDot); - minDot = vbslq_f32(mask, x, minDot); - index = vbslq_u32(mask, local_index, index); - local_index = vaddq_u32(local_index, four); - } - break; - - case 2: - { - float32x4_t v0 = vld1q_f32_aligned_postincrement(vv); - float32x4_t v1 = vld1q_f32_aligned_postincrement(vv); - - // the next two lines should resolve to a single vswp d, d - float32x4_t xy0 = vcombine_f32(vget_low_f32(v0), vget_low_f32(v1)); - // the next two lines should resolve to a single vswp d, d - float32x4_t z0 = vcombine_f32(vget_high_f32(v0), vget_high_f32(v1)); - - xy0 = vmulq_f32(xy0, vLo); - - float32x4x2_t zb = vuzpq_f32(z0, z0); - float32x4_t z = vmulq_f32(zb.val[0], vHi); - float32x4x2_t xy = vuzpq_f32(xy0, xy0); - float32x4_t x = vaddq_f32(xy.val[0], xy.val[1]); - x = vaddq_f32(x, z); - - uint32x4_t mask = vcltq_f32(x, minDot); - minDot = vbslq_f32(mask, x, minDot); - index = vbslq_u32(mask, local_index, index); - local_index = vaddq_u32(local_index, four); - } - break; - - case 1: - { - float32x4_t v0 = vld1q_f32_aligned_postincrement(vv); - - // the next two lines should resolve to a single vswp d, d - float32x4_t xy0 = 
vcombine_f32(vget_low_f32(v0), vget_low_f32(v0)); - // the next two lines should resolve to a single vswp d, d - float32x4_t z = vdupq_lane_f32(vget_high_f32(v0), 0); - - xy0 = vmulq_f32(xy0, vLo); - - z = vmulq_f32(z, vHi); - float32x4x2_t xy = vuzpq_f32(xy0, xy0); - float32x4_t x = vaddq_f32(xy.val[0], xy.val[1]); - x = vaddq_f32(x, z); - - uint32x4_t mask = vcltq_f32(x, minDot); - minDot = vbslq_f32(mask, x, minDot); - index = vbslq_u32(mask, local_index, index); - local_index = vaddq_u32(local_index, four); - } - break; - - default: - break; - } - - // select best answer between hi and lo results - uint32x2_t mask = vclt_f32(vget_high_f32(minDot), vget_low_f32(minDot)); - float32x2_t minDot2 = vbsl_f32(mask, vget_high_f32(minDot), vget_low_f32(minDot)); - uint32x2_t index2 = vbsl_u32(mask, vget_high_u32(index), vget_low_u32(index)); - - // select best answer between even and odd results - float32x2_t minDotO = vdup_lane_f32(minDot2, 1); - uint32x2_t indexHi = vdup_lane_u32(index2, 1); - mask = vclt_f32(minDotO, minDot2); - minDot2 = vbsl_f32(mask, minDotO, minDot2); - index2 = vbsl_u32(mask, indexHi, index2); - - *dotResult = vget_lane_f32(minDot2, 0); - return vget_lane_u32(index2, 0); -} - -#else -#error Unhandled __APPLE__ arch -#endif - -#endif /* __APPLE__ */ diff --git a/thirdparty/bullet/Bullet3Common/b3Vector3.h b/thirdparty/bullet/Bullet3Common/b3Vector3.h deleted file mode 100644 index a70d68d6e1..0000000000 --- a/thirdparty/bullet/Bullet3Common/b3Vector3.h +++ /dev/null @@ -1,1303 +0,0 @@ -/* -Copyright (c) 2003-2013 Gino van den Bergen / Erwin Coumans http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. 
The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#ifndef B3_VECTOR3_H -#define B3_VECTOR3_H - -//#include <stdint.h> -#include "b3Scalar.h" -#include "b3MinMax.h" -#include "b3AlignedAllocator.h" - -#ifdef B3_USE_DOUBLE_PRECISION -#define b3Vector3Data b3Vector3DoubleData -#define b3Vector3DataName "b3Vector3DoubleData" -#else -#define b3Vector3Data b3Vector3FloatData -#define b3Vector3DataName "b3Vector3FloatData" -#endif //B3_USE_DOUBLE_PRECISION - -#if defined B3_USE_SSE - -//typedef uint32_t __m128i __attribute__ ((vector_size(16))); - -#ifdef _MSC_VER -#pragma warning(disable : 4556) // value of intrinsic immediate argument '4294967239' is out of range '0 - 255' -#endif - -#define B3_SHUFFLE(x, y, z, w) (((w) << 6 | (z) << 4 | (y) << 2 | (x)) & 0xff) -//#define b3_pshufd_ps( _a, _mask ) (__m128) _mm_shuffle_epi32((__m128i)(_a), (_mask) ) -#define b3_pshufd_ps(_a, _mask) _mm_shuffle_ps((_a), (_a), (_mask)) -#define b3_splat3_ps(_a, _i) b3_pshufd_ps((_a), B3_SHUFFLE(_i, _i, _i, 3)) -#define b3_splat_ps(_a, _i) b3_pshufd_ps((_a), B3_SHUFFLE(_i, _i, _i, _i)) - -#define b3v3AbsiMask (_mm_set_epi32(0x00000000, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF)) -#define b3vAbsMask (_mm_set_epi32(0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF)) -#define b3vFFF0Mask (_mm_set_epi32(0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF)) -#define b3v3AbsfMask b3CastiTo128f(b3v3AbsiMask) -#define b3vFFF0fMask b3CastiTo128f(b3vFFF0Mask) -#define b3vxyzMaskf b3vFFF0fMask -#define b3vAbsfMask b3CastiTo128f(b3vAbsMask) - -const __m128 B3_ATTRIBUTE_ALIGNED16(b3vMzeroMask) = {-0.0f, -0.0f, -0.0f, -0.0f}; 
-const __m128 B3_ATTRIBUTE_ALIGNED16(b3v1110) = {1.0f, 1.0f, 1.0f, 0.0f}; -const __m128 B3_ATTRIBUTE_ALIGNED16(b3vHalf) = {0.5f, 0.5f, 0.5f, 0.5f}; -const __m128 B3_ATTRIBUTE_ALIGNED16(b3v1_5) = {1.5f, 1.5f, 1.5f, 1.5f}; - -#endif - -#ifdef B3_USE_NEON - -const float32x4_t B3_ATTRIBUTE_ALIGNED16(b3vMzeroMask) = (float32x4_t){-0.0f, -0.0f, -0.0f, -0.0f}; -const int32x4_t B3_ATTRIBUTE_ALIGNED16(b3vFFF0Mask) = (int32x4_t){0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x0}; -const int32x4_t B3_ATTRIBUTE_ALIGNED16(b3vAbsMask) = (int32x4_t){0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF}; -const int32x4_t B3_ATTRIBUTE_ALIGNED16(b3v3AbsMask) = (int32x4_t){0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x0}; - -#endif - -class b3Vector3; -class b3Vector4; - -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) -//#if defined (B3_USE_SSE) || defined (B3_USE_NEON) -inline b3Vector3 b3MakeVector3(b3SimdFloat4 v); -inline b3Vector4 b3MakeVector4(b3SimdFloat4 vec); -#endif - -inline b3Vector3 b3MakeVector3(b3Scalar x, b3Scalar y, b3Scalar z); -inline b3Vector3 b3MakeVector3(b3Scalar x, b3Scalar y, b3Scalar z, b3Scalar w); -inline b3Vector4 b3MakeVector4(b3Scalar x, b3Scalar y, b3Scalar z, b3Scalar w); - -/**@brief b3Vector3 can be used to represent 3D points and vectors. - * It has an un-used w component to suit 16-byte alignment when b3Vector3 is stored in containers. This extra component can be used by derived classes (Quaternion?) 
or by user - * Ideally, this class should be replaced by a platform optimized SIMD version that keeps the data in registers - */ -B3_ATTRIBUTE_ALIGNED16(class) -b3Vector3 -{ -public: -#if defined(B3_USE_SSE) || defined(B3_USE_NEON) // _WIN32 || ARM - union { - b3SimdFloat4 mVec128; - float m_floats[4]; - struct - { - float x, y, z, w; - }; - }; -#else - union { - float m_floats[4]; - struct - { - float x, y, z, w; - }; - }; -#endif - -public: - B3_DECLARE_ALIGNED_ALLOCATOR(); - -#if defined(B3_USE_SSE) || defined(B3_USE_NEON) // _WIN32 || ARM - - /*B3_FORCE_INLINE b3Vector3() - { - } - */ - - B3_FORCE_INLINE b3SimdFloat4 get128() const - { - return mVec128; - } - B3_FORCE_INLINE void set128(b3SimdFloat4 v128) - { - mVec128 = v128; - } -#endif - -public: - /**@brief Add a vector to this one - * @param The vector to add to this one */ - B3_FORCE_INLINE b3Vector3& operator+=(const b3Vector3& v) - { -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - mVec128 = _mm_add_ps(mVec128, v.mVec128); -#elif defined(B3_USE_NEON) - mVec128 = vaddq_f32(mVec128, v.mVec128); -#else - m_floats[0] += v.m_floats[0]; - m_floats[1] += v.m_floats[1]; - m_floats[2] += v.m_floats[2]; -#endif - return *this; - } - - /**@brief Subtract a vector from this one - * @param The vector to subtract */ - B3_FORCE_INLINE b3Vector3& operator-=(const b3Vector3& v) - { -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - mVec128 = _mm_sub_ps(mVec128, v.mVec128); -#elif defined(B3_USE_NEON) - mVec128 = vsubq_f32(mVec128, v.mVec128); -#else - m_floats[0] -= v.m_floats[0]; - m_floats[1] -= v.m_floats[1]; - m_floats[2] -= v.m_floats[2]; -#endif - return *this; - } - - /**@brief Scale the vector - * @param s Scale factor */ - B3_FORCE_INLINE b3Vector3& operator*=(const b3Scalar& s) - { -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - __m128 vs = _mm_load_ss(&s); // (S 0 0 0) - vs = b3_pshufd_ps(vs, 0x80); // (S S S 0.0) - mVec128 = _mm_mul_ps(mVec128, vs); -#elif defined(B3_USE_NEON) - 
mVec128 = vmulq_n_f32(mVec128, s); -#else - m_floats[0] *= s; - m_floats[1] *= s; - m_floats[2] *= s; -#endif - return *this; - } - - /**@brief Inversely scale the vector - * @param s Scale factor to divide by */ - B3_FORCE_INLINE b3Vector3& operator/=(const b3Scalar& s) - { - b3FullAssert(s != b3Scalar(0.0)); - -#if 0 //defined(B3_USE_SSE_IN_API) -// this code is not faster ! - __m128 vs = _mm_load_ss(&s); - vs = _mm_div_ss(b3v1110, vs); - vs = b3_pshufd_ps(vs, 0x00); // (S S S S) - - mVec128 = _mm_mul_ps(mVec128, vs); - - return *this; -#else - return *this *= b3Scalar(1.0) / s; -#endif - } - - /**@brief Return the dot product - * @param v The other vector in the dot product */ - B3_FORCE_INLINE b3Scalar dot(const b3Vector3& v) const - { -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - __m128 vd = _mm_mul_ps(mVec128, v.mVec128); - __m128 z = _mm_movehl_ps(vd, vd); - __m128 y = _mm_shuffle_ps(vd, vd, 0x55); - vd = _mm_add_ss(vd, y); - vd = _mm_add_ss(vd, z); - return _mm_cvtss_f32(vd); -#elif defined(B3_USE_NEON) - float32x4_t vd = vmulq_f32(mVec128, v.mVec128); - float32x2_t x = vpadd_f32(vget_low_f32(vd), vget_low_f32(vd)); - x = vadd_f32(x, vget_high_f32(vd)); - return vget_lane_f32(x, 0); -#else - return m_floats[0] * v.m_floats[0] + - m_floats[1] * v.m_floats[1] + - m_floats[2] * v.m_floats[2]; -#endif - } - - /**@brief Return the length of the vector squared */ - B3_FORCE_INLINE b3Scalar length2() const - { - return dot(*this); - } - - /**@brief Return the length of the vector */ - B3_FORCE_INLINE b3Scalar length() const - { - return b3Sqrt(length2()); - } - - /**@brief Return the distance squared between the ends of this and another vector - * This is symantically treating the vector like a point */ - B3_FORCE_INLINE b3Scalar distance2(const b3Vector3& v) const; - - /**@brief Return the distance between the ends of this and another vector - * This is symantically treating the vector like a point */ - B3_FORCE_INLINE b3Scalar distance(const 
b3Vector3& v) const; - - B3_FORCE_INLINE b3Vector3& safeNormalize() - { - b3Scalar l2 = length2(); - //triNormal.normalize(); - if (l2 >= B3_EPSILON * B3_EPSILON) - { - (*this) /= b3Sqrt(l2); - } - else - { - setValue(1, 0, 0); - } - return *this; - } - - /**@brief Normalize this vector - * x^2 + y^2 + z^2 = 1 */ - B3_FORCE_INLINE b3Vector3& normalize() - { -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - // dot product first - __m128 vd = _mm_mul_ps(mVec128, mVec128); - __m128 z = _mm_movehl_ps(vd, vd); - __m128 y = _mm_shuffle_ps(vd, vd, 0x55); - vd = _mm_add_ss(vd, y); - vd = _mm_add_ss(vd, z); - -#if 0 - vd = _mm_sqrt_ss(vd); - vd = _mm_div_ss(b3v1110, vd); - vd = b3_splat_ps(vd, 0x80); - mVec128 = _mm_mul_ps(mVec128, vd); -#else - - // NR step 1/sqrt(x) - vd is x, y is output - y = _mm_rsqrt_ss(vd); // estimate - - // one step NR - z = b3v1_5; - vd = _mm_mul_ss(vd, b3vHalf); // vd * 0.5 - //x2 = vd; - vd = _mm_mul_ss(vd, y); // vd * 0.5 * y0 - vd = _mm_mul_ss(vd, y); // vd * 0.5 * y0 * y0 - z = _mm_sub_ss(z, vd); // 1.5 - vd * 0.5 * y0 * y0 - - y = _mm_mul_ss(y, z); // y0 * (1.5 - vd * 0.5 * y0 * y0) - - y = b3_splat_ps(y, 0x80); - mVec128 = _mm_mul_ps(mVec128, y); - -#endif - - return *this; -#else - return *this /= length(); -#endif - } - - /**@brief Return a normalized version of this vector */ - B3_FORCE_INLINE b3Vector3 normalized() const; - - /**@brief Return a rotated version of this vector - * @param wAxis The axis to rotate about - * @param angle The angle to rotate by */ - B3_FORCE_INLINE b3Vector3 rotate(const b3Vector3& wAxis, const b3Scalar angle) const; - - /**@brief Return the angle between this and another vector - * @param v The other vector */ - B3_FORCE_INLINE b3Scalar angle(const b3Vector3& v) const - { - b3Scalar s = b3Sqrt(length2() * v.length2()); - b3FullAssert(s != b3Scalar(0.0)); - return b3Acos(dot(v) / s); - } - - /**@brief Return a vector will the absolute values of each element */ - B3_FORCE_INLINE b3Vector3 absolute() 
const - { -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - return b3MakeVector3(_mm_and_ps(mVec128, b3v3AbsfMask)); -#elif defined(B3_USE_NEON) - return b3Vector3(vabsq_f32(mVec128)); -#else - return b3MakeVector3( - b3Fabs(m_floats[0]), - b3Fabs(m_floats[1]), - b3Fabs(m_floats[2])); -#endif - } - - /**@brief Return the cross product between this and another vector - * @param v The other vector */ - B3_FORCE_INLINE b3Vector3 cross(const b3Vector3& v) const - { -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - __m128 T, V; - - T = b3_pshufd_ps(mVec128, B3_SHUFFLE(1, 2, 0, 3)); // (Y Z X 0) - V = b3_pshufd_ps(v.mVec128, B3_SHUFFLE(1, 2, 0, 3)); // (Y Z X 0) - - V = _mm_mul_ps(V, mVec128); - T = _mm_mul_ps(T, v.mVec128); - V = _mm_sub_ps(V, T); - - V = b3_pshufd_ps(V, B3_SHUFFLE(1, 2, 0, 3)); - return b3MakeVector3(V); -#elif defined(B3_USE_NEON) - float32x4_t T, V; - // form (Y, Z, X, _) of mVec128 and v.mVec128 - float32x2_t Tlow = vget_low_f32(mVec128); - float32x2_t Vlow = vget_low_f32(v.mVec128); - T = vcombine_f32(vext_f32(Tlow, vget_high_f32(mVec128), 1), Tlow); - V = vcombine_f32(vext_f32(Vlow, vget_high_f32(v.mVec128), 1), Vlow); - - V = vmulq_f32(V, mVec128); - T = vmulq_f32(T, v.mVec128); - V = vsubq_f32(V, T); - Vlow = vget_low_f32(V); - // form (Y, Z, X, _); - V = vcombine_f32(vext_f32(Vlow, vget_high_f32(V), 1), Vlow); - V = (float32x4_t)vandq_s32((int32x4_t)V, b3vFFF0Mask); - - return b3Vector3(V); -#else - return b3MakeVector3( - m_floats[1] * v.m_floats[2] - m_floats[2] * v.m_floats[1], - m_floats[2] * v.m_floats[0] - m_floats[0] * v.m_floats[2], - m_floats[0] * v.m_floats[1] - m_floats[1] * v.m_floats[0]); -#endif - } - - B3_FORCE_INLINE b3Scalar triple(const b3Vector3& v1, const b3Vector3& v2) const - { -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - // cross: - __m128 T = _mm_shuffle_ps(v1.mVec128, v1.mVec128, B3_SHUFFLE(1, 2, 0, 3)); // (Y Z X 0) - __m128 V = _mm_shuffle_ps(v2.mVec128, v2.mVec128, B3_SHUFFLE(1, 2, 0, 3)); 
// (Y Z X 0) - - V = _mm_mul_ps(V, v1.mVec128); - T = _mm_mul_ps(T, v2.mVec128); - V = _mm_sub_ps(V, T); - - V = _mm_shuffle_ps(V, V, B3_SHUFFLE(1, 2, 0, 3)); - - // dot: - V = _mm_mul_ps(V, mVec128); - __m128 z = _mm_movehl_ps(V, V); - __m128 y = _mm_shuffle_ps(V, V, 0x55); - V = _mm_add_ss(V, y); - V = _mm_add_ss(V, z); - return _mm_cvtss_f32(V); - -#elif defined(B3_USE_NEON) - // cross: - float32x4_t T, V; - // form (Y, Z, X, _) of mVec128 and v.mVec128 - float32x2_t Tlow = vget_low_f32(v1.mVec128); - float32x2_t Vlow = vget_low_f32(v2.mVec128); - T = vcombine_f32(vext_f32(Tlow, vget_high_f32(v1.mVec128), 1), Tlow); - V = vcombine_f32(vext_f32(Vlow, vget_high_f32(v2.mVec128), 1), Vlow); - - V = vmulq_f32(V, v1.mVec128); - T = vmulq_f32(T, v2.mVec128); - V = vsubq_f32(V, T); - Vlow = vget_low_f32(V); - // form (Y, Z, X, _); - V = vcombine_f32(vext_f32(Vlow, vget_high_f32(V), 1), Vlow); - - // dot: - V = vmulq_f32(mVec128, V); - float32x2_t x = vpadd_f32(vget_low_f32(V), vget_low_f32(V)); - x = vadd_f32(x, vget_high_f32(V)); - return vget_lane_f32(x, 0); -#else - return m_floats[0] * (v1.m_floats[1] * v2.m_floats[2] - v1.m_floats[2] * v2.m_floats[1]) + - m_floats[1] * (v1.m_floats[2] * v2.m_floats[0] - v1.m_floats[0] * v2.m_floats[2]) + - m_floats[2] * (v1.m_floats[0] * v2.m_floats[1] - v1.m_floats[1] * v2.m_floats[0]); -#endif - } - - /**@brief Return the axis with the smallest value - * Note return values are 0,1,2 for x, y, or z */ - B3_FORCE_INLINE int minAxis() const - { - return m_floats[0] < m_floats[1] ? (m_floats[0] < m_floats[2] ? 0 : 2) : (m_floats[1] < m_floats[2] ? 1 : 2); - } - - /**@brief Return the axis with the largest value - * Note return values are 0,1,2 for x, y, or z */ - B3_FORCE_INLINE int maxAxis() const - { - return m_floats[0] < m_floats[1] ? (m_floats[1] < m_floats[2] ? 2 : 1) : (m_floats[0] < m_floats[2] ? 
2 : 0); - } - - B3_FORCE_INLINE int furthestAxis() const - { - return absolute().minAxis(); - } - - B3_FORCE_INLINE int closestAxis() const - { - return absolute().maxAxis(); - } - - B3_FORCE_INLINE void setInterpolate3(const b3Vector3& v0, const b3Vector3& v1, b3Scalar rt) - { -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - __m128 vrt = _mm_load_ss(&rt); // (rt 0 0 0) - b3Scalar s = b3Scalar(1.0) - rt; - __m128 vs = _mm_load_ss(&s); // (S 0 0 0) - vs = b3_pshufd_ps(vs, 0x80); // (S S S 0.0) - __m128 r0 = _mm_mul_ps(v0.mVec128, vs); - vrt = b3_pshufd_ps(vrt, 0x80); // (rt rt rt 0.0) - __m128 r1 = _mm_mul_ps(v1.mVec128, vrt); - __m128 tmp3 = _mm_add_ps(r0, r1); - mVec128 = tmp3; -#elif defined(B3_USE_NEON) - float32x4_t vl = vsubq_f32(v1.mVec128, v0.mVec128); - vl = vmulq_n_f32(vl, rt); - mVec128 = vaddq_f32(vl, v0.mVec128); -#else - b3Scalar s = b3Scalar(1.0) - rt; - m_floats[0] = s * v0.m_floats[0] + rt * v1.m_floats[0]; - m_floats[1] = s * v0.m_floats[1] + rt * v1.m_floats[1]; - m_floats[2] = s * v0.m_floats[2] + rt * v1.m_floats[2]; - //don't do the unused w component - // m_co[3] = s * v0[3] + rt * v1[3]; -#endif - } - - /**@brief Return the linear interpolation between this and another vector - * @param v The other vector - * @param t The ration of this to v (t = 0 => return this, t=1 => return other) */ - B3_FORCE_INLINE b3Vector3 lerp(const b3Vector3& v, const b3Scalar& t) const - { -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - __m128 vt = _mm_load_ss(&t); // (t 0 0 0) - vt = b3_pshufd_ps(vt, 0x80); // (rt rt rt 0.0) - __m128 vl = _mm_sub_ps(v.mVec128, mVec128); - vl = _mm_mul_ps(vl, vt); - vl = _mm_add_ps(vl, mVec128); - - return b3MakeVector3(vl); -#elif defined(B3_USE_NEON) - float32x4_t vl = vsubq_f32(v.mVec128, mVec128); - vl = vmulq_n_f32(vl, t); - vl = vaddq_f32(vl, mVec128); - - return b3Vector3(vl); -#else - return b3MakeVector3(m_floats[0] + (v.m_floats[0] - m_floats[0]) * t, - m_floats[1] + (v.m_floats[1] - m_floats[1]) * t, - 
m_floats[2] + (v.m_floats[2] - m_floats[2]) * t); -#endif - } - - /**@brief Elementwise multiply this vector by the other - * @param v The other vector */ - B3_FORCE_INLINE b3Vector3& operator*=(const b3Vector3& v) - { -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - mVec128 = _mm_mul_ps(mVec128, v.mVec128); -#elif defined(B3_USE_NEON) - mVec128 = vmulq_f32(mVec128, v.mVec128); -#else - m_floats[0] *= v.m_floats[0]; - m_floats[1] *= v.m_floats[1]; - m_floats[2] *= v.m_floats[2]; -#endif - return *this; - } - - /**@brief Return the x value */ - B3_FORCE_INLINE const b3Scalar& getX() const { return m_floats[0]; } - /**@brief Return the y value */ - B3_FORCE_INLINE const b3Scalar& getY() const { return m_floats[1]; } - /**@brief Return the z value */ - B3_FORCE_INLINE const b3Scalar& getZ() const { return m_floats[2]; } - /**@brief Return the w value */ - B3_FORCE_INLINE const b3Scalar& getW() const { return m_floats[3]; } - - /**@brief Set the x value */ - B3_FORCE_INLINE void setX(b3Scalar _x) { m_floats[0] = _x; }; - /**@brief Set the y value */ - B3_FORCE_INLINE void setY(b3Scalar _y) { m_floats[1] = _y; }; - /**@brief Set the z value */ - B3_FORCE_INLINE void setZ(b3Scalar _z) { m_floats[2] = _z; }; - /**@brief Set the w value */ - B3_FORCE_INLINE void setW(b3Scalar _w) { m_floats[3] = _w; }; - - //B3_FORCE_INLINE b3Scalar& operator[](int i) { return (&m_floats[0])[i]; } - //B3_FORCE_INLINE const b3Scalar& operator[](int i) const { return (&m_floats[0])[i]; } - ///operator b3Scalar*() replaces operator[], using implicit conversion. We added operator != and operator == to avoid pointer comparisons. 
- B3_FORCE_INLINE operator b3Scalar*() { return &m_floats[0]; } - B3_FORCE_INLINE operator const b3Scalar*() const { return &m_floats[0]; } - - B3_FORCE_INLINE bool operator==(const b3Vector3& other) const - { -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - return (0xf == _mm_movemask_ps((__m128)_mm_cmpeq_ps(mVec128, other.mVec128))); -#else - return ((m_floats[3] == other.m_floats[3]) && - (m_floats[2] == other.m_floats[2]) && - (m_floats[1] == other.m_floats[1]) && - (m_floats[0] == other.m_floats[0])); -#endif - } - - B3_FORCE_INLINE bool operator!=(const b3Vector3& other) const - { - return !(*this == other); - } - - /**@brief Set each element to the max of the current values and the values of another b3Vector3 - * @param other The other b3Vector3 to compare with - */ - B3_FORCE_INLINE void setMax(const b3Vector3& other) - { -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - mVec128 = _mm_max_ps(mVec128, other.mVec128); -#elif defined(B3_USE_NEON) - mVec128 = vmaxq_f32(mVec128, other.mVec128); -#else - b3SetMax(m_floats[0], other.m_floats[0]); - b3SetMax(m_floats[1], other.m_floats[1]); - b3SetMax(m_floats[2], other.m_floats[2]); - b3SetMax(m_floats[3], other.m_floats[3]); -#endif - } - - /**@brief Set each element to the min of the current values and the values of another b3Vector3 - * @param other The other b3Vector3 to compare with - */ - B3_FORCE_INLINE void setMin(const b3Vector3& other) - { -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - mVec128 = _mm_min_ps(mVec128, other.mVec128); -#elif defined(B3_USE_NEON) - mVec128 = vminq_f32(mVec128, other.mVec128); -#else - b3SetMin(m_floats[0], other.m_floats[0]); - b3SetMin(m_floats[1], other.m_floats[1]); - b3SetMin(m_floats[2], other.m_floats[2]); - b3SetMin(m_floats[3], other.m_floats[3]); -#endif - } - - B3_FORCE_INLINE void setValue(const b3Scalar& _x, const b3Scalar& _y, const b3Scalar& _z) - { - m_floats[0] = _x; - m_floats[1] = _y; - m_floats[2] = _z; - m_floats[3] = 
b3Scalar(0.f); - } - - void getSkewSymmetricMatrix(b3Vector3 * v0, b3Vector3 * v1, b3Vector3 * v2) const - { -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - - __m128 V = _mm_and_ps(mVec128, b3vFFF0fMask); - __m128 V0 = _mm_xor_ps(b3vMzeroMask, V); - __m128 V2 = _mm_movelh_ps(V0, V); - - __m128 V1 = _mm_shuffle_ps(V, V0, 0xCE); - - V0 = _mm_shuffle_ps(V0, V, 0xDB); - V2 = _mm_shuffle_ps(V2, V, 0xF9); - - v0->mVec128 = V0; - v1->mVec128 = V1; - v2->mVec128 = V2; -#else - v0->setValue(0., -getZ(), getY()); - v1->setValue(getZ(), 0., -getX()); - v2->setValue(-getY(), getX(), 0.); -#endif - } - - void setZero() - { -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - mVec128 = (__m128)_mm_xor_ps(mVec128, mVec128); -#elif defined(B3_USE_NEON) - int32x4_t vi = vdupq_n_s32(0); - mVec128 = vreinterpretq_f32_s32(vi); -#else - setValue(b3Scalar(0.), b3Scalar(0.), b3Scalar(0.)); -#endif - } - - B3_FORCE_INLINE bool isZero() const - { - return m_floats[0] == b3Scalar(0) && m_floats[1] == b3Scalar(0) && m_floats[2] == b3Scalar(0); - } - - B3_FORCE_INLINE bool fuzzyZero() const - { - return length2() < B3_EPSILON; - } - - B3_FORCE_INLINE void serialize(struct b3Vector3Data & dataOut) const; - - B3_FORCE_INLINE void deSerialize(const struct b3Vector3Data& dataIn); - - B3_FORCE_INLINE void serializeFloat(struct b3Vector3FloatData & dataOut) const; - - B3_FORCE_INLINE void deSerializeFloat(const struct b3Vector3FloatData& dataIn); - - B3_FORCE_INLINE void serializeDouble(struct b3Vector3DoubleData & dataOut) const; - - B3_FORCE_INLINE void deSerializeDouble(const struct b3Vector3DoubleData& dataIn); - - /**@brief returns index of maximum dot product between this and vectors in array[] - * @param array The other vectors - * @param array_count The number of other vectors - * @param dotOut The maximum dot product */ - B3_FORCE_INLINE long maxDot(const b3Vector3* array, long array_count, b3Scalar& dotOut) const; - - /**@brief returns index of minimum dot product between 
this and vectors in array[] - * @param array The other vectors - * @param array_count The number of other vectors - * @param dotOut The minimum dot product */ - B3_FORCE_INLINE long minDot(const b3Vector3* array, long array_count, b3Scalar& dotOut) const; - - /* create a vector as b3Vector3( this->dot( b3Vector3 v0 ), this->dot( b3Vector3 v1), this->dot( b3Vector3 v2 )) */ - B3_FORCE_INLINE b3Vector3 dot3(const b3Vector3& v0, const b3Vector3& v1, const b3Vector3& v2) const - { -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - - __m128 a0 = _mm_mul_ps(v0.mVec128, this->mVec128); - __m128 a1 = _mm_mul_ps(v1.mVec128, this->mVec128); - __m128 a2 = _mm_mul_ps(v2.mVec128, this->mVec128); - __m128 b0 = _mm_unpacklo_ps(a0, a1); - __m128 b1 = _mm_unpackhi_ps(a0, a1); - __m128 b2 = _mm_unpacklo_ps(a2, _mm_setzero_ps()); - __m128 r = _mm_movelh_ps(b0, b2); - r = _mm_add_ps(r, _mm_movehl_ps(b2, b0)); - a2 = _mm_and_ps(a2, b3vxyzMaskf); - r = _mm_add_ps(r, b3CastdTo128f(_mm_move_sd(b3CastfTo128d(a2), b3CastfTo128d(b1)))); - return b3MakeVector3(r); - -#elif defined(B3_USE_NEON) - static const uint32x4_t xyzMask = (const uint32x4_t){-1, -1, -1, 0}; - float32x4_t a0 = vmulq_f32(v0.mVec128, this->mVec128); - float32x4_t a1 = vmulq_f32(v1.mVec128, this->mVec128); - float32x4_t a2 = vmulq_f32(v2.mVec128, this->mVec128); - float32x2x2_t zLo = vtrn_f32(vget_high_f32(a0), vget_high_f32(a1)); - a2 = (float32x4_t)vandq_u32((uint32x4_t)a2, xyzMask); - float32x2_t b0 = vadd_f32(vpadd_f32(vget_low_f32(a0), vget_low_f32(a1)), zLo.val[0]); - float32x2_t b1 = vpadd_f32(vpadd_f32(vget_low_f32(a2), vget_high_f32(a2)), vdup_n_f32(0.0f)); - return b3Vector3(vcombine_f32(b0, b1)); -#else - return b3MakeVector3(dot(v0), dot(v1), dot(v2)); -#endif - } -}; - -/**@brief Return the sum of two vectors (Point symantics)*/ -B3_FORCE_INLINE b3Vector3 -operator+(const b3Vector3& v1, const b3Vector3& v2) -{ -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - return 
b3MakeVector3(_mm_add_ps(v1.mVec128, v2.mVec128)); -#elif defined(B3_USE_NEON) - return b3MakeVector3(vaddq_f32(v1.mVec128, v2.mVec128)); -#else - return b3MakeVector3( - v1.m_floats[0] + v2.m_floats[0], - v1.m_floats[1] + v2.m_floats[1], - v1.m_floats[2] + v2.m_floats[2]); -#endif -} - -/**@brief Return the elementwise product of two vectors */ -B3_FORCE_INLINE b3Vector3 -operator*(const b3Vector3& v1, const b3Vector3& v2) -{ -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - return b3MakeVector3(_mm_mul_ps(v1.mVec128, v2.mVec128)); -#elif defined(B3_USE_NEON) - return b3MakeVector3(vmulq_f32(v1.mVec128, v2.mVec128)); -#else - return b3MakeVector3( - v1.m_floats[0] * v2.m_floats[0], - v1.m_floats[1] * v2.m_floats[1], - v1.m_floats[2] * v2.m_floats[2]); -#endif -} - -/**@brief Return the difference between two vectors */ -B3_FORCE_INLINE b3Vector3 -operator-(const b3Vector3& v1, const b3Vector3& v2) -{ -#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) - - // without _mm_and_ps this code causes slowdown in Concave moving - __m128 r = _mm_sub_ps(v1.mVec128, v2.mVec128); - return b3MakeVector3(_mm_and_ps(r, b3vFFF0fMask)); -#elif defined(B3_USE_NEON) - float32x4_t r = vsubq_f32(v1.mVec128, v2.mVec128); - return b3MakeVector3((float32x4_t)vandq_s32((int32x4_t)r, b3vFFF0Mask)); -#else - return b3MakeVector3( - v1.m_floats[0] - v2.m_floats[0], - v1.m_floats[1] - v2.m_floats[1], - v1.m_floats[2] - v2.m_floats[2]); -#endif -} - -/**@brief Return the negative of the vector */ -B3_FORCE_INLINE b3Vector3 -operator-(const b3Vector3& v) -{ -#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) - __m128 r = _mm_xor_ps(v.mVec128, b3vMzeroMask); - return b3MakeVector3(_mm_and_ps(r, b3vFFF0fMask)); -#elif defined(B3_USE_NEON) - return b3MakeVector3((b3SimdFloat4)veorq_s32((int32x4_t)v.mVec128, (int32x4_t)b3vMzeroMask)); -#else - return b3MakeVector3(-v.m_floats[0], -v.m_floats[1], -v.m_floats[2]); -#endif -} - -/**@brief Return the vector scaled by s */ 
-B3_FORCE_INLINE b3Vector3 -operator*(const b3Vector3& v, const b3Scalar& s) -{ -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - __m128 vs = _mm_load_ss(&s); // (S 0 0 0) - vs = b3_pshufd_ps(vs, 0x80); // (S S S 0.0) - return b3MakeVector3(_mm_mul_ps(v.mVec128, vs)); -#elif defined(B3_USE_NEON) - float32x4_t r = vmulq_n_f32(v.mVec128, s); - return b3MakeVector3((float32x4_t)vandq_s32((int32x4_t)r, b3vFFF0Mask)); -#else - return b3MakeVector3(v.m_floats[0] * s, v.m_floats[1] * s, v.m_floats[2] * s); -#endif -} - -/**@brief Return the vector scaled by s */ -B3_FORCE_INLINE b3Vector3 -operator*(const b3Scalar& s, const b3Vector3& v) -{ - return v * s; -} - -/**@brief Return the vector inversely scaled by s */ -B3_FORCE_INLINE b3Vector3 -operator/(const b3Vector3& v, const b3Scalar& s) -{ - b3FullAssert(s != b3Scalar(0.0)); -#if 0 //defined(B3_USE_SSE_IN_API) -// this code is not faster ! - __m128 vs = _mm_load_ss(&s); - vs = _mm_div_ss(b3v1110, vs); - vs = b3_pshufd_ps(vs, 0x00); // (S S S S) - - return b3Vector3(_mm_mul_ps(v.mVec128, vs)); -#else - return v * (b3Scalar(1.0) / s); -#endif -} - -/**@brief Return the vector inversely scaled by s */ -B3_FORCE_INLINE b3Vector3 -operator/(const b3Vector3& v1, const b3Vector3& v2) -{ -#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) - __m128 vec = _mm_div_ps(v1.mVec128, v2.mVec128); - vec = _mm_and_ps(vec, b3vFFF0fMask); - return b3MakeVector3(vec); -#elif defined(B3_USE_NEON) - float32x4_t x, y, v, m; - - x = v1.mVec128; - y = v2.mVec128; - - v = vrecpeq_f32(y); // v ~ 1/y - m = vrecpsq_f32(y, v); // m = (2-v*y) - v = vmulq_f32(v, m); // vv = v*m ~~ 1/y - m = vrecpsq_f32(y, v); // mm = (2-vv*y) - v = vmulq_f32(v, x); // x*vv - v = vmulq_f32(v, m); // (x*vv)*(2-vv*y) = x*(vv(2-vv*y)) ~~~ x/y - - return b3Vector3(v); -#else - return b3MakeVector3( - v1.m_floats[0] / v2.m_floats[0], - v1.m_floats[1] / v2.m_floats[1], - v1.m_floats[2] / v2.m_floats[2]); -#endif -} - -/**@brief Return the dot product between two 
vectors */ -B3_FORCE_INLINE b3Scalar -b3Dot(const b3Vector3& v1, const b3Vector3& v2) -{ - return v1.dot(v2); -} - -/**@brief Return the distance squared between two vectors */ -B3_FORCE_INLINE b3Scalar -b3Distance2(const b3Vector3& v1, const b3Vector3& v2) -{ - return v1.distance2(v2); -} - -/**@brief Return the distance between two vectors */ -B3_FORCE_INLINE b3Scalar -b3Distance(const b3Vector3& v1, const b3Vector3& v2) -{ - return v1.distance(v2); -} - -/**@brief Return the angle between two vectors */ -B3_FORCE_INLINE b3Scalar -b3Angle(const b3Vector3& v1, const b3Vector3& v2) -{ - return v1.angle(v2); -} - -/**@brief Return the cross product of two vectors */ -B3_FORCE_INLINE b3Vector3 -b3Cross(const b3Vector3& v1, const b3Vector3& v2) -{ - return v1.cross(v2); -} - -B3_FORCE_INLINE b3Scalar -b3Triple(const b3Vector3& v1, const b3Vector3& v2, const b3Vector3& v3) -{ - return v1.triple(v2, v3); -} - -/**@brief Return the linear interpolation between two vectors - * @param v1 One vector - * @param v2 The other vector - * @param t The ration of this to v (t = 0 => return v1, t=1 => return v2) */ -B3_FORCE_INLINE b3Vector3 -b3Lerp(const b3Vector3& v1, const b3Vector3& v2, const b3Scalar& t) -{ - return v1.lerp(v2, t); -} - -B3_FORCE_INLINE b3Scalar b3Vector3::distance2(const b3Vector3& v) const -{ - return (v - *this).length2(); -} - -B3_FORCE_INLINE b3Scalar b3Vector3::distance(const b3Vector3& v) const -{ - return (v - *this).length(); -} - -B3_FORCE_INLINE b3Vector3 b3Vector3::normalized() const -{ -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - b3Vector3 norm = *this; - - return norm.normalize(); -#else - return *this / length(); -#endif -} - -B3_FORCE_INLINE b3Vector3 b3Vector3::rotate(const b3Vector3& wAxis, const b3Scalar _angle) const -{ - // wAxis must be a unit lenght vector - -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - - __m128 O = _mm_mul_ps(wAxis.mVec128, mVec128); - b3Scalar ssin = b3Sin(_angle); - __m128 C = 
wAxis.cross(b3MakeVector3(mVec128)).mVec128; - O = _mm_and_ps(O, b3vFFF0fMask); - b3Scalar scos = b3Cos(_angle); - - __m128 vsin = _mm_load_ss(&ssin); // (S 0 0 0) - __m128 vcos = _mm_load_ss(&scos); // (S 0 0 0) - - __m128 Y = b3_pshufd_ps(O, 0xC9); // (Y Z X 0) - __m128 Z = b3_pshufd_ps(O, 0xD2); // (Z X Y 0) - O = _mm_add_ps(O, Y); - vsin = b3_pshufd_ps(vsin, 0x80); // (S S S 0) - O = _mm_add_ps(O, Z); - vcos = b3_pshufd_ps(vcos, 0x80); // (S S S 0) - - vsin = vsin * C; - O = O * wAxis.mVec128; - __m128 X = mVec128 - O; - - O = O + vsin; - vcos = vcos * X; - O = O + vcos; - - return b3MakeVector3(O); -#else - b3Vector3 o = wAxis * wAxis.dot(*this); - b3Vector3 _x = *this - o; - b3Vector3 _y; - - _y = wAxis.cross(*this); - - return (o + _x * b3Cos(_angle) + _y * b3Sin(_angle)); -#endif -} - -B3_FORCE_INLINE long b3Vector3::maxDot(const b3Vector3* array, long array_count, b3Scalar& dotOut) const -{ -#if defined(B3_USE_SSE) || defined(B3_USE_NEON) -#if defined _WIN32 || defined(B3_USE_SSE) - const long scalar_cutoff = 10; - long b3_maxdot_large(const float* array, const float* vec, unsigned long array_count, float* dotOut); -#elif defined B3_USE_NEON - const long scalar_cutoff = 4; - extern long (*_maxdot_large)(const float* array, const float* vec, unsigned long array_count, float* dotOut); -#endif - if (array_count < scalar_cutoff) -#else - -#endif //B3_USE_SSE || B3_USE_NEON - { - b3Scalar maxDot = -B3_INFINITY; - int i = 0; - int ptIndex = -1; - for (i = 0; i < array_count; i++) - { - b3Scalar dot = array[i].dot(*this); - - if (dot > maxDot) - { - maxDot = dot; - ptIndex = i; - } - } - - b3Assert(ptIndex >= 0); - if (ptIndex < 0) - { - ptIndex = 0; - } - dotOut = maxDot; - return ptIndex; - } -#if defined(B3_USE_SSE) || defined(B3_USE_NEON) - return b3_maxdot_large((float*)array, (float*)&m_floats[0], array_count, &dotOut); -#endif -} - -B3_FORCE_INLINE long b3Vector3::minDot(const b3Vector3* array, long array_count, b3Scalar& dotOut) const -{ -#if 
defined(B3_USE_SSE) || defined(B3_USE_NEON) -#if defined B3_USE_SSE - const long scalar_cutoff = 10; - long b3_mindot_large(const float* array, const float* vec, unsigned long array_count, float* dotOut); -#elif defined B3_USE_NEON - const long scalar_cutoff = 4; - extern long (*b3_mindot_large)(const float* array, const float* vec, unsigned long array_count, float* dotOut); -#else -#error unhandled arch! -#endif - - if (array_count < scalar_cutoff) -#endif //B3_USE_SSE || B3_USE_NEON - { - b3Scalar minDot = B3_INFINITY; - int i = 0; - int ptIndex = -1; - - for (i = 0; i < array_count; i++) - { - b3Scalar dot = array[i].dot(*this); - - if (dot < minDot) - { - minDot = dot; - ptIndex = i; - } - } - - dotOut = minDot; - - return ptIndex; - } -#if defined(B3_USE_SSE) || defined(B3_USE_NEON) - return b3_mindot_large((float*)array, (float*)&m_floats[0], array_count, &dotOut); -#endif -} - -class b3Vector4 : public b3Vector3 -{ -public: - B3_FORCE_INLINE b3Vector4 absolute4() const - { -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - return b3MakeVector4(_mm_and_ps(mVec128, b3vAbsfMask)); -#elif defined(B3_USE_NEON) - return b3Vector4(vabsq_f32(mVec128)); -#else - return b3MakeVector4( - b3Fabs(m_floats[0]), - b3Fabs(m_floats[1]), - b3Fabs(m_floats[2]), - b3Fabs(m_floats[3])); -#endif - } - - b3Scalar getW() const { return m_floats[3]; } - - B3_FORCE_INLINE int maxAxis4() const - { - int maxIndex = -1; - b3Scalar maxVal = b3Scalar(-B3_LARGE_FLOAT); - if (m_floats[0] > maxVal) - { - maxIndex = 0; - maxVal = m_floats[0]; - } - if (m_floats[1] > maxVal) - { - maxIndex = 1; - maxVal = m_floats[1]; - } - if (m_floats[2] > maxVal) - { - maxIndex = 2; - maxVal = m_floats[2]; - } - if (m_floats[3] > maxVal) - { - maxIndex = 3; - } - - return maxIndex; - } - - B3_FORCE_INLINE int minAxis4() const - { - int minIndex = -1; - b3Scalar minVal = b3Scalar(B3_LARGE_FLOAT); - if (m_floats[0] < minVal) - { - minIndex = 0; - minVal = m_floats[0]; - } - if (m_floats[1] < minVal) - { 
- minIndex = 1; - minVal = m_floats[1]; - } - if (m_floats[2] < minVal) - { - minIndex = 2; - minVal = m_floats[2]; - } - if (m_floats[3] < minVal) - { - minIndex = 3; - minVal = m_floats[3]; - } - - return minIndex; - } - - B3_FORCE_INLINE int closestAxis4() const - { - return absolute4().maxAxis4(); - } - - /**@brief Set x,y,z and zero w - * @param x Value of x - * @param y Value of y - * @param z Value of z - */ - - /* void getValue(b3Scalar *m) const - { - m[0] = m_floats[0]; - m[1] = m_floats[1]; - m[2] =m_floats[2]; - } -*/ - /**@brief Set the values - * @param x Value of x - * @param y Value of y - * @param z Value of z - * @param w Value of w - */ - B3_FORCE_INLINE void setValue(const b3Scalar& _x, const b3Scalar& _y, const b3Scalar& _z, const b3Scalar& _w) - { - m_floats[0] = _x; - m_floats[1] = _y; - m_floats[2] = _z; - m_floats[3] = _w; - } -}; - -///b3SwapVector3Endian swaps vector endianness, useful for network and cross-platform serialization -B3_FORCE_INLINE void b3SwapScalarEndian(const b3Scalar& sourceVal, b3Scalar& destVal) -{ -#ifdef B3_USE_DOUBLE_PRECISION - unsigned char* dest = (unsigned char*)&destVal; - unsigned char* src = (unsigned char*)&sourceVal; - dest[0] = src[7]; - dest[1] = src[6]; - dest[2] = src[5]; - dest[3] = src[4]; - dest[4] = src[3]; - dest[5] = src[2]; - dest[6] = src[1]; - dest[7] = src[0]; -#else - unsigned char* dest = (unsigned char*)&destVal; - unsigned char* src = (unsigned char*)&sourceVal; - dest[0] = src[3]; - dest[1] = src[2]; - dest[2] = src[1]; - dest[3] = src[0]; -#endif //B3_USE_DOUBLE_PRECISION -} -///b3SwapVector3Endian swaps vector endianness, useful for network and cross-platform serialization -B3_FORCE_INLINE void b3SwapVector3Endian(const b3Vector3& sourceVec, b3Vector3& destVec) -{ - for (int i = 0; i < 4; i++) - { - b3SwapScalarEndian(sourceVec[i], destVec[i]); - } -} - -///b3UnSwapVector3Endian swaps vector endianness, useful for network and cross-platform serialization -B3_FORCE_INLINE void 
b3UnSwapVector3Endian(b3Vector3& vector) -{ - b3Vector3 swappedVec; - for (int i = 0; i < 4; i++) - { - b3SwapScalarEndian(vector[i], swappedVec[i]); - } - vector = swappedVec; -} - -template <class T> -B3_FORCE_INLINE void b3PlaneSpace1(const T& n, T& p, T& q) -{ - if (b3Fabs(n[2]) > B3_SQRT12) - { - // choose p in y-z plane - b3Scalar a = n[1] * n[1] + n[2] * n[2]; - b3Scalar k = b3RecipSqrt(a); - p[0] = 0; - p[1] = -n[2] * k; - p[2] = n[1] * k; - // set q = n x p - q[0] = a * k; - q[1] = -n[0] * p[2]; - q[2] = n[0] * p[1]; - } - else - { - // choose p in x-y plane - b3Scalar a = n[0] * n[0] + n[1] * n[1]; - b3Scalar k = b3RecipSqrt(a); - p[0] = -n[1] * k; - p[1] = n[0] * k; - p[2] = 0; - // set q = n x p - q[0] = -n[2] * p[1]; - q[1] = n[2] * p[0]; - q[2] = a * k; - } -} - -struct b3Vector3FloatData -{ - float m_floats[4]; -}; - -struct b3Vector3DoubleData -{ - double m_floats[4]; -}; - -B3_FORCE_INLINE void b3Vector3::serializeFloat(struct b3Vector3FloatData& dataOut) const -{ - ///could also do a memcpy, check if it is worth it - for (int i = 0; i < 4; i++) - dataOut.m_floats[i] = float(m_floats[i]); -} - -B3_FORCE_INLINE void b3Vector3::deSerializeFloat(const struct b3Vector3FloatData& dataIn) -{ - for (int i = 0; i < 4; i++) - m_floats[i] = b3Scalar(dataIn.m_floats[i]); -} - -B3_FORCE_INLINE void b3Vector3::serializeDouble(struct b3Vector3DoubleData& dataOut) const -{ - ///could also do a memcpy, check if it is worth it - for (int i = 0; i < 4; i++) - dataOut.m_floats[i] = double(m_floats[i]); -} - -B3_FORCE_INLINE void b3Vector3::deSerializeDouble(const struct b3Vector3DoubleData& dataIn) -{ - for (int i = 0; i < 4; i++) - m_floats[i] = b3Scalar(dataIn.m_floats[i]); -} - -B3_FORCE_INLINE void b3Vector3::serialize(struct b3Vector3Data& dataOut) const -{ - ///could also do a memcpy, check if it is worth it - for (int i = 0; i < 4; i++) - dataOut.m_floats[i] = m_floats[i]; -} - -B3_FORCE_INLINE void b3Vector3::deSerialize(const struct b3Vector3Data& dataIn) -{ 
- for (int i = 0; i < 4; i++) - m_floats[i] = dataIn.m_floats[i]; -} - -inline b3Vector3 b3MakeVector3(b3Scalar x, b3Scalar y, b3Scalar z) -{ - b3Vector3 tmp; - tmp.setValue(x, y, z); - return tmp; -} - -inline b3Vector3 b3MakeVector3(b3Scalar x, b3Scalar y, b3Scalar z, b3Scalar w) -{ - b3Vector3 tmp; - tmp.setValue(x, y, z); - tmp.w = w; - return tmp; -} - -inline b3Vector4 b3MakeVector4(b3Scalar x, b3Scalar y, b3Scalar z, b3Scalar w) -{ - b3Vector4 tmp; - tmp.setValue(x, y, z, w); - return tmp; -} - -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - -inline b3Vector3 b3MakeVector3(b3SimdFloat4 v) -{ - b3Vector3 tmp; - tmp.set128(v); - return tmp; -} - -inline b3Vector4 b3MakeVector4(b3SimdFloat4 vec) -{ - b3Vector4 tmp; - tmp.set128(vec); - return tmp; -} - -#endif - -#endif //B3_VECTOR3_H diff --git a/thirdparty/bullet/Bullet3Common/shared/b3Float4.h b/thirdparty/bullet/Bullet3Common/shared/b3Float4.h deleted file mode 100644 index d8a9f47411..0000000000 --- a/thirdparty/bullet/Bullet3Common/shared/b3Float4.h +++ /dev/null @@ -1,90 +0,0 @@ -#ifndef B3_FLOAT4_H -#define B3_FLOAT4_H - -#include "Bullet3Common/shared/b3PlatformDefinitions.h" - -#ifdef __cplusplus -#include "Bullet3Common/b3Vector3.h" -#define b3Float4 b3Vector3 -#define b3Float4ConstArg const b3Vector3& -#define b3Dot3F4 b3Dot -#define b3Cross3 b3Cross -#define b3MakeFloat4 b3MakeVector3 -inline b3Vector3 b3Normalized(const b3Vector3& vec) -{ - return vec.normalized(); -} - -inline b3Float4 b3FastNormalized3(b3Float4ConstArg v) -{ - return v.normalized(); -} - -inline b3Float4 b3MaxFloat4(const b3Float4& a, const b3Float4& b) -{ - b3Float4 tmp = a; - tmp.setMax(b); - return tmp; -} -inline b3Float4 b3MinFloat4(const b3Float4& a, const b3Float4& b) -{ - b3Float4 tmp = a; - tmp.setMin(b); - return tmp; -} - -#else -typedef float4 b3Float4; -#define b3Float4ConstArg const b3Float4 -#define b3MakeFloat4 (float4) -float b3Dot3F4(b3Float4ConstArg v0, b3Float4ConstArg v1) -{ - float4 a1 = 
b3MakeFloat4(v0.xyz, 0.f); - float4 b1 = b3MakeFloat4(v1.xyz, 0.f); - return dot(a1, b1); -} -b3Float4 b3Cross3(b3Float4ConstArg v0, b3Float4ConstArg v1) -{ - float4 a1 = b3MakeFloat4(v0.xyz, 0.f); - float4 b1 = b3MakeFloat4(v1.xyz, 0.f); - return cross(a1, b1); -} -#define b3MinFloat4 min -#define b3MaxFloat4 max - -#define b3Normalized(a) normalize(a) - -#endif - -inline bool b3IsAlmostZero(b3Float4ConstArg v) -{ - if (b3Fabs(v.x) > 1e-6 || b3Fabs(v.y) > 1e-6 || b3Fabs(v.z) > 1e-6) - return false; - return true; -} - -inline int b3MaxDot(b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut) -{ - float maxDot = -B3_INFINITY; - int i = 0; - int ptIndex = -1; - for (i = 0; i < vecLen; i++) - { - float dot = b3Dot3F4(vecArray[i], vec); - - if (dot > maxDot) - { - maxDot = dot; - ptIndex = i; - } - } - b3Assert(ptIndex >= 0); - if (ptIndex < 0) - { - ptIndex = 0; - } - *dotOut = maxDot; - return ptIndex; -} - -#endif //B3_FLOAT4_H diff --git a/thirdparty/bullet/Bullet3Common/shared/b3Int2.h b/thirdparty/bullet/Bullet3Common/shared/b3Int2.h deleted file mode 100644 index 7b84de4436..0000000000 --- a/thirdparty/bullet/Bullet3Common/shared/b3Int2.h +++ /dev/null @@ -1,63 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2013 Erwin Coumans http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. 
Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#ifndef B3_INT2_H -#define B3_INT2_H - -#ifdef __cplusplus - -struct b3UnsignedInt2 -{ - union { - struct - { - unsigned int x, y; - }; - struct - { - unsigned int s[2]; - }; - }; -}; - -struct b3Int2 -{ - union { - struct - { - int x, y; - }; - struct - { - int s[2]; - }; - }; -}; - -inline b3Int2 b3MakeInt2(int x, int y) -{ - b3Int2 v; - v.s[0] = x; - v.s[1] = y; - return v; -} -#else - -#define b3UnsignedInt2 uint2 -#define b3Int2 int2 -#define b3MakeInt2 (int2) - -#endif //__cplusplus -#endif
\ No newline at end of file diff --git a/thirdparty/bullet/Bullet3Common/shared/b3Int4.h b/thirdparty/bullet/Bullet3Common/shared/b3Int4.h deleted file mode 100644 index f6a1754245..0000000000 --- a/thirdparty/bullet/Bullet3Common/shared/b3Int4.h +++ /dev/null @@ -1,71 +0,0 @@ -#ifndef B3_INT4_H -#define B3_INT4_H - -#ifdef __cplusplus - -#include "Bullet3Common/b3Scalar.h" - -B3_ATTRIBUTE_ALIGNED16(struct) -b3UnsignedInt4 -{ - B3_DECLARE_ALIGNED_ALLOCATOR(); - - union { - struct - { - unsigned int x, y, z, w; - }; - struct - { - unsigned int s[4]; - }; - }; -}; - -B3_ATTRIBUTE_ALIGNED16(struct) -b3Int4 -{ - B3_DECLARE_ALIGNED_ALLOCATOR(); - - union { - struct - { - int x, y, z, w; - }; - struct - { - int s[4]; - }; - }; -}; - -B3_FORCE_INLINE b3Int4 b3MakeInt4(int x, int y, int z, int w = 0) -{ - b3Int4 v; - v.s[0] = x; - v.s[1] = y; - v.s[2] = z; - v.s[3] = w; - return v; -} - -B3_FORCE_INLINE b3UnsignedInt4 b3MakeUnsignedInt4(unsigned int x, unsigned int y, unsigned int z, unsigned int w = 0) -{ - b3UnsignedInt4 v; - v.s[0] = x; - v.s[1] = y; - v.s[2] = z; - v.s[3] = w; - return v; -} - -#else - -#define b3UnsignedInt4 uint4 -#define b3Int4 int4 -#define b3MakeInt4 (int4) -#define b3MakeUnsignedInt4 (uint4) - -#endif //__cplusplus - -#endif //B3_INT4_H diff --git a/thirdparty/bullet/Bullet3Common/shared/b3Mat3x3.h b/thirdparty/bullet/Bullet3Common/shared/b3Mat3x3.h deleted file mode 100644 index ce6482b5a6..0000000000 --- a/thirdparty/bullet/Bullet3Common/shared/b3Mat3x3.h +++ /dev/null @@ -1,157 +0,0 @@ - -#ifndef B3_MAT3x3_H -#define B3_MAT3x3_H - -#include "Bullet3Common/shared/b3Quat.h" - -#ifdef __cplusplus - -#include "Bullet3Common/b3Matrix3x3.h" - -#define b3Mat3x3 b3Matrix3x3 -#define b3Mat3x3ConstArg const b3Matrix3x3& - -inline b3Mat3x3 b3QuatGetRotationMatrix(b3QuatConstArg quat) -{ - return b3Mat3x3(quat); -} - -inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg mat) -{ - return mat.absolute(); -} - -#define b3GetRow(m, row) m.getRow(row) - -__inline 
b3Float4 mtMul3(b3Float4ConstArg a, b3Mat3x3ConstArg b) -{ - return b * a; -} - -#else - -typedef struct -{ - b3Float4 m_row[3]; -} b3Mat3x3; - -#define b3Mat3x3ConstArg const b3Mat3x3 -#define b3GetRow(m, row) (m.m_row[row]) - -inline b3Mat3x3 b3QuatGetRotationMatrix(b3Quat quat) -{ - b3Float4 quat2 = (b3Float4)(quat.x * quat.x, quat.y * quat.y, quat.z * quat.z, 0.f); - b3Mat3x3 out; - - out.m_row[0].x = 1 - 2 * quat2.y - 2 * quat2.z; - out.m_row[0].y = 2 * quat.x * quat.y - 2 * quat.w * quat.z; - out.m_row[0].z = 2 * quat.x * quat.z + 2 * quat.w * quat.y; - out.m_row[0].w = 0.f; - - out.m_row[1].x = 2 * quat.x * quat.y + 2 * quat.w * quat.z; - out.m_row[1].y = 1 - 2 * quat2.x - 2 * quat2.z; - out.m_row[1].z = 2 * quat.y * quat.z - 2 * quat.w * quat.x; - out.m_row[1].w = 0.f; - - out.m_row[2].x = 2 * quat.x * quat.z - 2 * quat.w * quat.y; - out.m_row[2].y = 2 * quat.y * quat.z + 2 * quat.w * quat.x; - out.m_row[2].z = 1 - 2 * quat2.x - 2 * quat2.y; - out.m_row[2].w = 0.f; - - return out; -} - -inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg matIn) -{ - b3Mat3x3 out; - out.m_row[0] = fabs(matIn.m_row[0]); - out.m_row[1] = fabs(matIn.m_row[1]); - out.m_row[2] = fabs(matIn.m_row[2]); - return out; -} - -__inline b3Mat3x3 mtZero(); - -__inline b3Mat3x3 mtIdentity(); - -__inline b3Mat3x3 mtTranspose(b3Mat3x3 m); - -__inline b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b); - -__inline b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b); - -__inline b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b); - -__inline b3Mat3x3 mtZero() -{ - b3Mat3x3 m; - m.m_row[0] = (b3Float4)(0.f); - m.m_row[1] = (b3Float4)(0.f); - m.m_row[2] = (b3Float4)(0.f); - return m; -} - -__inline b3Mat3x3 mtIdentity() -{ - b3Mat3x3 m; - m.m_row[0] = (b3Float4)(1, 0, 0, 0); - m.m_row[1] = (b3Float4)(0, 1, 0, 0); - m.m_row[2] = (b3Float4)(0, 0, 1, 0); - return m; -} - -__inline b3Mat3x3 mtTranspose(b3Mat3x3 m) -{ - b3Mat3x3 out; - out.m_row[0] = (b3Float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f); - out.m_row[1] = 
(b3Float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f); - out.m_row[2] = (b3Float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f); - return out; -} - -__inline b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b) -{ - b3Mat3x3 transB; - transB = mtTranspose(b); - b3Mat3x3 ans; - // why this doesn't run when 0ing in the for{} - a.m_row[0].w = 0.f; - a.m_row[1].w = 0.f; - a.m_row[2].w = 0.f; - for (int i = 0; i < 3; i++) - { - // a.m_row[i].w = 0.f; - ans.m_row[i].x = b3Dot3F4(a.m_row[i], transB.m_row[0]); - ans.m_row[i].y = b3Dot3F4(a.m_row[i], transB.m_row[1]); - ans.m_row[i].z = b3Dot3F4(a.m_row[i], transB.m_row[2]); - ans.m_row[i].w = 0.f; - } - return ans; -} - -__inline b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b) -{ - b3Float4 ans; - ans.x = b3Dot3F4(a.m_row[0], b); - ans.y = b3Dot3F4(a.m_row[1], b); - ans.z = b3Dot3F4(a.m_row[2], b); - ans.w = 0.f; - return ans; -} - -__inline b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b) -{ - b3Float4 colx = b3MakeFloat4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0); - b3Float4 coly = b3MakeFloat4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0); - b3Float4 colz = b3MakeFloat4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0); - - b3Float4 ans; - ans.x = b3Dot3F4(a, colx); - ans.y = b3Dot3F4(a, coly); - ans.z = b3Dot3F4(a, colz); - return ans; -} - -#endif - -#endif //B3_MAT3x3_H diff --git a/thirdparty/bullet/Bullet3Common/shared/b3PlatformDefinitions.h b/thirdparty/bullet/Bullet3Common/shared/b3PlatformDefinitions.h deleted file mode 100644 index b72bee9310..0000000000 --- a/thirdparty/bullet/Bullet3Common/shared/b3PlatformDefinitions.h +++ /dev/null @@ -1,41 +0,0 @@ -#ifndef B3_PLATFORM_DEFINITIONS_H -#define B3_PLATFORM_DEFINITIONS_H - -struct MyTest -{ - int bla; -}; - -#ifdef __cplusplus -//#define b3ConstArray(a) const b3AlignedObjectArray<a>& -#define b3ConstArray(a) const a * -#define b3AtomicInc(a) ((*a)++) - -inline int b3AtomicAdd(volatile int *p, int val) -{ - int oldValue = *p; - int newValue = oldValue + val; - *p = newValue; - return 
oldValue; -} - -#define __global - -#define B3_STATIC static -#else -//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX -#define B3_LARGE_FLOAT 1e18f -#define B3_INFINITY 1e18f -#define b3Assert(a) -#define b3ConstArray(a) __global const a * -#define b3AtomicInc atomic_inc -#define b3AtomicAdd atomic_add -#define b3Fabs fabs -#define b3Sqrt native_sqrt -#define b3Sin native_sin -#define b3Cos native_cos - -#define B3_STATIC -#endif - -#endif diff --git a/thirdparty/bullet/Bullet3Common/shared/b3Quat.h b/thirdparty/bullet/Bullet3Common/shared/b3Quat.h deleted file mode 100644 index 940610c77b..0000000000 --- a/thirdparty/bullet/Bullet3Common/shared/b3Quat.h +++ /dev/null @@ -1,100 +0,0 @@ -#ifndef B3_QUAT_H -#define B3_QUAT_H - -#include "Bullet3Common/shared/b3PlatformDefinitions.h" -#include "Bullet3Common/shared/b3Float4.h" - -#ifdef __cplusplus -#include "Bullet3Common/b3Quaternion.h" -#include "Bullet3Common/b3Transform.h" - -#define b3Quat b3Quaternion -#define b3QuatConstArg const b3Quaternion& -inline b3Quat b3QuatInverse(b3QuatConstArg orn) -{ - return orn.inverse(); -} - -inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg orientation) -{ - b3Transform tr; - tr.setOrigin(translation); - tr.setRotation(orientation); - return tr(point); -} - -#else -typedef float4 b3Quat; -#define b3QuatConstArg const b3Quat - -inline float4 b3FastNormalize4(float4 v) -{ - v = (float4)(v.xyz, 0.f); - return fast_normalize(v); -} - -inline b3Quat b3QuatMul(b3Quat a, b3Quat b); -inline b3Quat b3QuatNormalized(b3QuatConstArg in); -inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec); -inline b3Quat b3QuatInvert(b3QuatConstArg q); -inline b3Quat b3QuatInverse(b3QuatConstArg q); - -inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b) -{ - b3Quat ans; - ans = b3Cross3(a, b); - ans += a.w * b + b.w * a; - // ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z); - ans.w = a.w * b.w - b3Dot3F4(a, b); - return ans; -} - -inline 
b3Quat b3QuatNormalized(b3QuatConstArg in) -{ - b3Quat q; - q = in; - //return b3FastNormalize4(in); - float len = native_sqrt(dot(q, q)); - if (len > 0.f) - { - q *= 1.f / len; - } - else - { - q.x = q.y = q.z = 0.f; - q.w = 1.f; - } - return q; -} -inline float4 b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec) -{ - b3Quat qInv = b3QuatInvert(q); - float4 vcpy = vec; - vcpy.w = 0.f; - float4 out = b3QuatMul(b3QuatMul(q, vcpy), qInv); - return out; -} - -inline b3Quat b3QuatInverse(b3QuatConstArg q) -{ - return (b3Quat)(-q.xyz, q.w); -} - -inline b3Quat b3QuatInvert(b3QuatConstArg q) -{ - return (b3Quat)(-q.xyz, q.w); -} - -inline float4 b3QuatInvRotate(b3QuatConstArg q, b3QuatConstArg vec) -{ - return b3QuatRotate(b3QuatInvert(q), vec); -} - -inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg orientation) -{ - return b3QuatRotate(orientation, point) + (translation); -} - -#endif - -#endif //B3_QUAT_H |