diff options
Diffstat (limited to 'thirdparty/bullet/src/Bullet3Collision/BroadPhaseCollision/b3DynamicBvh.h')
-rw-r--r-- | thirdparty/bullet/src/Bullet3Collision/BroadPhaseCollision/b3DynamicBvh.h | 1269 |
1 files changed, 1269 insertions, 0 deletions
diff --git a/thirdparty/bullet/src/Bullet3Collision/BroadPhaseCollision/b3DynamicBvh.h b/thirdparty/bullet/src/Bullet3Collision/BroadPhaseCollision/b3DynamicBvh.h new file mode 100644 index 0000000000..c004b9130f --- /dev/null +++ b/thirdparty/bullet/src/Bullet3Collision/BroadPhaseCollision/b3DynamicBvh.h @@ -0,0 +1,1269 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2013 Erwin Coumans http://bulletphysics.org + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ +///b3DynamicBvh implementation by Nathanael Presson + +#ifndef B3_DYNAMIC_BOUNDING_VOLUME_TREE_H +#define B3_DYNAMIC_BOUNDING_VOLUME_TREE_H + +#include "Bullet3Common/b3AlignedObjectArray.h" +#include "Bullet3Common/b3Vector3.h" +#include "Bullet3Common/b3Transform.h" +#include "Bullet3Geometry/b3AabbUtil.h" + +// +// Compile time configuration +// + + +// Implementation profiles +#define B3_DBVT_IMPL_GENERIC 0 // Generic implementation +#define B3_DBVT_IMPL_SSE 1 // SSE + +// Template implementation of ICollide +#ifdef _WIN32 +#if (defined (_MSC_VER) && _MSC_VER >= 1400) +#define B3_DBVT_USE_TEMPLATE 1 +#else +#define B3_DBVT_USE_TEMPLATE 0 +#endif +#else +#define B3_DBVT_USE_TEMPLATE 0 +#endif + +// Use only intrinsics instead of inline asm +#define B3_DBVT_USE_INTRINSIC_SSE 1 + +// Using memmov for collideOCL +#define B3_DBVT_USE_MEMMOVE 1 + +// Enable benchmarking code +#define B3_DBVT_ENABLE_BENCHMARK 0 + +// Inlining +#define B3_DBVT_INLINE B3_FORCE_INLINE + +// Specific methods implementation + +//SSE gives errors on a MSVC 7.1 +#if defined (B3_USE_SSE) //&& defined (_WIN32) +#define B3_DBVT_SELECT_IMPL B3_DBVT_IMPL_SSE +#define B3_DBVT_MERGE_IMPL B3_DBVT_IMPL_SSE +#define B3_DBVT_INT0_IMPL B3_DBVT_IMPL_SSE +#else +#define B3_DBVT_SELECT_IMPL B3_DBVT_IMPL_GENERIC +#define B3_DBVT_MERGE_IMPL B3_DBVT_IMPL_GENERIC +#define B3_DBVT_INT0_IMPL B3_DBVT_IMPL_GENERIC +#endif + +#if (B3_DBVT_SELECT_IMPL==B3_DBVT_IMPL_SSE)|| \ + (B3_DBVT_MERGE_IMPL==B3_DBVT_IMPL_SSE)|| \ + (B3_DBVT_INT0_IMPL==B3_DBVT_IMPL_SSE) +#include <emmintrin.h> +#endif + +// +// Auto config and checks +// + +#if B3_DBVT_USE_TEMPLATE +#define B3_DBVT_VIRTUAL +#define B3_DBVT_VIRTUAL_DTOR(a) +#define B3_DBVT_PREFIX template <typename T> +#define B3_DBVT_IPOLICY T& policy +#define B3_DBVT_CHECKTYPE static const ICollide& typechecker=*(T*)1;(void)typechecker; +#else +#define B3_DBVT_VIRTUAL_DTOR(a) virtual ~a() {} +#define B3_DBVT_VIRTUAL virtual +#define B3_DBVT_PREFIX +#define B3_DBVT_IPOLICY ICollide& policy +#define B3_DBVT_CHECKTYPE +#endif + +#if B3_DBVT_USE_MEMMOVE +#if !defined( __CELLOS_LV2__) && !defined(__MWERKS__) +#include <memory.h> +#endif +#include <string.h> +#endif + +#ifndef B3_DBVT_USE_TEMPLATE +#error "B3_DBVT_USE_TEMPLATE undefined" +#endif + +#ifndef B3_DBVT_USE_MEMMOVE +#error "B3_DBVT_USE_MEMMOVE undefined" +#endif + +#ifndef B3_DBVT_ENABLE_BENCHMARK +#error "B3_DBVT_ENABLE_BENCHMARK undefined" +#endif + +#ifndef B3_DBVT_SELECT_IMPL +#error "B3_DBVT_SELECT_IMPL undefined" +#endif + +#ifndef B3_DBVT_MERGE_IMPL +#error "B3_DBVT_MERGE_IMPL undefined" +#endif + +#ifndef B3_DBVT_INT0_IMPL +#error "B3_DBVT_INT0_IMPL undefined" +#endif + +// +// Defaults volumes +// + +/* b3DbvtAabbMm */ +struct b3DbvtAabbMm +{ + B3_DBVT_INLINE b3Vector3 Center() const { return((mi+mx)/2); } + B3_DBVT_INLINE b3Vector3 Lengths() const { return(mx-mi); } + B3_DBVT_INLINE b3Vector3 Extents() const { return((mx-mi)/2); } + B3_DBVT_INLINE const b3Vector3& Mins() const { return(mi); } + B3_DBVT_INLINE const b3Vector3& Maxs() const { return(mx); } + static inline b3DbvtAabbMm FromCE(const b3Vector3& c,const b3Vector3& e); + static inline b3DbvtAabbMm FromCR(const b3Vector3& c,b3Scalar r); + static inline b3DbvtAabbMm FromMM(const b3Vector3& mi,const b3Vector3& mx); + static inline b3DbvtAabbMm FromPoints(const b3Vector3* pts,int n); + static inline b3DbvtAabbMm FromPoints(const b3Vector3** ppts,int n); + B3_DBVT_INLINE void Expand(const b3Vector3& e); + B3_DBVT_INLINE void SignedExpand(const b3Vector3& e); + B3_DBVT_INLINE bool Contain(const b3DbvtAabbMm& a) const; + B3_DBVT_INLINE int Classify(const b3Vector3& n,b3Scalar o,int s) const; + B3_DBVT_INLINE b3Scalar ProjectMinimum(const b3Vector3& v,unsigned signs) const; + B3_DBVT_INLINE friend bool b3Intersect( const b3DbvtAabbMm& a, + const b3DbvtAabbMm& b); + + B3_DBVT_INLINE friend bool b3Intersect( const b3DbvtAabbMm& a, + const b3Vector3& b); + + B3_DBVT_INLINE friend b3Scalar b3Proximity( const b3DbvtAabbMm& a, + const b3DbvtAabbMm& b); + B3_DBVT_INLINE friend int b3Select( const b3DbvtAabbMm& o, + const b3DbvtAabbMm& a, + const b3DbvtAabbMm& b); + B3_DBVT_INLINE friend void b3Merge( const b3DbvtAabbMm& a, + const b3DbvtAabbMm& b, + b3DbvtAabbMm& r); + B3_DBVT_INLINE friend bool b3NotEqual( const b3DbvtAabbMm& a, + const b3DbvtAabbMm& b); + + B3_DBVT_INLINE b3Vector3& tMins() { return(mi); } + B3_DBVT_INLINE b3Vector3& tMaxs() { return(mx); } + +private: + B3_DBVT_INLINE void AddSpan(const b3Vector3& d,b3Scalar& smi,b3Scalar& smx) const; +private: + b3Vector3 mi,mx; +}; + +// Types +typedef b3DbvtAabbMm b3DbvtVolume; + +/* b3DbvtNode */ +struct b3DbvtNode +{ + b3DbvtVolume volume; + b3DbvtNode* parent; + B3_DBVT_INLINE bool isleaf() const { return(childs[1]==0); } + B3_DBVT_INLINE bool isinternal() const { return(!isleaf()); } + union + { + b3DbvtNode* childs[2]; + void* data; + int dataAsInt; + }; +}; + +///The b3DynamicBvh class implements a fast dynamic bounding volume tree based on axis aligned bounding boxes (aabb tree). +///This b3DynamicBvh is used for soft body collision detection and for the b3DynamicBvhBroadphase. It has a fast insert, remove and update of nodes. +///Unlike the b3QuantizedBvh, nodes can be dynamically moved around, which allows for change in topology of the underlying data structure. +struct b3DynamicBvh +{ + /* Stack element */ + struct sStkNN + { + const b3DbvtNode* a; + const b3DbvtNode* b; + sStkNN() {} + sStkNN(const b3DbvtNode* na,const b3DbvtNode* nb) : a(na),b(nb) {} + }; + struct sStkNP + { + const b3DbvtNode* node; + int mask; + sStkNP(const b3DbvtNode* n,unsigned m) : node(n),mask(m) {} + }; + struct sStkNPS + { + const b3DbvtNode* node; + int mask; + b3Scalar value; + sStkNPS() {} + sStkNPS(const b3DbvtNode* n,unsigned m,b3Scalar v) : node(n),mask(m),value(v) {} + }; + struct sStkCLN + { + const b3DbvtNode* node; + b3DbvtNode* parent; + sStkCLN(const b3DbvtNode* n,b3DbvtNode* p) : node(n),parent(p) {} + }; + // Policies/Interfaces + + /* ICollide */ + struct ICollide + { + B3_DBVT_VIRTUAL_DTOR(ICollide) + B3_DBVT_VIRTUAL void Process(const b3DbvtNode*,const b3DbvtNode*) {} + B3_DBVT_VIRTUAL void Process(const b3DbvtNode*) {} + B3_DBVT_VIRTUAL void Process(const b3DbvtNode* n,b3Scalar) { Process(n); } + B3_DBVT_VIRTUAL bool Descent(const b3DbvtNode*) { return(true); } + B3_DBVT_VIRTUAL bool AllLeaves(const b3DbvtNode*) { return(true); } + }; + /* IWriter */ + struct IWriter + { + virtual ~IWriter() {} + virtual void Prepare(const b3DbvtNode* root,int numnodes)=0; + virtual void WriteNode(const b3DbvtNode*,int index,int parent,int child0,int child1)=0; + virtual void WriteLeaf(const b3DbvtNode*,int index,int parent)=0; + }; + /* IClone */ + struct IClone + { + virtual ~IClone() {} + virtual void CloneLeaf(b3DbvtNode*) {} + }; + + // Constants + enum { + B3_SIMPLE_STACKSIZE = 64, + B3_DOUBLE_STACKSIZE = B3_SIMPLE_STACKSIZE*2 + }; + + // Fields + b3DbvtNode* m_root; + b3DbvtNode* m_free; + int m_lkhd; + int m_leaves; + unsigned m_opath; + + + b3AlignedObjectArray<sStkNN> m_stkStack; + mutable b3AlignedObjectArray<const b3DbvtNode*> m_rayTestStack; + + + // Methods + b3DynamicBvh(); + ~b3DynamicBvh(); + void clear(); + bool empty() const { return(0==m_root); } + void optimizeBottomUp(); + void optimizeTopDown(int bu_treshold=128); + void optimizeIncremental(int passes); + b3DbvtNode* insert(const b3DbvtVolume& box,void* data); + void update(b3DbvtNode* leaf,int lookahead=-1); + void update(b3DbvtNode* leaf,b3DbvtVolume& volume); + bool update(b3DbvtNode* leaf,b3DbvtVolume& volume,const b3Vector3& velocity,b3Scalar margin); + bool update(b3DbvtNode* leaf,b3DbvtVolume& volume,const b3Vector3& velocity); + bool update(b3DbvtNode* leaf,b3DbvtVolume& volume,b3Scalar margin); + void remove(b3DbvtNode* leaf); + void write(IWriter* iwriter) const; + void clone(b3DynamicBvh& dest,IClone* iclone=0) const; + static int maxdepth(const b3DbvtNode* node); + static int countLeaves(const b3DbvtNode* node); + static void extractLeaves(const b3DbvtNode* node,b3AlignedObjectArray<const b3DbvtNode*>& leaves); +#if B3_DBVT_ENABLE_BENCHMARK + static void benchmark(); +#else + static void benchmark(){} +#endif + // B3_DBVT_IPOLICY must support ICollide policy/interface + B3_DBVT_PREFIX + static void enumNodes( const b3DbvtNode* root, + B3_DBVT_IPOLICY); + B3_DBVT_PREFIX + static void enumLeaves( const b3DbvtNode* root, + B3_DBVT_IPOLICY); + B3_DBVT_PREFIX + void collideTT( const b3DbvtNode* root0, + const b3DbvtNode* root1, + B3_DBVT_IPOLICY); + + B3_DBVT_PREFIX + void collideTTpersistentStack( const b3DbvtNode* root0, + const b3DbvtNode* root1, + B3_DBVT_IPOLICY); +#if 0 + B3_DBVT_PREFIX + void collideTT( const b3DbvtNode* root0, + const b3DbvtNode* root1, + const b3Transform& xform, + B3_DBVT_IPOLICY); + B3_DBVT_PREFIX + void collideTT( const b3DbvtNode* root0, + const b3Transform& xform0, + const b3DbvtNode* root1, + const b3Transform& xform1, + B3_DBVT_IPOLICY); +#endif + + B3_DBVT_PREFIX + void collideTV( const b3DbvtNode* root, + const b3DbvtVolume& volume, + B3_DBVT_IPOLICY) const; + ///rayTest is a re-entrant ray test, and can be called in parallel as long as the b3AlignedAlloc is thread-safe (uses locking etc) + ///rayTest is slower than rayTestInternal, because it builds a local stack, using memory allocations, and it recomputes signs/rayDirectionInverses each time + B3_DBVT_PREFIX + static void rayTest( const b3DbvtNode* root, + const b3Vector3& rayFrom, + const b3Vector3& rayTo, + B3_DBVT_IPOLICY); + ///rayTestInternal is faster than rayTest, because it uses a persistent stack (to reduce dynamic memory allocations to a minimum) and it uses precomputed signs/rayInverseDirections + ///rayTestInternal is used by b3DynamicBvhBroadphase to accelerate world ray casts + B3_DBVT_PREFIX + void rayTestInternal( const b3DbvtNode* root, + const b3Vector3& rayFrom, + const b3Vector3& rayTo, + const b3Vector3& rayDirectionInverse, + unsigned int signs[3], + b3Scalar lambda_max, + const b3Vector3& aabbMin, + const b3Vector3& aabbMax, + B3_DBVT_IPOLICY) const; + + B3_DBVT_PREFIX + static void collideKDOP(const b3DbvtNode* root, + const b3Vector3* normals, + const b3Scalar* offsets, + int count, + B3_DBVT_IPOLICY); + B3_DBVT_PREFIX + static void collideOCL( const b3DbvtNode* root, + const b3Vector3* normals, + const b3Scalar* offsets, + const b3Vector3& sortaxis, + int count, + B3_DBVT_IPOLICY, + bool fullsort=true); + B3_DBVT_PREFIX + static void collideTU( const b3DbvtNode* root, + B3_DBVT_IPOLICY); + // Helpers + static B3_DBVT_INLINE int nearest(const int* i,const b3DynamicBvh::sStkNPS* a,b3Scalar v,int l,int h) + { + int m=0; + while(l<h) + { + m=(l+h)>>1; + if(a[i[m]].value>=v) l=m+1; else h=m; + } + return(h); + } + static B3_DBVT_INLINE int allocate( b3AlignedObjectArray<int>& ifree, + b3AlignedObjectArray<sStkNPS>& stock, + const sStkNPS& value) + { + int i; + if(ifree.size()>0) + { i=ifree[ifree.size()-1];ifree.pop_back();stock[i]=value; } + else + { i=stock.size();stock.push_back(value); } + return(i); + } + // +private: + b3DynamicBvh(const b3DynamicBvh&) {} +}; + +// +// Inline's +// + +// +inline b3DbvtAabbMm b3DbvtAabbMm::FromCE(const b3Vector3& c,const b3Vector3& e) +{ + b3DbvtAabbMm box; + box.mi=c-e;box.mx=c+e; + return(box); +} + +// +inline b3DbvtAabbMm b3DbvtAabbMm::FromCR(const b3Vector3& c,b3Scalar r) +{ + return(FromCE(c,b3MakeVector3(r,r,r))); +} + +// +inline b3DbvtAabbMm b3DbvtAabbMm::FromMM(const b3Vector3& mi,const b3Vector3& mx) +{ + b3DbvtAabbMm box; + box.mi=mi;box.mx=mx; + return(box); +} + +// +inline b3DbvtAabbMm b3DbvtAabbMm::FromPoints(const b3Vector3* pts,int n) +{ + b3DbvtAabbMm box; + box.mi=box.mx=pts[0]; + for(int i=1;i<n;++i) + { + box.mi.setMin(pts[i]); + box.mx.setMax(pts[i]); + } + return(box); +} + +// +inline b3DbvtAabbMm b3DbvtAabbMm::FromPoints(const b3Vector3** ppts,int n) +{ + b3DbvtAabbMm box; + box.mi=box.mx=*ppts[0]; + for(int i=1;i<n;++i) + { + box.mi.setMin(*ppts[i]); + box.mx.setMax(*ppts[i]); + } + return(box); +} + +// +B3_DBVT_INLINE void b3DbvtAabbMm::Expand(const b3Vector3& e) +{ + mi-=e;mx+=e; +} + +// +B3_DBVT_INLINE void b3DbvtAabbMm::SignedExpand(const b3Vector3& e) +{ + if(e.x>0) mx.setX(mx.x+e[0]); else mi.setX(mi.x+e[0]); + if(e.y>0) mx.setY(mx.y+e[1]); else mi.setY(mi.y+e[1]); + if(e.z>0) mx.setZ(mx.z+e[2]); else mi.setZ(mi.z+e[2]); +} + +// +B3_DBVT_INLINE bool b3DbvtAabbMm::Contain(const b3DbvtAabbMm& a) const +{ + return( (mi.x<=a.mi.x)&& + (mi.y<=a.mi.y)&& + (mi.z<=a.mi.z)&& + (mx.x>=a.mx.x)&& + (mx.y>=a.mx.y)&& + (mx.z>=a.mx.z)); +} + +// +B3_DBVT_INLINE int b3DbvtAabbMm::Classify(const b3Vector3& n,b3Scalar o,int s) const +{ + b3Vector3 pi,px; + switch(s) + { + case (0+0+0): px=b3MakeVector3(mi.x,mi.y,mi.z); + pi=b3MakeVector3(mx.x,mx.y,mx.z);break; + case (1+0+0): px=b3MakeVector3(mx.x,mi.y,mi.z); + pi=b3MakeVector3(mi.x,mx.y,mx.z);break; + case (0+2+0): px=b3MakeVector3(mi.x,mx.y,mi.z); + pi=b3MakeVector3(mx.x,mi.y,mx.z);break; + case (1+2+0): px=b3MakeVector3(mx.x,mx.y,mi.z); + pi=b3MakeVector3(mi.x,mi.y,mx.z);break; + case (0+0+4): px=b3MakeVector3(mi.x,mi.y,mx.z); + pi=b3MakeVector3(mx.x,mx.y,mi.z);break; + case (1+0+4): px=b3MakeVector3(mx.x,mi.y,mx.z); + pi=b3MakeVector3(mi.x,mx.y,mi.z);break; + case (0+2+4): px=b3MakeVector3(mi.x,mx.y,mx.z); + pi=b3MakeVector3(mx.x,mi.y,mi.z);break; + case (1+2+4): px=b3MakeVector3(mx.x,mx.y,mx.z); + pi=b3MakeVector3(mi.x,mi.y,mi.z);break; + } + if((b3Dot(n,px)+o)<0) return(-1); + if((b3Dot(n,pi)+o)>=0) return(+1); + return(0); +} + +// +B3_DBVT_INLINE b3Scalar b3DbvtAabbMm::ProjectMinimum(const b3Vector3& v,unsigned signs) const +{ + const b3Vector3* b[]={&mx,&mi}; + const b3Vector3 p = b3MakeVector3( b[(signs>>0)&1]->x, + b[(signs>>1)&1]->y, + b[(signs>>2)&1]->z); + return(b3Dot(p,v)); +} + +// +B3_DBVT_INLINE void b3DbvtAabbMm::AddSpan(const b3Vector3& d,b3Scalar& smi,b3Scalar& smx) const +{ + for(int i=0;i<3;++i) + { + if(d[i]<0) + { smi+=mx[i]*d[i];smx+=mi[i]*d[i]; } + else + { smi+=mi[i]*d[i];smx+=mx[i]*d[i]; } + } +} + +// +B3_DBVT_INLINE bool b3Intersect( const b3DbvtAabbMm& a, + const b3DbvtAabbMm& b) +{ +#if B3_DBVT_INT0_IMPL == B3_DBVT_IMPL_SSE + const __m128 rt(_mm_or_ps( _mm_cmplt_ps(_mm_load_ps(b.mx),_mm_load_ps(a.mi)), + _mm_cmplt_ps(_mm_load_ps(a.mx),_mm_load_ps(b.mi)))); +#if defined (_WIN32) + const __int32* pu((const __int32*)&rt); +#else + const int* pu((const int*)&rt); +#endif + return((pu[0]|pu[1]|pu[2])==0); +#else + return( (a.mi.x<=b.mx.x)&& + (a.mx.x>=b.mi.x)&& + (a.mi.y<=b.mx.y)&& + (a.mx.y>=b.mi.y)&& + (a.mi.z<=b.mx.z)&& + (a.mx.z>=b.mi.z)); +#endif +} + + + +// +B3_DBVT_INLINE bool b3Intersect( const b3DbvtAabbMm& a, + const b3Vector3& b) +{ + return( (b.x>=a.mi.x)&& + (b.y>=a.mi.y)&& + (b.z>=a.mi.z)&& + (b.x<=a.mx.x)&& + (b.y<=a.mx.y)&& + (b.z<=a.mx.z)); +} + + + + + +////////////////////////////////////// + + +// +B3_DBVT_INLINE b3Scalar b3Proximity( const b3DbvtAabbMm& a, + const b3DbvtAabbMm& b) +{ + const b3Vector3 d=(a.mi+a.mx)-(b.mi+b.mx); + return(b3Fabs(d.x)+b3Fabs(d.y)+b3Fabs(d.z)); +} + + + +// +B3_DBVT_INLINE int b3Select( const b3DbvtAabbMm& o, + const b3DbvtAabbMm& a, + const b3DbvtAabbMm& b) +{ +#if B3_DBVT_SELECT_IMPL == B3_DBVT_IMPL_SSE + +#if defined (_WIN32) + static B3_ATTRIBUTE_ALIGNED16(const unsigned __int32) mask[]={0x7fffffff,0x7fffffff,0x7fffffff,0x7fffffff}; +#else + static B3_ATTRIBUTE_ALIGNED16(const unsigned int) mask[]={0x7fffffff,0x7fffffff,0x7fffffff,0x00000000 /*0x7fffffff*/}; +#endif + ///@todo: the intrinsic version is 11% slower +#if B3_DBVT_USE_INTRINSIC_SSE + + union b3SSEUnion ///NOTE: if we use more intrinsics, move b3SSEUnion into the LinearMath directory + { + __m128 ssereg; + float floats[4]; + int ints[4]; + }; + + __m128 omi(_mm_load_ps(o.mi)); + omi=_mm_add_ps(omi,_mm_load_ps(o.mx)); + __m128 ami(_mm_load_ps(a.mi)); + ami=_mm_add_ps(ami,_mm_load_ps(a.mx)); + ami=_mm_sub_ps(ami,omi); + ami=_mm_and_ps(ami,_mm_load_ps((const float*)mask)); + __m128 bmi(_mm_load_ps(b.mi)); + bmi=_mm_add_ps(bmi,_mm_load_ps(b.mx)); + bmi=_mm_sub_ps(bmi,omi); + bmi=_mm_and_ps(bmi,_mm_load_ps((const float*)mask)); + __m128 t0(_mm_movehl_ps(ami,ami)); + ami=_mm_add_ps(ami,t0); + ami=_mm_add_ss(ami,_mm_shuffle_ps(ami,ami,1)); + __m128 t1(_mm_movehl_ps(bmi,bmi)); + bmi=_mm_add_ps(bmi,t1); + bmi=_mm_add_ss(bmi,_mm_shuffle_ps(bmi,bmi,1)); + + b3SSEUnion tmp; + tmp.ssereg = _mm_cmple_ss(bmi,ami); + return tmp.ints[0]&1; + +#else + B3_ATTRIBUTE_ALIGNED16(__int32 r[1]); + __asm + { + mov eax,o + mov ecx,a + mov edx,b + movaps xmm0,[eax] + movaps xmm5,mask + addps xmm0,[eax+16] + movaps xmm1,[ecx] + movaps xmm2,[edx] + addps xmm1,[ecx+16] + addps xmm2,[edx+16] + subps xmm1,xmm0 + subps xmm2,xmm0 + andps xmm1,xmm5 + andps xmm2,xmm5 + movhlps xmm3,xmm1 + movhlps xmm4,xmm2 + addps xmm1,xmm3 + addps xmm2,xmm4 + pshufd xmm3,xmm1,1 + pshufd xmm4,xmm2,1 + addss xmm1,xmm3 + addss xmm2,xmm4 + cmpless xmm2,xmm1 + movss r,xmm2 + } + return(r[0]&1); +#endif +#else + return(b3Proximity(o,a)<b3Proximity(o,b)?0:1); +#endif +} + +// +B3_DBVT_INLINE void b3Merge( const b3DbvtAabbMm& a, + const b3DbvtAabbMm& b, + b3DbvtAabbMm& r) +{ +#if B3_DBVT_MERGE_IMPL==B3_DBVT_IMPL_SSE + __m128 ami(_mm_load_ps(a.mi)); + __m128 amx(_mm_load_ps(a.mx)); + __m128 bmi(_mm_load_ps(b.mi)); + __m128 bmx(_mm_load_ps(b.mx)); + ami=_mm_min_ps(ami,bmi); + amx=_mm_max_ps(amx,bmx); + _mm_store_ps(r.mi,ami); + _mm_store_ps(r.mx,amx); +#else + for(int i=0;i<3;++i) + { + if(a.mi[i]<b.mi[i]) r.mi[i]=a.mi[i]; else r.mi[i]=b.mi[i]; + if(a.mx[i]>b.mx[i]) r.mx[i]=a.mx[i]; else r.mx[i]=b.mx[i]; + } +#endif +} + +// +B3_DBVT_INLINE bool b3NotEqual( const b3DbvtAabbMm& a, + const b3DbvtAabbMm& b) +{ + return( (a.mi.x!=b.mi.x)|| + (a.mi.y!=b.mi.y)|| + (a.mi.z!=b.mi.z)|| + (a.mx.x!=b.mx.x)|| + (a.mx.y!=b.mx.y)|| + (a.mx.z!=b.mx.z)); +} + +// +// Inline's +// + +// +B3_DBVT_PREFIX +inline void b3DynamicBvh::enumNodes( const b3DbvtNode* root, + B3_DBVT_IPOLICY) +{ + B3_DBVT_CHECKTYPE + policy.Process(root); + if(root->isinternal()) + { + enumNodes(root->childs[0],policy); + enumNodes(root->childs[1],policy); + } +} + +// +B3_DBVT_PREFIX +inline void b3DynamicBvh::enumLeaves( const b3DbvtNode* root, + B3_DBVT_IPOLICY) +{ + B3_DBVT_CHECKTYPE + if(root->isinternal()) + { + enumLeaves(root->childs[0],policy); + enumLeaves(root->childs[1],policy); + } + else + { + policy.Process(root); + } +} + +// +B3_DBVT_PREFIX +inline void b3DynamicBvh::collideTT( const b3DbvtNode* root0, + const b3DbvtNode* root1, + B3_DBVT_IPOLICY) +{ + B3_DBVT_CHECKTYPE + if(root0&&root1) + { + int depth=1; + int treshold=B3_DOUBLE_STACKSIZE-4; + b3AlignedObjectArray<sStkNN> stkStack; + stkStack.resize(B3_DOUBLE_STACKSIZE); + stkStack[0]=sStkNN(root0,root1); + do { + sStkNN p=stkStack[--depth]; + if(depth>treshold) + { + stkStack.resize(stkStack.size()*2); + treshold=stkStack.size()-4; + } + if(p.a==p.b) + { + if(p.a->isinternal()) + { + stkStack[depth++]=sStkNN(p.a->childs[0],p.a->childs[0]); + stkStack[depth++]=sStkNN(p.a->childs[1],p.a->childs[1]); + stkStack[depth++]=sStkNN(p.a->childs[0],p.a->childs[1]); + } + } + else if(b3Intersect(p.a->volume,p.b->volume)) + { + if(p.a->isinternal()) + { + if(p.b->isinternal()) + { + stkStack[depth++]=sStkNN(p.a->childs[0],p.b->childs[0]); + stkStack[depth++]=sStkNN(p.a->childs[1],p.b->childs[0]); + stkStack[depth++]=sStkNN(p.a->childs[0],p.b->childs[1]); + stkStack[depth++]=sStkNN(p.a->childs[1],p.b->childs[1]); + } + else + { + stkStack[depth++]=sStkNN(p.a->childs[0],p.b); + stkStack[depth++]=sStkNN(p.a->childs[1],p.b); + } + } + else + { + if(p.b->isinternal()) + { + stkStack[depth++]=sStkNN(p.a,p.b->childs[0]); + stkStack[depth++]=sStkNN(p.a,p.b->childs[1]); + } + else + { + policy.Process(p.a,p.b); + } + } + } + } while(depth); + } +} + + + +B3_DBVT_PREFIX +inline void b3DynamicBvh::collideTTpersistentStack( const b3DbvtNode* root0, + const b3DbvtNode* root1, + B3_DBVT_IPOLICY) +{ + B3_DBVT_CHECKTYPE + if(root0&&root1) + { + int depth=1; + int treshold=B3_DOUBLE_STACKSIZE-4; + + m_stkStack.resize(B3_DOUBLE_STACKSIZE); + m_stkStack[0]=sStkNN(root0,root1); + do { + sStkNN p=m_stkStack[--depth]; + if(depth>treshold) + { + m_stkStack.resize(m_stkStack.size()*2); + treshold=m_stkStack.size()-4; + } + if(p.a==p.b) + { + if(p.a->isinternal()) + { + m_stkStack[depth++]=sStkNN(p.a->childs[0],p.a->childs[0]); + m_stkStack[depth++]=sStkNN(p.a->childs[1],p.a->childs[1]); + m_stkStack[depth++]=sStkNN(p.a->childs[0],p.a->childs[1]); + } + } + else if(b3Intersect(p.a->volume,p.b->volume)) + { + if(p.a->isinternal()) + { + if(p.b->isinternal()) + { + m_stkStack[depth++]=sStkNN(p.a->childs[0],p.b->childs[0]); + m_stkStack[depth++]=sStkNN(p.a->childs[1],p.b->childs[0]); + m_stkStack[depth++]=sStkNN(p.a->childs[0],p.b->childs[1]); + m_stkStack[depth++]=sStkNN(p.a->childs[1],p.b->childs[1]); + } + else + { + m_stkStack[depth++]=sStkNN(p.a->childs[0],p.b); + m_stkStack[depth++]=sStkNN(p.a->childs[1],p.b); + } + } + else + { + if(p.b->isinternal()) + { + m_stkStack[depth++]=sStkNN(p.a,p.b->childs[0]); + m_stkStack[depth++]=sStkNN(p.a,p.b->childs[1]); + } + else + { + policy.Process(p.a,p.b); + } + } + } + } while(depth); + } +} + +#if 0 +// +B3_DBVT_PREFIX +inline void b3DynamicBvh::collideTT( const b3DbvtNode* root0, + const b3DbvtNode* root1, + const b3Transform& xform, + B3_DBVT_IPOLICY) +{ + B3_DBVT_CHECKTYPE + if(root0&&root1) + { + int depth=1; + int treshold=B3_DOUBLE_STACKSIZE-4; + b3AlignedObjectArray<sStkNN> stkStack; + stkStack.resize(B3_DOUBLE_STACKSIZE); + stkStack[0]=sStkNN(root0,root1); + do { + sStkNN p=stkStack[--depth]; + if(b3Intersect(p.a->volume,p.b->volume,xform)) + { + if(depth>treshold) + { + stkStack.resize(stkStack.size()*2); + treshold=stkStack.size()-4; + } + if(p.a->isinternal()) + { + if(p.b->isinternal()) + { + stkStack[depth++]=sStkNN(p.a->childs[0],p.b->childs[0]); + stkStack[depth++]=sStkNN(p.a->childs[1],p.b->childs[0]); + stkStack[depth++]=sStkNN(p.a->childs[0],p.b->childs[1]); + stkStack[depth++]=sStkNN(p.a->childs[1],p.b->childs[1]); + } + else + { + stkStack[depth++]=sStkNN(p.a->childs[0],p.b); + stkStack[depth++]=sStkNN(p.a->childs[1],p.b); + } + } + else + { + if(p.b->isinternal()) + { + stkStack[depth++]=sStkNN(p.a,p.b->childs[0]); + stkStack[depth++]=sStkNN(p.a,p.b->childs[1]); + } + else + { + policy.Process(p.a,p.b); + } + } + } + } while(depth); + } +} +// +B3_DBVT_PREFIX +inline void b3DynamicBvh::collideTT( const b3DbvtNode* root0, + const b3Transform& xform0, + const b3DbvtNode* root1, + const b3Transform& xform1, + B3_DBVT_IPOLICY) +{ + const b3Transform xform=xform0.inverse()*xform1; + collideTT(root0,root1,xform,policy); +} +#endif + +// +B3_DBVT_PREFIX +inline void b3DynamicBvh::collideTV( const b3DbvtNode* root, + const b3DbvtVolume& vol, + B3_DBVT_IPOLICY) const +{ + B3_DBVT_CHECKTYPE + if(root) + { + B3_ATTRIBUTE_ALIGNED16(b3DbvtVolume) volume(vol); + b3AlignedObjectArray<const b3DbvtNode*> stack; + stack.resize(0); + stack.reserve(B3_SIMPLE_STACKSIZE); + stack.push_back(root); + do { + const b3DbvtNode* n=stack[stack.size()-1]; + stack.pop_back(); + if(b3Intersect(n->volume,volume)) + { + if(n->isinternal()) + { + stack.push_back(n->childs[0]); + stack.push_back(n->childs[1]); + } + else + { + policy.Process(n); + } + } + } while(stack.size()>0); + } +} + +B3_DBVT_PREFIX +inline void b3DynamicBvh::rayTestInternal( const b3DbvtNode* root, + const b3Vector3& rayFrom, + const b3Vector3& rayTo, + const b3Vector3& rayDirectionInverse, + unsigned int signs[3], + b3Scalar lambda_max, + const b3Vector3& aabbMin, + const b3Vector3& aabbMax, + B3_DBVT_IPOLICY) const +{ + (void) rayTo; + B3_DBVT_CHECKTYPE + if(root) + { + int depth=1; + int treshold=B3_DOUBLE_STACKSIZE-2; + b3AlignedObjectArray<const b3DbvtNode*>& stack = m_rayTestStack; + stack.resize(B3_DOUBLE_STACKSIZE); + stack[0]=root; + b3Vector3 bounds[2]; + do + { + const b3DbvtNode* node=stack[--depth]; + bounds[0] = node->volume.Mins()-aabbMax; + bounds[1] = node->volume.Maxs()-aabbMin; + b3Scalar tmin=1.f,lambda_min=0.f; + unsigned int result1=false; + result1 = b3RayAabb2(rayFrom,rayDirectionInverse,signs,bounds,tmin,lambda_min,lambda_max); + if(result1) + { + if(node->isinternal()) + { + if(depth>treshold) + { + stack.resize(stack.size()*2); + treshold=stack.size()-2; + } + stack[depth++]=node->childs[0]; + stack[depth++]=node->childs[1]; + } + else + { + policy.Process(node); + } + } + } while(depth); + } +} + +// +B3_DBVT_PREFIX +inline void b3DynamicBvh::rayTest( const b3DbvtNode* root, + const b3Vector3& rayFrom, + const b3Vector3& rayTo, + B3_DBVT_IPOLICY) +{ + B3_DBVT_CHECKTYPE + if(root) + { + b3Vector3 rayDir = (rayTo-rayFrom); + rayDir.normalize (); + + ///what about division by zero? --> just set rayDirection[i] to INF/B3_LARGE_FLOAT + b3Vector3 rayDirectionInverse; + rayDirectionInverse[0] = rayDir[0] == b3Scalar(0.0) ? b3Scalar(B3_LARGE_FLOAT) : b3Scalar(1.0) / rayDir[0]; + rayDirectionInverse[1] = rayDir[1] == b3Scalar(0.0) ? b3Scalar(B3_LARGE_FLOAT) : b3Scalar(1.0) / rayDir[1]; + rayDirectionInverse[2] = rayDir[2] == b3Scalar(0.0) ? b3Scalar(B3_LARGE_FLOAT) : b3Scalar(1.0) / rayDir[2]; + unsigned int signs[3] = { rayDirectionInverse[0] < 0.0, rayDirectionInverse[1] < 0.0, rayDirectionInverse[2] < 0.0}; + + b3Scalar lambda_max = rayDir.dot(rayTo-rayFrom); +#ifdef COMPARE_BTRAY_AABB2 + b3Vector3 resultNormal; +#endif//COMPARE_BTRAY_AABB2 + + b3AlignedObjectArray<const b3DbvtNode*> stack; + + int depth=1; + int treshold=B3_DOUBLE_STACKSIZE-2; + + stack.resize(B3_DOUBLE_STACKSIZE); + stack[0]=root; + b3Vector3 bounds[2]; + do { + const b3DbvtNode* node=stack[--depth]; + + bounds[0] = node->volume.Mins(); + bounds[1] = node->volume.Maxs(); + + b3Scalar tmin=1.f,lambda_min=0.f; + unsigned int result1 = b3RayAabb2(rayFrom,rayDirectionInverse,signs,bounds,tmin,lambda_min,lambda_max); + +#ifdef COMPARE_BTRAY_AABB2 + b3Scalar param=1.f; + bool result2 = b3RayAabb(rayFrom,rayTo,node->volume.Mins(),node->volume.Maxs(),param,resultNormal); + b3Assert(result1 == result2); +#endif //TEST_BTRAY_AABB2 + + if(result1) + { + if(node->isinternal()) + { + if(depth>treshold) + { + stack.resize(stack.size()*2); + treshold=stack.size()-2; + } + stack[depth++]=node->childs[0]; + stack[depth++]=node->childs[1]; + } + else + { + policy.Process(node); + } + } + } while(depth); + + } +} + +// +B3_DBVT_PREFIX +inline void b3DynamicBvh::collideKDOP(const b3DbvtNode* root, + const b3Vector3* normals, + const b3Scalar* offsets, + int count, + B3_DBVT_IPOLICY) +{ + B3_DBVT_CHECKTYPE + if(root) + { + const int inside=(1<<count)-1; + b3AlignedObjectArray<sStkNP> stack; + int signs[sizeof(unsigned)*8]; + b3Assert(count<int (sizeof(signs)/sizeof(signs[0]))); + for(int i=0;i<count;++i) + { + signs[i]= ((normals[i].x>=0)?1:0)+ + ((normals[i].y>=0)?2:0)+ + ((normals[i].z>=0)?4:0); + } + stack.reserve(B3_SIMPLE_STACKSIZE); + stack.push_back(sStkNP(root,0)); + do { + sStkNP se=stack[stack.size()-1]; + bool out=false; + stack.pop_back(); + for(int i=0,j=1;(!out)&&(i<count);++i,j<<=1) + { + if(0==(se.mask&j)) + { + const int side=se.node->volume.Classify(normals[i],offsets[i],signs[i]); + switch(side) + { + case -1: out=true;break; + case +1: se.mask|=j;break; + } + } + } + if(!out) + { + if((se.mask!=inside)&&(se.node->isinternal())) + { + stack.push_back(sStkNP(se.node->childs[0],se.mask)); + stack.push_back(sStkNP(se.node->childs[1],se.mask)); + } + else + { + if(policy.AllLeaves(se.node)) enumLeaves(se.node,policy); + } + } + } while(stack.size()); + } +} + +// +B3_DBVT_PREFIX +inline void b3DynamicBvh::collideOCL( const b3DbvtNode* root, + const b3Vector3* normals, + const b3Scalar* offsets, + const b3Vector3& sortaxis, + int count, + B3_DBVT_IPOLICY, + bool fsort) +{ + B3_DBVT_CHECKTYPE + if(root) + { + const unsigned srtsgns=(sortaxis[0]>=0?1:0)+ + (sortaxis[1]>=0?2:0)+ + (sortaxis[2]>=0?4:0); + const int inside=(1<<count)-1; + b3AlignedObjectArray<sStkNPS> stock; + b3AlignedObjectArray<int> ifree; + b3AlignedObjectArray<int> stack; + int signs[sizeof(unsigned)*8]; + b3Assert(count<int (sizeof(signs)/sizeof(signs[0]))); + for(int i=0;i<count;++i) + { + signs[i]= ((normals[i].x>=0)?1:0)+ + ((normals[i].y>=0)?2:0)+ + ((normals[i].z>=0)?4:0); + } + stock.reserve(B3_SIMPLE_STACKSIZE); + stack.reserve(B3_SIMPLE_STACKSIZE); + ifree.reserve(B3_SIMPLE_STACKSIZE); + stack.push_back(allocate(ifree,stock,sStkNPS(root,0,root->volume.ProjectMinimum(sortaxis,srtsgns)))); + do { + const int id=stack[stack.size()-1]; + sStkNPS se=stock[id]; + stack.pop_back();ifree.push_back(id); + if(se.mask!=inside) + { + bool out=false; + for(int i=0,j=1;(!out)&&(i<count);++i,j<<=1) + { + if(0==(se.mask&j)) + { + const int side=se.node->volume.Classify(normals[i],offsets[i],signs[i]); + switch(side) + { + case -1: out=true;break; + case +1: se.mask|=j;break; + } + } + } + if(out) continue; + } + if(policy.Descent(se.node)) + { + if(se.node->isinternal()) + { + const b3DbvtNode* pns[]={ se.node->childs[0],se.node->childs[1]}; + sStkNPS nes[]={ sStkNPS(pns[0],se.mask,pns[0]->volume.ProjectMinimum(sortaxis,srtsgns)), + sStkNPS(pns[1],se.mask,pns[1]->volume.ProjectMinimum(sortaxis,srtsgns))}; + const int q=nes[0].value<nes[1].value?1:0; + int j=stack.size(); + if(fsort&&(j>0)) + { + /* Insert 0 */ + j=nearest(&stack[0],&stock[0],nes[q].value,0,stack.size()); + stack.push_back(0); +#if B3_DBVT_USE_MEMMOVE + memmove(&stack[j+1],&stack[j],sizeof(int)*(stack.size()-j-1)); +#else + for(int k=stack.size()-1;k>j;--k) stack[k]=stack[k-1]; +#endif + stack[j]=allocate(ifree,stock,nes[q]); + /* Insert 1 */ + j=nearest(&stack[0],&stock[0],nes[1-q].value,j,stack.size()); + stack.push_back(0); +#if B3_DBVT_USE_MEMMOVE + memmove(&stack[j+1],&stack[j],sizeof(int)*(stack.size()-j-1)); +#else + for(int k=stack.size()-1;k>j;--k) stack[k]=stack[k-1]; +#endif + stack[j]=allocate(ifree,stock,nes[1-q]); + } + else + { + stack.push_back(allocate(ifree,stock,nes[q])); + stack.push_back(allocate(ifree,stock,nes[1-q])); + } + } + else + { + policy.Process(se.node,se.value); + } + } + } while(stack.size()); + } +} + +// +B3_DBVT_PREFIX +inline void b3DynamicBvh::collideTU( const b3DbvtNode* root, + B3_DBVT_IPOLICY) +{ + B3_DBVT_CHECKTYPE + if(root) + { + b3AlignedObjectArray<const b3DbvtNode*> stack; + stack.reserve(B3_SIMPLE_STACKSIZE); + stack.push_back(root); + do { + const b3DbvtNode* n=stack[stack.size()-1]; + stack.pop_back(); + if(policy.Descent(n)) + { + if(n->isinternal()) + { stack.push_back(n->childs[0]);stack.push_back(n->childs[1]); } + else + { policy.Process(n); } + } + } while(stack.size()>0); + } +} + +// +// PP Cleanup +// + +#undef B3_DBVT_USE_MEMMOVE +#undef B3_DBVT_USE_TEMPLATE +#undef B3_DBVT_VIRTUAL_DTOR +#undef B3_DBVT_VIRTUAL +#undef B3_DBVT_PREFIX +#undef B3_DBVT_IPOLICY +#undef B3_DBVT_CHECKTYPE +#undef B3_DBVT_IMPL_GENERIC +#undef B3_DBVT_IMPL_SSE +#undef B3_DBVT_USE_INTRINSIC_SSE +#undef B3_DBVT_SELECT_IMPL +#undef B3_DBVT_MERGE_IMPL +#undef B3_DBVT_INT0_IMPL + +#endif |