author    | Rémi Verschelde <rverschelde@gmail.com> | 2016-10-13 21:48:06 +0200
committer | Rémi Verschelde <rverschelde@gmail.com> | 2016-10-15 12:01:28 +0200
commit    | 8311a78df5cdf257297c1ec7493cb098dc76f010 (patch)
tree      | b8d84615a8b5429516f18253e0b3e201869bf53a /thirdparty/squish/simd_sse.h
parent    | 8981ff8a84e8cc6967afe3a11613e7d72d8ac599 (diff)
squish: Move to a module and split thirdparty lib
Diffstat (limited to 'thirdparty/squish/simd_sse.h')
-rw-r--r-- | thirdparty/squish/simd_sse.h | 180
1 file changed, 180 insertions, 0 deletions
diff --git a/thirdparty/squish/simd_sse.h b/thirdparty/squish/simd_sse.h
new file mode 100644
index 0000000000..e584f2a0e5
--- /dev/null
+++ b/thirdparty/squish/simd_sse.h
@@ -0,0 +1,180 @@
+/* -----------------------------------------------------------------------------
+
+    Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
+
+    Permission is hereby granted, free of charge, to any person obtaining
+    a copy of this software and associated documentation files (the
+    "Software"), to deal in the Software without restriction, including
+    without limitation the rights to use, copy, modify, merge, publish,
+    distribute, sublicense, and/or sell copies of the Software, and to
+    permit persons to whom the Software is furnished to do so, subject to
+    the following conditions:
+
+    The above copyright notice and this permission notice shall be included
+    in all copies or substantial portions of the Software.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+    OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+    IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+    CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+    TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+    SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+   -------------------------------------------------------------------------- */
+
+#ifndef SQUISH_SIMD_SSE_H
+#define SQUISH_SIMD_SSE_H
+
+#include <xmmintrin.h>
+#if ( SQUISH_USE_SSE > 1 )
+#include <emmintrin.h>
+#endif
+
+#define SQUISH_SSE_SPLAT( a ) \
+    ( ( a ) | ( ( a ) << 2 ) | ( ( a ) << 4 ) | ( ( a ) << 6 ) )
+
+#define SQUISH_SSE_SHUF( x, y, z, w ) \
+    ( ( x ) | ( ( y ) << 2 ) | ( ( z ) << 4 ) | ( ( w ) << 6 ) )
+
+namespace squish {
+
+#define VEC4_CONST( X ) Vec4( X )
+
+class Vec4
+{
+public:
+    typedef Vec4 const& Arg;
+
+    Vec4() {}
+
+    explicit Vec4( __m128 v ) : m_v( v ) {}
+
+    Vec4( Vec4 const& arg ) : m_v( arg.m_v ) {}
+
+    Vec4& operator=( Vec4 const& arg )
+    {
+        m_v = arg.m_v;
+        return *this;
+    }
+
+    explicit Vec4( float s ) : m_v( _mm_set1_ps( s ) ) {}
+
+    Vec4( float x, float y, float z, float w ) : m_v( _mm_setr_ps( x, y, z, w ) ) {}
+
+    Vec3 GetVec3() const
+    {
+#ifdef __GNUC__
+        __attribute__ ((__aligned__ (16))) float c[4];
+#else
+        __declspec(align(16)) float c[4];
+#endif
+        _mm_store_ps( c, m_v );
+        return Vec3( c[0], c[1], c[2] );
+    }
+
+    Vec4 SplatX() const { return Vec4( _mm_shuffle_ps( m_v, m_v, SQUISH_SSE_SPLAT( 0 ) ) ); }
+    Vec4 SplatY() const { return Vec4( _mm_shuffle_ps( m_v, m_v, SQUISH_SSE_SPLAT( 1 ) ) ); }
+    Vec4 SplatZ() const { return Vec4( _mm_shuffle_ps( m_v, m_v, SQUISH_SSE_SPLAT( 2 ) ) ); }
+    Vec4 SplatW() const { return Vec4( _mm_shuffle_ps( m_v, m_v, SQUISH_SSE_SPLAT( 3 ) ) ); }
+
+    Vec4& operator+=( Arg v )
+    {
+        m_v = _mm_add_ps( m_v, v.m_v );
+        return *this;
+    }
+
+    Vec4& operator-=( Arg v )
+    {
+        m_v = _mm_sub_ps( m_v, v.m_v );
+        return *this;
+    }
+
+    Vec4& operator*=( Arg v )
+    {
+        m_v = _mm_mul_ps( m_v, v.m_v );
+        return *this;
+    }
+
+    friend Vec4 operator+( Vec4::Arg left, Vec4::Arg right )
+    {
+        return Vec4( _mm_add_ps( left.m_v, right.m_v ) );
+    }
+
+    friend Vec4 operator-( Vec4::Arg left, Vec4::Arg right )
+    {
+        return Vec4( _mm_sub_ps( left.m_v, right.m_v ) );
+    }
+
+    friend Vec4 operator*( Vec4::Arg left, Vec4::Arg right )
+    {
+        return Vec4( _mm_mul_ps( left.m_v, right.m_v ) );
+    }
+
+    //! Returns a*b + c
+    friend Vec4 MultiplyAdd( Vec4::Arg a, Vec4::Arg b, Vec4::Arg c )
+    {
+        return Vec4( _mm_add_ps( _mm_mul_ps( a.m_v, b.m_v ), c.m_v ) );
+    }
+
+    //! Returns -( a*b - c )
+    friend Vec4 NegativeMultiplySubtract( Vec4::Arg a, Vec4::Arg b, Vec4::Arg c )
+    {
+        return Vec4( _mm_sub_ps( c.m_v, _mm_mul_ps( a.m_v, b.m_v ) ) );
+    }
+
+    friend Vec4 Reciprocal( Vec4::Arg v )
+    {
+        // get the reciprocal estimate
+        __m128 estimate = _mm_rcp_ps( v.m_v );
+
+        // one round of Newton-Raphson refinement
+        __m128 diff = _mm_sub_ps( _mm_set1_ps( 1.0f ), _mm_mul_ps( estimate, v.m_v ) );
+        return Vec4( _mm_add_ps( _mm_mul_ps( diff, estimate ), estimate ) );
+    }
+
+    friend Vec4 Min( Vec4::Arg left, Vec4::Arg right )
+    {
+        return Vec4( _mm_min_ps( left.m_v, right.m_v ) );
+    }
+
+    friend Vec4 Max( Vec4::Arg left, Vec4::Arg right )
+    {
+        return Vec4( _mm_max_ps( left.m_v, right.m_v ) );
+    }
+
+    friend Vec4 Truncate( Vec4::Arg v )
+    {
+#if ( SQUISH_USE_SSE == 1 )
+        // convert to ints
+        __m128 input = v.m_v;
+        __m64 lo = _mm_cvttps_pi32( input );
+        __m64 hi = _mm_cvttps_pi32( _mm_movehl_ps( input, input ) );
+
+        // convert to floats
+        __m128 part = _mm_movelh_ps( input, _mm_cvtpi32_ps( input, hi ) );
+        __m128 truncated = _mm_cvtpi32_ps( part, lo );
+
+        // clear out the MMX multimedia state to allow FP calls later
+        _mm_empty();
+        return Vec4( truncated );
+#else
+        // use SSE2 instructions
+        return Vec4( _mm_cvtepi32_ps( _mm_cvttps_epi32( v.m_v ) ) );
+#endif
+    }
+
+    friend bool CompareAnyLessThan( Vec4::Arg left, Vec4::Arg right )
+    {
+        __m128 bits = _mm_cmplt_ps( left.m_v, right.m_v );
+        int value = _mm_movemask_ps( bits );
+        return value != 0;
+    }
+
+private:
+    __m128 m_v;
+};
+
+} // namespace squish
+
+#endif // ndef SQUISH_SIMD_SSE_H
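Two details of the new header are easy to miss when skimming the diff. First, `SQUISH_SSE_SPLAT( a )` is simply `SQUISH_SSE_SHUF( a, a, a, a )`: both macros pack four 2-bit lane selectors into the 8-bit immediate that `_mm_shuffle_ps` expects, with the lowest two bits choosing the source lane for destination lane 0. The standalone sketch below illustrates that encoding; it is not part of the commit, and the `main` driver and printed values are illustrative assumptions only.

```cpp
// Hypothetical demo (not in the commit): how SQUISH_SSE_SHUF builds the
// _mm_shuffle_ps immediate. Compile with e.g. `g++ -msse shuf_demo.cpp`.
#include <xmmintrin.h>
#include <cstdio>

// Same packing as the header: four 2-bit lane indices, lane 0 in the low bits.
#define SQUISH_SSE_SHUF( x, y, z, w ) \
    ( ( x ) | ( ( y ) << 2 ) | ( ( z ) << 4 ) | ( ( w ) << 6 ) )

int main()
{
    __m128 v = _mm_setr_ps( 10.0f, 20.0f, 30.0f, 40.0f );

    // Reverse the lanes: immediate = 3 | 2<<2 | 1<<4 | 0<<6 = 27.
    __m128 r = _mm_shuffle_ps( v, v, SQUISH_SSE_SHUF( 3, 2, 1, 0 ) );

    float out[4];
    _mm_storeu_ps( out, r );
    // Prints: 40 30 20 10 (immediate = 27)
    std::printf( "%g %g %g %g (immediate = %d)\n",
                 out[0], out[1], out[2], out[3], SQUISH_SSE_SHUF( 3, 2, 1, 0 ) );
    return 0;
}
```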
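Second, `Reciprocal()` does not return the raw `_mm_rcp_ps` estimate, which is only accurate to roughly 12 bits; it applies one Newton-Raphson step, refining x to x + x(1 - vx), which roughly doubles the number of correct bits. The sketch below, again an illustrative assumption rather than code from the commit, isolates that refinement so the precision gain can be printed.

```cpp
// Hypothetical demo (not in the commit): one Newton-Raphson refinement of
// the SSE reciprocal estimate, mirroring squish::Vec4::Reciprocal().
#include <xmmintrin.h>
#include <cstdio>

int main()
{
    __m128 v = _mm_setr_ps( 2.0f, 3.0f, 7.0f, 10.0f );

    // Low-precision hardware estimate of 1/v (about 12 correct bits).
    __m128 estimate = _mm_rcp_ps( v );

    // One Newton-Raphson step, exactly as in the header:
    // diff = 1 - v*estimate, refined = estimate + estimate*diff.
    __m128 diff    = _mm_sub_ps( _mm_set1_ps( 1.0f ), _mm_mul_ps( estimate, v ) );
    __m128 refined = _mm_add_ps( _mm_mul_ps( diff, estimate ), estimate );

    float in[4], raw[4], fixed[4];
    _mm_storeu_ps( in, v );
    _mm_storeu_ps( raw, estimate );
    _mm_storeu_ps( fixed, refined );
    for ( int i = 0; i < 4; ++i )
        std::printf( "1/%g: estimate %.8f, refined %.8f\n", in[i], raw[i], fixed[i] );
    return 0;
}
```

On modern x86-64 parts a plain `_mm_div_ps` is often competitive, but in 2006-era code the estimate-plus-refinement idiom was a common way to avoid the high latency of `divps`.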