diff options
author | George Marques <george@gmarqu.es> | 2016-05-01 12:48:46 -0300 |
---|---|---|
committer | George Marques <george@gmarqu.es> | 2016-05-02 13:18:34 -0300 |
commit | 7c59d819a7ebb936d51ca032e66a2489e4080d08 (patch) | |
tree | df6747620113a36812194db3456729aac2fa1b08 /drivers/opus/celt/x86/x86cpu.h | |
parent | a3d81cab8a97eeece54ebadb82c40532188b4d57 (diff) |
Update Opus driver to 1.1.2
And opusfile to 0.7.
Diffstat (limited to 'drivers/opus/celt/x86/x86cpu.h')
-rw-r--r-- | drivers/opus/celt/x86/x86cpu.h | 93 |
1 files changed, 93 insertions, 0 deletions
diff --git a/drivers/opus/celt/x86/x86cpu.h b/drivers/opus/celt/x86/x86cpu.h new file mode 100644 index 0000000000..04fd48aac4 --- /dev/null +++ b/drivers/opus/celt/x86/x86cpu.h @@ -0,0 +1,93 @@ +/* Copyright (c) 2014, Cisco Systems, INC + Written by XiangMingZhu WeiZhou MinPeng YanWang + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#if !defined(X86CPU_H) +# define X86CPU_H + +# if defined(OPUS_X86_MAY_HAVE_SSE) +# define MAY_HAVE_SSE(name) name ## _sse +# else +# define MAY_HAVE_SSE(name) name ## _c +# endif + +# if defined(OPUS_X86_MAY_HAVE_SSE2) +# define MAY_HAVE_SSE2(name) name ## _sse2 +# else +# define MAY_HAVE_SSE2(name) name ## _c +# endif + +# if defined(OPUS_X86_MAY_HAVE_SSE4_1) +# define MAY_HAVE_SSE4_1(name) name ## _sse4_1 +# else +# define MAY_HAVE_SSE4_1(name) name ## _c +# endif + +# if defined(OPUS_X86_MAY_HAVE_AVX) +# define MAY_HAVE_AVX(name) name ## _avx +# else +# define MAY_HAVE_AVX(name) name ## _c +# endif + +# if defined(OPUS_HAVE_RTCD) +int opus_select_arch(void); +# endif + +/*gcc appears to emit MOVDQA's to load the argument of an _mm_cvtepi8_epi32() + or _mm_cvtepi16_epi32() when optimizations are disabled, even though the + actual PMOVSXWD instruction takes an m32 or m64. Unlike a normal memory + reference, these require 16-byte alignment and load a full 16 bytes (instead + of 4 or 8), possibly reading out of bounds. + + We can insert an explicit MOVD or MOVQ using _mm_cvtsi32_si128() or + _mm_loadl_epi64(), which should have the same semantics as an m32 or m64 + reference in the PMOVSXWD instruction itself, but gcc is not smart enough to + optimize this out when optimizations ARE enabled. + + Clang, in contrast, requires us to do this always for _mm_cvtepi8_epi32 + (which is fair, since technically the compiler is always allowed to do the + dereference before invoking the function implementing the intrinsic). + However, it is smart enough to eliminate the extra MOVD instruction. + For _mm_cvtepi16_epi32, it does the right thing, though does *not* optimize out + the extra MOVQ if it's specified explicitly */ + +# if defined(__clang__) || !defined(__OPTIMIZE__) +# define OP_CVTEPI8_EPI32_M32(x) \ + (_mm_cvtepi8_epi32(_mm_cvtsi32_si128(*(int *)(x)))) +# else +# define OP_CVTEPI8_EPI32_M32(x) \ + (_mm_cvtepi8_epi32(*(__m128i *)(x))) +#endif + +# if !defined(__OPTIMIZE__) +# define OP_CVTEPI16_EPI32_M64(x) \ + (_mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i *)(x)))) +# else +# define OP_CVTEPI16_EPI32_M64(x) \ + (_mm_cvtepi16_epi32(*(__m128i *)(x))) +# endif + +#endif |