diff options
Diffstat (limited to 'thirdparty/opus/celt/x86')
-rw-r--r-- | thirdparty/opus/celt/x86/celt_lpc_sse.c | 132 | ||||
-rw-r--r-- | thirdparty/opus/celt/x86/celt_lpc_sse.h | 68 | ||||
-rw-r--r-- | thirdparty/opus/celt/x86/pitch_sse.c | 185 | ||||
-rw-r--r-- | thirdparty/opus/celt/x86/pitch_sse.h | 192 | ||||
-rw-r--r-- | thirdparty/opus/celt/x86/pitch_sse2.c | 95 | ||||
-rw-r--r-- | thirdparty/opus/celt/x86/pitch_sse4_1.c | 195 | ||||
-rw-r--r-- | thirdparty/opus/celt/x86/x86_celt_map.c | 155 | ||||
-rw-r--r-- | thirdparty/opus/celt/x86/x86cpu.c | 157 | ||||
-rw-r--r-- | thirdparty/opus/celt/x86/x86cpu.h | 93 |
9 files changed, 0 insertions, 1272 deletions
diff --git a/thirdparty/opus/celt/x86/celt_lpc_sse.c b/thirdparty/opus/celt/x86/celt_lpc_sse.c deleted file mode 100644 index 67e5592acf..0000000000 --- a/thirdparty/opus/celt/x86/celt_lpc_sse.c +++ /dev/null @@ -1,132 +0,0 @@ -/* Copyright (c) 2014, Cisco Systems, INC - Written by XiangMingZhu WeiZhou MinPeng YanWang - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include <xmmintrin.h> -#include <emmintrin.h> -#include <smmintrin.h> -#include "celt_lpc.h" -#include "stack_alloc.h" -#include "mathops.h" -#include "pitch.h" -#include "x86cpu.h" - -#if defined(FIXED_POINT) - -void celt_fir_sse4_1(const opus_val16 *_x, - const opus_val16 *num, - opus_val16 *_y, - int N, - int ord, - opus_val16 *mem, - int arch) -{ - int i,j; - VARDECL(opus_val16, rnum); - VARDECL(opus_val16, x); - - __m128i vecNoA; - opus_int32 noA ; - SAVE_STACK; - - ALLOC(rnum, ord, opus_val16); - ALLOC(x, N+ord, opus_val16); - for(i=0;i<ord;i++) - rnum[i] = num[ord-i-1]; - for(i=0;i<ord;i++) - x[i] = mem[ord-i-1]; - - for (i=0;i<N-7;i+=8) - { - x[i+ord ]=_x[i ]; - x[i+ord+1]=_x[i+1]; - x[i+ord+2]=_x[i+2]; - x[i+ord+3]=_x[i+3]; - x[i+ord+4]=_x[i+4]; - x[i+ord+5]=_x[i+5]; - x[i+ord+6]=_x[i+6]; - x[i+ord+7]=_x[i+7]; - } - - for (;i<N-3;i+=4) - { - x[i+ord ]=_x[i ]; - x[i+ord+1]=_x[i+1]; - x[i+ord+2]=_x[i+2]; - x[i+ord+3]=_x[i+3]; - } - - for (;i<N;i++) - x[i+ord]=_x[i]; - - for(i=0;i<ord;i++) - mem[i] = _x[N-i-1]; -#ifdef SMALL_FOOTPRINT - for (i=0;i<N;i++) - { - opus_val32 sum = SHL32(EXTEND32(_x[i]), SIG_SHIFT); - for (j=0;j<ord;j++) - { - sum = MAC16_16(sum,rnum[j],x[i+j]); - } - _y[i] = SATURATE16(PSHR32(sum, SIG_SHIFT)); - } -#else - noA = EXTEND32(1) << SIG_SHIFT >> 1; - vecNoA = _mm_set_epi32(noA, noA, noA, noA); - - for (i=0;i<N-3;i+=4) - { - opus_val32 sums[4] = {0}; - __m128i vecSum, vecX; - - xcorr_kernel(rnum, x+i, sums, ord, arch); - - vecSum = _mm_loadu_si128((__m128i *)sums); - vecSum = _mm_add_epi32(vecSum, vecNoA); - vecSum = _mm_srai_epi32(vecSum, SIG_SHIFT); - vecX = OP_CVTEPI16_EPI32_M64(_x + i); - vecSum = _mm_add_epi32(vecSum, vecX); - vecSum = _mm_packs_epi32(vecSum, vecSum); - _mm_storel_epi64((__m128i *)(_y + i), vecSum); - } - for (;i<N;i++) - { - opus_val32 sum = 0; - for (j=0;j<ord;j++) - sum = MAC16_16(sum, rnum[j], x[i + j]); - _y[i] = SATURATE16(ADD32(EXTEND32(_x[i]), PSHR32(sum, SIG_SHIFT))); - } - -#endif - RESTORE_STACK; -} - -#endif diff --git a/thirdparty/opus/celt/x86/celt_lpc_sse.h b/thirdparty/opus/celt/x86/celt_lpc_sse.h deleted file mode 100644 index c5ec796ed5..0000000000 --- a/thirdparty/opus/celt/x86/celt_lpc_sse.h +++ /dev/null @@ -1,68 +0,0 @@ -/* Copyright (c) 2014, Cisco Systems, INC - Written by XiangMingZhu WeiZhou MinPeng YanWang - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef CELT_LPC_SSE_H -#define CELT_LPC_SSE_H - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#if defined(OPUS_X86_MAY_HAVE_SSE4_1) && defined(FIXED_POINT) -#define OVERRIDE_CELT_FIR - -void celt_fir_sse4_1( - const opus_val16 *x, - const opus_val16 *num, - opus_val16 *y, - int N, - int ord, - opus_val16 *mem, - int arch); - -#if defined(OPUS_X86_PRESUME_SSE4_1) -#define celt_fir(x, num, y, N, ord, mem, arch) \ - ((void)arch, celt_fir_sse4_1(x, num, y, N, ord, mem, arch)) - -#else - -extern void (*const CELT_FIR_IMPL[OPUS_ARCHMASK + 1])( - const opus_val16 *x, - const opus_val16 *num, - opus_val16 *y, - int N, - int ord, - opus_val16 *mem, - int arch); - -# define celt_fir(x, num, y, N, ord, mem, arch) \ - ((*CELT_FIR_IMPL[(arch) & OPUS_ARCHMASK])(x, num, y, N, ord, mem, arch)) - -#endif -#endif - -#endif diff --git a/thirdparty/opus/celt/x86/pitch_sse.c b/thirdparty/opus/celt/x86/pitch_sse.c deleted file mode 100644 index 20e73126b6..0000000000 --- a/thirdparty/opus/celt/x86/pitch_sse.c +++ /dev/null @@ -1,185 +0,0 @@ -/* Copyright (c) 2014, Cisco Systems, INC - Written by XiangMingZhu WeiZhou MinPeng YanWang - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "macros.h" -#include "celt_lpc.h" -#include "stack_alloc.h" -#include "mathops.h" -#include "pitch.h" - -#if defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT) - -#include <xmmintrin.h> -#include "arch.h" - -void xcorr_kernel_sse(const opus_val16 *x, const opus_val16 *y, opus_val32 sum[4], int len) -{ - int j; - __m128 xsum1, xsum2; - xsum1 = _mm_loadu_ps(sum); - xsum2 = _mm_setzero_ps(); - - for (j = 0; j < len-3; j += 4) - { - __m128 x0 = _mm_loadu_ps(x+j); - __m128 yj = _mm_loadu_ps(y+j); - __m128 y3 = _mm_loadu_ps(y+j+3); - - xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0x00),yj)); - xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0x55), - _mm_shuffle_ps(yj,y3,0x49))); - xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0xaa), - _mm_shuffle_ps(yj,y3,0x9e))); - xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0xff),y3)); - } - if (j < len) - { - xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j))); - if (++j < len) - { - xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j))); - if (++j < len) - { - xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j))); - } - } - } - _mm_storeu_ps(sum,_mm_add_ps(xsum1,xsum2)); -} - - -void dual_inner_prod_sse(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02, - int N, opus_val32 *xy1, opus_val32 *xy2) -{ - int i; - __m128 xsum1, xsum2; - xsum1 = _mm_setzero_ps(); - xsum2 = _mm_setzero_ps(); - for (i=0;i<N-3;i+=4) - { - __m128 xi = _mm_loadu_ps(x+i); - __m128 y1i = _mm_loadu_ps(y01+i); - __m128 y2i = _mm_loadu_ps(y02+i); - xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(xi, y1i)); - xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(xi, y2i)); - } - /* Horizontal sum */ - xsum1 = _mm_add_ps(xsum1, _mm_movehl_ps(xsum1, xsum1)); - xsum1 = _mm_add_ss(xsum1, _mm_shuffle_ps(xsum1, xsum1, 0x55)); - _mm_store_ss(xy1, xsum1); - xsum2 = _mm_add_ps(xsum2, _mm_movehl_ps(xsum2, xsum2)); - xsum2 = _mm_add_ss(xsum2, _mm_shuffle_ps(xsum2, xsum2, 0x55)); - _mm_store_ss(xy2, xsum2); - for (;i<N;i++) - { - *xy1 = MAC16_16(*xy1, x[i], y01[i]); - *xy2 = MAC16_16(*xy2, x[i], y02[i]); - } -} - -opus_val32 celt_inner_prod_sse(const opus_val16 *x, const opus_val16 *y, - int N) -{ - int i; - float xy; - __m128 sum; - sum = _mm_setzero_ps(); - /* FIXME: We should probably go 8-way and use 2 sums. */ - for (i=0;i<N-3;i+=4) - { - __m128 xi = _mm_loadu_ps(x+i); - __m128 yi = _mm_loadu_ps(y+i); - sum = _mm_add_ps(sum,_mm_mul_ps(xi, yi)); - } - /* Horizontal sum */ - sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum)); - sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55)); - _mm_store_ss(&xy, sum); - for (;i<N;i++) - { - xy = MAC16_16(xy, x[i], y[i]); - } - return xy; -} - -void comb_filter_const_sse(opus_val32 *y, opus_val32 *x, int T, int N, - opus_val16 g10, opus_val16 g11, opus_val16 g12) -{ - int i; - __m128 x0v; - __m128 g10v, g11v, g12v; - g10v = _mm_load1_ps(&g10); - g11v = _mm_load1_ps(&g11); - g12v = _mm_load1_ps(&g12); - x0v = _mm_loadu_ps(&x[-T-2]); - for (i=0;i<N-3;i+=4) - { - __m128 yi, yi2, x1v, x2v, x3v, x4v; - const opus_val32 *xp = &x[i-T-2]; - yi = _mm_loadu_ps(x+i); - x4v = _mm_loadu_ps(xp+4); -#if 0 - /* Slower version with all loads */ - x1v = _mm_loadu_ps(xp+1); - x2v = _mm_loadu_ps(xp+2); - x3v = _mm_loadu_ps(xp+3); -#else - x2v = _mm_shuffle_ps(x0v, x4v, 0x4e); - x1v = _mm_shuffle_ps(x0v, x2v, 0x99); - x3v = _mm_shuffle_ps(x2v, x4v, 0x99); -#endif - - yi = _mm_add_ps(yi, _mm_mul_ps(g10v,x2v)); -#if 0 /* Set to 1 to make it bit-exact with the non-SSE version */ - yi = _mm_add_ps(yi, _mm_mul_ps(g11v,_mm_add_ps(x3v,x1v))); - yi = _mm_add_ps(yi, _mm_mul_ps(g12v,_mm_add_ps(x4v,x0v))); -#else - /* Use partial sums */ - yi2 = _mm_add_ps(_mm_mul_ps(g11v,_mm_add_ps(x3v,x1v)), - _mm_mul_ps(g12v,_mm_add_ps(x4v,x0v))); - yi = _mm_add_ps(yi, yi2); -#endif - x0v=x4v; - _mm_storeu_ps(y+i, yi); - } -#ifdef CUSTOM_MODES - for (;i<N;i++) - { - y[i] = x[i] - + MULT16_32_Q15(g10,x[i-T]) - + MULT16_32_Q15(g11,ADD32(x[i-T+1],x[i-T-1])) - + MULT16_32_Q15(g12,ADD32(x[i-T+2],x[i-T-2])); - } -#endif -} - - -#endif diff --git a/thirdparty/opus/celt/x86/pitch_sse.h b/thirdparty/opus/celt/x86/pitch_sse.h deleted file mode 100644 index e5f87ab51a..0000000000 --- a/thirdparty/opus/celt/x86/pitch_sse.h +++ /dev/null @@ -1,192 +0,0 @@ -/* Copyright (c) 2013 Jean-Marc Valin and John Ridges - Copyright (c) 2014, Cisco Systems, INC MingXiang WeiZhou MinPeng YanWang*/ -/** - @file pitch_sse.h - @brief Pitch analysis - */ - -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef PITCH_SSE_H -#define PITCH_SSE_H - -#if defined(HAVE_CONFIG_H) -#include "config.h" -#endif - -#if defined(OPUS_X86_MAY_HAVE_SSE4_1) && defined(FIXED_POINT) -void xcorr_kernel_sse4_1( - const opus_int16 *x, - const opus_int16 *y, - opus_val32 sum[4], - int len); -#endif - -#if defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT) -void xcorr_kernel_sse( - const opus_val16 *x, - const opus_val16 *y, - opus_val32 sum[4], - int len); -#endif - -#if defined(OPUS_X86_PRESUME_SSE4_1) && defined(FIXED_POINT) -#define OVERRIDE_XCORR_KERNEL -#define xcorr_kernel(x, y, sum, len, arch) \ - ((void)arch, xcorr_kernel_sse4_1(x, y, sum, len)) - -#elif defined(OPUS_X86_PRESUME_SSE) && !defined(FIXED_POINT) -#define OVERRIDE_XCORR_KERNEL -#define xcorr_kernel(x, y, sum, len, arch) \ - ((void)arch, xcorr_kernel_sse(x, y, sum, len)) - -#elif (defined(OPUS_X86_MAY_HAVE_SSE4_1) && defined(FIXED_POINT)) || (defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT)) - -extern void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])( - const opus_val16 *x, - const opus_val16 *y, - opus_val32 sum[4], - int len); - -#define OVERRIDE_XCORR_KERNEL -#define xcorr_kernel(x, y, sum, len, arch) \ - ((*XCORR_KERNEL_IMPL[(arch) & OPUS_ARCHMASK])(x, y, sum, len)) - -#endif - -#if defined(OPUS_X86_MAY_HAVE_SSE4_1) && defined(FIXED_POINT) -opus_val32 celt_inner_prod_sse4_1( - const opus_int16 *x, - const opus_int16 *y, - int N); -#endif - -#if defined(OPUS_X86_MAY_HAVE_SSE2) && defined(FIXED_POINT) -opus_val32 celt_inner_prod_sse2( - const opus_int16 *x, - const opus_int16 *y, - int N); -#endif - -#if defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(FIXED_POINT) -opus_val32 celt_inner_prod_sse( - const opus_val16 *x, - const opus_val16 *y, - int N); -#endif - - -#if defined(OPUS_X86_PRESUME_SSE4_1) && defined(FIXED_POINT) -#define OVERRIDE_CELT_INNER_PROD -#define celt_inner_prod(x, y, N, arch) \ - ((void)arch, celt_inner_prod_sse4_1(x, y, N)) - -#elif defined(OPUS_X86_PRESUME_SSE2) && defined(FIXED_POINT) && !defined(OPUS_X86_MAY_HAVE_SSE4_1) -#define OVERRIDE_CELT_INNER_PROD -#define celt_inner_prod(x, y, N, arch) \ - ((void)arch, celt_inner_prod_sse2(x, y, N)) - -#elif defined(OPUS_X86_PRESUME_SSE) && !defined(FIXED_POINT) -#define OVERRIDE_CELT_INNER_PROD -#define celt_inner_prod(x, y, N, arch) \ - ((void)arch, celt_inner_prod_sse(x, y, N)) - - -#elif ((defined(OPUS_X86_MAY_HAVE_SSE4_1) || defined(OPUS_X86_MAY_HAVE_SSE2)) && defined(FIXED_POINT)) || \ - (defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT)) - -extern opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])( - const opus_val16 *x, - const opus_val16 *y, - int N); - -#define OVERRIDE_CELT_INNER_PROD -#define celt_inner_prod(x, y, N, arch) \ - ((*CELT_INNER_PROD_IMPL[(arch) & OPUS_ARCHMASK])(x, y, N)) - -#endif - -#if defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT) - -#define OVERRIDE_DUAL_INNER_PROD -#define OVERRIDE_COMB_FILTER_CONST - -#undef dual_inner_prod -#undef comb_filter_const - -void dual_inner_prod_sse(const opus_val16 *x, - const opus_val16 *y01, - const opus_val16 *y02, - int N, - opus_val32 *xy1, - opus_val32 *xy2); - -void comb_filter_const_sse(opus_val32 *y, - opus_val32 *x, - int T, - int N, - opus_val16 g10, - opus_val16 g11, - opus_val16 g12); - - -#if defined(OPUS_X86_PRESUME_SSE) -# define dual_inner_prod(x, y01, y02, N, xy1, xy2, arch) \ - ((void)(arch),dual_inner_prod_sse(x, y01, y02, N, xy1, xy2)) - -# define comb_filter_const(y, x, T, N, g10, g11, g12, arch) \ - ((void)(arch),comb_filter_const_sse(y, x, T, N, g10, g11, g12)) -#else - -extern void (*const DUAL_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])( - const opus_val16 *x, - const opus_val16 *y01, - const opus_val16 *y02, - int N, - opus_val32 *xy1, - opus_val32 *xy2); - -#define dual_inner_prod(x, y01, y02, N, xy1, xy2, arch) \ - ((*DUAL_INNER_PROD_IMPL[(arch) & OPUS_ARCHMASK])(x, y01, y02, N, xy1, xy2)) - -extern void (*const COMB_FILTER_CONST_IMPL[OPUS_ARCHMASK + 1])( - opus_val32 *y, - opus_val32 *x, - int T, - int N, - opus_val16 g10, - opus_val16 g11, - opus_val16 g12); - -#define comb_filter_const(y, x, T, N, g10, g11, g12, arch) \ - ((*COMB_FILTER_CONST_IMPL[(arch) & OPUS_ARCHMASK])(y, x, T, N, g10, g11, g12)) - -#define NON_STATIC_COMB_FILTER_CONST_C - -#endif -#endif - -#endif diff --git a/thirdparty/opus/celt/x86/pitch_sse2.c b/thirdparty/opus/celt/x86/pitch_sse2.c deleted file mode 100644 index a0e7d1beaf..0000000000 --- a/thirdparty/opus/celt/x86/pitch_sse2.c +++ /dev/null @@ -1,95 +0,0 @@ -/* Copyright (c) 2014, Cisco Systems, INC - Written by XiangMingZhu WeiZhou MinPeng YanWang - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include <xmmintrin.h> -#include <emmintrin.h> - -#include "macros.h" -#include "celt_lpc.h" -#include "stack_alloc.h" -#include "mathops.h" -#include "pitch.h" - -#if defined(OPUS_X86_MAY_HAVE_SSE2) && defined(FIXED_POINT) -opus_val32 celt_inner_prod_sse2(const opus_val16 *x, const opus_val16 *y, - int N) -{ - opus_int i, dataSize16; - opus_int32 sum; - - __m128i inVec1_76543210, inVec1_FEDCBA98, acc1; - __m128i inVec2_76543210, inVec2_FEDCBA98, acc2; - - sum = 0; - dataSize16 = N & ~15; - - acc1 = _mm_setzero_si128(); - acc2 = _mm_setzero_si128(); - - for (i=0;i<dataSize16;i+=16) - { - inVec1_76543210 = _mm_loadu_si128((__m128i *)(&x[i + 0])); - inVec2_76543210 = _mm_loadu_si128((__m128i *)(&y[i + 0])); - - inVec1_FEDCBA98 = _mm_loadu_si128((__m128i *)(&x[i + 8])); - inVec2_FEDCBA98 = _mm_loadu_si128((__m128i *)(&y[i + 8])); - - inVec1_76543210 = _mm_madd_epi16(inVec1_76543210, inVec2_76543210); - inVec1_FEDCBA98 = _mm_madd_epi16(inVec1_FEDCBA98, inVec2_FEDCBA98); - - acc1 = _mm_add_epi32(acc1, inVec1_76543210); - acc2 = _mm_add_epi32(acc2, inVec1_FEDCBA98); - } - - acc1 = _mm_add_epi32( acc1, acc2 ); - - if (N - i >= 8) - { - inVec1_76543210 = _mm_loadu_si128((__m128i *)(&x[i + 0])); - inVec2_76543210 = _mm_loadu_si128((__m128i *)(&y[i + 0])); - - inVec1_76543210 = _mm_madd_epi16(inVec1_76543210, inVec2_76543210); - - acc1 = _mm_add_epi32(acc1, inVec1_76543210); - i += 8; - } - - acc1 = _mm_add_epi32(acc1, _mm_unpackhi_epi64( acc1, acc1)); - acc1 = _mm_add_epi32(acc1, _mm_shufflelo_epi16( acc1, 0x0E)); - sum += _mm_cvtsi128_si32(acc1); - - for (;i<N;i++) { - sum = silk_SMLABB(sum, x[i], y[i]); - } - - return sum; -} -#endif diff --git a/thirdparty/opus/celt/x86/pitch_sse4_1.c b/thirdparty/opus/celt/x86/pitch_sse4_1.c deleted file mode 100644 index a092c68b24..0000000000 --- a/thirdparty/opus/celt/x86/pitch_sse4_1.c +++ /dev/null @@ -1,195 +0,0 @@ -/* Copyright (c) 2014, Cisco Systems, INC - Written by XiangMingZhu WeiZhou MinPeng YanWang - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include <xmmintrin.h> -#include <emmintrin.h> - -#include "macros.h" -#include "celt_lpc.h" -#include "stack_alloc.h" -#include "mathops.h" -#include "pitch.h" - -#if defined(OPUS_X86_MAY_HAVE_SSE4_1) && defined(FIXED_POINT) -#include <smmintrin.h> -#include "x86cpu.h" - -opus_val32 celt_inner_prod_sse4_1(const opus_val16 *x, const opus_val16 *y, - int N) -{ - opus_int i, dataSize16; - opus_int32 sum; - __m128i inVec1_76543210, inVec1_FEDCBA98, acc1; - __m128i inVec2_76543210, inVec2_FEDCBA98, acc2; - __m128i inVec1_3210, inVec2_3210; - - sum = 0; - dataSize16 = N & ~15; - - acc1 = _mm_setzero_si128(); - acc2 = _mm_setzero_si128(); - - for (i=0;i<dataSize16;i+=16) { - inVec1_76543210 = _mm_loadu_si128((__m128i *)(&x[i + 0])); - inVec2_76543210 = _mm_loadu_si128((__m128i *)(&y[i + 0])); - - inVec1_FEDCBA98 = _mm_loadu_si128((__m128i *)(&x[i + 8])); - inVec2_FEDCBA98 = _mm_loadu_si128((__m128i *)(&y[i + 8])); - - inVec1_76543210 = _mm_madd_epi16(inVec1_76543210, inVec2_76543210); - inVec1_FEDCBA98 = _mm_madd_epi16(inVec1_FEDCBA98, inVec2_FEDCBA98); - - acc1 = _mm_add_epi32(acc1, inVec1_76543210); - acc2 = _mm_add_epi32(acc2, inVec1_FEDCBA98); - } - - acc1 = _mm_add_epi32(acc1, acc2); - - if (N - i >= 8) - { - inVec1_76543210 = _mm_loadu_si128((__m128i *)(&x[i + 0])); - inVec2_76543210 = _mm_loadu_si128((__m128i *)(&y[i + 0])); - - inVec1_76543210 = _mm_madd_epi16(inVec1_76543210, inVec2_76543210); - - acc1 = _mm_add_epi32(acc1, inVec1_76543210); - i += 8; - } - - if (N - i >= 4) - { - inVec1_3210 = OP_CVTEPI16_EPI32_M64(&x[i + 0]); - inVec2_3210 = OP_CVTEPI16_EPI32_M64(&y[i + 0]); - - inVec1_3210 = _mm_mullo_epi32(inVec1_3210, inVec2_3210); - - acc1 = _mm_add_epi32(acc1, inVec1_3210); - i += 4; - } - - acc1 = _mm_add_epi32(acc1, _mm_unpackhi_epi64(acc1, acc1)); - acc1 = _mm_add_epi32(acc1, _mm_shufflelo_epi16(acc1, 0x0E)); - - sum += _mm_cvtsi128_si32(acc1); - - for (;i<N;i++) - { - sum = silk_SMLABB(sum, x[i], y[i]); - } - - return sum; -} - -void xcorr_kernel_sse4_1(const opus_val16 * x, const opus_val16 * y, opus_val32 sum[ 4 ], int len) -{ - int j; - - __m128i vecX, vecX0, vecX1, vecX2, vecX3; - __m128i vecY0, vecY1, vecY2, vecY3; - __m128i sum0, sum1, sum2, sum3, vecSum; - __m128i initSum; - - celt_assert(len >= 3); - - sum0 = _mm_setzero_si128(); - sum1 = _mm_setzero_si128(); - sum2 = _mm_setzero_si128(); - sum3 = _mm_setzero_si128(); - - for (j=0;j<(len-7);j+=8) - { - vecX = _mm_loadu_si128((__m128i *)(&x[j + 0])); - vecY0 = _mm_loadu_si128((__m128i *)(&y[j + 0])); - vecY1 = _mm_loadu_si128((__m128i *)(&y[j + 1])); - vecY2 = _mm_loadu_si128((__m128i *)(&y[j + 2])); - vecY3 = _mm_loadu_si128((__m128i *)(&y[j + 3])); - - sum0 = _mm_add_epi32(sum0, _mm_madd_epi16(vecX, vecY0)); - sum1 = _mm_add_epi32(sum1, _mm_madd_epi16(vecX, vecY1)); - sum2 = _mm_add_epi32(sum2, _mm_madd_epi16(vecX, vecY2)); - sum3 = _mm_add_epi32(sum3, _mm_madd_epi16(vecX, vecY3)); - } - - sum0 = _mm_add_epi32(sum0, _mm_unpackhi_epi64( sum0, sum0)); - sum0 = _mm_add_epi32(sum0, _mm_shufflelo_epi16( sum0, 0x0E)); - - sum1 = _mm_add_epi32(sum1, _mm_unpackhi_epi64( sum1, sum1)); - sum1 = _mm_add_epi32(sum1, _mm_shufflelo_epi16( sum1, 0x0E)); - - sum2 = _mm_add_epi32(sum2, _mm_unpackhi_epi64( sum2, sum2)); - sum2 = _mm_add_epi32(sum2, _mm_shufflelo_epi16( sum2, 0x0E)); - - sum3 = _mm_add_epi32(sum3, _mm_unpackhi_epi64( sum3, sum3)); - sum3 = _mm_add_epi32(sum3, _mm_shufflelo_epi16( sum3, 0x0E)); - - vecSum = _mm_unpacklo_epi64(_mm_unpacklo_epi32(sum0, sum1), - _mm_unpacklo_epi32(sum2, sum3)); - - for (;j<(len-3);j+=4) - { - vecX = OP_CVTEPI16_EPI32_M64(&x[j + 0]); - vecX0 = _mm_shuffle_epi32(vecX, 0x00); - vecX1 = _mm_shuffle_epi32(vecX, 0x55); - vecX2 = _mm_shuffle_epi32(vecX, 0xaa); - vecX3 = _mm_shuffle_epi32(vecX, 0xff); - - vecY0 = OP_CVTEPI16_EPI32_M64(&y[j + 0]); - vecY1 = OP_CVTEPI16_EPI32_M64(&y[j + 1]); - vecY2 = OP_CVTEPI16_EPI32_M64(&y[j + 2]); - vecY3 = OP_CVTEPI16_EPI32_M64(&y[j + 3]); - - sum0 = _mm_mullo_epi32(vecX0, vecY0); - sum1 = _mm_mullo_epi32(vecX1, vecY1); - sum2 = _mm_mullo_epi32(vecX2, vecY2); - sum3 = _mm_mullo_epi32(vecX3, vecY3); - - sum0 = _mm_add_epi32(sum0, sum1); - sum2 = _mm_add_epi32(sum2, sum3); - vecSum = _mm_add_epi32(vecSum, sum0); - vecSum = _mm_add_epi32(vecSum, sum2); - } - - for (;j<len;j++) - { - vecX = OP_CVTEPI16_EPI32_M64(&x[j + 0]); - vecX0 = _mm_shuffle_epi32(vecX, 0x00); - - vecY0 = OP_CVTEPI16_EPI32_M64(&y[j + 0]); - - sum0 = _mm_mullo_epi32(vecX0, vecY0); - vecSum = _mm_add_epi32(vecSum, sum0); - } - - initSum = _mm_loadu_si128((__m128i *)(&sum[0])); - initSum = _mm_add_epi32(initSum, vecSum); - _mm_storeu_si128((__m128i *)sum, initSum); -} -#endif diff --git a/thirdparty/opus/celt/x86/x86_celt_map.c b/thirdparty/opus/celt/x86/x86_celt_map.c deleted file mode 100644 index 47ba41b9ee..0000000000 --- a/thirdparty/opus/celt/x86/x86_celt_map.c +++ /dev/null @@ -1,155 +0,0 @@ -/* Copyright (c) 2014, Cisco Systems, INC - Written by XiangMingZhu WeiZhou MinPeng YanWang - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#if defined(HAVE_CONFIG_H) -#include "config.h" -#endif - -#include "x86/x86cpu.h" -#include "celt_lpc.h" -#include "pitch.h" -#include "pitch_sse.h" - -#if defined(OPUS_HAVE_RTCD) - -# if defined(FIXED_POINT) - -#if defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1) - -void (*const CELT_FIR_IMPL[OPUS_ARCHMASK + 1])( - const opus_val16 *x, - const opus_val16 *num, - opus_val16 *y, - int N, - int ord, - opus_val16 *mem, - int arch -) = { - celt_fir_c, /* non-sse */ - celt_fir_c, - celt_fir_c, - MAY_HAVE_SSE4_1(celt_fir), /* sse4.1 */ - MAY_HAVE_SSE4_1(celt_fir) /* avx */ -}; - -void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])( - const opus_val16 *x, - const opus_val16 *y, - opus_val32 sum[4], - int len -) = { - xcorr_kernel_c, /* non-sse */ - xcorr_kernel_c, - xcorr_kernel_c, - MAY_HAVE_SSE4_1(xcorr_kernel), /* sse4.1 */ - MAY_HAVE_SSE4_1(xcorr_kernel) /* avx */ -}; - -#endif - -#if (defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) || \ - (!defined(OPUS_X86_MAY_HAVE_SSE_4_1) && defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)) - -opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])( - const opus_val16 *x, - const opus_val16 *y, - int N -) = { - celt_inner_prod_c, /* non-sse */ - celt_inner_prod_c, - MAY_HAVE_SSE2(celt_inner_prod), - MAY_HAVE_SSE4_1(celt_inner_prod), /* sse4.1 */ - MAY_HAVE_SSE4_1(celt_inner_prod) /* avx */ -}; - -#endif - -# else - -#if defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE) - -void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])( - const opus_val16 *x, - const opus_val16 *y, - opus_val32 sum[4], - int len -) = { - xcorr_kernel_c, /* non-sse */ - MAY_HAVE_SSE(xcorr_kernel), - MAY_HAVE_SSE(xcorr_kernel), - MAY_HAVE_SSE(xcorr_kernel), - MAY_HAVE_SSE(xcorr_kernel) -}; - -opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])( - const opus_val16 *x, - const opus_val16 *y, - int N -) = { - celt_inner_prod_c, /* non-sse */ - MAY_HAVE_SSE(celt_inner_prod), - MAY_HAVE_SSE(celt_inner_prod), - MAY_HAVE_SSE(celt_inner_prod), - MAY_HAVE_SSE(celt_inner_prod) -}; - -void (*const DUAL_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])( - const opus_val16 *x, - const opus_val16 *y01, - const opus_val16 *y02, - int N, - opus_val32 *xy1, - opus_val32 *xy2 -) = { - dual_inner_prod_c, /* non-sse */ - MAY_HAVE_SSE(dual_inner_prod), - MAY_HAVE_SSE(dual_inner_prod), - MAY_HAVE_SSE(dual_inner_prod), - MAY_HAVE_SSE(dual_inner_prod) -}; - -void (*const COMB_FILTER_CONST_IMPL[OPUS_ARCHMASK + 1])( - opus_val32 *y, - opus_val32 *x, - int T, - int N, - opus_val16 g10, - opus_val16 g11, - opus_val16 g12 -) = { - comb_filter_const_c, /* non-sse */ - MAY_HAVE_SSE(comb_filter_const), - MAY_HAVE_SSE(comb_filter_const), - MAY_HAVE_SSE(comb_filter_const), - MAY_HAVE_SSE(comb_filter_const) -}; - - -#endif - -#endif -#endif diff --git a/thirdparty/opus/celt/x86/x86cpu.c b/thirdparty/opus/celt/x86/x86cpu.c deleted file mode 100644 index 080eb25e41..0000000000 --- a/thirdparty/opus/celt/x86/x86cpu.c +++ /dev/null @@ -1,157 +0,0 @@ -/* Copyright (c) 2014, Cisco Systems, INC - Written by XiangMingZhu WeiZhou MinPeng YanWang - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "cpu_support.h" -#include "macros.h" -#include "main.h" -#include "pitch.h" -#include "x86cpu.h" - -#if (defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \ - (defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)) || \ - (defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) || \ - (defined(OPUS_X86_MAY_HAVE_AVX) && !defined(OPUS_X86_PRESUME_AVX)) - - -#if defined(_MSC_VER) - -#include <intrin.h> -static _inline void cpuid(unsigned int CPUInfo[4], unsigned int InfoType) -{ - __cpuid((int*)CPUInfo, InfoType); -} - -#else - -#if defined(CPU_INFO_BY_C) -#include <cpuid.h> -#endif - -static void cpuid(unsigned int CPUInfo[4], unsigned int InfoType) -{ -#if defined(CPU_INFO_BY_ASM) -#if defined(__i386__) && defined(__PIC__) -/* %ebx is PIC register in 32-bit, so mustn't clobber it. */ - __asm__ __volatile__ ( - "xchg %%ebx, %1\n" - "cpuid\n" - "xchg %%ebx, %1\n": - "=a" (CPUInfo[0]), - "=r" (CPUInfo[1]), - "=c" (CPUInfo[2]), - "=d" (CPUInfo[3]) : - "0" (InfoType) - ); -#else - __asm__ __volatile__ ( - "cpuid": - "=a" (CPUInfo[0]), - "=b" (CPUInfo[1]), - "=c" (CPUInfo[2]), - "=d" (CPUInfo[3]) : - "0" (InfoType) - ); -#endif -#elif defined(CPU_INFO_BY_C) - __get_cpuid(InfoType, &(CPUInfo[0]), &(CPUInfo[1]), &(CPUInfo[2]), &(CPUInfo[3])); -#endif -} - -#endif - -typedef struct CPU_Feature{ - /* SIMD: 128-bit */ - int HW_SSE; - int HW_SSE2; - int HW_SSE41; - /* SIMD: 256-bit */ - int HW_AVX; -} CPU_Feature; - -static void opus_cpu_feature_check(CPU_Feature *cpu_feature) -{ - unsigned int info[4] = {0}; - unsigned int nIds = 0; - - cpuid(info, 0); - nIds = info[0]; - - if (nIds >= 1){ - cpuid(info, 1); - cpu_feature->HW_SSE = (info[3] & (1 << 25)) != 0; - cpu_feature->HW_SSE2 = (info[3] & (1 << 26)) != 0; - cpu_feature->HW_SSE41 = (info[2] & (1 << 19)) != 0; - cpu_feature->HW_AVX = (info[2] & (1 << 28)) != 0; - } - else { - cpu_feature->HW_SSE = 0; - cpu_feature->HW_SSE2 = 0; - cpu_feature->HW_SSE41 = 0; - cpu_feature->HW_AVX = 0; - } -} - -int opus_select_arch(void) -{ - CPU_Feature cpu_feature; - int arch; - - opus_cpu_feature_check(&cpu_feature); - - arch = 0; - if (!cpu_feature.HW_SSE) - { - return arch; - } - arch++; - - if (!cpu_feature.HW_SSE2) - { - return arch; - } - arch++; - - if (!cpu_feature.HW_SSE41) - { - return arch; - } - arch++; - - if (!cpu_feature.HW_AVX) - { - return arch; - } - arch++; - - return arch; -} - -#endif diff --git a/thirdparty/opus/celt/x86/x86cpu.h b/thirdparty/opus/celt/x86/x86cpu.h deleted file mode 100644 index 04fd48aac4..0000000000 --- a/thirdparty/opus/celt/x86/x86cpu.h +++ /dev/null @@ -1,93 +0,0 @@ -/* Copyright (c) 2014, Cisco Systems, INC - Written by XiangMingZhu WeiZhou MinPeng YanWang - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#if !defined(X86CPU_H) -# define X86CPU_H - -# if defined(OPUS_X86_MAY_HAVE_SSE) -# define MAY_HAVE_SSE(name) name ## _sse -# else -# define MAY_HAVE_SSE(name) name ## _c -# endif - -# if defined(OPUS_X86_MAY_HAVE_SSE2) -# define MAY_HAVE_SSE2(name) name ## _sse2 -# else -# define MAY_HAVE_SSE2(name) name ## _c -# endif - -# if defined(OPUS_X86_MAY_HAVE_SSE4_1) -# define MAY_HAVE_SSE4_1(name) name ## _sse4_1 -# else -# define MAY_HAVE_SSE4_1(name) name ## _c -# endif - -# if defined(OPUS_X86_MAY_HAVE_AVX) -# define MAY_HAVE_AVX(name) name ## _avx -# else -# define MAY_HAVE_AVX(name) name ## _c -# endif - -# if defined(OPUS_HAVE_RTCD) -int opus_select_arch(void); -# endif - -/*gcc appears to emit MOVDQA's to load the argument of an _mm_cvtepi8_epi32() - or _mm_cvtepi16_epi32() when optimizations are disabled, even though the - actual PMOVSXWD instruction takes an m32 or m64. Unlike a normal memory - reference, these require 16-byte alignment and load a full 16 bytes (instead - of 4 or 8), possibly reading out of bounds. - - We can insert an explicit MOVD or MOVQ using _mm_cvtsi32_si128() or - _mm_loadl_epi64(), which should have the same semantics as an m32 or m64 - reference in the PMOVSXWD instruction itself, but gcc is not smart enough to - optimize this out when optimizations ARE enabled. - - Clang, in contrast, requires us to do this always for _mm_cvtepi8_epi32 - (which is fair, since technically the compiler is always allowed to do the - dereference before invoking the function implementing the intrinsic). - However, it is smart enough to eliminate the extra MOVD instruction. - For _mm_cvtepi16_epi32, it does the right thing, though does *not* optimize out - the extra MOVQ if it's specified explicitly */ - -# if defined(__clang__) || !defined(__OPTIMIZE__) -# define OP_CVTEPI8_EPI32_M32(x) \ - (_mm_cvtepi8_epi32(_mm_cvtsi32_si128(*(int *)(x)))) -# else -# define OP_CVTEPI8_EPI32_M32(x) \ - (_mm_cvtepi8_epi32(*(__m128i *)(x))) -#endif - -# if !defined(__OPTIMIZE__) -# define OP_CVTEPI16_EPI32_M64(x) \ - (_mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i *)(x)))) -# else -# define OP_CVTEPI16_EPI32_M64(x) \ - (_mm_cvtepi16_epi32(*(__m128i *)(x))) -# endif - -#endif |