From 7c59d819a7ebb936d51ca032e66a2489e4080d08 Mon Sep 17 00:00:00 2001 From: George Marques Date: Sun, 1 May 2016 12:48:46 -0300 Subject: Update Opus driver to 1.1.2 And opusfile to 0.7. --- drivers/opus/celt/_kiss_fft_guts.h | 17 +- drivers/opus/celt/arch.h | 41 +- drivers/opus/celt/arm/arm2gnu.pl | 53 +- drivers/opus/celt/arm/arm_celt_map.c | 87 +++- drivers/opus/celt/arm/armcpu.c | 9 +- drivers/opus/celt/arm/armopts.s | 37 -- drivers/opus/celt/arm/celt_ne10_fft.c | 172 +++++++ drivers/opus/celt/arm/celt_ne10_mdct.c | 256 ++++++++++ drivers/opus/celt/arm/celt_neon_intr.c | 249 +++++++++ drivers/opus/celt/arm/celt_pitch_xcorr_arm-gnu.S | 551 ++++++++++++++++++++ drivers/opus/celt/arm/celt_pitch_xcorr_arm.s | 24 +- drivers/opus/celt/arm/fft_arm.h | 72 +++ drivers/opus/celt/arm/fixed_armv4.h | 4 + drivers/opus/celt/arm/fixed_armv5e.h | 35 ++ drivers/opus/celt/arm/kiss_fft_armv4.h | 2 +- drivers/opus/celt/arm/kiss_fft_armv5e.h | 2 +- drivers/opus/celt/arm/mdct_arm.h | 60 +++ drivers/opus/celt/arm/pitch_arm.h | 15 +- drivers/opus/celt/bands.c | 244 ++++----- drivers/opus/celt/bands.h | 28 +- drivers/opus/celt/celt.c | 101 +++- drivers/opus/celt/celt.h | 23 +- drivers/opus/celt/celt_decoder.c | 366 +++++++------ drivers/opus/celt/celt_encoder.c | 391 +++++++------- drivers/opus/celt/celt_lpc.c | 19 +- drivers/opus/celt/celt_lpc.h | 19 +- drivers/opus/celt/cpu_support.h | 22 +- drivers/opus/celt/cwrs.c | 43 +- drivers/opus/celt/cwrs.h | 2 +- drivers/opus/celt/entcode.c | 63 ++- drivers/opus/celt/entcode.h | 35 ++ drivers/opus/celt/entdec.c | 5 +- drivers/opus/celt/entenc.c | 9 +- drivers/opus/celt/fixed_debug.h | 11 + drivers/opus/celt/fixed_generic.h | 19 +- drivers/opus/celt/float_cast.h | 4 +- drivers/opus/celt/kiss_fft.c | 497 +++++++----------- drivers/opus/celt/kiss_fft.h | 75 ++- drivers/opus/celt/laplace.c | 3 - drivers/opus/celt/mathops.c | 3 - drivers/opus/celt/mathops.h | 2 +- drivers/opus/celt/mdct.c | 216 ++++---- drivers/opus/celt/mdct.h | 58 ++- drivers/opus/celt/mips/celt_mipsr1.h | 148 ++++++ drivers/opus/celt/mips/fixed_generic_mipsr1.h | 126 +++++ drivers/opus/celt/mips/kiss_fft_mipsr1.h | 167 ++++++ drivers/opus/celt/mips/mdct_mipsr1.h | 286 +++++++++++ drivers/opus/celt/mips/pitch_mipsr1.h | 161 ++++++ drivers/opus/celt/mips/vq_mipsr1.h | 122 +++++ drivers/opus/celt/modes.c | 13 +- drivers/opus/celt/modes.h | 75 +++ drivers/opus/celt/opus_custom_demo.c | 9 +- drivers/opus/celt/opus_modes.h | 83 --- drivers/opus/celt/os_support.h | 6 +- drivers/opus/celt/pitch.c | 82 +-- drivers/opus/celt/pitch.h | 75 ++- drivers/opus/celt/quant_bands.c | 5 +- drivers/opus/celt/quant_bands.h | 2 +- drivers/opus/celt/rate.c | 14 +- drivers/opus/celt/rate.h | 4 +- drivers/opus/celt/stack_alloc.h | 18 +- drivers/opus/celt/static_modes_fixed.h | 623 +++++++++++++++++------ drivers/opus/celt/static_modes_fixed_arm_ne10.h | 388 ++++++++++++++ drivers/opus/celt/static_modes_float.h | 615 ++++++++++++++++------ drivers/opus/celt/static_modes_float_arm_ne10.h | 404 +++++++++++++++ drivers/opus/celt/tests/test_unit_cwrs32.c | 161 ------ drivers/opus/celt/tests/test_unit_dft.c | 164 ------ drivers/opus/celt/tests/test_unit_entropy.c | 382 -------------- drivers/opus/celt/tests/test_unit_laplace.c | 92 ---- drivers/opus/celt/tests/test_unit_mathops.c | 275 ---------- drivers/opus/celt/tests/test_unit_mdct.c | 210 -------- drivers/opus/celt/tests/test_unit_rotation.c | 90 ---- drivers/opus/celt/tests/test_unit_types.c | 50 -- drivers/opus/celt/vq.c | 58 +-- drivers/opus/celt/vq.h | 11 +- drivers/opus/celt/x86/celt_lpc_sse.c | 129 +++++ drivers/opus/celt/x86/celt_lpc_sse.h | 65 +++ drivers/opus/celt/x86/pitch_sse.c | 182 +++++++ drivers/opus/celt/x86/pitch_sse.h | 257 ++++++---- drivers/opus/celt/x86/pitch_sse2.c | 92 ++++ drivers/opus/celt/x86/pitch_sse4_1.c | 192 +++++++ drivers/opus/celt/x86/x86_celt_map.c | 152 ++++++ drivers/opus/celt/x86/x86cpu.c | 154 ++++++ drivers/opus/celt/x86/x86cpu.h | 93 ++++ 84 files changed, 7069 insertions(+), 3110 deletions(-) delete mode 100644 drivers/opus/celt/arm/armopts.s create mode 100644 drivers/opus/celt/arm/celt_ne10_fft.c create mode 100644 drivers/opus/celt/arm/celt_ne10_mdct.c create mode 100644 drivers/opus/celt/arm/celt_neon_intr.c create mode 100644 drivers/opus/celt/arm/celt_pitch_xcorr_arm-gnu.S create mode 100644 drivers/opus/celt/arm/fft_arm.h create mode 100644 drivers/opus/celt/arm/mdct_arm.h create mode 100644 drivers/opus/celt/mips/celt_mipsr1.h create mode 100644 drivers/opus/celt/mips/fixed_generic_mipsr1.h create mode 100644 drivers/opus/celt/mips/kiss_fft_mipsr1.h create mode 100644 drivers/opus/celt/mips/mdct_mipsr1.h create mode 100644 drivers/opus/celt/mips/pitch_mipsr1.h create mode 100644 drivers/opus/celt/mips/vq_mipsr1.h create mode 100644 drivers/opus/celt/modes.h delete mode 100644 drivers/opus/celt/opus_modes.h create mode 100644 drivers/opus/celt/static_modes_fixed_arm_ne10.h create mode 100644 drivers/opus/celt/static_modes_float_arm_ne10.h delete mode 100644 drivers/opus/celt/tests/test_unit_cwrs32.c delete mode 100644 drivers/opus/celt/tests/test_unit_dft.c delete mode 100644 drivers/opus/celt/tests/test_unit_entropy.c delete mode 100644 drivers/opus/celt/tests/test_unit_laplace.c delete mode 100644 drivers/opus/celt/tests/test_unit_mathops.c delete mode 100644 drivers/opus/celt/tests/test_unit_mdct.c delete mode 100644 drivers/opus/celt/tests/test_unit_rotation.c delete mode 100644 drivers/opus/celt/tests/test_unit_types.c create mode 100644 drivers/opus/celt/x86/celt_lpc_sse.c create mode 100644 drivers/opus/celt/x86/celt_lpc_sse.h create mode 100644 drivers/opus/celt/x86/pitch_sse.c create mode 100644 drivers/opus/celt/x86/pitch_sse2.c create mode 100644 drivers/opus/celt/x86/pitch_sse4_1.c create mode 100644 drivers/opus/celt/x86/x86_celt_map.c create mode 100644 drivers/opus/celt/x86/x86cpu.c create mode 100644 drivers/opus/celt/x86/x86cpu.h (limited to 'drivers/opus/celt') diff --git a/drivers/opus/celt/_kiss_fft_guts.h b/drivers/opus/celt/_kiss_fft_guts.h index 2a4ee744ef..a41f9a1a05 100644 --- a/drivers/opus/celt/_kiss_fft_guts.h +++ b/drivers/opus/celt/_kiss_fft_guts.h @@ -65,10 +65,6 @@ do{ (m).r = ADD32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \ (m).i = SUB32(S_MUL((a).i,(b).r) , S_MUL((a).r,(b).i)); }while(0) -# define C_MUL4(m,a,b) \ - do{ (m).r = SHR32(SUB32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)),2); \ - (m).i = SHR32(ADD32(S_MUL((a).r,(b).i) , S_MUL((a).i,(b).r)),2); }while(0) - # define C_MULBYSCALAR( c, s ) \ do{ (c).r = S_MUL( (c).r , s ) ;\ (c).i = S_MUL( (c).i , s ) ; }while(0) @@ -95,14 +91,17 @@ }while(0) #if defined(OPUS_ARM_INLINE_ASM) -#include "arm/kiss_fft_armv4.h" +#include "opus/celt/arm/kiss_fft_armv4.h" #endif #if defined(OPUS_ARM_INLINE_EDSP) -#include "arm/kiss_fft_armv5e.h" +#include "opus/celt/arm/kiss_fft_armv5e.h" +#endif +#if defined(MIPSr1_ASM) +#include "opus/celt/mips/kiss_fft_mipsr1.h" #endif -#else /* not OPUS_FIXED_POINT*/ +#else /* not FIXED_POINT*/ # define S_MUL(a,b) ( (a)*(b) ) #define C_MUL(m,a,b) \ @@ -153,8 +152,8 @@ #endif /* C_ADD defined */ #ifdef OPUS_FIXED_POINT -# define KISS_FFT_COS(phase) TRIG_UPSCALE*floor(MIN(32767,MAX(-32767,.5+32768 * cos (phase)))) -# define KISS_FFT_SIN(phase) TRIG_UPSCALE*floor(MIN(32767,MAX(-32767,.5+32768 * sin (phase)))) +/*# define KISS_FFT_COS(phase) TRIG_UPSCALE*floor(MIN(32767,MAX(-32767,.5+32768 * cos (phase)))) +# define KISS_FFT_SIN(phase) TRIG_UPSCALE*floor(MIN(32767,MAX(-32767,.5+32768 * sin (phase))))*/ # define KISS_FFT_COS(phase) floor(.5+TWID_MAX*cos (phase)) # define KISS_FFT_SIN(phase) floor(.5+TWID_MAX*sin (phase)) # define HALF_OF(x) ((x)>>1) diff --git a/drivers/opus/celt/arch.h b/drivers/opus/celt/arch.h index d964f8d90c..7fb036e9fe 100644 --- a/drivers/opus/celt/arch.h +++ b/drivers/opus/celt/arch.h @@ -69,11 +69,8 @@ static OPUS_INLINE void _celt_fatal(const char *str, const char *file, int line) #define IMUL32(a,b) ((a)*(b)) -#define ABS(x) ((x) < 0 ? (-(x)) : (x)) /**< Absolute integer value. */ -#define ABS16(x) ((x) < 0 ? (-(x)) : (x)) /**< Absolute 16-bit value. */ #define MIN16(a,b) ((a) < (b) ? (a) : (b)) /**< Minimum 16-bit value. */ #define MAX16(a,b) ((a) > (b) ? (a) : (b)) /**< Maximum 16-bit value. */ -#define ABS32(x) ((x) < 0 ? (-(x)) : (x)) /**< Absolute 32-bit value. */ #define MIN32(a,b) ((a) < (b) ? (a) : (b)) /**< Minimum 32-bit value. */ #define MAX32(a,b) ((a) > (b) ? (a) : (b)) /**< Maximum 32-bit value. */ #define IMIN(a,b) ((a) < (b) ? (a) : (b)) /**< Minimum int value. */ @@ -108,6 +105,13 @@ typedef opus_val32 celt_ener; #define SCALEIN(a) (a) #define SCALEOUT(a) (a) +#define ABS16(x) ((x) < 0 ? (-(x)) : (x)) +#define ABS32(x) ((x) < 0 ? (-(x)) : (x)) + +static OPUS_INLINE opus_int16 SAT16(opus_int32 x) { + return x > 32767 ? 32767 : x < -32768 ? -32768 : (opus_int16)x; +} + #ifdef FIXED_DEBUG #include "opus/celt/fixed_debug.h" #else @@ -115,9 +119,9 @@ typedef opus_val32 celt_ener; #include "opus/celt/fixed_generic.h" #ifdef OPUS_ARM_INLINE_EDSP -#include "arm/fixed_armv5e.h" +#include "opus/celt/arm/fixed_armv5e.h" #elif defined (OPUS_ARM_INLINE_ASM) -#include "arm/fixed_armv4.h" +#include "opus/celt/arm/fixed_armv4.h" #elif defined (BFIN_ASM) #include "fixed_bfin.h" #elif defined (TI_C5X_ASM) @@ -128,7 +132,7 @@ typedef opus_val32 celt_ener; #endif -#else /* OPUS_FIXED_POINT */ +#else /* FIXED_POINT */ typedef float opus_val16; typedef float opus_val32; @@ -137,6 +141,22 @@ typedef float celt_sig; typedef float celt_norm; typedef float celt_ener; +#ifdef FLOAT_APPROX +/* This code should reliably detect NaN/inf even when -ffast-math is used. + Assumes IEEE 754 format. */ +static OPUS_INLINE int celt_isnan(float x) +{ + union {float f; opus_uint32 i;} in; + in.f = x; + return ((in.i>>23)&0xFF)==0xFF && (in.i&0x007FFFFF)!=0; +} +#else +#ifdef __FAST_MATH__ +#error Cannot build libopus with -ffast-math unless FLOAT_APPROX is defined. This could result in crashes on extreme (e.g. NaN) input +#endif +#define celt_isnan(x) ((x)!=(x)) +#endif + #define Q15ONE 1.0f #define NORM_SCALING 1.f @@ -146,6 +166,10 @@ typedef float celt_ener; #define VERY_LARGE16 1e15f #define Q15_ONE ((opus_val16)1.f) +/* This appears to be the same speed as C99's fabsf() but it's more portable. */ +#define ABS16(x) ((float)fabs(x)) +#define ABS32(x) ((float)fabs(x)) + #define QCONST16(x,bits) (x) #define QCONST32(x,bits) (x) @@ -184,6 +208,7 @@ typedef float celt_ener; #define MULT32_32_Q31(a,b) ((a)*(b)) #define MAC16_32_Q15(c,a,b) ((c)+(a)*(b)) +#define MAC16_32_Q16(c,a,b) ((c)+(a)*(b)) #define MULT16_16_Q11_32(a,b) ((a)*(b)) #define MULT16_16_Q11(a,b) ((a)*(b)) @@ -201,7 +226,9 @@ typedef float celt_ener; #define SCALEIN(a) ((a)*CELT_SIG_SCALE) #define SCALEOUT(a) ((a)*(1/CELT_SIG_SCALE)) -#endif /* !OPUS_FIXED_POINT */ +#define SIG2WORD16(x) (x) + +#endif /* !FIXED_POINT */ #ifndef GLOBAL_STACK_SIZE #ifdef OPUS_FIXED_POINT diff --git a/drivers/opus/celt/arm/arm2gnu.pl b/drivers/opus/celt/arm/arm2gnu.pl index eab42efa2b..6c922ac819 100755 --- a/drivers/opus/celt/arm/arm2gnu.pl +++ b/drivers/opus/celt/arm/arm2gnu.pl @@ -1,7 +1,33 @@ #!/usr/bin/perl +# Copyright (C) 2002-2013 Xiph.org Foundation +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. my $bigend; # little/big endian my $nxstack; +my $apple = 0; +my $symprefix = ""; $nxstack = 0; @@ -10,11 +36,16 @@ eval 'exec /usr/local/bin/perl -S $0 ${1+"$@"}' while ($ARGV[0] =~ /^-/) { $_ = shift; - last if /^--/; - if (/^-n/) { + last if /^--$/; + if (/^-n$/) { $nflag++; next; } + if (/^--apple$/) { + $apple = 1; + $symprefix = "_"; + next; + } die "I don't recognize this switch: $_\\n"; } $printit++ unless $nflag; @@ -25,6 +56,8 @@ $n=0; $thumb = 0; # ARM mode by default, not Thumb. @proc_stack = (); +printf (" .syntax unified\n"); + LINE: while (<>) { @@ -53,7 +86,7 @@ while (<>) { s/\bINCLUDE[ \t]*([^ \t\n]+)/.include \"$1\"/; s/\bGET[ \t]*([^ \t\n]+)/.include \"${ my $x=$1; $x =~ s|\.s|-gnu.S|; \$x }\"/; s/\bIMPORT\b/.extern/; - s/\bEXPORT\b/.global/; + s/\bEXPORT\b\s*/.global $symprefix/; s/^(\s+)\[/$1IF/; s/^(\s+)\|/$1ELSE/; s/^(\s+)\]/$1ENDIF/; @@ -109,7 +142,7 @@ while (<>) { # won't match the original source file (we could use the .line # directive, which is documented to be obsolete, but then gdb will # show the wrong line in the translated source file). - s/$/; .arch armv7-a\n .fpu neon\n .object_arch armv4t/; + s/$/; .arch armv7-a\n .fpu neon\n .object_arch armv4t/ unless ($apple); } } @@ -131,9 +164,13 @@ while (<>) { $prefix = ""; if ($proc) { - $prefix = $prefix.sprintf("\t.type\t%s, %%function; ",$proc); + $prefix = $prefix.sprintf("\t.type\t%s, %%function; ",$proc) unless ($apple); + # Make sure we $prefix isn't empty here (for the $apple case). + # We handle mangling the label here, make sure it doesn't match + # the label handling below (if $prefix would be empty). + $prefix = "; "; push(@proc_stack, $proc); - s/^[A-Za-z_\.]\w+/$&:/; + s/^[A-Za-z_\.]\w+/$symprefix$&:/; } $prefix = $prefix."\t.thumb_func; " if ($thumb); s/\bPROC\b/@ $&/; @@ -146,7 +183,7 @@ while (<>) { my $proc; s/\bENDP\b/@ $&/; $proc = pop(@proc_stack); - $_ = "\t.size $proc, .-$proc".$_ if ($proc); + $_ = "\t.size $proc, .-$proc".$_ if ($proc && !$apple); } s/\bSUBT\b/@ $&/; s/\bDATA\b/@ $&/; # DATA directive is deprecated -- Asm guide, p.7-25 @@ -311,6 +348,6 @@ while (<>) { } #If we had a code section, mark that this object doesn't need an executable # stack. -if ($nxstack) { +if ($nxstack && !$apple) { printf (" .section\t.note.GNU-stack,\"\",\%\%progbits\n"); } diff --git a/drivers/opus/celt/arm/arm_celt_map.c b/drivers/opus/celt/arm/arm_celt_map.c index 31e7d5b319..92c83c1763 100644 --- a/drivers/opus/celt/arm/arm_celt_map.c +++ b/drivers/opus/celt/arm/arm_celt_map.c @@ -24,16 +24,15 @@ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ - -#ifdef OPUS_ENABLED #include "opus/opus_config.h" -#endif #include "opus/celt/pitch.h" +#include "opus/celt/kiss_fft.h" +#include "opus/celt/mdct.h" #if defined(OPUS_HAVE_RTCD) -# if defined(OPUS_FIXED_POINT) +# if defined(FIXED_POINT) opus_val32 (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *, const opus_val16 *, opus_val32 *, int , int) = { celt_pitch_xcorr_c, /* ARMv4 */ @@ -41,9 +40,79 @@ opus_val32 (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *, MAY_HAVE_MEDIA(celt_pitch_xcorr), /* Media */ MAY_HAVE_NEON(celt_pitch_xcorr) /* NEON */ }; -# else -# error "Floating-point implementation is not supported by ARM asm yet." \ - "Reconfigure with --disable-rtcd or send patches." -# endif +# else /* !FIXED_POINT */ +# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) +void (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *, + const opus_val16 *, opus_val32 *, int, int) = { + celt_pitch_xcorr_c, /* ARMv4 */ + celt_pitch_xcorr_c, /* EDSP */ + celt_pitch_xcorr_c, /* Media */ + celt_pitch_xcorr_float_neon /* Neon */ +}; +# endif +# endif /* FIXED_POINT */ + +# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) +# if defined(HAVE_ARM_NE10) +# if defined(CUSTOM_MODES) +int (*const OPUS_FFT_ALLOC_ARCH_IMPL[OPUS_ARCHMASK+1])(kiss_fft_state *st) = { + opus_fft_alloc_arch_c, /* ARMv4 */ + opus_fft_alloc_arch_c, /* EDSP */ + opus_fft_alloc_arch_c, /* Media */ + opus_fft_alloc_arm_neon /* Neon with NE10 library support */ +}; + +void (*const OPUS_FFT_FREE_ARCH_IMPL[OPUS_ARCHMASK+1])(kiss_fft_state *st) = { + opus_fft_free_arch_c, /* ARMv4 */ + opus_fft_free_arch_c, /* EDSP */ + opus_fft_free_arch_c, /* Media */ + opus_fft_free_arm_neon /* Neon with NE10 */ +}; +# endif /* CUSTOM_MODES */ + +void (*const OPUS_FFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg, + const kiss_fft_cpx *fin, + kiss_fft_cpx *fout) = { + opus_fft_c, /* ARMv4 */ + opus_fft_c, /* EDSP */ + opus_fft_c, /* Media */ + opus_fft_neon /* Neon with NE10 */ +}; + +void (*const OPUS_IFFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg, + const kiss_fft_cpx *fin, + kiss_fft_cpx *fout) = { + opus_ifft_c, /* ARMv4 */ + opus_ifft_c, /* EDSP */ + opus_ifft_c, /* Media */ + opus_ifft_neon /* Neon with NE10 */ +}; + +void (*const CLT_MDCT_FORWARD_IMPL[OPUS_ARCHMASK+1])(const mdct_lookup *l, + kiss_fft_scalar *in, + kiss_fft_scalar * OPUS_RESTRICT out, + const opus_val16 *window, + int overlap, int shift, + int stride, int arch) = { + clt_mdct_forward_c, /* ARMv4 */ + clt_mdct_forward_c, /* EDSP */ + clt_mdct_forward_c, /* Media */ + clt_mdct_forward_neon /* Neon with NE10 */ +}; + +void (*const CLT_MDCT_BACKWARD_IMPL[OPUS_ARCHMASK+1])(const mdct_lookup *l, + kiss_fft_scalar *in, + kiss_fft_scalar * OPUS_RESTRICT out, + const opus_val16 *window, + int overlap, int shift, + int stride, int arch) = { + clt_mdct_backward_c, /* ARMv4 */ + clt_mdct_backward_c, /* EDSP */ + clt_mdct_backward_c, /* Media */ + clt_mdct_backward_neon /* Neon with NE10 */ +}; + +# endif /* HAVE_ARM_NE10 */ +# endif /* OPUS_ARM_MAY_HAVE_NEON_INTR */ -#endif +#endif /* OPUS_HAVE_RTCD */ diff --git a/drivers/opus/celt/arm/armcpu.c b/drivers/opus/celt/arm/armcpu.c index fb7f2421fe..ac8ad2ec14 100644 --- a/drivers/opus/celt/arm/armcpu.c +++ b/drivers/opus/celt/arm/armcpu.c @@ -26,10 +26,7 @@ */ /* Original code from libtheora modified to suit to Opus */ - -#ifdef OPUS_ENABLED #include "opus/opus_config.h" -#endif #ifdef OPUS_HAVE_RTCD @@ -73,7 +70,7 @@ static OPUS_INLINE opus_uint32 opus_cpu_capabilities(void){ __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){ /*Ignore exception.*/ } -# if defined(OPUS_ARM_MAY_HAVE_NEON) +# if defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR) __try{ /*VORR q0,q0,q0*/ __emit(0xF2200150); @@ -107,7 +104,7 @@ opus_uint32 opus_cpu_capabilities(void) while(fgets(buf, 512, cpuinfo) != NULL) { -# if defined(OPUS_ARM_MAY_HAVE_EDSP) || defined(OPUS_ARM_MAY_HAVE_NEON) +# if defined(OPUS_ARM_MAY_HAVE_EDSP) || defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR) /* Search for edsp and neon flag */ if(memcmp(buf, "Features", 8) == 0) { @@ -118,7 +115,7 @@ opus_uint32 opus_cpu_capabilities(void) flags |= OPUS_CPU_ARM_EDSP; # endif -# if defined(OPUS_ARM_MAY_HAVE_NEON) +# if defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR) p = strstr(buf, " neon"); if(p != NULL && (p[5] == ' ' || p[5] == '\n')) flags |= OPUS_CPU_ARM_NEON; diff --git a/drivers/opus/celt/arm/armopts.s b/drivers/opus/celt/arm/armopts.s deleted file mode 100644 index fb9196072a..0000000000 --- a/drivers/opus/celt/arm/armopts.s +++ /dev/null @@ -1,37 +0,0 @@ -/* Copyright (C) 2013 Mozilla Corporation */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -; Set the following to 1 if we have EDSP instructions -; (LDRD/STRD, etc., ARMv5E and later). -OPUS_ARM_MAY_HAVE_EDSP * - -; Set the following to 1 if we have ARMv6 media instructions. -OPUS_ARM_MAY_HAVE_MEDIA * - -; Set the following to 1 if we have NEON (some ARMv7) -OPUS_ARM_MAY_HAVE_NEON * - -END diff --git a/drivers/opus/celt/arm/celt_ne10_fft.c b/drivers/opus/celt/arm/celt_ne10_fft.c new file mode 100644 index 0000000000..e57d23ee1d --- /dev/null +++ b/drivers/opus/celt/arm/celt_ne10_fft.c @@ -0,0 +1,172 @@ +/* Copyright (c) 2015 Xiph.Org Foundation + Written by Viswanath Puttagunta */ +/** + @file celt_ne10_fft.c + @brief ARM Neon optimizations for fft using NE10 library + */ + +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef SKIP_CONFIG_H +#include "opus/opus_config.h" +#endif + +#include +#include +#include "opus/celt/os_support.h" +#include "opus/celt/kiss_fft.h" +#include "opus/celt/stack_alloc.h" + +#if !defined(FIXED_POINT) +# define NE10_FFT_ALLOC_C2C_TYPE_NEON ne10_fft_alloc_c2c_float32_neon +# define NE10_FFT_CFG_TYPE_T ne10_fft_cfg_float32_t +# define NE10_FFT_STATE_TYPE_T ne10_fft_state_float32_t +# define NE10_FFT_DESTROY_C2C_TYPE ne10_fft_destroy_c2c_float32 +# define NE10_FFT_CPX_TYPE_T ne10_fft_cpx_float32_t +# define NE10_FFT_C2C_1D_TYPE_NEON ne10_fft_c2c_1d_float32_neon +#else +# define NE10_FFT_ALLOC_C2C_TYPE_NEON(nfft) ne10_fft_alloc_c2c_int32_neon(nfft) +# define NE10_FFT_CFG_TYPE_T ne10_fft_cfg_int32_t +# define NE10_FFT_STATE_TYPE_T ne10_fft_state_int32_t +# define NE10_FFT_DESTROY_C2C_TYPE ne10_fft_destroy_c2c_int32 +# define NE10_FFT_DESTROY_C2C_TYPE ne10_fft_destroy_c2c_int32 +# define NE10_FFT_CPX_TYPE_T ne10_fft_cpx_int32_t +# define NE10_FFT_C2C_1D_TYPE_NEON ne10_fft_c2c_1d_int32_neon +#endif + +#if defined(CUSTOM_MODES) + +/* nfft lengths in NE10 that support scaled fft */ +# define NE10_FFTSCALED_SUPPORT_MAX 4 +static const int ne10_fft_scaled_support[NE10_FFTSCALED_SUPPORT_MAX] = { + 480, 240, 120, 60 +}; + +int opus_fft_alloc_arm_neon(kiss_fft_state *st) +{ + int i; + size_t memneeded = sizeof(struct arch_fft_state); + + st->arch_fft = (arch_fft_state *)opus_alloc(memneeded); + if (!st->arch_fft) + return -1; + + for (i = 0; i < NE10_FFTSCALED_SUPPORT_MAX; i++) { + if(st->nfft == ne10_fft_scaled_support[i]) + break; + } + if (i == NE10_FFTSCALED_SUPPORT_MAX) { + /* This nfft length (scaled fft) is not supported in NE10 */ + st->arch_fft->is_supported = 0; + st->arch_fft->priv = NULL; + } + else { + st->arch_fft->is_supported = 1; + st->arch_fft->priv = (void *)NE10_FFT_ALLOC_C2C_TYPE_NEON(st->nfft); + if (st->arch_fft->priv == NULL) { + return -1; + } + } + return 0; +} + +void opus_fft_free_arm_neon(kiss_fft_state *st) +{ + NE10_FFT_CFG_TYPE_T cfg; + + if (!st->arch_fft) + return; + + cfg = (NE10_FFT_CFG_TYPE_T)st->arch_fft->priv; + if (cfg) + NE10_FFT_DESTROY_C2C_TYPE(cfg); + opus_free(st->arch_fft); +} +#endif + +void opus_fft_neon(const kiss_fft_state *st, + const kiss_fft_cpx *fin, + kiss_fft_cpx *fout) +{ + NE10_FFT_STATE_TYPE_T state; + NE10_FFT_CFG_TYPE_T cfg = &state; + VARDECL(NE10_FFT_CPX_TYPE_T, buffer); + SAVE_STACK; + ALLOC(buffer, st->nfft, NE10_FFT_CPX_TYPE_T); + + if (!st->arch_fft->is_supported) { + /* This nfft length (scaled fft) not supported in NE10 */ + opus_fft_c(st, fin, fout); + } + else { + memcpy((void *)cfg, st->arch_fft->priv, sizeof(NE10_FFT_STATE_TYPE_T)); + state.buffer = (NE10_FFT_CPX_TYPE_T *)&buffer[0]; +#if !defined(FIXED_POINT) + state.is_forward_scaled = 1; + + NE10_FFT_C2C_1D_TYPE_NEON((NE10_FFT_CPX_TYPE_T *)fout, + (NE10_FFT_CPX_TYPE_T *)fin, + cfg, 0); +#else + NE10_FFT_C2C_1D_TYPE_NEON((NE10_FFT_CPX_TYPE_T *)fout, + (NE10_FFT_CPX_TYPE_T *)fin, + cfg, 0, 1); +#endif + } + RESTORE_STACK; +} + +void opus_ifft_neon(const kiss_fft_state *st, + const kiss_fft_cpx *fin, + kiss_fft_cpx *fout) +{ + NE10_FFT_STATE_TYPE_T state; + NE10_FFT_CFG_TYPE_T cfg = &state; + VARDECL(NE10_FFT_CPX_TYPE_T, buffer); + SAVE_STACK; + ALLOC(buffer, st->nfft, NE10_FFT_CPX_TYPE_T); + + if (!st->arch_fft->is_supported) { + /* This nfft length (scaled fft) not supported in NE10 */ + opus_ifft_c(st, fin, fout); + } + else { + memcpy((void *)cfg, st->arch_fft->priv, sizeof(NE10_FFT_STATE_TYPE_T)); + state.buffer = (NE10_FFT_CPX_TYPE_T *)&buffer[0]; +#if !defined(FIXED_POINT) + state.is_backward_scaled = 0; + + NE10_FFT_C2C_1D_TYPE_NEON((NE10_FFT_CPX_TYPE_T *)fout, + (NE10_FFT_CPX_TYPE_T *)fin, + cfg, 1); +#else + NE10_FFT_C2C_1D_TYPE_NEON((NE10_FFT_CPX_TYPE_T *)fout, + (NE10_FFT_CPX_TYPE_T *)fin, + cfg, 1, 0); +#endif + } + RESTORE_STACK; +} diff --git a/drivers/opus/celt/arm/celt_ne10_mdct.c b/drivers/opus/celt/arm/celt_ne10_mdct.c new file mode 100644 index 0000000000..eb407b674f --- /dev/null +++ b/drivers/opus/celt/arm/celt_ne10_mdct.c @@ -0,0 +1,256 @@ +/* Copyright (c) 2015 Xiph.Org Foundation + Written by Viswanath Puttagunta */ +/** + @file celt_ne10_mdct.c + @brief ARM Neon optimizations for mdct using NE10 library + */ + +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef SKIP_CONFIG_H +#include "opus/opus_config.h" +#endif + +#include "opus/celt/kiss_fft.h" +#include "opus/celt/_kiss_fft_guts.h" +#include "opus/celt/mdct.h" +#include "opus/celt/stack_alloc.h" + +void clt_mdct_forward_neon(const mdct_lookup *l, + kiss_fft_scalar *in, + kiss_fft_scalar * OPUS_RESTRICT out, + const opus_val16 *window, + int overlap, int shift, int stride, int arch) +{ + int i; + int N, N2, N4; + VARDECL(kiss_fft_scalar, f); + VARDECL(kiss_fft_cpx, f2); + const kiss_fft_state *st = l->kfft[shift]; + const kiss_twiddle_scalar *trig; + + SAVE_STACK; + + N = l->n; + trig = l->trig; + for (i=0;i>= 1; + trig += N; + } + N2 = N>>1; + N4 = N>>2; + + ALLOC(f, N2, kiss_fft_scalar); + ALLOC(f2, N4, kiss_fft_cpx); + + /* Consider the input to be composed of four blocks: [a, b, c, d] */ + /* Window, shuffle, fold */ + { + /* Temp pointers to make it really clear to the compiler what we're doing */ + const kiss_fft_scalar * OPUS_RESTRICT xp1 = in+(overlap>>1); + const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+N2-1+(overlap>>1); + kiss_fft_scalar * OPUS_RESTRICT yp = f; + const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1); + const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1; + for(i=0;i<((overlap+3)>>2);i++) + { + /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/ + *yp++ = MULT16_32_Q15(*wp2, xp1[N2]) + MULT16_32_Q15(*wp1,*xp2); + *yp++ = MULT16_32_Q15(*wp1, *xp1) - MULT16_32_Q15(*wp2, xp2[-N2]); + xp1+=2; + xp2-=2; + wp1+=2; + wp2-=2; + } + wp1 = window; + wp2 = window+overlap-1; + for(;i>2);i++) + { + /* Real part arranged as a-bR, Imag part arranged as -c-dR */ + *yp++ = *xp2; + *yp++ = *xp1; + xp1+=2; + xp2-=2; + } + for(;ii,t[N4+i]) - S_MUL(fp->r,t[i]); + yi = S_MUL(fp->r,t[N4+i]) + S_MUL(fp->i,t[i]); + *yp1 = yr; + *yp2 = yi; + fp++; + yp1 += 2*stride; + yp2 -= 2*stride; + } + } + RESTORE_STACK; +} + +void clt_mdct_backward_neon(const mdct_lookup *l, + kiss_fft_scalar *in, + kiss_fft_scalar * OPUS_RESTRICT out, + const opus_val16 * OPUS_RESTRICT window, + int overlap, int shift, int stride, int arch) +{ + int i; + int N, N2, N4; + VARDECL(kiss_fft_scalar, f); + const kiss_twiddle_scalar *trig; + const kiss_fft_state *st = l->kfft[shift]; + + N = l->n; + trig = l->trig; + for (i=0;i>= 1; + trig += N; + } + N2 = N>>1; + N4 = N>>2; + + ALLOC(f, N2, kiss_fft_scalar); + + /* Pre-rotate */ + { + /* Temp pointers to make it really clear to the compiler what we're doing */ + const kiss_fft_scalar * OPUS_RESTRICT xp1 = in; + const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1); + kiss_fft_scalar * OPUS_RESTRICT yp = f; + const kiss_twiddle_scalar * OPUS_RESTRICT t = &trig[0]; + for(i=0;i>1)), arch); + + /* Post-rotate and de-shuffle from both ends of the buffer at once to make + it in-place. */ + { + kiss_fft_scalar * yp0 = out+(overlap>>1); + kiss_fft_scalar * yp1 = out+(overlap>>1)+N2-2; + const kiss_twiddle_scalar *t = &trig[0]; + /* Loop to (N4+1)>>1 to handle odd N4. When N4 is odd, the + middle pair will be computed twice. */ + for(i=0;i<(N4+1)>>1;i++) + { + kiss_fft_scalar re, im, yr, yi; + kiss_twiddle_scalar t0, t1; + re = yp0[0]; + im = yp0[1]; + t0 = t[i]; + t1 = t[N4+i]; + /* We'd scale up by 2 here, but instead it's done when mixing the windows */ + yr = S_MUL(re,t0) + S_MUL(im,t1); + yi = S_MUL(re,t1) - S_MUL(im,t0); + re = yp1[0]; + im = yp1[1]; + yp0[0] = yr; + yp1[1] = yi; + + t0 = t[(N4-i-1)]; + t1 = t[(N2-i-1)]; + /* We'd scale up by 2 here, but instead it's done when mixing the windows */ + yr = S_MUL(re,t0) + S_MUL(im,t1); + yi = S_MUL(re,t1) - S_MUL(im,t0); + yp1[0] = yr; + yp0[1] = yi; + yp0 += 2; + yp1 -= 2; + } + } + + /* Mirror on both sides for TDAC */ + { + kiss_fft_scalar * OPUS_RESTRICT xp1 = out+overlap-1; + kiss_fft_scalar * OPUS_RESTRICT yp1 = out; + const opus_val16 * OPUS_RESTRICT wp1 = window; + const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1; + + for(i = 0; i < overlap/2; i++) + { + kiss_fft_scalar x1, x2; + x1 = *xp1; + x2 = *yp1; + *yp1++ = MULT16_32_Q15(*wp2, x2) - MULT16_32_Q15(*wp1, x1); + *xp1-- = MULT16_32_Q15(*wp1, x2) + MULT16_32_Q15(*wp2, x1); + wp1++; + wp2--; + } + } + RESTORE_STACK; +} diff --git a/drivers/opus/celt/arm/celt_neon_intr.c b/drivers/opus/celt/arm/celt_neon_intr.c new file mode 100644 index 0000000000..82b6958644 --- /dev/null +++ b/drivers/opus/celt/arm/celt_neon_intr.c @@ -0,0 +1,249 @@ +/* Copyright (c) 2014-2015 Xiph.Org Foundation + Written by Viswanath Puttagunta */ +/** + @file celt_neon_intr.c + @brief ARM Neon Intrinsic optimizations for celt + */ + +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ +#include "opus/opus_config.h" + +#include +#include "opus/celt/pitch.h" + +#if !defined(FIXED_POINT) +/* + * Function: xcorr_kernel_neon_float + * --------------------------------- + * Computes 4 correlation values and stores them in sum[4] + */ +static void xcorr_kernel_neon_float(const float32_t *x, const float32_t *y, + float32_t sum[4], int len) { + float32x4_t YY[3]; + float32x4_t YEXT[3]; + float32x4_t XX[2]; + float32x2_t XX_2; + float32x4_t SUMM; + const float32_t *xi = x; + const float32_t *yi = y; + + celt_assert(len>0); + + YY[0] = vld1q_f32(yi); + SUMM = vdupq_n_f32(0); + + /* Consume 8 elements in x vector and 12 elements in y + * vector. However, the 12'th element never really gets + * touched in this loop. So, if len == 8, then we only + * must access y[0] to y[10]. y[11] must not be accessed + * hence make sure len > 8 and not len >= 8 + */ + while (len > 8) { + yi += 4; + YY[1] = vld1q_f32(yi); + yi += 4; + YY[2] = vld1q_f32(yi); + + XX[0] = vld1q_f32(xi); + xi += 4; + XX[1] = vld1q_f32(xi); + xi += 4; + + SUMM = vmlaq_lane_f32(SUMM, YY[0], vget_low_f32(XX[0]), 0); + YEXT[0] = vextq_f32(YY[0], YY[1], 1); + SUMM = vmlaq_lane_f32(SUMM, YEXT[0], vget_low_f32(XX[0]), 1); + YEXT[1] = vextq_f32(YY[0], YY[1], 2); + SUMM = vmlaq_lane_f32(SUMM, YEXT[1], vget_high_f32(XX[0]), 0); + YEXT[2] = vextq_f32(YY[0], YY[1], 3); + SUMM = vmlaq_lane_f32(SUMM, YEXT[2], vget_high_f32(XX[0]), 1); + + SUMM = vmlaq_lane_f32(SUMM, YY[1], vget_low_f32(XX[1]), 0); + YEXT[0] = vextq_f32(YY[1], YY[2], 1); + SUMM = vmlaq_lane_f32(SUMM, YEXT[0], vget_low_f32(XX[1]), 1); + YEXT[1] = vextq_f32(YY[1], YY[2], 2); + SUMM = vmlaq_lane_f32(SUMM, YEXT[1], vget_high_f32(XX[1]), 0); + YEXT[2] = vextq_f32(YY[1], YY[2], 3); + SUMM = vmlaq_lane_f32(SUMM, YEXT[2], vget_high_f32(XX[1]), 1); + + YY[0] = YY[2]; + len -= 8; + } + + /* Consume 4 elements in x vector and 8 elements in y + * vector. However, the 8'th element in y never really gets + * touched in this loop. So, if len == 4, then we only + * must access y[0] to y[6]. y[7] must not be accessed + * hence make sure len>4 and not len>=4 + */ + if (len > 4) { + yi += 4; + YY[1] = vld1q_f32(yi); + + XX[0] = vld1q_f32(xi); + xi += 4; + + SUMM = vmlaq_lane_f32(SUMM, YY[0], vget_low_f32(XX[0]), 0); + YEXT[0] = vextq_f32(YY[0], YY[1], 1); + SUMM = vmlaq_lane_f32(SUMM, YEXT[0], vget_low_f32(XX[0]), 1); + YEXT[1] = vextq_f32(YY[0], YY[1], 2); + SUMM = vmlaq_lane_f32(SUMM, YEXT[1], vget_high_f32(XX[0]), 0); + YEXT[2] = vextq_f32(YY[0], YY[1], 3); + SUMM = vmlaq_lane_f32(SUMM, YEXT[2], vget_high_f32(XX[0]), 1); + + YY[0] = YY[1]; + len -= 4; + } + + while (--len > 0) { + XX_2 = vld1_dup_f32(xi++); + SUMM = vmlaq_lane_f32(SUMM, YY[0], XX_2, 0); + YY[0]= vld1q_f32(++yi); + } + + XX_2 = vld1_dup_f32(xi); + SUMM = vmlaq_lane_f32(SUMM, YY[0], XX_2, 0); + + vst1q_f32(sum, SUMM); +} + +/* + * Function: xcorr_kernel_neon_float_process1 + * --------------------------------- + * Computes single correlation values and stores in *sum + */ +static void xcorr_kernel_neon_float_process1(const float32_t *x, + const float32_t *y, float32_t *sum, int len) { + float32x4_t XX[4]; + float32x4_t YY[4]; + float32x2_t XX_2; + float32x2_t YY_2; + float32x4_t SUMM; + float32x2_t SUMM_2[2]; + const float32_t *xi = x; + const float32_t *yi = y; + + SUMM = vdupq_n_f32(0); + + /* Work on 16 values per iteration */ + while (len >= 16) { + XX[0] = vld1q_f32(xi); + xi += 4; + XX[1] = vld1q_f32(xi); + xi += 4; + XX[2] = vld1q_f32(xi); + xi += 4; + XX[3] = vld1q_f32(xi); + xi += 4; + + YY[0] = vld1q_f32(yi); + yi += 4; + YY[1] = vld1q_f32(yi); + yi += 4; + YY[2] = vld1q_f32(yi); + yi += 4; + YY[3] = vld1q_f32(yi); + yi += 4; + + SUMM = vmlaq_f32(SUMM, YY[0], XX[0]); + SUMM = vmlaq_f32(SUMM, YY[1], XX[1]); + SUMM = vmlaq_f32(SUMM, YY[2], XX[2]); + SUMM = vmlaq_f32(SUMM, YY[3], XX[3]); + len -= 16; + } + + /* Work on 8 values */ + if (len >= 8) { + XX[0] = vld1q_f32(xi); + xi += 4; + XX[1] = vld1q_f32(xi); + xi += 4; + + YY[0] = vld1q_f32(yi); + yi += 4; + YY[1] = vld1q_f32(yi); + yi += 4; + + SUMM = vmlaq_f32(SUMM, YY[0], XX[0]); + SUMM = vmlaq_f32(SUMM, YY[1], XX[1]); + len -= 8; + } + + /* Work on 4 values */ + if (len >= 4) { + XX[0] = vld1q_f32(xi); + xi += 4; + YY[0] = vld1q_f32(yi); + yi += 4; + SUMM = vmlaq_f32(SUMM, YY[0], XX[0]); + len -= 4; + } + + /* Start accumulating results */ + SUMM_2[0] = vget_low_f32(SUMM); + if (len >= 2) { + /* While at it, consume 2 more values if available */ + XX_2 = vld1_f32(xi); + xi += 2; + YY_2 = vld1_f32(yi); + yi += 2; + SUMM_2[0] = vmla_f32(SUMM_2[0], YY_2, XX_2); + len -= 2; + } + SUMM_2[1] = vget_high_f32(SUMM); + SUMM_2[0] = vadd_f32(SUMM_2[0], SUMM_2[1]); + SUMM_2[0] = vpadd_f32(SUMM_2[0], SUMM_2[0]); + /* Ok, now we have result accumulated in SUMM_2[0].0 */ + + if (len > 0) { + /* Case when you have one value left */ + XX_2 = vld1_dup_f32(xi); + YY_2 = vld1_dup_f32(yi); + SUMM_2[0] = vmla_f32(SUMM_2[0], XX_2, YY_2); + } + + vst1_lane_f32(sum, SUMM_2[0], 0); +} + +void celt_pitch_xcorr_float_neon(const opus_val16 *_x, const opus_val16 *_y, + opus_val32 *xcorr, int len, int max_pitch) { + int i; + celt_assert(max_pitch > 0); + celt_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0); + + for (i = 0; i < (max_pitch-3); i += 4) { + xcorr_kernel_neon_float((const float32_t *)_x, (const float32_t *)_y+i, + (float32_t *)xcorr+i, len); + } + + /* In case max_pitch isn't multiple of 4 + * compute single correlation value per iteration + */ + for (; i < max_pitch; i++) { + xcorr_kernel_neon_float_process1((const float32_t *)_x, + (const float32_t *)_y+i, (float32_t *)xcorr+i, len); + } +} +#endif diff --git a/drivers/opus/celt/arm/celt_pitch_xcorr_arm-gnu.S b/drivers/opus/celt/arm/celt_pitch_xcorr_arm-gnu.S new file mode 100644 index 0000000000..5b2ee55a10 --- /dev/null +++ b/drivers/opus/celt/arm/celt_pitch_xcorr_arm-gnu.S @@ -0,0 +1,551 @@ + .syntax unified +@ Copyright (c) 2007-2008 CSIRO +@ Copyright (c) 2007-2009 Xiph.Org Foundation +@ Copyright (c) 2013 Parrot +@ Written by Aurélien Zanelli +@ +@ Redistribution and use in source and binary forms, with or without +@ modification, are permitted provided that the following conditions +@ are met: +@ +@ - Redistributions of source code must retain the above copyright +@ notice, this list of conditions and the following disclaimer. +@ +@ - Redistributions in binary form must reproduce the above copyright +@ notice, this list of conditions and the following disclaimer in the +@ documentation and/or other materials provided with the distribution. +@ +@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +@ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +@ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +@ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER +@ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +@ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +@ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +@ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +@ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +@ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + .text; .p2align 2; .arch armv7-a + .fpu neon + .object_arch armv4t + + .include "celt/arm/armopts-gnu.S" + + .if OPUS_ARM_MAY_HAVE_EDSP + .global celt_pitch_xcorr_edsp + .endif + + .if OPUS_ARM_MAY_HAVE_NEON + .global celt_pitch_xcorr_neon + .endif + + .if OPUS_ARM_MAY_HAVE_NEON + +@ Compute sum[k]=sum(x[j]*y[j+k],j=0...len-1), k=0...3 +; xcorr_kernel_neon: @ PROC +xcorr_kernel_neon_start: + @ input: + @ r3 = int len + @ r4 = opus_val16 *x + @ r5 = opus_val16 *y + @ q0 = opus_val32 sum[4] + @ output: + @ q0 = opus_val32 sum[4] + @ preserved: r0-r3, r6-r11, d2, q4-q7, q9-q15 + @ internal usage: + @ r12 = int j + @ d3 = y_3|y_2|y_1|y_0 + @ q2 = y_B|y_A|y_9|y_8|y_7|y_6|y_5|y_4 + @ q3 = x_7|x_6|x_5|x_4|x_3|x_2|x_1|x_0 + @ q8 = scratch + @ + @ Load y[0...3] + @ This requires len>0 to always be valid (which we assert in the C code). + VLD1.16 {d5}, [r5]! + SUBS r12, r3, #8 + BLE xcorr_kernel_neon_process4 +@ Process 8 samples at a time. +@ This loop loads one y value more than we actually need. Therefore we have to +@ stop as soon as there are 8 or fewer samples left (instead of 7), to avoid +@ reading past the end of the array. +xcorr_kernel_neon_process8: + @ This loop has 19 total instructions (10 cycles to issue, minimum), with + @ - 2 cycles of ARM insrtuctions, + @ - 10 cycles of load/store/byte permute instructions, and + @ - 9 cycles of data processing instructions. + @ On a Cortex A8, we dual-issue the maximum amount (9 cycles) between the + @ latter two categories, meaning the whole loop should run in 10 cycles per + @ iteration, barring cache misses. + @ + @ Load x[0...7] + VLD1.16 {d6, d7}, [r4]! + @ Unlike VMOV, VAND is a data processsing instruction (and doesn't get + @ assembled to VMOV, like VORR would), so it dual-issues with the prior VLD1. + VAND d3, d5, d5 + SUBS r12, r12, #8 + @ Load y[4...11] + VLD1.16 {d4, d5}, [r5]! + VMLAL.S16 q0, d3, d6[0] + VEXT.16 d16, d3, d4, #1 + VMLAL.S16 q0, d4, d7[0] + VEXT.16 d17, d4, d5, #1 + VMLAL.S16 q0, d16, d6[1] + VEXT.16 d16, d3, d4, #2 + VMLAL.S16 q0, d17, d7[1] + VEXT.16 d17, d4, d5, #2 + VMLAL.S16 q0, d16, d6[2] + VEXT.16 d16, d3, d4, #3 + VMLAL.S16 q0, d17, d7[2] + VEXT.16 d17, d4, d5, #3 + VMLAL.S16 q0, d16, d6[3] + VMLAL.S16 q0, d17, d7[3] + BGT xcorr_kernel_neon_process8 +@ Process 4 samples here if we have > 4 left (still reading one extra y value). +xcorr_kernel_neon_process4: + ADDS r12, r12, #4 + BLE xcorr_kernel_neon_process2 + @ Load x[0...3] + VLD1.16 d6, [r4]! + @ Use VAND since it's a data processing instruction again. + VAND d4, d5, d5 + SUB r12, r12, #4 + @ Load y[4...7] + VLD1.16 d5, [r5]! + VMLAL.S16 q0, d4, d6[0] + VEXT.16 d16, d4, d5, #1 + VMLAL.S16 q0, d16, d6[1] + VEXT.16 d16, d4, d5, #2 + VMLAL.S16 q0, d16, d6[2] + VEXT.16 d16, d4, d5, #3 + VMLAL.S16 q0, d16, d6[3] +@ Process 2 samples here if we have > 2 left (still reading one extra y value). +xcorr_kernel_neon_process2: + ADDS r12, r12, #2 + BLE xcorr_kernel_neon_process1 + @ Load x[0...1] + VLD2.16 {d6[],d7[]}, [r4]! + @ Use VAND since it's a data processing instruction again. + VAND d4, d5, d5 + SUB r12, r12, #2 + @ Load y[4...5] + VLD1.32 {d5[]}, [r5]! + VMLAL.S16 q0, d4, d6 + VEXT.16 d16, d4, d5, #1 + @ Replace bottom copy of {y5,y4} in d5 with {y3,y2} from d4, using VSRI + @ instead of VEXT, since it's a data-processing instruction. + VSRI.64 d5, d4, #32 + VMLAL.S16 q0, d16, d7 +@ Process 1 sample using the extra y value we loaded above. +xcorr_kernel_neon_process1: + @ Load next *x + VLD1.16 {d6[]}, [r4]! + ADDS r12, r12, #1 + @ y[0...3] are left in d5 from prior iteration(s) (if any) + VMLAL.S16 q0, d5, d6 + MOVLE pc, lr +@ Now process 1 last sample, not reading ahead. + @ Load last *y + VLD1.16 {d4[]}, [r5]! + VSRI.64 d4, d5, #16 + @ Load last *x + VLD1.16 {d6[]}, [r4]! + VMLAL.S16 q0, d4, d6 + MOV pc, lr + .size xcorr_kernel_neon, .-xcorr_kernel_neon @ ENDP + +@ opus_val32 celt_pitch_xcorr_neon(opus_val16 *_x, opus_val16 *_y, +@ opus_val32 *xcorr, int len, int max_pitch) +; celt_pitch_xcorr_neon: @ PROC + @ input: + @ r0 = opus_val16 *_x + @ r1 = opus_val16 *_y + @ r2 = opus_val32 *xcorr + @ r3 = int len + @ output: + @ r0 = int maxcorr + @ internal usage: + @ r4 = opus_val16 *x (for xcorr_kernel_neon()) + @ r5 = opus_val16 *y (for xcorr_kernel_neon()) + @ r6 = int max_pitch + @ r12 = int j + @ q15 = int maxcorr[4] (q15 is not used by xcorr_kernel_neon()) + STMFD sp!, {r4-r6, lr} + LDR r6, [sp, #16] + VMOV.S32 q15, #1 + @ if (max_pitch < 4) goto celt_pitch_xcorr_neon_process4_done + SUBS r6, r6, #4 + BLT celt_pitch_xcorr_neon_process4_done +celt_pitch_xcorr_neon_process4: + @ xcorr_kernel_neon parameters: + @ r3 = len, r4 = _x, r5 = _y, q0 = {0, 0, 0, 0} + MOV r4, r0 + MOV r5, r1 + VEOR q0, q0, q0 + @ xcorr_kernel_neon only modifies r4, r5, r12, and q0...q3. + @ So we don't save/restore any other registers. + BL xcorr_kernel_neon_start + SUBS r6, r6, #4 + VST1.32 {q0}, [r2]! + @ _y += 4 + ADD r1, r1, #8 + VMAX.S32 q15, q15, q0 + @ if (max_pitch < 4) goto celt_pitch_xcorr_neon_process4_done + BGE celt_pitch_xcorr_neon_process4 +@ We have less than 4 sums left to compute. +celt_pitch_xcorr_neon_process4_done: + ADDS r6, r6, #4 + @ Reduce maxcorr to a single value + VMAX.S32 d30, d30, d31 + VPMAX.S32 d30, d30, d30 + @ if (max_pitch <= 0) goto celt_pitch_xcorr_neon_done + BLE celt_pitch_xcorr_neon_done +@ Now compute each remaining sum one at a time. +celt_pitch_xcorr_neon_process_remaining: + MOV r4, r0 + MOV r5, r1 + VMOV.I32 q0, #0 + SUBS r12, r3, #8 + BLT celt_pitch_xcorr_neon_process_remaining4 +@ Sum terms 8 at a time. +celt_pitch_xcorr_neon_process_remaining_loop8: + @ Load x[0...7] + VLD1.16 {q1}, [r4]! + @ Load y[0...7] + VLD1.16 {q2}, [r5]! + SUBS r12, r12, #8 + VMLAL.S16 q0, d4, d2 + VMLAL.S16 q0, d5, d3 + BGE celt_pitch_xcorr_neon_process_remaining_loop8 +@ Sum terms 4 at a time. +celt_pitch_xcorr_neon_process_remaining4: + ADDS r12, r12, #4 + BLT celt_pitch_xcorr_neon_process_remaining4_done + @ Load x[0...3] + VLD1.16 {d2}, [r4]! + @ Load y[0...3] + VLD1.16 {d3}, [r5]! + SUB r12, r12, #4 + VMLAL.S16 q0, d3, d2 +celt_pitch_xcorr_neon_process_remaining4_done: + @ Reduce the sum to a single value. + VADD.S32 d0, d0, d1 + VPADDL.S32 d0, d0 + ADDS r12, r12, #4 + BLE celt_pitch_xcorr_neon_process_remaining_loop_done +@ Sum terms 1 at a time. +celt_pitch_xcorr_neon_process_remaining_loop1: + VLD1.16 {d2[]}, [r4]! + VLD1.16 {d3[]}, [r5]! + SUBS r12, r12, #1 + VMLAL.S16 q0, d2, d3 + BGT celt_pitch_xcorr_neon_process_remaining_loop1 +celt_pitch_xcorr_neon_process_remaining_loop_done: + VST1.32 {d0[0]}, [r2]! + VMAX.S32 d30, d30, d0 + SUBS r6, r6, #1 + @ _y++ + ADD r1, r1, #2 + @ if (--max_pitch > 0) goto celt_pitch_xcorr_neon_process_remaining + BGT celt_pitch_xcorr_neon_process_remaining +celt_pitch_xcorr_neon_done: + VMOV.32 r0, d30[0] + LDMFD sp!, {r4-r6, pc} + .size celt_pitch_xcorr_neon, .-celt_pitch_xcorr_neon @ ENDP + + .endif + + .if OPUS_ARM_MAY_HAVE_EDSP + +@ This will get used on ARMv7 devices without NEON, so it has been optimized +@ to take advantage of dual-issuing where possible. +; xcorr_kernel_edsp: @ PROC +xcorr_kernel_edsp_start: + @ input: + @ r3 = int len + @ r4 = opus_val16 *_x (must be 32-bit aligned) + @ r5 = opus_val16 *_y (must be 32-bit aligned) + @ r6...r9 = opus_val32 sum[4] + @ output: + @ r6...r9 = opus_val32 sum[4] + @ preserved: r0-r5 + @ internal usage + @ r2 = int j + @ r12,r14 = opus_val16 x[4] + @ r10,r11 = opus_val16 y[4] + STMFD sp!, {r2,r4,r5,lr} + LDR r10, [r5], #4 @ Load y[0...1] + SUBS r2, r3, #4 @ j = len-4 + LDR r11, [r5], #4 @ Load y[2...3] + BLE xcorr_kernel_edsp_process4_done + LDR r12, [r4], #4 @ Load x[0...1] + @ Stall +xcorr_kernel_edsp_process4: + @ The multiplies must issue from pipeline 0, and can't dual-issue with each + @ other. Every other instruction here dual-issues with a multiply, and is + @ thus "free". There should be no stalls in the body of the loop. + SMLABB r6, r12, r10, r6 @ sum[0] = MAC16_16(sum[0],x_0,y_0) + LDR r14, [r4], #4 @ Load x[2...3] + SMLABT r7, r12, r10, r7 @ sum[1] = MAC16_16(sum[1],x_0,y_1) + SUBS r2, r2, #4 @ j-=4 + SMLABB r8, r12, r11, r8 @ sum[2] = MAC16_16(sum[2],x_0,y_2) + SMLABT r9, r12, r11, r9 @ sum[3] = MAC16_16(sum[3],x_0,y_3) + SMLATT r6, r12, r10, r6 @ sum[0] = MAC16_16(sum[0],x_1,y_1) + LDR r10, [r5], #4 @ Load y[4...5] + SMLATB r7, r12, r11, r7 @ sum[1] = MAC16_16(sum[1],x_1,y_2) + SMLATT r8, r12, r11, r8 @ sum[2] = MAC16_16(sum[2],x_1,y_3) + SMLATB r9, r12, r10, r9 @ sum[3] = MAC16_16(sum[3],x_1,y_4) + LDRGT r12, [r4], #4 @ Load x[0...1] + SMLABB r6, r14, r11, r6 @ sum[0] = MAC16_16(sum[0],x_2,y_2) + SMLABT r7, r14, r11, r7 @ sum[1] = MAC16_16(sum[1],x_2,y_3) + SMLABB r8, r14, r10, r8 @ sum[2] = MAC16_16(sum[2],x_2,y_4) + SMLABT r9, r14, r10, r9 @ sum[3] = MAC16_16(sum[3],x_2,y_5) + SMLATT r6, r14, r11, r6 @ sum[0] = MAC16_16(sum[0],x_3,y_3) + LDR r11, [r5], #4 @ Load y[6...7] + SMLATB r7, r14, r10, r7 @ sum[1] = MAC16_16(sum[1],x_3,y_4) + SMLATT r8, r14, r10, r8 @ sum[2] = MAC16_16(sum[2],x_3,y_5) + SMLATB r9, r14, r11, r9 @ sum[3] = MAC16_16(sum[3],x_3,y_6) + BGT xcorr_kernel_edsp_process4 +xcorr_kernel_edsp_process4_done: + ADDS r2, r2, #4 + BLE xcorr_kernel_edsp_done + LDRH r12, [r4], #2 @ r12 = *x++ + SUBS r2, r2, #1 @ j-- + @ Stall + SMLABB r6, r12, r10, r6 @ sum[0] = MAC16_16(sum[0],x,y_0) + LDRHGT r14, [r4], #2 @ r14 = *x++ + SMLABT r7, r12, r10, r7 @ sum[1] = MAC16_16(sum[1],x,y_1) + SMLABB r8, r12, r11, r8 @ sum[2] = MAC16_16(sum[2],x,y_2) + SMLABT r9, r12, r11, r9 @ sum[3] = MAC16_16(sum[3],x,y_3) + BLE xcorr_kernel_edsp_done + SMLABT r6, r14, r10, r6 @ sum[0] = MAC16_16(sum[0],x,y_1) + SUBS r2, r2, #1 @ j-- + SMLABB r7, r14, r11, r7 @ sum[1] = MAC16_16(sum[1],x,y_2) + LDRH r10, [r5], #2 @ r10 = y_4 = *y++ + SMLABT r8, r14, r11, r8 @ sum[2] = MAC16_16(sum[2],x,y_3) + LDRHGT r12, [r4], #2 @ r12 = *x++ + SMLABB r9, r14, r10, r9 @ sum[3] = MAC16_16(sum[3],x,y_4) + BLE xcorr_kernel_edsp_done + SMLABB r6, r12, r11, r6 @ sum[0] = MAC16_16(sum[0],tmp,y_2) + CMP r2, #1 @ j-- + SMLABT r7, r12, r11, r7 @ sum[1] = MAC16_16(sum[1],tmp,y_3) + LDRH r2, [r5], #2 @ r2 = y_5 = *y++ + SMLABB r8, r12, r10, r8 @ sum[2] = MAC16_16(sum[2],tmp,y_4) + LDRHGT r14, [r4] @ r14 = *x + SMLABB r9, r12, r2, r9 @ sum[3] = MAC16_16(sum[3],tmp,y_5) + BLE xcorr_kernel_edsp_done + SMLABT r6, r14, r11, r6 @ sum[0] = MAC16_16(sum[0],tmp,y_3) + LDRH r11, [r5] @ r11 = y_6 = *y + SMLABB r7, r14, r10, r7 @ sum[1] = MAC16_16(sum[1],tmp,y_4) + SMLABB r8, r14, r2, r8 @ sum[2] = MAC16_16(sum[2],tmp,y_5) + SMLABB r9, r14, r11, r9 @ sum[3] = MAC16_16(sum[3],tmp,y_6) +xcorr_kernel_edsp_done: + LDMFD sp!, {r2,r4,r5,pc} + .size xcorr_kernel_edsp, .-xcorr_kernel_edsp @ ENDP + +; celt_pitch_xcorr_edsp: @ PROC + @ input: + @ r0 = opus_val16 *_x (must be 32-bit aligned) + @ r1 = opus_val16 *_y (only needs to be 16-bit aligned) + @ r2 = opus_val32 *xcorr + @ r3 = int len + @ output: + @ r0 = maxcorr + @ internal usage + @ r4 = opus_val16 *x + @ r5 = opus_val16 *y + @ r6 = opus_val32 sum0 + @ r7 = opus_val32 sum1 + @ r8 = opus_val32 sum2 + @ r9 = opus_val32 sum3 + @ r1 = int max_pitch + @ r12 = int j + STMFD sp!, {r4-r11, lr} + MOV r5, r1 + LDR r1, [sp, #36] + MOV r4, r0 + TST r5, #3 + @ maxcorr = 1 + MOV r0, #1 + BEQ celt_pitch_xcorr_edsp_process1u_done +@ Compute one sum at the start to make y 32-bit aligned. + SUBS r12, r3, #4 + @ r14 = sum = 0 + MOV r14, #0 + LDRH r8, [r5], #2 + BLE celt_pitch_xcorr_edsp_process1u_loop4_done + LDR r6, [r4], #4 + MOV r8, r8, LSL #16 +celt_pitch_xcorr_edsp_process1u_loop4: + LDR r9, [r5], #4 + SMLABT r14, r6, r8, r14 @ sum = MAC16_16(sum, x_0, y_0) + LDR r7, [r4], #4 + SMLATB r14, r6, r9, r14 @ sum = MAC16_16(sum, x_1, y_1) + LDR r8, [r5], #4 + SMLABT r14, r7, r9, r14 @ sum = MAC16_16(sum, x_2, y_2) + SUBS r12, r12, #4 @ j-=4 + SMLATB r14, r7, r8, r14 @ sum = MAC16_16(sum, x_3, y_3) + LDRGT r6, [r4], #4 + BGT celt_pitch_xcorr_edsp_process1u_loop4 + MOV r8, r8, LSR #16 +celt_pitch_xcorr_edsp_process1u_loop4_done: + ADDS r12, r12, #4 +celt_pitch_xcorr_edsp_process1u_loop1: + LDRHGE r6, [r4], #2 + @ Stall + SMLABBGE r14, r6, r8, r14 @ sum = MAC16_16(sum, *x, *y) + SUBSGE r12, r12, #1 + LDRHGT r8, [r5], #2 + BGT celt_pitch_xcorr_edsp_process1u_loop1 + @ Restore _x + SUB r4, r4, r3, LSL #1 + @ Restore and advance _y + SUB r5, r5, r3, LSL #1 + @ maxcorr = max(maxcorr, sum) + CMP r0, r14 + ADD r5, r5, #2 + MOVLT r0, r14 + SUBS r1, r1, #1 + @ xcorr[i] = sum + STR r14, [r2], #4 + BLE celt_pitch_xcorr_edsp_done +celt_pitch_xcorr_edsp_process1u_done: + @ if (max_pitch < 4) goto celt_pitch_xcorr_edsp_process2 + SUBS r1, r1, #4 + BLT celt_pitch_xcorr_edsp_process2 +celt_pitch_xcorr_edsp_process4: + @ xcorr_kernel_edsp parameters: + @ r3 = len, r4 = _x, r5 = _y, r6...r9 = sum[4] = {0, 0, 0, 0} + MOV r6, #0 + MOV r7, #0 + MOV r8, #0 + MOV r9, #0 + BL xcorr_kernel_edsp_start @ xcorr_kernel_edsp(_x, _y+i, xcorr+i, len) + @ maxcorr = max(maxcorr, sum0, sum1, sum2, sum3) + CMP r0, r6 + @ _y+=4 + ADD r5, r5, #8 + MOVLT r0, r6 + CMP r0, r7 + MOVLT r0, r7 + CMP r0, r8 + MOVLT r0, r8 + CMP r0, r9 + MOVLT r0, r9 + STMIA r2!, {r6-r9} + SUBS r1, r1, #4 + BGE celt_pitch_xcorr_edsp_process4 +celt_pitch_xcorr_edsp_process2: + ADDS r1, r1, #2 + BLT celt_pitch_xcorr_edsp_process1a + SUBS r12, r3, #4 + @ {r10, r11} = {sum0, sum1} = {0, 0} + MOV r10, #0 + MOV r11, #0 + LDR r8, [r5], #4 + BLE celt_pitch_xcorr_edsp_process2_loop_done + LDR r6, [r4], #4 + LDR r9, [r5], #4 +celt_pitch_xcorr_edsp_process2_loop4: + SMLABB r10, r6, r8, r10 @ sum0 = MAC16_16(sum0, x_0, y_0) + LDR r7, [r4], #4 + SMLABT r11, r6, r8, r11 @ sum1 = MAC16_16(sum1, x_0, y_1) + SUBS r12, r12, #4 @ j-=4 + SMLATT r10, r6, r8, r10 @ sum0 = MAC16_16(sum0, x_1, y_1) + LDR r8, [r5], #4 + SMLATB r11, r6, r9, r11 @ sum1 = MAC16_16(sum1, x_1, y_2) + LDRGT r6, [r4], #4 + SMLABB r10, r7, r9, r10 @ sum0 = MAC16_16(sum0, x_2, y_2) + SMLABT r11, r7, r9, r11 @ sum1 = MAC16_16(sum1, x_2, y_3) + SMLATT r10, r7, r9, r10 @ sum0 = MAC16_16(sum0, x_3, y_3) + LDRGT r9, [r5], #4 + SMLATB r11, r7, r8, r11 @ sum1 = MAC16_16(sum1, x_3, y_4) + BGT celt_pitch_xcorr_edsp_process2_loop4 +celt_pitch_xcorr_edsp_process2_loop_done: + ADDS r12, r12, #2 + BLE celt_pitch_xcorr_edsp_process2_1 + LDR r6, [r4], #4 + @ Stall + SMLABB r10, r6, r8, r10 @ sum0 = MAC16_16(sum0, x_0, y_0) + LDR r9, [r5], #4 + SMLABT r11, r6, r8, r11 @ sum1 = MAC16_16(sum1, x_0, y_1) + SUB r12, r12, #2 + SMLATT r10, r6, r8, r10 @ sum0 = MAC16_16(sum0, x_1, y_1) + MOV r8, r9 + SMLATB r11, r6, r9, r11 @ sum1 = MAC16_16(sum1, x_1, y_2) +celt_pitch_xcorr_edsp_process2_1: + LDRH r6, [r4], #2 + ADDS r12, r12, #1 + @ Stall + SMLABB r10, r6, r8, r10 @ sum0 = MAC16_16(sum0, x_0, y_0) + LDRHGT r7, [r4], #2 + SMLABT r11, r6, r8, r11 @ sum1 = MAC16_16(sum1, x_0, y_1) + BLE celt_pitch_xcorr_edsp_process2_done + LDRH r9, [r5], #2 + SMLABT r10, r7, r8, r10 @ sum0 = MAC16_16(sum0, x_0, y_1) + SMLABB r11, r7, r9, r11 @ sum1 = MAC16_16(sum1, x_0, y_2) +celt_pitch_xcorr_edsp_process2_done: + @ Restore _x + SUB r4, r4, r3, LSL #1 + @ Restore and advance _y + SUB r5, r5, r3, LSL #1 + @ maxcorr = max(maxcorr, sum0) + CMP r0, r10 + ADD r5, r5, #2 + MOVLT r0, r10 + SUB r1, r1, #2 + @ maxcorr = max(maxcorr, sum1) + CMP r0, r11 + @ xcorr[i] = sum + STR r10, [r2], #4 + MOVLT r0, r11 + STR r11, [r2], #4 +celt_pitch_xcorr_edsp_process1a: + ADDS r1, r1, #1 + BLT celt_pitch_xcorr_edsp_done + SUBS r12, r3, #4 + @ r14 = sum = 0 + MOV r14, #0 + BLT celt_pitch_xcorr_edsp_process1a_loop_done + LDR r6, [r4], #4 + LDR r8, [r5], #4 + LDR r7, [r4], #4 + LDR r9, [r5], #4 +celt_pitch_xcorr_edsp_process1a_loop4: + SMLABB r14, r6, r8, r14 @ sum = MAC16_16(sum, x_0, y_0) + SUBS r12, r12, #4 @ j-=4 + SMLATT r14, r6, r8, r14 @ sum = MAC16_16(sum, x_1, y_1) + LDRGE r6, [r4], #4 + SMLABB r14, r7, r9, r14 @ sum = MAC16_16(sum, x_2, y_2) + LDRGE r8, [r5], #4 + SMLATT r14, r7, r9, r14 @ sum = MAC16_16(sum, x_3, y_3) + LDRGE r7, [r4], #4 + LDRGE r9, [r5], #4 + BGE celt_pitch_xcorr_edsp_process1a_loop4 +celt_pitch_xcorr_edsp_process1a_loop_done: + ADDS r12, r12, #2 + LDRGE r6, [r4], #4 + LDRGE r8, [r5], #4 + @ Stall + SMLABBGE r14, r6, r8, r14 @ sum = MAC16_16(sum, x_0, y_0) + SUBGE r12, r12, #2 + SMLATTGE r14, r6, r8, r14 @ sum = MAC16_16(sum, x_1, y_1) + ADDS r12, r12, #1 + LDRHGE r6, [r4], #2 + LDRHGE r8, [r5], #2 + @ Stall + SMLABBGE r14, r6, r8, r14 @ sum = MAC16_16(sum, *x, *y) + @ maxcorr = max(maxcorr, sum) + CMP r0, r14 + @ xcorr[i] = sum + STR r14, [r2], #4 + MOVLT r0, r14 +celt_pitch_xcorr_edsp_done: + LDMFD sp!, {r4-r11, pc} + .size celt_pitch_xcorr_edsp, .-celt_pitch_xcorr_edsp @ ENDP + + .endif + +@ END: + .section .note.GNU-stack,"",%progbits diff --git a/drivers/opus/celt/arm/celt_pitch_xcorr_arm.s b/drivers/opus/celt/arm/celt_pitch_xcorr_arm.s index 09917b16bf..f96e0a88bb 100644 --- a/drivers/opus/celt/arm/celt_pitch_xcorr_arm.s +++ b/drivers/opus/celt/arm/celt_pitch_xcorr_arm.s @@ -42,6 +42,7 @@ IF OPUS_ARM_MAY_HAVE_NEON ; Compute sum[k]=sum(x[j]*y[j+k],j=0...len-1), k=0...3 xcorr_kernel_neon PROC +xcorr_kernel_neon_start ; input: ; r3 = int len ; r4 = opus_val16 *x @@ -181,7 +182,7 @@ celt_pitch_xcorr_neon_process4 VEOR q0, q0, q0 ; xcorr_kernel_neon only modifies r4, r5, r12, and q0...q3. ; So we don't save/restore any other registers. - BL xcorr_kernel_neon + BL xcorr_kernel_neon_start SUBS r6, r6, #4 VST1.32 {q0}, [r2]! ; _y += 4 @@ -257,6 +258,7 @@ IF OPUS_ARM_MAY_HAVE_EDSP ; This will get used on ARMv7 devices without NEON, so it has been optimized ; to take advantage of dual-issuing where possible. xcorr_kernel_edsp PROC +xcorr_kernel_edsp_start ; input: ; r3 = int len ; r4 = opus_val16 *_x (must be 32-bit aligned) @@ -309,7 +311,7 @@ xcorr_kernel_edsp_process4_done SUBS r2, r2, #1 ; j-- ; Stall SMLABB r6, r12, r10, r6 ; sum[0] = MAC16_16(sum[0],x,y_0) - LDRGTH r14, [r4], #2 ; r14 = *x++ + LDRHGT r14, [r4], #2 ; r14 = *x++ SMLABT r7, r12, r10, r7 ; sum[1] = MAC16_16(sum[1],x,y_1) SMLABB r8, r12, r11, r8 ; sum[2] = MAC16_16(sum[2],x,y_2) SMLABT r9, r12, r11, r9 ; sum[3] = MAC16_16(sum[3],x,y_3) @@ -319,7 +321,7 @@ xcorr_kernel_edsp_process4_done SMLABB r7, r14, r11, r7 ; sum[1] = MAC16_16(sum[1],x,y_2) LDRH r10, [r5], #2 ; r10 = y_4 = *y++ SMLABT r8, r14, r11, r8 ; sum[2] = MAC16_16(sum[2],x,y_3) - LDRGTH r12, [r4], #2 ; r12 = *x++ + LDRHGT r12, [r4], #2 ; r12 = *x++ SMLABB r9, r14, r10, r9 ; sum[3] = MAC16_16(sum[3],x,y_4) BLE xcorr_kernel_edsp_done SMLABB r6, r12, r11, r6 ; sum[0] = MAC16_16(sum[0],tmp,y_2) @@ -327,7 +329,7 @@ xcorr_kernel_edsp_process4_done SMLABT r7, r12, r11, r7 ; sum[1] = MAC16_16(sum[1],tmp,y_3) LDRH r2, [r5], #2 ; r2 = y_5 = *y++ SMLABB r8, r12, r10, r8 ; sum[2] = MAC16_16(sum[2],tmp,y_4) - LDRGTH r14, [r4] ; r14 = *x + LDRHGT r14, [r4] ; r14 = *x SMLABB r9, r12, r2, r9 ; sum[3] = MAC16_16(sum[3],tmp,y_5) BLE xcorr_kernel_edsp_done SMLABT r6, r14, r11, r6 ; sum[0] = MAC16_16(sum[0],tmp,y_3) @@ -387,11 +389,11 @@ celt_pitch_xcorr_edsp_process1u_loop4 celt_pitch_xcorr_edsp_process1u_loop4_done ADDS r12, r12, #4 celt_pitch_xcorr_edsp_process1u_loop1 - LDRGEH r6, [r4], #2 + LDRHGE r6, [r4], #2 ; Stall SMLABBGE r14, r6, r8, r14 ; sum = MAC16_16(sum, *x, *y) - SUBGES r12, r12, #1 - LDRGTH r8, [r5], #2 + SUBSGE r12, r12, #1 + LDRHGT r8, [r5], #2 BGT celt_pitch_xcorr_edsp_process1u_loop1 ; Restore _x SUB r4, r4, r3, LSL #1 @@ -416,7 +418,7 @@ celt_pitch_xcorr_edsp_process4 MOV r7, #0 MOV r8, #0 MOV r9, #0 - BL xcorr_kernel_edsp ; xcorr_kernel_edsp(_x, _y+i, xcorr+i, len) + BL xcorr_kernel_edsp_start ; xcorr_kernel_edsp(_x, _y+i, xcorr+i, len) ; maxcorr = max(maxcorr, sum0, sum1, sum2, sum3) CMP r0, r6 ; _y+=4 @@ -474,7 +476,7 @@ celt_pitch_xcorr_edsp_process2_1 ADDS r12, r12, #1 ; Stall SMLABB r10, r6, r8, r10 ; sum0 = MAC16_16(sum0, x_0, y_0) - LDRGTH r7, [r4], #2 + LDRHGT r7, [r4], #2 SMLABT r11, r6, r8, r11 ; sum1 = MAC16_16(sum1, x_0, y_1) BLE celt_pitch_xcorr_edsp_process2_done LDRH r9, [r5], #2 @@ -527,8 +529,8 @@ celt_pitch_xcorr_edsp_process1a_loop_done SUBGE r12, r12, #2 SMLATTGE r14, r6, r8, r14 ; sum = MAC16_16(sum, x_1, y_1) ADDS r12, r12, #1 - LDRGEH r6, [r4], #2 - LDRGEH r8, [r5], #2 + LDRHGE r6, [r4], #2 + LDRHGE r8, [r5], #2 ; Stall SMLABBGE r14, r6, r8, r14 ; sum = MAC16_16(sum, *x, *y) ; maxcorr = max(maxcorr, sum) diff --git a/drivers/opus/celt/arm/fft_arm.h b/drivers/opus/celt/arm/fft_arm.h new file mode 100644 index 0000000000..c99458b4af --- /dev/null +++ b/drivers/opus/celt/arm/fft_arm.h @@ -0,0 +1,72 @@ +/* Copyright (c) 2015 Xiph.Org Foundation + Written by Viswanath Puttagunta */ +/** + @file fft_arm.h + @brief ARM Neon Intrinsic optimizations for fft using NE10 library + */ + +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + + +#if !defined(FFT_ARM_H) +#define FFT_ARM_H + +#include "opus/opus_config.h" +#include "opus/celt/kiss_fft.h" + +#if defined(HAVE_ARM_NE10) + +int opus_fft_alloc_arm_neon(kiss_fft_state *st); +void opus_fft_free_arm_neon(kiss_fft_state *st); + +void opus_fft_neon(const kiss_fft_state *st, + const kiss_fft_cpx *fin, + kiss_fft_cpx *fout); + +void opus_ifft_neon(const kiss_fft_state *st, + const kiss_fft_cpx *fin, + kiss_fft_cpx *fout); + +#if !defined(OPUS_HAVE_RTCD) +#define OVERRIDE_OPUS_FFT (1) + +#define opus_fft_alloc_arch(_st, arch) \ + ((void)(arch), opus_fft_alloc_arm_neon(_st)) + +#define opus_fft_free_arch(_st, arch) \ + ((void)(arch), opus_fft_free_arm_neon(_st)) + +#define opus_fft(_st, _fin, _fout, arch) \ + ((void)(arch), opus_fft_neon(_st, _fin, _fout)) + +#define opus_ifft(_st, _fin, _fout, arch) \ + ((void)(arch), opus_ifft_neon(_st, _fin, _fout)) + +#endif /* OPUS_HAVE_RTCD */ + +#endif /* HAVE_ARM_NE10 */ + +#endif diff --git a/drivers/opus/celt/arm/fixed_armv4.h b/drivers/opus/celt/arm/fixed_armv4.h index b690bc8cea..efb3b1896a 100644 --- a/drivers/opus/celt/arm/fixed_armv4.h +++ b/drivers/opus/celt/arm/fixed_armv4.h @@ -68,6 +68,10 @@ static OPUS_INLINE opus_val32 MULT16_32_Q15_armv4(opus_val16 a, opus_val32 b) #undef MAC16_32_Q15 #define MAC16_32_Q15(c, a, b) ADD32(c, MULT16_32_Q15(a, b)) +/** 16x32 multiply, followed by a 16-bit shift right and 32-bit add. + Result fits in 32 bits. */ +#undef MAC16_32_Q16 +#define MAC16_32_Q16(c, a, b) ADD32(c, MULT16_32_Q16(a, b)) /** 32x32 multiplication, followed by a 31-bit shift right. Results fits in 32 bits */ #undef MULT32_32_Q31 diff --git a/drivers/opus/celt/arm/fixed_armv5e.h b/drivers/opus/celt/arm/fixed_armv5e.h index cb6e4c1da9..2db23262e8 100644 --- a/drivers/opus/celt/arm/fixed_armv5e.h +++ b/drivers/opus/celt/arm/fixed_armv5e.h @@ -82,6 +82,23 @@ static OPUS_INLINE opus_val32 MAC16_32_Q15_armv5e(opus_val32 c, opus_val16 a, } #define MAC16_32_Q15(c, a, b) (MAC16_32_Q15_armv5e(c, a, b)) +/** 16x32 multiply, followed by a 16-bit shift right and 32-bit add. + Result fits in 32 bits. */ +#undef MAC16_32_Q16 +static OPUS_INLINE opus_val32 MAC16_32_Q16_armv5e(opus_val32 c, opus_val16 a, + opus_val32 b) +{ + int res; + __asm__( + "#MAC16_32_Q16\n\t" + "smlawb %0, %1, %2, %3;\n" + : "=r"(res) + : "r"(b), "r"(a), "r"(c) + ); + return res; +} +#define MAC16_32_Q16(c, a, b) (MAC16_32_Q16_armv5e(c, a, b)) + /** 16x16 multiply-add where the result fits in 32 bits */ #undef MAC16_16 static OPUS_INLINE opus_val32 MAC16_16_armv5e(opus_val32 c, opus_val16 a, @@ -113,4 +130,22 @@ static OPUS_INLINE opus_val32 MULT16_16_armv5e(opus_val16 a, opus_val16 b) } #define MULT16_16(a, b) (MULT16_16_armv5e(a, b)) +#ifdef OPUS_ARM_INLINE_MEDIA + +#undef SIG2WORD16 +static OPUS_INLINE opus_val16 SIG2WORD16_armv6(opus_val32 x) +{ + celt_sig res; + __asm__( + "#SIG2WORD16\n\t" + "ssat %0, #16, %1, ASR #12\n\t" + : "=r"(res) + : "r"(x+2048) + ); + return EXTRACT16(res); +} +#define SIG2WORD16(x) (SIG2WORD16_armv6(x)) + +#endif /* OPUS_ARM_INLINE_MEDIA */ + #endif diff --git a/drivers/opus/celt/arm/kiss_fft_armv4.h b/drivers/opus/celt/arm/kiss_fft_armv4.h index 773464628b..0f5617324d 100644 --- a/drivers/opus/celt/arm/kiss_fft_armv4.h +++ b/drivers/opus/celt/arm/kiss_fft_armv4.h @@ -116,6 +116,6 @@ } \ while(0) -#endif /* OPUS_FIXED_POINT */ +#endif /* FIXED_POINT */ #endif /* KISS_FFT_ARMv4_H */ diff --git a/drivers/opus/celt/arm/kiss_fft_armv5e.h b/drivers/opus/celt/arm/kiss_fft_armv5e.h index 1eff56a66a..0ab75fbc58 100644 --- a/drivers/opus/celt/arm/kiss_fft_armv5e.h +++ b/drivers/opus/celt/arm/kiss_fft_armv5e.h @@ -113,6 +113,6 @@ } \ while(0) -#endif /* OPUS_FIXED_POINT */ +#endif /* FIXED_POINT */ #endif /* KISS_FFT_GUTS_H */ diff --git a/drivers/opus/celt/arm/mdct_arm.h b/drivers/opus/celt/arm/mdct_arm.h new file mode 100644 index 0000000000..faf3c8acab --- /dev/null +++ b/drivers/opus/celt/arm/mdct_arm.h @@ -0,0 +1,60 @@ +/* Copyright (c) 2015 Xiph.Org Foundation + Written by Viswanath Puttagunta */ +/** + @file arm_mdct.h + @brief ARM Neon Intrinsic optimizations for mdct using NE10 library + */ + +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#if !defined(MDCT_ARM_H) +#define MDCT_ARM_H + +#include "opus/opus_config.h" +#include "opus/celt/mdct.h" + +#if defined(HAVE_ARM_NE10) +/** Compute a forward MDCT and scale by 4/N, trashes the input array */ +void clt_mdct_forward_neon(const mdct_lookup *l, kiss_fft_scalar *in, + kiss_fft_scalar * OPUS_RESTRICT out, + const opus_val16 *window, int overlap, + int shift, int stride, int arch); + +void clt_mdct_backward_neon(const mdct_lookup *l, kiss_fft_scalar *in, + kiss_fft_scalar * OPUS_RESTRICT out, + const opus_val16 *window, int overlap, + int shift, int stride, int arch); + +#if !defined(OPUS_HAVE_RTCD) +#define OVERRIDE_OPUS_MDCT (1) +#define clt_mdct_forward(_l, _in, _out, _window, _int, _shift, _stride, _arch) \ + clt_mdct_forward_neon(_l, _in, _out, _window, _int, _shift, _stride, _arch) +#define clt_mdct_backward(_l, _in, _out, _window, _int, _shift, _stride, _arch) \ + clt_mdct_backward_neon(_l, _in, _out, _window, _int, _shift, _stride, _arch) +#endif /* OPUS_HAVE_RTCD */ +#endif /* HAVE_ARM_NE10 */ + +#endif diff --git a/drivers/opus/celt/arm/pitch_arm.h b/drivers/opus/celt/arm/pitch_arm.h index 18d1f2e75e..996fbc7fd9 100644 --- a/drivers/opus/celt/arm/pitch_arm.h +++ b/drivers/opus/celt/arm/pitch_arm.h @@ -30,7 +30,7 @@ # include "opus/celt/arm/armcpu.h" -# if defined(OPUS_FIXED_POINT) +# if defined(FIXED_POINT) # if defined(OPUS_ARM_MAY_HAVE_NEON) opus_val32 celt_pitch_xcorr_neon(const opus_val16 *_x, const opus_val16 *_y, @@ -52,6 +52,17 @@ opus_val32 celt_pitch_xcorr_edsp(const opus_val16 *_x, const opus_val16 *_y, ((void)(arch),PRESUME_NEON(celt_pitch_xcorr)(_x, _y, xcorr, len, max_pitch)) # endif -# endif +#else /* Start !FIXED_POINT */ +/* Float case */ +#if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) +void celt_pitch_xcorr_float_neon(const opus_val16 *_x, const opus_val16 *_y, + opus_val32 *xcorr, int len, int max_pitch); +#if !defined(OPUS_HAVE_RTCD) || defined(OPUS_ARM_PRESUME_NEON_INTR) +#define OVERRIDE_PITCH_XCORR (1) +# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \ + ((void)(arch),celt_pitch_xcorr_float_neon(_x, _y, xcorr, len, max_pitch)) +#endif +#endif +#endif /* end !FIXED_POINT */ #endif diff --git a/drivers/opus/celt/bands.c b/drivers/opus/celt/bands.c index 5a6b23d87f..bdd87dd327 100644 --- a/drivers/opus/celt/bands.c +++ b/drivers/opus/celt/bands.c @@ -26,14 +26,11 @@ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ - -#ifdef OPUS_ENABLED #include "opus/opus_config.h" -#endif #include #include "opus/celt/bands.h" -#include "opus/celt/opus_modes.h" +#include "opus/celt/modes.h" #include "opus/celt/vq.h" #include "opus/celt/cwrs.h" #include "opus/celt/stack_alloc.h" @@ -92,11 +89,11 @@ static int bitexact_log2tan(int isin,int icos) #ifdef OPUS_FIXED_POINT /* Compute the amplitude (sqrt energy) in each of the bands */ -void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int M) +void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM) { int i, c, N; const opus_int16 *eBands = m->eBands; - N = M*m->shortMdctSize; + N = m->shortMdctSize< 0) { - int shift = celt_ilog2(maxval)-10; - j=M*eBands[i]; do { - sum = MAC16_16(sum, EXTRACT16(VSHR32(X[j+c*N],shift)), - EXTRACT16(VSHR32(X[j+c*N],shift))); - } while (++jlogN[i]>>BITRES)+LM+1)>>1); + j=eBands[i]<0) + { + do { + sum = MAC16_16(sum, EXTRACT16(SHR32(X[j+c*N],shift)), + EXTRACT16(SHR32(X[j+c*N],shift))); + } while (++jnbEBands] = EPSILON+VSHR32(EXTEND32(celt_sqrt(sum)),-shift); } else { @@ -148,20 +150,18 @@ void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, cel } while (++ceBands; - N = M*m->shortMdctSize; + N = m->shortMdctSize<nbEBands] = celt_sqrt(sum); /*printf ("%f ", bandE[i+c*m->nbEBands]);*/ } @@ -186,78 +186,84 @@ void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, cel } while (++ceBands; N = M*m->shortMdctSize; - celt_assert2(C<=2, "denormalise_bands() not implemented for >2 channels"); - c=0; do { - celt_sig * OPUS_RESTRICT f; - const celt_norm * OPUS_RESTRICT x; - f = freq+c*N; - x = X+c*N+M*eBands[start]; - for (i=0;inbEBands], SHL16((opus_val16)eMeans[i],6)); + j=M*eBands[i]; + band_end = M*eBands[i+1]; + lg = ADD16(bandLogE[i], SHL16((opus_val16)eMeans[i],6)); #ifndef OPUS_FIXED_POINT - g = celt_exp2(lg); + g = celt_exp2(lg); #else - /* Handle the integer part of the log energy */ - shift = 16-(lg>>DB_SHIFT); - if (shift>31) - { - shift=0; - g=0; - } else { - /* Handle the fractional part. */ - g = celt_exp2_frac(lg&((1<>DB_SHIFT); + if (shift>31) + { + shift=0; + g=0; + } else { + /* Handle the fractional part. */ + g = celt_exp2_frac(lg&((1<eBands[i+1]-m->eBands[i]; /* depth in 1/8 bits */ - depth = (1+pulses[i])/((m->eBands[i+1]-m->eBands[i])<=0); + depth = celt_udiv(1+pulses[i], (m->eBands[i+1]-m->eBands[i]))>>LM; #ifdef OPUS_FIXED_POINT thresh32 = SHR32(celt_exp2(-SHL16(depth, 10-BITRES)),1); @@ -345,12 +352,12 @@ void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_mas } /* We just added some energy, so we need to renormalise */ if (renormalize) - renormalise_vector(X, N0<m->nbEBands-4) - hf_sum += 32*(tcount[1]+tcount[0])/N; + hf_sum += celt_udiv(32*(tcount[1]+tcount[0]), N); tmp = (2*tcount[2] >= N) + (2*tcount[1] >= N) + (2*tcount[0] >= N); sum += tmp*256; nbBands++; @@ -493,7 +499,7 @@ int spreading_decision(const CELTMode *m, celt_norm *X, int *average, if (update_hf) { if (hf_sum) - hf_sum /= C*(4-m->nbEBands+end); + hf_sum = celt_udiv(hf_sum, C*(4-m->nbEBands+end)); *hf_average = (*hf_average+hf_sum)>>1; hf_sum = *hf_average; if (*tapset_decision==2) @@ -509,7 +515,8 @@ int spreading_decision(const CELTMode *m, celt_norm *X, int *average, } /*printf("%d %d %d\n", hf_sum, *hf_average, *tapset_decision);*/ celt_assert(nbBands>0); /* end has to be non-zero */ - sum /= nbBands; + celt_assert(sum>=0); + sum = celt_udiv(sum, nbBands); /* Recursive averaging */ sum = (sum+*average)>>1; *average = sum; @@ -567,8 +574,7 @@ static void deinterleave_hadamard(celt_norm *X, int N0, int stride, int hadamard for (j=0;jarch); } tell = ec_tell_frac(ec); if (qn!=1) @@ -769,7 +776,8 @@ static void compute_theta(struct band_ctx *ctx, struct split_ctx *sctx, ec_dec_update(ec, fl, fl+fs, ft); } } - itheta = (opus_int32)itheta*16384/qn; + celt_assert(itheta>=0); + itheta = celt_udiv((opus_int32)itheta*16384, qn); if (encode && stereo) { if (itheta==0) @@ -1021,8 +1029,7 @@ static unsigned quant_partition(struct band_ctx *ctx, celt_norm *X, fill &= cm_mask; if (!fill) { - for (j=0;jarch); } } } @@ -1084,7 +1091,7 @@ static unsigned quant_band(struct band_ctx *ctx, celt_norm *X, longBlocks = B0==1; - N_B /= B; + N_B = celt_udiv(N_B, B); /* Special case for one sample */ if (N==1) @@ -1098,9 +1105,7 @@ static unsigned quant_band(struct band_ctx *ctx, celt_norm *X, if (lowband_scratch && lowband && (recombine || ((N_B&1) == 0 && tf_change<0) || B0>1)) { - int j; - for (j=0;jarch); if (inv) { int j; @@ -1353,9 +1358,11 @@ static unsigned quant_band_stereo(struct band_ctx *ctx, celt_norm *X, celt_norm void quant_all_bands(int encode, const CELTMode *m, int start, int end, - celt_norm *X_, celt_norm *Y_, unsigned char *collapse_masks, const celt_ener *bandE, int *pulses, - int shortBlocks, int spread, int dual_stereo, int intensity, int *tf_res, - opus_int32 total_bits, opus_int32 balance, ec_ctx *ec, int LM, int codedBands, opus_uint32 *seed) + celt_norm *X_, celt_norm *Y_, unsigned char *collapse_masks, + const celt_ener *bandE, int *pulses, int shortBlocks, int spread, + int dual_stereo, int intensity, int *tf_res, opus_int32 total_bits, + opus_int32 balance, ec_ctx *ec, int LM, int codedBands, + opus_uint32 *seed, int arch) { int i; opus_int32 remaining_bits; @@ -1397,6 +1404,7 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end, ctx.m = m; ctx.seed = *seed; ctx.spread = spread; + ctx.arch = arch; for (i=start;ioverlap; + nbEBands = mode->nbEBands; + N = mode->shortMdctSize<shortMdctSize; + B = M; + NB = mode->shortMdctSize; shift = mode->maxLM; } else { B = 1; - N = mode->shortMdctSize<shortMdctSize<maxLM-LM; } - c=0; do { - /* IMDCT on the interleaved the sub-frames, overlap-add is performed by the IMDCT */ + + if (CC==2&&C==1) + { + /* Copying a mono streams to two channels */ + celt_sig *freq2; + denormalise_bands(mode, X, freq, oldBandE, start, effEnd, M, + downsample, silence); + /* Store a temporary copy in the output buffer because the IMDCT destroys its input. */ + freq2 = out_syn[1]+overlap/2; + OPUS_COPY(freq2, freq, N); for (b=0;bmdct, &X[b+c*N*B], out_mem[c]+N*b, mode->window, overlap, shift, B); - } while (++cmdct, &freq2[b], out_syn[0]+NB*b, mode->window, overlap, shift, B, arch); + for (b=0;bmdct, &freq[b], out_syn[1]+NB*b, mode->window, overlap, shift, B, arch); + } else if (CC==1&&C==2) + { + /* Downmixing a stereo stream to mono */ + celt_sig *freq2; + freq2 = out_syn[0]+overlap/2; + denormalise_bands(mode, X, freq, oldBandE, start, effEnd, M, + downsample, silence); + /* Use the output buffer as temp array before downmixing. */ + denormalise_bands(mode, X+N, freq2, oldBandE+nbEBands, start, effEnd, M, + downsample, silence); + for (i=0;imdct, &freq[b], out_syn[0]+NB*b, mode->window, overlap, shift, B, arch); + } else { + /* Normal case (mono or stereo) */ + c=0; do { + denormalise_bands(mode, X+c*N, freq, oldBandE+c*nbEBands, start, effEnd, M, + downsample, silence); + for (b=0;bmdct, &freq[b], out_syn[c]+NB*b, mode->window, overlap, shift, B, arch); + } while (++c>1, opus_val16 ); + pitch_downsample(decode_mem, lp_pitch_buf, + DECODE_BUFFER_SIZE, C, arch); + pitch_search(lp_pitch_buf+(PLC_PITCH_LAG_MAX>>1), lp_pitch_buf, + DECODE_BUFFER_SIZE-PLC_PITCH_LAG_MAX, + PLC_PITCH_LAG_MAX-PLC_PITCH_LAG_MIN, &pitch_index, arch); + pitch_index = PLC_PITCH_LAG_MAX-pitch_index; + RESTORE_STACK; + return pitch_index; +} + +static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM) { int c; int i; @@ -343,11 +422,9 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R int nbEBands; int overlap; int start; - int downsample; int loss_count; int noise_based; const opus_int16 *eBands; - VARDECL(celt_sig, scratch); SAVE_STACK; mode = st->mode; @@ -367,40 +444,37 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R loss_count = st->loss_count; start = st->start; - downsample = st->downsample; noise_based = loss_count >= 5 || start != 0; - ALLOC(scratch, noise_based?N*C:N, celt_sig); if (noise_based) { /* Noise-based PLC/CNG */ - celt_sig *freq; +#ifdef NORM_ALIASING_HACK + celt_norm *X; +#else VARDECL(celt_norm, X); +#endif opus_uint32 seed; - opus_val16 *plcLogE; int end; int effEnd; - + opus_val16 decay; end = st->end; effEnd = IMAX(start, IMIN(end, mode->effEBands)); - /* Share the interleaved signal MDCT coefficient buffer with the - deemphasis scratch buffer. */ - freq = scratch; +#ifdef NORM_ALIASING_HACK + /* This is an ugly hack that breaks aliasing rules and would be easily broken, + but it saves almost 4kB of stack. */ + X = (celt_norm*)(out_syn[C-1]+overlap/2); +#else ALLOC(X, C*N, celt_norm); /**< Interleaved normalised MDCTs */ +#endif - if (loss_count >= 5) - plcLogE = backgroundLogE; - else { - /* Energy decay */ - opus_val16 decay = loss_count==0 ? - QCONST16(1.5f, DB_SHIFT) : QCONST16(.5f, DB_SHIFT); - c=0; do - { - for (i=start;irng; for (c=0;c>20); } - renormalise_vector(X+boffs, blen, Q15ONE); + renormalise_vector(X+boffs, blen, Q15ONE, st->arch); } } st->rng = seed; - denormalise_bands(mode, X, freq, plcLogE, start, effEnd, C, 1<>1)); } while (++cdownsample, 0, st->arch); } else { /* Pitch-based PLC */ const opus_val16 *window; @@ -445,15 +511,7 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R if (loss_count == 0) { - VARDECL( opus_val16, lp_pitch_buf ); - ALLOC( lp_pitch_buf, DECODE_BUFFER_SIZE>>1, opus_val16 ); - pitch_downsample(decode_mem, lp_pitch_buf, - DECODE_BUFFER_SIZE, C, st->arch); - pitch_search(lp_pitch_buf+(PLC_PITCH_LAG_MAX>>1), lp_pitch_buf, - DECODE_BUFFER_SIZE-PLC_PITCH_LAG_MAX, - PLC_PITCH_LAG_MAX-PLC_PITCH_LAG_MIN, &pitch_index, st->arch); - pitch_index = PLC_PITCH_LAG_MAX-pitch_index; - st->last_pitch_index = pitch_index; + st->last_pitch_index = pitch_index = celt_plc_pitch_search(decode_mem, C, st->arch); } else { pitch_index = st->last_pitch_index; fade = QCONST16(.8f,15); @@ -516,7 +574,7 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R } /* Compute the excitation for exc_length samples before the loss. */ celt_fir(exc+MAX_PERIOD-exc_length, lpc+c*LPC_ORDER, - exc+MAX_PERIOD-exc_length, exc_length, LPC_ORDER, lpc_mem); + exc+MAX_PERIOD-exc_length, exc_length, LPC_ORDER, lpc_mem, st->arch); } /* Check if the waveform is decaying, and if so how fast. @@ -583,7 +641,7 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R the signal domain. */ celt_iir(buf+DECODE_BUFFER_SIZE-N, lpc+c*LPC_ORDER, buf+DECODE_BUFFER_SIZE-N, extrapolation_len, LPC_ORDER, - lpc_mem); + lpc_mem, st->arch); } /* Check if the synthesis energy is higher than expected, which can @@ -631,7 +689,7 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R comb_filter(etmp, buf+DECODE_BUFFER_SIZE, st->postfilter_period, st->postfilter_period, overlap, -st->postfilter_gain, -st->postfilter_gain, - st->postfilter_tapset, st->postfilter_tapset, NULL, 0); + st->postfilter_tapset, st->postfilter_tapset, NULL, 0, st->arch); /* Simulate TDAC on the concealed audio so that it blends with the MDCT of the next frame. */ @@ -644,22 +702,23 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R } while (++cpreemph, st->preemph_memD, scratch); - st->loss_count = loss_count+1; RESTORE_STACK; } -int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec) +int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, + int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec, int accum) { int c, i, N; int spread_decision; opus_int32 bits; ec_dec _dec; - VARDECL(celt_sig, freq); +#ifdef NORM_ALIASING_HACK + celt_norm *X; +#else VARDECL(celt_norm, X); +#endif VARDECL(int, fine_quant); VARDECL(int, pulses); VARDECL(int, cap); @@ -677,6 +736,8 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat int intra_ener; const int CC = st->channels; int LM, M; + int start; + int end; int effEnd; int codedBands; int alloc_trim; @@ -703,11 +764,10 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat nbEBands = mode->nbEBands; overlap = mode->overlap; eBands = mode->eBands; + start = st->start; + end = st->end; frame_size *= st->downsample; - c=0; do { - decode_mem[c] = st->_decode_mem + c*(DECODE_BUFFER_SIZE+overlap); - } while (++c_decode_mem+(DECODE_BUFFER_SIZE+overlap)*CC); oldBandE = lpc+CC*LPC_ORDER; oldLogE = oldBandE + 2*nbEBands; @@ -725,7 +785,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat if (data0<0) return OPUS_INVALID_PACKET; } - st->end = IMAX(1, mode->effEBands-2*(data0>>5)); + st->end = end = IMAX(1, mode->effEBands-2*(data0>>5)); LM = (data0>>3)&0x3; C = 1 + ((data0>>2)&0x1); data++; @@ -752,14 +812,19 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat return OPUS_BAD_ARG; N = M*mode->shortMdctSize; + c=0; do { + decode_mem[c] = st->_decode_mem + c*(DECODE_BUFFER_SIZE+overlap); + out_syn[c] = decode_mem[c]+DECODE_BUFFER_SIZE-N; + } while (++cend; + effEnd = end; if (effEnd > mode->effEBands) effEnd = mode->effEBands; if (data == NULL || len<=1) { - celt_decode_lost(st, pcm, N, LM); + celt_decode_lost(st, N, LM); + deemphasis(out_syn, pcm, N, CC, st->downsample, mode->preemph, st->preemph_memD, accum); RESTORE_STACK; return frame_size/st->downsample; } @@ -795,7 +860,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat postfilter_gain = 0; postfilter_pitch = 0; postfilter_tapset = 0; - if (st->start==0 && tell+16 <= total_bits) + if (start==0 && tell+16 <= total_bits) { if(ec_dec_bit_logp(dec, 1)) { @@ -826,11 +891,11 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat /* Decode the global flags (first symbols in the stream) */ intra_ener = tell+3<=total_bits ? ec_dec_bit_logp(dec, 3) : 0; /* Get band energies */ - unquant_coarse_energy(mode, st->start, st->end, oldBandE, + unquant_coarse_energy(mode, start, end, oldBandE, intra_ener, dec, C, LM); ALLOC(tf_res, nbEBands, int); - tf_decode(st->start, st->end, isTransient, tf_res, LM, dec); + tf_decode(start, end, isTransient, tf_res, LM, dec); tell = ec_tell(dec); spread_decision = SPREAD_NORMAL; @@ -846,7 +911,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat dynalloc_logp = 6; total_bits<<=BITRES; tell = ec_tell_frac(dec); - for (i=st->start;iend;i++) + for (i=start;istart, st->end, offsets, cap, + codedBands = compute_allocation(mode, start, end, offsets, cap, alloc_trim, &intensity, &dual_stereo, bits, &balance, pulses, fine_quant, fine_priority, C, LM, dec, 0, 0, 0); - unquant_fine_energy(mode, st->start, st->end, oldBandE, fine_quant, dec, C); + unquant_fine_energy(mode, start, end, oldBandE, fine_quant, dec, C); + + c=0; do { + OPUS_MOVE(decode_mem[c], decode_mem[c]+N, DECODE_BUFFER_SIZE-N+overlap/2); + } while (++cstart, st->end, X, C==2 ? X+N : NULL, collapse_masks, + quant_all_bands(0, mode, start, end, X, C==2 ? X+N : NULL, collapse_masks, NULL, pulses, shortBlocks, spread_decision, dual_stereo, intensity, tf_res, - len*(8<rng); + len*(8<rng, st->arch); if (anti_collapse_rsv > 0) { anti_collapse_on = ec_dec_bits(dec, 1); } - unquant_energy_finalise(mode, st->start, st->end, oldBandE, + unquant_energy_finalise(mode, start, end, oldBandE, fine_quant, fine_priority, len*8-ec_tell(dec), dec, C); if (anti_collapse_on) anti_collapse(mode, X, collapse_masks, LM, C, N, - st->start, st->end, oldBandE, oldLogE, oldLogE2, pulses, st->rng); - - ALLOC(freq, IMAX(CC,C)*N, celt_sig); /**< Interleaved signal MDCTs */ + start, end, oldBandE, oldLogE, oldLogE2, pulses, st->rng, st->arch); if (silence) { for (i=0;istart, effEnd, C, M); } - c=0; do { - OPUS_MOVE(decode_mem[c], decode_mem[c]+N, DECODE_BUFFER_SIZE-N+overlap/2); - } while (++cdownsample!=1) - bound = IMIN(bound, N/st->downsample); - for (i=bound;idownsample, silence, st->arch); c=0; do { st->postfilter_period=IMAX(st->postfilter_period, COMBFILTER_MINPERIOD); st->postfilter_period_old=IMAX(st->postfilter_period_old, COMBFILTER_MINPERIOD); comb_filter(out_syn[c], out_syn[c], st->postfilter_period_old, st->postfilter_period, mode->shortMdctSize, st->postfilter_gain_old, st->postfilter_gain, st->postfilter_tapset_old, st->postfilter_tapset, - mode->window, overlap); + mode->window, overlap, st->arch); if (LM!=0) comb_filter(out_syn[c]+mode->shortMdctSize, out_syn[c]+mode->shortMdctSize, st->postfilter_period, postfilter_pitch, N-mode->shortMdctSize, st->postfilter_gain, postfilter_gain, st->postfilter_tapset, postfilter_tapset, - mode->window, overlap); + mode->window, overlap, st->arch); } while (++cpostfilter_period_old = st->postfilter_period; @@ -978,32 +1021,36 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat st->postfilter_tapset_old = st->postfilter_tapset; } - if (C==1) { - for (i=0;iloss_count < 10) + max_background_increase = M*QCONST16(0.001f,DB_SHIFT); + else + max_background_increase = QCONST16(1.f,DB_SHIFT); for (i=0;i<2*nbEBands;i++) - oldLogE2[i] = oldLogE[i]; - for (i=0;i<2*nbEBands;i++) - oldLogE[i] = oldBandE[i]; - for (i=0;i<2*nbEBands;i++) - backgroundLogE[i] = MIN16(backgroundLogE[i] + M*QCONST16(0.001f,DB_SHIFT), oldBandE[i]); + backgroundLogE[i] = MIN16(backgroundLogE[i] + max_background_increase, oldBandE[i]); } else { for (i=0;i<2*nbEBands;i++) oldLogE[i] = MIN16(oldLogE[i], oldBandE[i]); } c=0; do { - for (i=0;istart;i++) + for (i=0;iend;irng = dec->rng; - /* We reuse freq[] as scratch space for the de-emphasis */ - deemphasis(out_syn, pcm, N, CC, st->downsample, mode->preemph, st->preemph_memD, freq); + deemphasis(out_syn, pcm, N, CC, st->downsample, mode->preemph, st->preemph_memD, accum); st->loss_count = 0; RESTORE_STACK; if (ec_tell(dec) > 8*len) @@ -1028,7 +1074,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat #ifdef OPUS_FIXED_POINT int opus_custom_decode(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_int16 * OPUS_RESTRICT pcm, int frame_size) { - return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL); + return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL, 0); } #ifndef DISABLE_FLOAT_API @@ -1045,7 +1091,7 @@ int opus_custom_decode_float(CELTDecoder * OPUS_RESTRICT st, const unsigned char N = frame_size; ALLOC(out, C*N, opus_int16); - ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL); + ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL, 0); if (ret>0) for (j=0;j0) for (j=0;jmode = mode; - st->overlap = mode->overlap; st->stream_channels = st->channels = channels; st->upsample = 1; @@ -276,8 +271,7 @@ static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int } /*printf("\n");*/ /* First few samples are bad because we don't propagate the memory */ - for (i=0;i<12;i++) - tmp[i] = 0; + OPUS_CLEAR(tmp, 12); #ifdef OPUS_FIXED_POINT /* Normalize tmp to max range */ @@ -346,9 +340,9 @@ static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int { int id; #ifdef OPUS_FIXED_POINT - id = IMAX(0,IMIN(127,MULT16_32_Q15(tmp[i],norm))); /* Do not round to nearest */ + id = MAX32(0,MIN32(127,MULT16_32_Q15(tmp[i]+EPSILON,norm))); /* Do not round to nearest */ #else - id = IMAX(0,IMIN(127,(int)floor(64*norm*tmp[i]))); /* Do not round to nearest */ + id = (int)MAX32(0,MIN32(127,floor(64*norm*(tmp[i]+EPSILON)))); /* Do not round to nearest */ #endif unmask += inv_table[id]; } @@ -366,7 +360,7 @@ static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int /* Arbitrary metric for VBR boost */ tf_max = MAX16(0,celt_sqrt(27*mask_metric)-42); /* *tf_estimate = 1 + MIN16(1, sqrt(MAX16(0, tf_max-30))/20); */ - *tf_estimate = celt_sqrt(MAX16(0, SHL32(MULT16_16(QCONST16(0.0069,14),MIN16(163,tf_max)),14)-QCONST32(0.139,28))); + *tf_estimate = celt_sqrt(MAX32(0, SHL32(MULT16_16(QCONST16(0.0069,14),MIN16(163,tf_max)),14)-QCONST32(0.139,28))); /*printf("%d %f\n", tf_max, mask_metric);*/ RESTORE_STACK; #ifdef FUZZING @@ -378,8 +372,8 @@ static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int /* Looks for sudden increases of energy to decide whether we need to patch the transient decision */ -int patch_transient_decision(opus_val16 *newE, opus_val16 *oldE, int nbEBands, - int end, int C) +static int patch_transient_decision(opus_val16 *newE, opus_val16 *oldE, int nbEBands, + int start, int end, int C) { int i, c; opus_val32 mean_diff=0; @@ -388,28 +382,28 @@ int patch_transient_decision(opus_val16 *newE, opus_val16 *oldE, int nbEBands, avoid false detection caused by irrelevant bands */ if (C==1) { - spread_old[0] = oldE[0]; - for (i=1;i=0;i--) + for (i=end-2;i>=start;i--) spread_old[i] = MAX16(spread_old[i], spread_old[i+1]-QCONST16(1.0f, DB_SHIFT)); /* Compute mean increase */ c=0; do { - for (i=2;i QCONST16(1.f, DB_SHIFT); } @@ -417,9 +411,10 @@ int patch_transient_decision(opus_val16 *newE, opus_val16 *oldE, int nbEBands, /** Apply window and compute the MDCT for all sub-frames and all channels in a frame */ static void compute_mdcts(const CELTMode *mode, int shortBlocks, celt_sig * OPUS_RESTRICT in, - celt_sig * OPUS_RESTRICT out, int C, int CC, int LM, int upsample) + celt_sig * OPUS_RESTRICT out, int C, int CC, int LM, int upsample, + int arch) { - const int overlap = OVERLAP(mode); + const int overlap = mode->overlap; int N; int B; int shift; @@ -438,7 +433,9 @@ static void compute_mdcts(const CELTMode *mode, int shortBlocks, celt_sig * OPUS for (b=0;bmdct, in+c*(B*N+overlap)+b*N, &out[b+c*N*B], mode->window, overlap, shift, B); + clt_mdct_forward(&mode->mdct, in+c*(B*N+overlap)+b*N, + &out[b+c*N*B], mode->window, overlap, shift, B, + arch); } } while (++ceBands[i+1]-m->eBands[i])<eBands[i+1]-m->eBands[i])==1; - for (j=0;jeBands[i]<eBands[i]<>LM, 1<eBands[i]<eBands[i+1]<eBands[i]<eBands[i]<eBands[i+1]-m->eBands[i])<eBands[i]<eBands[i+1]<eBands[i]<eBands[i]<eBands[i+1]-m->eBands[i])< QCONST16(.995f,10)) - trim_index-=4; - else if (sum > QCONST16(.92f,10)) - trim_index-=3; - else if (sum > QCONST16(.85f,10)) - trim_index-=2; - else if (sum > QCONST16(.8f,10)) - trim_index-=1; /* mid-side savings estimations based on the LF average*/ logXC = celt_log2(QCONST32(1.001f, 20)-MULT16_16(sum, sum)); /* mid-side savings estimations based on min correlation */ @@ -819,14 +806,6 @@ static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X, } while (++c QCONST16(2.f, DB_SHIFT)) - trim_index--; - if (diff > QCONST16(8.f, DB_SHIFT)) - trim_index--; - if (diff < -QCONST16(4.f, DB_SHIFT)) - trim_index++; - if (diff < -QCONST16(10.f, DB_SHIFT)) - trim_index++; trim -= MAX16(-QCONST16(2.f, 8), MIN16(QCONST16(2.f, 8), SHR16(diff+QCONST16(1.f, DB_SHIFT),DB_SHIFT-8)/6 )); trim -= SHR16(surround_trim, DB_SHIFT-8); trim -= 2*SHR16(tf_estimate, 14-8); @@ -836,6 +815,8 @@ static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X, trim -= MAX16(-QCONST16(2.f, 8), MIN16(QCONST16(2.f, 8), (opus_val16)(QCONST16(2.f, 8)*(analysis->tonality_slope+.05f)))); } +#else + (void)analysis; #endif #ifdef OPUS_FIXED_POINT @@ -843,10 +824,7 @@ static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X, #else trim_index = (int)floor(.5f+trim); #endif - if (trim_index<0) - trim_index = 0; - if (trim_index>10) - trim_index = 10; + trim_index = IMAX(0, IMIN(10, trim_index)); /*printf("%d\n", trim_index);*/ #ifdef FUZZING trim_index = rand()%11; @@ -886,6 +864,66 @@ static int stereo_analysis(const CELTMode *m, const celt_norm *X, > MULT16_32_Q15(m->eBands[13]<<(LM+1), sumLR); } +#define MSWAP(a,b) do {opus_val16 tmp = a;a=b;b=tmp;} while(0) +static opus_val16 median_of_5(const opus_val16 *x) +{ + opus_val16 t0, t1, t2, t3, t4; + t2 = x[2]; + if (x[0] > x[1]) + { + t0 = x[1]; + t1 = x[0]; + } else { + t0 = x[0]; + t1 = x[1]; + } + if (x[3] > x[4]) + { + t3 = x[4]; + t4 = x[3]; + } else { + t3 = x[3]; + t4 = x[4]; + } + if (t0 > t3) + { + MSWAP(t0, t3); + MSWAP(t1, t4); + } + if (t2 > t1) + { + if (t1 < t3) + return MIN16(t2, t3); + else + return MIN16(t4, t1); + } else { + if (t2 < t3) + return MIN16(t1, t3); + else + return MIN16(t2, t4); + } +} + +static opus_val16 median_of_3(const opus_val16 *x) +{ + opus_val16 t0, t1, t2; + if (x[0] > x[1]) + { + t0 = x[1]; + t1 = x[0]; + } else { + t0 = x[0]; + t1 = x[1]; + } + t2 = x[2]; + if (t1 < t2) + return t1; + else if (t0 < t2) + return t2; + else + return t0; +} + static opus_val16 dynalloc_analysis(const opus_val16 *bandLogE, const opus_val16 *bandLogE2, int nbEBands, int start, int end, int C, int *offsets, int lsb_depth, const opus_int16 *logN, int isTransient, int vbr, int constrained_vbr, const opus_int16 *eBands, int LM, @@ -899,8 +937,7 @@ static opus_val16 dynalloc_analysis(const opus_val16 *bandLogE, const opus_val16 SAVE_STACK; ALLOC(follower, C*nbEBands, opus_val16); ALLOC(noise_floor, C*nbEBands, opus_val16); - for (i=0;i bandLogE2[c*nbEBands+i-1]+QCONST16(.5f,DB_SHIFT)) last=i; - follower[c*nbEBands+i] = MIN16(follower[c*nbEBands+i-1]+QCONST16(1.5f,DB_SHIFT), bandLogE2[c*nbEBands+i]); + f[i] = MIN16(f[i-1]+QCONST16(1.5f,DB_SHIFT), bandLogE2[c*nbEBands+i]); } for (i=last-1;i>=0;i--) - follower[c*nbEBands+i] = MIN16(follower[c*nbEBands+i], MIN16(follower[c*nbEBands+i+1]+QCONST16(2.f,DB_SHIFT), bandLogE2[c*nbEBands+i])); + f[i] = MIN16(f[i], MIN16(f[i+1]+QCONST16(2.f,DB_SHIFT), bandLogE2[c*nbEBands+i])); + + /* Combine with a median filter to avoid dynalloc triggering unnecessarily. + The "offset" value controls how conservative we are -- a higher offset + reduces the impact of the median filter and makes dynalloc use more bits. */ + offset = QCONST16(1.f, DB_SHIFT); + for (i=2;imode; + overlap = mode->overlap; ALLOC(_pre, CC*(N+COMBFILTER_MAXPERIOD), celt_sig); pre[0] = _pre; @@ -1027,7 +1084,7 @@ static int run_prefilter(CELTEncoder *st, celt_sig *in, celt_sig *prefilter_mem, c=0; do { OPUS_COPY(pre[c], prefilter_mem+c*COMBFILTER_MAXPERIOD, COMBFILTER_MAXPERIOD); - OPUS_COPY(pre[c]+COMBFILTER_MAXPERIOD, in+c*(N+st->overlap)+st->overlap, N); + OPUS_COPY(pre[c]+COMBFILTER_MAXPERIOD, in+c*(N+overlap)+overlap, N); } while (++cprefilter_period, st->prefilter_gain); + N, &pitch_index, st->prefilter_period, st->prefilter_gain, st->arch); if (pitch_index > COMBFILTER_MAXPERIOD-2) pitch_index = COMBFILTER_MAXPERIOD-2; gain1 = MULT16_16_Q15(QCONST16(.7f,15),gain1); @@ -1100,18 +1157,18 @@ static int run_prefilter(CELTEncoder *st, celt_sig *in, celt_sig *prefilter_mem, /*printf("%d %f\n", pitch_index, gain1);*/ c=0; do { - int offset = mode->shortMdctSize-st->overlap; + int offset = mode->shortMdctSize-overlap; st->prefilter_period=IMAX(st->prefilter_period, COMBFILTER_MINPERIOD); - OPUS_COPY(in+c*(N+st->overlap), st->in_mem+c*(st->overlap), st->overlap); + OPUS_COPY(in+c*(N+overlap), st->in_mem+c*(overlap), overlap); if (offset) - comb_filter(in+c*(N+st->overlap)+st->overlap, pre[c]+COMBFILTER_MAXPERIOD, + comb_filter(in+c*(N+overlap)+overlap, pre[c]+COMBFILTER_MAXPERIOD, st->prefilter_period, st->prefilter_period, offset, -st->prefilter_gain, -st->prefilter_gain, - st->prefilter_tapset, st->prefilter_tapset, NULL, 0); + st->prefilter_tapset, st->prefilter_tapset, NULL, 0, st->arch); - comb_filter(in+c*(N+st->overlap)+st->overlap+offset, pre[c]+COMBFILTER_MAXPERIOD+offset, + comb_filter(in+c*(N+overlap)+overlap+offset, pre[c]+COMBFILTER_MAXPERIOD+offset, st->prefilter_period, pitch_index, N-offset, -st->prefilter_gain, -gain1, - st->prefilter_tapset, prefilter_tapset, mode->window, st->overlap); - OPUS_COPY(st->in_mem+c*(st->overlap), in+c*(N+st->overlap)+N, st->overlap); + st->prefilter_tapset, prefilter_tapset, mode->window, overlap, st->arch); + OPUS_COPY(st->in_mem+c*(overlap), in+c*(N+overlap)+N, overlap); if (N>COMBFILTER_MAXPERIOD) { @@ -1196,6 +1253,9 @@ static int compute_vbr(const CELTMode *mode, AnalysisInfo *analysis, opus_int32 /*printf("%f %f ", analysis->tonality, tonal);*/ target = tonal_target; } +#else + (void)analysis; + (void)pitch_change; #endif if (has_surround_mask&&!lfe) @@ -1273,6 +1333,8 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, int LM, M; int tf_select; int nbFilledBytes, nbAvailableBytes; + int start; + int end; int effEnd; int codedBands; int tf_sum; @@ -1316,6 +1378,8 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, nbEBands = mode->nbEBands; overlap = mode->overlap; eBands = mode->eBands; + start = st->start; + end = st->end; tf_estimate = 0; if (nbCompressedBytes<2 || pcm==NULL) { @@ -1335,8 +1399,8 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, M=1<shortMdctSize; - prefilter_mem = st->in_mem+CC*(st->overlap); - oldBandE = (opus_val16*)(st->in_mem+CC*(st->overlap+COMBFILTER_MAXPERIOD)); + prefilter_mem = st->in_mem+CC*(overlap); + oldBandE = (opus_val16*)(st->in_mem+CC*(overlap+COMBFILTER_MAXPERIOD)); oldLogE = oldBandE + CC*nbEBands; oldLogE2 = oldLogE + CC*nbEBands; @@ -1352,8 +1416,8 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, #ifdef CUSTOM_MODES if (st->signalling && enc==NULL) { - int tmp = (mode->effEBands-st->end)>>1; - st->end = IMAX(1, mode->effEBands-tmp); + int tmp = (mode->effEBands-end)>>1; + end = st->end = IMAX(1, mode->effEBands-tmp); compressed[0] = tmp<<5; compressed[0] |= LM<<3; compressed[0] |= (C==2)<<2; @@ -1436,11 +1500,11 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, } total_bits = nbCompressedBytes*8; - effEnd = st->end; + effEnd = end; if (effEnd > mode->effEBands) effEnd = mode->effEBands; - ALLOC(in, CC*(N+st->overlap), celt_sig); + ALLOC(in, CC*(N+overlap), celt_sig); sample_max=MAX32(st->overlap_max, celt_maxabs16(pcm, C*(N-overlap)/st->upsample)); st->overlap_max=celt_maxabs16(pcm+C*(N-overlap)/st->upsample, C*overlap/st->upsample); @@ -1474,8 +1538,12 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, enc->nbits_total+=tell-ec_tell(enc); } c=0; do { - celt_preemphasis(pcm+c, in+c*(N+st->overlap)+st->overlap, N, CC, st->upsample, - mode->preemph, st->preemph_memE+c, st->clip); + int need_clip=0; +#ifndef OPUS_FIXED_POINT + need_clip = st->clip && sample_max>65536.f; +#endif + celt_preemphasis(pcm+c, in+c*(N+overlap)+overlap, N, CC, st->upsample, + mode->preemph, st->preemph_memE+c, need_clip); } while (++clfe&&nbAvailableBytes>3) || nbAvailableBytes>12*C) && st->start==0 && !silence && !st->disable_pf + enabled = ((st->lfe&&nbAvailableBytes>3) || nbAvailableBytes>12*C) && start==0 && !silence && !st->disable_pf && st->complexity >= 5 && !(st->consec_transient && LM!=3 && st->variable_duration==OPUS_FRAMESIZE_VARIABLE); prefilter_tapset = st->tapset_decision; @@ -1494,7 +1562,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, pitch_change = 1; if (pf_on==0) { - if(st->start==0 && tell+16<=total_bits) + if(start==0 && tell+16<=total_bits) ec_enc_bit_logp(enc, 0, 1); } else { /*This block is not gated by a total bits check only because @@ -1515,7 +1583,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, shortBlocks = 0; if (st->complexity >= 1 && !st->lfe) { - isTransient = transient_analysis(in, N+st->overlap, CC, + isTransient = transient_analysis(in, N+overlap, CC, &tf_estimate, &tf_chan); } if (LM>0 && ec_tell(enc)+3<=total_bits) @@ -1535,33 +1603,32 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, ALLOC(bandLogE2, C*nbEBands, opus_val16); if (secondMdct) { - compute_mdcts(mode, 0, in, freq, C, CC, LM, st->upsample); - compute_band_energies(mode, freq, bandE, effEnd, C, M); - amp2Log2(mode, effEnd, st->end, bandE, bandLogE2, C); + compute_mdcts(mode, 0, in, freq, C, CC, LM, st->upsample, st->arch); + compute_band_energies(mode, freq, bandE, effEnd, C, LM); + amp2Log2(mode, effEnd, end, bandE, bandLogE2, C); for (i=0;iupsample); + compute_mdcts(mode, shortBlocks, in, freq, C, CC, LM, st->upsample, st->arch); if (CC==2&&C==1) tf_chan = 0; - compute_band_energies(mode, freq, bandE, effEnd, C, M); + compute_band_energies(mode, freq, bandE, effEnd, C, LM); if (st->lfe) { - for (i=2;iend;i++) + for (i=2;iend, bandE, bandLogE, C); + amp2Log2(mode, effEnd, end, bandE, bandLogE, C); ALLOC(surround_dynalloc, C*nbEBands, opus_val16); - for(i=0;iend;i++) - surround_dynalloc[i] = 0; + OPUS_CLEAR(surround_dynalloc, end); /* This computes how much masking takes place between surround channels */ - if (st->start==0&&st->energy_mask&&!st->lfe) + if (start==0&&st->energy_mask&&!st->lfe) { int mask_end; int midband; @@ -1584,6 +1651,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, diff += MULT16_16(mask, 1+2*i-mask_end); } } + celt_assert(count>0); mask_avg = DIV32_16(mask_avg,count); mask_avg += QCONST16(.2f, DB_SHIFT); diff = diff*6/(C*(mask_end-1)*(mask_end+1)*mask_end); @@ -1621,8 +1689,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, disabling masking. */ mask_avg = 0; diff = 0; - for(i=0;istart;iend;i++) + for(i=start;iend-st->start); + frame_avg /= (end-start); temporal_vbr = SUB16(frame_avg,st->spec_avg); temporal_vbr = MIN16(QCONST16(3.f, DB_SHIFT), MAX16(-QCONST16(1.5f, DB_SHIFT), temporal_vbr)); st->spec_avg += MULT16_16_Q15(QCONST16(.02f, 15), temporal_vbr); @@ -1658,21 +1725,20 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, if (!secondMdct) { - for (i=0;i0 && ec_tell(enc)+3<=total_bits && !isTransient && st->complexity>=5 && !st->lfe) { - if (patch_transient_decision(bandLogE, oldBandE, nbEBands, st->end, C)) + if (patch_transient_decision(bandLogE, oldBandE, nbEBands, start, end, C)) { isTransient = 1; shortBlocks = M; - compute_mdcts(mode, shortBlocks, in, freq, C, CC, LM, st->upsample); - compute_band_energies(mode, freq, bandE, effEnd, C, M); - amp2Log2(mode, effEnd, st->end, bandE, bandLogE, C); + compute_mdcts(mode, shortBlocks, in, freq, C, CC, LM, st->upsample, st->arch); + compute_band_energies(mode, freq, bandE, effEnd, C, LM); + amp2Log2(mode, effEnd, end, bandE, bandLogE, C); /* Compensate for the scaling of short vs long mdcts */ for (i=0;i=15*C && st->start==0 && st->complexity>=2 && !st->lfe) + if (effectiveBytes>=15*C && start==0 && st->complexity>=2 && !st->lfe) { int lambda; if (effectiveBytes<40) @@ -1703,22 +1769,22 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, lambda = 3; lambda*=2; tf_select = tf_analysis(mode, effEnd, isTransient, tf_res, lambda, X, N, LM, &tf_sum, tf_estimate, tf_chan); - for (i=effEnd;iend;i++) + for (i=effEnd;iend;i++) + for (i=0;istart, st->end, effEnd, bandLogE, + quant_coarse_energy(mode, start, end, effEnd, bandLogE, oldBandE, total_bits, error, enc, C, LM, nbAvailableBytes, st->force_intra, &st->delayedIntra, st->complexity >= 4, st->loss_rate, st->lfe); - tf_encode(st->start, st->end, isTransient, tf_res, LM, tf_select, enc); + tf_encode(start, end, isTransient, tf_res, LM, tf_select, enc); if (ec_tell(enc)+4<=total_bits) { @@ -1726,7 +1792,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, { st->tapset_decision = 0; st->spread_decision = SPREAD_NORMAL; - } else if (shortBlocks || st->complexity < 3 || nbAvailableBytes < 10*C || st->start != 0) + } else if (shortBlocks || st->complexity < 3 || nbAvailableBytes < 10*C || start != 0) { if (st->complexity == 0) st->spread_decision = SPREAD_NONE; @@ -1760,7 +1826,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, ALLOC(offsets, nbEBands, int); - maxDepth = dynalloc_analysis(bandLogE, bandLogE2, nbEBands, st->start, st->end, C, offsets, + maxDepth = dynalloc_analysis(bandLogE, bandLogE2, nbEBands, start, end, C, offsets, st->lsb_depth, mode->logN, isTransient, st->vbr, st->constrained_vbr, eBands, LM, effectiveBytes, &tot_boost, st->lfe, surround_dynalloc); /* For LFE, everything interesting is in the first band */ @@ -1773,7 +1839,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, total_bits<<=BITRES; total_boost = 0; tell = ec_tell_frac(enc); - for (i=st->start;iend;i++) + for (i=start;iintensity = hysteresis_decision((opus_val16)(equiv_rate/1000), intensity_thresholds, intensity_histeresis, 21, st->intensity); - st->intensity = IMIN(st->end,IMAX(st->start, st->intensity)); + st->intensity = IMIN(end,IMAX(start, st->intensity)); } alloc_trim = 5; @@ -1828,7 +1894,8 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, alloc_trim = 5; else alloc_trim = alloc_trim_analysis(mode, X, bandLogE, - st->end, LM, C, N, &st->analysis, &st->stereo_saving, tf_estimate, st->intensity, surround_trim); + end, LM, C, N, &st->analysis, &st->stereo_saving, tf_estimate, + st->intensity, surround_trim, st->arch); ec_enc_icdf(enc, alloc_trim, trim_icdf, 7); tell = ec_tell_frac(enc); } @@ -1930,7 +1997,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, bits = (((opus_int32)nbCompressedBytes*8)<=2&&bits>=((LM+2)<end-1; + signalBandwidth = end-1; #ifndef DISABLE_FLOAT_API if (st->analysis.valid) { @@ -1950,7 +2017,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, #endif if (st->lfe) signalBandwidth = 1; - codedBands = compute_allocation(mode, st->start, st->end, offsets, cap, + codedBands = compute_allocation(mode, start, end, offsets, cap, alloc_trim, &st->intensity, &dual_stereo, bits, &balance, pulses, fine_quant, fine_priority, C, LM, enc, 1, st->lastCodedBands, signalBandwidth); if (st->lastCodedBands) @@ -1958,13 +2025,14 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, else st->lastCodedBands = codedBands; - quant_fine_energy(mode, st->start, st->end, oldBandE, error, fine_quant, enc, C); + quant_fine_energy(mode, start, end, oldBandE, error, fine_quant, enc, C); /* Residual quantisation */ ALLOC(collapse_masks, C*nbEBands, unsigned char); - quant_all_bands(1, mode, st->start, st->end, X, C==2 ? X+N : NULL, collapse_masks, - bandE, pulses, shortBlocks, st->spread_decision, dual_stereo, st->intensity, tf_res, - nbCompressedBytes*(8<rng); + quant_all_bands(1, mode, start, end, X, C==2 ? X+N : NULL, collapse_masks, + bandE, pulses, shortBlocks, st->spread_decision, + dual_stereo, st->intensity, tf_res, nbCompressedBytes*(8<rng, st->arch); if (anti_collapse_rsv > 0) { @@ -1974,7 +2042,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, #endif ec_enc_bits(enc, anti_collapse_on, 1); } - quant_energy_finalise(mode, st->start, st->end, oldBandE, error, fine_quant, fine_priority, nbCompressedBytes*8-ec_tell(enc), enc, C); + quant_energy_finalise(mode, start, end, oldBandE, error, fine_quant, fine_priority, nbCompressedBytes*8-ec_tell(enc), enc, C); if (silence) { @@ -1990,40 +2058,26 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, if (anti_collapse_on) { anti_collapse(mode, X, collapse_masks, LM, C, N, - st->start, st->end, oldBandE, oldLogE, oldLogE2, pulses, st->rng); - } - - if (silence) - { - for (i=0;istart, effEnd, C, M); + start, end, oldBandE, oldLogE, oldLogE2, pulses, st->rng); } c=0; do { OPUS_MOVE(st->syn_mem[c], st->syn_mem[c]+N, 2*MAX_PERIOD-N+overlap/2); } while (++csyn_mem[c]+2*MAX_PERIOD-N; } while (++cupsample, silence, st->arch); c=0; do { st->prefilter_period=IMAX(st->prefilter_period, COMBFILTER_MINPERIOD); st->prefilter_period_old=IMAX(st->prefilter_period_old, COMBFILTER_MINPERIOD); comb_filter(out_mem[c], out_mem[c], st->prefilter_period_old, st->prefilter_period, mode->shortMdctSize, st->prefilter_gain_old, st->prefilter_gain, st->prefilter_tapset_old, st->prefilter_tapset, - mode->window, st->overlap); + mode->window, overlap); if (LM!=0) comb_filter(out_mem[c]+mode->shortMdctSize, out_mem[c]+mode->shortMdctSize, st->prefilter_period, pitch_index, N-mode->shortMdctSize, st->prefilter_gain, gain1, st->prefilter_tapset, prefilter_tapset, @@ -2031,7 +2085,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, } while (++cupsample, mode->preemph, st->preemph_memD, freq); + deemphasis(out_mem, (opus_val16*)pcm, N, CC, st->upsample, mode->preemph, st->preemph_memD); st->prefilter_period_old = st->prefilter_period; st->prefilter_gain_old = st->prefilter_gain; st->prefilter_tapset_old = st->prefilter_tapset; @@ -2051,16 +2105,13 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, #endif if (CC==2&&C==1) { - for (i=0;istart;i++) + for (i=0;iend;iin_mem+st->channels*(st->overlap+COMBFILTER_MAXPERIOD)); + oldBandE = (opus_val16*)(st->in_mem+st->channels*(st->mode->overlap+COMBFILTER_MAXPERIOD)); oldLogE = oldBandE + st->channels*st->mode->nbEBands; oldLogE2 = oldLogE + st->channels*st->mode->nbEBands; OPUS_CLEAR((char*)&st->ENCODER_RESET_START, diff --git a/drivers/opus/celt/celt_lpc.c b/drivers/opus/celt/celt_lpc.c index ad0a6dfd43..5b8c01021f 100644 --- a/drivers/opus/celt/celt_lpc.c +++ b/drivers/opus/celt/celt_lpc.c @@ -24,10 +24,7 @@ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ - -#ifdef OPUS_ENABLED #include "opus/opus_config.h" -#endif #include "opus/celt/celt_lpc.h" #include "opus/celt/stack_alloc.h" @@ -88,12 +85,15 @@ int p #endif } -void celt_fir(const opus_val16 *_x, + +void celt_fir_c( + const opus_val16 *_x, const opus_val16 *num, opus_val16 *_y, int N, int ord, - opus_val16 *mem) + opus_val16 *mem, + int arch) { int i,j; VARDECL(opus_val16, rnum); @@ -111,6 +111,7 @@ void celt_fir(const opus_val16 *_x, for(i=0;i ARMv4 @@ -42,6 +43,22 @@ */ #define OPUS_ARCHMASK 3 +#elif (defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \ + (defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)) || \ + (defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) || \ + (defined(OPUS_X86_MAY_HAVE_AVX) && !defined(OPUS_X86_PRESUME_AVX)) + +#include "opus/celt/x86/x86cpu.h" +/* We currently support 5 x86 variants: + * arch[0] -> non-sse + * arch[1] -> sse + * arch[2] -> sse2 + * arch[3] -> sse4.1 + * arch[4] -> avx + */ +#define OPUS_ARCHMASK 7 +int opus_select_arch(void); + #else #define OPUS_ARCHMASK 0 @@ -50,5 +67,4 @@ static OPUS_INLINE int opus_select_arch(void) return 0; } #endif - #endif diff --git a/drivers/opus/celt/cwrs.c b/drivers/opus/celt/cwrs.c index bae9d21b97..983d4580a9 100644 --- a/drivers/opus/celt/cwrs.c +++ b/drivers/opus/celt/cwrs.c @@ -26,10 +26,7 @@ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ - -#ifdef OPUS_ENABLED #include "opus/opus_config.h" -#endif #include "opus/celt/os_support.h" #include "opus/celt/cwrs.h" @@ -460,10 +457,12 @@ void encode_pulses(const int *_y,int _n,int _k,ec_enc *_enc){ ec_enc_uint(_enc,icwrs(_n,_y),CELT_PVQ_V(_n,_k)); } -static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y){ +static opus_val32 cwrsi(int _n,int _k,opus_uint32 _i,int *_y){ opus_uint32 p; int s; int k0; + opus_int16 val; + opus_val32 yy=0; celt_assert(_k>0); celt_assert(_n>1); while(_n>2){ @@ -487,7 +486,9 @@ static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y){ } else for(p=row[_k];p>_i;p=row[_k])_k--; _i-=p; - *_y++=(k0-_k+s)^s; + val=(k0-_k+s)^s; + *_y++=val; + yy=MAC16_16(yy,val,val); } /*Lots of dimensions case:*/ else{ @@ -507,7 +508,9 @@ static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y){ do p=CELT_PVQ_U_ROW[--_k][_n]; while(p>_i); _i-=p; - *_y++=(k0-_k+s)^s; + val=(k0-_k+s)^s; + *_y++=val; + yy=MAC16_16(yy,val,val); } } _n--; @@ -519,14 +522,19 @@ static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y){ k0=_k; _k=(_i+1)>>1; if(_k)_i-=2*_k-1; - *_y++=(k0-_k+s)^s; + val=(k0-_k+s)^s; + *_y++=val; + yy=MAC16_16(yy,val,val); /*_n==1*/ s=-(int)_i; - *_y=(_k+s)^s; + val=(_k+s)^s; + *_y=val; + yy=MAC16_16(yy,val,val); + return yy; } -void decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){ - cwrsi(_n,_k,ec_dec_uint(_dec,CELT_PVQ_V(_n,_k)),_y); +opus_val32 decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){ + return cwrsi(_n,_k,ec_dec_uint(_dec,CELT_PVQ_V(_n,_k)),_y); } #else /* SMALL_FOOTPRINT */ @@ -591,8 +599,10 @@ static opus_uint32 ncwrs_urow(unsigned _n,unsigned _k,opus_uint32 *_u){ _y: Returns the vector of pulses. _u: Must contain entries [0..._k+1] of row _n of U() on input. Its contents will be destructively modified.*/ -static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y,opus_uint32 *_u){ +static opus_val32 cwrsi(int _n,int _k,opus_uint32 _i,int *_y,opus_uint32 *_u){ int j; + opus_int16 val; + opus_val32 yy=0; celt_assert(_n>0); j=0; do{ @@ -607,10 +617,13 @@ static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y,opus_uint32 *_u){ while(p>_i)p=_u[--_k]; _i-=p; yj-=_k; - _y[j]=(yj+s)^s; + val=(yj+s)^s; + _y[j]=val; + yy=MAC16_16(yy,val,val); uprev(_u,_k+2,0); } while(++j<_n); + return yy; } /*Returns the index of the given combination of K elements chosen from a set @@ -685,13 +698,15 @@ void encode_pulses(const int *_y,int _n,int _k,ec_enc *_enc){ RESTORE_STACK; } -void decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){ +opus_val32 decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){ VARDECL(opus_uint32,u); + int ret; SAVE_STACK; celt_assert(_k>0); ALLOC(u,_k+2U,opus_uint32); - cwrsi(_n,_k,ec_dec_uint(_dec,ncwrs_urow(_n,_k,u)),_y,u); + ret = cwrsi(_n,_k,ec_dec_uint(_dec,ncwrs_urow(_n,_k,u)),_y,u); RESTORE_STACK; + return ret; } #endif /* SMALL_FOOTPRINT */ diff --git a/drivers/opus/celt/cwrs.h b/drivers/opus/celt/cwrs.h index 5400afa6a4..f0ea844e5e 100644 --- a/drivers/opus/celt/cwrs.h +++ b/drivers/opus/celt/cwrs.h @@ -43,6 +43,6 @@ void get_required_bits(opus_int16 *bits, int N, int K, int frac); void encode_pulses(const int *_y, int N, int K, ec_enc *enc); -void decode_pulses(int *_y, int N, int K, ec_dec *dec); +opus_val32 decode_pulses(int *_y, int N, int K, ec_dec *dec); #endif /* CWRS_H */ diff --git a/drivers/opus/celt/entcode.c b/drivers/opus/celt/entcode.c index 5c9874908d..d1c6b25c7f 100644 --- a/drivers/opus/celt/entcode.c +++ b/drivers/opus/celt/entcode.c @@ -24,10 +24,7 @@ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ - -#ifdef OPUS_ENABLED #include "opus/opus_config.h" -#endif #include "opus/celt/entcode.h" #include "opus/celt/arch.h" @@ -62,6 +59,27 @@ int ec_ilog(opus_uint32 _v){ } #endif +#if 1 +/* This is a faster version of ec_tell_frac() that takes advantage + of the low (1/8 bit) resolution to use just a linear function + followed by a lookup to determine the exact transition thresholds. */ +opus_uint32 ec_tell_frac(ec_ctx *_this){ + static const unsigned correction[8] = + {35733, 38967, 42495, 46340, + 50535, 55109, 60097, 65535}; + opus_uint32 nbits; + opus_uint32 r; + int l; + unsigned b; + nbits=_this->nbits_total<rng); + r=_this->rng>>(l-16); + b = (r>>12)-8; + b += r>correction[b]; + l = (l<<3)+b; + return nbits-l; +} +#else opus_uint32 ec_tell_frac(ec_ctx *_this){ opus_uint32 nbits; opus_uint32 r; @@ -91,3 +109,42 @@ opus_uint32 ec_tell_frac(ec_ctx *_this){ } return nbits-l; } +#endif + +#ifdef USE_SMALL_DIV_TABLE +/* Result of 2^32/(2*i+1), except for i=0. */ +const opus_uint32 SMALL_DIV_TABLE[129] = { + 0xFFFFFFFF, 0x55555555, 0x33333333, 0x24924924, + 0x1C71C71C, 0x1745D174, 0x13B13B13, 0x11111111, + 0x0F0F0F0F, 0x0D79435E, 0x0C30C30C, 0x0B21642C, + 0x0A3D70A3, 0x097B425E, 0x08D3DCB0, 0x08421084, + 0x07C1F07C, 0x07507507, 0x06EB3E45, 0x06906906, + 0x063E7063, 0x05F417D0, 0x05B05B05, 0x0572620A, + 0x05397829, 0x05050505, 0x04D4873E, 0x04A7904A, + 0x047DC11F, 0x0456C797, 0x04325C53, 0x04104104, + 0x03F03F03, 0x03D22635, 0x03B5CC0E, 0x039B0AD1, + 0x0381C0E0, 0x0369D036, 0x03531DEC, 0x033D91D2, + 0x0329161F, 0x03159721, 0x03030303, 0x02F14990, + 0x02E05C0B, 0x02D02D02, 0x02C0B02C, 0x02B1DA46, + 0x02A3A0FD, 0x0295FAD4, 0x0288DF0C, 0x027C4597, + 0x02702702, 0x02647C69, 0x02593F69, 0x024E6A17, + 0x0243F6F0, 0x0239E0D5, 0x02302302, 0x0226B902, + 0x021D9EAD, 0x0214D021, 0x020C49BA, 0x02040810, + 0x01FC07F0, 0x01F44659, 0x01ECC07B, 0x01E573AC, + 0x01DE5D6E, 0x01D77B65, 0x01D0CB58, 0x01CA4B30, + 0x01C3F8F0, 0x01BDD2B8, 0x01B7D6C3, 0x01B20364, + 0x01AC5701, 0x01A6D01A, 0x01A16D3F, 0x019C2D14, + 0x01970E4F, 0x01920FB4, 0x018D3018, 0x01886E5F, + 0x0183C977, 0x017F405F, 0x017AD220, 0x01767DCE, + 0x01724287, 0x016E1F76, 0x016A13CD, 0x01661EC6, + 0x01623FA7, 0x015E75BB, 0x015AC056, 0x01571ED3, + 0x01539094, 0x01501501, 0x014CAB88, 0x0149539E, + 0x01460CBC, 0x0142D662, 0x013FB013, 0x013C995A, + 0x013991C2, 0x013698DF, 0x0133AE45, 0x0130D190, + 0x012E025C, 0x012B404A, 0x01288B01, 0x0125E227, + 0x01234567, 0x0120B470, 0x011E2EF3, 0x011BB4A4, + 0x01194538, 0x0116E068, 0x011485F0, 0x0112358E, + 0x010FEF01, 0x010DB20A, 0x010B7E6E, 0x010953F3, + 0x01073260, 0x0105197F, 0x0103091B, 0x01010101 +}; +#endif diff --git a/drivers/opus/celt/entcode.h b/drivers/opus/celt/entcode.h index c129f9b7d9..4d736d4dca 100644 --- a/drivers/opus/celt/entcode.h +++ b/drivers/opus/celt/entcode.h @@ -34,6 +34,12 @@ # include # include "opus/celt/ecintrin.h" +extern const opus_uint32 SMALL_DIV_TABLE[129]; + +#ifdef OPUS_ARM_ASM +#define USE_SMALL_DIV_TABLE +#endif + /*OPT: ec_window must be at least 32 bits, but if you have fast arithmetic on a larger type, you can speed up the decoder by using it here.*/ typedef opus_uint32 ec_window; @@ -114,4 +120,33 @@ static OPUS_INLINE int ec_tell(ec_ctx *_this){ rounding error is in the positive direction).*/ opus_uint32 ec_tell_frac(ec_ctx *_this); +/* Tested exhaustively for all n and for 1<=d<=256 */ +static OPUS_INLINE opus_uint32 celt_udiv(opus_uint32 n, opus_uint32 d) { + celt_assert(d>0); +#ifdef USE_SMALL_DIV_TABLE + if (d>256) + return n/d; + else { + opus_uint32 t, q; + t = EC_ILOG(d&-d); + q = (opus_uint64)SMALL_DIV_TABLE[d>>t]*(n>>(t-1))>>32; + return q+(n-q*d >= d); + } +#else + return n/d; +#endif +} + +static OPUS_INLINE opus_int32 celt_sudiv(opus_int32 n, opus_int32 d) { + celt_assert(d>0); +#ifdef USE_SMALL_DIV_TABLE + if (n<0) + return -(opus_int32)celt_udiv(-n, d); + else + return celt_udiv(n, d); +#else + return n/d; +#endif +} + #endif diff --git a/drivers/opus/celt/entdec.c b/drivers/opus/celt/entdec.c index 0ec4d460d2..fdc49c92c1 100644 --- a/drivers/opus/celt/entdec.c +++ b/drivers/opus/celt/entdec.c @@ -24,10 +24,7 @@ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ - -#ifdef OPUS_ENABLED #include "opus/opus_config.h" -#endif #include #include "opus/celt/os_support.h" @@ -138,7 +135,7 @@ void ec_dec_init(ec_dec *_this,unsigned char *_buf,opus_uint32 _storage){ unsigned ec_decode(ec_dec *_this,unsigned _ft){ unsigned s; - _this->ext=_this->rng/_ft; + _this->ext=celt_udiv(_this->rng,_ft); s=(unsigned)(_this->val/_this->ext); return _ft-EC_MINI(s+1,_ft); } diff --git a/drivers/opus/celt/entenc.c b/drivers/opus/celt/entenc.c index 085b2b2816..da7f2c8e62 100644 --- a/drivers/opus/celt/entenc.c +++ b/drivers/opus/celt/entenc.c @@ -24,10 +24,7 @@ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ - -#if defined(OPUS_ENABLED) -# include "opus/opus_config.h" -#endif +#include "opus/opus_config.h" #include "opus/celt/os_support.h" #include "opus/celt/arch.h" #include "opus/celt/entenc.h" @@ -98,7 +95,7 @@ static void ec_enc_carry_out(ec_enc *_this,int _c){ else _this->ext++; } -static void ec_enc_normalize(ec_enc *_this){ +static OPUS_INLINE void ec_enc_normalize(ec_enc *_this){ /*If the range is too small, output some bits and rescale it.*/ while(_this->rng<=EC_CODE_BOT){ ec_enc_carry_out(_this,(int)(_this->val>>EC_CODE_SHIFT)); @@ -127,7 +124,7 @@ void ec_enc_init(ec_enc *_this,unsigned char *_buf,opus_uint32 _size){ void ec_encode(ec_enc *_this,unsigned _fl,unsigned _fh,unsigned _ft){ opus_uint32 r; - r=_this->rng/_ft; + r=celt_udiv(_this->rng,_ft); if(_fl>0){ _this->val+=_this->rng-IMUL32(r,(_ft-_fl)); _this->rng=IMUL32(r,(_fh-_fl)); diff --git a/drivers/opus/celt/fixed_debug.h b/drivers/opus/celt/fixed_debug.h index 0ed16baa17..3b8d5ef7a9 100644 --- a/drivers/opus/celt/fixed_debug.h +++ b/drivers/opus/celt/fixed_debug.h @@ -496,6 +496,7 @@ static OPUS_INLINE int MULT16_32_PX_(int a, opus_int64 b, int Q, char *file, int #define MULT16_32_Q15(a,b) MULT16_32_QX(a,b,15) #define MAC16_32_Q15(c,a,b) (celt_mips-=2,ADD32((c),MULT16_32_Q15((a),(b)))) +#define MAC16_32_Q16(c,a,b) (celt_mips-=2,ADD32((c),MULT16_32_Q16((a),(b)))) static OPUS_INLINE int SATURATE(int a, int b) { @@ -767,6 +768,16 @@ static OPUS_INLINE int DIV32_(opus_int64 a, opus_int64 b, char *file, int line) return res; } +static OPUS_INLINE opus_val16 SIG2WORD16_generic(celt_sig x) +{ + x = PSHR32(x, SIG_SHIFT); + x = MAX32(x, -32768); + x = MIN32(x, 32767); + return EXTRACT16(x); +} +#define SIG2WORD16(x) (SIG2WORD16_generic(x)) + + #undef PRINT_MIPS #define PRINT_MIPS(file) do {fprintf (file, "total complexity = %llu MIPS\n", celt_mips);} while (0); diff --git a/drivers/opus/celt/fixed_generic.h b/drivers/opus/celt/fixed_generic.h index ecf018a244..71ab048e4a 100644 --- a/drivers/opus/celt/fixed_generic.h +++ b/drivers/opus/celt/fixed_generic.h @@ -113,7 +113,11 @@ /** 16x32 multiply, followed by a 15-bit shift right and 32-bit add. b must fit in 31 bits. Result fits in 32 bits. */ -#define MAC16_32_Q15(c,a,b) ADD32(c,ADD32(MULT16_16((a),SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15))) +#define MAC16_32_Q15(c,a,b) ADD32((c),ADD32(MULT16_16((a),SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15))) + +/** 16x32 multiplication, followed by a 16-bit shift right and 32-bit add. + Results fits in 32 bits */ +#define MAC16_32_Q16(c,a,b) ADD32((c),ADD32(MULT16_16((a),SHR((b),16)), SHR(MULT16_16SU((a),((b)&0x0000ffff)),16))) #define MULT16_16_Q11_32(a,b) (SHR(MULT16_16((a),(b)),11)) #define MULT16_16_Q11(a,b) (SHR(MULT16_16((a),(b)),11)) @@ -131,4 +135,17 @@ /** Divide a 32-bit value by a 32-bit value. Result fits in 32 bits */ #define DIV32(a,b) (((opus_val32)(a))/((opus_val32)(b))) +#if defined(MIPSr1_ASM) +#include "opus/celt/mips/fixed_generic_mipsr1.h" +#endif + +static OPUS_INLINE opus_val16 SIG2WORD16_generic(celt_sig x) +{ + x = PSHR32(x, SIG_SHIFT); + x = MAX32(x, -32768); + x = MIN32(x, 32767); + return EXTRACT16(x); +} +#define SIG2WORD16(x) (SIG2WORD16_generic(x)) + #endif diff --git a/drivers/opus/celt/float_cast.h b/drivers/opus/celt/float_cast.h index 86e80a93ff..36a1bfe671 100644 --- a/drivers/opus/celt/float_cast.h +++ b/drivers/opus/celt/float_cast.h @@ -90,14 +90,14 @@ #include #define float2int(x) lrint(x) -#elif (defined(_MSC_VER) && _MSC_VER >= 1400) && (defined (WIN64) || defined (_WIN64)) +#elif (defined(_MSC_VER) && _MSC_VER >= 1400) && defined (_M_X64) #include __inline long int float2int(float value) { return _mm_cvtss_si32(_mm_load_ss(&value)); } -#elif (defined(_MSC_VER) && _MSC_VER >= 1400) && (defined (WIN32) || defined (_WIN32)) +#elif (defined(_MSC_VER) && _MSC_VER >= 1400) && defined (_M_IX86) #include /* Win32 doesn't seem to have these functions. diff --git a/drivers/opus/celt/kiss_fft.c b/drivers/opus/celt/kiss_fft.c index 89a1790b24..cf8d049fa1 100644 --- a/drivers/opus/celt/kiss_fft.c +++ b/drivers/opus/celt/kiss_fft.c @@ -30,9 +30,7 @@ heavily modified to better suit Opus */ #ifndef SKIP_CONFIG_H -# ifdef OPUS_ENABLED #include "opus/opus_config.h" -# endif #endif #include "opus/celt/_kiss_fft_guts.h" @@ -47,64 +45,56 @@ static void kf_bfly2( kiss_fft_cpx * Fout, - const size_t fstride, - const kiss_fft_state *st, int m, - int N, - int mm + int N ) { kiss_fft_cpx * Fout2; - const kiss_twiddle_cpx * tw1; - int i,j; - kiss_fft_cpx * Fout_beg = Fout; - for (i=0;itwiddles; - for(j=0;jr = SHR32(Fout->r, 1);Fout->i = SHR32(Fout->i, 1); - Fout2->r = SHR32(Fout2->r, 1);Fout2->i = SHR32(Fout2->i, 1); - C_MUL (t, *Fout2 , *tw1); - tw1 += fstride; + Fout2 = Fout + 1; + t = *Fout2; C_SUB( *Fout2 , *Fout , t ); C_ADDTO( *Fout , t ); - ++Fout2; - ++Fout; + Fout += 2; } - } -} - -static void ki_bfly2( - kiss_fft_cpx * Fout, - const size_t fstride, - const kiss_fft_state *st, - int m, - int N, - int mm - ) -{ - kiss_fft_cpx * Fout2; - const kiss_twiddle_cpx * tw1; - kiss_fft_cpx t; - int i,j; - kiss_fft_cpx * Fout_beg = Fout; - for (i=0;itwiddles; - for(j=0;jtwiddles; - for (j=0;jr = PSHR32(Fout->r, 2); - Fout->i = PSHR32(Fout->i, 2); - C_SUB( scratch[5] , *Fout, scratch[1] ); - C_ADDTO(*Fout, scratch[1]); - C_ADD( scratch[3] , scratch[0] , scratch[2] ); - C_SUB( scratch[4] , scratch[0] , scratch[2] ); - C_SUB( Fout[m2], *Fout, scratch[3] ); - tw1 += fstride; - tw2 += fstride*2; - tw3 += fstride*3; - C_ADDTO( *Fout , scratch[3] ); - - Fout[m].r = scratch[5].r + scratch[4].i; - Fout[m].i = scratch[5].i - scratch[4].r; - Fout[m3].r = scratch[5].r - scratch[4].i; - Fout[m3].i = scratch[5].i + scratch[4].r; - ++Fout; + kiss_fft_cpx scratch0, scratch1; + + C_SUB( scratch0 , *Fout, Fout[2] ); + C_ADDTO(*Fout, Fout[2]); + C_ADD( scratch1 , Fout[1] , Fout[3] ); + C_SUB( Fout[2], *Fout, scratch1 ); + C_ADDTO( *Fout , scratch1 ); + C_SUB( scratch1 , Fout[1] , Fout[3] ); + + Fout[1].r = scratch0.r + scratch1.i; + Fout[1].i = scratch0.i - scratch1.r; + Fout[3].r = scratch0.r - scratch1.i; + Fout[3].i = scratch0.i + scratch1.r; + Fout+=4; } - } -} - -static void ki_bfly4( - kiss_fft_cpx * Fout, - const size_t fstride, - const kiss_fft_state *st, - int m, - int N, - int mm - ) -{ - const kiss_twiddle_cpx *tw1,*tw2,*tw3; - kiss_fft_cpx scratch[6]; - const size_t m2=2*m; - const size_t m3=3*m; - int i, j; - - kiss_fft_cpx * Fout_beg = Fout; - for (i=0;itwiddles; - for (j=0;jtwiddles; + /* m is guaranteed to be a multiple of 4. */ + for (j=0;jtwiddles[fstride*m]; +#endif for (i=0;itwiddles; + /* For non-custom modes, m is guaranteed to be a multiple of 4. */ k=m; do { - C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3); C_MUL(scratch[1],Fout[m] , *tw1); C_MUL(scratch[2],Fout[m2] , *tw2); @@ -255,56 +228,8 @@ static void kf_bfly3( } } -static void ki_bfly3( - kiss_fft_cpx * Fout, - const size_t fstride, - const kiss_fft_state *st, - int m, - int N, - int mm - ) -{ - int i, k; - const size_t m2 = 2*m; - const kiss_twiddle_cpx *tw1,*tw2; - kiss_fft_cpx scratch[5]; - kiss_twiddle_cpx epi3; - - kiss_fft_cpx * Fout_beg = Fout; - epi3 = st->twiddles[fstride*m]; - for (i=0;itwiddles; - k=m; - do{ - - C_MULC(scratch[1],Fout[m] , *tw1); - C_MULC(scratch[2],Fout[m2] , *tw2); - - C_ADD(scratch[3],scratch[1],scratch[2]); - C_SUB(scratch[0],scratch[1],scratch[2]); - tw1 += fstride; - tw2 += fstride*2; - - Fout[m].r = Fout->r - HALF_OF(scratch[3].r); - Fout[m].i = Fout->i - HALF_OF(scratch[3].i); - - C_MULBYSCALAR( scratch[0] , -epi3.i ); - - C_ADDTO(*Fout,scratch[3]); - - Fout[m2].r = Fout[m].r + scratch[0].i; - Fout[m2].i = Fout[m].i - scratch[0].r; - - Fout[m].r -= scratch[0].i; - Fout[m].i += scratch[0].r; - - ++Fout; - }while(--k); - } -} +#ifndef OVERRIDE_kf_bfly5 static void kf_bfly5( kiss_fft_cpx * Fout, const size_t fstride, @@ -317,13 +242,19 @@ static void kf_bfly5( kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; int i, u; kiss_fft_cpx scratch[13]; - const kiss_twiddle_cpx * twiddles = st->twiddles; const kiss_twiddle_cpx *tw; kiss_twiddle_cpx ya,yb; kiss_fft_cpx * Fout_beg = Fout; - ya = twiddles[fstride*m]; - yb = twiddles[fstride*2*m]; +#ifdef OPUS_FIXED_POINT + ya.r = 10126; + ya.i = -31164; + yb.r = -26510; + yb.i = -19261; +#else + ya = st->twiddles[fstride*m]; + yb = st->twiddles[fstride*2*m]; +#endif tw=st->twiddles; for (i=0;itwiddles; - const kiss_twiddle_cpx *tw; - kiss_twiddle_cpx ya,yb; - kiss_fft_cpx * Fout_beg = Fout; - - ya = twiddles[fstride*m]; - yb = twiddles[fstride*2*m]; - tw=st->twiddles; - - for (i=0;ir += scratch[7].r + scratch[8].r; - Fout0->i += scratch[7].i + scratch[8].i; - - scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r); - scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r); - - scratch[6].r = -S_MUL(scratch[10].i,ya.i) - S_MUL(scratch[9].i,yb.i); - scratch[6].i = S_MUL(scratch[10].r,ya.i) + S_MUL(scratch[9].r,yb.i); - - C_SUB(*Fout1,scratch[5],scratch[6]); - C_ADD(*Fout4,scratch[5],scratch[6]); - - scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r); - scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r); - scratch[12].r = S_MUL(scratch[10].i,yb.i) - S_MUL(scratch[9].i,ya.i); - scratch[12].i = -S_MUL(scratch[10].r,yb.i) + S_MUL(scratch[9].r,ya.i); - - C_ADD(*Fout2,scratch[11],scratch[12]); - C_SUB(*Fout3,scratch[11],scratch[12]); - - ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4; - } - } -} #endif @@ -488,6 +353,9 @@ static int kf_factor(int n,opus_int16 * facbuf) { int p=4; + int i; + int stages=0; + int nbak = n; /*factor out powers of 4, powers of 2, then any remaining primes */ do { @@ -509,9 +377,30 @@ int kf_factor(int n,opus_int16 * facbuf) { return 0; } - *facbuf++ = p; - *facbuf++ = n; + facbuf[2*stages] = p; + if (p==2 && stages > 1) + { + facbuf[2*stages] = 4; + facbuf[2] = 2; + } + stages++; } while (n > 1); + n = nbak; + /* Reverse the order to get the radix 4 at the end, so we can use the + fast degenerate case. It turns out that reversing the order also + improves the noise behaviour. */ + for (i=0;infft=nfft; -#ifndef OPUS_FIXED_POINT +#ifdef OPUS_FIXED_POINT + st->scale_shift = celt_ilog2(st->nfft); + if (st->nfft == 1<scale_shift) + st->scale = Q15ONE; + else + st->scale = (1073741824+st->nfft/2)/st->nfft>>(15-st->scale_shift); +#else st->scale = 1.f/nfft; #endif if (base != NULL) { st->twiddles = base->twiddles; st->shift = 0; - while (nfft<shift != base->nfft && st->shift < 32) + while (st->shift < 32 && nfft<shift != base->nfft) st->shift++; if (st->shift>=32) goto fail; @@ -581,22 +482,31 @@ kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, co if (st->bitrev==NULL) goto fail; compute_bitrev_table(0, bitrev, 1,1, st->factors,st); + + /* Initialize architecture specific fft parameters */ + if (opus_fft_alloc_arch(st, arch)) + goto fail; } return st; fail: - opus_fft_free(st); + opus_fft_free(st, arch); return NULL; } -kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem ) +kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem, int arch) { - return opus_fft_alloc_twiddles(nfft, mem, lenmem, NULL); + return opus_fft_alloc_twiddles(nfft, mem, lenmem, NULL, arch); +} + +void opus_fft_free_arch_c(kiss_fft_state *st) { + (void)st; } -void opus_fft_free(const kiss_fft_state *cfg) +void opus_fft_free(const kiss_fft_state *cfg, int arch) { if (cfg) { + opus_fft_free_arch((kiss_fft_state *)cfg, arch); opus_free((opus_int16*)cfg->bitrev); if (cfg->shift < 0) opus_free((kiss_twiddle_cpx*)cfg->twiddles); @@ -606,7 +516,7 @@ void opus_fft_free(const kiss_fft_state *cfg) #endif /* CUSTOM_MODES */ -void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) +void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout) { int m2, m; int p; @@ -618,17 +528,6 @@ void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fou /* st->shift can be -1 */ shift = st->shift>0 ? st->shift : 0; - celt_assert2 (fin != fout, "In-place FFT not supported"); - /* Bit-reverse the input */ - for (i=0;infft;i++) - { - fout[st->bitrev[i]] = fin[i]; -#ifndef OPUS_FIXED_POINT - fout[st->bitrev[i]].r *= st->scale; - fout[st->bitrev[i]].i *= st->scale; -#endif - } - fstride[0] = 1; L=0; do { @@ -647,7 +546,7 @@ void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fou switch (st->factors[2*i]) { case 2: - kf_bfly2(fout,fstride[i]<scale_shift-1; +#endif + scale = st->scale; - /* st->shift can be -1 */ - shift = st->shift>0 ? st->shift : 0; celt_assert2 (fin != fout, "In-place FFT not supported"); /* Bit-reverse the input */ for (i=0;infft;i++) - fout[st->bitrev[i]] = fin[i]; - - fstride[0] = 1; - L=0; - do { - p = st->factors[2*L]; - m = st->factors[2*L+1]; - fstride[L+1] = fstride[L]*p; - L++; - } while(m!=1); - m = st->factors[2*L-1]; - for (i=L-1;i>=0;i--) { - if (i!=0) - m2 = st->factors[2*i-1]; - else - m2 = 1; - switch (st->factors[2*i]) - { - case 2: - ki_bfly2(fout,fstride[i]<bitrev[i]].r = SHR32(MULT16_32_Q16(scale, x.r), scale_shift); + fout[st->bitrev[i]].i = SHR32(MULT16_32_Q16(scale, x.i), scale_shift); } + opus_fft_impl(st, fout); } + +void opus_ifft_c(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) +{ + int i; + celt_assert2 (fin != fout, "In-place FFT not supported"); + /* Bit-reverse the input */ + for (i=0;infft;i++) + fout[st->bitrev[i]] = fin[i]; + for (i=0;infft;i++) + fout[i].i = -fout[i].i; + opus_fft_impl(st, fout); + for (i=0;infft;i++) + fout[i].i = -fout[i].i; +} diff --git a/drivers/opus/celt/kiss_fft.h b/drivers/opus/celt/kiss_fft.h index db2532c692..6229044c43 100644 --- a/drivers/opus/celt/kiss_fft.h +++ b/drivers/opus/celt/kiss_fft.h @@ -32,6 +32,7 @@ #include #include #include "opus/celt/arch.h" +#include "opus/celt/cpu_support.h" #ifdef __cplusplus extern "C" { @@ -77,17 +78,28 @@ typedef struct { 4*4*4*2 */ +typedef struct arch_fft_state{ + int is_supported; + void *priv; +} arch_fft_state; + typedef struct kiss_fft_state{ int nfft; -#ifndef OPUS_FIXED_POINT - kiss_fft_scalar scale; + opus_val16 scale; +#ifdef OPUS_FIXED_POINT + int scale_shift; #endif int shift; opus_int16 factors[2*MAXFACTORS]; const opus_int16 *bitrev; const kiss_twiddle_cpx *twiddles; + arch_fft_state *arch_fft; } kiss_fft_state; +#if defined(HAVE_ARM_NE10) +#include "opus/celt/arm/fft_arm.h" +#endif + /*typedef struct kiss_fft_state* kiss_fft_cfg;*/ /** @@ -113,9 +125,9 @@ typedef struct kiss_fft_state{ * buffer size in *lenmem. * */ -kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, const kiss_fft_state *base); +kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, const kiss_fft_state *base, int arch); -kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem); +kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem, int arch); /** * opus_fft(cfg,in_out_buf) @@ -127,10 +139,59 @@ kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem); * Note that each element is complex and can be accessed like f[k].r and f[k].i * */ -void opus_fft(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout); -void opus_ifft(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout); +void opus_fft_c(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout); +void opus_ifft_c(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout); + +void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout); +void opus_ifft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout); + +void opus_fft_free(const kiss_fft_state *cfg, int arch); + + +void opus_fft_free_arch_c(kiss_fft_state *st); +int opus_fft_alloc_arch_c(kiss_fft_state *st); + +#if !defined(OVERRIDE_OPUS_FFT) +/* Is run-time CPU detection enabled on this platform? */ +#if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10)) + +extern int (*const OPUS_FFT_ALLOC_ARCH_IMPL[OPUS_ARCHMASK+1])( + kiss_fft_state *st); + +#define opus_fft_alloc_arch(_st, arch) \ + ((*OPUS_FFT_ALLOC_ARCH_IMPL[(arch)&OPUS_ARCHMASK])(_st)) + +extern void (*const OPUS_FFT_FREE_ARCH_IMPL[OPUS_ARCHMASK+1])( + kiss_fft_state *st); +#define opus_fft_free_arch(_st, arch) \ + ((*OPUS_FFT_FREE_ARCH_IMPL[(arch)&OPUS_ARCHMASK])(_st)) + +extern void (*const OPUS_FFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg, + const kiss_fft_cpx *fin, kiss_fft_cpx *fout); +#define opus_fft(_cfg, _fin, _fout, arch) \ + ((*OPUS_FFT[(arch)&OPUS_ARCHMASK])(_cfg, _fin, _fout)) + +extern void (*const OPUS_IFFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg, + const kiss_fft_cpx *fin, kiss_fft_cpx *fout); +#define opus_ifft(_cfg, _fin, _fout, arch) \ + ((*OPUS_IFFT[(arch)&OPUS_ARCHMASK])(_cfg, _fin, _fout)) + +#else /* else for if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10)) */ + +#define opus_fft_alloc_arch(_st, arch) \ + ((void)(arch), opus_fft_alloc_arch_c(_st)) + +#define opus_fft_free_arch(_st, arch) \ + ((void)(arch), opus_fft_free_arch_c(_st)) + +#define opus_fft(_cfg, _fin, _fout, arch) \ + ((void)(arch), opus_fft_c(_cfg, _fin, _fout)) + +#define opus_ifft(_cfg, _fin, _fout, arch) \ + ((void)(arch), opus_ifft_c(_cfg, _fin, _fout)) -void opus_fft_free(const kiss_fft_state *cfg); +#endif /* end if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10)) */ +#endif /* end if !defined(OVERRIDE_OPUS_FFT) */ #ifdef __cplusplus } diff --git a/drivers/opus/celt/laplace.c b/drivers/opus/celt/laplace.c index 9dc4d94d24..5bc956af45 100644 --- a/drivers/opus/celt/laplace.c +++ b/drivers/opus/celt/laplace.c @@ -25,10 +25,7 @@ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ - -#ifdef OPUS_ENABLED #include "opus/opus_config.h" -#endif #include "opus/celt/laplace.h" #include "opus/celt/mathops.h" diff --git a/drivers/opus/celt/mathops.c b/drivers/opus/celt/mathops.c index 88e5aea129..3f7c1a8bdd 100644 --- a/drivers/opus/celt/mathops.c +++ b/drivers/opus/celt/mathops.c @@ -30,10 +30,7 @@ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ - -#ifdef OPUS_ENABLED #include "opus/opus_config.h" -#endif #include "opus/celt/mathops.h" diff --git a/drivers/opus/celt/mathops.h b/drivers/opus/celt/mathops.h index 759f58d1b7..a008d71e18 100644 --- a/drivers/opus/celt/mathops.h +++ b/drivers/opus/celt/mathops.h @@ -254,5 +254,5 @@ static OPUS_INLINE opus_val16 celt_atan2p(opus_val16 y, opus_val16 x) } } -#endif /* OPUS_FIXED_POINT */ +#endif /* FIXED_POINT */ #endif /* MATHOPS_H */ diff --git a/drivers/opus/celt/mdct.c b/drivers/opus/celt/mdct.c index ae34538d6c..641a20e49a 100644 --- a/drivers/opus/celt/mdct.c +++ b/drivers/opus/celt/mdct.c @@ -1,4 +1,4 @@ - /* Copyright (c) 2007-2008 CSIRO +/* Copyright (c) 2007-2008 CSIRO Copyright (c) 2007-2008 Xiph.Org Foundation Written by Jean-Marc Valin */ /* @@ -40,10 +40,8 @@ */ #ifndef SKIP_CONFIG_H -#ifdef OPUS_ENABLED #include "opus/opus_config.h" #endif -#endif #include "opus/celt/mdct.h" #include "opus/celt/kiss_fft.h" @@ -53,76 +51,100 @@ #include "opus/celt/mathops.h" #include "opus/celt/stack_alloc.h" +#if defined(MIPSr1_ASM) +#include "opus/celt/mips/mdct_mipsr1.h" +#endif + + #ifdef CUSTOM_MODES -int clt_mdct_init(celt_mdct_lookup *l,int N, int maxshift) +int clt_mdct_init(mdct_lookup *l,int N, int maxshift, int arch) { int i; - int N4; kiss_twiddle_scalar *trig; -#if defined(OPUS_FIXED_POINT) + int shift; int N2=N>>1; -#endif l->n = N; - N4 = N>>2; l->maxshift = maxshift; for (i=0;i<=maxshift;i++) { if (i==0) - l->kfft[i] = opus_fft_alloc(N>>2>>i, 0, 0); + l->kfft[i] = opus_fft_alloc(N>>2>>i, 0, 0, arch); else - l->kfft[i] = opus_fft_alloc_twiddles(N>>2>>i, 0, 0, l->kfft[0]); + l->kfft[i] = opus_fft_alloc_twiddles(N>>2>>i, 0, 0, l->kfft[0], arch); #ifndef ENABLE_TI_DSPLIB55 if (l->kfft[i]==NULL) return 0; #endif } - l->trig = trig = (kiss_twiddle_scalar*)opus_alloc((N4+1)*sizeof(kiss_twiddle_scalar)); + l->trig = trig = (kiss_twiddle_scalar*)opus_alloc((N-(N2>>maxshift))*sizeof(kiss_twiddle_scalar)); if (l->trig==NULL) return 0; - /* We have enough points that sine isn't necessary */ -#if defined(OPUS_FIXED_POINT) - for (i=0;i<=N4;i++) - trig[i] = TRIG_UPSCALE*celt_cos_norm(DIV32(ADD32(SHL32(EXTEND32(i),17),N2),N)); + for (shift=0;shift<=maxshift;shift++) + { + /* We have enough points that sine isn't necessary */ +#if defined(FIXED_POINT) +#if 1 + for (i=0;i>= 1; + N >>= 1; + } return 1; } -void clt_mdct_clear(celt_mdct_lookup *l) +void clt_mdct_clear(mdct_lookup *l, int arch) { int i; for (i=0;i<=l->maxshift;i++) - opus_fft_free(l->kfft[i]); + opus_fft_free(l->kfft[i], arch); opus_free((kiss_twiddle_scalar*)l->trig); } #endif /* CUSTOM_MODES */ /* Forward MDCT trashes the input array */ -void clt_mdct_forward(const celt_mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out, - const opus_val16 *window, int overlap, int shift, int stride) +#ifndef OVERRIDE_clt_mdct_forward +void clt_mdct_forward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out, + const opus_val16 *window, int overlap, int shift, int stride, int arch) { int i; int N, N2, N4; - kiss_twiddle_scalar sine; VARDECL(kiss_fft_scalar, f); - VARDECL(kiss_fft_scalar, f2); + VARDECL(kiss_fft_cpx, f2); + const kiss_fft_state *st = l->kfft[shift]; + const kiss_twiddle_scalar *trig; + opus_val16 scale; +#ifdef OPUS_FIXED_POINT + /* Allows us to scale with MULT16_32_Q16(), which is faster than + MULT16_32_Q15() on ARM. */ + int scale_shift = st->scale_shift-1; +#endif SAVE_STACK; + (void)arch; + scale = st->scale; + N = l->n; - N >>= shift; + trig = l->trig; + for (i=0;i>= 1; + trig += N; + } N2 = N>>1; N4 = N>>2; + ALLOC(f, N2, kiss_fft_scalar); - ALLOC(f2, N2, kiss_fft_scalar); - /* sin(x) ~= x here */ -#ifdef OPUS_FIXED_POINT - sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N; -#else - sine = (kiss_twiddle_scalar)2*PI*(.125f)/N; -#endif + ALLOC(f2, N4, kiss_fft_cpx); /* Consider the input to be composed of four blocks: [a, b, c, d] */ /* Window, shuffle, fold */ @@ -167,123 +189,131 @@ void clt_mdct_forward(const celt_mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_s /* Pre-rotation */ { kiss_fft_scalar * OPUS_RESTRICT yp = f; - const kiss_twiddle_scalar *t = &l->trig[0]; + const kiss_twiddle_scalar *t = &trig[0]; for(i=0;ibitrev[i]] = yc; } } - /* N/4 complex FFT, down-scales by 4/N */ - opus_fft(l->kfft[shift], (kiss_fft_cpx *)f, (kiss_fft_cpx *)f2); + /* N/4 complex FFT, does not downscale anymore */ + opus_fft_impl(st, f2); /* Post-rotate */ { /* Temp pointers to make it really clear to the compiler what we're doing */ - const kiss_fft_scalar * OPUS_RESTRICT fp = f2; + const kiss_fft_cpx * OPUS_RESTRICT fp = f2; kiss_fft_scalar * OPUS_RESTRICT yp1 = out; kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1); - const kiss_twiddle_scalar *t = &l->trig[0]; + const kiss_twiddle_scalar *t = &trig[0]; /* Temp pointers to make it really clear to the compiler what we're doing */ for(i=0;ii,t[N4+i]) - S_MUL(fp->r,t[i]); + yi = S_MUL(fp->r,t[N4+i]) + S_MUL(fp->i,t[i]); + *yp1 = yr; + *yp2 = yi; + fp++; yp1 += 2*stride; yp2 -= 2*stride; } } RESTORE_STACK; } +#endif /* OVERRIDE_clt_mdct_forward */ -void clt_mdct_backward(const celt_mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out, - const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride) +#ifndef OVERRIDE_clt_mdct_backward +void clt_mdct_backward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out, + const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride, int arch) { int i; int N, N2, N4; - kiss_twiddle_scalar sine; - VARDECL(kiss_fft_scalar, f2); - SAVE_STACK; + const kiss_twiddle_scalar *trig; + (void) arch; + N = l->n; - N >>= shift; + trig = l->trig; + for (i=0;i>= 1; + trig += N; + } N2 = N>>1; N4 = N>>2; - ALLOC(f2, N2, kiss_fft_scalar); - /* sin(x) ~= x here */ -#ifdef OPUS_FIXED_POINT - sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N; -#else - sine = (kiss_twiddle_scalar)2*PI*(.125f)/N; -#endif /* Pre-rotate */ { /* Temp pointers to make it really clear to the compiler what we're doing */ const kiss_fft_scalar * OPUS_RESTRICT xp1 = in; const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1); - kiss_fft_scalar * OPUS_RESTRICT yp = f2; - const kiss_twiddle_scalar *t = &l->trig[0]; + kiss_fft_scalar * OPUS_RESTRICT yp = out+(overlap>>1); + const kiss_twiddle_scalar * OPUS_RESTRICT t = &trig[0]; + const opus_int16 * OPUS_RESTRICT bitrev = l->kfft[shift]->bitrev; for(i=0;ikfft[shift], (kiss_fft_cpx *)f2, (kiss_fft_cpx *)(out+(overlap>>1))); + opus_fft_impl(l->kfft[shift], (kiss_fft_cpx*)(out+(overlap>>1))); /* Post-rotate and de-shuffle from both ends of the buffer at once to make it in-place. */ { - kiss_fft_scalar * OPUS_RESTRICT yp0 = out+(overlap>>1); - kiss_fft_scalar * OPUS_RESTRICT yp1 = out+(overlap>>1)+N2-2; - const kiss_twiddle_scalar *t = &l->trig[0]; + kiss_fft_scalar * yp0 = out+(overlap>>1); + kiss_fft_scalar * yp1 = out+(overlap>>1)+N2-2; + const kiss_twiddle_scalar *t = &trig[0]; /* Loop to (N4+1)>>1 to handle odd N4. When N4 is odd, the middle pair will be computed twice. */ for(i=0;i<(N4+1)>>1;i++) { kiss_fft_scalar re, im, yr, yi; kiss_twiddle_scalar t0, t1; - re = yp0[0]; - im = yp0[1]; - t0 = t[i< +#include "opus/celt/celt.h" +#include "opus/celt/pitch.h" +#include "opus/celt/bands.h" +#include "opus/celt/modes.h" +#include "opus/celt/entcode.h" +#include "opus/celt/quant_bands.h" +#include "opus/celt/rate.h" +#include "opus/celt/stack_alloc.h" +#include "opus/celt/mathops.h" +#include "opus/celt/float_cast.h" +#include +#include "opus/celt/celt_lpc.h" +#include "opus/celt/vq.h" + +#define OVERRIDE_comb_filter +void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N, + opus_val16 g0, opus_val16 g1, int tapset0, int tapset1, + const opus_val16 *window, int overlap, int arch) +{ + int i; + opus_val32 x0, x1, x2, x3, x4; + + (void)arch; + + /* printf ("%d %d %f %f\n", T0, T1, g0, g1); */ + opus_val16 g00, g01, g02, g10, g11, g12; + static const opus_val16 gains[3][3] = { + {QCONST16(0.3066406250f, 15), QCONST16(0.2170410156f, 15), QCONST16(0.1296386719f, 15)}, + {QCONST16(0.4638671875f, 15), QCONST16(0.2680664062f, 15), QCONST16(0.f, 15)}, + {QCONST16(0.7998046875f, 15), QCONST16(0.1000976562f, 15), QCONST16(0.f, 15)}}; + + if (g0==0 && g1==0) + { + /* OPT: Happens to work without the OPUS_MOVE(), but only because the current encoder already copies x to y */ + if (x!=y) + OPUS_MOVE(y, x, N); + return; + } + + g00 = MULT16_16_P15(g0, gains[tapset0][0]); + g01 = MULT16_16_P15(g0, gains[tapset0][1]); + g02 = MULT16_16_P15(g0, gains[tapset0][2]); + g10 = MULT16_16_P15(g1, gains[tapset1][0]); + g11 = MULT16_16_P15(g1, gains[tapset1][1]); + g12 = MULT16_16_P15(g1, gains[tapset1][2]); + x1 = x[-T1+1]; + x2 = x[-T1 ]; + x3 = x[-T1-1]; + x4 = x[-T1-2]; + /* If the filter didn't change, we don't need the overlap */ + if (g0==g1 && T0==T1 && tapset0==tapset1) + overlap=0; + + for (i=0;itwiddles[fstride*m]; + yb = st->twiddles[fstride*2*m]; +#endif + + tw=st->twiddles; + + for (i=0;ir += scratch[7].r + scratch[8].r; + Fout0->i += scratch[7].i + scratch[8].i; + scratch[5].r = scratch[0].r + S_MUL_ADD(scratch[7].r,ya.r,scratch[8].r,yb.r); + scratch[5].i = scratch[0].i + S_MUL_ADD(scratch[7].i,ya.r,scratch[8].i,yb.r); + + scratch[6].r = S_MUL_ADD(scratch[10].i,ya.i,scratch[9].i,yb.i); + scratch[6].i = -S_MUL_ADD(scratch[10].r,ya.i,scratch[9].r,yb.i); + + C_SUB(*Fout1,scratch[5],scratch[6]); + C_ADD(*Fout4,scratch[5],scratch[6]); + + scratch[11].r = scratch[0].r + S_MUL_ADD(scratch[7].r,yb.r,scratch[8].r,ya.r); + scratch[11].i = scratch[0].i + S_MUL_ADD(scratch[7].i,yb.r,scratch[8].i,ya.r); + + scratch[12].r = S_MUL_SUB(scratch[9].i,ya.i,scratch[10].i,yb.i); + scratch[12].i = S_MUL_SUB(scratch[10].r,yb.i,scratch[9].r,ya.i); + + C_ADD(*Fout2,scratch[11],scratch[12]); + C_SUB(*Fout3,scratch[11],scratch[12]); + + ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4; + } + } +} + + +#endif /* KISS_FFT_MIPSR1_H */ diff --git a/drivers/opus/celt/mips/mdct_mipsr1.h b/drivers/opus/celt/mips/mdct_mipsr1.h new file mode 100644 index 0000000000..0bb8b5a056 --- /dev/null +++ b/drivers/opus/celt/mips/mdct_mipsr1.h @@ -0,0 +1,286 @@ +/* Copyright (c) 2007-2008 CSIRO + Copyright (c) 2007-2008 Xiph.Org Foundation + Written by Jean-Marc Valin */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/* This is a simple MDCT implementation that uses a N/4 complex FFT + to do most of the work. It should be relatively straightforward to + plug in pretty much and FFT here. + + This replaces the Vorbis FFT (and uses the exact same API), which + was a bit too messy and that was ending up duplicating code + (might as well use the same FFT everywhere). + + The algorithm is similar to (and inspired from) Fabrice Bellard's + MDCT implementation in FFMPEG, but has differences in signs, ordering + and scaling in many places. +*/ +#ifndef __MDCT_MIPSR1_H__ +#define __MDCT_MIPSR1_H__ + +#ifndef SKIP_CONFIG_H +#include "opus/opus_config.h" +#endif + +#include "opus/celt/mdct.h" +#include "opus/celt/kiss_fft.h" +#include "opus/celt/_kiss_fft_guts.h" +#include +#include "opus/celt/os_support.h" +#include "opus/celt/mathops.h" +#include "opus/celt/stack_alloc.h" + +/* Forward MDCT trashes the input array */ +#define OVERRIDE_clt_mdct_forward +void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out, + const opus_val16 *window, int overlap, int shift, int stride, int arch) +{ + int i; + int N, N2, N4; + VARDECL(kiss_fft_scalar, f); + VARDECL(kiss_fft_cpx, f2); + const kiss_fft_state *st = l->kfft[shift]; + const kiss_twiddle_scalar *trig; + opus_val16 scale; +#ifdef OPUS_FIXED_POINT + /* Allows us to scale with MULT16_32_Q16(), which is faster than + MULT16_32_Q15() on ARM. */ + int scale_shift = st->scale_shift-1; +#endif + + (void)arch; + + SAVE_STACK; + scale = st->scale; + + N = l->n; + trig = l->trig; + for (i=0;i>= 1; + trig += N; + } + N2 = N>>1; + N4 = N>>2; + + ALLOC(f, N2, kiss_fft_scalar); + ALLOC(f2, N4, kiss_fft_cpx); + + /* Consider the input to be composed of four blocks: [a, b, c, d] */ + /* Window, shuffle, fold */ + { + /* Temp pointers to make it really clear to the compiler what we're doing */ + const kiss_fft_scalar * OPUS_RESTRICT xp1 = in+(overlap>>1); + const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+N2-1+(overlap>>1); + kiss_fft_scalar * OPUS_RESTRICT yp = f; + const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1); + const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1; + for(i=0;i<((overlap+3)>>2);i++) + { + /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/ + *yp++ = S_MUL_ADD(*wp2, xp1[N2],*wp1,*xp2); + *yp++ = S_MUL_SUB(*wp1, *xp1,*wp2, xp2[-N2]); + xp1+=2; + xp2-=2; + wp1+=2; + wp2-=2; + } + wp1 = window; + wp2 = window+overlap-1; + for(;i>2);i++) + { + /* Real part arranged as a-bR, Imag part arranged as -c-dR */ + *yp++ = *xp2; + *yp++ = *xp1; + xp1+=2; + xp2-=2; + } + for(;ibitrev[i]] = yc; + } + } + + /* N/4 complex FFT, does not downscale anymore */ + opus_fft_impl(st, f2); + + /* Post-rotate */ + { + /* Temp pointers to make it really clear to the compiler what we're doing */ + const kiss_fft_cpx * OPUS_RESTRICT fp = f2; + kiss_fft_scalar * OPUS_RESTRICT yp1 = out; + kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1); + const kiss_twiddle_scalar *t = &trig[0]; + /* Temp pointers to make it really clear to the compiler what we're doing */ + for(i=0;ii,t[N4+i] , fp->r,t[i]); + yi = S_MUL_ADD(fp->r,t[N4+i] ,fp->i,t[i]); + *yp1 = yr; + *yp2 = yi; + fp++; + yp1 += 2*stride; + yp2 -= 2*stride; + } + } + RESTORE_STACK; +} + +#define OVERRIDE_clt_mdct_backward +void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out, + const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride, int arch) +{ + int i; + int N, N2, N4; + const kiss_twiddle_scalar *trig; + + (void)arch; + + N = l->n; + trig = l->trig; + for (i=0;i>= 1; + trig += N; + } + N2 = N>>1; + N4 = N>>2; + + /* Pre-rotate */ + { + /* Temp pointers to make it really clear to the compiler what we're doing */ + const kiss_fft_scalar * OPUS_RESTRICT xp1 = in; + const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1); + kiss_fft_scalar * OPUS_RESTRICT yp = out+(overlap>>1); + const kiss_twiddle_scalar * OPUS_RESTRICT t = &trig[0]; + const opus_int16 * OPUS_RESTRICT bitrev = l->kfft[shift]->bitrev; + for(i=0;ikfft[shift], (kiss_fft_cpx*)(out+(overlap>>1))); + + /* Post-rotate and de-shuffle from both ends of the buffer at once to make + it in-place. */ + { + kiss_fft_scalar * OPUS_RESTRICT yp0 = out+(overlap>>1); + kiss_fft_scalar * OPUS_RESTRICT yp1 = out+(overlap>>1)+N2-2; + const kiss_twiddle_scalar *t = &trig[0]; + /* Loop to (N4+1)>>1 to handle odd N4. When N4 is odd, the + middle pair will be computed twice. */ + for(i=0;i<(N4+1)>>1;i++) + { + kiss_fft_scalar re, im, yr, yi; + kiss_twiddle_scalar t0, t1; + /* We swap real and imag because we're using an FFT instead of an IFFT. */ + re = yp0[1]; + im = yp0[0]; + t0 = t[i]; + t1 = t[N4+i]; + /* We'd scale up by 2 here, but instead it's done when mixing the windows */ + yr = S_MUL_ADD(re,t0 , im,t1); + yi = S_MUL_SUB(re,t1 , im,t0); + /* We swap real and imag because we're using an FFT instead of an IFFT. */ + re = yp1[1]; + im = yp1[0]; + yp0[0] = yr; + yp1[1] = yi; + + t0 = t[(N4-i-1)]; + t1 = t[(N2-i-1)]; + /* We'd scale up by 2 here, but instead it's done when mixing the windows */ + yr = S_MUL_ADD(re,t0,im,t1); + yi = S_MUL_SUB(re,t1,im,t0); + yp1[0] = yr; + yp0[1] = yi; + yp0 += 2; + yp1 -= 2; + } + } + + /* Mirror on both sides for TDAC */ + { + kiss_fft_scalar * OPUS_RESTRICT xp1 = out+overlap-1; + kiss_fft_scalar * OPUS_RESTRICT yp1 = out; + const opus_val16 * OPUS_RESTRICT wp1 = window; + const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1; + + for(i = 0; i < overlap/2; i++) + { + kiss_fft_scalar x1, x2; + x1 = *xp1; + x2 = *yp1; + *yp1++ = MULT16_32_Q15(*wp2, x2) - MULT16_32_Q15(*wp1, x1); + *xp1-- = MULT16_32_Q15(*wp1, x2) + MULT16_32_Q15(*wp2, x1); + wp1++; + wp2--; + } + } +} +#endif /* __MDCT_MIPSR1_H__ */ diff --git a/drivers/opus/celt/mips/pitch_mipsr1.h b/drivers/opus/celt/mips/pitch_mipsr1.h new file mode 100644 index 0000000000..a9500aff58 --- /dev/null +++ b/drivers/opus/celt/mips/pitch_mipsr1.h @@ -0,0 +1,161 @@ +/* Copyright (c) 2007-2008 CSIRO + Copyright (c) 2007-2009 Xiph.Org Foundation + Written by Jean-Marc Valin */ +/** + @file pitch.h + @brief Pitch analysis + */ + +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef PITCH_MIPSR1_H +#define PITCH_MIPSR1_H + +#define OVERRIDE_DUAL_INNER_PROD +static inline void dual_inner_prod(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02, + int N, opus_val32 *xy1, opus_val32 *xy2, int arch) +{ + int j; + opus_val32 xy01=0; + opus_val32 xy02=0; + + (void)arch; + + asm volatile("MULT $ac1, $0, $0"); + asm volatile("MULT $ac2, $0, $0"); + /* Compute the norm of X+Y and X-Y as |X|^2 + |Y|^2 +/- sum(xy) */ + for (j=0;j=0;i--) + { + celt_norm x1, x2; + x1 = Xptr[0]; + x2 = Xptr[stride]; + Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2), s, x1), 15)); + *Xptr-- = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15)); + } +} + +#define OVERRIDE_renormalise_vector + +#define renormalise_vector(X, N, gain, arch) \ + (renormalise_vector_mips(X, N, gain, arch)) + +void renormalise_vector_mips(celt_norm *X, int N, opus_val16 gain, int arch) +{ + int i; +#ifdef OPUS_FIXED_POINT + int k; +#endif + opus_val32 E = EPSILON; + opus_val16 g; + opus_val32 t; + celt_norm *xptr = X; + int X0, X1; + + (void)arch; + + asm volatile("mult $ac1, $0, $0"); + asm volatile("MTLO %0, $ac1" : :"r" (E)); + /*if(N %4) + printf("error");*/ + for (i=0;i>1; +#endif + t = VSHR32(E, 2*(k-7)); + g = MULT16_16_P15(celt_rsqrt_norm(t),gain); + + xptr = X; + for (i=0;imaxLM); if (clt_mdct_init(&mode->mdct, 2*mode->shortMdctSize*mode->nbShortMdcts, - mode->maxLM) == 0) + mode->maxLM, arch) == 0) goto failure; if (error) @@ -408,6 +407,8 @@ failure: #ifdef CUSTOM_MODES void opus_custom_mode_destroy(CELTMode *mode) { + int arch = opus_select_arch(); + if (mode == NULL) return; #ifndef CUSTOM_MODES_ONLY @@ -431,7 +432,7 @@ void opus_custom_mode_destroy(CELTMode *mode) opus_free((opus_int16*)mode->cache.index); opus_free((unsigned char*)mode->cache.bits); opus_free((unsigned char*)mode->cache.caps); - clt_mdct_clear(&mode->mdct); + clt_mdct_clear(&mode->mdct, arch); opus_free((CELTMode *)mode); } diff --git a/drivers/opus/celt/modes.h b/drivers/opus/celt/modes.h new file mode 100644 index 0000000000..a5f1c30ac9 --- /dev/null +++ b/drivers/opus/celt/modes.h @@ -0,0 +1,75 @@ +/* Copyright (c) 2007-2008 CSIRO + Copyright (c) 2007-2009 Xiph.Org Foundation + Copyright (c) 2008 Gregory Maxwell + Written by Jean-Marc Valin and Gregory Maxwell */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef MODES_H +#define MODES_H + +#include "opus/opus_types.h" +#include "opus/celt/celt.h" +#include "opus/celt/arch.h" +#include "opus/celt/mdct.h" +#include "opus/celt/entenc.h" +#include "opus/celt/entdec.h" + +#define MAX_PERIOD 1024 + +typedef struct { + int size; + const opus_int16 *index; + const unsigned char *bits; + const unsigned char *caps; +} PulseCache; + +/** Mode definition (opaque) + @brief Mode definition + */ +struct OpusCustomMode { + opus_int32 Fs; + int overlap; + + int nbEBands; + int effEBands; + opus_val16 preemph[4]; + const opus_int16 *eBands; /**< Definition for each "pseudo-critical band" */ + + int maxLM; + int nbShortMdcts; + int shortMdctSize; + + int nbAllocVectors; /**< Number of lines in the matrix below */ + const unsigned char *allocVectors; /**< Number of bits in each band for several rates */ + const opus_int16 *logN; + + const opus_val16 *window; + mdct_lookup mdct; + PulseCache cache; +}; + + +#endif diff --git a/drivers/opus/celt/opus_custom_demo.c b/drivers/opus/celt/opus_custom_demo.c index b3129de779..956bec4e45 100644 --- a/drivers/opus/celt/opus_custom_demo.c +++ b/drivers/opus/celt/opus_custom_demo.c @@ -25,10 +25,7 @@ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ - -#ifdef OPUS_ENABLED #include "opus/opus_config.h" -#endif #include "opus/opus_custom.h" #include "opus/celt/arch.h" @@ -52,7 +49,7 @@ int main(int argc, char *argv[]) int bytes_per_packet; unsigned char data[MAX_PACKET]; int complexity; -#if !(defined (OPUS_FIXED_POINT) && !defined(CUSTOM_MODES)) && defined(RESYNTH) +#if !(defined (FIXED_POINT) && !defined(CUSTOM_MODES)) && defined(RESYNTH) int i; double rmsd = 0; #endif @@ -174,7 +171,7 @@ int main(int argc, char *argv[]) for (i=0;i 0) { rmsd = sqrt(rmsd/(1.0*frame_size*channels*count)); diff --git a/drivers/opus/celt/opus_modes.h b/drivers/opus/celt/opus_modes.h deleted file mode 100644 index 38e5844535..0000000000 --- a/drivers/opus/celt/opus_modes.h +++ /dev/null @@ -1,83 +0,0 @@ -/* Copyright (c) 2007-2008 CSIRO - Copyright (c) 2007-2009 Xiph.Org Foundation - Copyright (c) 2008 Gregory Maxwell - Written by Jean-Marc Valin and Gregory Maxwell */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef OPUS_MODES_H -#define OPUS_MODES_H - -#include "opus/opus_types.h" -#include "opus/celt/celt.h" -#include "opus/celt/arch.h" -#include "opus/celt/mdct.h" -#include "opus/celt/entenc.h" -#include "opus/celt/entdec.h" - -#define MAX_PERIOD 1024 - -#ifndef OVERLAP -#define OVERLAP(mode) ((mode)->overlap) -#endif - -#ifndef FRAMESIZE -#define FRAMESIZE(mode) ((mode)->mdctSize) -#endif - -typedef struct { - int size; - const opus_int16 *index; - const unsigned char *bits; - const unsigned char *caps; -} PulseCache; - -/** Mode definition (opaque) - @brief Mode definition - */ -struct OpusCustomMode { - opus_int32 Fs; - int overlap; - - int nbEBands; - int effEBands; - opus_val16 preemph[4]; - const opus_int16 *eBands; /**< Definition for each "pseudo-critical band" */ - - int maxLM; - int nbShortMdcts; - int shortMdctSize; - - int nbAllocVectors; /**< Number of lines in the matrix below */ - const unsigned char *allocVectors; /**< Number of bits in each band for several rates */ - const opus_int16 *logN; - - const opus_val16 *window; - celt_mdct_lookup mdct; - PulseCache cache; -}; - - -#endif diff --git a/drivers/opus/celt/os_support.h b/drivers/opus/celt/os_support.h index e1cf884467..5e36b3ae56 100644 --- a/drivers/opus/celt/os_support.h +++ b/drivers/opus/celt/os_support.h @@ -67,18 +67,18 @@ static OPUS_INLINE void opus_free (void *ptr) } #endif -/** Copy n bytes of memory from src to dst. The 0* term provides compile-time type checking */ +/** Copy n elements from src to dst. The 0* term provides compile-time type checking */ #ifndef OVERRIDE_OPUS_COPY #define OPUS_COPY(dst, src, n) (memcpy((dst), (src), (n)*sizeof(*(dst)) + 0*((dst)-(src)) )) #endif -/** Copy n bytes of memory from src to dst, allowing overlapping regions. The 0* term +/** Copy n elements from src to dst, allowing overlapping regions. The 0* term provides compile-time type checking */ #ifndef OVERRIDE_OPUS_MOVE #define OPUS_MOVE(dst, src, n) (memmove((dst), (src), (n)*sizeof(*(dst)) + 0*((dst)-(src)) )) #endif -/** Set n elements of dst to zero, starting at address s */ +/** Set n elements of dst to zero */ #ifndef OVERRIDE_OPUS_CLEAR #define OPUS_CLEAR(dst, n) (memset((dst), 0, (n)*sizeof(*(dst)))) #endif diff --git a/drivers/opus/celt/pitch.c b/drivers/opus/celt/pitch.c index c7c2b98c00..1bcbabe943 100644 --- a/drivers/opus/celt/pitch.c +++ b/drivers/opus/celt/pitch.c @@ -30,14 +30,11 @@ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ - -#ifdef OPUS_ENABLED #include "opus/opus_config.h" -#endif #include "opus/celt/pitch.h" #include "opus/celt/os_support.h" -#include "opus/celt/opus_modes.h" +#include "opus/celt/modes.h" #include "opus/celt/stack_alloc.h" #include "opus/celt/mathops.h" #include "opus/celt/celt_lpc.h" @@ -214,25 +211,35 @@ void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x celt_fir5(x_lp, lpc2, x_lp, len>>1, mem); } -#if 0 /* This is a simple version of the pitch correlation that should work - well on DSPs like Blackfin and TI C5x/C6x */ - +/* Pure C implementation. */ #ifdef OPUS_FIXED_POINT opus_val32 #else void #endif -celt_pitch_xcorr(opus_val16 *x, opus_val16 *y, opus_val32 *xcorr, int len, int max_pitch) +#if defined(OVERRIDE_PITCH_XCORR) +celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y, + opus_val32 *xcorr, int len, int max_pitch) +#else +celt_pitch_xcorr(const opus_val16 *_x, const opus_val16 *_y, + opus_val32 *xcorr, int len, int max_pitch, int arch) +#endif { + +#if 0 /* This is a simple version of the pitch correlation that should work + well on DSPs like Blackfin and TI C5x/C6x */ int i, j; #ifdef OPUS_FIXED_POINT opus_val32 maxcorr=1; +#endif +#if !defined(OVERRIDE_PITCH_XCORR) + (void)arch; #endif for (i=0;i0); - celt_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0); #ifdef OPUS_FIXED_POINT opus_val32 maxcorr=1; #endif + celt_assert(max_pitch>0); + celt_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0); for (i=0;i>1;i++) { - opus_val32 sum=0; + opus_val32 sum; xcorr[i] = 0; if (abs(i-2*best_pitch[0])>2 && abs(i-2*best_pitch[1])>2) continue; +#ifdef OPUS_FIXED_POINT + sum = 0; for (j=0;j>1;j++) sum += SHR32(MULT16_16(x_lp[j],y[i+j]), shift); +#else + sum = celt_inner_prod_c(x_lp, y+i, len>>1); +#endif xcorr[i] = MAX32(-1, sum); #ifdef OPUS_FIXED_POINT maxcorr = MAX32(maxcorr, sum); @@ -401,7 +411,7 @@ void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTR static const int second_check[16] = {0, 0, 3, 2, 3, 2, 5, 2, 3, 2, 3, 2, 5, 2, 3, 2}; opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod, - int N, int *T0_, int prev_period, opus_val16 prev_gain) + int N, int *T0_, int prev_period, opus_val16 prev_gain, int arch) { int k, i, T, T0; opus_val16 g, g0; @@ -426,7 +436,7 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod, T = T0 = *T0_; ALLOC(yy_lookup, maxperiod+1, opus_val32); - dual_inner_prod(x, x, x-T0, N, &xx, &xy); + dual_inner_prod(x, x, x-T0, N, &xx, &xy, arch); yy_lookup[0] = xx; yy=xx; for (i=1;i<=maxperiod;i++) @@ -456,7 +466,7 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod, opus_val16 g1; opus_val16 cont=0; opus_val16 thresh; - T1 = (2*T0+k)/(2*k); + T1 = celt_udiv(2*T0+k, 2*k); if (T1 < minperiod) break; /* Look for another strong correlation at T1b */ @@ -468,9 +478,9 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod, T1b = T0+T1; } else { - T1b = (2*second_check[k]*T0+k)/(2*k); + T1b = celt_udiv(2*second_check[k]*T0+k, 2*k); } - dual_inner_prod(x, &x[-T1], &x[-T1b], N, &xy, &xy2); + dual_inner_prod(x, &x[-T1], &x[-T1b], N, &xy, &xy2, arch); xy += xy2; yy = yy_lookup[T1] + yy_lookup[T1b]; #ifdef OPUS_FIXED_POINT @@ -513,13 +523,7 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod, pg = SHR32(frac_div32(best_xy,best_yy+1),16); for (k=0;k<3;k++) - { - int T1 = T+k-1; - xy = 0; - for (i=0;i MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[0])) offset = 1; else if ((xcorr[0]-xcorr[2]) > MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[2])) diff --git a/drivers/opus/celt/pitch.h b/drivers/opus/celt/pitch.h index f599f5fc76..ecae7667b8 100644 --- a/drivers/opus/celt/pitch.h +++ b/drivers/opus/celt/pitch.h @@ -34,15 +34,21 @@ #ifndef PITCH_H #define PITCH_H -#include "opus/celt/opus_modes.h" +#include "opus/celt/modes.h" #include "opus/celt/cpu_support.h" -#if defined(__SSE__) && !defined(OPUS_FIXED_POINT) -#include "x86/pitch_sse.h" +#if (defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT)) \ + || ((defined(OPUS_X86_MAY_HAVE_SSE4_1) || defined(OPUS_X86_MAY_HAVE_SSE2)) && defined(FIXED_POINT)) +#include "opus/celt/x86/pitch_sse.h" #endif -#if defined(OPUS_ARM_ASM) && defined(OPUS_FIXED_POINT) -# include "arm/pitch_arm.h" +#if defined(MIPSr1_ASM) +#include "opus/celt/mips/pitch_mipsr1.h" +#endif + +#if ((defined(OPUS_ARM_ASM) && defined(FIXED_POINT)) \ + || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)) +# include "opus/celt/arm/pitch_arm.h" #endif void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x_lp, @@ -52,12 +58,12 @@ void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTR int len, int max_pitch, int *pitch, int arch); opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod, - int N, int *T0, int prev_period, opus_val16 prev_gain); + int N, int *T0, int prev_period, opus_val16 prev_gain, int arch); + /* OPT: This is the kernel you really want to optimize. It gets used a lot by the prefilter and by the PLC. */ -#ifndef OVERRIDE_XCORR_KERNEL -static OPUS_INLINE void xcorr_kernel(const opus_val16 * x, const opus_val16 * y, opus_val32 sum[4], int len) +static OPUS_INLINE void xcorr_kernel_c(const opus_val16 * x, const opus_val16 * y, opus_val32 sum[4], int len) { int j; opus_val16 y_0, y_1, y_2, y_3; @@ -122,10 +128,14 @@ static OPUS_INLINE void xcorr_kernel(const opus_val16 * x, const opus_val16 * y, sum[3] = MAC16_16(sum[3],tmp,y_1); } } + +#ifndef OVERRIDE_XCORR_KERNEL +#define xcorr_kernel(x, y, sum, len, arch) \ + ((void)(arch),xcorr_kernel_c(x, y, sum, len)) #endif /* OVERRIDE_XCORR_KERNEL */ -#ifndef OVERRIDE_DUAL_INNER_PROD -static OPUS_INLINE void dual_inner_prod(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02, + +static OPUS_INLINE void dual_inner_prod_c(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02, int N, opus_val32 *xy1, opus_val32 *xy2) { int i; @@ -139,8 +149,35 @@ static OPUS_INLINE void dual_inner_prod(const opus_val16 *x, const opus_val16 *y *xy1 = xy01; *xy2 = xy02; } + +#ifndef OVERRIDE_DUAL_INNER_PROD +# define dual_inner_prod(x, y01, y02, N, xy1, xy2, arch) \ + ((void)(arch),dual_inner_prod_c(x, y01, y02, N, xy1, xy2)) +#endif + +/*We make sure a C version is always available for cases where the overhead of + vectorization and passing around an arch flag aren't worth it.*/ +static OPUS_INLINE opus_val32 celt_inner_prod_c(const opus_val16 *x, + const opus_val16 *y, int N) +{ + int i; + opus_val32 xy=0; + for (i=0;inbEBands, opus_val16); diff --git a/drivers/opus/celt/quant_bands.h b/drivers/opus/celt/quant_bands.h index 85d011e6e0..31d4b94e41 100644 --- a/drivers/opus/celt/quant_bands.h +++ b/drivers/opus/celt/quant_bands.h @@ -30,7 +30,7 @@ #define QUANT_BANDS #include "opus/celt/arch.h" -#include "opus/celt/opus_modes.h" +#include "opus/celt/modes.h" #include "opus/celt/entenc.h" #include "opus/celt/entdec.h" #include "opus/celt/mathops.h" diff --git a/drivers/opus/celt/rate.c b/drivers/opus/celt/rate.c index ecc0ab2a4f..e69fb3f030 100644 --- a/drivers/opus/celt/rate.c +++ b/drivers/opus/celt/rate.c @@ -25,13 +25,10 @@ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ - -#ifdef OPUS_ENABLED #include "opus/opus_config.h" -#endif #include -#include "opus/celt/opus_modes.h" +#include "opus/celt/modes.h" #include "opus/celt/cwrs.h" #include "opus/celt/arch.h" #include "opus/celt/os_support.h" @@ -131,7 +128,7 @@ void compute_pulse_cache(CELTMode *m, int LM) for (i=0;ieBands[codedBands]-m->eBands[start]); + percoeff = celt_udiv(left, m->eBands[codedBands]-m->eBands[start]); left -= (m->eBands[codedBands]-m->eBands[start])*percoeff; rem = IMAX(left-(m->eBands[j]-m->eBands[start]),0); band_width = m->eBands[codedBands]-m->eBands[j]; @@ -414,7 +411,7 @@ static OPUS_INLINE int interp_bits2pulses(const CELTMode *m, int start, int end, /* Allocate the remaining bits */ left = total-psum; - percoeff = left/(m->eBands[codedBands]-m->eBands[start]); + percoeff = celt_udiv(left, m->eBands[codedBands]-m->eBands[start]); left -= (m->eBands[codedBands]-m->eBands[start])*percoeff; for (j=start;jeBands[j+1]-m->eBands[j])); @@ -465,7 +462,8 @@ static OPUS_INLINE int interp_bits2pulses(const CELTMode *m, int start, int end, offset += NClogN>>3; /* Divide with rounding */ - ebits[j] = IMAX(0, (bits[j] + offset + (den<<(BITRES-1))) / (den<>BITRES; /* Make sure not to bust */ if (C*ebits[j] > (bits[j]>>BITRES)) diff --git a/drivers/opus/celt/rate.h b/drivers/opus/celt/rate.h index e12dd29db8..cf8085fccf 100644 --- a/drivers/opus/celt/rate.h +++ b/drivers/opus/celt/rate.h @@ -32,7 +32,7 @@ #define MAX_PSEUDO 40 #define LOG_MAX_PSEUDO 6 -#define MAX_PULSES 128 +#define CELT_MAX_PULSES 128 #define MAX_FINE_BITS 8 @@ -41,7 +41,7 @@ #define QTHETA_OFFSET_TWOPHASE 16 #include "opus/celt/cwrs.h" -#include "opus/celt/opus_modes.h" +#include "opus/celt/modes.h" void compute_pulse_cache(CELTMode *m, int LM); diff --git a/drivers/opus/celt/stack_alloc.h b/drivers/opus/celt/stack_alloc.h index 464a6d0b7f..7cd6c56ccf 100644 --- a/drivers/opus/celt/stack_alloc.h +++ b/drivers/opus/celt/stack_alloc.h @@ -36,21 +36,17 @@ #include "opus/opus_defines.h" #if (!defined (VAR_ARRAYS) && !defined (USE_ALLOCA) && !defined (NONTHREADSAFE_PSEUDOSTACK)) -#define VAR_ARRAYS +#error "Opus requires one of VAR_ARRAYS, USE_ALLOCA, or NONTHREADSAFE_PSEUDOSTACK be defined to select the temporary allocation mode." #endif #ifdef USE_ALLOCA # ifdef WIN32 # include # else -# ifdef OPUS_HAVE_ALLOCA_H +# ifdef HAVE_ALLOCA_H # include # else -# ifdef __linux__ -# include -# else -# include -# endif +# include # endif # endif #endif @@ -120,9 +116,11 @@ #else #ifdef CELT_C +char *scratch_ptr=0; char *global_stack=0; #else extern char *global_stack; +extern char *scratch_ptr; #endif /* CELT_C */ #ifdef ENABLE_VALGRIND @@ -144,8 +142,12 @@ extern char *global_stack_top; #define ALIGN(stack, size) ((stack) += ((size) - (long)(stack)) & ((size) - 1)) #define PUSH(stack, size, type) (ALIGN((stack),sizeof(type)/sizeof(char)),(stack)+=(size)*(sizeof(type)/sizeof(char)),(type*)((stack)-(size)*(sizeof(type)/sizeof(char)))) +#if 0 /* Set this to 1 to instrument pseudostack usage */ +#define RESTORE_STACK (printf("%ld %s:%d\n", global_stack-scratch_ptr, __FILE__, __LINE__),global_stack = _saved_stack) +#else #define RESTORE_STACK (global_stack = _saved_stack) -#define ALLOC_STACK char *_saved_stack; (global_stack = (global_stack==0) ? opus_alloc_scratch(GLOBAL_STACK_SIZE) : global_stack); _saved_stack = global_stack; +#endif +#define ALLOC_STACK char *_saved_stack; (global_stack = (global_stack==0) ? (scratch_ptr=opus_alloc_scratch(GLOBAL_STACK_SIZE)) : global_stack); _saved_stack = global_stack; #endif /* ENABLE_VALGRIND */ diff --git a/drivers/opus/celt/static_modes_fixed.h b/drivers/opus/celt/static_modes_fixed.h index 1d92fc4b27..352642f507 100644 --- a/drivers/opus/celt/static_modes_fixed.h +++ b/drivers/opus/celt/static_modes_fixed.h @@ -1,9 +1,14 @@ /* The contents of this file was automatically generated by dump_modes.c with arguments: 48000 960 It contains static definitions for some pre-defined modes. */ -#include "opus/celt/opus_modes.h" +#include "opus/celt/modes.h" #include "opus/celt/rate.h" +#ifdef HAVE_ARM_NE10 +#define OVERRIDE_FFT 1 +#include "opus/celt/static_modes_fixed_arm_ne10.h" +#endif + #ifndef DEF_WINDOW120 #define DEF_WINDOW120 static const opus_val16 window120[120] = { @@ -341,84 +346,84 @@ static const kiss_twiddle_cpx fft_twiddles48000_960[480] = { #ifndef FFT_BITREV480 #define FFT_BITREV480 static const opus_int16 fft_bitrev480[480] = { -0, 120, 240, 360, 30, 150, 270, 390, 60, 180, 300, 420, 90, 210, 330, -450, 15, 135, 255, 375, 45, 165, 285, 405, 75, 195, 315, 435, 105, 225, -345, 465, 5, 125, 245, 365, 35, 155, 275, 395, 65, 185, 305, 425, 95, -215, 335, 455, 20, 140, 260, 380, 50, 170, 290, 410, 80, 200, 320, 440, -110, 230, 350, 470, 10, 130, 250, 370, 40, 160, 280, 400, 70, 190, 310, -430, 100, 220, 340, 460, 25, 145, 265, 385, 55, 175, 295, 415, 85, 205, -325, 445, 115, 235, 355, 475, 1, 121, 241, 361, 31, 151, 271, 391, 61, -181, 301, 421, 91, 211, 331, 451, 16, 136, 256, 376, 46, 166, 286, 406, -76, 196, 316, 436, 106, 226, 346, 466, 6, 126, 246, 366, 36, 156, 276, -396, 66, 186, 306, 426, 96, 216, 336, 456, 21, 141, 261, 381, 51, 171, -291, 411, 81, 201, 321, 441, 111, 231, 351, 471, 11, 131, 251, 371, 41, -161, 281, 401, 71, 191, 311, 431, 101, 221, 341, 461, 26, 146, 266, 386, -56, 176, 296, 416, 86, 206, 326, 446, 116, 236, 356, 476, 2, 122, 242, -362, 32, 152, 272, 392, 62, 182, 302, 422, 92, 212, 332, 452, 17, 137, -257, 377, 47, 167, 287, 407, 77, 197, 317, 437, 107, 227, 347, 467, 7, -127, 247, 367, 37, 157, 277, 397, 67, 187, 307, 427, 97, 217, 337, 457, -22, 142, 262, 382, 52, 172, 292, 412, 82, 202, 322, 442, 112, 232, 352, -472, 12, 132, 252, 372, 42, 162, 282, 402, 72, 192, 312, 432, 102, 222, -342, 462, 27, 147, 267, 387, 57, 177, 297, 417, 87, 207, 327, 447, 117, -237, 357, 477, 3, 123, 243, 363, 33, 153, 273, 393, 63, 183, 303, 423, -93, 213, 333, 453, 18, 138, 258, 378, 48, 168, 288, 408, 78, 198, 318, -438, 108, 228, 348, 468, 8, 128, 248, 368, 38, 158, 278, 398, 68, 188, -308, 428, 98, 218, 338, 458, 23, 143, 263, 383, 53, 173, 293, 413, 83, -203, 323, 443, 113, 233, 353, 473, 13, 133, 253, 373, 43, 163, 283, 403, -73, 193, 313, 433, 103, 223, 343, 463, 28, 148, 268, 388, 58, 178, 298, -418, 88, 208, 328, 448, 118, 238, 358, 478, 4, 124, 244, 364, 34, 154, -274, 394, 64, 184, 304, 424, 94, 214, 334, 454, 19, 139, 259, 379, 49, -169, 289, 409, 79, 199, 319, 439, 109, 229, 349, 469, 9, 129, 249, 369, -39, 159, 279, 399, 69, 189, 309, 429, 99, 219, 339, 459, 24, 144, 264, -384, 54, 174, 294, 414, 84, 204, 324, 444, 114, 234, 354, 474, 14, 134, -254, 374, 44, 164, 284, 404, 74, 194, 314, 434, 104, 224, 344, 464, 29, -149, 269, 389, 59, 179, 299, 419, 89, 209, 329, 449, 119, 239, 359, 479, +0, 96, 192, 288, 384, 32, 128, 224, 320, 416, 64, 160, 256, 352, 448, +8, 104, 200, 296, 392, 40, 136, 232, 328, 424, 72, 168, 264, 360, 456, +16, 112, 208, 304, 400, 48, 144, 240, 336, 432, 80, 176, 272, 368, 464, +24, 120, 216, 312, 408, 56, 152, 248, 344, 440, 88, 184, 280, 376, 472, +4, 100, 196, 292, 388, 36, 132, 228, 324, 420, 68, 164, 260, 356, 452, +12, 108, 204, 300, 396, 44, 140, 236, 332, 428, 76, 172, 268, 364, 460, +20, 116, 212, 308, 404, 52, 148, 244, 340, 436, 84, 180, 276, 372, 468, +28, 124, 220, 316, 412, 60, 156, 252, 348, 444, 92, 188, 284, 380, 476, +1, 97, 193, 289, 385, 33, 129, 225, 321, 417, 65, 161, 257, 353, 449, +9, 105, 201, 297, 393, 41, 137, 233, 329, 425, 73, 169, 265, 361, 457, +17, 113, 209, 305, 401, 49, 145, 241, 337, 433, 81, 177, 273, 369, 465, +25, 121, 217, 313, 409, 57, 153, 249, 345, 441, 89, 185, 281, 377, 473, +5, 101, 197, 293, 389, 37, 133, 229, 325, 421, 69, 165, 261, 357, 453, +13, 109, 205, 301, 397, 45, 141, 237, 333, 429, 77, 173, 269, 365, 461, +21, 117, 213, 309, 405, 53, 149, 245, 341, 437, 85, 181, 277, 373, 469, +29, 125, 221, 317, 413, 61, 157, 253, 349, 445, 93, 189, 285, 381, 477, +2, 98, 194, 290, 386, 34, 130, 226, 322, 418, 66, 162, 258, 354, 450, +10, 106, 202, 298, 394, 42, 138, 234, 330, 426, 74, 170, 266, 362, 458, +18, 114, 210, 306, 402, 50, 146, 242, 338, 434, 82, 178, 274, 370, 466, +26, 122, 218, 314, 410, 58, 154, 250, 346, 442, 90, 186, 282, 378, 474, +6, 102, 198, 294, 390, 38, 134, 230, 326, 422, 70, 166, 262, 358, 454, +14, 110, 206, 302, 398, 46, 142, 238, 334, 430, 78, 174, 270, 366, 462, +22, 118, 214, 310, 406, 54, 150, 246, 342, 438, 86, 182, 278, 374, 470, +30, 126, 222, 318, 414, 62, 158, 254, 350, 446, 94, 190, 286, 382, 478, +3, 99, 195, 291, 387, 35, 131, 227, 323, 419, 67, 163, 259, 355, 451, +11, 107, 203, 299, 395, 43, 139, 235, 331, 427, 75, 171, 267, 363, 459, +19, 115, 211, 307, 403, 51, 147, 243, 339, 435, 83, 179, 275, 371, 467, +27, 123, 219, 315, 411, 59, 155, 251, 347, 443, 91, 187, 283, 379, 475, +7, 103, 199, 295, 391, 39, 135, 231, 327, 423, 71, 167, 263, 359, 455, +15, 111, 207, 303, 399, 47, 143, 239, 335, 431, 79, 175, 271, 367, 463, +23, 119, 215, 311, 407, 55, 151, 247, 343, 439, 87, 183, 279, 375, 471, +31, 127, 223, 319, 415, 63, 159, 255, 351, 447, 95, 191, 287, 383, 479, }; #endif #ifndef FFT_BITREV240 #define FFT_BITREV240 static const opus_int16 fft_bitrev240[240] = { -0, 60, 120, 180, 15, 75, 135, 195, 30, 90, 150, 210, 45, 105, 165, -225, 5, 65, 125, 185, 20, 80, 140, 200, 35, 95, 155, 215, 50, 110, -170, 230, 10, 70, 130, 190, 25, 85, 145, 205, 40, 100, 160, 220, 55, -115, 175, 235, 1, 61, 121, 181, 16, 76, 136, 196, 31, 91, 151, 211, -46, 106, 166, 226, 6, 66, 126, 186, 21, 81, 141, 201, 36, 96, 156, -216, 51, 111, 171, 231, 11, 71, 131, 191, 26, 86, 146, 206, 41, 101, -161, 221, 56, 116, 176, 236, 2, 62, 122, 182, 17, 77, 137, 197, 32, -92, 152, 212, 47, 107, 167, 227, 7, 67, 127, 187, 22, 82, 142, 202, -37, 97, 157, 217, 52, 112, 172, 232, 12, 72, 132, 192, 27, 87, 147, -207, 42, 102, 162, 222, 57, 117, 177, 237, 3, 63, 123, 183, 18, 78, -138, 198, 33, 93, 153, 213, 48, 108, 168, 228, 8, 68, 128, 188, 23, -83, 143, 203, 38, 98, 158, 218, 53, 113, 173, 233, 13, 73, 133, 193, -28, 88, 148, 208, 43, 103, 163, 223, 58, 118, 178, 238, 4, 64, 124, -184, 19, 79, 139, 199, 34, 94, 154, 214, 49, 109, 169, 229, 9, 69, -129, 189, 24, 84, 144, 204, 39, 99, 159, 219, 54, 114, 174, 234, 14, -74, 134, 194, 29, 89, 149, 209, 44, 104, 164, 224, 59, 119, 179, 239, +0, 48, 96, 144, 192, 16, 64, 112, 160, 208, 32, 80, 128, 176, 224, +4, 52, 100, 148, 196, 20, 68, 116, 164, 212, 36, 84, 132, 180, 228, +8, 56, 104, 152, 200, 24, 72, 120, 168, 216, 40, 88, 136, 184, 232, +12, 60, 108, 156, 204, 28, 76, 124, 172, 220, 44, 92, 140, 188, 236, +1, 49, 97, 145, 193, 17, 65, 113, 161, 209, 33, 81, 129, 177, 225, +5, 53, 101, 149, 197, 21, 69, 117, 165, 213, 37, 85, 133, 181, 229, +9, 57, 105, 153, 201, 25, 73, 121, 169, 217, 41, 89, 137, 185, 233, +13, 61, 109, 157, 205, 29, 77, 125, 173, 221, 45, 93, 141, 189, 237, +2, 50, 98, 146, 194, 18, 66, 114, 162, 210, 34, 82, 130, 178, 226, +6, 54, 102, 150, 198, 22, 70, 118, 166, 214, 38, 86, 134, 182, 230, +10, 58, 106, 154, 202, 26, 74, 122, 170, 218, 42, 90, 138, 186, 234, +14, 62, 110, 158, 206, 30, 78, 126, 174, 222, 46, 94, 142, 190, 238, +3, 51, 99, 147, 195, 19, 67, 115, 163, 211, 35, 83, 131, 179, 227, +7, 55, 103, 151, 199, 23, 71, 119, 167, 215, 39, 87, 135, 183, 231, +11, 59, 107, 155, 203, 27, 75, 123, 171, 219, 43, 91, 139, 187, 235, +15, 63, 111, 159, 207, 31, 79, 127, 175, 223, 47, 95, 143, 191, 239, }; #endif #ifndef FFT_BITREV120 #define FFT_BITREV120 static const opus_int16 fft_bitrev120[120] = { -0, 30, 60, 90, 15, 45, 75, 105, 5, 35, 65, 95, 20, 50, 80, -110, 10, 40, 70, 100, 25, 55, 85, 115, 1, 31, 61, 91, 16, 46, -76, 106, 6, 36, 66, 96, 21, 51, 81, 111, 11, 41, 71, 101, 26, -56, 86, 116, 2, 32, 62, 92, 17, 47, 77, 107, 7, 37, 67, 97, -22, 52, 82, 112, 12, 42, 72, 102, 27, 57, 87, 117, 3, 33, 63, -93, 18, 48, 78, 108, 8, 38, 68, 98, 23, 53, 83, 113, 13, 43, -73, 103, 28, 58, 88, 118, 4, 34, 64, 94, 19, 49, 79, 109, 9, -39, 69, 99, 24, 54, 84, 114, 14, 44, 74, 104, 29, 59, 89, 119, +0, 24, 48, 72, 96, 8, 32, 56, 80, 104, 16, 40, 64, 88, 112, +4, 28, 52, 76, 100, 12, 36, 60, 84, 108, 20, 44, 68, 92, 116, +1, 25, 49, 73, 97, 9, 33, 57, 81, 105, 17, 41, 65, 89, 113, +5, 29, 53, 77, 101, 13, 37, 61, 85, 109, 21, 45, 69, 93, 117, +2, 26, 50, 74, 98, 10, 34, 58, 82, 106, 18, 42, 66, 90, 114, +6, 30, 54, 78, 102, 14, 38, 62, 86, 110, 22, 46, 70, 94, 118, +3, 27, 51, 75, 99, 11, 35, 59, 83, 107, 19, 43, 67, 91, 115, +7, 31, 55, 79, 103, 15, 39, 63, 87, 111, 23, 47, 71, 95, 119, }; #endif #ifndef FFT_BITREV60 #define FFT_BITREV60 static const opus_int16 fft_bitrev60[60] = { -0, 15, 30, 45, 5, 20, 35, 50, 10, 25, 40, 55, 1, 16, 31, -46, 6, 21, 36, 51, 11, 26, 41, 56, 2, 17, 32, 47, 7, 22, -37, 52, 12, 27, 42, 57, 3, 18, 33, 48, 8, 23, 38, 53, 13, -28, 43, 58, 4, 19, 34, 49, 9, 24, 39, 54, 14, 29, 44, 59, +0, 12, 24, 36, 48, 4, 16, 28, 40, 52, 8, 20, 32, 44, 56, +1, 13, 25, 37, 49, 5, 17, 29, 41, 53, 9, 21, 33, 45, 57, +2, 14, 26, 38, 50, 6, 18, 30, 42, 54, 10, 22, 34, 46, 58, +3, 15, 27, 39, 51, 7, 19, 31, 43, 55, 11, 23, 35, 47, 59, }; #endif @@ -426,10 +431,17 @@ static const opus_int16 fft_bitrev60[60] = { #define FFT_STATE48000_960_0 static const kiss_fft_state fft_state48000_960_0 = { 480, /* nfft */ +17476, /* scale */ +8, /* scale_shift */ -1, /* shift */ -{4, 120, 4, 30, 2, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, }, /* factors */ +{5, 96, 3, 32, 4, 8, 2, 4, 4, 1, 0, 0, 0, 0, 0, 0, }, /* factors */ fft_bitrev480, /* bitrev */ fft_twiddles48000_960, /* bitrev */ +#ifdef OVERRIDE_FFT +(arch_fft_state *)&cfg_arch_480, +#else +NULL, +#endif }; #endif @@ -437,10 +449,17 @@ fft_twiddles48000_960, /* bitrev */ #define FFT_STATE48000_960_1 static const kiss_fft_state fft_state48000_960_1 = { 240, /* nfft */ +17476, /* scale */ +7, /* scale_shift */ 1, /* shift */ -{4, 60, 4, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ +{5, 48, 3, 16, 4, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ fft_bitrev240, /* bitrev */ fft_twiddles48000_960, /* bitrev */ +#ifdef OVERRIDE_FFT +(arch_fft_state *)&cfg_arch_240, +#else +NULL, +#endif }; #endif @@ -448,10 +467,17 @@ fft_twiddles48000_960, /* bitrev */ #define FFT_STATE48000_960_2 static const kiss_fft_state fft_state48000_960_2 = { 120, /* nfft */ +17476, /* scale */ +6, /* scale_shift */ 2, /* shift */ -{4, 30, 2, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ +{5, 24, 3, 8, 2, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ fft_bitrev120, /* bitrev */ fft_twiddles48000_960, /* bitrev */ +#ifdef OVERRIDE_FFT +(arch_fft_state *)&cfg_arch_120, +#else +NULL, +#endif }; #endif @@ -459,10 +485,17 @@ fft_twiddles48000_960, /* bitrev */ #define FFT_STATE48000_960_3 static const kiss_fft_state fft_state48000_960_3 = { 60, /* nfft */ +17476, /* scale */ +5, /* scale_shift */ 3, /* shift */ -{4, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ +{5, 12, 3, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ fft_bitrev60, /* bitrev */ fft_twiddles48000_960, /* bitrev */ +#ifdef OVERRIDE_FFT +(arch_fft_state *)&cfg_arch_60, +#else +NULL, +#endif }; #endif @@ -470,104 +503,368 @@ fft_twiddles48000_960, /* bitrev */ #ifndef MDCT_TWIDDLES960 #define MDCT_TWIDDLES960 -static const opus_val16 mdct_twiddles960[481] = { -32767, 32767, 32767, 32767, 32766, -32763, 32762, 32759, 32757, 32753, -32751, 32747, 32743, 32738, 32733, -32729, 32724, 32717, 32711, 32705, -32698, 32690, 32683, 32676, 32667, -32658, 32650, 32640, 32631, 32620, -32610, 32599, 32588, 32577, 32566, -32554, 32541, 32528, 32515, 32502, -32487, 32474, 32459, 32444, 32429, -32413, 32397, 32381, 32364, 32348, -32331, 32313, 32294, 32277, 32257, -32239, 32219, 32200, 32180, 32159, -32138, 32118, 32096, 32074, 32051, -32029, 32006, 31984, 31960, 31936, -31912, 31888, 31863, 31837, 31812, -31786, 31760, 31734, 31707, 31679, -31652, 31624, 31596, 31567, 31539, -31508, 31479, 31450, 31419, 31388, -31357, 31326, 31294, 31262, 31230, -31198, 31164, 31131, 31097, 31063, -31030, 30994, 30959, 30924, 30889, -30853, 30816, 30779, 30743, 30705, -30668, 30629, 30592, 30553, 30515, -30475, 30435, 30396, 30356, 30315, -30274, 30233, 30191, 30149, 30107, -30065, 30022, 29979, 29936, 29891, -29847, 29803, 29758, 29713, 29668, -29622, 29577, 29529, 29483, 29436, -29390, 29341, 29293, 29246, 29197, -29148, 29098, 29050, 29000, 28949, -28899, 28848, 28797, 28746, 28694, -28642, 28590, 28537, 28485, 28432, -28378, 28324, 28271, 28217, 28162, -28106, 28051, 27995, 27940, 27884, -27827, 27770, 27713, 27657, 27598, -27540, 27481, 27423, 27365, 27305, -27246, 27187, 27126, 27066, 27006, -26945, 26883, 26822, 26760, 26698, -26636, 26574, 26510, 26448, 26383, -26320, 26257, 26191, 26127, 26062, -25997, 25931, 25866, 25800, 25734, -25667, 25601, 25533, 25466, 25398, -25330, 25262, 25194, 25125, 25056, -24987, 24917, 24848, 24778, 24707, -24636, 24566, 24495, 24424, 24352, -24280, 24208, 24135, 24063, 23990, -23917, 23842, 23769, 23695, 23622, -23546, 23472, 23398, 23322, 23246, -23171, 23095, 23018, 22942, 22866, -22788, 22711, 22634, 22557, 22478, -22400, 22322, 22244, 22165, 22085, -22006, 21927, 21846, 21766, 21687, -21606, 21524, 21443, 21363, 21282, -21199, 21118, 21035, 20954, 20870, -20788, 20705, 20621, 20538, 20455, -20371, 20286, 20202, 20118, 20034, -19947, 19863, 19777, 19692, 19606, -19520, 19434, 19347, 19260, 19174, -19088, 18999, 18911, 18825, 18737, -18648, 18560, 18472, 18384, 18294, -18205, 18116, 18025, 17936, 17846, -17757, 17666, 17576, 17485, 17395, -17303, 17212, 17122, 17030, 16937, -16846, 16755, 16662, 16569, 16477, -16385, 16291, 16198, 16105, 16012, -15917, 15824, 15730, 15636, 15541, -15447, 15352, 15257, 15162, 15067, -14973, 14875, 14781, 14685, 14589, -14493, 14396, 14300, 14204, 14107, -14010, 13914, 13815, 13718, 13621, -13524, 13425, 13328, 13230, 13133, -13033, 12935, 12836, 12738, 12638, -12540, 12441, 12341, 12241, 12142, -12044, 11943, 11843, 11744, 11643, -11542, 11442, 11342, 11241, 11139, -11039, 10939, 10836, 10736, 10635, -10534, 10431, 10330, 10228, 10127, -10024, 9921, 9820, 9718, 9614, -9512, 9410, 9306, 9204, 9101, -8998, 8895, 8791, 8689, 8585, -8481, 8377, 8274, 8171, 8067, -7962, 7858, 7753, 7650, 7545, -7441, 7336, 7231, 7129, 7023, -6917, 6813, 6709, 6604, 6498, -6393, 6288, 6182, 6077, 5973, -5867, 5760, 5656, 5549, 5445, -5339, 5232, 5127, 5022, 4914, -4809, 4703, 4596, 4490, 4384, -4278, 4171, 4065, 3958, 3852, -3745, 3640, 3532, 3426, 3318, -3212, 3106, 2998, 2891, 2786, -2679, 2570, 2465, 2358, 2251, -2143, 2037, 1929, 1823, 1715, -1609, 1501, 1393, 1287, 1180, -1073, 964, 858, 751, 644, -535, 429, 322, 214, 107, -0, }; +static const opus_val16 mdct_twiddles960[1800] = { +32767, 32767, 32767, 32766, 32765, +32763, 32761, 32759, 32756, 32753, +32750, 32746, 32742, 32738, 32733, +32728, 32722, 32717, 32710, 32704, +32697, 32690, 32682, 32674, 32666, +32657, 32648, 32639, 32629, 32619, +32609, 32598, 32587, 32576, 32564, +32552, 32539, 32526, 32513, 32500, +32486, 32472, 32457, 32442, 32427, +32411, 32395, 32379, 32362, 32345, +32328, 32310, 32292, 32274, 32255, +32236, 32217, 32197, 32177, 32157, +32136, 32115, 32093, 32071, 32049, +32027, 32004, 31981, 31957, 31933, +31909, 31884, 31859, 31834, 31809, +31783, 31756, 31730, 31703, 31676, +31648, 31620, 31592, 31563, 31534, +31505, 31475, 31445, 31415, 31384, +31353, 31322, 31290, 31258, 31226, +31193, 31160, 31127, 31093, 31059, +31025, 30990, 30955, 30920, 30884, +30848, 30812, 30775, 30738, 30701, +30663, 30625, 30587, 30548, 30509, +30470, 30430, 30390, 30350, 30309, +30269, 30227, 30186, 30144, 30102, +30059, 30016, 29973, 29930, 29886, +29842, 29797, 29752, 29707, 29662, +29616, 29570, 29524, 29477, 29430, +29383, 29335, 29287, 29239, 29190, +29142, 29092, 29043, 28993, 28943, +28892, 28842, 28791, 28739, 28688, +28636, 28583, 28531, 28478, 28425, +28371, 28317, 28263, 28209, 28154, +28099, 28044, 27988, 27932, 27876, +27820, 27763, 27706, 27648, 27591, +27533, 27474, 27416, 27357, 27298, +27238, 27178, 27118, 27058, 26997, +26936, 26875, 26814, 26752, 26690, +26628, 26565, 26502, 26439, 26375, +26312, 26247, 26183, 26119, 26054, +25988, 25923, 25857, 25791, 25725, +25658, 25592, 25524, 25457, 25389, +25322, 25253, 25185, 25116, 25047, +24978, 24908, 24838, 24768, 24698, +24627, 24557, 24485, 24414, 24342, +24270, 24198, 24126, 24053, 23980, +23907, 23834, 23760, 23686, 23612, +23537, 23462, 23387, 23312, 23237, +23161, 23085, 23009, 22932, 22856, +22779, 22701, 22624, 22546, 22468, +22390, 22312, 22233, 22154, 22075, +21996, 21916, 21836, 21756, 21676, +21595, 21515, 21434, 21352, 21271, +21189, 21107, 21025, 20943, 20860, +20777, 20694, 20611, 20528, 20444, +20360, 20276, 20192, 20107, 20022, +19937, 19852, 19767, 19681, 19595, +19509, 19423, 19336, 19250, 19163, +19076, 18988, 18901, 18813, 18725, +18637, 18549, 18460, 18372, 18283, +18194, 18104, 18015, 17925, 17835, +17745, 17655, 17565, 17474, 17383, +17292, 17201, 17110, 17018, 16927, +16835, 16743, 16650, 16558, 16465, +16372, 16279, 16186, 16093, 15999, +15906, 15812, 15718, 15624, 15529, +15435, 15340, 15245, 15150, 15055, +14960, 14864, 14769, 14673, 14577, +14481, 14385, 14288, 14192, 14095, +13998, 13901, 13804, 13706, 13609, +13511, 13414, 13316, 13218, 13119, +13021, 12923, 12824, 12725, 12626, +12527, 12428, 12329, 12230, 12130, +12030, 11930, 11831, 11730, 11630, +11530, 11430, 11329, 11228, 11128, +11027, 10926, 10824, 10723, 10622, +10520, 10419, 10317, 10215, 10113, +10011, 9909, 9807, 9704, 9602, +9499, 9397, 9294, 9191, 9088, +8985, 8882, 8778, 8675, 8572, +8468, 8364, 8261, 8157, 8053, +7949, 7845, 7741, 7637, 7532, +7428, 7323, 7219, 7114, 7009, +6905, 6800, 6695, 6590, 6485, +6380, 6274, 6169, 6064, 5958, +5853, 5747, 5642, 5536, 5430, +5325, 5219, 5113, 5007, 4901, +4795, 4689, 4583, 4476, 4370, +4264, 4157, 4051, 3945, 3838, +3732, 3625, 3518, 3412, 3305, +3198, 3092, 2985, 2878, 2771, +2664, 2558, 2451, 2344, 2237, +2130, 2023, 1916, 1809, 1702, +1594, 1487, 1380, 1273, 1166, +1059, 952, 844, 737, 630, +523, 416, 308, 201, 94, +-13, -121, -228, -335, -442, +-550, -657, -764, -871, -978, +-1086, -1193, -1300, -1407, -1514, +-1621, -1728, -1835, -1942, -2049, +-2157, -2263, -2370, -2477, -2584, +-2691, -2798, -2905, -3012, -3118, +-3225, -3332, -3439, -3545, -3652, +-3758, -3865, -3971, -4078, -4184, +-4290, -4397, -4503, -4609, -4715, +-4821, -4927, -5033, -5139, -5245, +-5351, -5457, -5562, -5668, -5774, +-5879, -5985, -6090, -6195, -6301, +-6406, -6511, -6616, -6721, -6826, +-6931, -7036, -7140, -7245, -7349, +-7454, -7558, -7663, -7767, -7871, +-7975, -8079, -8183, -8287, -8390, +-8494, -8597, -8701, -8804, -8907, +-9011, -9114, -9217, -9319, -9422, +-9525, -9627, -9730, -9832, -9934, +-10037, -10139, -10241, -10342, -10444, +-10546, -10647, -10748, -10850, -10951, +-11052, -11153, -11253, -11354, -11455, +-11555, -11655, -11756, -11856, -11955, +-12055, -12155, -12254, -12354, -12453, +-12552, -12651, -12750, -12849, -12947, +-13046, -13144, -13242, -13340, -13438, +-13536, -13633, -13731, -13828, -13925, +-14022, -14119, -14216, -14312, -14409, +-14505, -14601, -14697, -14793, -14888, +-14984, -15079, -15174, -15269, -15364, +-15459, -15553, -15647, -15741, -15835, +-15929, -16023, -16116, -16210, -16303, +-16396, -16488, -16581, -16673, -16766, +-16858, -16949, -17041, -17133, -17224, +-17315, -17406, -17497, -17587, -17678, +-17768, -17858, -17948, -18037, -18127, +-18216, -18305, -18394, -18483, -18571, +-18659, -18747, -18835, -18923, -19010, +-19098, -19185, -19271, -19358, -19444, +-19531, -19617, -19702, -19788, -19873, +-19959, -20043, -20128, -20213, -20297, +-20381, -20465, -20549, -20632, -20715, +-20798, -20881, -20963, -21046, -21128, +-21210, -21291, -21373, -21454, -21535, +-21616, -21696, -21776, -21856, -21936, +-22016, -22095, -22174, -22253, -22331, +-22410, -22488, -22566, -22643, -22721, +-22798, -22875, -22951, -23028, -23104, +-23180, -23256, -23331, -23406, -23481, +-23556, -23630, -23704, -23778, -23852, +-23925, -23998, -24071, -24144, -24216, +-24288, -24360, -24432, -24503, -24574, +-24645, -24716, -24786, -24856, -24926, +-24995, -25064, -25133, -25202, -25270, +-25339, -25406, -25474, -25541, -25608, +-25675, -25742, -25808, -25874, -25939, +-26005, -26070, -26135, -26199, -26264, +-26327, -26391, -26455, -26518, -26581, +-26643, -26705, -26767, -26829, -26891, +-26952, -27013, -27073, -27133, -27193, +-27253, -27312, -27372, -27430, -27489, +-27547, -27605, -27663, -27720, -27777, +-27834, -27890, -27946, -28002, -28058, +-28113, -28168, -28223, -28277, -28331, +-28385, -28438, -28491, -28544, -28596, +-28649, -28701, -28752, -28803, -28854, +-28905, -28955, -29006, -29055, -29105, +-29154, -29203, -29251, -29299, -29347, +-29395, -29442, -29489, -29535, -29582, +-29628, -29673, -29719, -29764, -29808, +-29853, -29897, -29941, -29984, -30027, +-30070, -30112, -30154, -30196, -30238, +-30279, -30320, -30360, -30400, -30440, +-30480, -30519, -30558, -30596, -30635, +-30672, -30710, -30747, -30784, -30821, +-30857, -30893, -30929, -30964, -30999, +-31033, -31068, -31102, -31135, -31168, +-31201, -31234, -31266, -31298, -31330, +-31361, -31392, -31422, -31453, -31483, +-31512, -31541, -31570, -31599, -31627, +-31655, -31682, -31710, -31737, -31763, +-31789, -31815, -31841, -31866, -31891, +-31915, -31939, -31963, -31986, -32010, +-32032, -32055, -32077, -32099, -32120, +-32141, -32162, -32182, -32202, -32222, +-32241, -32260, -32279, -32297, -32315, +-32333, -32350, -32367, -32383, -32399, +-32415, -32431, -32446, -32461, -32475, +-32489, -32503, -32517, -32530, -32542, +-32555, -32567, -32579, -32590, -32601, +-32612, -32622, -32632, -32641, -32651, +-32659, -32668, -32676, -32684, -32692, +-32699, -32706, -32712, -32718, -32724, +-32729, -32734, -32739, -32743, -32747, +-32751, -32754, -32757, -32760, -32762, +-32764, -32765, -32767, -32767, -32767, +32767, 32767, 32765, 32761, 32756, +32750, 32742, 32732, 32722, 32710, +32696, 32681, 32665, 32647, 32628, +32608, 32586, 32562, 32538, 32512, +32484, 32455, 32425, 32393, 32360, +32326, 32290, 32253, 32214, 32174, +32133, 32090, 32046, 32001, 31954, +31906, 31856, 31805, 31753, 31700, +31645, 31588, 31530, 31471, 31411, +31349, 31286, 31222, 31156, 31089, +31020, 30951, 30880, 30807, 30733, +30658, 30582, 30504, 30425, 30345, +30263, 30181, 30096, 30011, 29924, +29836, 29747, 29656, 29564, 29471, +29377, 29281, 29184, 29086, 28987, +28886, 28784, 28681, 28577, 28471, +28365, 28257, 28147, 28037, 27925, +27812, 27698, 27583, 27467, 27349, +27231, 27111, 26990, 26868, 26744, +26620, 26494, 26367, 26239, 26110, +25980, 25849, 25717, 25583, 25449, +25313, 25176, 25038, 24900, 24760, +24619, 24477, 24333, 24189, 24044, +23898, 23751, 23602, 23453, 23303, +23152, 22999, 22846, 22692, 22537, +22380, 22223, 22065, 21906, 21746, +21585, 21423, 21261, 21097, 20933, +20767, 20601, 20434, 20265, 20096, +19927, 19756, 19584, 19412, 19239, +19065, 18890, 18714, 18538, 18361, +18183, 18004, 17824, 17644, 17463, +17281, 17098, 16915, 16731, 16546, +16361, 16175, 15988, 15800, 15612, +15423, 15234, 15043, 14852, 14661, +14469, 14276, 14083, 13889, 13694, +13499, 13303, 13107, 12910, 12713, +12515, 12317, 12118, 11918, 11718, +11517, 11316, 11115, 10913, 10710, +10508, 10304, 10100, 9896, 9691, +9486, 9281, 9075, 8869, 8662, +8455, 8248, 8040, 7832, 7623, +7415, 7206, 6996, 6787, 6577, +6366, 6156, 5945, 5734, 5523, +5311, 5100, 4888, 4675, 4463, +4251, 4038, 3825, 3612, 3399, +3185, 2972, 2758, 2544, 2330, +2116, 1902, 1688, 1474, 1260, +1045, 831, 617, 402, 188, +-27, -241, -456, -670, -885, +-1099, -1313, -1528, -1742, -1956, +-2170, -2384, -2598, -2811, -3025, +-3239, -3452, -3665, -3878, -4091, +-4304, -4516, -4728, -4941, -5153, +-5364, -5576, -5787, -5998, -6209, +-6419, -6629, -6839, -7049, -7258, +-7467, -7676, -7884, -8092, -8300, +-8507, -8714, -8920, -9127, -9332, +-9538, -9743, -9947, -10151, -10355, +-10558, -10761, -10963, -11165, -11367, +-11568, -11768, -11968, -12167, -12366, +-12565, -12762, -12960, -13156, -13352, +-13548, -13743, -13937, -14131, -14324, +-14517, -14709, -14900, -15091, -15281, +-15470, -15659, -15847, -16035, -16221, +-16407, -16593, -16777, -16961, -17144, +-17326, -17508, -17689, -17869, -18049, +-18227, -18405, -18582, -18758, -18934, +-19108, -19282, -19455, -19627, -19799, +-19969, -20139, -20308, -20475, -20642, +-20809, -20974, -21138, -21301, -21464, +-21626, -21786, -21946, -22105, -22263, +-22420, -22575, -22730, -22884, -23037, +-23189, -23340, -23490, -23640, -23788, +-23935, -24080, -24225, -24369, -24512, +-24654, -24795, -24934, -25073, -25211, +-25347, -25482, -25617, -25750, -25882, +-26013, -26143, -26272, -26399, -26526, +-26651, -26775, -26898, -27020, -27141, +-27260, -27379, -27496, -27612, -27727, +-27841, -27953, -28065, -28175, -28284, +-28391, -28498, -28603, -28707, -28810, +-28911, -29012, -29111, -29209, -29305, +-29401, -29495, -29587, -29679, -29769, +-29858, -29946, -30032, -30118, -30201, +-30284, -30365, -30445, -30524, -30601, +-30677, -30752, -30825, -30897, -30968, +-31038, -31106, -31172, -31238, -31302, +-31365, -31426, -31486, -31545, -31602, +-31658, -31713, -31766, -31818, -31869, +-31918, -31966, -32012, -32058, -32101, +-32144, -32185, -32224, -32262, -32299, +-32335, -32369, -32401, -32433, -32463, +-32491, -32518, -32544, -32568, -32591, +-32613, -32633, -32652, -32669, -32685, +-32700, -32713, -32724, -32735, -32744, +-32751, -32757, -32762, -32766, -32767, +32767, 32764, 32755, 32741, 32720, +32694, 32663, 32626, 32583, 32535, +32481, 32421, 32356, 32286, 32209, +32128, 32041, 31948, 31850, 31747, +31638, 31523, 31403, 31278, 31148, +31012, 30871, 30724, 30572, 30415, +30253, 30086, 29913, 29736, 29553, +29365, 29172, 28974, 28771, 28564, +28351, 28134, 27911, 27684, 27452, +27216, 26975, 26729, 26478, 26223, +25964, 25700, 25432, 25159, 24882, +24601, 24315, 24026, 23732, 23434, +23133, 22827, 22517, 22204, 21886, +21565, 21240, 20912, 20580, 20244, +19905, 19563, 19217, 18868, 18516, +18160, 17802, 17440, 17075, 16708, +16338, 15964, 15588, 15210, 14829, +14445, 14059, 13670, 13279, 12886, +12490, 12093, 11693, 11291, 10888, +10482, 10075, 9666, 9255, 8843, +8429, 8014, 7597, 7180, 6760, +6340, 5919, 5496, 5073, 4649, +4224, 3798, 3372, 2945, 2517, +2090, 1661, 1233, 804, 375, +-54, -483, -911, -1340, -1768, +-2197, -2624, -3052, -3479, -3905, +-4330, -4755, -5179, -5602, -6024, +-6445, -6865, -7284, -7702, -8118, +-8533, -8946, -9358, -9768, -10177, +-10584, -10989, -11392, -11793, -12192, +-12589, -12984, -13377, -13767, -14155, +-14541, -14924, -15305, -15683, -16058, +-16430, -16800, -17167, -17531, -17892, +-18249, -18604, -18956, -19304, -19649, +-19990, -20329, -20663, -20994, -21322, +-21646, -21966, -22282, -22595, -22904, +-23208, -23509, -23806, -24099, -24387, +-24672, -24952, -25228, -25499, -25766, +-26029, -26288, -26541, -26791, -27035, +-27275, -27511, -27741, -27967, -28188, +-28405, -28616, -28823, -29024, -29221, +-29412, -29599, -29780, -29957, -30128, +-30294, -30455, -30611, -30761, -30906, +-31046, -31181, -31310, -31434, -31552, +-31665, -31773, -31875, -31972, -32063, +-32149, -32229, -32304, -32373, -32437, +-32495, -32547, -32594, -32635, -32671, +-32701, -32726, -32745, -32758, -32766, +32767, 32754, 32717, 32658, 32577, +32473, 32348, 32200, 32029, 31837, +31624, 31388, 31131, 30853, 30553, +30232, 29891, 29530, 29148, 28746, +28324, 27883, 27423, 26944, 26447, +25931, 25398, 24847, 24279, 23695, +23095, 22478, 21846, 21199, 20538, +19863, 19174, 18472, 17757, 17030, +16291, 15541, 14781, 14010, 13230, +12441, 11643, 10837, 10024, 9204, +8377, 7545, 6708, 5866, 5020, +4171, 3319, 2464, 1608, 751, +-107, -965, -1822, -2678, -3532, +-4383, -5232, -6077, -6918, -7754, +-8585, -9409, -10228, -11039, -11843, +-12639, -13426, -14204, -14972, -15730, +-16477, -17213, -17937, -18648, -19347, +-20033, -20705, -21363, -22006, -22634, +-23246, -23843, -24423, -24986, -25533, +-26062, -26573, -27066, -27540, -27995, +-28431, -28848, -29245, -29622, -29979, +-30315, -30630, -30924, -31197, -31449, +-31679, -31887, -32074, -32239, -32381, +-32501, -32600, -32675, -32729, -32759, +}; #endif static const CELTMode mode48000_960_120 = { diff --git a/drivers/opus/celt/static_modes_fixed_arm_ne10.h b/drivers/opus/celt/static_modes_fixed_arm_ne10.h new file mode 100644 index 0000000000..b8ef0cee98 --- /dev/null +++ b/drivers/opus/celt/static_modes_fixed_arm_ne10.h @@ -0,0 +1,388 @@ +/* The contents of this file was automatically generated by + * dump_mode_arm_ne10.c with arguments: 48000 960 + * It contains static definitions for some pre-defined modes. */ +#include + +#ifndef NE10_FFT_PARAMS48000_960 +#define NE10_FFT_PARAMS48000_960 +static const ne10_int32_t ne10_factors_480[64] = { +4, 40, 4, 30, 2, 15, 5, 3, 3, 1, 1, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, }; +static const ne10_int32_t ne10_factors_240[64] = { +3, 20, 4, 15, 5, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, }; +static const ne10_int32_t ne10_factors_120[64] = { +3, 10, 2, 15, 5, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, }; +static const ne10_int32_t ne10_factors_60[64] = { +2, 5, 5, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, }; +static const ne10_fft_cpx_int32_t ne10_twiddles_480[480] = { +{0,0}, {2147483647,0}, {2147483647,0}, +{2147483647,0}, {1961823921,-873460313}, {1436946998,-1595891394}, +{2147483647,0}, {1436946998,-1595891394}, {-224473265,-2135719496}, +{2147483647,0}, {663608871,-2042378339}, {-1737350854,-1262259096}, +{2147483647,0}, {-224473265,-2135719496}, {-2100555935,446487152}, +{2147483647,0}, {2100555974,-446486968}, {1961823921,-873460313}, +{1737350743,-1262259248}, {1436946998,-1595891394}, {1073741769,-1859775424}, +{663608871,-2042378339}, {224473078,-2135719516}, {-224473265,-2135719496}, +{-663609049,-2042378281}, {-1073741932,-1859775330}, {-1436947137,-1595891268}, +{-1737350854,-1262259096}, {-1961823997,-873460141}, {-2100556013,-446486785}, +{2147483647,0}, {2144540595,-112390613}, {2135719506,-224473172}, +{2121044558,-335940465}, {2100555974,-446486968}, {2074309912,-555809682}, +{2042378310,-663608960}, {2004848691,-769589332}, {1961823921,-873460313}, +{1913421927,-974937199}, {1859775377,-1073741851}, {1801031311,-1169603450}, +{1737350743,-1262259248}, {1668908218,-1351455280}, {1595891331,-1436947067}, +{1518500216,-1518500282}, {1436946998,-1595891394}, {1351455207,-1668908277}, +{1262259172,-1737350799}, {1169603371,-1801031362}, {1073741769,-1859775424}, +{974937230,-1913421912}, {873460227,-1961823959}, {769589125,-2004848771}, +{663608871,-2042378339}, {555809715,-2074309903}, {446486876,-2100555994}, +{335940246,-2121044593}, {224473078,-2135719516}, {112390647,-2144540593}, +{2147483647,0}, {2135719506,-224473172}, {2100555974,-446486968}, +{2042378310,-663608960}, {1961823921,-873460313}, {1859775377,-1073741851}, +{1737350743,-1262259248}, {1595891331,-1436947067}, {1436946998,-1595891394}, +{1262259172,-1737350799}, {1073741769,-1859775424}, {873460227,-1961823959}, +{663608871,-2042378339}, {446486876,-2100555994}, {224473078,-2135719516}, +{-94,-2147483647}, {-224473265,-2135719496}, {-446487060,-2100555955}, +{-663609049,-2042378281}, {-873460398,-1961823883}, {-1073741932,-1859775330}, +{-1262259116,-1737350839}, {-1436947137,-1595891268}, {-1595891628,-1436946738}, +{-1737350854,-1262259096}, {-1859775343,-1073741910}, {-1961823997,-873460141}, +{-2042378447,-663608538}, {-2100556013,-446486785}, {-2135719499,-224473240}, +{2147483647,0}, {2121044558,-335940465}, {2042378310,-663608960}, +{1913421927,-974937199}, {1737350743,-1262259248}, {1518500216,-1518500282}, +{1262259172,-1737350799}, {974937230,-1913421912}, {663608871,-2042378339}, +{335940246,-2121044593}, {-94,-2147483647}, {-335940431,-2121044564}, +{-663609049,-2042378281}, {-974937397,-1913421827}, {-1262259116,-1737350839}, +{-1518500258,-1518500240}, {-1737350854,-1262259096}, {-1913422071,-974936918}, +{-2042378447,-663608538}, {-2121044568,-335940406}, {-2147483647,188}, +{-2121044509,335940777}, {-2042378331,663608895}, {-1913421900,974937252}, +{-1737350633,1262259400}, {-1518499993,1518500506}, {-1262258813,1737351059}, +{-974936606,1913422229}, {-663609179,2042378239}, {-335940566,2121044542}, +{2147483647,0}, {2147299667,-28109693}, {2146747758,-56214570}, +{2145828015,-84309815}, {2144540595,-112390613}, {2142885719,-140452154}, +{2140863671,-168489630}, {2138474797,-196498235}, {2135719506,-224473172}, +{2132598271,-252409646}, {2129111626,-280302871}, {2125260168,-308148068}, +{2121044558,-335940465}, {2116465518,-363675300}, {2111523833,-391347822}, +{2106220349,-418953288}, {2100555974,-446486968}, {2094531681,-473944146}, +{2088148500,-501320115}, {2081407525,-528610186}, {2074309912,-555809682}, +{2066856885,-582913912}, {2059049696,-609918325}, {2050889698,-636818231}, +{2042378310,-663608960}, {2033516972,-690285983}, {2024307180,-716844791}, +{2014750533,-743280770}, {2004848691,-769589332}, {1994603329,-795766029}, +{1984016179,-821806435}, {1973089077,-847706028}, {1961823921,-873460313}, +{1950222618,-899064934}, {1938287127,-924515564}, {1926019520,-949807783}, +{1913421927,-974937199}, {1900496481,-999899565}, {1887245364,-1024690661}, +{1873670877,-1049306180}, {1859775377,-1073741851}, {1845561215,-1097993541}, +{1831030826,-1122057097}, {1816186632,-1145928502}, {1801031311,-1169603450}, +{1785567394,-1193077993}, {1769797456,-1216348214}, {1753724345,-1239409914}, +{1737350743,-1262259248}, {1720679456,-1284892300}, {1703713340,-1307305194}, +{1686455222,-1329494189}, {1668908218,-1351455280}, {1651075255,-1373184807}, +{1632959307,-1394679144}, {1614563642,-1415934412}, {1595891331,-1436947067}, +{1576945572,-1457713510}, {1557729613,-1478230181}, {1538246655,-1498493658}, +{1518500216,-1518500282}, {1498493590,-1538246721}, {1478230113,-1557729677}, +{1457713441,-1576945636}, {1436946998,-1595891394}, {1415934341,-1614563704}, +{1394679073,-1632959368}, {1373184735,-1651075315}, {1351455207,-1668908277}, +{1329494115,-1686455280}, {1307305120,-1703713397}, {1284892225,-1720679512}, +{1262259172,-1737350799}, {1239409837,-1753724400}, {1216348136,-1769797510}, +{1193077915,-1785567446}, {1169603371,-1801031362}, {1145928423,-1816186682}, +{1122057017,-1831030875}, {1097993571,-1845561197}, {1073741769,-1859775424}, +{1049305987,-1873670985}, {1024690635,-1887245378}, {999899482,-1900496524}, +{974937230,-1913421912}, {949807699,-1926019561}, {924515422,-1938287195}, +{899064965,-1950222603}, {873460227,-1961823959}, {847705824,-1973089164}, +{821806407,-1984016190}, {795765941,-1994603364}, {769589125,-2004848771}, +{743280682,-2014750566}, {716844642,-2024307233}, {690286016,-2033516961}, +{663608871,-2042378339}, {636818019,-2050889764}, {609918296,-2059049705}, +{582913822,-2066856911}, {555809715,-2074309903}, {528610126,-2081407540}, +{501319962,-2088148536}, {473944148,-2094531680}, {446486876,-2100555994}, +{418953102,-2106220386}, {391347792,-2111523838}, {363675176,-2116465540}, +{335940246,-2121044593}, {308148006,-2125260177}, {280302715,-2129111646}, +{252409648,-2132598271}, {224473078,-2135719516}, {196498046,-2138474814}, +{168489600,-2140863674}, {140452029,-2142885728}, {112390647,-2144540593}, +{84309753,-2145828017}, {56214412,-2146747762}, {28109695,-2147299667}, +{2147483647,0}, {2146747758,-56214570}, {2144540595,-112390613}, +{2140863671,-168489630}, {2135719506,-224473172}, {2129111626,-280302871}, +{2121044558,-335940465}, {2111523833,-391347822}, {2100555974,-446486968}, +{2088148500,-501320115}, {2074309912,-555809682}, {2059049696,-609918325}, +{2042378310,-663608960}, {2024307180,-716844791}, {2004848691,-769589332}, +{1984016179,-821806435}, {1961823921,-873460313}, {1938287127,-924515564}, +{1913421927,-974937199}, {1887245364,-1024690661}, {1859775377,-1073741851}, +{1831030826,-1122057097}, {1801031311,-1169603450}, {1769797456,-1216348214}, +{1737350743,-1262259248}, {1703713340,-1307305194}, {1668908218,-1351455280}, +{1632959307,-1394679144}, {1595891331,-1436947067}, {1557729613,-1478230181}, +{1518500216,-1518500282}, {1478230113,-1557729677}, {1436946998,-1595891394}, +{1394679073,-1632959368}, {1351455207,-1668908277}, {1307305120,-1703713397}, +{1262259172,-1737350799}, {1216348136,-1769797510}, {1169603371,-1801031362}, +{1122057017,-1831030875}, {1073741769,-1859775424}, {1024690635,-1887245378}, +{974937230,-1913421912}, {924515422,-1938287195}, {873460227,-1961823959}, +{821806407,-1984016190}, {769589125,-2004848771}, {716844642,-2024307233}, +{663608871,-2042378339}, {609918296,-2059049705}, {555809715,-2074309903}, +{501319962,-2088148536}, {446486876,-2100555994}, {391347792,-2111523838}, +{335940246,-2121044593}, {280302715,-2129111646}, {224473078,-2135719516}, +{168489600,-2140863674}, {112390647,-2144540593}, {56214412,-2146747762}, +{-94,-2147483647}, {-56214600,-2146747757}, {-112390835,-2144540584}, +{-168489787,-2140863659}, {-224473265,-2135719496}, {-280302901,-2129111622}, +{-335940431,-2121044564}, {-391347977,-2111523804}, {-446487060,-2100555955}, +{-501320144,-2088148493}, {-555809896,-2074309855}, {-609918476,-2059049651}, +{-663609049,-2042378281}, {-716844819,-2024307170}, {-769589300,-2004848703}, +{-821806581,-1984016118}, {-873460398,-1961823883}, {-924515591,-1938287114}, +{-974937397,-1913421827}, {-1024690575,-1887245411}, {-1073741932,-1859775330}, +{-1122057395,-1831030643}, {-1169603421,-1801031330}, {-1216348291,-1769797403}, +{-1262259116,-1737350839}, {-1307305268,-1703713283}, {-1351455453,-1668908078}, +{-1394679021,-1632959413}, {-1436947137,-1595891268}, {-1478230435,-1557729372}, +{-1518500258,-1518500240}, {-1557729742,-1478230045}, {-1595891628,-1436946738}, +{-1632959429,-1394679001}, {-1668908417,-1351455035}, {-1703713298,-1307305248}, +{-1737350854,-1262259096}, {-1769797708,-1216347848}, {-1801031344,-1169603400}, +{-1831030924,-1122056937}, {-1859775343,-1073741910}, {-1887245423,-1024690552}, +{-1913422071,-974936918}, {-1938287125,-924515568}, {-1961823997,-873460141}, +{-1984016324,-821806084}, {-2004848713,-769589276}, {-2024307264,-716844553}, +{-2042378447,-663608538}, {-2059049731,-609918206}, {-2074309994,-555809377}, +{-2088148499,-501320119}, {-2100556013,-446486785}, {-2111523902,-391347448}, +{-2121044568,-335940406}, {-2129111659,-280302621}, {-2135719499,-224473240}, +{-2140863681,-168489506}, {-2144540612,-112390298}, {-2146747758,-56214574}, +{2147483647,0}, {2145828015,-84309815}, {2140863671,-168489630}, +{2132598271,-252409646}, {2121044558,-335940465}, {2106220349,-418953288}, +{2088148500,-501320115}, {2066856885,-582913912}, {2042378310,-663608960}, +{2014750533,-743280770}, {1984016179,-821806435}, {1950222618,-899064934}, +{1913421927,-974937199}, {1873670877,-1049306180}, {1831030826,-1122057097}, +{1785567394,-1193077993}, {1737350743,-1262259248}, {1686455222,-1329494189}, +{1632959307,-1394679144}, {1576945572,-1457713510}, {1518500216,-1518500282}, +{1457713441,-1576945636}, {1394679073,-1632959368}, {1329494115,-1686455280}, +{1262259172,-1737350799}, {1193077915,-1785567446}, {1122057017,-1831030875}, +{1049305987,-1873670985}, {974937230,-1913421912}, {899064965,-1950222603}, +{821806407,-1984016190}, {743280682,-2014750566}, {663608871,-2042378339}, +{582913822,-2066856911}, {501319962,-2088148536}, {418953102,-2106220386}, +{335940246,-2121044593}, {252409648,-2132598271}, {168489600,-2140863674}, +{84309753,-2145828017}, {-94,-2147483647}, {-84309940,-2145828010}, +{-168489787,-2140863659}, {-252409834,-2132598249}, {-335940431,-2121044564}, +{-418953286,-2106220349}, {-501320144,-2088148493}, {-582914003,-2066856860}, +{-663609049,-2042378281}, {-743280858,-2014750501}, {-821806581,-1984016118}, +{-899065136,-1950222525}, {-974937397,-1913421827}, {-1049306374,-1873670768}, +{-1122057395,-1831030643}, {-1193078284,-1785567199}, {-1262259116,-1737350839}, +{-1329494061,-1686455323}, {-1394679021,-1632959413}, {-1457713485,-1576945595}, +{-1518500258,-1518500240}, {-1576945613,-1457713466}, {-1632959429,-1394679001}, +{-1686455338,-1329494041}, {-1737350854,-1262259096}, {-1785567498,-1193077837}, +{-1831030924,-1122056937}, {-1873671031,-1049305905}, {-1913422071,-974936918}, +{-1950222750,-899064648}, {-1984016324,-821806084}, {-2014750687,-743280354}, +{-2042378447,-663608538}, {-2066856867,-582913978}, {-2088148499,-501320119}, +{-2106220354,-418953261}, {-2121044568,-335940406}, {-2132598282,-252409555}, +{-2140863681,-168489506}, {-2145828021,-84309659}, {-2147483647,188}, +{-2145828006,84310034}, {-2140863651,168489881}, {-2132598237,252409928}, +{-2121044509,335940777}, {-2106220281,418953629}, {-2088148411,501320484}, +{-2066856765,582914339}, {-2042378331,663608895}, {-2014750557,743280706}, +{-1984016181,821806431}, {-1950222593,899064989}, {-1913421900,974937252}, +{-1873670848,1049306232}, {-1831030728,1122057257}, {-1785567289,1193078149}, +{-1737350633,1262259400}, {-1686455106,1329494336}, {-1632959185,1394679287}, +{-1576945358,1457713742}, {-1518499993,1518500506}, {-1457713209,1576945850}, +{-1394678735,1632959656}, {-1329493766,1686455555}, {-1262258813,1737351059}, +{-1193077546,1785567692}, {-1122056638,1831031107}, {-1049305599,1873671202}, +{-974936606,1913422229}, {-899064330,1950222896}, {-821805761,1984016458}, +{-743280025,2014750808}, {-663609179,2042378239}, {-582914134,2066856823}, +{-501320277,2088148461}, {-418953420,2106220322}, {-335940566,2121044542}, +{-252409716,2132598263}, {-168489668,2140863668}, {-84309821,2145828015}, +}; +static const ne10_fft_cpx_int32_t ne10_twiddles_240[240] = { +{0,0}, {2147483647,0}, {2147483647,0}, +{2147483647,0}, {1961823921,-873460313}, {1436946998,-1595891394}, +{2147483647,0}, {1436946998,-1595891394}, {-224473265,-2135719496}, +{2147483647,0}, {663608871,-2042378339}, {-1737350854,-1262259096}, +{2147483647,0}, {-224473265,-2135719496}, {-2100555935,446487152}, +{2147483647,0}, {2135719506,-224473172}, {2100555974,-446486968}, +{2042378310,-663608960}, {1961823921,-873460313}, {1859775377,-1073741851}, +{1737350743,-1262259248}, {1595891331,-1436947067}, {1436946998,-1595891394}, +{1262259172,-1737350799}, {1073741769,-1859775424}, {873460227,-1961823959}, +{663608871,-2042378339}, {446486876,-2100555994}, {224473078,-2135719516}, +{2147483647,0}, {2100555974,-446486968}, {1961823921,-873460313}, +{1737350743,-1262259248}, {1436946998,-1595891394}, {1073741769,-1859775424}, +{663608871,-2042378339}, {224473078,-2135719516}, {-224473265,-2135719496}, +{-663609049,-2042378281}, {-1073741932,-1859775330}, {-1436947137,-1595891268}, +{-1737350854,-1262259096}, {-1961823997,-873460141}, {-2100556013,-446486785}, +{2147483647,0}, {2042378310,-663608960}, {1737350743,-1262259248}, +{1262259172,-1737350799}, {663608871,-2042378339}, {-94,-2147483647}, +{-663609049,-2042378281}, {-1262259116,-1737350839}, {-1737350854,-1262259096}, +{-2042378447,-663608538}, {-2147483647,188}, {-2042378331,663608895}, +{-1737350633,1262259400}, {-1262258813,1737351059}, {-663609179,2042378239}, +{2147483647,0}, {2146747758,-56214570}, {2144540595,-112390613}, +{2140863671,-168489630}, {2135719506,-224473172}, {2129111626,-280302871}, +{2121044558,-335940465}, {2111523833,-391347822}, {2100555974,-446486968}, +{2088148500,-501320115}, {2074309912,-555809682}, {2059049696,-609918325}, +{2042378310,-663608960}, {2024307180,-716844791}, {2004848691,-769589332}, +{1984016179,-821806435}, {1961823921,-873460313}, {1938287127,-924515564}, +{1913421927,-974937199}, {1887245364,-1024690661}, {1859775377,-1073741851}, +{1831030826,-1122057097}, {1801031311,-1169603450}, {1769797456,-1216348214}, +{1737350743,-1262259248}, {1703713340,-1307305194}, {1668908218,-1351455280}, +{1632959307,-1394679144}, {1595891331,-1436947067}, {1557729613,-1478230181}, +{1518500216,-1518500282}, {1478230113,-1557729677}, {1436946998,-1595891394}, +{1394679073,-1632959368}, {1351455207,-1668908277}, {1307305120,-1703713397}, +{1262259172,-1737350799}, {1216348136,-1769797510}, {1169603371,-1801031362}, +{1122057017,-1831030875}, {1073741769,-1859775424}, {1024690635,-1887245378}, +{974937230,-1913421912}, {924515422,-1938287195}, {873460227,-1961823959}, +{821806407,-1984016190}, {769589125,-2004848771}, {716844642,-2024307233}, +{663608871,-2042378339}, {609918296,-2059049705}, {555809715,-2074309903}, +{501319962,-2088148536}, {446486876,-2100555994}, {391347792,-2111523838}, +{335940246,-2121044593}, {280302715,-2129111646}, {224473078,-2135719516}, +{168489600,-2140863674}, {112390647,-2144540593}, {56214412,-2146747762}, +{2147483647,0}, {2144540595,-112390613}, {2135719506,-224473172}, +{2121044558,-335940465}, {2100555974,-446486968}, {2074309912,-555809682}, +{2042378310,-663608960}, {2004848691,-769589332}, {1961823921,-873460313}, +{1913421927,-974937199}, {1859775377,-1073741851}, {1801031311,-1169603450}, +{1737350743,-1262259248}, {1668908218,-1351455280}, {1595891331,-1436947067}, +{1518500216,-1518500282}, {1436946998,-1595891394}, {1351455207,-1668908277}, +{1262259172,-1737350799}, {1169603371,-1801031362}, {1073741769,-1859775424}, +{974937230,-1913421912}, {873460227,-1961823959}, {769589125,-2004848771}, +{663608871,-2042378339}, {555809715,-2074309903}, {446486876,-2100555994}, +{335940246,-2121044593}, {224473078,-2135719516}, {112390647,-2144540593}, +{-94,-2147483647}, {-112390835,-2144540584}, {-224473265,-2135719496}, +{-335940431,-2121044564}, {-446487060,-2100555955}, {-555809896,-2074309855}, +{-663609049,-2042378281}, {-769589300,-2004848703}, {-873460398,-1961823883}, +{-974937397,-1913421827}, {-1073741932,-1859775330}, {-1169603421,-1801031330}, +{-1262259116,-1737350839}, {-1351455453,-1668908078}, {-1436947137,-1595891268}, +{-1518500258,-1518500240}, {-1595891628,-1436946738}, {-1668908417,-1351455035}, +{-1737350854,-1262259096}, {-1801031344,-1169603400}, {-1859775343,-1073741910}, +{-1913422071,-974936918}, {-1961823997,-873460141}, {-2004848713,-769589276}, +{-2042378447,-663608538}, {-2074309994,-555809377}, {-2100556013,-446486785}, +{-2121044568,-335940406}, {-2135719499,-224473240}, {-2144540612,-112390298}, +{2147483647,0}, {2140863671,-168489630}, {2121044558,-335940465}, +{2088148500,-501320115}, {2042378310,-663608960}, {1984016179,-821806435}, +{1913421927,-974937199}, {1831030826,-1122057097}, {1737350743,-1262259248}, +{1632959307,-1394679144}, {1518500216,-1518500282}, {1394679073,-1632959368}, +{1262259172,-1737350799}, {1122057017,-1831030875}, {974937230,-1913421912}, +{821806407,-1984016190}, {663608871,-2042378339}, {501319962,-2088148536}, +{335940246,-2121044593}, {168489600,-2140863674}, {-94,-2147483647}, +{-168489787,-2140863659}, {-335940431,-2121044564}, {-501320144,-2088148493}, +{-663609049,-2042378281}, {-821806581,-1984016118}, {-974937397,-1913421827}, +{-1122057395,-1831030643}, {-1262259116,-1737350839}, {-1394679021,-1632959413}, +{-1518500258,-1518500240}, {-1632959429,-1394679001}, {-1737350854,-1262259096}, +{-1831030924,-1122056937}, {-1913422071,-974936918}, {-1984016324,-821806084}, +{-2042378447,-663608538}, {-2088148499,-501320119}, {-2121044568,-335940406}, +{-2140863681,-168489506}, {-2147483647,188}, {-2140863651,168489881}, +{-2121044509,335940777}, {-2088148411,501320484}, {-2042378331,663608895}, +{-1984016181,821806431}, {-1913421900,974937252}, {-1831030728,1122057257}, +{-1737350633,1262259400}, {-1632959185,1394679287}, {-1518499993,1518500506}, +{-1394678735,1632959656}, {-1262258813,1737351059}, {-1122056638,1831031107}, +{-974936606,1913422229}, {-821805761,1984016458}, {-663609179,2042378239}, +{-501320277,2088148461}, {-335940566,2121044542}, {-168489668,2140863668}, +}; +static const ne10_fft_cpx_int32_t ne10_twiddles_120[120] = { +{0,0}, {2147483647,0}, {2147483647,0}, +{2147483647,0}, {1961823921,-873460313}, {1436946998,-1595891394}, +{2147483647,0}, {1436946998,-1595891394}, {-224473265,-2135719496}, +{2147483647,0}, {663608871,-2042378339}, {-1737350854,-1262259096}, +{2147483647,0}, {-224473265,-2135719496}, {-2100555935,446487152}, +{2147483647,0}, {2100555974,-446486968}, {1961823921,-873460313}, +{1737350743,-1262259248}, {1436946998,-1595891394}, {1073741769,-1859775424}, +{663608871,-2042378339}, {224473078,-2135719516}, {-224473265,-2135719496}, +{-663609049,-2042378281}, {-1073741932,-1859775330}, {-1436947137,-1595891268}, +{-1737350854,-1262259096}, {-1961823997,-873460141}, {-2100556013,-446486785}, +{2147483647,0}, {2144540595,-112390613}, {2135719506,-224473172}, +{2121044558,-335940465}, {2100555974,-446486968}, {2074309912,-555809682}, +{2042378310,-663608960}, {2004848691,-769589332}, {1961823921,-873460313}, +{1913421927,-974937199}, {1859775377,-1073741851}, {1801031311,-1169603450}, +{1737350743,-1262259248}, {1668908218,-1351455280}, {1595891331,-1436947067}, +{1518500216,-1518500282}, {1436946998,-1595891394}, {1351455207,-1668908277}, +{1262259172,-1737350799}, {1169603371,-1801031362}, {1073741769,-1859775424}, +{974937230,-1913421912}, {873460227,-1961823959}, {769589125,-2004848771}, +{663608871,-2042378339}, {555809715,-2074309903}, {446486876,-2100555994}, +{335940246,-2121044593}, {224473078,-2135719516}, {112390647,-2144540593}, +{2147483647,0}, {2135719506,-224473172}, {2100555974,-446486968}, +{2042378310,-663608960}, {1961823921,-873460313}, {1859775377,-1073741851}, +{1737350743,-1262259248}, {1595891331,-1436947067}, {1436946998,-1595891394}, +{1262259172,-1737350799}, {1073741769,-1859775424}, {873460227,-1961823959}, +{663608871,-2042378339}, {446486876,-2100555994}, {224473078,-2135719516}, +{-94,-2147483647}, {-224473265,-2135719496}, {-446487060,-2100555955}, +{-663609049,-2042378281}, {-873460398,-1961823883}, {-1073741932,-1859775330}, +{-1262259116,-1737350839}, {-1436947137,-1595891268}, {-1595891628,-1436946738}, +{-1737350854,-1262259096}, {-1859775343,-1073741910}, {-1961823997,-873460141}, +{-2042378447,-663608538}, {-2100556013,-446486785}, {-2135719499,-224473240}, +{2147483647,0}, {2121044558,-335940465}, {2042378310,-663608960}, +{1913421927,-974937199}, {1737350743,-1262259248}, {1518500216,-1518500282}, +{1262259172,-1737350799}, {974937230,-1913421912}, {663608871,-2042378339}, +{335940246,-2121044593}, {-94,-2147483647}, {-335940431,-2121044564}, +{-663609049,-2042378281}, {-974937397,-1913421827}, {-1262259116,-1737350839}, +{-1518500258,-1518500240}, {-1737350854,-1262259096}, {-1913422071,-974936918}, +{-2042378447,-663608538}, {-2121044568,-335940406}, {-2147483647,188}, +{-2121044509,335940777}, {-2042378331,663608895}, {-1913421900,974937252}, +{-1737350633,1262259400}, {-1518499993,1518500506}, {-1262258813,1737351059}, +{-974936606,1913422229}, {-663609179,2042378239}, {-335940566,2121044542}, +}; +static const ne10_fft_cpx_int32_t ne10_twiddles_60[60] = { +{0,0}, {2147483647,0}, {2147483647,0}, +{2147483647,0}, {1961823921,-873460313}, {1436946998,-1595891394}, +{2147483647,0}, {1436946998,-1595891394}, {-224473265,-2135719496}, +{2147483647,0}, {663608871,-2042378339}, {-1737350854,-1262259096}, +{2147483647,0}, {-224473265,-2135719496}, {-2100555935,446487152}, +{2147483647,0}, {2135719506,-224473172}, {2100555974,-446486968}, +{2042378310,-663608960}, {1961823921,-873460313}, {1859775377,-1073741851}, +{1737350743,-1262259248}, {1595891331,-1436947067}, {1436946998,-1595891394}, +{1262259172,-1737350799}, {1073741769,-1859775424}, {873460227,-1961823959}, +{663608871,-2042378339}, {446486876,-2100555994}, {224473078,-2135719516}, +{2147483647,0}, {2100555974,-446486968}, {1961823921,-873460313}, +{1737350743,-1262259248}, {1436946998,-1595891394}, {1073741769,-1859775424}, +{663608871,-2042378339}, {224473078,-2135719516}, {-224473265,-2135719496}, +{-663609049,-2042378281}, {-1073741932,-1859775330}, {-1436947137,-1595891268}, +{-1737350854,-1262259096}, {-1961823997,-873460141}, {-2100556013,-446486785}, +{2147483647,0}, {2042378310,-663608960}, {1737350743,-1262259248}, +{1262259172,-1737350799}, {663608871,-2042378339}, {-94,-2147483647}, +{-663609049,-2042378281}, {-1262259116,-1737350839}, {-1737350854,-1262259096}, +{-2042378447,-663608538}, {-2147483647,188}, {-2042378331,663608895}, +{-1737350633,1262259400}, {-1262258813,1737351059}, {-663609179,2042378239}, +}; +static const ne10_fft_state_int32_t ne10_fft_state_int32_t_480 = { +120, +(ne10_int32_t *)ne10_factors_480, +(ne10_fft_cpx_int32_t *)ne10_twiddles_480, +NULL, +(ne10_fft_cpx_int32_t *)&ne10_twiddles_480[120], +}; +static const arch_fft_state cfg_arch_480 = { +1, +(void *)&ne10_fft_state_int32_t_480, +}; + +static const ne10_fft_state_int32_t ne10_fft_state_int32_t_240 = { +60, +(ne10_int32_t *)ne10_factors_240, +(ne10_fft_cpx_int32_t *)ne10_twiddles_240, +NULL, +(ne10_fft_cpx_int32_t *)&ne10_twiddles_240[60], +}; +static const arch_fft_state cfg_arch_240 = { +1, +(void *)&ne10_fft_state_int32_t_240, +}; + +static const ne10_fft_state_int32_t ne10_fft_state_int32_t_120 = { +30, +(ne10_int32_t *)ne10_factors_120, +(ne10_fft_cpx_int32_t *)ne10_twiddles_120, +NULL, +(ne10_fft_cpx_int32_t *)&ne10_twiddles_120[30], +}; +static const arch_fft_state cfg_arch_120 = { +1, +(void *)&ne10_fft_state_int32_t_120, +}; + +static const ne10_fft_state_int32_t ne10_fft_state_int32_t_60 = { +15, +(ne10_int32_t *)ne10_factors_60, +(ne10_fft_cpx_int32_t *)ne10_twiddles_60, +NULL, +(ne10_fft_cpx_int32_t *)&ne10_twiddles_60[15], +}; +static const arch_fft_state cfg_arch_60 = { +1, +(void *)&ne10_fft_state_int32_t_60, +}; + +#endif /* end NE10_FFT_PARAMS48000_960 */ diff --git a/drivers/opus/celt/static_modes_float.h b/drivers/opus/celt/static_modes_float.h index 362be6cca8..f7d501a993 100644 --- a/drivers/opus/celt/static_modes_float.h +++ b/drivers/opus/celt/static_modes_float.h @@ -1,9 +1,14 @@ /* The contents of this file was automatically generated by dump_modes.c with arguments: 48000 960 It contains static definitions for some pre-defined modes. */ -#include "opus/celt/opus_modes.h" +#include "opus/celt/modes.h" #include "opus/celt/rate.h" +#ifdef HAVE_ARM_NE10 +#define OVERRIDE_FFT 1 +#include "opus/celt/static_modes_float_arm_ne10.h" +#endif + #ifndef DEF_WINDOW120 #define DEF_WINDOW120 static const opus_val16 window120[120] = { @@ -341,84 +346,84 @@ static const kiss_twiddle_cpx fft_twiddles48000_960[480] = { #ifndef FFT_BITREV480 #define FFT_BITREV480 static const opus_int16 fft_bitrev480[480] = { -0, 120, 240, 360, 30, 150, 270, 390, 60, 180, 300, 420, 90, 210, 330, -450, 15, 135, 255, 375, 45, 165, 285, 405, 75, 195, 315, 435, 105, 225, -345, 465, 5, 125, 245, 365, 35, 155, 275, 395, 65, 185, 305, 425, 95, -215, 335, 455, 20, 140, 260, 380, 50, 170, 290, 410, 80, 200, 320, 440, -110, 230, 350, 470, 10, 130, 250, 370, 40, 160, 280, 400, 70, 190, 310, -430, 100, 220, 340, 460, 25, 145, 265, 385, 55, 175, 295, 415, 85, 205, -325, 445, 115, 235, 355, 475, 1, 121, 241, 361, 31, 151, 271, 391, 61, -181, 301, 421, 91, 211, 331, 451, 16, 136, 256, 376, 46, 166, 286, 406, -76, 196, 316, 436, 106, 226, 346, 466, 6, 126, 246, 366, 36, 156, 276, -396, 66, 186, 306, 426, 96, 216, 336, 456, 21, 141, 261, 381, 51, 171, -291, 411, 81, 201, 321, 441, 111, 231, 351, 471, 11, 131, 251, 371, 41, -161, 281, 401, 71, 191, 311, 431, 101, 221, 341, 461, 26, 146, 266, 386, -56, 176, 296, 416, 86, 206, 326, 446, 116, 236, 356, 476, 2, 122, 242, -362, 32, 152, 272, 392, 62, 182, 302, 422, 92, 212, 332, 452, 17, 137, -257, 377, 47, 167, 287, 407, 77, 197, 317, 437, 107, 227, 347, 467, 7, -127, 247, 367, 37, 157, 277, 397, 67, 187, 307, 427, 97, 217, 337, 457, -22, 142, 262, 382, 52, 172, 292, 412, 82, 202, 322, 442, 112, 232, 352, -472, 12, 132, 252, 372, 42, 162, 282, 402, 72, 192, 312, 432, 102, 222, -342, 462, 27, 147, 267, 387, 57, 177, 297, 417, 87, 207, 327, 447, 117, -237, 357, 477, 3, 123, 243, 363, 33, 153, 273, 393, 63, 183, 303, 423, -93, 213, 333, 453, 18, 138, 258, 378, 48, 168, 288, 408, 78, 198, 318, -438, 108, 228, 348, 468, 8, 128, 248, 368, 38, 158, 278, 398, 68, 188, -308, 428, 98, 218, 338, 458, 23, 143, 263, 383, 53, 173, 293, 413, 83, -203, 323, 443, 113, 233, 353, 473, 13, 133, 253, 373, 43, 163, 283, 403, -73, 193, 313, 433, 103, 223, 343, 463, 28, 148, 268, 388, 58, 178, 298, -418, 88, 208, 328, 448, 118, 238, 358, 478, 4, 124, 244, 364, 34, 154, -274, 394, 64, 184, 304, 424, 94, 214, 334, 454, 19, 139, 259, 379, 49, -169, 289, 409, 79, 199, 319, 439, 109, 229, 349, 469, 9, 129, 249, 369, -39, 159, 279, 399, 69, 189, 309, 429, 99, 219, 339, 459, 24, 144, 264, -384, 54, 174, 294, 414, 84, 204, 324, 444, 114, 234, 354, 474, 14, 134, -254, 374, 44, 164, 284, 404, 74, 194, 314, 434, 104, 224, 344, 464, 29, -149, 269, 389, 59, 179, 299, 419, 89, 209, 329, 449, 119, 239, 359, 479, +0, 96, 192, 288, 384, 32, 128, 224, 320, 416, 64, 160, 256, 352, 448, +8, 104, 200, 296, 392, 40, 136, 232, 328, 424, 72, 168, 264, 360, 456, +16, 112, 208, 304, 400, 48, 144, 240, 336, 432, 80, 176, 272, 368, 464, +24, 120, 216, 312, 408, 56, 152, 248, 344, 440, 88, 184, 280, 376, 472, +4, 100, 196, 292, 388, 36, 132, 228, 324, 420, 68, 164, 260, 356, 452, +12, 108, 204, 300, 396, 44, 140, 236, 332, 428, 76, 172, 268, 364, 460, +20, 116, 212, 308, 404, 52, 148, 244, 340, 436, 84, 180, 276, 372, 468, +28, 124, 220, 316, 412, 60, 156, 252, 348, 444, 92, 188, 284, 380, 476, +1, 97, 193, 289, 385, 33, 129, 225, 321, 417, 65, 161, 257, 353, 449, +9, 105, 201, 297, 393, 41, 137, 233, 329, 425, 73, 169, 265, 361, 457, +17, 113, 209, 305, 401, 49, 145, 241, 337, 433, 81, 177, 273, 369, 465, +25, 121, 217, 313, 409, 57, 153, 249, 345, 441, 89, 185, 281, 377, 473, +5, 101, 197, 293, 389, 37, 133, 229, 325, 421, 69, 165, 261, 357, 453, +13, 109, 205, 301, 397, 45, 141, 237, 333, 429, 77, 173, 269, 365, 461, +21, 117, 213, 309, 405, 53, 149, 245, 341, 437, 85, 181, 277, 373, 469, +29, 125, 221, 317, 413, 61, 157, 253, 349, 445, 93, 189, 285, 381, 477, +2, 98, 194, 290, 386, 34, 130, 226, 322, 418, 66, 162, 258, 354, 450, +10, 106, 202, 298, 394, 42, 138, 234, 330, 426, 74, 170, 266, 362, 458, +18, 114, 210, 306, 402, 50, 146, 242, 338, 434, 82, 178, 274, 370, 466, +26, 122, 218, 314, 410, 58, 154, 250, 346, 442, 90, 186, 282, 378, 474, +6, 102, 198, 294, 390, 38, 134, 230, 326, 422, 70, 166, 262, 358, 454, +14, 110, 206, 302, 398, 46, 142, 238, 334, 430, 78, 174, 270, 366, 462, +22, 118, 214, 310, 406, 54, 150, 246, 342, 438, 86, 182, 278, 374, 470, +30, 126, 222, 318, 414, 62, 158, 254, 350, 446, 94, 190, 286, 382, 478, +3, 99, 195, 291, 387, 35, 131, 227, 323, 419, 67, 163, 259, 355, 451, +11, 107, 203, 299, 395, 43, 139, 235, 331, 427, 75, 171, 267, 363, 459, +19, 115, 211, 307, 403, 51, 147, 243, 339, 435, 83, 179, 275, 371, 467, +27, 123, 219, 315, 411, 59, 155, 251, 347, 443, 91, 187, 283, 379, 475, +7, 103, 199, 295, 391, 39, 135, 231, 327, 423, 71, 167, 263, 359, 455, +15, 111, 207, 303, 399, 47, 143, 239, 335, 431, 79, 175, 271, 367, 463, +23, 119, 215, 311, 407, 55, 151, 247, 343, 439, 87, 183, 279, 375, 471, +31, 127, 223, 319, 415, 63, 159, 255, 351, 447, 95, 191, 287, 383, 479, }; #endif #ifndef FFT_BITREV240 #define FFT_BITREV240 static const opus_int16 fft_bitrev240[240] = { -0, 60, 120, 180, 15, 75, 135, 195, 30, 90, 150, 210, 45, 105, 165, -225, 5, 65, 125, 185, 20, 80, 140, 200, 35, 95, 155, 215, 50, 110, -170, 230, 10, 70, 130, 190, 25, 85, 145, 205, 40, 100, 160, 220, 55, -115, 175, 235, 1, 61, 121, 181, 16, 76, 136, 196, 31, 91, 151, 211, -46, 106, 166, 226, 6, 66, 126, 186, 21, 81, 141, 201, 36, 96, 156, -216, 51, 111, 171, 231, 11, 71, 131, 191, 26, 86, 146, 206, 41, 101, -161, 221, 56, 116, 176, 236, 2, 62, 122, 182, 17, 77, 137, 197, 32, -92, 152, 212, 47, 107, 167, 227, 7, 67, 127, 187, 22, 82, 142, 202, -37, 97, 157, 217, 52, 112, 172, 232, 12, 72, 132, 192, 27, 87, 147, -207, 42, 102, 162, 222, 57, 117, 177, 237, 3, 63, 123, 183, 18, 78, -138, 198, 33, 93, 153, 213, 48, 108, 168, 228, 8, 68, 128, 188, 23, -83, 143, 203, 38, 98, 158, 218, 53, 113, 173, 233, 13, 73, 133, 193, -28, 88, 148, 208, 43, 103, 163, 223, 58, 118, 178, 238, 4, 64, 124, -184, 19, 79, 139, 199, 34, 94, 154, 214, 49, 109, 169, 229, 9, 69, -129, 189, 24, 84, 144, 204, 39, 99, 159, 219, 54, 114, 174, 234, 14, -74, 134, 194, 29, 89, 149, 209, 44, 104, 164, 224, 59, 119, 179, 239, +0, 48, 96, 144, 192, 16, 64, 112, 160, 208, 32, 80, 128, 176, 224, +4, 52, 100, 148, 196, 20, 68, 116, 164, 212, 36, 84, 132, 180, 228, +8, 56, 104, 152, 200, 24, 72, 120, 168, 216, 40, 88, 136, 184, 232, +12, 60, 108, 156, 204, 28, 76, 124, 172, 220, 44, 92, 140, 188, 236, +1, 49, 97, 145, 193, 17, 65, 113, 161, 209, 33, 81, 129, 177, 225, +5, 53, 101, 149, 197, 21, 69, 117, 165, 213, 37, 85, 133, 181, 229, +9, 57, 105, 153, 201, 25, 73, 121, 169, 217, 41, 89, 137, 185, 233, +13, 61, 109, 157, 205, 29, 77, 125, 173, 221, 45, 93, 141, 189, 237, +2, 50, 98, 146, 194, 18, 66, 114, 162, 210, 34, 82, 130, 178, 226, +6, 54, 102, 150, 198, 22, 70, 118, 166, 214, 38, 86, 134, 182, 230, +10, 58, 106, 154, 202, 26, 74, 122, 170, 218, 42, 90, 138, 186, 234, +14, 62, 110, 158, 206, 30, 78, 126, 174, 222, 46, 94, 142, 190, 238, +3, 51, 99, 147, 195, 19, 67, 115, 163, 211, 35, 83, 131, 179, 227, +7, 55, 103, 151, 199, 23, 71, 119, 167, 215, 39, 87, 135, 183, 231, +11, 59, 107, 155, 203, 27, 75, 123, 171, 219, 43, 91, 139, 187, 235, +15, 63, 111, 159, 207, 31, 79, 127, 175, 223, 47, 95, 143, 191, 239, }; #endif #ifndef FFT_BITREV120 #define FFT_BITREV120 static const opus_int16 fft_bitrev120[120] = { -0, 30, 60, 90, 15, 45, 75, 105, 5, 35, 65, 95, 20, 50, 80, -110, 10, 40, 70, 100, 25, 55, 85, 115, 1, 31, 61, 91, 16, 46, -76, 106, 6, 36, 66, 96, 21, 51, 81, 111, 11, 41, 71, 101, 26, -56, 86, 116, 2, 32, 62, 92, 17, 47, 77, 107, 7, 37, 67, 97, -22, 52, 82, 112, 12, 42, 72, 102, 27, 57, 87, 117, 3, 33, 63, -93, 18, 48, 78, 108, 8, 38, 68, 98, 23, 53, 83, 113, 13, 43, -73, 103, 28, 58, 88, 118, 4, 34, 64, 94, 19, 49, 79, 109, 9, -39, 69, 99, 24, 54, 84, 114, 14, 44, 74, 104, 29, 59, 89, 119, +0, 24, 48, 72, 96, 8, 32, 56, 80, 104, 16, 40, 64, 88, 112, +4, 28, 52, 76, 100, 12, 36, 60, 84, 108, 20, 44, 68, 92, 116, +1, 25, 49, 73, 97, 9, 33, 57, 81, 105, 17, 41, 65, 89, 113, +5, 29, 53, 77, 101, 13, 37, 61, 85, 109, 21, 45, 69, 93, 117, +2, 26, 50, 74, 98, 10, 34, 58, 82, 106, 18, 42, 66, 90, 114, +6, 30, 54, 78, 102, 14, 38, 62, 86, 110, 22, 46, 70, 94, 118, +3, 27, 51, 75, 99, 11, 35, 59, 83, 107, 19, 43, 67, 91, 115, +7, 31, 55, 79, 103, 15, 39, 63, 87, 111, 23, 47, 71, 95, 119, }; #endif #ifndef FFT_BITREV60 #define FFT_BITREV60 static const opus_int16 fft_bitrev60[60] = { -0, 15, 30, 45, 5, 20, 35, 50, 10, 25, 40, 55, 1, 16, 31, -46, 6, 21, 36, 51, 11, 26, 41, 56, 2, 17, 32, 47, 7, 22, -37, 52, 12, 27, 42, 57, 3, 18, 33, 48, 8, 23, 38, 53, 13, -28, 43, 58, 4, 19, 34, 49, 9, 24, 39, 54, 14, 29, 44, 59, +0, 12, 24, 36, 48, 4, 16, 28, 40, 52, 8, 20, 32, 44, 56, +1, 13, 25, 37, 49, 5, 17, 29, 41, 53, 9, 21, 33, 45, 57, +2, 14, 26, 38, 50, 6, 18, 30, 42, 54, 10, 22, 34, 46, 58, +3, 15, 27, 39, 51, 7, 19, 31, 43, 55, 11, 23, 35, 47, 59, }; #endif @@ -428,9 +433,14 @@ static const kiss_fft_state fft_state48000_960_0 = { 480, /* nfft */ 0.002083333f, /* scale */ -1, /* shift */ -{4, 120, 4, 30, 2, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, }, /* factors */ +{5, 96, 3, 32, 4, 8, 2, 4, 4, 1, 0, 0, 0, 0, 0, 0, }, /* factors */ fft_bitrev480, /* bitrev */ fft_twiddles48000_960, /* bitrev */ +#ifdef OVERRIDE_FFT +(arch_fft_state *)&cfg_arch_480, +#else +NULL, +#endif }; #endif @@ -440,9 +450,14 @@ static const kiss_fft_state fft_state48000_960_1 = { 240, /* nfft */ 0.004166667f, /* scale */ 1, /* shift */ -{4, 60, 4, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ +{5, 48, 3, 16, 4, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ fft_bitrev240, /* bitrev */ fft_twiddles48000_960, /* bitrev */ +#ifdef OVERRIDE_FFT +(arch_fft_state *)&cfg_arch_240, +#else +NULL, +#endif }; #endif @@ -452,9 +467,14 @@ static const kiss_fft_state fft_state48000_960_2 = { 120, /* nfft */ 0.008333333f, /* scale */ 2, /* shift */ -{4, 30, 2, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ +{5, 24, 3, 8, 2, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ fft_bitrev120, /* bitrev */ fft_twiddles48000_960, /* bitrev */ +#ifdef OVERRIDE_FFT +(arch_fft_state *)&cfg_arch_120, +#else +NULL, +#endif }; #endif @@ -464,9 +484,14 @@ static const kiss_fft_state fft_state48000_960_3 = { 60, /* nfft */ 0.016666667f, /* scale */ 3, /* shift */ -{4, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ +{5, 12, 3, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ fft_bitrev60, /* bitrev */ fft_twiddles48000_960, /* bitrev */ +#ifdef OVERRIDE_FFT +(arch_fft_state *)&cfg_arch_60, +#else +NULL, +#endif }; #endif @@ -474,104 +499,368 @@ fft_twiddles48000_960, /* bitrev */ #ifndef MDCT_TWIDDLES960 #define MDCT_TWIDDLES960 -static const opus_val16 mdct_twiddles960[481] = { -1.0000000f, 0.99999465f, 0.99997858f, 0.99995181f, 0.99991433f, -0.99986614f, 0.99980724f, 0.99973764f, 0.99965732f, 0.99956631f, -0.99946459f, 0.99935216f, 0.99922904f, 0.99909521f, 0.99895068f, -0.99879546f, 0.99862953f, 0.99845292f, 0.99826561f, 0.99806761f, -0.99785892f, 0.99763955f, 0.99740949f, 0.99716875f, 0.99691733f, -0.99665524f, 0.99638247f, 0.99609903f, 0.99580493f, 0.99550016f, -0.99518473f, 0.99485864f, 0.99452190f, 0.99417450f, 0.99381646f, -0.99344778f, 0.99306846f, 0.99267850f, 0.99227791f, 0.99186670f, -0.99144486f, 0.99101241f, 0.99056934f, 0.99011566f, 0.98965139f, -0.98917651f, 0.98869104f, 0.98819498f, 0.98768834f, 0.98717112f, -0.98664333f, 0.98610497f, 0.98555606f, 0.98499659f, 0.98442657f, -0.98384600f, 0.98325491f, 0.98265328f, 0.98204113f, 0.98141846f, -0.98078528f, 0.98014159f, 0.97948742f, 0.97882275f, 0.97814760f, -0.97746197f, 0.97676588f, 0.97605933f, 0.97534232f, 0.97461487f, -0.97387698f, 0.97312866f, 0.97236992f, 0.97160077f, 0.97082121f, -0.97003125f, 0.96923091f, 0.96842019f, 0.96759909f, 0.96676764f, -0.96592582f, 0.96507367f, 0.96421118f, 0.96333837f, 0.96245523f, -0.96156180f, 0.96065806f, 0.95974403f, 0.95881973f, 0.95788517f, -0.95694034f, 0.95598526f, 0.95501995f, 0.95404440f, 0.95305864f, -0.95206267f, 0.95105651f, 0.95004016f, 0.94901364f, 0.94797697f, -0.94693013f, 0.94587315f, 0.94480604f, 0.94372882f, 0.94264149f, -0.94154406f, 0.94043656f, 0.93931897f, 0.93819133f, 0.93705365f, -0.93590592f, 0.93474818f, 0.93358042f, 0.93240268f, 0.93121493f, -0.93001722f, 0.92880955f, 0.92759193f, 0.92636438f, 0.92512690f, -0.92387953f, 0.92262225f, 0.92135509f, 0.92007809f, 0.91879121f, -0.91749449f, 0.91618795f, 0.91487161f, 0.91354545f, 0.91220952f, -0.91086382f, 0.90950836f, 0.90814316f, 0.90676824f, 0.90538363f, -0.90398929f, 0.90258528f, 0.90117161f, 0.89974828f, 0.89831532f, -0.89687273f, 0.89542055f, 0.89395877f, 0.89248742f, 0.89100652f, -0.88951606f, 0.88801610f, 0.88650661f, 0.88498764f, 0.88345918f, -0.88192125f, 0.88037390f, 0.87881711f, 0.87725090f, 0.87567531f, -0.87409035f, 0.87249599f, 0.87089232f, 0.86927933f, 0.86765699f, -0.86602540f, 0.86438453f, 0.86273437f, 0.86107503f, 0.85940641f, -0.85772862f, 0.85604161f, 0.85434547f, 0.85264014f, 0.85092572f, -0.84920218f, 0.84746955f, 0.84572781f, 0.84397704f, 0.84221721f, -0.84044838f, 0.83867056f, 0.83688375f, 0.83508799f, 0.83328325f, -0.83146961f, 0.82964704f, 0.82781562f, 0.82597530f, 0.82412620f, -0.82226820f, 0.82040144f, 0.81852589f, 0.81664154f, 0.81474847f, -0.81284665f, 0.81093620f, 0.80901698f, 0.80708914f, 0.80515262f, -0.80320752f, 0.80125378f, 0.79929149f, 0.79732067f, 0.79534125f, -0.79335335f, 0.79135691f, 0.78935204f, 0.78733867f, 0.78531691f, -0.78328674f, 0.78124818f, 0.77920122f, 0.77714595f, 0.77508232f, -0.77301043f, 0.77093026f, 0.76884183f, 0.76674517f, 0.76464026f, -0.76252720f, 0.76040593f, 0.75827656f, 0.75613907f, 0.75399349f, -0.75183978f, 0.74967807f, 0.74750833f, 0.74533054f, 0.74314481f, -0.74095112f, 0.73874950f, 0.73653993f, 0.73432251f, 0.73209718f, -0.72986405f, 0.72762307f, 0.72537438f, 0.72311787f, 0.72085359f, -0.71858162f, 0.71630192f, 0.71401459f, 0.71171956f, 0.70941701f, -0.70710677f, 0.70478900f, 0.70246363f, 0.70013079f, 0.69779041f, -0.69544260f, 0.69308738f, 0.69072466f, 0.68835458f, 0.68597709f, -0.68359229f, 0.68120013f, 0.67880072f, 0.67639404f, 0.67398011f, -0.67155892f, 0.66913059f, 0.66669509f, 0.66425240f, 0.66180265f, -0.65934581f, 0.65688191f, 0.65441092f, 0.65193298f, 0.64944801f, -0.64695613f, 0.64445727f, 0.64195160f, 0.63943902f, 0.63691954f, -0.63439328f, 0.63186019f, 0.62932037f, 0.62677377f, 0.62422055f, -0.62166055f, 0.61909394f, 0.61652065f, 0.61394081f, 0.61135435f, -0.60876139f, 0.60616195f, 0.60355593f, 0.60094349f, 0.59832457f, -0.59569929f, 0.59306758f, 0.59042957f, 0.58778523f, 0.58513460f, -0.58247766f, 0.57981452f, 0.57714518f, 0.57446961f, 0.57178793f, -0.56910013f, 0.56640624f, 0.56370623f, 0.56100023f, 0.55828818f, -0.55557020f, 0.55284627f, 0.55011641f, 0.54738067f, 0.54463901f, -0.54189157f, 0.53913828f, 0.53637921f, 0.53361450f, 0.53084398f, -0.52806787f, 0.52528601f, 0.52249852f, 0.51970543f, 0.51690688f, -0.51410279f, 0.51129310f, 0.50847793f, 0.50565732f, 0.50283139f, -0.49999997f, 0.49716321f, 0.49432122f, 0.49147383f, 0.48862118f, -0.48576340f, 0.48290042f, 0.48003216f, 0.47715876f, 0.47428025f, -0.47139677f, 0.46850813f, 0.46561448f, 0.46271584f, 0.45981235f, -0.45690383f, 0.45399042f, 0.45107214f, 0.44814915f, 0.44522124f, -0.44228868f, 0.43935137f, 0.43640926f, 0.43346247f, 0.43051104f, -0.42755511f, 0.42459449f, 0.42162932f, 0.41865964f, 0.41568558f, -0.41270697f, 0.40972393f, 0.40673661f, 0.40374494f, 0.40074884f, -0.39774844f, 0.39474390f, 0.39173501f, 0.38872193f, 0.38570469f, -0.38268343f, 0.37965796f, 0.37662842f, 0.37359496f, 0.37055739f, -0.36751585f, 0.36447038f, 0.36142122f, 0.35836797f, 0.35531089f, -0.35225000f, 0.34918544f, 0.34611704f, 0.34304493f, 0.33996926f, -0.33688983f, 0.33380680f, 0.33072019f, 0.32763015f, 0.32453650f, -0.32143936f, 0.31833890f, 0.31523503f, 0.31212767f, 0.30901696f, -0.30590306f, 0.30278577f, 0.29966524f, 0.29654150f, 0.29341470f, -0.29028464f, 0.28715147f, 0.28401522f, 0.28087605f, 0.27773376f, -0.27458861f, 0.27144052f, 0.26828940f, 0.26513541f, 0.26197859f, -0.25881907f, 0.25565666f, 0.25249152f, 0.24932367f, 0.24615327f, -0.24298012f, 0.23980436f, 0.23662604f, 0.23344530f, 0.23026206f, -0.22707623f, 0.22388809f, 0.22069744f, 0.21750443f, 0.21430908f, -0.21111156f, 0.20791165f, 0.20470953f, 0.20150520f, 0.19829884f, -0.19509024f, 0.19187955f, 0.18866692f, 0.18545227f, 0.18223552f, -0.17901681f, 0.17579631f, 0.17257380f, 0.16934945f, 0.16612328f, -0.16289546f, 0.15966577f, 0.15643437f, 0.15320141f, 0.14996669f, -0.14673037f, 0.14349260f, 0.14025329f, 0.13701235f, 0.13376995f, -0.13052612f, 0.12728101f, 0.12403442f, 0.12078650f, 0.11753740f, -0.11428693f, 0.11103523f, 0.10778234f, 0.10452842f, 0.10127326f, -0.098017137f, 0.094759842f, 0.091501652f, 0.088242363f, 0.084982129f, -0.081721103f, 0.078459084f, 0.075196224f, 0.071932560f, 0.068668243f, -0.065403073f, 0.062137201f, 0.058870665f, 0.055603617f, 0.052335974f, -0.049067651f, 0.045798921f, 0.042529582f, 0.039259788f, 0.035989573f, -0.032719092f, 0.029448142f, 0.026176876f, 0.022905329f, 0.019633657f, -0.016361655f, 0.013089478f, 0.0098171604f, 0.0065449764f, 0.0032724839f, --4.3711390e-08f, }; +static const opus_val16 mdct_twiddles960[1800] = { +0.99999994f, 0.99999321f, 0.99997580f, 0.99994773f, 0.99990886f, +0.99985933f, 0.99979913f, 0.99972820f, 0.99964654f, 0.99955416f, +0.99945110f, 0.99933738f, 0.99921292f, 0.99907774f, 0.99893188f, +0.99877530f, 0.99860805f, 0.99843007f, 0.99824142f, 0.99804211f, +0.99783206f, 0.99761140f, 0.99737996f, 0.99713790f, 0.99688518f, +0.99662173f, 0.99634761f, 0.99606287f, 0.99576741f, 0.99546129f, +0.99514455f, 0.99481714f, 0.99447906f, 0.99413031f, 0.99377096f, +0.99340093f, 0.99302030f, 0.99262899f, 0.99222708f, 0.99181455f, +0.99139136f, 0.99095762f, 0.99051321f, 0.99005818f, 0.98959261f, +0.98911643f, 0.98862964f, 0.98813224f, 0.98762429f, 0.98710573f, +0.98657662f, 0.98603696f, 0.98548669f, 0.98492593f, 0.98435456f, +0.98377270f, 0.98318028f, 0.98257732f, 0.98196387f, 0.98133987f, +0.98070538f, 0.98006040f, 0.97940493f, 0.97873890f, 0.97806245f, +0.97737551f, 0.97667813f, 0.97597027f, 0.97525197f, 0.97452319f, +0.97378403f, 0.97303438f, 0.97227436f, 0.97150391f, 0.97072303f, +0.96993178f, 0.96913016f, 0.96831810f, 0.96749574f, 0.96666300f, +0.96581990f, 0.96496642f, 0.96410263f, 0.96322852f, 0.96234411f, +0.96144938f, 0.96054435f, 0.95962906f, 0.95870346f, 0.95776761f, +0.95682150f, 0.95586514f, 0.95489854f, 0.95392174f, 0.95293468f, +0.95193744f, 0.95093000f, 0.94991243f, 0.94888461f, 0.94784665f, +0.94679856f, 0.94574034f, 0.94467193f, 0.94359344f, 0.94250488f, +0.94140619f, 0.94029742f, 0.93917859f, 0.93804967f, 0.93691075f, +0.93576175f, 0.93460274f, 0.93343377f, 0.93225473f, 0.93106574f, +0.92986679f, 0.92865789f, 0.92743903f, 0.92621022f, 0.92497152f, +0.92372292f, 0.92246443f, 0.92119598f, 0.91991776f, 0.91862965f, +0.91733170f, 0.91602397f, 0.91470635f, 0.91337901f, 0.91204184f, +0.91069490f, 0.90933824f, 0.90797186f, 0.90659571f, 0.90520984f, +0.90381432f, 0.90240908f, 0.90099424f, 0.89956969f, 0.89813554f, +0.89669174f, 0.89523834f, 0.89377540f, 0.89230281f, 0.89082074f, +0.88932908f, 0.88782793f, 0.88631725f, 0.88479710f, 0.88326746f, +0.88172835f, 0.88017982f, 0.87862182f, 0.87705445f, 0.87547767f, +0.87389153f, 0.87229604f, 0.87069118f, 0.86907703f, 0.86745358f, +0.86582077f, 0.86417878f, 0.86252749f, 0.86086690f, 0.85919720f, +0.85751826f, 0.85583007f, 0.85413277f, 0.85242635f, 0.85071075f, +0.84898609f, 0.84725231f, 0.84550947f, 0.84375757f, 0.84199661f, +0.84022665f, 0.83844769f, 0.83665979f, 0.83486289f, 0.83305705f, +0.83124226f, 0.82941860f, 0.82758605f, 0.82574469f, 0.82389444f, +0.82203537f, 0.82016748f, 0.81829083f, 0.81640542f, 0.81451124f, +0.81260836f, 0.81069672f, 0.80877650f, 0.80684757f, 0.80490994f, +0.80296379f, 0.80100900f, 0.79904562f, 0.79707366f, 0.79509324f, +0.79310423f, 0.79110676f, 0.78910083f, 0.78708643f, 0.78506362f, +0.78303236f, 0.78099275f, 0.77894479f, 0.77688843f, 0.77482378f, +0.77275085f, 0.77066964f, 0.76858020f, 0.76648247f, 0.76437658f, +0.76226246f, 0.76014024f, 0.75800985f, 0.75587130f, 0.75372469f, +0.75157005f, 0.74940729f, 0.74723655f, 0.74505776f, 0.74287105f, +0.74067634f, 0.73847371f, 0.73626316f, 0.73404479f, 0.73181850f, +0.72958434f, 0.72734243f, 0.72509271f, 0.72283524f, 0.72057003f, +0.71829706f, 0.71601641f, 0.71372813f, 0.71143216f, 0.70912862f, +0.70681745f, 0.70449871f, 0.70217246f, 0.69983864f, 0.69749737f, +0.69514859f, 0.69279242f, 0.69042879f, 0.68805778f, 0.68567938f, +0.68329364f, 0.68090063f, 0.67850029f, 0.67609268f, 0.67367786f, +0.67125577f, 0.66882652f, 0.66639012f, 0.66394657f, 0.66149592f, +0.65903819f, 0.65657341f, 0.65410155f, 0.65162271f, 0.64913690f, +0.64664418f, 0.64414448f, 0.64163786f, 0.63912445f, 0.63660413f, +0.63407701f, 0.63154310f, 0.62900239f, 0.62645501f, 0.62390089f, +0.62134010f, 0.61877263f, 0.61619854f, 0.61361790f, 0.61103064f, +0.60843682f, 0.60583651f, 0.60322970f, 0.60061646f, 0.59799677f, +0.59537065f, 0.59273821f, 0.59009939f, 0.58745426f, 0.58480281f, +0.58214509f, 0.57948118f, 0.57681108f, 0.57413477f, 0.57145232f, +0.56876373f, 0.56606907f, 0.56336832f, 0.56066155f, 0.55794877f, +0.55523002f, 0.55250537f, 0.54977477f, 0.54703826f, 0.54429591f, +0.54154772f, 0.53879374f, 0.53603399f, 0.53326851f, 0.53049731f, +0.52772039f, 0.52493787f, 0.52214974f, 0.51935595f, 0.51655668f, +0.51375180f, 0.51094145f, 0.50812566f, 0.50530440f, 0.50247771f, +0.49964568f, 0.49680826f, 0.49396557f, 0.49111754f, 0.48826426f, +0.48540577f, 0.48254207f, 0.47967321f, 0.47679919f, 0.47392011f, +0.47103590f, 0.46814668f, 0.46525243f, 0.46235323f, 0.45944905f, +0.45653993f, 0.45362595f, 0.45070711f, 0.44778344f, 0.44485497f, +0.44192174f, 0.43898380f, 0.43604112f, 0.43309379f, 0.43014181f, +0.42718524f, 0.42422408f, 0.42125839f, 0.41828820f, 0.41531351f, +0.41233435f, 0.40935081f, 0.40636289f, 0.40337059f, 0.40037400f, +0.39737311f, 0.39436796f, 0.39135858f, 0.38834500f, 0.38532731f, +0.38230544f, 0.37927949f, 0.37624949f, 0.37321547f, 0.37017745f, +0.36713544f, 0.36408952f, 0.36103970f, 0.35798600f, 0.35492846f, +0.35186714f, 0.34880206f, 0.34573323f, 0.34266070f, 0.33958447f, +0.33650464f, 0.33342120f, 0.33033419f, 0.32724363f, 0.32414958f, +0.32105204f, 0.31795108f, 0.31484672f, 0.31173897f, 0.30862790f, +0.30551350f, 0.30239585f, 0.29927495f, 0.29615086f, 0.29302359f, +0.28989318f, 0.28675964f, 0.28362307f, 0.28048345f, 0.27734083f, +0.27419522f, 0.27104670f, 0.26789525f, 0.26474094f, 0.26158381f, +0.25842386f, 0.25526115f, 0.25209570f, 0.24892756f, 0.24575676f, +0.24258332f, 0.23940729f, 0.23622867f, 0.23304754f, 0.22986393f, +0.22667783f, 0.22348931f, 0.22029841f, 0.21710514f, 0.21390954f, +0.21071166f, 0.20751151f, 0.20430915f, 0.20110460f, 0.19789790f, +0.19468907f, 0.19147816f, 0.18826519f, 0.18505022f, 0.18183327f, +0.17861435f, 0.17539354f, 0.17217083f, 0.16894630f, 0.16571994f, +0.16249183f, 0.15926196f, 0.15603039f, 0.15279715f, 0.14956227f, +0.14632578f, 0.14308774f, 0.13984816f, 0.13660708f, 0.13336454f, +0.13012058f, 0.12687522f, 0.12362850f, 0.12038045f, 0.11713112f, +0.11388054f, 0.11062872f, 0.10737573f, 0.10412160f, 0.10086634f, +0.097609997f, 0.094352618f, 0.091094226f, 0.087834857f, 0.084574550f, +0.081313334f, 0.078051247f, 0.074788325f, 0.071524605f, 0.068260118f, +0.064994894f, 0.061728980f, 0.058462404f, 0.055195201f, 0.051927410f, +0.048659060f, 0.045390189f, 0.042120833f, 0.038851023f, 0.035580799f, +0.032310195f, 0.029039243f, 0.025767982f, 0.022496443f, 0.019224664f, +0.015952680f, 0.012680525f, 0.0094082337f, 0.0061358409f, 0.0028633832f, +-0.00040910527f, -0.0036815894f, -0.0069540343f, -0.010226404f, -0.013498665f, +-0.016770782f, -0.020042717f, -0.023314439f, -0.026585912f, -0.029857099f, +-0.033127967f, -0.036398482f, -0.039668605f, -0.042938303f, -0.046207540f, +-0.049476285f, -0.052744497f, -0.056012146f, -0.059279196f, -0.062545612f, +-0.065811358f, -0.069076397f, -0.072340697f, -0.075604223f, -0.078866936f, +-0.082128808f, -0.085389800f, -0.088649876f, -0.091909006f, -0.095167145f, +-0.098424271f, -0.10168034f, -0.10493532f, -0.10818918f, -0.11144188f, +-0.11469338f, -0.11794366f, -0.12119267f, -0.12444039f, -0.12768677f, +-0.13093179f, -0.13417540f, -0.13741758f, -0.14065829f, -0.14389749f, +-0.14713514f, -0.15037122f, -0.15360570f, -0.15683852f, -0.16006967f, +-0.16329910f, -0.16652679f, -0.16975269f, -0.17297678f, -0.17619900f, +-0.17941935f, -0.18263777f, -0.18585424f, -0.18906870f, -0.19228116f, +-0.19549155f, -0.19869985f, -0.20190603f, -0.20511003f, -0.20831184f, +-0.21151142f, -0.21470875f, -0.21790376f, -0.22109644f, -0.22428675f, +-0.22747467f, -0.23066014f, -0.23384315f, -0.23702365f, -0.24020162f, +-0.24337701f, -0.24654980f, -0.24971995f, -0.25288740f, -0.25605217f, +-0.25921419f, -0.26237345f, -0.26552987f, -0.26868346f, -0.27183419f, +-0.27498198f, -0.27812684f, -0.28126872f, -0.28440759f, -0.28754342f, +-0.29067615f, -0.29380578f, -0.29693225f, -0.30005556f, -0.30317566f, +-0.30629250f, -0.30940607f, -0.31251630f, -0.31562322f, -0.31872672f, +-0.32182685f, -0.32492352f, -0.32801670f, -0.33110636f, -0.33419248f, +-0.33727503f, -0.34035397f, -0.34342924f, -0.34650084f, -0.34956875f, +-0.35263291f, -0.35569328f, -0.35874987f, -0.36180258f, -0.36485144f, +-0.36789638f, -0.37093741f, -0.37397444f, -0.37700745f, -0.38003644f, +-0.38306138f, -0.38608220f, -0.38909888f, -0.39211139f, -0.39511973f, +-0.39812380f, -0.40112361f, -0.40411916f, -0.40711036f, -0.41009718f, +-0.41307965f, -0.41605768f, -0.41903123f, -0.42200032f, -0.42496487f, +-0.42792490f, -0.43088034f, -0.43383113f, -0.43677729f, -0.43971881f, +-0.44265559f, -0.44558764f, -0.44851488f, -0.45143735f, -0.45435500f, +-0.45726776f, -0.46017563f, -0.46307856f, -0.46597654f, -0.46886954f, +-0.47175750f, -0.47464043f, -0.47751826f, -0.48039100f, -0.48325855f, +-0.48612097f, -0.48897815f, -0.49183011f, -0.49467680f, -0.49751821f, +-0.50035429f, -0.50318497f, -0.50601029f, -0.50883019f, -0.51164466f, +-0.51445359f, -0.51725709f, -0.52005500f, -0.52284735f, -0.52563411f, +-0.52841520f, -0.53119069f, -0.53396046f, -0.53672451f, -0.53948283f, +-0.54223537f, -0.54498214f, -0.54772300f, -0.55045801f, -0.55318713f, +-0.55591035f, -0.55862761f, -0.56133890f, -0.56404412f, -0.56674337f, +-0.56943649f, -0.57212353f, -0.57480448f, -0.57747924f, -0.58014780f, +-0.58281022f, -0.58546633f, -0.58811617f, -0.59075975f, -0.59339696f, +-0.59602785f, -0.59865236f, -0.60127044f, -0.60388207f, -0.60648727f, +-0.60908598f, -0.61167812f, -0.61426371f, -0.61684275f, -0.61941516f, +-0.62198097f, -0.62454009f, -0.62709254f, -0.62963831f, -0.63217729f, +-0.63470948f, -0.63723493f, -0.63975352f, -0.64226526f, -0.64477009f, +-0.64726806f, -0.64975911f, -0.65224314f, -0.65472025f, -0.65719032f, +-0.65965337f, -0.66210932f, -0.66455823f, -0.66700000f, -0.66943461f, +-0.67186207f, -0.67428231f, -0.67669535f, -0.67910111f, -0.68149966f, +-0.68389088f, -0.68627477f, -0.68865126f, -0.69102043f, -0.69338220f, +-0.69573659f, -0.69808346f, -0.70042288f, -0.70275480f, -0.70507920f, +-0.70739603f, -0.70970529f, -0.71200693f, -0.71430099f, -0.71658736f, +-0.71886611f, -0.72113711f, -0.72340041f, -0.72565591f, -0.72790372f, +-0.73014367f, -0.73237586f, -0.73460019f, -0.73681659f, -0.73902518f, +-0.74122584f, -0.74341851f, -0.74560326f, -0.74778003f, -0.74994880f, +-0.75210953f, -0.75426215f, -0.75640678f, -0.75854325f, -0.76067162f, +-0.76279181f, -0.76490390f, -0.76700771f, -0.76910341f, -0.77119076f, +-0.77326995f, -0.77534080f, -0.77740335f, -0.77945763f, -0.78150350f, +-0.78354102f, -0.78557014f, -0.78759086f, -0.78960317f, -0.79160696f, +-0.79360235f, -0.79558921f, -0.79756755f, -0.79953730f, -0.80149853f, +-0.80345118f, -0.80539525f, -0.80733067f, -0.80925739f, -0.81117553f, +-0.81308490f, -0.81498563f, -0.81687760f, -0.81876087f, -0.82063532f, +-0.82250100f, -0.82435787f, -0.82620591f, -0.82804507f, -0.82987541f, +-0.83169687f, -0.83350939f, -0.83531296f, -0.83710766f, -0.83889335f, +-0.84067005f, -0.84243774f, -0.84419644f, -0.84594607f, -0.84768665f, +-0.84941816f, -0.85114056f, -0.85285389f, -0.85455805f, -0.85625303f, +-0.85793889f, -0.85961550f, -0.86128294f, -0.86294121f, -0.86459017f, +-0.86622989f, -0.86786032f, -0.86948150f, -0.87109333f, -0.87269586f, +-0.87428904f, -0.87587279f, -0.87744725f, -0.87901229f, -0.88056785f, +-0.88211405f, -0.88365078f, -0.88517809f, -0.88669586f, -0.88820416f, +-0.88970292f, -0.89119220f, -0.89267188f, -0.89414203f, -0.89560264f, +-0.89705360f, -0.89849502f, -0.89992678f, -0.90134889f, -0.90276134f, +-0.90416414f, -0.90555727f, -0.90694070f, -0.90831441f, -0.90967834f, +-0.91103262f, -0.91237706f, -0.91371179f, -0.91503674f, -0.91635185f, +-0.91765714f, -0.91895264f, -0.92023826f, -0.92151409f, -0.92277998f, +-0.92403603f, -0.92528218f, -0.92651838f, -0.92774469f, -0.92896110f, +-0.93016750f, -0.93136400f, -0.93255049f, -0.93372697f, -0.93489349f, +-0.93604994f, -0.93719643f, -0.93833286f, -0.93945926f, -0.94057560f, +-0.94168180f, -0.94277799f, -0.94386405f, -0.94494003f, -0.94600588f, +-0.94706154f, -0.94810712f, -0.94914252f, -0.95016778f, -0.95118284f, +-0.95218778f, -0.95318246f, -0.95416695f, -0.95514119f, -0.95610523f, +-0.95705903f, -0.95800257f, -0.95893586f, -0.95985889f, -0.96077162f, +-0.96167403f, -0.96256620f, -0.96344805f, -0.96431959f, -0.96518075f, +-0.96603161f, -0.96687216f, -0.96770233f, -0.96852213f, -0.96933156f, +-0.97013056f, -0.97091925f, -0.97169751f, -0.97246534f, -0.97322279f, +-0.97396982f, -0.97470641f, -0.97543252f, -0.97614825f, -0.97685349f, +-0.97754824f, -0.97823256f, -0.97890645f, -0.97956979f, -0.98022264f, +-0.98086500f, -0.98149687f, -0.98211825f, -0.98272908f, -0.98332942f, +-0.98391914f, -0.98449844f, -0.98506713f, -0.98562527f, -0.98617285f, +-0.98670989f, -0.98723638f, -0.98775226f, -0.98825759f, -0.98875231f, +-0.98923647f, -0.98971003f, -0.99017298f, -0.99062532f, -0.99106705f, +-0.99149817f, -0.99191868f, -0.99232858f, -0.99272782f, -0.99311644f, +-0.99349445f, -0.99386179f, -0.99421853f, -0.99456459f, -0.99489999f, +-0.99522477f, -0.99553883f, -0.99584228f, -0.99613506f, -0.99641716f, +-0.99668860f, -0.99694937f, -0.99719942f, -0.99743885f, -0.99766755f, +-0.99788558f, -0.99809295f, -0.99828959f, -0.99847561f, -0.99865085f, +-0.99881548f, -0.99896932f, -0.99911255f, -0.99924499f, -0.99936682f, +-0.99947786f, -0.99957830f, -0.99966794f, -0.99974692f, -0.99981517f, +-0.99987274f, -0.99991959f, -0.99995571f, -0.99998116f, -0.99999589f, +0.99999964f, 0.99997288f, 0.99990326f, 0.99979085f, 0.99963558f, +0.99943751f, 0.99919659f, 0.99891287f, 0.99858636f, 0.99821711f, +0.99780506f, 0.99735034f, 0.99685282f, 0.99631262f, 0.99572974f, +0.99510419f, 0.99443603f, 0.99372530f, 0.99297196f, 0.99217612f, +0.99133772f, 0.99045694f, 0.98953366f, 0.98856801f, 0.98756003f, +0.98650974f, 0.98541719f, 0.98428243f, 0.98310548f, 0.98188645f, +0.98062533f, 0.97932225f, 0.97797716f, 0.97659022f, 0.97516143f, +0.97369087f, 0.97217858f, 0.97062469f, 0.96902919f, 0.96739221f, +0.96571374f, 0.96399397f, 0.96223283f, 0.96043050f, 0.95858705f, +0.95670253f, 0.95477700f, 0.95281059f, 0.95080340f, 0.94875544f, +0.94666684f, 0.94453770f, 0.94236809f, 0.94015813f, 0.93790787f, +0.93561745f, 0.93328691f, 0.93091643f, 0.92850608f, 0.92605597f, +0.92356616f, 0.92103678f, 0.91846794f, 0.91585976f, 0.91321236f, +0.91052586f, 0.90780038f, 0.90503591f, 0.90223277f, 0.89939094f, +0.89651060f, 0.89359182f, 0.89063478f, 0.88763964f, 0.88460642f, +0.88153529f, 0.87842643f, 0.87527996f, 0.87209594f, 0.86887461f, +0.86561602f, 0.86232042f, 0.85898781f, 0.85561842f, 0.85221243f, +0.84876984f, 0.84529096f, 0.84177583f, 0.83822471f, 0.83463764f, +0.83101481f, 0.82735640f, 0.82366252f, 0.81993335f, 0.81616908f, +0.81236988f, 0.80853581f, 0.80466717f, 0.80076402f, 0.79682660f, +0.79285502f, 0.78884947f, 0.78481019f, 0.78073722f, 0.77663082f, +0.77249116f, 0.76831841f, 0.76411277f, 0.75987434f, 0.75560343f, +0.75130010f, 0.74696463f, 0.74259710f, 0.73819780f, 0.73376691f, +0.72930455f, 0.72481096f, 0.72028631f, 0.71573079f, 0.71114463f, +0.70652801f, 0.70188117f, 0.69720417f, 0.69249737f, 0.68776089f, +0.68299496f, 0.67819971f, 0.67337549f, 0.66852236f, 0.66364062f, +0.65873051f, 0.65379208f, 0.64882571f, 0.64383155f, 0.63880974f, +0.63376063f, 0.62868434f, 0.62358117f, 0.61845124f, 0.61329484f, +0.60811216f, 0.60290343f, 0.59766883f, 0.59240872f, 0.58712316f, +0.58181250f, 0.57647687f, 0.57111657f, 0.56573176f, 0.56032276f, +0.55488980f, 0.54943299f, 0.54395270f, 0.53844911f, 0.53292239f, +0.52737290f, 0.52180082f, 0.51620632f, 0.51058978f, 0.50495136f, +0.49929130f, 0.49360985f, 0.48790723f, 0.48218375f, 0.47643960f, +0.47067502f, 0.46489030f, 0.45908567f, 0.45326138f, 0.44741765f, +0.44155475f, 0.43567297f, 0.42977250f, 0.42385364f, 0.41791660f, +0.41196167f, 0.40598908f, 0.39999911f, 0.39399201f, 0.38796803f, +0.38192743f, 0.37587047f, 0.36979741f, 0.36370850f, 0.35760403f, +0.35148421f, 0.34534934f, 0.33919969f, 0.33303553f, 0.32685706f, +0.32066461f, 0.31445843f, 0.30823877f, 0.30200592f, 0.29576012f, +0.28950164f, 0.28323078f, 0.27694780f, 0.27065292f, 0.26434645f, +0.25802869f, 0.25169984f, 0.24536023f, 0.23901010f, 0.23264973f, +0.22627939f, 0.21989937f, 0.21350993f, 0.20711134f, 0.20070387f, +0.19428782f, 0.18786344f, 0.18143101f, 0.17499080f, 0.16854310f, +0.16208819f, 0.15562633f, 0.14915779f, 0.14268288f, 0.13620184f, +0.12971498f, 0.12322257f, 0.11672486f, 0.11022217f, 0.10371475f, +0.097202882f, 0.090686858f, 0.084166944f, 0.077643424f, 0.071116582f, +0.064586692f, 0.058054037f, 0.051518895f, 0.044981543f, 0.038442269f, +0.031901345f, 0.025359053f, 0.018815678f, 0.012271495f, 0.0057267868f, +-0.00081816671f, -0.0073630852f, -0.013907688f, -0.020451695f, -0.026994826f, +-0.033536803f, -0.040077340f, -0.046616159f, -0.053152986f, -0.059687532f, +-0.066219524f, -0.072748676f, -0.079274714f, -0.085797355f, -0.092316322f, +-0.098831341f, -0.10534211f, -0.11184838f, -0.11834986f, -0.12484626f, +-0.13133731f, -0.13782275f, -0.14430228f, -0.15077563f, -0.15724251f, +-0.16370267f, -0.17015581f, -0.17660165f, -0.18303993f, -0.18947038f, +-0.19589271f, -0.20230664f, -0.20871192f, -0.21510825f, -0.22149536f, +-0.22787298f, -0.23424086f, -0.24059868f, -0.24694622f, -0.25328314f, +-0.25960925f, -0.26592422f, -0.27222782f, -0.27851975f, -0.28479972f, +-0.29106751f, -0.29732284f, -0.30356544f, -0.30979502f, -0.31601134f, +-0.32221413f, -0.32840309f, -0.33457801f, -0.34073856f, -0.34688455f, +-0.35301566f, -0.35913166f, -0.36523229f, -0.37131724f, -0.37738630f, +-0.38343921f, -0.38947567f, -0.39549544f, -0.40149832f, -0.40748394f, +-0.41345215f, -0.41940263f, -0.42533514f, -0.43124944f, -0.43714526f, +-0.44302234f, -0.44888046f, -0.45471936f, -0.46053877f, -0.46633846f, +-0.47211814f, -0.47787762f, -0.48361665f, -0.48933494f, -0.49503228f, +-0.50070840f, -0.50636309f, -0.51199609f, -0.51760709f, -0.52319598f, +-0.52876246f, -0.53430629f, -0.53982723f, -0.54532504f, -0.55079949f, +-0.55625033f, -0.56167740f, -0.56708032f, -0.57245898f, -0.57781315f, +-0.58314258f, -0.58844697f, -0.59372622f, -0.59897995f, -0.60420811f, +-0.60941035f, -0.61458647f, -0.61973625f, -0.62485951f, -0.62995601f, +-0.63502556f, -0.64006782f, -0.64508271f, -0.65007001f, -0.65502942f, +-0.65996075f, -0.66486382f, -0.66973841f, -0.67458433f, -0.67940134f, +-0.68418926f, -0.68894786f, -0.69367695f, -0.69837630f, -0.70304573f, +-0.70768511f, -0.71229410f, -0.71687263f, -0.72142041f, -0.72593731f, +-0.73042315f, -0.73487765f, -0.73930067f, -0.74369204f, -0.74805158f, +-0.75237900f, -0.75667429f, -0.76093709f, -0.76516730f, -0.76936477f, +-0.77352923f, -0.77766061f, -0.78175867f, -0.78582323f, -0.78985411f, +-0.79385114f, -0.79781419f, -0.80174309f, -0.80563760f, -0.80949765f, +-0.81332302f, -0.81711352f, -0.82086903f, -0.82458937f, -0.82827437f, +-0.83192390f, -0.83553779f, -0.83911592f, -0.84265804f, -0.84616417f, +-0.84963393f, -0.85306740f, -0.85646427f, -0.85982448f, -0.86314780f, +-0.86643422f, -0.86968350f, -0.87289548f, -0.87607014f, -0.87920725f, +-0.88230664f, -0.88536829f, -0.88839203f, -0.89137769f, -0.89432514f, +-0.89723432f, -0.90010506f, -0.90293723f, -0.90573072f, -0.90848541f, +-0.91120118f, -0.91387796f, -0.91651553f, -0.91911387f, -0.92167282f, +-0.92419231f, -0.92667222f, -0.92911243f, -0.93151283f, -0.93387336f, +-0.93619382f, -0.93847424f, -0.94071442f, -0.94291431f, -0.94507378f, +-0.94719279f, -0.94927126f, -0.95130903f, -0.95330608f, -0.95526224f, +-0.95717752f, -0.95905179f, -0.96088499f, -0.96267700f, -0.96442777f, +-0.96613729f, -0.96780539f, -0.96943200f, -0.97101706f, -0.97256058f, +-0.97406244f, -0.97552258f, -0.97694093f, -0.97831738f, -0.97965199f, +-0.98094457f, -0.98219514f, -0.98340368f, -0.98457009f, -0.98569429f, +-0.98677629f, -0.98781598f, -0.98881340f, -0.98976845f, -0.99068111f, +-0.99155134f, -0.99237907f, -0.99316430f, -0.99390697f, -0.99460709f, +-0.99526459f, -0.99587947f, -0.99645168f, -0.99698120f, -0.99746799f, +-0.99791211f, -0.99831343f, -0.99867201f, -0.99898779f, -0.99926084f, +-0.99949104f, -0.99967843f, -0.99982297f, -0.99992472f, -0.99998361f, +0.99999869f, 0.99989158f, 0.99961317f, 0.99916345f, 0.99854255f, +0.99775058f, 0.99678761f, 0.99565387f, 0.99434954f, 0.99287480f, +0.99122995f, 0.98941529f, 0.98743105f, 0.98527765f, 0.98295540f, +0.98046476f, 0.97780609f, 0.97497988f, 0.97198665f, 0.96882683f, +0.96550101f, 0.96200979f, 0.95835376f, 0.95453346f, 0.95054960f, +0.94640291f, 0.94209403f, 0.93762374f, 0.93299282f, 0.92820197f, +0.92325211f, 0.91814411f, 0.91287869f, 0.90745693f, 0.90187967f, +0.89614785f, 0.89026248f, 0.88422459f, 0.87803519f, 0.87169534f, +0.86520612f, 0.85856867f, 0.85178405f, 0.84485358f, 0.83777827f, +0.83055943f, 0.82319832f, 0.81569612f, 0.80805415f, 0.80027372f, +0.79235619f, 0.78430289f, 0.77611518f, 0.76779449f, 0.75934225f, +0.75075996f, 0.74204898f, 0.73321080f, 0.72424710f, 0.71515924f, +0.70594883f, 0.69661748f, 0.68716675f, 0.67759830f, 0.66791373f, +0.65811473f, 0.64820296f, 0.63818014f, 0.62804794f, 0.61780810f, +0.60746247f, 0.59701276f, 0.58646071f, 0.57580817f, 0.56505698f, +0.55420899f, 0.54326600f, 0.53222996f, 0.52110273f, 0.50988621f, +0.49858227f, 0.48719296f, 0.47572014f, 0.46416581f, 0.45253196f, +0.44082057f, 0.42903364f, 0.41717321f, 0.40524128f, 0.39323992f, +0.38117120f, 0.36903715f, 0.35683987f, 0.34458145f, 0.33226398f, +0.31988961f, 0.30746040f, 0.29497850f, 0.28244606f, 0.26986524f, +0.25723818f, 0.24456702f, 0.23185398f, 0.21910121f, 0.20631088f, +0.19348522f, 0.18062639f, 0.16773662f, 0.15481812f, 0.14187308f, +0.12890373f, 0.11591230f, 0.10290100f, 0.089872077f, 0.076827750f, +0.063770257f, 0.050701842f, 0.037624735f, 0.024541186f, 0.011453429f, +-0.0016362892f, -0.014725727f, -0.027812643f, -0.040894791f, -0.053969935f, +-0.067035832f, -0.080090240f, -0.093130924f, -0.10615565f, -0.11916219f, +-0.13214831f, -0.14511178f, -0.15805040f, -0.17096193f, -0.18384418f, +-0.19669491f, -0.20951195f, -0.22229309f, -0.23503613f, -0.24773891f, +-0.26039925f, -0.27301496f, -0.28558388f, -0.29810387f, -0.31057280f, +-0.32298848f, -0.33534884f, -0.34765175f, -0.35989508f, -0.37207675f, +-0.38419467f, -0.39624676f, -0.40823093f, -0.42014518f, -0.43198743f, +-0.44375566f, -0.45544785f, -0.46706200f, -0.47859612f, -0.49004826f, +-0.50141639f, -0.51269865f, -0.52389306f, -0.53499764f, -0.54601061f, +-0.55693001f, -0.56775403f, -0.57848072f, -0.58910829f, -0.59963489f, +-0.61005878f, -0.62037814f, -0.63059121f, -0.64069623f, -0.65069145f, +-0.66057515f, -0.67034572f, -0.68000144f, -0.68954057f, -0.69896162f, +-0.70826286f, -0.71744281f, -0.72649974f, -0.73543227f, -0.74423873f, +-0.75291771f, -0.76146764f, -0.76988715f, -0.77817470f, -0.78632891f, +-0.79434842f, -0.80223179f, -0.80997771f, -0.81758487f, -0.82505190f, +-0.83237761f, -0.83956063f, -0.84659988f, -0.85349399f, -0.86024189f, +-0.86684239f, -0.87329435f, -0.87959671f, -0.88574833f, -0.89174819f, +-0.89759529f, -0.90328854f, -0.90882701f, -0.91420978f, -0.91943592f, +-0.92450452f, -0.92941469f, -0.93416560f, -0.93875647f, -0.94318646f, +-0.94745487f, -0.95156091f, -0.95550388f, -0.95928317f, -0.96289814f, +-0.96634805f, -0.96963239f, -0.97275060f, -0.97570217f, -0.97848648f, +-0.98110318f, -0.98355180f, -0.98583186f, -0.98794299f, -0.98988485f, +-0.99165714f, -0.99325943f, -0.99469161f, -0.99595332f, -0.99704438f, +-0.99796462f, -0.99871385f, -0.99929196f, -0.99969882f, -0.99993443f, +0.99999464f, 0.99956632f, 0.99845290f, 0.99665523f, 0.99417448f, +0.99101239f, 0.98717111f, 0.98265326f, 0.97746199f, 0.97160077f, +0.96507365f, 0.95788515f, 0.95004016f, 0.94154406f, 0.93240267f, +0.92262226f, 0.91220951f, 0.90117162f, 0.88951606f, 0.87725091f, +0.86438453f, 0.85092574f, 0.83688372f, 0.82226819f, 0.80708915f, +0.79135692f, 0.77508235f, 0.75827658f, 0.74095112f, 0.72311783f, +0.70478898f, 0.68597710f, 0.66669506f, 0.64695615f, 0.62677377f, +0.60616189f, 0.58513457f, 0.56370622f, 0.54189157f, 0.51970547f, +0.49716324f, 0.47428027f, 0.45107225f, 0.42755505f, 0.40374488f, +0.37965798f, 0.35531086f, 0.33072025f, 0.30590299f, 0.28087607f, +0.25565663f, 0.23026201f, 0.20470956f, 0.17901683f, 0.15320139f, +0.12728097f, 0.10127331f, 0.075196236f, 0.049067631f, 0.022905400f, +-0.0032725304f, -0.029448219f, -0.055603724f, -0.081721120f, -0.10778251f, +-0.13377003f, -0.15966587f, -0.18545228f, -0.21111161f, -0.23662624f, +-0.26197869f, -0.28715160f, -0.31212771f, -0.33688989f, -0.36142120f, +-0.38570482f, -0.40972409f, -0.43346253f, -0.45690393f, -0.48003218f, +-0.50283146f, -0.52528608f, -0.54738069f, -0.56910020f, -0.59042966f, +-0.61135447f, -0.63186026f, -0.65193301f, -0.67155898f, -0.69072473f, +-0.70941705f, -0.72762316f, -0.74533063f, -0.76252723f, -0.77920127f, +-0.79534131f, -0.81093621f, -0.82597536f, -0.84044844f, -0.85434550f, +-0.86765707f, -0.88037395f, -0.89248747f, -0.90398932f, -0.91487163f, +-0.92512697f, -0.93474823f, -0.94372886f, -0.95206273f, -0.95974404f, +-0.96676767f, -0.97312868f, -0.97882277f, -0.98384601f, -0.98819500f, +-0.99186671f, -0.99485862f, -0.99716878f, -0.99879545f, -0.99973762f, +}; #endif static const CELTMode mode48000_960_120 = { diff --git a/drivers/opus/celt/static_modes_float_arm_ne10.h b/drivers/opus/celt/static_modes_float_arm_ne10.h new file mode 100644 index 0000000000..934a82a420 --- /dev/null +++ b/drivers/opus/celt/static_modes_float_arm_ne10.h @@ -0,0 +1,404 @@ +/* The contents of this file was automatically generated by + * dump_mode_arm_ne10.c with arguments: 48000 960 + * It contains static definitions for some pre-defined modes. */ +#include + +#ifndef NE10_FFT_PARAMS48000_960 +#define NE10_FFT_PARAMS48000_960 +static const ne10_int32_t ne10_factors_480[64] = { +4, 40, 4, 30, 2, 15, 5, 3, 3, 1, 1, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, }; +static const ne10_int32_t ne10_factors_240[64] = { +3, 20, 4, 15, 5, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, }; +static const ne10_int32_t ne10_factors_120[64] = { +3, 10, 2, 15, 5, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, }; +static const ne10_int32_t ne10_factors_60[64] = { +2, 5, 5, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, }; +static const ne10_fft_cpx_float32_t ne10_twiddles_480[480] = { +{1.0000000f,0.0000000f}, {1.0000000f,-0.0000000f}, {1.0000000f,-0.0000000f}, +{1.0000000f,-0.0000000f}, {0.91354543f,-0.40673664f}, {0.66913056f,-0.74314487f}, +{1.0000000f,-0.0000000f}, {0.66913056f,-0.74314487f}, {-0.10452851f,-0.99452192f}, +{1.0000000f,-0.0000000f}, {0.30901697f,-0.95105654f}, {-0.80901700f,-0.58778518f}, +{1.0000000f,-0.0000000f}, {-0.10452851f,-0.99452192f}, {-0.97814757f,0.20791179f}, +{1.0000000f,-0.0000000f}, {0.97814763f,-0.20791170f}, {0.91354543f,-0.40673664f}, +{0.80901700f,-0.58778524f}, {0.66913056f,-0.74314487f}, {0.49999997f,-0.86602545f}, +{0.30901697f,-0.95105654f}, {0.10452842f,-0.99452192f}, {-0.10452851f,-0.99452192f}, +{-0.30901703f,-0.95105648f}, {-0.50000006f,-0.86602533f}, {-0.66913068f,-0.74314475f}, +{-0.80901700f,-0.58778518f}, {-0.91354549f,-0.40673658f}, {-0.97814763f,-0.20791161f}, +{1.0000000f,-0.0000000f}, {0.99862951f,-0.052335959f}, {0.99452192f,-0.10452846f}, +{0.98768836f,-0.15643448f}, {0.97814763f,-0.20791170f}, {0.96592581f,-0.25881904f}, +{0.95105648f,-0.30901700f}, {0.93358040f,-0.35836795f}, {0.91354543f,-0.40673664f}, +{0.89100653f,-0.45399052f}, {0.86602545f,-0.50000000f}, {0.83867055f,-0.54463905f}, +{0.80901700f,-0.58778524f}, {0.77714598f,-0.62932038f}, {0.74314475f,-0.66913062f}, +{0.70710677f,-0.70710683f}, {0.66913056f,-0.74314487f}, {0.62932038f,-0.77714598f}, +{0.58778524f,-0.80901700f}, {0.54463899f,-0.83867055f}, {0.49999997f,-0.86602545f}, +{0.45399052f,-0.89100653f}, {0.40673661f,-0.91354549f}, {0.35836786f,-0.93358046f}, +{0.30901697f,-0.95105654f}, {0.25881907f,-0.96592581f}, {0.20791166f,-0.97814763f}, +{0.15643437f,-0.98768836f}, {0.10452842f,-0.99452192f}, {0.052335974f,-0.99862951f}, +{1.0000000f,-0.0000000f}, {0.99452192f,-0.10452846f}, {0.97814763f,-0.20791170f}, +{0.95105648f,-0.30901700f}, {0.91354543f,-0.40673664f}, {0.86602545f,-0.50000000f}, +{0.80901700f,-0.58778524f}, {0.74314475f,-0.66913062f}, {0.66913056f,-0.74314487f}, +{0.58778524f,-0.80901700f}, {0.49999997f,-0.86602545f}, {0.40673661f,-0.91354549f}, +{0.30901697f,-0.95105654f}, {0.20791166f,-0.97814763f}, {0.10452842f,-0.99452192f}, +{-4.3711388e-08f,-1.0000000f}, {-0.10452851f,-0.99452192f}, {-0.20791174f,-0.97814757f}, +{-0.30901703f,-0.95105648f}, {-0.40673670f,-0.91354543f}, {-0.50000006f,-0.86602533f}, +{-0.58778518f,-0.80901700f}, {-0.66913068f,-0.74314475f}, {-0.74314493f,-0.66913044f}, +{-0.80901700f,-0.58778518f}, {-0.86602539f,-0.50000006f}, {-0.91354549f,-0.40673658f}, +{-0.95105654f,-0.30901679f}, {-0.97814763f,-0.20791161f}, {-0.99452192f,-0.10452849f}, +{1.0000000f,-0.0000000f}, {0.98768836f,-0.15643448f}, {0.95105648f,-0.30901700f}, +{0.89100653f,-0.45399052f}, {0.80901700f,-0.58778524f}, {0.70710677f,-0.70710683f}, +{0.58778524f,-0.80901700f}, {0.45399052f,-0.89100653f}, {0.30901697f,-0.95105654f}, +{0.15643437f,-0.98768836f}, {-4.3711388e-08f,-1.0000000f}, {-0.15643445f,-0.98768836f}, +{-0.30901703f,-0.95105648f}, {-0.45399061f,-0.89100647f}, {-0.58778518f,-0.80901700f}, +{-0.70710677f,-0.70710677f}, {-0.80901700f,-0.58778518f}, {-0.89100659f,-0.45399037f}, +{-0.95105654f,-0.30901679f}, {-0.98768836f,-0.15643445f}, {-1.0000000f,8.7422777e-08f}, +{-0.98768830f,0.15643461f}, {-0.95105654f,0.30901697f}, {-0.89100653f,0.45399055f}, +{-0.80901694f,0.58778536f}, {-0.70710665f,0.70710689f}, {-0.58778507f,0.80901712f}, +{-0.45399022f,0.89100665f}, {-0.30901709f,0.95105648f}, {-0.15643452f,0.98768830f}, +{1.0000000f,-0.0000000f}, {0.99991435f,-0.013089596f}, {0.99965733f,-0.026176950f}, +{0.99922901f,-0.039259817f}, {0.99862951f,-0.052335959f}, {0.99785894f,-0.065403134f}, +{0.99691731f,-0.078459099f}, {0.99580491f,-0.091501623f}, {0.99452192f,-0.10452846f}, +{0.99306846f,-0.11753740f}, {0.99144489f,-0.13052620f}, {0.98965138f,-0.14349262f}, +{0.98768836f,-0.15643448f}, {0.98555607f,-0.16934951f}, {0.98325491f,-0.18223552f}, +{0.98078525f,-0.19509032f}, {0.97814763f,-0.20791170f}, {0.97534233f,-0.22069745f}, +{0.97236991f,-0.23344538f}, {0.96923089f,-0.24615330f}, {0.96592581f,-0.25881904f}, +{0.96245521f,-0.27144045f}, {0.95881975f,-0.28401536f}, {0.95501995f,-0.29654160f}, +{0.95105648f,-0.30901700f}, {0.94693011f,-0.32143945f}, {0.94264150f,-0.33380687f}, +{0.93819129f,-0.34611708f}, {0.93358040f,-0.35836795f}, {0.92880952f,-0.37055743f}, +{0.92387956f,-0.38268346f}, {0.91879117f,-0.39474389f}, {0.91354543f,-0.40673664f}, +{0.90814316f,-0.41865975f}, {0.90258527f,-0.43051112f}, {0.89687270f,-0.44228873f}, +{0.89100653f,-0.45399052f}, {0.88498765f,-0.46561453f}, {0.87881708f,-0.47715878f}, +{0.87249601f,-0.48862126f}, {0.86602545f,-0.50000000f}, {0.85940641f,-0.51129311f}, +{0.85264015f,-0.52249855f}, {0.84572786f,-0.53361452f}, {0.83867055f,-0.54463905f}, +{0.83146960f,-0.55557024f}, {0.82412618f,-0.56640625f}, {0.81664151f,-0.57714522f}, +{0.80901700f,-0.58778524f}, {0.80125380f,-0.59832460f}, {0.79335332f,-0.60876143f}, +{0.78531694f,-0.61909395f}, {0.77714598f,-0.62932038f}, {0.76884180f,-0.63943899f}, +{0.76040596f,-0.64944810f}, {0.75183982f,-0.65934587f}, {0.74314475f,-0.66913062f}, +{0.73432249f,-0.67880076f}, {0.72537434f,-0.68835455f}, {0.71630192f,-0.69779050f}, +{0.70710677f,-0.70710683f}, {0.69779044f,-0.71630198f}, {0.68835455f,-0.72537440f}, +{0.67880070f,-0.73432255f}, {0.66913056f,-0.74314487f}, {0.65934581f,-0.75183982f}, +{0.64944804f,-0.76040596f}, {0.63943899f,-0.76884186f}, {0.62932038f,-0.77714598f}, +{0.61909395f,-0.78531694f}, {0.60876137f,-0.79335338f}, {0.59832460f,-0.80125386f}, +{0.58778524f,-0.80901700f}, {0.57714516f,-0.81664151f}, {0.56640625f,-0.82412618f}, +{0.55557019f,-0.83146960f}, {0.54463899f,-0.83867055f}, {0.53361452f,-0.84572786f}, +{0.52249849f,-0.85264015f}, {0.51129311f,-0.85940641f}, {0.49999997f,-0.86602545f}, +{0.48862118f,-0.87249601f}, {0.47715876f,-0.87881708f}, {0.46561447f,-0.88498765f}, +{0.45399052f,-0.89100653f}, {0.44228867f,-0.89687276f}, {0.43051103f,-0.90258533f}, +{0.41865975f,-0.90814316f}, {0.40673661f,-0.91354549f}, {0.39474380f,-0.91879129f}, +{0.38268343f,-0.92387956f}, {0.37055740f,-0.92880958f}, {0.35836786f,-0.93358046f}, +{0.34611705f,-0.93819135f}, {0.33380681f,-0.94264150f}, {0.32143947f,-0.94693011f}, +{0.30901697f,-0.95105654f}, {0.29654151f,-0.95501995f}, {0.28401533f,-0.95881975f}, +{0.27144039f,-0.96245527f}, {0.25881907f,-0.96592581f}, {0.24615327f,-0.96923089f}, +{0.23344530f,-0.97236991f}, {0.22069745f,-0.97534233f}, {0.20791166f,-0.97814763f}, +{0.19509023f,-0.98078531f}, {0.18223552f,-0.98325491f}, {0.16934945f,-0.98555607f}, +{0.15643437f,-0.98768836f}, {0.14349259f,-0.98965138f}, {0.13052613f,-0.99144489f}, +{0.11753740f,-0.99306846f}, {0.10452842f,-0.99452192f}, {0.091501534f,-0.99580491f}, +{0.078459084f,-0.99691731f}, {0.065403074f,-0.99785894f}, {0.052335974f,-0.99862951f}, +{0.039259788f,-0.99922901f}, {0.026176875f,-0.99965733f}, {0.013089597f,-0.99991435f}, +{1.0000000f,-0.0000000f}, {0.99965733f,-0.026176950f}, {0.99862951f,-0.052335959f}, +{0.99691731f,-0.078459099f}, {0.99452192f,-0.10452846f}, {0.99144489f,-0.13052620f}, +{0.98768836f,-0.15643448f}, {0.98325491f,-0.18223552f}, {0.97814763f,-0.20791170f}, +{0.97236991f,-0.23344538f}, {0.96592581f,-0.25881904f}, {0.95881975f,-0.28401536f}, +{0.95105648f,-0.30901700f}, {0.94264150f,-0.33380687f}, {0.93358040f,-0.35836795f}, +{0.92387956f,-0.38268346f}, {0.91354543f,-0.40673664f}, {0.90258527f,-0.43051112f}, +{0.89100653f,-0.45399052f}, {0.87881708f,-0.47715878f}, {0.86602545f,-0.50000000f}, +{0.85264015f,-0.52249855f}, {0.83867055f,-0.54463905f}, {0.82412618f,-0.56640625f}, +{0.80901700f,-0.58778524f}, {0.79335332f,-0.60876143f}, {0.77714598f,-0.62932038f}, +{0.76040596f,-0.64944810f}, {0.74314475f,-0.66913062f}, {0.72537434f,-0.68835455f}, +{0.70710677f,-0.70710683f}, {0.68835455f,-0.72537440f}, {0.66913056f,-0.74314487f}, +{0.64944804f,-0.76040596f}, {0.62932038f,-0.77714598f}, {0.60876137f,-0.79335338f}, +{0.58778524f,-0.80901700f}, {0.56640625f,-0.82412618f}, {0.54463899f,-0.83867055f}, +{0.52249849f,-0.85264015f}, {0.49999997f,-0.86602545f}, {0.47715876f,-0.87881708f}, +{0.45399052f,-0.89100653f}, {0.43051103f,-0.90258533f}, {0.40673661f,-0.91354549f}, +{0.38268343f,-0.92387956f}, {0.35836786f,-0.93358046f}, {0.33380681f,-0.94264150f}, +{0.30901697f,-0.95105654f}, {0.28401533f,-0.95881975f}, {0.25881907f,-0.96592581f}, +{0.23344530f,-0.97236991f}, {0.20791166f,-0.97814763f}, {0.18223552f,-0.98325491f}, +{0.15643437f,-0.98768836f}, {0.13052613f,-0.99144489f}, {0.10452842f,-0.99452192f}, +{0.078459084f,-0.99691731f}, {0.052335974f,-0.99862951f}, {0.026176875f,-0.99965733f}, +{-4.3711388e-08f,-1.0000000f}, {-0.026176963f,-0.99965733f}, {-0.052336060f,-0.99862951f}, +{-0.078459173f,-0.99691731f}, {-0.10452851f,-0.99452192f}, {-0.13052621f,-0.99144489f}, +{-0.15643445f,-0.98768836f}, {-0.18223560f,-0.98325491f}, {-0.20791174f,-0.97814757f}, +{-0.23344538f,-0.97236991f}, {-0.25881916f,-0.96592581f}, {-0.28401542f,-0.95881969f}, +{-0.30901703f,-0.95105648f}, {-0.33380687f,-0.94264150f}, {-0.35836795f,-0.93358040f}, +{-0.38268352f,-0.92387950f}, {-0.40673670f,-0.91354543f}, {-0.43051112f,-0.90258527f}, +{-0.45399061f,-0.89100647f}, {-0.47715873f,-0.87881708f}, {-0.50000006f,-0.86602533f}, +{-0.52249867f,-0.85264009f}, {-0.54463905f,-0.83867055f}, {-0.56640631f,-0.82412612f}, +{-0.58778518f,-0.80901700f}, {-0.60876143f,-0.79335332f}, {-0.62932050f,-0.77714586f}, +{-0.64944804f,-0.76040596f}, {-0.66913068f,-0.74314475f}, {-0.68835467f,-0.72537428f}, +{-0.70710677f,-0.70710677f}, {-0.72537446f,-0.68835449f}, {-0.74314493f,-0.66913044f}, +{-0.76040596f,-0.64944804f}, {-0.77714604f,-0.62932026f}, {-0.79335332f,-0.60876143f}, +{-0.80901700f,-0.58778518f}, {-0.82412624f,-0.56640613f}, {-0.83867055f,-0.54463899f}, +{-0.85264021f,-0.52249849f}, {-0.86602539f,-0.50000006f}, {-0.87881714f,-0.47715873f}, +{-0.89100659f,-0.45399037f}, {-0.90258527f,-0.43051112f}, {-0.91354549f,-0.40673658f}, +{-0.92387956f,-0.38268328f}, {-0.93358040f,-0.35836792f}, {-0.94264150f,-0.33380675f}, +{-0.95105654f,-0.30901679f}, {-0.95881975f,-0.28401530f}, {-0.96592587f,-0.25881892f}, +{-0.97236991f,-0.23344538f}, {-0.97814763f,-0.20791161f}, {-0.98325491f,-0.18223536f}, +{-0.98768836f,-0.15643445f}, {-0.99144489f,-0.13052608f}, {-0.99452192f,-0.10452849f}, +{-0.99691737f,-0.078459039f}, {-0.99862957f,-0.052335810f}, {-0.99965733f,-0.026176952f}, +{1.0000000f,-0.0000000f}, {0.99922901f,-0.039259817f}, {0.99691731f,-0.078459099f}, +{0.99306846f,-0.11753740f}, {0.98768836f,-0.15643448f}, {0.98078525f,-0.19509032f}, +{0.97236991f,-0.23344538f}, {0.96245521f,-0.27144045f}, {0.95105648f,-0.30901700f}, +{0.93819129f,-0.34611708f}, {0.92387956f,-0.38268346f}, {0.90814316f,-0.41865975f}, +{0.89100653f,-0.45399052f}, {0.87249601f,-0.48862126f}, {0.85264015f,-0.52249855f}, +{0.83146960f,-0.55557024f}, {0.80901700f,-0.58778524f}, {0.78531694f,-0.61909395f}, +{0.76040596f,-0.64944810f}, {0.73432249f,-0.67880076f}, {0.70710677f,-0.70710683f}, +{0.67880070f,-0.73432255f}, {0.64944804f,-0.76040596f}, {0.61909395f,-0.78531694f}, +{0.58778524f,-0.80901700f}, {0.55557019f,-0.83146960f}, {0.52249849f,-0.85264015f}, +{0.48862118f,-0.87249601f}, {0.45399052f,-0.89100653f}, {0.41865975f,-0.90814316f}, +{0.38268343f,-0.92387956f}, {0.34611705f,-0.93819135f}, {0.30901697f,-0.95105654f}, +{0.27144039f,-0.96245527f}, {0.23344530f,-0.97236991f}, {0.19509023f,-0.98078531f}, +{0.15643437f,-0.98768836f}, {0.11753740f,-0.99306846f}, {0.078459084f,-0.99691731f}, +{0.039259788f,-0.99922901f}, {-4.3711388e-08f,-1.0000000f}, {-0.039259877f,-0.99922901f}, +{-0.078459173f,-0.99691731f}, {-0.11753749f,-0.99306846f}, {-0.15643445f,-0.98768836f}, +{-0.19509032f,-0.98078525f}, {-0.23344538f,-0.97236991f}, {-0.27144048f,-0.96245521f}, +{-0.30901703f,-0.95105648f}, {-0.34611711f,-0.93819129f}, {-0.38268352f,-0.92387950f}, +{-0.41865984f,-0.90814310f}, {-0.45399061f,-0.89100647f}, {-0.48862135f,-0.87249595f}, +{-0.52249867f,-0.85264009f}, {-0.55557036f,-0.83146954f}, {-0.58778518f,-0.80901700f}, +{-0.61909389f,-0.78531694f}, {-0.64944804f,-0.76040596f}, {-0.67880076f,-0.73432249f}, +{-0.70710677f,-0.70710677f}, {-0.73432249f,-0.67880070f}, {-0.76040596f,-0.64944804f}, +{-0.78531694f,-0.61909389f}, {-0.80901700f,-0.58778518f}, {-0.83146966f,-0.55557019f}, +{-0.85264021f,-0.52249849f}, {-0.87249607f,-0.48862115f}, {-0.89100659f,-0.45399037f}, +{-0.90814322f,-0.41865960f}, {-0.92387956f,-0.38268328f}, {-0.93819135f,-0.34611690f}, +{-0.95105654f,-0.30901679f}, {-0.96245521f,-0.27144048f}, {-0.97236991f,-0.23344538f}, +{-0.98078531f,-0.19509031f}, {-0.98768836f,-0.15643445f}, {-0.99306846f,-0.11753736f}, +{-0.99691737f,-0.078459039f}, {-0.99922901f,-0.039259743f}, {-1.0000000f,8.7422777e-08f}, +{-0.99922901f,0.039259918f}, {-0.99691731f,0.078459218f}, {-0.99306846f,0.11753753f}, +{-0.98768830f,0.15643461f}, {-0.98078525f,0.19509049f}, {-0.97236985f,0.23344554f}, +{-0.96245515f,0.27144065f}, {-0.95105654f,0.30901697f}, {-0.93819135f,0.34611705f}, +{-0.92387956f,0.38268346f}, {-0.90814316f,0.41865975f}, {-0.89100653f,0.45399055f}, +{-0.87249601f,0.48862129f}, {-0.85264015f,0.52249861f}, {-0.83146960f,0.55557030f}, +{-0.80901694f,0.58778536f}, {-0.78531688f,0.61909401f}, {-0.76040590f,0.64944816f}, +{-0.73432243f,0.67880082f}, {-0.70710665f,0.70710689f}, {-0.67880058f,0.73432261f}, +{-0.64944792f,0.76040608f}, {-0.61909378f,0.78531706f}, {-0.58778507f,0.80901712f}, +{-0.55557001f,0.83146977f}, {-0.52249837f,0.85264033f}, {-0.48862100f,0.87249613f}, +{-0.45399022f,0.89100665f}, {-0.41865945f,0.90814328f}, {-0.38268313f,0.92387968f}, +{-0.34611672f,0.93819147f}, {-0.30901709f,0.95105648f}, {-0.27144054f,0.96245521f}, +{-0.23344545f,0.97236991f}, {-0.19509038f,0.98078525f}, {-0.15643452f,0.98768830f}, +{-0.11753743f,0.99306846f}, {-0.078459114f,0.99691731f}, {-0.039259821f,0.99922901f}, +}; +static const ne10_fft_cpx_float32_t ne10_twiddles_240[240] = { +{1.0000000f,0.0000000f}, {1.0000000f,-0.0000000f}, {1.0000000f,-0.0000000f}, +{1.0000000f,-0.0000000f}, {0.91354543f,-0.40673664f}, {0.66913056f,-0.74314487f}, +{1.0000000f,-0.0000000f}, {0.66913056f,-0.74314487f}, {-0.10452851f,-0.99452192f}, +{1.0000000f,-0.0000000f}, {0.30901697f,-0.95105654f}, {-0.80901700f,-0.58778518f}, +{1.0000000f,-0.0000000f}, {-0.10452851f,-0.99452192f}, {-0.97814757f,0.20791179f}, +{1.0000000f,-0.0000000f}, {0.99452192f,-0.10452846f}, {0.97814763f,-0.20791170f}, +{0.95105648f,-0.30901700f}, {0.91354543f,-0.40673664f}, {0.86602545f,-0.50000000f}, +{0.80901700f,-0.58778524f}, {0.74314475f,-0.66913062f}, {0.66913056f,-0.74314487f}, +{0.58778524f,-0.80901700f}, {0.49999997f,-0.86602545f}, {0.40673661f,-0.91354549f}, +{0.30901697f,-0.95105654f}, {0.20791166f,-0.97814763f}, {0.10452842f,-0.99452192f}, +{1.0000000f,-0.0000000f}, {0.97814763f,-0.20791170f}, {0.91354543f,-0.40673664f}, +{0.80901700f,-0.58778524f}, {0.66913056f,-0.74314487f}, {0.49999997f,-0.86602545f}, +{0.30901697f,-0.95105654f}, {0.10452842f,-0.99452192f}, {-0.10452851f,-0.99452192f}, +{-0.30901703f,-0.95105648f}, {-0.50000006f,-0.86602533f}, {-0.66913068f,-0.74314475f}, +{-0.80901700f,-0.58778518f}, {-0.91354549f,-0.40673658f}, {-0.97814763f,-0.20791161f}, +{1.0000000f,-0.0000000f}, {0.95105648f,-0.30901700f}, {0.80901700f,-0.58778524f}, +{0.58778524f,-0.80901700f}, {0.30901697f,-0.95105654f}, {-4.3711388e-08f,-1.0000000f}, +{-0.30901703f,-0.95105648f}, {-0.58778518f,-0.80901700f}, {-0.80901700f,-0.58778518f}, +{-0.95105654f,-0.30901679f}, {-1.0000000f,8.7422777e-08f}, {-0.95105654f,0.30901697f}, +{-0.80901694f,0.58778536f}, {-0.58778507f,0.80901712f}, {-0.30901709f,0.95105648f}, +{1.0000000f,-0.0000000f}, {0.99965733f,-0.026176950f}, {0.99862951f,-0.052335959f}, +{0.99691731f,-0.078459099f}, {0.99452192f,-0.10452846f}, {0.99144489f,-0.13052620f}, +{0.98768836f,-0.15643448f}, {0.98325491f,-0.18223552f}, {0.97814763f,-0.20791170f}, +{0.97236991f,-0.23344538f}, {0.96592581f,-0.25881904f}, {0.95881975f,-0.28401536f}, +{0.95105648f,-0.30901700f}, {0.94264150f,-0.33380687f}, {0.93358040f,-0.35836795f}, +{0.92387956f,-0.38268346f}, {0.91354543f,-0.40673664f}, {0.90258527f,-0.43051112f}, +{0.89100653f,-0.45399052f}, {0.87881708f,-0.47715878f}, {0.86602545f,-0.50000000f}, +{0.85264015f,-0.52249855f}, {0.83867055f,-0.54463905f}, {0.82412618f,-0.56640625f}, +{0.80901700f,-0.58778524f}, {0.79335332f,-0.60876143f}, {0.77714598f,-0.62932038f}, +{0.76040596f,-0.64944810f}, {0.74314475f,-0.66913062f}, {0.72537434f,-0.68835455f}, +{0.70710677f,-0.70710683f}, {0.68835455f,-0.72537440f}, {0.66913056f,-0.74314487f}, +{0.64944804f,-0.76040596f}, {0.62932038f,-0.77714598f}, {0.60876137f,-0.79335338f}, +{0.58778524f,-0.80901700f}, {0.56640625f,-0.82412618f}, {0.54463899f,-0.83867055f}, +{0.52249849f,-0.85264015f}, {0.49999997f,-0.86602545f}, {0.47715876f,-0.87881708f}, +{0.45399052f,-0.89100653f}, {0.43051103f,-0.90258533f}, {0.40673661f,-0.91354549f}, +{0.38268343f,-0.92387956f}, {0.35836786f,-0.93358046f}, {0.33380681f,-0.94264150f}, +{0.30901697f,-0.95105654f}, {0.28401533f,-0.95881975f}, {0.25881907f,-0.96592581f}, +{0.23344530f,-0.97236991f}, {0.20791166f,-0.97814763f}, {0.18223552f,-0.98325491f}, +{0.15643437f,-0.98768836f}, {0.13052613f,-0.99144489f}, {0.10452842f,-0.99452192f}, +{0.078459084f,-0.99691731f}, {0.052335974f,-0.99862951f}, {0.026176875f,-0.99965733f}, +{1.0000000f,-0.0000000f}, {0.99862951f,-0.052335959f}, {0.99452192f,-0.10452846f}, +{0.98768836f,-0.15643448f}, {0.97814763f,-0.20791170f}, {0.96592581f,-0.25881904f}, +{0.95105648f,-0.30901700f}, {0.93358040f,-0.35836795f}, {0.91354543f,-0.40673664f}, +{0.89100653f,-0.45399052f}, {0.86602545f,-0.50000000f}, {0.83867055f,-0.54463905f}, +{0.80901700f,-0.58778524f}, {0.77714598f,-0.62932038f}, {0.74314475f,-0.66913062f}, +{0.70710677f,-0.70710683f}, {0.66913056f,-0.74314487f}, {0.62932038f,-0.77714598f}, +{0.58778524f,-0.80901700f}, {0.54463899f,-0.83867055f}, {0.49999997f,-0.86602545f}, +{0.45399052f,-0.89100653f}, {0.40673661f,-0.91354549f}, {0.35836786f,-0.93358046f}, +{0.30901697f,-0.95105654f}, {0.25881907f,-0.96592581f}, {0.20791166f,-0.97814763f}, +{0.15643437f,-0.98768836f}, {0.10452842f,-0.99452192f}, {0.052335974f,-0.99862951f}, +{-4.3711388e-08f,-1.0000000f}, {-0.052336060f,-0.99862951f}, {-0.10452851f,-0.99452192f}, +{-0.15643445f,-0.98768836f}, {-0.20791174f,-0.97814757f}, {-0.25881916f,-0.96592581f}, +{-0.30901703f,-0.95105648f}, {-0.35836795f,-0.93358040f}, {-0.40673670f,-0.91354543f}, +{-0.45399061f,-0.89100647f}, {-0.50000006f,-0.86602533f}, {-0.54463905f,-0.83867055f}, +{-0.58778518f,-0.80901700f}, {-0.62932050f,-0.77714586f}, {-0.66913068f,-0.74314475f}, +{-0.70710677f,-0.70710677f}, {-0.74314493f,-0.66913044f}, {-0.77714604f,-0.62932026f}, +{-0.80901700f,-0.58778518f}, {-0.83867055f,-0.54463899f}, {-0.86602539f,-0.50000006f}, +{-0.89100659f,-0.45399037f}, {-0.91354549f,-0.40673658f}, {-0.93358040f,-0.35836792f}, +{-0.95105654f,-0.30901679f}, {-0.96592587f,-0.25881892f}, {-0.97814763f,-0.20791161f}, +{-0.98768836f,-0.15643445f}, {-0.99452192f,-0.10452849f}, {-0.99862957f,-0.052335810f}, +{1.0000000f,-0.0000000f}, {0.99691731f,-0.078459099f}, {0.98768836f,-0.15643448f}, +{0.97236991f,-0.23344538f}, {0.95105648f,-0.30901700f}, {0.92387956f,-0.38268346f}, +{0.89100653f,-0.45399052f}, {0.85264015f,-0.52249855f}, {0.80901700f,-0.58778524f}, +{0.76040596f,-0.64944810f}, {0.70710677f,-0.70710683f}, {0.64944804f,-0.76040596f}, +{0.58778524f,-0.80901700f}, {0.52249849f,-0.85264015f}, {0.45399052f,-0.89100653f}, +{0.38268343f,-0.92387956f}, {0.30901697f,-0.95105654f}, {0.23344530f,-0.97236991f}, +{0.15643437f,-0.98768836f}, {0.078459084f,-0.99691731f}, {-4.3711388e-08f,-1.0000000f}, +{-0.078459173f,-0.99691731f}, {-0.15643445f,-0.98768836f}, {-0.23344538f,-0.97236991f}, +{-0.30901703f,-0.95105648f}, {-0.38268352f,-0.92387950f}, {-0.45399061f,-0.89100647f}, +{-0.52249867f,-0.85264009f}, {-0.58778518f,-0.80901700f}, {-0.64944804f,-0.76040596f}, +{-0.70710677f,-0.70710677f}, {-0.76040596f,-0.64944804f}, {-0.80901700f,-0.58778518f}, +{-0.85264021f,-0.52249849f}, {-0.89100659f,-0.45399037f}, {-0.92387956f,-0.38268328f}, +{-0.95105654f,-0.30901679f}, {-0.97236991f,-0.23344538f}, {-0.98768836f,-0.15643445f}, +{-0.99691737f,-0.078459039f}, {-1.0000000f,8.7422777e-08f}, {-0.99691731f,0.078459218f}, +{-0.98768830f,0.15643461f}, {-0.97236985f,0.23344554f}, {-0.95105654f,0.30901697f}, +{-0.92387956f,0.38268346f}, {-0.89100653f,0.45399055f}, {-0.85264015f,0.52249861f}, +{-0.80901694f,0.58778536f}, {-0.76040590f,0.64944816f}, {-0.70710665f,0.70710689f}, +{-0.64944792f,0.76040608f}, {-0.58778507f,0.80901712f}, {-0.52249837f,0.85264033f}, +{-0.45399022f,0.89100665f}, {-0.38268313f,0.92387968f}, {-0.30901709f,0.95105648f}, +{-0.23344545f,0.97236991f}, {-0.15643452f,0.98768830f}, {-0.078459114f,0.99691731f}, +}; +static const ne10_fft_cpx_float32_t ne10_twiddles_120[120] = { +{1.0000000f,0.0000000f}, {1.0000000f,-0.0000000f}, {1.0000000f,-0.0000000f}, +{1.0000000f,-0.0000000f}, {0.91354543f,-0.40673664f}, {0.66913056f,-0.74314487f}, +{1.0000000f,-0.0000000f}, {0.66913056f,-0.74314487f}, {-0.10452851f,-0.99452192f}, +{1.0000000f,-0.0000000f}, {0.30901697f,-0.95105654f}, {-0.80901700f,-0.58778518f}, +{1.0000000f,-0.0000000f}, {-0.10452851f,-0.99452192f}, {-0.97814757f,0.20791179f}, +{1.0000000f,-0.0000000f}, {0.97814763f,-0.20791170f}, {0.91354543f,-0.40673664f}, +{0.80901700f,-0.58778524f}, {0.66913056f,-0.74314487f}, {0.49999997f,-0.86602545f}, +{0.30901697f,-0.95105654f}, {0.10452842f,-0.99452192f}, {-0.10452851f,-0.99452192f}, +{-0.30901703f,-0.95105648f}, {-0.50000006f,-0.86602533f}, {-0.66913068f,-0.74314475f}, +{-0.80901700f,-0.58778518f}, {-0.91354549f,-0.40673658f}, {-0.97814763f,-0.20791161f}, +{1.0000000f,-0.0000000f}, {0.99862951f,-0.052335959f}, {0.99452192f,-0.10452846f}, +{0.98768836f,-0.15643448f}, {0.97814763f,-0.20791170f}, {0.96592581f,-0.25881904f}, +{0.95105648f,-0.30901700f}, {0.93358040f,-0.35836795f}, {0.91354543f,-0.40673664f}, +{0.89100653f,-0.45399052f}, {0.86602545f,-0.50000000f}, {0.83867055f,-0.54463905f}, +{0.80901700f,-0.58778524f}, {0.77714598f,-0.62932038f}, {0.74314475f,-0.66913062f}, +{0.70710677f,-0.70710683f}, {0.66913056f,-0.74314487f}, {0.62932038f,-0.77714598f}, +{0.58778524f,-0.80901700f}, {0.54463899f,-0.83867055f}, {0.49999997f,-0.86602545f}, +{0.45399052f,-0.89100653f}, {0.40673661f,-0.91354549f}, {0.35836786f,-0.93358046f}, +{0.30901697f,-0.95105654f}, {0.25881907f,-0.96592581f}, {0.20791166f,-0.97814763f}, +{0.15643437f,-0.98768836f}, {0.10452842f,-0.99452192f}, {0.052335974f,-0.99862951f}, +{1.0000000f,-0.0000000f}, {0.99452192f,-0.10452846f}, {0.97814763f,-0.20791170f}, +{0.95105648f,-0.30901700f}, {0.91354543f,-0.40673664f}, {0.86602545f,-0.50000000f}, +{0.80901700f,-0.58778524f}, {0.74314475f,-0.66913062f}, {0.66913056f,-0.74314487f}, +{0.58778524f,-0.80901700f}, {0.49999997f,-0.86602545f}, {0.40673661f,-0.91354549f}, +{0.30901697f,-0.95105654f}, {0.20791166f,-0.97814763f}, {0.10452842f,-0.99452192f}, +{-4.3711388e-08f,-1.0000000f}, {-0.10452851f,-0.99452192f}, {-0.20791174f,-0.97814757f}, +{-0.30901703f,-0.95105648f}, {-0.40673670f,-0.91354543f}, {-0.50000006f,-0.86602533f}, +{-0.58778518f,-0.80901700f}, {-0.66913068f,-0.74314475f}, {-0.74314493f,-0.66913044f}, +{-0.80901700f,-0.58778518f}, {-0.86602539f,-0.50000006f}, {-0.91354549f,-0.40673658f}, +{-0.95105654f,-0.30901679f}, {-0.97814763f,-0.20791161f}, {-0.99452192f,-0.10452849f}, +{1.0000000f,-0.0000000f}, {0.98768836f,-0.15643448f}, {0.95105648f,-0.30901700f}, +{0.89100653f,-0.45399052f}, {0.80901700f,-0.58778524f}, {0.70710677f,-0.70710683f}, +{0.58778524f,-0.80901700f}, {0.45399052f,-0.89100653f}, {0.30901697f,-0.95105654f}, +{0.15643437f,-0.98768836f}, {-4.3711388e-08f,-1.0000000f}, {-0.15643445f,-0.98768836f}, +{-0.30901703f,-0.95105648f}, {-0.45399061f,-0.89100647f}, {-0.58778518f,-0.80901700f}, +{-0.70710677f,-0.70710677f}, {-0.80901700f,-0.58778518f}, {-0.89100659f,-0.45399037f}, +{-0.95105654f,-0.30901679f}, {-0.98768836f,-0.15643445f}, {-1.0000000f,8.7422777e-08f}, +{-0.98768830f,0.15643461f}, {-0.95105654f,0.30901697f}, {-0.89100653f,0.45399055f}, +{-0.80901694f,0.58778536f}, {-0.70710665f,0.70710689f}, {-0.58778507f,0.80901712f}, +{-0.45399022f,0.89100665f}, {-0.30901709f,0.95105648f}, {-0.15643452f,0.98768830f}, +}; +static const ne10_fft_cpx_float32_t ne10_twiddles_60[60] = { +{1.0000000f,0.0000000f}, {1.0000000f,-0.0000000f}, {1.0000000f,-0.0000000f}, +{1.0000000f,-0.0000000f}, {0.91354543f,-0.40673664f}, {0.66913056f,-0.74314487f}, +{1.0000000f,-0.0000000f}, {0.66913056f,-0.74314487f}, {-0.10452851f,-0.99452192f}, +{1.0000000f,-0.0000000f}, {0.30901697f,-0.95105654f}, {-0.80901700f,-0.58778518f}, +{1.0000000f,-0.0000000f}, {-0.10452851f,-0.99452192f}, {-0.97814757f,0.20791179f}, +{1.0000000f,-0.0000000f}, {0.99452192f,-0.10452846f}, {0.97814763f,-0.20791170f}, +{0.95105648f,-0.30901700f}, {0.91354543f,-0.40673664f}, {0.86602545f,-0.50000000f}, +{0.80901700f,-0.58778524f}, {0.74314475f,-0.66913062f}, {0.66913056f,-0.74314487f}, +{0.58778524f,-0.80901700f}, {0.49999997f,-0.86602545f}, {0.40673661f,-0.91354549f}, +{0.30901697f,-0.95105654f}, {0.20791166f,-0.97814763f}, {0.10452842f,-0.99452192f}, +{1.0000000f,-0.0000000f}, {0.97814763f,-0.20791170f}, {0.91354543f,-0.40673664f}, +{0.80901700f,-0.58778524f}, {0.66913056f,-0.74314487f}, {0.49999997f,-0.86602545f}, +{0.30901697f,-0.95105654f}, {0.10452842f,-0.99452192f}, {-0.10452851f,-0.99452192f}, +{-0.30901703f,-0.95105648f}, {-0.50000006f,-0.86602533f}, {-0.66913068f,-0.74314475f}, +{-0.80901700f,-0.58778518f}, {-0.91354549f,-0.40673658f}, {-0.97814763f,-0.20791161f}, +{1.0000000f,-0.0000000f}, {0.95105648f,-0.30901700f}, {0.80901700f,-0.58778524f}, +{0.58778524f,-0.80901700f}, {0.30901697f,-0.95105654f}, {-4.3711388e-08f,-1.0000000f}, +{-0.30901703f,-0.95105648f}, {-0.58778518f,-0.80901700f}, {-0.80901700f,-0.58778518f}, +{-0.95105654f,-0.30901679f}, {-1.0000000f,8.7422777e-08f}, {-0.95105654f,0.30901697f}, +{-0.80901694f,0.58778536f}, {-0.58778507f,0.80901712f}, {-0.30901709f,0.95105648f}, +}; +static const ne10_fft_state_float32_t ne10_fft_state_float32_t_480 = { +120, +(ne10_int32_t *)ne10_factors_480, +(ne10_fft_cpx_float32_t *)ne10_twiddles_480, +NULL, +(ne10_fft_cpx_float32_t *)&ne10_twiddles_480[120], +/* is_forward_scaled = true */ +(ne10_int32_t) 1, +/* is_backward_scaled = false */ +(ne10_int32_t) 0, +}; +static const arch_fft_state cfg_arch_480 = { +1, +(void *)&ne10_fft_state_float32_t_480, +}; + +static const ne10_fft_state_float32_t ne10_fft_state_float32_t_240 = { +60, +(ne10_int32_t *)ne10_factors_240, +(ne10_fft_cpx_float32_t *)ne10_twiddles_240, +NULL, +(ne10_fft_cpx_float32_t *)&ne10_twiddles_240[60], +/* is_forward_scaled = true */ +(ne10_int32_t) 1, +/* is_backward_scaled = false */ +(ne10_int32_t) 0, +}; +static const arch_fft_state cfg_arch_240 = { +1, +(void *)&ne10_fft_state_float32_t_240, +}; + +static const ne10_fft_state_float32_t ne10_fft_state_float32_t_120 = { +30, +(ne10_int32_t *)ne10_factors_120, +(ne10_fft_cpx_float32_t *)ne10_twiddles_120, +NULL, +(ne10_fft_cpx_float32_t *)&ne10_twiddles_120[30], +/* is_forward_scaled = true */ +(ne10_int32_t) 1, +/* is_backward_scaled = false */ +(ne10_int32_t) 0, +}; +static const arch_fft_state cfg_arch_120 = { +1, +(void *)&ne10_fft_state_float32_t_120, +}; + +static const ne10_fft_state_float32_t ne10_fft_state_float32_t_60 = { +15, +(ne10_int32_t *)ne10_factors_60, +(ne10_fft_cpx_float32_t *)ne10_twiddles_60, +NULL, +(ne10_fft_cpx_float32_t *)&ne10_twiddles_60[15], +/* is_forward_scaled = true */ +(ne10_int32_t) 1, +/* is_backward_scaled = false */ +(ne10_int32_t) 0, +}; +static const arch_fft_state cfg_arch_60 = { +1, +(void *)&ne10_fft_state_float32_t_60, +}; + +#endif /* end NE10_FFT_PARAMS48000_960 */ diff --git a/drivers/opus/celt/tests/test_unit_cwrs32.c b/drivers/opus/celt/tests/test_unit_cwrs32.c deleted file mode 100644 index db43e3392b..0000000000 --- a/drivers/opus/celt/tests/test_unit_cwrs32.c +++ /dev/null @@ -1,161 +0,0 @@ -/* Copyright (c) 2008-2011 Xiph.Org Foundation, Mozilla Corporation, - Gregory Maxwell - Written by Jean-Marc Valin, Gregory Maxwell, and Timothy B. Terriberry */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef OPUS_ENABLED -#include "opus/opus_config.h" -#endif - -#include -#include - -#ifndef CUSTOM_MODES -#define CUSTOM_MODES -#else -#define TEST_CUSTOM_MODES -#endif - -#define CELT_C -#include "opus/celt/stack_alloc.h" -#include "opus/celt/entenc.c" -#include "opus/celt/entdec.c" -#include "opus/celt/entcode.c" -#include "opus/celt/cwrs.c" -#include "opus/celt/mathops.c" -#include "opus/celt/rate.h" - -#define NMAX (240) -#define KMAX (128) - -#ifdef TEST_CUSTOM_MODES - -#define NDIMS (44) -static const int pn[NDIMS]={ - 2, 3, 4, 5, 6, 7, 8, 9, 10, - 11, 12, 13, 14, 15, 16, 18, 20, 22, - 24, 26, 28, 30, 32, 36, 40, 44, 48, - 52, 56, 60, 64, 72, 80, 88, 96, 104, - 112, 120, 128, 144, 160, 176, 192, 208 -}; -static const int pkmax[NDIMS]={ - 128, 128, 128, 128, 88, 52, 36, 26, 22, - 18, 16, 15, 13, 12, 12, 11, 10, 9, - 9, 8, 8, 7, 7, 7, 7, 6, 6, - 6, 6, 6, 5, 5, 5, 5, 5, 5, - 4, 4, 4, 4, 4, 4, 4, 4 -}; - -#else /* TEST_CUSTOM_MODES */ - -#define NDIMS (22) -static const int pn[NDIMS]={ - 2, 3, 4, 6, 8, 9, 11, 12, 16, - 18, 22, 24, 32, 36, 44, 48, 64, 72, - 88, 96, 144, 176 -}; -static const int pkmax[NDIMS]={ - 128, 128, 128, 88, 36, 26, 18, 16, 12, - 11, 9, 9, 7, 7, 6, 6, 5, 5, - 5, 5, 4, 4 -}; - -#endif - -int main(void){ - int t; - int n; - ALLOC_STACK; - for(t=0;tpkmax[t])break; - printf("Testing CWRS with N=%i, K=%i...\n",n,k); -#if defined(SMALL_FOOTPRINT) - nc=ncwrs_urow(n,k,uu); -#else - nc=CELT_PVQ_V(n,k); -#endif - inc=nc/20000; - if(inc<1)inc=1; - for(i=0;i");*/ -#if defined(SMALL_FOOTPRINT) - ii=icwrs(n,k,&v,y,u); -#else - ii=icwrs(n,y); - v=CELT_PVQ_V(n,k); -#endif - if(ii!=i){ - fprintf(stderr,"Combination-index mismatch (%lu!=%lu).\n", - (long)ii,(long)i); - return 1; - } - if(v!=nc){ - fprintf(stderr,"Combination count mismatch (%lu!=%lu).\n", - (long)v,(long)nc); - return 2; - } - /*printf(" %6u\n",i);*/ - } - /*printf("\n");*/ - } - } - return 0; -} diff --git a/drivers/opus/celt/tests/test_unit_dft.c b/drivers/opus/celt/tests/test_unit_dft.c deleted file mode 100644 index 9c0db3e9ac..0000000000 --- a/drivers/opus/celt/tests/test_unit_dft.c +++ /dev/null @@ -1,164 +0,0 @@ -/* Copyright (c) 2008 Xiph.Org Foundation - Written by Jean-Marc Valin */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef OPUS_ENABLED -#include "opus/opus_config.h" -#endif - -#define SKIP_CONFIG_H - -#ifndef CUSTOM_MODES -#define CUSTOM_MODES -#endif - -#include - -#define CELT_C -#include "opus/celt/stack_alloc.h" -#include "opus/celt/kiss_fft.h" -#include "opus/celt/kiss_fft.c" -#include "opus/celt/mathops.c" -#include "opus/celt/entcode.c" - - -#ifndef M_PI -#define M_PI 3.141592653 -#endif - -int ret = 0; - -void check(kiss_fft_cpx * in,kiss_fft_cpx * out,int nfft,int isinverse) -{ - int bin,k; - double errpow=0,sigpow=0, snr; - - for (bin=0;bin1) { - int k; - for (k=1;k -#include -#include -#include -#include "opus/celt/entcode.h" -#include "opus/celt/entenc.h" -#include "opus/celt/entdec.h" -#include - -#include "opus/celt/entenc.c" -#include "opus/celt/entdec.c" -#include "opus/celt/entcode.c" - -#ifndef M_LOG2E -# define M_LOG2E 1.4426950408889634074 -#endif -#define DATA_SIZE 10000000 -#define DATA_SIZE2 10000 - -int main(int _argc,char **_argv){ - ec_enc enc; - ec_dec dec; - long nbits; - long nbits2; - double entropy; - int ft; - int ftb; - int sz; - int i; - int ret; - unsigned int sym; - unsigned int seed; - unsigned char *ptr; - const char *env_seed; - ret=0; - entropy=0; - if (_argc > 2) { - fprintf(stderr, "Usage: %s []\n", _argv[0]); - return 1; - } - env_seed = getenv("SEED"); - if (_argc > 1) - seed = atoi(_argv[1]); - else if (env_seed) - seed = atoi(env_seed); - else - seed = time(NULL); - /*Testing encoding of raw bit values.*/ - ptr = (unsigned char *)malloc(DATA_SIZE); - ec_enc_init(&enc,ptr, DATA_SIZE); - for(ft=2;ft<1024;ft++){ - for(i=0;i>(rand()%11U))+1U)+10; - sz=rand()/((RAND_MAX>>(rand()%9U))+1U); - data=(unsigned *)malloc(sz*sizeof(*data)); - tell=(unsigned *)malloc((sz+1)*sizeof(*tell)); - ec_enc_init(&enc,ptr,DATA_SIZE2); - zeros = rand()%13==0; - tell[0]=ec_tell_frac(&enc); - for(j=0;j>(rand()%9U))+1U); - logp1=(unsigned *)malloc(sz*sizeof(*logp1)); - data=(unsigned *)malloc(sz*sizeof(*data)); - tell=(unsigned *)malloc((sz+1)*sizeof(*tell)); - enc_method=(unsigned *)malloc(sz*sizeof(*enc_method)); - ec_enc_init(&enc,ptr,DATA_SIZE2); - tell[0]=ec_tell_frac(&enc); - for(j=0;j>1)+1); - logp1[j]=(rand()%15)+1; - enc_method[j]=rand()/((RAND_MAX>>2)+1); - switch(enc_method[j]){ - case 0:{ - ec_encode(&enc,data[j]?(1<>2)+1); - switch(dec_method){ - case 0:{ - fs=ec_decode(&dec,1<=(1<=(1< -#include -#include "opus/celt/laplace.h" -#define CELT_C -#include "opus/celt/stack_alloc.h" - -#include "opus/celt/entenc.c" -#include "opus/celt/entdec.c" -#include "opus/celt/entcode.c" -#include "opus/celt/laplace.c" - -#define DATA_SIZE 40000 - -int ec_laplace_get_start_freq(int decay) -{ - opus_uint32 ft = 32768 - LAPLACE_MINP*(2*LAPLACE_NMIN+1); - int fs = (ft*(16384-decay))/(16384+decay); - return fs+LAPLACE_MINP; -} - -int main(void) -{ - int i; - int ret = 0; - ec_enc enc; - ec_dec dec; - unsigned char *ptr; - int val[10000], decay[10000]; - ALLOC_STACK; - ptr = (unsigned char *)malloc(DATA_SIZE); - ec_enc_init(&enc,ptr,DATA_SIZE); - - val[0] = 3; decay[0] = 6000; - val[1] = 0; decay[1] = 5800; - val[2] = -1; decay[2] = 5600; - for (i=3;i<10000;i++) - { - val[i] = rand()%15-7; - decay[i] = rand()%11000+5000; - } - for (i=0;i<10000;i++) - ec_laplace_encode(&enc, &val[i], - ec_laplace_get_start_freq(decay[i]), decay[i]); - - ec_enc_done(&enc); - - ec_dec_init(&dec,ec_get_buffer(&enc),ec_range_bytes(&enc)); - - for (i=0;i<10000;i++) - { - int d = ec_laplace_decode(&dec, - ec_laplace_get_start_freq(decay[i]), decay[i]); - if (d != val[i]) - { - fprintf (stderr, "Got %d instead of %d\n", d, val[i]); - ret = 1; - } - } - - return ret; -} diff --git a/drivers/opus/celt/tests/test_unit_mathops.c b/drivers/opus/celt/tests/test_unit_mathops.c deleted file mode 100644 index 0e3f300d40..0000000000 --- a/drivers/opus/celt/tests/test_unit_mathops.c +++ /dev/null @@ -1,275 +0,0 @@ -/* Copyright (c) 2008-2011 Xiph.Org Foundation, Mozilla Corporation, - Gregory Maxwell - Written by Jean-Marc Valin, Gregory Maxwell, and Timothy B. Terriberry */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef OPUS_ENABLED -#include "opus/opus_config.h" -#endif - -#ifndef CUSTOM_MODES -#define CUSTOM_MODES -#endif - -#define CELT_C - -#include "opus/celt/mathops.c" -#include "opus/celt/entenc.c" -#include "opus/celt/entdec.c" -#include "opus/celt/entcode.c" -#include "opus/celt/bands.c" -#include "opus/celt/quant_bands.c" -#include "opus/celt/laplace.c" -#include "opus/celt/vq.c" -#include "opus/celt/cwrs.c" -#include -#include - -#ifdef OPUS_FIXED_POINT -#define WORD "%d" -#else -#define WORD "%f" -#endif - -int ret = 0; - -void testdiv(void) -{ - opus_int32 i; - for (i=1;i<=327670;i++) - { - double prod; - opus_val32 val; - val = celt_rcp(i); -#ifdef OPUS_FIXED_POINT - prod = (1./32768./65526.)*val*i; -#else - prod = val*i; -#endif - if (fabs(prod-1) > .00025) - { - fprintf (stderr, "div failed: 1/%d="WORD" (product = %f)\n", i, val, prod); - ret = 1; - } - } -} - -void testsqrt(void) -{ - opus_int32 i; - for (i=1;i<=1000000000;i++) - { - double ratio; - opus_val16 val; - val = celt_sqrt(i); - ratio = val/sqrt(i); - if (fabs(ratio - 1) > .0005 && fabs(val-sqrt(i)) > 2) - { - fprintf (stderr, "sqrt failed: sqrt(%d)="WORD" (ratio = %f)\n", i, val, ratio); - ret = 1; - } - i+= i>>10; - } -} - -void testbitexactcos(void) -{ - int i; - opus_int32 min_d,max_d,last,chk; - chk=max_d=0; - last=min_d=32767; - for(i=64;i<=16320;i++) - { - opus_int32 d; - opus_int32 q=bitexact_cos(i); - chk ^= q*i; - d = last - q; - if (d>max_d)max_d=d; - if (dmax_d)max_d=d; - if (d0.0009) - { - fprintf (stderr, "celt_log2 failed: fabs((1.442695040888963387*log(x))-celt_log2(x))>0.001 (x = %f, error = %f)\n", x,error); - ret = 1; - } - } -} - -void testexp2(void) -{ - float x; - for (x=-11.0;x<24.0;x+=0.0007) - { - float error = fabs(x-(1.442695040888963387*log(celt_exp2(x)))); - if (error>0.0002) - { - fprintf (stderr, "celt_exp2 failed: fabs(x-(1.442695040888963387*log(celt_exp2(x))))>0.0005 (x = %f, error = %f)\n", x,error); - ret = 1; - } - } -} - -void testexp2log2(void) -{ - float x; - for (x=-11.0;x<24.0;x+=0.0007) - { - float error = fabs(x-(celt_log2(celt_exp2(x)))); - if (error>0.001) - { - fprintf (stderr, "celt_log2/celt_exp2 failed: fabs(x-(celt_log2(celt_exp2(x))))>0.001 (x = %f, error = %f)\n", x,error); - ret = 1; - } - } -} -#else -void testlog2(void) -{ - opus_val32 x; - for (x=8;x<1073741824;x+=(x>>3)) - { - float error = fabs((1.442695040888963387*log(x/16384.0))-celt_log2(x)/1024.0); - if (error>0.003) - { - fprintf (stderr, "celt_log2 failed: x = %ld, error = %f\n", (long)x,error); - ret = 1; - } - } -} - -void testexp2(void) -{ - opus_val16 x; - for (x=-32768;x<15360;x++) - { - float error1 = fabs(x/1024.0-(1.442695040888963387*log(celt_exp2(x)/65536.0))); - float error2 = fabs(exp(0.6931471805599453094*x/1024.0)-celt_exp2(x)/65536.0); - if (error1>0.0002&&error2>0.00004) - { - fprintf (stderr, "celt_exp2 failed: x = "WORD", error1 = %f, error2 = %f\n", x,error1,error2); - ret = 1; - } - } -} - -void testexp2log2(void) -{ - opus_val32 x; - for (x=8;x<65536;x+=(x>>3)) - { - float error = fabs(x-0.25*celt_exp2(celt_log2(x)))/16384; - if (error>0.004) - { - fprintf (stderr, "celt_log2/celt_exp2 failed: fabs(x-(celt_exp2(celt_log2(x))))>0.001 (x = %ld, error = %f)\n", (long)x,error); - ret = 1; - } - } -} - -void testilog2(void) -{ - opus_val32 x; - for (x=1;x<=268435455;x+=127) - { - opus_val32 lg; - opus_val32 y; - - lg = celt_ilog2(x); - if (lg<0 || lg>=31) - { - printf("celt_ilog2 failed: 0<=celt_ilog2(x)<31 (x = %d, celt_ilog2(x) = %d)\n",x,lg); - ret = 1; - } - y = 1<>1)>=y) - { - printf("celt_ilog2 failed: 2**celt_ilog2(x)<=x<2**(celt_ilog2(x)+1) (x = %d, 2**celt_ilog2(x) = %d)\n",x,y); - ret = 1; - } - } -} -#endif - -int main(void) -{ - testbitexactcos(); - testbitexactlog2tan(); - testdiv(); - testsqrt(); - testlog2(); - testexp2(); - testexp2log2(); -#ifdef OPUS_FIXED_POINT - testilog2(); -#endif - return ret; -} diff --git a/drivers/opus/celt/tests/test_unit_mdct.c b/drivers/opus/celt/tests/test_unit_mdct.c deleted file mode 100644 index 0be03db2e8..0000000000 --- a/drivers/opus/celt/tests/test_unit_mdct.c +++ /dev/null @@ -1,210 +0,0 @@ -/* Copyright (c) 2008-2011 Xiph.Org Foundation - Written by Jean-Marc Valin */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef OPUS_ENABLED -#include "opus/opus_config.h" -#endif - -#define SKIP_CONFIG_H - -#ifndef CUSTOM_MODES -#define CUSTOM_MODES -#endif - -#include - -#define CELT_C -#include "opus/celt/mdct.h" -#include "opus/celt/stack_alloc.h" - -#include "opus/celt/kiss_fft.c" -#include "opus/celt/mdct.c" -#include "opus/celt/mathops.c" -#include "opus/celt/entcode.c" - -#ifndef M_PI -#define M_PI 3.141592653 -#endif - -int ret = 0; -void check(kiss_fft_scalar * in,kiss_fft_scalar * out,int nfft,int isinverse) -{ - int bin,k; - double errpow=0,sigpow=0; - double snr; - for (bin=0;bin1) { - int k; - for (k=1;k -#include -#include "opus/celt/vq.c" -#include "opus/celt/cwrs.c" -#include "opus/celt/entcode.c" -#include "opus/celt/entenc.c" -#include "opus/celt/entdec.c" -#include "opus/celt/mathops.c" -#include "opus/celt/bands.h" -#include -#define MAX_SIZE 100 - -int ret=0; -void test_rotation(int N, int K) -{ - int i; - double err = 0, ener = 0, snr, snr0; - opus_val16 x0[MAX_SIZE]; - opus_val16 x1[MAX_SIZE]; - for (i=0;i 20) - { - fprintf(stderr, "FAIL!\n"); - ret = 1; - } -} - -int main(void) -{ - ALLOC_STACK; - test_rotation(15, 3); - test_rotation(23, 5); - test_rotation(50, 3); - test_rotation(80, 1); - return ret; -} diff --git a/drivers/opus/celt/tests/test_unit_types.c b/drivers/opus/celt/tests/test_unit_types.c deleted file mode 100644 index 12d5ca72f8..0000000000 --- a/drivers/opus/celt/tests/test_unit_types.c +++ /dev/null @@ -1,50 +0,0 @@ -/* Copyright (c) 2008-2011 Xiph.Org Foundation - Written by Jean-Marc Valin */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef OPUS_ENABLED -#include "opus/opus_config.h" -#endif - -#include "opus/opus_types.h" -#include - -int main(void) -{ - opus_int16 i = 1; - i <<= 14; - if (i>>14 != 1) - { - fprintf(stderr, "opus_int16 isn't 16 bits\n"); - return 1; - } - if (sizeof(opus_int16)*2 != sizeof(opus_int32)) - { - fprintf(stderr, "16*2 != 32\n"); - return 1; - } - return 0; -} diff --git a/drivers/opus/celt/vq.c b/drivers/opus/celt/vq.c index 29a98a3a63..3061e3006f 100644 --- a/drivers/opus/celt/vq.c +++ b/drivers/opus/celt/vq.c @@ -25,10 +25,7 @@ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ - -#ifdef OPUS_ENABLED #include "opus/opus_config.h" -#endif #include "opus/celt/mathops.h" #include "opus/celt/cwrs.h" @@ -37,19 +34,23 @@ #include "opus/celt/os_support.h" #include "opus/celt/bands.h" #include "opus/celt/rate.h" +#include "opus/celt/pitch.h" +#ifndef OVERRIDE_vq_exp_rotation1 static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s) { int i; + opus_val16 ms; celt_norm *Xptr; Xptr = X; + ms = NEG16(s); for (i=0;i=0;i--) @@ -57,10 +58,11 @@ static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_ celt_norm x1, x2; x1 = Xptr[0]; x2 = Xptr[stride]; - Xptr[stride] = EXTRACT16(SHR32(MULT16_16(c,x2) + MULT16_16(s,x1), 15)); - *Xptr-- = EXTRACT16(SHR32(MULT16_16(c,x1) - MULT16_16(s,x2), 15)); + Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2), s, x1), 15)); + *Xptr-- = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15)); } } +#endif /* OVERRIDE_vq_exp_rotation1 */ static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread) { @@ -91,7 +93,7 @@ static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int } /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for extract_collapse_mask().*/ - len /= stride; + len = celt_udiv(len, stride); for (i=0;i0, "alg_unquant() needs at least one pulse"); celt_assert2(N>1, "alg_unquant() needs at least two dimensions"); ALLOC(iy, N, int); - decode_pulses(iy, N, K, dec); - Ryy = 0; - i=0; - do { - Ryy = MAC16_16(Ryy, iy[i], iy[i]); - } while (++i < N); + Ryy = decode_pulses(iy, N, K, dec); normalise_residual(iy, X, N, Ryy, gain); exp_rotation(X, N, -1, B, K, spread); collapse_mask = extract_collapse_mask(iy, N, B); @@ -344,21 +342,18 @@ unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B, return collapse_mask; } -void renormalise_vector(celt_norm *X, int N, opus_val16 gain) +#ifndef OVERRIDE_renormalise_vector +void renormalise_vector(celt_norm *X, int N, opus_val16 gain, int arch) { int i; #ifdef OPUS_FIXED_POINT int k; #endif - opus_val32 E = EPSILON; + opus_val32 E; opus_val16 g; opus_val32 t; - celt_norm *xptr = X; - for (i=0;i>1; #endif @@ -373,8 +368,9 @@ void renormalise_vector(celt_norm *X, int N, opus_val16 gain) } /*return celt_sqrt(E);*/ } +#endif /* OVERRIDE_renormalise_vector */ -int stereo_itheta(celt_norm *X, celt_norm *Y, int stereo, int N) +int stereo_itheta(const celt_norm *X, const celt_norm *Y, int stereo, int N, int arch) { int i; int itheta; @@ -393,14 +389,8 @@ int stereo_itheta(celt_norm *X, celt_norm *Y, int stereo, int N) Eside = MAC16_16(Eside, s, s); } } else { - for (i=0;i +#include +#include +#include "opus/celt/celt_lpc.h" +#include "opus/celt/stack_alloc.h" +#include "opus/celt/mathops.h" +#include "opus/celt/pitch.h" +#include "opus/celt/x86/x86cpu.h" + +#if defined(FIXED_POINT) + +void celt_fir_sse4_1(const opus_val16 *_x, + const opus_val16 *num, + opus_val16 *_y, + int N, + int ord, + opus_val16 *mem, + int arch) +{ + int i,j; + VARDECL(opus_val16, rnum); + VARDECL(opus_val16, x); + + __m128i vecNoA; + opus_int32 noA ; + SAVE_STACK; + + ALLOC(rnum, ord, opus_val16); + ALLOC(x, N+ord, opus_val16); + for(i=0;i> 1; + vecNoA = _mm_set_epi32(noA, noA, noA, noA); + + for (i=0;i +#include "opus/celt/arch.h" + +void xcorr_kernel_sse(const opus_val16 *x, const opus_val16 *y, opus_val32 sum[4], int len) +{ + int j; + __m128 xsum1, xsum2; + xsum1 = _mm_loadu_ps(sum); + xsum2 = _mm_setzero_ps(); + + for (j = 0; j < len-3; j += 4) + { + __m128 x0 = _mm_loadu_ps(x+j); + __m128 yj = _mm_loadu_ps(y+j); + __m128 y3 = _mm_loadu_ps(y+j+3); + + xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0x00),yj)); + xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0x55), + _mm_shuffle_ps(yj,y3,0x49))); + xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0xaa), + _mm_shuffle_ps(yj,y3,0x9e))); + xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0xff),y3)); + } + if (j < len) + { + xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j))); + if (++j < len) + { + xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j))); + if (++j < len) + { + xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j))); + } + } + } + _mm_storeu_ps(sum,_mm_add_ps(xsum1,xsum2)); +} + + +void dual_inner_prod_sse(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02, + int N, opus_val32 *xy1, opus_val32 *xy2) +{ + int i; + __m128 xsum1, xsum2; + xsum1 = _mm_setzero_ps(); + xsum2 = _mm_setzero_ps(); + for (i=0;i -#include "opus/celt/arch.h" +#if defined(OPUS_X86_MAY_HAVE_SSE4_1) && defined(FIXED_POINT) +void xcorr_kernel_sse4_1( + const opus_int16 *x, + const opus_int16 *y, + opus_val32 sum[4], + int len); +#endif + +#if defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT) +void xcorr_kernel_sse( + const opus_val16 *x, + const opus_val16 *y, + opus_val32 sum[4], + int len); +#endif +#if defined(OPUS_X86_PRESUME_SSE4_1) && defined(FIXED_POINT) #define OVERRIDE_XCORR_KERNEL -static OPUS_INLINE void xcorr_kernel(const opus_val16 *x, const opus_val16 *y, opus_val32 sum[4], int len) -{ - int j; - __m128 xsum1, xsum2; - xsum1 = _mm_loadu_ps(sum); - xsum2 = _mm_setzero_ps(); - - for (j = 0; j < len-3; j += 4) - { - __m128 x0 = _mm_loadu_ps(x+j); - __m128 yj = _mm_loadu_ps(y+j); - __m128 y3 = _mm_loadu_ps(y+j+3); - - xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0x00),yj)); - xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0x55), - _mm_shuffle_ps(yj,y3,0x49))); - xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0xaa), - _mm_shuffle_ps(yj,y3,0x9e))); - xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0xff),y3)); - } - if (j < len) - { - xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j))); - if (++j < len) - { - xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j))); - if (++j < len) - { - xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j))); - } - } - } - _mm_storeu_ps(sum,_mm_add_ps(xsum1,xsum2)); -} +#define xcorr_kernel(x, y, sum, len, arch) \ + ((void)arch, xcorr_kernel_sse4_1(x, y, sum, len)) -#define OVERRIDE_DUAL_INNER_PROD -static OPUS_INLINE void dual_inner_prod(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02, - int N, opus_val32 *xy1, opus_val32 *xy2) -{ - int i; - __m128 xsum1, xsum2; - xsum1 = _mm_setzero_ps(); - xsum2 = _mm_setzero_ps(); - for (i=0;i +#include + +#include "opus/silk/macros.h" +#include "opus/celt/celt_lpc.h" +#include "opus/celt/stack_alloc.h" +#include "opus/celt/mathops.h" +#include "opus/celt/pitch.h" + +#if defined(OPUS_X86_MAY_HAVE_SSE2) && defined(FIXED_POINT) +opus_val32 celt_inner_prod_sse2(const opus_val16 *x, const opus_val16 *y, + int N) +{ + opus_int i, dataSize16; + opus_int32 sum; + + __m128i inVec1_76543210, inVec1_FEDCBA98, acc1; + __m128i inVec2_76543210, inVec2_FEDCBA98, acc2; + + sum = 0; + dataSize16 = N & ~15; + + acc1 = _mm_setzero_si128(); + acc2 = _mm_setzero_si128(); + + for (i=0;i= 8) + { + inVec1_76543210 = _mm_loadu_si128((__m128i *)(&x[i + 0])); + inVec2_76543210 = _mm_loadu_si128((__m128i *)(&y[i + 0])); + + inVec1_76543210 = _mm_madd_epi16(inVec1_76543210, inVec2_76543210); + + acc1 = _mm_add_epi32(acc1, inVec1_76543210); + i += 8; + } + + acc1 = _mm_add_epi32(acc1, _mm_unpackhi_epi64( acc1, acc1)); + acc1 = _mm_add_epi32(acc1, _mm_shufflelo_epi16( acc1, 0x0E)); + sum += _mm_cvtsi128_si32(acc1); + + for (;i +#include + +#include "opus/silk/macros.h" +#include "opus/celt/celt_lpc.h" +#include "opus/celt/stack_alloc.h" +#include "opus/celt/mathops.h" +#include "opus/celt/pitch.h" + +#if defined(OPUS_X86_MAY_HAVE_SSE4_1) && defined(FIXED_POINT) +#include +#include "opus/celt/x86/x86cpu.h" + +opus_val32 celt_inner_prod_sse4_1(const opus_val16 *x, const opus_val16 *y, + int N) +{ + opus_int i, dataSize16; + opus_int32 sum; + __m128i inVec1_76543210, inVec1_FEDCBA98, acc1; + __m128i inVec2_76543210, inVec2_FEDCBA98, acc2; + __m128i inVec1_3210, inVec2_3210; + + sum = 0; + dataSize16 = N & ~15; + + acc1 = _mm_setzero_si128(); + acc2 = _mm_setzero_si128(); + + for (i=0;i= 8) + { + inVec1_76543210 = _mm_loadu_si128((__m128i *)(&x[i + 0])); + inVec2_76543210 = _mm_loadu_si128((__m128i *)(&y[i + 0])); + + inVec1_76543210 = _mm_madd_epi16(inVec1_76543210, inVec2_76543210); + + acc1 = _mm_add_epi32(acc1, inVec1_76543210); + i += 8; + } + + if (N - i >= 4) + { + inVec1_3210 = OP_CVTEPI16_EPI32_M64(&x[i + 0]); + inVec2_3210 = OP_CVTEPI16_EPI32_M64(&y[i + 0]); + + inVec1_3210 = _mm_mullo_epi32(inVec1_3210, inVec2_3210); + + acc1 = _mm_add_epi32(acc1, inVec1_3210); + i += 4; + } + + acc1 = _mm_add_epi32(acc1, _mm_unpackhi_epi64(acc1, acc1)); + acc1 = _mm_add_epi32(acc1, _mm_shufflelo_epi16(acc1, 0x0E)); + + sum += _mm_cvtsi128_si32(acc1); + + for (;i= 3); + + sum0 = _mm_setzero_si128(); + sum1 = _mm_setzero_si128(); + sum2 = _mm_setzero_si128(); + sum3 = _mm_setzero_si128(); + + for (j=0;j<(len-7);j+=8) + { + vecX = _mm_loadu_si128((__m128i *)(&x[j + 0])); + vecY0 = _mm_loadu_si128((__m128i *)(&y[j + 0])); + vecY1 = _mm_loadu_si128((__m128i *)(&y[j + 1])); + vecY2 = _mm_loadu_si128((__m128i *)(&y[j + 2])); + vecY3 = _mm_loadu_si128((__m128i *)(&y[j + 3])); + + sum0 = _mm_add_epi32(sum0, _mm_madd_epi16(vecX, vecY0)); + sum1 = _mm_add_epi32(sum1, _mm_madd_epi16(vecX, vecY1)); + sum2 = _mm_add_epi32(sum2, _mm_madd_epi16(vecX, vecY2)); + sum3 = _mm_add_epi32(sum3, _mm_madd_epi16(vecX, vecY3)); + } + + sum0 = _mm_add_epi32(sum0, _mm_unpackhi_epi64( sum0, sum0)); + sum0 = _mm_add_epi32(sum0, _mm_shufflelo_epi16( sum0, 0x0E)); + + sum1 = _mm_add_epi32(sum1, _mm_unpackhi_epi64( sum1, sum1)); + sum1 = _mm_add_epi32(sum1, _mm_shufflelo_epi16( sum1, 0x0E)); + + sum2 = _mm_add_epi32(sum2, _mm_unpackhi_epi64( sum2, sum2)); + sum2 = _mm_add_epi32(sum2, _mm_shufflelo_epi16( sum2, 0x0E)); + + sum3 = _mm_add_epi32(sum3, _mm_unpackhi_epi64( sum3, sum3)); + sum3 = _mm_add_epi32(sum3, _mm_shufflelo_epi16( sum3, 0x0E)); + + vecSum = _mm_unpacklo_epi64(_mm_unpacklo_epi32(sum0, sum1), + _mm_unpacklo_epi32(sum2, sum3)); + + for (;j<(len-3);j+=4) + { + vecX = OP_CVTEPI16_EPI32_M64(&x[j + 0]); + vecX0 = _mm_shuffle_epi32(vecX, 0x00); + vecX1 = _mm_shuffle_epi32(vecX, 0x55); + vecX2 = _mm_shuffle_epi32(vecX, 0xaa); + vecX3 = _mm_shuffle_epi32(vecX, 0xff); + + vecY0 = OP_CVTEPI16_EPI32_M64(&y[j + 0]); + vecY1 = OP_CVTEPI16_EPI32_M64(&y[j + 1]); + vecY2 = OP_CVTEPI16_EPI32_M64(&y[j + 2]); + vecY3 = OP_CVTEPI16_EPI32_M64(&y[j + 3]); + + sum0 = _mm_mullo_epi32(vecX0, vecY0); + sum1 = _mm_mullo_epi32(vecX1, vecY1); + sum2 = _mm_mullo_epi32(vecX2, vecY2); + sum3 = _mm_mullo_epi32(vecX3, vecY3); + + sum0 = _mm_add_epi32(sum0, sum1); + sum2 = _mm_add_epi32(sum2, sum3); + vecSum = _mm_add_epi32(vecSum, sum0); + vecSum = _mm_add_epi32(vecSum, sum2); + } + + for (;j +static _inline void cpuid(unsigned int CPUInfo[4], unsigned int InfoType) +{ + __cpuid((int*)CPUInfo, InfoType); +} + +#else + +#if defined(CPU_INFO_BY_C) +#include +#endif + +static void cpuid(unsigned int CPUInfo[4], unsigned int InfoType) +{ +#if defined(CPU_INFO_BY_ASM) +#if defined(__i386__) && defined(__PIC__) +/* %ebx is PIC register in 32-bit, so mustn't clobber it. */ + __asm__ __volatile__ ( + "xchg %%ebx, %1\n" + "cpuid\n" + "xchg %%ebx, %1\n": + "=a" (CPUInfo[0]), + "=r" (CPUInfo[1]), + "=c" (CPUInfo[2]), + "=d" (CPUInfo[3]) : + "0" (InfoType) + ); +#else + __asm__ __volatile__ ( + "cpuid": + "=a" (CPUInfo[0]), + "=b" (CPUInfo[1]), + "=c" (CPUInfo[2]), + "=d" (CPUInfo[3]) : + "0" (InfoType) + ); +#endif +#elif defined(CPU_INFO_BY_C) + __get_cpuid(InfoType, &(CPUInfo[0]), &(CPUInfo[1]), &(CPUInfo[2]), &(CPUInfo[3])); +#endif +} + +#endif + +typedef struct CPU_Feature{ + /* SIMD: 128-bit */ + int HW_SSE; + int HW_SSE2; + int HW_SSE41; + /* SIMD: 256-bit */ + int HW_AVX; +} CPU_Feature; + +static void opus_cpu_feature_check(CPU_Feature *cpu_feature) +{ + unsigned int info[4] = {0}; + unsigned int nIds = 0; + + cpuid(info, 0); + nIds = info[0]; + + if (nIds >= 1){ + cpuid(info, 1); + cpu_feature->HW_SSE = (info[3] & (1 << 25)) != 0; + cpu_feature->HW_SSE2 = (info[3] & (1 << 26)) != 0; + cpu_feature->HW_SSE41 = (info[2] & (1 << 19)) != 0; + cpu_feature->HW_AVX = (info[2] & (1 << 28)) != 0; + } + else { + cpu_feature->HW_SSE = 0; + cpu_feature->HW_SSE2 = 0; + cpu_feature->HW_SSE41 = 0; + cpu_feature->HW_AVX = 0; + } +} + +int opus_select_arch(void) +{ + CPU_Feature cpu_feature; + int arch; + + opus_cpu_feature_check(&cpu_feature); + + arch = 0; + if (!cpu_feature.HW_SSE) + { + return arch; + } + arch++; + + if (!cpu_feature.HW_SSE2) + { + return arch; + } + arch++; + + if (!cpu_feature.HW_SSE41) + { + return arch; + } + arch++; + + if (!cpu_feature.HW_AVX) + { + return arch; + } + arch++; + + return arch; +} + +#endif diff --git a/drivers/opus/celt/x86/x86cpu.h b/drivers/opus/celt/x86/x86cpu.h new file mode 100644 index 0000000000..04fd48aac4 --- /dev/null +++ b/drivers/opus/celt/x86/x86cpu.h @@ -0,0 +1,93 @@ +/* Copyright (c) 2014, Cisco Systems, INC + Written by XiangMingZhu WeiZhou MinPeng YanWang + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#if !defined(X86CPU_H) +# define X86CPU_H + +# if defined(OPUS_X86_MAY_HAVE_SSE) +# define MAY_HAVE_SSE(name) name ## _sse +# else +# define MAY_HAVE_SSE(name) name ## _c +# endif + +# if defined(OPUS_X86_MAY_HAVE_SSE2) +# define MAY_HAVE_SSE2(name) name ## _sse2 +# else +# define MAY_HAVE_SSE2(name) name ## _c +# endif + +# if defined(OPUS_X86_MAY_HAVE_SSE4_1) +# define MAY_HAVE_SSE4_1(name) name ## _sse4_1 +# else +# define MAY_HAVE_SSE4_1(name) name ## _c +# endif + +# if defined(OPUS_X86_MAY_HAVE_AVX) +# define MAY_HAVE_AVX(name) name ## _avx +# else +# define MAY_HAVE_AVX(name) name ## _c +# endif + +# if defined(OPUS_HAVE_RTCD) +int opus_select_arch(void); +# endif + +/*gcc appears to emit MOVDQA's to load the argument of an _mm_cvtepi8_epi32() + or _mm_cvtepi16_epi32() when optimizations are disabled, even though the + actual PMOVSXWD instruction takes an m32 or m64. Unlike a normal memory + reference, these require 16-byte alignment and load a full 16 bytes (instead + of 4 or 8), possibly reading out of bounds. + + We can insert an explicit MOVD or MOVQ using _mm_cvtsi32_si128() or + _mm_loadl_epi64(), which should have the same semantics as an m32 or m64 + reference in the PMOVSXWD instruction itself, but gcc is not smart enough to + optimize this out when optimizations ARE enabled. + + Clang, in contrast, requires us to do this always for _mm_cvtepi8_epi32 + (which is fair, since technically the compiler is always allowed to do the + dereference before invoking the function implementing the intrinsic). + However, it is smart enough to eliminate the extra MOVD instruction. + For _mm_cvtepi16_epi32, it does the right thing, though does *not* optimize out + the extra MOVQ if it's specified explicitly */ + +# if defined(__clang__) || !defined(__OPTIMIZE__) +# define OP_CVTEPI8_EPI32_M32(x) \ + (_mm_cvtepi8_epi32(_mm_cvtsi32_si128(*(int *)(x)))) +# else +# define OP_CVTEPI8_EPI32_M32(x) \ + (_mm_cvtepi8_epi32(*(__m128i *)(x))) +#endif + +# if !defined(__OPTIMIZE__) +# define OP_CVTEPI16_EPI32_M64(x) \ + (_mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i *)(x)))) +# else +# define OP_CVTEPI16_EPI32_M64(x) \ + (_mm_cvtepi16_epi32(*(__m128i *)(x))) +# endif + +#endif -- cgit v1.2.3