diff options
author | George Marques <george@gmarqu.es> | 2016-05-01 12:48:46 -0300 |
---|---|---|
committer | George Marques <george@gmarqu.es> | 2016-05-02 13:18:34 -0300 |
commit | 7c59d819a7ebb936d51ca032e66a2489e4080d08 (patch) | |
tree | df6747620113a36812194db3456729aac2fa1b08 /drivers/opus/celt/mips | |
parent | a3d81cab8a97eeece54ebadb82c40532188b4d57 (diff) |
Update Opus driver to 1.1.2
And opusfile to 0.7.
Diffstat (limited to 'drivers/opus/celt/mips')
-rw-r--r-- | drivers/opus/celt/mips/celt_mipsr1.h | 148 | ||||
-rw-r--r-- | drivers/opus/celt/mips/fixed_generic_mipsr1.h | 126 | ||||
-rw-r--r-- | drivers/opus/celt/mips/kiss_fft_mipsr1.h | 167 | ||||
-rw-r--r-- | drivers/opus/celt/mips/mdct_mipsr1.h | 286 | ||||
-rw-r--r-- | drivers/opus/celt/mips/pitch_mipsr1.h | 161 | ||||
-rw-r--r-- | drivers/opus/celt/mips/vq_mipsr1.h | 122 |
6 files changed, 1010 insertions, 0 deletions
diff --git a/drivers/opus/celt/mips/celt_mipsr1.h b/drivers/opus/celt/mips/celt_mipsr1.h new file mode 100644 index 0000000000..906848f1ec --- /dev/null +++ b/drivers/opus/celt/mips/celt_mipsr1.h @@ -0,0 +1,148 @@ +/* Copyright (c) 2007-2008 CSIRO + Copyright (c) 2007-2010 Xiph.Org Foundation + Copyright (c) 2008 Gregory Maxwell + Written by Jean-Marc Valin and Gregory Maxwell */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#ifndef __CELT_MIPSR1_H__ +#define __CELT_MIPSR1_H__ +#include "opus/opus_config.h" + +#define CELT_C + +#include "opus/celt/os_support.h" +#include "opus/celt/mdct.h" +#include <math.h> +#include "opus/celt/celt.h" +#include "opus/celt/pitch.h" +#include "opus/celt/bands.h" +#include "opus/celt/modes.h" +#include "opus/celt/entcode.h" +#include "opus/celt/quant_bands.h" +#include "opus/celt/rate.h" +#include "opus/celt/stack_alloc.h" +#include "opus/celt/mathops.h" +#include "opus/celt/float_cast.h" +#include <stdarg.h> +#include "opus/celt/celt_lpc.h" +#include "opus/celt/vq.h" + +#define OVERRIDE_comb_filter +void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N, + opus_val16 g0, opus_val16 g1, int tapset0, int tapset1, + const opus_val16 *window, int overlap, int arch) +{ + int i; + opus_val32 x0, x1, x2, x3, x4; + + (void)arch; + + /* printf ("%d %d %f %f\n", T0, T1, g0, g1); */ + opus_val16 g00, g01, g02, g10, g11, g12; + static const opus_val16 gains[3][3] = { + {QCONST16(0.3066406250f, 15), QCONST16(0.2170410156f, 15), QCONST16(0.1296386719f, 15)}, + {QCONST16(0.4638671875f, 15), QCONST16(0.2680664062f, 15), QCONST16(0.f, 15)}, + {QCONST16(0.7998046875f, 15), QCONST16(0.1000976562f, 15), QCONST16(0.f, 15)}}; + + if (g0==0 && g1==0) + { + /* OPT: Happens to work without the OPUS_MOVE(), but only because the current encoder already copies x to y */ + if (x!=y) + OPUS_MOVE(y, x, N); + return; + } + + g00 = MULT16_16_P15(g0, gains[tapset0][0]); + g01 = MULT16_16_P15(g0, gains[tapset0][1]); + g02 = MULT16_16_P15(g0, gains[tapset0][2]); + g10 = MULT16_16_P15(g1, gains[tapset1][0]); + g11 = MULT16_16_P15(g1, gains[tapset1][1]); + g12 = MULT16_16_P15(g1, gains[tapset1][2]); + x1 = x[-T1+1]; + x2 = x[-T1 ]; + x3 = x[-T1-1]; + x4 = x[-T1-2]; + /* If the filter didn't change, we don't need the overlap */ + if (g0==g1 && T0==T1 && tapset0==tapset1) + overlap=0; + + for (i=0;i<overlap;i++) + { + opus_val16 f; + opus_val32 res; + f = 
MULT16_16_Q15(window[i],window[i]); + x0= x[i-T1+2]; + + asm volatile("MULT $ac1, %0, %1" : : "r" ((int)MULT16_16_Q15((Q15ONE-f),g00)), "r" ((int)x[i-T0])); + + asm volatile("MADD $ac1, %0, %1" : : "r" ((int)MULT16_16_Q15((Q15ONE-f),g01)), "r" ((int)ADD32(x[i-T0-1],x[i-T0+1]))); + asm volatile("MADD $ac1, %0, %1" : : "r" ((int)MULT16_16_Q15((Q15ONE-f),g02)), "r" ((int)ADD32(x[i-T0-2],x[i-T0+2]))); + asm volatile("MADD $ac1, %0, %1" : : "r" ((int)MULT16_16_Q15(f,g10)), "r" ((int)x2)); + asm volatile("MADD $ac1, %0, %1" : : "r" ((int)MULT16_16_Q15(f,g11)), "r" ((int)ADD32(x3,x1))); + asm volatile("MADD $ac1, %0, %1" : : "r" ((int)MULT16_16_Q15(f,g12)), "r" ((int)ADD32(x4,x0))); + + asm volatile("EXTR.W %0,$ac1, %1" : "=r" (res): "i" (15)); + + y[i] = x[i] + res; + + x4=x3; + x3=x2; + x2=x1; + x1=x0; + } + + x4 = x[i-T1-2]; + x3 = x[i-T1-1]; + x2 = x[i-T1]; + x1 = x[i-T1+1]; + + if (g1==0) + { + /* OPT: Happens to work without the OPUS_MOVE(), but only because the current encoder already copies x to y */ + if (x!=y) + OPUS_MOVE(y+overlap, x+overlap, N-overlap); + return; + } + + for (i=overlap;i<N;i++) + { + opus_val32 res; + x0=x[i-T1+2]; + + asm volatile("MULT $ac1, %0, %1" : : "r" ((int)g10), "r" ((int)x2)); + + asm volatile("MADD $ac1, %0, %1" : : "r" ((int)g11), "r" ((int)ADD32(x3,x1))); + asm volatile("MADD $ac1, %0, %1" : : "r" ((int)g12), "r" ((int)ADD32(x4,x0))); + asm volatile("EXTR.W %0,$ac1, %1" : "=r" (res): "i" (15)); + y[i] = x[i] + res; + x4=x3; + x3=x2; + x2=x1; + x1=x0; + } +} + +#endif /* __CELT_MIPSR1_H__ */ diff --git a/drivers/opus/celt/mips/fixed_generic_mipsr1.h b/drivers/opus/celt/mips/fixed_generic_mipsr1.h new file mode 100644 index 0000000000..4a05efbf85 --- /dev/null +++ b/drivers/opus/celt/mips/fixed_generic_mipsr1.h @@ -0,0 +1,126 @@ +/* Copyright (C) 2007-2009 Xiph.Org Foundation + Copyright (C) 2003-2008 Jean-Marc Valin + Copyright (C) 2007-2008 CSIRO */ +/** + @file fixed_generic.h + @brief Generic fixed-point operations +*/ +/* + 
Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#ifndef CELT_FIXED_GENERIC_MIPSR1_H +#define CELT_FIXED_GENERIC_MIPSR1_H + +#undef MULT16_32_Q15_ADD +static inline int MULT16_32_Q15_ADD(int a, int b, int c, int d) { + int m; + asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a), "r" ((int)b)); + asm volatile("madd $ac1, %0, %1" : : "r" ((int)c), "r" ((int)d)); + asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m): "i" (15)); + return m; +} + +#undef MULT16_32_Q15_SUB +static inline int MULT16_32_Q15_SUB(int a, int b, int c, int d) { + int m; + asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a), "r" ((int)b)); + asm volatile("msub $ac1, %0, %1" : : "r" ((int)c), "r" ((int)d)); + asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m): "i" (15)); + return m; +} + +#undef MULT16_16_Q15_ADD +static inline int MULT16_16_Q15_ADD(int a, int b, int c, int d) { + int m; + asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a), "r" ((int)b)); + asm volatile("madd $ac1, %0, %1" : : "r" ((int)c), "r" ((int)d)); + asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m): "i" (15)); + return m; +} + +#undef MULT16_16_Q15_SUB +static inline int MULT16_16_Q15_SUB(int a, int b, int c, int d) { + int m; + asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a), "r" ((int)b)); + asm volatile("msub $ac1, %0, %1" : : "r" ((int)c), "r" ((int)d)); + asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m): "i" (15)); + return m; +} + + +#undef MULT16_32_Q16 +static inline int MULT16_32_Q16(int a, int b) +{ + int c; + asm volatile("MULT $ac1,%0, %1" : : "r" (a), "r" (b)); + asm volatile("EXTR.W %0,$ac1, %1" : "=r" (c): "i" (16)); + return c; +} + +#undef MULT16_32_P16 +static inline int MULT16_32_P16(int a, int b) +{ + int c; + asm volatile("MULT $ac1, %0, %1" : : "r" (a), "r" (b)); + asm volatile("EXTR_R.W %0,$ac1, %1" : "=r" (c): "i" (16)); + return c; +} + +#undef MULT16_32_Q15 +static inline int MULT16_32_Q15(int a, int b) +{ + int c; + asm volatile("MULT $ac1, %0, %1" : : "r" (a), "r" (b)); + asm volatile("EXTR.W %0,$ac1, %1" : "=r" (c): "i" (15)); + return c; +} + +#undef 
MULT32_32_Q31 +static inline int MULT32_32_Q31(int a, int b) +{ + int r; + asm volatile("MULT $ac1, %0, %1" : : "r" (a), "r" (b)); + asm volatile("EXTR.W %0,$ac1, %1" : "=r" (r): "i" (31)); + return r; +} + +#undef PSHR32 +static inline int PSHR32(int a, int shift) +{ + int r; + asm volatile ("SHRAV_R.W %0, %1, %2" :"=r" (r): "r" (a), "r" (shift)); + return r; +} + +#undef MULT16_16_P15 +static inline int MULT16_16_P15(int a, int b) +{ + int r; + asm volatile ("mul %0, %1, %2" :"=r" (r): "r" (a), "r" (b)); + asm volatile ("SHRA_R.W %0, %1, %2" : "+r" (r): "0" (r), "i"(15)); + return r; +} + +#endif /* CELT_FIXED_GENERIC_MIPSR1_H */ diff --git a/drivers/opus/celt/mips/kiss_fft_mipsr1.h b/drivers/opus/celt/mips/kiss_fft_mipsr1.h new file mode 100644 index 0000000000..09dc9af464 --- /dev/null +++ b/drivers/opus/celt/mips/kiss_fft_mipsr1.h @@ -0,0 +1,167 @@ +/*Copyright (c) 2013, Xiph.Org Foundation and contributors. + + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE.*/ + +#ifndef KISS_FFT_MIPSR1_H +#define KISS_FFT_MIPSR1_H + +#if !defined(KISS_FFT_GUTS_H) +#error "This file should only be included from _kiss_fft_guts.h" +#endif + +#ifdef OPUS_FIXED_POINT + +#define S_MUL_ADD(a, b, c, d) (S_MUL(a,b)+S_MUL(c,d)) +#define S_MUL_SUB(a, b, c, d) (S_MUL(a,b)-S_MUL(c,d)) + +#undef S_MUL_ADD +static inline int S_MUL_ADD(int a, int b, int c, int d) { + int m; + asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a), "r" ((int)b)); + asm volatile("madd $ac1, %0, %1" : : "r" ((int)c), "r" ((int)d)); + asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m): "i" (15)); + return m; +} + +#undef S_MUL_SUB +static inline int S_MUL_SUB(int a, int b, int c, int d) { + int m; + asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a), "r" ((int)b)); + asm volatile("msub $ac1, %0, %1" : : "r" ((int)c), "r" ((int)d)); + asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m): "i" (15)); + return m; +} + +#undef C_MUL +# define C_MUL(m,a,b) (m=C_MUL_fun(a,b)) +static inline kiss_fft_cpx C_MUL_fun(kiss_fft_cpx a, kiss_twiddle_cpx b) { + kiss_fft_cpx m; + + asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a.r), "r" ((int)b.r)); + asm volatile("msub $ac1, %0, %1" : : "r" ((int)a.i), "r" ((int)b.i)); + asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m.r): "i" (15)); + asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a.r), "r" ((int)b.i)); + asm volatile("madd $ac1, %0, %1" : : "r" ((int)a.i), "r" ((int)b.r)); + asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m.i): "i" (15)); + + 
return m; +} +#undef C_MULC +# define C_MULC(m,a,b) (m=C_MULC_fun(a,b)) +static inline kiss_fft_cpx C_MULC_fun(kiss_fft_cpx a, kiss_twiddle_cpx b) { + kiss_fft_cpx m; + + asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a.r), "r" ((int)b.r)); + asm volatile("madd $ac1, %0, %1" : : "r" ((int)a.i), "r" ((int)b.i)); + asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m.r): "i" (15)); + asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a.i), "r" ((int)b.r)); + asm volatile("msub $ac1, %0, %1" : : "r" ((int)a.r), "r" ((int)b.i)); + asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m.i): "i" (15)); + + return m; +} + +#endif /* FIXED_POINT */ + +#define OVERRIDE_kf_bfly5 +static void kf_bfly5( + kiss_fft_cpx * Fout, + const size_t fstride, + const kiss_fft_state *st, + int m, + int N, + int mm + ) +{ + kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; + int i, u; + kiss_fft_cpx scratch[13]; + + const kiss_twiddle_cpx *tw; + kiss_twiddle_cpx ya,yb; + kiss_fft_cpx * Fout_beg = Fout; + +#ifdef OPUS_FIXED_POINT + ya.r = 10126; + ya.i = -31164; + yb.r = -26510; + yb.i = -19261; +#else + ya = st->twiddles[fstride*m]; + yb = st->twiddles[fstride*2*m]; +#endif + + tw=st->twiddles; + + for (i=0;i<N;i++) + { + Fout = Fout_beg + i*mm; + Fout0=Fout; + Fout1=Fout0+m; + Fout2=Fout0+2*m; + Fout3=Fout0+3*m; + Fout4=Fout0+4*m; + + /* For non-custom modes, m is guaranteed to be a multiple of 4. 
*/ + for ( u=0; u<m; ++u ) { + scratch[0] = *Fout0; + + + C_MUL(scratch[1] ,*Fout1, tw[u*fstride]); + C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]); + C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]); + C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]); + + C_ADD( scratch[7],scratch[1],scratch[4]); + C_SUB( scratch[10],scratch[1],scratch[4]); + C_ADD( scratch[8],scratch[2],scratch[3]); + C_SUB( scratch[9],scratch[2],scratch[3]); + + Fout0->r += scratch[7].r + scratch[8].r; + Fout0->i += scratch[7].i + scratch[8].i; + scratch[5].r = scratch[0].r + S_MUL_ADD(scratch[7].r,ya.r,scratch[8].r,yb.r); + scratch[5].i = scratch[0].i + S_MUL_ADD(scratch[7].i,ya.r,scratch[8].i,yb.r); + + scratch[6].r = S_MUL_ADD(scratch[10].i,ya.i,scratch[9].i,yb.i); + scratch[6].i = -S_MUL_ADD(scratch[10].r,ya.i,scratch[9].r,yb.i); + + C_SUB(*Fout1,scratch[5],scratch[6]); + C_ADD(*Fout4,scratch[5],scratch[6]); + + scratch[11].r = scratch[0].r + S_MUL_ADD(scratch[7].r,yb.r,scratch[8].r,ya.r); + scratch[11].i = scratch[0].i + S_MUL_ADD(scratch[7].i,yb.r,scratch[8].i,ya.r); + + scratch[12].r = S_MUL_SUB(scratch[9].i,ya.i,scratch[10].i,yb.i); + scratch[12].i = S_MUL_SUB(scratch[10].r,yb.i,scratch[9].r,ya.i); + + C_ADD(*Fout2,scratch[11],scratch[12]); + C_SUB(*Fout3,scratch[11],scratch[12]); + + ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4; + } + } +} + + +#endif /* KISS_FFT_MIPSR1_H */ diff --git a/drivers/opus/celt/mips/mdct_mipsr1.h b/drivers/opus/celt/mips/mdct_mipsr1.h new file mode 100644 index 0000000000..0bb8b5a056 --- /dev/null +++ b/drivers/opus/celt/mips/mdct_mipsr1.h @@ -0,0 +1,286 @@ +/* Copyright (c) 2007-2008 CSIRO + Copyright (c) 2007-2008 Xiph.Org Foundation + Written by Jean-Marc Valin */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/* This is a simple MDCT implementation that uses a N/4 complex FFT + to do most of the work. It should be relatively straightforward to + plug in pretty much any FFT here. + + This replaces the Vorbis FFT (and uses the exact same API), which + was a bit too messy and that was ending up duplicating code + (might as well use the same FFT everywhere). + + The algorithm is similar to (and inspired from) Fabrice Bellard's + MDCT implementation in FFMPEG, but has differences in signs, ordering + and scaling in many places. 
+*/
+#ifndef __MDCT_MIPSR1_H__
+#define __MDCT_MIPSR1_H__
+
+#ifndef SKIP_CONFIG_H
+#include "opus/opus_config.h"
+#endif
+
+#include "opus/celt/mdct.h"
+#include "opus/celt/kiss_fft.h"
+#include "opus/celt/_kiss_fft_guts.h"
+#include <math.h>
+#include "opus/celt/os_support.h"
+#include "opus/celt/mathops.h"
+#include "opus/celt/stack_alloc.h"
+
+/* Forward MDCT trashes the input array */
+/* Override of clt_mdct_forward() built on S_MUL_ADD()/S_MUL_SUB().
+   Steps, in order: window and fold `in` into a temporary buffer f,
+   pre-rotate by the trig table while scaling by st->scale and storing in
+   bit-reversed order into f2, run opus_fft_impl() on f2, then post-rotate
+   into `out`, writing every `stride`-th element from both ends.
+   `shift` selects the FFT state l->kfft[shift] and halves N `shift` times.
+   `arch` is unused.
+   NOTE(review): scale_shift is only declared under OPUS_FIXED_POINT but is
+   passed to PSHR32() unconditionally -- presumably this header is only
+   used in fixed-point builds; confirm. */
+#define OVERRIDE_clt_mdct_forward
+void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
+      const opus_val16 *window, int overlap, int shift, int stride, int arch)
+{
+   int i;
+   int N, N2, N4;
+   VARDECL(kiss_fft_scalar, f);
+   VARDECL(kiss_fft_cpx, f2);
+   const kiss_fft_state *st = l->kfft[shift];
+   const kiss_twiddle_scalar *trig;
+   opus_val16 scale;
+#ifdef OPUS_FIXED_POINT
+   /* Allows us to scale with MULT16_32_Q16(), which is faster than
+      MULT16_32_Q15() on ARM. */
+   int scale_shift = st->scale_shift-1;
+#endif
+
+    (void)arch;
+
+   SAVE_STACK;
+   scale = st->scale;
+
+   /* Halve N once per shift level and advance trig past each level's
+      N entries. */
+   N = l->n;
+   trig = l->trig;
+   for (i=0;i<shift;i++)
+   {
+      N >>= 1;
+      trig += N;
+   }
+   N2 = N>>1;
+   N4 = N>>2;
+
+   ALLOC(f, N2, kiss_fft_scalar);
+   ALLOC(f2, N4, kiss_fft_cpx);
+
+   /* Consider the input to be composed of four blocks: [a, b, c, d] */
+   /* Window, shuffle, fold */
+   {
+      /* Temp pointers to make it really clear to the compiler what we're doing */
+      const kiss_fft_scalar * OPUS_RESTRICT xp1 = in+(overlap>>1);
+      const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+N2-1+(overlap>>1);
+      kiss_fft_scalar * OPUS_RESTRICT yp = f;
+      const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1);
+      const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1;
+      for(i=0;i<((overlap+3)>>2);i++)
+      {
+         /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/
+          *yp++ = S_MUL_ADD(*wp2, xp1[N2],*wp1,*xp2);
+          *yp++ = S_MUL_SUB(*wp1, *xp1,*wp2, xp2[-N2]);
+         xp1+=2;
+         xp2-=2;
+         wp1+=2;
+         wp2-=2;
+      }
+      wp1 = window;
+      wp2 = window+overlap-1;
+      /* Middle section: samples are copied without windowing. */
+      for(;i<N4-((overlap+3)>>2);i++)
+      {
+         /* Real part arranged as a-bR, Imag part arranged as -c-dR */
+         *yp++ = *xp2;
+         *yp++ = *xp1;
+         xp1+=2;
+         xp2-=2;
+      }
+      for(;i<N4;i++)
+      {
+         /* Real part arranged as a-bR, Imag part arranged as -c-dR */
+          *yp++ =  S_MUL_SUB(*wp2, *xp2, *wp1, xp1[-N2]);
+          *yp++ = S_MUL_ADD(*wp2, *xp1, *wp1, xp2[N2]);
+         xp1+=2;
+         xp2-=2;
+         wp1+=2;
+         wp2-=2;
+      }
+   }
+   /* Pre-rotation */
+   {
+      kiss_fft_scalar * OPUS_RESTRICT yp = f;
+      const kiss_twiddle_scalar *t = &trig[0];
+      for(i=0;i<N4;i++)
+      {
+         kiss_fft_cpx yc;
+         kiss_twiddle_scalar t0, t1;
+         kiss_fft_scalar re, im, yr, yi;
+         t0 = t[i];
+         t1 = t[N4+i];
+         re = *yp++;
+         im = *yp++;
+
+         yr = S_MUL_SUB(re,t0,im,t1);
+         yi = S_MUL_ADD(im,t0,re,t1);
+
+         yc.r = yr;
+         yc.i = yi;
+         yc.r = PSHR32(MULT16_32_Q16(scale, yc.r), scale_shift);
+         yc.i = PSHR32(MULT16_32_Q16(scale, yc.i), scale_shift);
+         /* Store directly in bit-reversed order for opus_fft_impl(). */
+         f2[st->bitrev[i]] = yc;
+      }
+   }
+
+   /* N/4 complex FFT, does not downscale anymore */
+   opus_fft_impl(st, f2);
+
+   /* Post-rotate */
+   {
+      /* Temp pointers to make it really clear to the compiler what we're doing */
+      const kiss_fft_cpx * OPUS_RESTRICT fp = f2;
+      kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
+      kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1);
+      const kiss_twiddle_scalar *t = &trig[0];
+      /* Temp pointers to make it really clear to the compiler what we're doing */
+      for(i=0;i<N4;i++)
+      {
+         kiss_fft_scalar yr, yi;
+         yr = S_MUL_SUB(fp->i,t[N4+i] , fp->r,t[i]);
+         yi = S_MUL_ADD(fp->r,t[N4+i] ,fp->i,t[i]);
+         *yp1 = yr;
+         *yp2 = yi;
+         fp++;
+         yp1 += 2*stride;
+         yp2 -= 2*stride;
+      }
+   }
+   RESTORE_STACK;
+}
+
+/* Override of clt_mdct_backward() built on S_MUL_ADD()/S_MUL_SUB().
+   Steps, in order: pre-rotate `in` (read with `stride`) straight into
+   out+(overlap>>1) in bit-reversed order, run opus_fft_impl() in place on
+   that region, post-rotate and de-shuffle it in place from both ends, then
+   apply `window` to mirror the first `overlap` samples of `out` for TDAC.
+   `arch` is unused. */
+#define OVERRIDE_clt_mdct_backward
+void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
+      const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride, int arch)
+{
+   int i;
+   int N, N2, N4;
+   const kiss_twiddle_scalar *trig;
+
+    (void)arch;
+
+   /* Same per-shift halving of N and trig advance as the forward
+      transform. */
+   N = l->n;
+   trig = l->trig;
+   for (i=0;i<shift;i++)
+   {
+      N >>= 1;
+      trig += N;
+   }
+   N2 = N>>1;
+   N4 = N>>2;
+
+   /* Pre-rotate */
+   {
+      /* Temp pointers to make it really clear to the compiler what we're doing */
+      const kiss_fft_scalar * OPUS_RESTRICT xp1 = in;
+      const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1);
+      kiss_fft_scalar * OPUS_RESTRICT yp = out+(overlap>>1);
+      const kiss_twiddle_scalar * OPUS_RESTRICT t = &trig[0];
+      const opus_int16 * OPUS_RESTRICT bitrev = l->kfft[shift]->bitrev;
+      for(i=0;i<N4;i++)
+      {
+         int rev;
+         kiss_fft_scalar yr, yi;
+         rev = *bitrev++;
+         yr = S_MUL_ADD(*xp2, t[i] , *xp1, t[N4+i]);
+         yi = S_MUL_SUB(*xp1, t[i] , *xp2, t[N4+i]);
+         /* We swap real and imag because we use an FFT instead of an IFFT. */
+         yp[2*rev+1] = yr;
+         yp[2*rev] = yi;
+         /* Storing the pre-rotation directly in the bitrev order. */
+         xp1+=2*stride;
+         xp2-=2*stride;
+      }
+   }
+
+   opus_fft_impl(l->kfft[shift], (kiss_fft_cpx*)(out+(overlap>>1)));
+
+   /* Post-rotate and de-shuffle from both ends of the buffer at once to make
+      it in-place. */
+   {
+      kiss_fft_scalar * OPUS_RESTRICT yp0 = out+(overlap>>1);
+      kiss_fft_scalar * OPUS_RESTRICT yp1 = out+(overlap>>1)+N2-2;
+      const kiss_twiddle_scalar *t = &trig[0];
+      /* Loop to (N4+1)>>1 to handle odd N4. When N4 is odd, the
+         middle pair will be computed twice. */
+      for(i=0;i<(N4+1)>>1;i++)
+      {
+         kiss_fft_scalar re, im, yr, yi;
+         kiss_twiddle_scalar t0, t1;
+         /* We swap real and imag because we're using an FFT instead of an IFFT. */
+         re = yp0[1];
+         im = yp0[0];
+         t0 = t[i];
+         t1 = t[N4+i];
+         /* We'd scale up by 2 here, but instead it's done when mixing the windows */
+         yr = S_MUL_ADD(re,t0 , im,t1);
+         yi = S_MUL_SUB(re,t1 , im,t0);
+         /* We swap real and imag because we're using an FFT instead of an IFFT. */
+         /* The back pair is read before yp0[0]/yp1[1] are overwritten. */
+         re = yp1[1];
+         im = yp1[0];
+         yp0[0] = yr;
+         yp1[1] = yi;
+
+         t0 = t[(N4-i-1)];
+         t1 = t[(N2-i-1)];
+         /* We'd scale up by 2 here, but instead it's done when mixing the windows */
+         yr = S_MUL_ADD(re,t0,im,t1);
+         yi = S_MUL_SUB(re,t1,im,t0);
+         yp1[0] = yr;
+         yp0[1] = yi;
+         yp0 += 2;
+         yp1 -= 2;
+      }
+   }
+
+   /* Mirror on both sides for TDAC */
+   {
+      kiss_fft_scalar * OPUS_RESTRICT xp1 = out+overlap-1;
+      kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
+      const opus_val16 * OPUS_RESTRICT wp1 = window;
+      const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1;
+
+      for(i = 0; i < overlap/2; i++)
+      {
+         kiss_fft_scalar x1, x2;
+         x1 = *xp1;
+         x2 = *yp1;
+         *yp1++ = MULT16_32_Q15(*wp2, x2) - MULT16_32_Q15(*wp1, x1);
+         *xp1-- = MULT16_32_Q15(*wp1, x2) + MULT16_32_Q15(*wp2, x1);
+         wp1++;
+         wp2--;
+      }
+   }
+}
+#endif /* __MDCT_MIPSR1_H__ */
diff --git a/drivers/opus/celt/mips/pitch_mipsr1.h b/drivers/opus/celt/mips/pitch_mipsr1.h new file mode 100644 index 0000000000..a9500aff58 --- /dev/null +++ b/drivers/opus/celt/mips/pitch_mipsr1.h @@ -0,0 +1,161 @@ +/* Copyright (c) 2007-2008 CSIRO + Copyright (c) 2007-2009 Xiph.Org Foundation + Written by Jean-Marc Valin */ +/** + @file pitch.h + @brief Pitch analysis + */ + +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef PITCH_MIPSR1_H +#define PITCH_MIPSR1_H + +#define OVERRIDE_DUAL_INNER_PROD +static inline void dual_inner_prod(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02, + int N, opus_val32 *xy1, opus_val32 *xy2, int arch) +{ + int j; + opus_val32 xy01=0; + opus_val32 xy02=0; + + (void)arch; + + asm volatile("MULT $ac1, $0, $0"); + asm volatile("MULT $ac2, $0, $0"); + /* Compute the norm of X+Y and X-Y as |X|^2 + |Y|^2 +/- sum(xy) */ + for (j=0;j<N;j++) + { + asm volatile("MADD $ac1, %0, %1" : : "r" ((int)x[j]), "r" ((int)y01[j])); + asm volatile("MADD $ac2, %0, %1" : : "r" ((int)x[j]), "r" ((int)y02[j])); + ++j; + asm volatile("MADD $ac1, %0, %1" : : "r" ((int)x[j]), "r" ((int)y01[j])); + asm volatile("MADD $ac2, %0, %1" : : "r" ((int)x[j]), "r" ((int)y02[j])); + } + asm volatile ("mflo %0, $ac1": "=r"(xy01)); + asm volatile ("mflo %0, $ac2": "=r"(xy02)); + *xy1 = xy01; + *xy2 = xy02; +} + +static inline void xcorr_kernel_mips(const opus_val16 * x, + const opus_val16 * y, opus_val32 sum[4], int len) +{ + int j; + opus_val16 y_0, y_1, y_2, y_3; + + opus_int64 sum_0, sum_1, sum_2, sum_3; + sum_0 = (opus_int64)sum[0]; + sum_1 = (opus_int64)sum[1]; + sum_2 = (opus_int64)sum[2]; + sum_3 = (opus_int64)sum[3]; + + y_3=0; /* gcc doesn't realize that y_3 can't be used uninitialized */ + y_0=*y++; + y_1=*y++; + y_2=*y++; + for (j=0;j<len-3;j+=4) + { + opus_val16 tmp; + tmp = *x++; + y_3=*y++; + + sum_0 = __builtin_mips_madd( sum_0, 
tmp, y_0); + sum_1 = __builtin_mips_madd( sum_1, tmp, y_1); + sum_2 = __builtin_mips_madd( sum_2, tmp, y_2); + sum_3 = __builtin_mips_madd( sum_3, tmp, y_3); + + tmp=*x++; + y_0=*y++; + + sum_0 = __builtin_mips_madd( sum_0, tmp, y_1 ); + sum_1 = __builtin_mips_madd( sum_1, tmp, y_2 ); + sum_2 = __builtin_mips_madd( sum_2, tmp, y_3); + sum_3 = __builtin_mips_madd( sum_3, tmp, y_0); + + tmp=*x++; + y_1=*y++; + + sum_0 = __builtin_mips_madd( sum_0, tmp, y_2 ); + sum_1 = __builtin_mips_madd( sum_1, tmp, y_3 ); + sum_2 = __builtin_mips_madd( sum_2, tmp, y_0); + sum_3 = __builtin_mips_madd( sum_3, tmp, y_1); + + + tmp=*x++; + y_2=*y++; + + sum_0 = __builtin_mips_madd( sum_0, tmp, y_3 ); + sum_1 = __builtin_mips_madd( sum_1, tmp, y_0 ); + sum_2 = __builtin_mips_madd( sum_2, tmp, y_1); + sum_3 = __builtin_mips_madd( sum_3, tmp, y_2); + + } + if (j++<len) + { + opus_val16 tmp = *x++; + y_3=*y++; + + sum_0 = __builtin_mips_madd( sum_0, tmp, y_0 ); + sum_1 = __builtin_mips_madd( sum_1, tmp, y_1 ); + sum_2 = __builtin_mips_madd( sum_2, tmp, y_2); + sum_3 = __builtin_mips_madd( sum_3, tmp, y_3); + } + + if (j++<len) + { + opus_val16 tmp=*x++; + y_0=*y++; + + sum_0 = __builtin_mips_madd( sum_0, tmp, y_1 ); + sum_1 = __builtin_mips_madd( sum_1, tmp, y_2 ); + sum_2 = __builtin_mips_madd( sum_2, tmp, y_3); + sum_3 = __builtin_mips_madd( sum_3, tmp, y_0); + } + + if (j<len) + { + opus_val16 tmp=*x++; + y_1=*y++; + + sum_0 = __builtin_mips_madd( sum_0, tmp, y_2 ); + sum_1 = __builtin_mips_madd( sum_1, tmp, y_3 ); + sum_2 = __builtin_mips_madd( sum_2, tmp, y_0); + sum_3 = __builtin_mips_madd( sum_3, tmp, y_1); + + } + + sum[0] = (opus_val32)sum_0; + sum[1] = (opus_val32)sum_1; + sum[2] = (opus_val32)sum_2; + sum[3] = (opus_val32)sum_3; +} + +#define OVERRIDE_XCORR_KERNEL +#define xcorr_kernel(x, y, sum, len, arch) \ + ((void)(arch), xcorr_kernel_mips(x, y, sum, len)) + +#endif /* PITCH_MIPSR1_H */ diff --git a/drivers/opus/celt/mips/vq_mipsr1.h b/drivers/opus/celt/mips/vq_mipsr1.h new 
file mode 100644 index 0000000000..c68159972f --- /dev/null +++ b/drivers/opus/celt/mips/vq_mipsr1.h @@ -0,0 +1,122 @@ +/* Copyright (c) 2007-2008 CSIRO + Copyright (c) 2007-2009 Xiph.Org Foundation + Written by Jean-Marc Valin */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/
+
+#ifndef __VQ_MIPSR1_H__
+#define __VQ_MIPSR1_H__
+#include "opus/opus_config.h"
+
+#include "opus/celt/mathops.h"
+#include "opus/celt/arch.h"
+
+static unsigned extract_collapse_mask(int *iy, int N, int B);
+static void normalise_residual(int * OPUS_RESTRICT iy, celt_norm * OPUS_RESTRICT X, int N, opus_val32 Ryy, opus_val16 gain);
+static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread);
+static void renormalise_vector_mips(celt_norm *X, int N, opus_val16 gain, int arch);
+
+#define OVERRIDE_vq_exp_rotation1
+/* Applies a 2-D rotation with cosine c and sine s (Q15) to every pair
+   (X[i], X[i+stride]): first in a forward pass over i = 0..len-stride-1,
+   then in a backward pass from i = len-2*stride-1 down to 0.  X is
+   modified in place. */
+static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s)
+{
+   int i;
+   opus_val16 ms;
+   celt_norm *Xptr;
+   Xptr = X;
+   ms = NEG16(s);
+   for (i=0;i<len-stride;i++)
+   {
+      celt_norm x1, x2;
+      x1 = Xptr[0];
+      x2 = Xptr[stride];
+      Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2),  s, x1), 15));
+      *Xptr++      = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15));
+   }
+   Xptr = &X[len-2*stride-1];
+   for (i=len-2*stride-1;i>=0;i--)
+   {
+      celt_norm x1, x2;
+      x1 = Xptr[0];
+      x2 = Xptr[stride];
+      Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2),  s, x1), 15));
+      *Xptr--      = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15));
+   }
+}
+
+#define OVERRIDE_renormalise_vector
+
+#define renormalise_vector(X, N, gain, arch) \
+ (renormalise_vector_mips(X, N, gain, arch))
+
+/* Rescales X[0..N-1] in place by gain / sqrt(E), where
+   E = EPSILON + sum X[i]^2.  `arch` is unused.
+
+   The energy is accumulated with __builtin_mips_madd() on a 64-bit C
+   variable and truncated to its low 32 bits.  The previous version kept
+   the running sum in $ac1 across separate asm statements spanning two C
+   loops, without telling the compiler the accumulator was live.
+   NOTE(review): __builtin_mips_madd() is assumed to be available here as
+   it is in pitch_mipsr1.h (same DSP build flags) -- confirm. */
+static void renormalise_vector_mips(celt_norm *X, int N, opus_val16 gain, int arch)
+{
+   int i;
+#ifdef OPUS_FIXED_POINT
+   int k;
+#endif
+   opus_val32 E;
+   opus_val16 g;
+   opus_val32 t;
+   celt_norm *xptr = X;
+   int X0, X1;
+   /* Start from EPSILON, as the original did by loading it into LO. */
+   opus_int64 acc = EPSILON;
+
+   (void)arch;
+
+   /* Two squares per iteration, then a tail for what is left. */
+   for (i=0;i<N-2;i+=2)
+   {
+      X0 = (int)*xptr++;
+      acc = __builtin_mips_madd(acc, X0, X0);
+
+      X1 = (int)*xptr++;
+      acc = __builtin_mips_madd(acc, X1, X1);
+   }
+
+   for (;i<N;i++)
+   {
+      X0 = (int)*xptr++;
+      acc = __builtin_mips_madd(acc, X0, X0);
+   }
+
+   E = (opus_val32)acc;
+#ifdef OPUS_FIXED_POINT
+   k = celt_ilog2(E)>>1;
+#endif
+   t = VSHR32(E, 2*(k-7));
+   g = MULT16_16_P15(celt_rsqrt_norm(t),gain);
+
+   xptr = X;
+   for (i=0;i<N;i++)
+   {
+      *xptr = EXTRACT16(PSHR32(MULT16_16(g, *xptr), k+1));
+      xptr++;
+   }
+}
+
+#endif /* __VQ_MIPSR1_H__ */ |