diff options
Diffstat (limited to 'drivers/opus/celt/kiss_fft.c')
-rw-r--r-- | drivers/opus/celt/kiss_fft.c | 497 |
1 files changed, 190 insertions, 307 deletions
diff --git a/drivers/opus/celt/kiss_fft.c b/drivers/opus/celt/kiss_fft.c index 89a1790b24..cf8d049fa1 100644 --- a/drivers/opus/celt/kiss_fft.c +++ b/drivers/opus/celt/kiss_fft.c @@ -30,9 +30,7 @@ heavily modified to better suit Opus */ #ifndef SKIP_CONFIG_H -# ifdef OPUS_ENABLED #include "opus/opus_config.h" -# endif #endif #include "opus/celt/_kiss_fft_guts.h" @@ -47,64 +45,56 @@ static void kf_bfly2( kiss_fft_cpx * Fout, - const size_t fstride, - const kiss_fft_state *st, int m, - int N, - int mm + int N ) { kiss_fft_cpx * Fout2; - const kiss_twiddle_cpx * tw1; - int i,j; - kiss_fft_cpx * Fout_beg = Fout; - for (i=0;i<N;i++) + int i; + (void)m; +#ifdef CUSTOM_MODES + if (m==1) { - Fout = Fout_beg + i*mm; - Fout2 = Fout + m; - tw1 = st->twiddles; - for(j=0;j<m;j++) + celt_assert(m==1); + for (i=0;i<N;i++) { kiss_fft_cpx t; - Fout->r = SHR32(Fout->r, 1);Fout->i = SHR32(Fout->i, 1); - Fout2->r = SHR32(Fout2->r, 1);Fout2->i = SHR32(Fout2->i, 1); - C_MUL (t, *Fout2 , *tw1); - tw1 += fstride; + Fout2 = Fout + 1; + t = *Fout2; C_SUB( *Fout2 , *Fout , t ); C_ADDTO( *Fout , t ); - ++Fout2; - ++Fout; + Fout += 2; } - } -} - -static void ki_bfly2( - kiss_fft_cpx * Fout, - const size_t fstride, - const kiss_fft_state *st, - int m, - int N, - int mm - ) -{ - kiss_fft_cpx * Fout2; - const kiss_twiddle_cpx * tw1; - kiss_fft_cpx t; - int i,j; - kiss_fft_cpx * Fout_beg = Fout; - for (i=0;i<N;i++) + } else +#endif { - Fout = Fout_beg + i*mm; - Fout2 = Fout + m; - tw1 = st->twiddles; - for(j=0;j<m;j++) + opus_val16 tw; + tw = QCONST16(0.7071067812f, 15); + /* We know that m==4 here because the radix-2 is just after a radix-4 */ + celt_assert(m==4); + for (i=0;i<N;i++) { - C_MULC (t, *Fout2 , *tw1); - tw1 += fstride; - C_SUB( *Fout2 , *Fout , t ); - C_ADDTO( *Fout , t ); - ++Fout2; - ++Fout; + kiss_fft_cpx t; + Fout2 = Fout + 4; + t = Fout2[0]; + C_SUB( Fout2[0] , Fout[0] , t ); + C_ADDTO( Fout[0] , t ); + + t.r = S_MUL(Fout2[1].r+Fout2[1].i, tw); + t.i = S_MUL(Fout2[1].i-Fout2[1].r, tw); + C_SUB( Fout2[1] , Fout[1] , t ); + C_ADDTO( Fout[1] , t ); + + t.r = Fout2[2].i; + t.i = -Fout2[2].r; + C_SUB( Fout2[2] , Fout[2] , t ); + C_ADDTO( Fout[2] , t ); + + t.r = S_MUL(Fout2[3].i-Fout2[3].r, tw); + t.i = S_MUL(-Fout2[3].i-Fout2[3].r, tw); + C_SUB( Fout2[3] , Fout[3] , t ); + C_ADDTO( Fout[3] , t ); + Fout += 8; } } } @@ -118,89 +108,67 @@ static void kf_bfly4( int mm ) { - const kiss_twiddle_cpx *tw1,*tw2,*tw3; - kiss_fft_cpx scratch[6]; - const size_t m2=2*m; - const size_t m3=3*m; - int i, j; + int i; - kiss_fft_cpx * Fout_beg = Fout; - for (i=0;i<N;i++) + if (m==1) { - Fout = Fout_beg + i*mm; - tw3 = tw2 = tw1 = st->twiddles; - for (j=0;j<m;j++) + /* Degenerate case where all the twiddles are 1. */ + for (i=0;i<N;i++) { - C_MUL4(scratch[0],Fout[m] , *tw1 ); - C_MUL4(scratch[1],Fout[m2] , *tw2 ); - C_MUL4(scratch[2],Fout[m3] , *tw3 ); - - Fout->r = PSHR32(Fout->r, 2); - Fout->i = PSHR32(Fout->i, 2); - C_SUB( scratch[5] , *Fout, scratch[1] ); - C_ADDTO(*Fout, scratch[1]); - C_ADD( scratch[3] , scratch[0] , scratch[2] ); - C_SUB( scratch[4] , scratch[0] , scratch[2] ); - C_SUB( Fout[m2], *Fout, scratch[3] ); - tw1 += fstride; - tw2 += fstride*2; - tw3 += fstride*3; - C_ADDTO( *Fout , scratch[3] ); - - Fout[m].r = scratch[5].r + scratch[4].i; - Fout[m].i = scratch[5].i - scratch[4].r; - Fout[m3].r = scratch[5].r - scratch[4].i; - Fout[m3].i = scratch[5].i + scratch[4].r; - ++Fout; + kiss_fft_cpx scratch0, scratch1; + + C_SUB( scratch0 , *Fout, Fout[2] ); + C_ADDTO(*Fout, Fout[2]); + C_ADD( scratch1 , Fout[1] , Fout[3] ); + C_SUB( Fout[2], *Fout, scratch1 ); + C_ADDTO( *Fout , scratch1 ); + C_SUB( scratch1 , Fout[1] , Fout[3] ); + + Fout[1].r = scratch0.r + scratch1.i; + Fout[1].i = scratch0.i - scratch1.r; + Fout[3].r = scratch0.r - scratch1.i; + Fout[3].i = scratch0.i + scratch1.r; + Fout+=4; } - } -} - -static void ki_bfly4( - kiss_fft_cpx * Fout, - const size_t fstride, - const kiss_fft_state *st, - int m, - int N, - int mm - ) -{ - const kiss_twiddle_cpx *tw1,*tw2,*tw3; - kiss_fft_cpx scratch[6]; - const size_t m2=2*m; - const size_t m3=3*m; - int i, j; - - kiss_fft_cpx * Fout_beg = Fout; - for (i=0;i<N;i++) - { - Fout = Fout_beg + i*mm; - tw3 = tw2 = tw1 = st->twiddles; - for (j=0;j<m;j++) + } else { + int j; + kiss_fft_cpx scratch[6]; + const kiss_twiddle_cpx *tw1,*tw2,*tw3; + const int m2=2*m; + const int m3=3*m; + kiss_fft_cpx * Fout_beg = Fout; + for (i=0;i<N;i++) { - C_MULC(scratch[0],Fout[m] , *tw1 ); - C_MULC(scratch[1],Fout[m2] , *tw2 ); - C_MULC(scratch[2],Fout[m3] , *tw3 ); - - C_SUB( scratch[5] , *Fout, scratch[1] ); - C_ADDTO(*Fout, scratch[1]); - C_ADD( scratch[3] , scratch[0] , scratch[2] ); - C_SUB( scratch[4] , scratch[0] , scratch[2] ); - C_SUB( Fout[m2], *Fout, scratch[3] ); - tw1 += fstride; - tw2 += fstride*2; - tw3 += fstride*3; - C_ADDTO( *Fout , scratch[3] ); - - Fout[m].r = scratch[5].r - scratch[4].i; - Fout[m].i = scratch[5].i + scratch[4].r; - Fout[m3].r = scratch[5].r + scratch[4].i; - Fout[m3].i = scratch[5].i - scratch[4].r; - ++Fout; + Fout = Fout_beg + i*mm; + tw3 = tw2 = tw1 = st->twiddles; + /* m is guaranteed to be a multiple of 4. */ + for (j=0;j<m;j++) + { + C_MUL(scratch[0],Fout[m] , *tw1 ); + C_MUL(scratch[1],Fout[m2] , *tw2 ); + C_MUL(scratch[2],Fout[m3] , *tw3 ); + + C_SUB( scratch[5] , *Fout, scratch[1] ); + C_ADDTO(*Fout, scratch[1]); + C_ADD( scratch[3] , scratch[0] , scratch[2] ); + C_SUB( scratch[4] , scratch[0] , scratch[2] ); + C_SUB( Fout[m2], *Fout, scratch[3] ); + tw1 += fstride; + tw2 += fstride*2; + tw3 += fstride*3; + C_ADDTO( *Fout , scratch[3] ); + + Fout[m].r = scratch[5].r + scratch[4].i; + Fout[m].i = scratch[5].i - scratch[4].r; + Fout[m3].r = scratch[5].r - scratch[4].i; + Fout[m3].i = scratch[5].i + scratch[4].r; + ++Fout; + } } } } + #ifndef RADIX_TWO_ONLY static void kf_bfly3( @@ -220,14 +188,19 @@ static void kf_bfly3( kiss_twiddle_cpx epi3; kiss_fft_cpx * Fout_beg = Fout; +#ifdef OPUS_FIXED_POINT + epi3.r = -16384; + epi3.i = -28378; +#else epi3 = st->twiddles[fstride*m]; +#endif for (i=0;i<N;i++) { Fout = Fout_beg + i*mm; tw1=tw2=st->twiddles; + /* For non-custom modes, m is guaranteed to be a multiple of 4. */ k=m; do { - C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3); C_MUL(scratch[1],Fout[m] , *tw1); C_MUL(scratch[2],Fout[m2] , *tw2); @@ -255,56 +228,8 @@ static void kf_bfly3( } } -static void ki_bfly3( - kiss_fft_cpx * Fout, - const size_t fstride, - const kiss_fft_state *st, - int m, - int N, - int mm - ) -{ - int i, k; - const size_t m2 = 2*m; - const kiss_twiddle_cpx *tw1,*tw2; - kiss_fft_cpx scratch[5]; - kiss_twiddle_cpx epi3; - - kiss_fft_cpx * Fout_beg = Fout; - epi3 = st->twiddles[fstride*m]; - for (i=0;i<N;i++) - { - Fout = Fout_beg + i*mm; - tw1=tw2=st->twiddles; - k=m; - do{ - - C_MULC(scratch[1],Fout[m] , *tw1); - C_MULC(scratch[2],Fout[m2] , *tw2); - - C_ADD(scratch[3],scratch[1],scratch[2]); - C_SUB(scratch[0],scratch[1],scratch[2]); - tw1 += fstride; - tw2 += fstride*2; - - Fout[m].r = Fout->r - HALF_OF(scratch[3].r); - Fout[m].i = Fout->i - HALF_OF(scratch[3].i); - - C_MULBYSCALAR( scratch[0] , -epi3.i ); - - C_ADDTO(*Fout,scratch[3]); - - Fout[m2].r = Fout[m].r + scratch[0].i; - Fout[m2].i = Fout[m].i - scratch[0].r; - - Fout[m].r -= scratch[0].i; - Fout[m].i += scratch[0].r; - - ++Fout; - }while(--k); - } -} +#ifndef OVERRIDE_kf_bfly5 static void kf_bfly5( kiss_fft_cpx * Fout, const size_t fstride, @@ -317,13 +242,19 @@ static void kf_bfly5( kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; int i, u; kiss_fft_cpx scratch[13]; - const kiss_twiddle_cpx * twiddles = st->twiddles; const kiss_twiddle_cpx *tw; kiss_twiddle_cpx ya,yb; kiss_fft_cpx * Fout_beg = Fout; - ya = twiddles[fstride*m]; - yb = twiddles[fstride*2*m]; +#ifdef OPUS_FIXED_POINT + ya.r = 10126; + ya.i = -31164; + yb.r = -26510; + yb.i = -19261; +#else + ya = st->twiddles[fstride*m]; + yb = st->twiddles[fstride*2*m]; +#endif tw=st->twiddles; for (i=0;i<N;i++) @@ -335,8 +266,8 @@ static void kf_bfly5( Fout3=Fout0+3*m; Fout4=Fout0+4*m; + /* For non-custom modes, m is guaranteed to be a multiple of 4. */ for ( u=0; u<m; ++u ) { - C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5); scratch[0] = *Fout0; C_MUL(scratch[1] ,*Fout1, tw[u*fstride]); @@ -373,74 +304,8 @@ static void kf_bfly5( } } } +#endif /* OVERRIDE_kf_bfly5 */ -static void ki_bfly5( - kiss_fft_cpx * Fout, - const size_t fstride, - const kiss_fft_state *st, - int m, - int N, - int mm - ) -{ - kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; - int i, u; - kiss_fft_cpx scratch[13]; - const kiss_twiddle_cpx * twiddles = st->twiddles; - const kiss_twiddle_cpx *tw; - kiss_twiddle_cpx ya,yb; - kiss_fft_cpx * Fout_beg = Fout; - - ya = twiddles[fstride*m]; - yb = twiddles[fstride*2*m]; - tw=st->twiddles; - - for (i=0;i<N;i++) - { - Fout = Fout_beg + i*mm; - Fout0=Fout; - Fout1=Fout0+m; - Fout2=Fout0+2*m; - Fout3=Fout0+3*m; - Fout4=Fout0+4*m; - - for ( u=0; u<m; ++u ) { - scratch[0] = *Fout0; - - C_MULC(scratch[1] ,*Fout1, tw[u*fstride]); - C_MULC(scratch[2] ,*Fout2, tw[2*u*fstride]); - C_MULC(scratch[3] ,*Fout3, tw[3*u*fstride]); - C_MULC(scratch[4] ,*Fout4, tw[4*u*fstride]); - - C_ADD( scratch[7],scratch[1],scratch[4]); - C_SUB( scratch[10],scratch[1],scratch[4]); - C_ADD( scratch[8],scratch[2],scratch[3]); - C_SUB( scratch[9],scratch[2],scratch[3]); - - Fout0->r += scratch[7].r + scratch[8].r; - Fout0->i += scratch[7].i + scratch[8].i; - - scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r); - scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r); - - scratch[6].r = -S_MUL(scratch[10].i,ya.i) - S_MUL(scratch[9].i,yb.i); - scratch[6].i = S_MUL(scratch[10].r,ya.i) + S_MUL(scratch[9].r,yb.i); - - C_SUB(*Fout1,scratch[5],scratch[6]); - C_ADD(*Fout4,scratch[5],scratch[6]); - - scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r); - scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r); - scratch[12].r = S_MUL(scratch[10].i,yb.i) - S_MUL(scratch[9].i,ya.i); - scratch[12].i = -S_MUL(scratch[10].r,yb.i) + S_MUL(scratch[9].r,ya.i); - - C_ADD(*Fout2,scratch[11],scratch[12]); - C_SUB(*Fout3,scratch[11],scratch[12]); - - ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4; - } - } -} #endif @@ -488,6 +353,9 @@ static int kf_factor(int n,opus_int16 * facbuf) { int p=4; + int i; + int stages=0; + int nbak = n; /*factor out powers of 4, powers of 2, then any remaining primes */ do { @@ -509,9 +377,30 @@ int kf_factor(int n,opus_int16 * facbuf) { return 0; } - *facbuf++ = p; - *facbuf++ = n; + facbuf[2*stages] = p; + if (p==2 && stages > 1) + { + facbuf[2*stages] = 4; + facbuf[2] = 2; + } + stages++; } while (n > 1); + n = nbak; + /* Reverse the order to get the radix 4 at the end, so we can use the + fast degenerate case. It turns out that reversing the order also + improves the noise behaviour. */ + for (i=0;i<stages/2;i++) + { + int tmp; + tmp = facbuf[2*i]; + facbuf[2*i] = facbuf[2*(stages-i-1)]; + facbuf[2*(stages-i-1)] = tmp; + } + for (i=0;i<stages;i++) + { + n /= facbuf[2*i]; + facbuf[2*i+1] = n; + } return 1; } @@ -532,13 +421,19 @@ static void compute_twiddles(kiss_twiddle_cpx *twiddles, int nfft) #endif } +int opus_fft_alloc_arch_c(kiss_fft_state *st) { + (void)st; + return 0; +} + /* * * Allocates all necessary storage space for the fft and ifft. * The return value is a contiguous block of memory. As such, * It can be freed with free(). * */ -kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, const kiss_fft_state *base) +kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, + const kiss_fft_state *base, int arch) { kiss_fft_state *st=NULL; size_t memneeded = sizeof(struct kiss_fft_state); /* twiddle factors*/ @@ -555,14 +450,20 @@ kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, co kiss_twiddle_cpx *twiddles; st->nfft=nfft; -#ifndef OPUS_FIXED_POINT +#ifdef OPUS_FIXED_POINT + st->scale_shift = celt_ilog2(st->nfft); + if (st->nfft == 1<<st->scale_shift) + st->scale = Q15ONE; + else + st->scale = (1073741824+st->nfft/2)/st->nfft>>(15-st->scale_shift); +#else st->scale = 1.f/nfft; #endif if (base != NULL) { st->twiddles = base->twiddles; st->shift = 0; - while (nfft<<st->shift != base->nfft && st->shift < 32) + while (st->shift < 32 && nfft<<st->shift != base->nfft) st->shift++; if (st->shift>=32) goto fail; @@ -581,22 +482,31 @@ kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, co if (st->bitrev==NULL) goto fail; compute_bitrev_table(0, bitrev, 1,1, st->factors,st); + + /* Initialize architecture specific fft parameters */ + if (opus_fft_alloc_arch(st, arch)) + goto fail; } return st; fail: - opus_fft_free(st); + opus_fft_free(st, arch); return NULL; } -kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem ) +kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem, int arch) { - return opus_fft_alloc_twiddles(nfft, mem, lenmem, NULL); + return opus_fft_alloc_twiddles(nfft, mem, lenmem, NULL, arch); +} + +void opus_fft_free_arch_c(kiss_fft_state *st) { + (void)st; } -void opus_fft_free(const kiss_fft_state *cfg) +void opus_fft_free(const kiss_fft_state *cfg, int arch) { if (cfg) { + opus_fft_free_arch((kiss_fft_state *)cfg, arch); opus_free((opus_int16*)cfg->bitrev); if (cfg->shift < 0) opus_free((kiss_twiddle_cpx*)cfg->twiddles); @@ -606,7 +516,7 @@ void opus_fft_free(const kiss_fft_state *cfg) #endif /* CUSTOM_MODES */ -void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) +void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout) { int m2, m; int p; @@ -618,17 +528,6 @@ void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fou /* st->shift can be -1 */ shift = st->shift>0 ? st->shift : 0; - celt_assert2 (fin != fout, "In-place FFT not supported"); - /* Bit-reverse the input */ - for (i=0;i<st->nfft;i++) - { - fout[st->bitrev[i]] = fin[i]; -#ifndef OPUS_FIXED_POINT - fout[st->bitrev[i]].r *= st->scale; - fout[st->bitrev[i]].i *= st->scale; -#endif - } - fstride[0] = 1; L=0; do { @@ -647,7 +546,7 @@ void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fou switch (st->factors[2*i]) { case 2: - kf_bfly2(fout,fstride[i]<<shift,st,m, fstride[i], m2); + kf_bfly2(fout, m, fstride[i]); break; case 4: kf_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2); @@ -665,55 +564,39 @@ void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fou } } -void opus_ifft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) +void opus_fft_c(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) { - int m2, m; - int p; - int L; - int fstride[MAXFACTORS]; int i; - int shift; + opus_val16 scale; +#ifdef OPUS_FIXED_POINT + /* Allows us to scale with MULT16_32_Q16(), which is faster than + MULT16_32_Q15() on ARM. */ + int scale_shift = st->scale_shift-1; +#endif + scale = st->scale; - /* st->shift can be -1 */ - shift = st->shift>0 ? st->shift : 0; celt_assert2 (fin != fout, "In-place FFT not supported"); /* Bit-reverse the input */ for (i=0;i<st->nfft;i++) - fout[st->bitrev[i]] = fin[i]; - - fstride[0] = 1; - L=0; - do { - p = st->factors[2*L]; - m = st->factors[2*L+1]; - fstride[L+1] = fstride[L]*p; - L++; - } while(m!=1); - m = st->factors[2*L-1]; - for (i=L-1;i>=0;i--) { - if (i!=0) - m2 = st->factors[2*i-1]; - else - m2 = 1; - switch (st->factors[2*i]) - { - case 2: - ki_bfly2(fout,fstride[i]<<shift,st,m, fstride[i], m2); - break; - case 4: - ki_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2); - break; -#ifndef RADIX_TWO_ONLY - case 3: - ki_bfly3(fout,fstride[i]<<shift,st,m, fstride[i], m2); - break; - case 5: - ki_bfly5(fout,fstride[i]<<shift,st,m, fstride[i], m2); - break; -#endif - } - m = m2; + kiss_fft_cpx x = fin[i]; + fout[st->bitrev[i]].r = SHR32(MULT16_32_Q16(scale, x.r), scale_shift); + fout[st->bitrev[i]].i = SHR32(MULT16_32_Q16(scale, x.i), scale_shift); } + opus_fft_impl(st, fout); } + +void opus_ifft_c(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) +{ + int i; + celt_assert2 (fin != fout, "In-place FFT not supported"); + /* Bit-reverse the input */ + for (i=0;i<st->nfft;i++) + fout[st->bitrev[i]] = fin[i]; + for (i=0;i<st->nfft;i++) + fout[i].i = -fout[i].i; + opus_fft_impl(st, fout); + for (i=0;i<st->nfft;i++) + fout[i].i = -fout[i].i; +} |