diff options
Diffstat (limited to 'drivers/opus/celt/celt_encoder.c')
-rw-r--r-- | drivers/opus/celt/celt_encoder.c | 391 |
1 files changed, 221 insertions, 170 deletions
diff --git a/drivers/opus/celt/celt_encoder.c b/drivers/opus/celt/celt_encoder.c index 810ee5d743..9099bcd278 100644 --- a/drivers/opus/celt/celt_encoder.c +++ b/drivers/opus/celt/celt_encoder.c @@ -26,10 +26,7 @@ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ - -#ifdef OPUS_ENABLED #include "opus/opus_config.h" -#endif #define CELT_ENCODER_C @@ -40,7 +37,7 @@ #include "opus/celt/celt.h" #include "opus/celt/pitch.h" #include "opus/celt/bands.h" -#include "opus/celt/opus_modes.h" +#include "opus/celt/modes.h" #include "opus/celt/entcode.h" #include "opus/celt/quant_bands.h" #include "opus/celt/rate.h" @@ -57,7 +54,6 @@ */ struct OpusCustomEncoder { const OpusCustomMode *mode; /**< Mode used by the encoder */ - int overlap; int channels; int stream_channels; @@ -173,7 +169,6 @@ static int opus_custom_encoder_init_arch(CELTEncoder *st, const CELTMode *mode, OPUS_CLEAR((char*)st, opus_custom_encoder_get_size(mode, channels)); st->mode = mode; - st->overlap = mode->overlap; st->stream_channels = st->channels = channels; st->upsample = 1; @@ -276,8 +271,7 @@ static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int } /*printf("\n");*/ /* First few samples are bad because we don't propagate the memory */ - for (i=0;i<12;i++) - tmp[i] = 0; + OPUS_CLEAR(tmp, 12); #ifdef OPUS_FIXED_POINT /* Normalize tmp to max range */ @@ -346,9 +340,9 @@ static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int { int id; #ifdef OPUS_FIXED_POINT - id = IMAX(0,IMIN(127,MULT16_32_Q15(tmp[i],norm))); /* Do not round to nearest */ + id = MAX32(0,MIN32(127,MULT16_32_Q15(tmp[i]+EPSILON,norm))); /* Do not round to nearest */ #else - id = IMAX(0,IMIN(127,(int)floor(64*norm*tmp[i]))); /* Do not round to nearest */ + id = (int)MAX32(0,MIN32(127,floor(64*norm*(tmp[i]+EPSILON)))); /* Do not round to nearest */ #endif unmask += inv_table[id]; } @@ -366,7 +360,7 @@ static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int /* Arbitrary metric for VBR boost */ tf_max = MAX16(0,celt_sqrt(27*mask_metric)-42); /* *tf_estimate = 1 + MIN16(1, sqrt(MAX16(0, tf_max-30))/20); */ - *tf_estimate = celt_sqrt(MAX16(0, SHL32(MULT16_16(QCONST16(0.0069,14),MIN16(163,tf_max)),14)-QCONST32(0.139,28))); + *tf_estimate = celt_sqrt(MAX32(0, SHL32(MULT16_16(QCONST16(0.0069,14),MIN16(163,tf_max)),14)-QCONST32(0.139,28))); /*printf("%d %f\n", tf_max, mask_metric);*/ RESTORE_STACK; #ifdef FUZZING @@ -378,8 +372,8 @@ static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int /* Looks for sudden increases of energy to decide whether we need to patch the transient decision */ -int patch_transient_decision(opus_val16 *newE, opus_val16 *oldE, int nbEBands, - int end, int C) +static int patch_transient_decision(opus_val16 *newE, opus_val16 *oldE, int nbEBands, + int start, int end, int C) { int i, c; opus_val32 mean_diff=0; @@ -388,28 +382,28 @@ int patch_transient_decision(opus_val16 *newE, opus_val16 *oldE, int nbEBands, avoid false detection caused by irrelevant bands */ if (C==1) { - spread_old[0] = oldE[0]; - for (i=1;i<end;i++) + spread_old[start] = oldE[start]; + for (i=start+1;i<end;i++) spread_old[i] = MAX16(spread_old[i-1]-QCONST16(1.0f, DB_SHIFT), oldE[i]); } else { - spread_old[0] = MAX16(oldE[0],oldE[nbEBands]); - for (i=1;i<end;i++) + spread_old[start] = MAX16(oldE[start],oldE[start+nbEBands]); + for (i=start+1;i<end;i++) spread_old[i] = MAX16(spread_old[i-1]-QCONST16(1.0f, DB_SHIFT), MAX16(oldE[i],oldE[i+nbEBands])); } - for (i=end-2;i>=0;i--) + for (i=end-2;i>=start;i--) spread_old[i] = MAX16(spread_old[i], spread_old[i+1]-QCONST16(1.0f, DB_SHIFT)); /* Compute mean increase */ c=0; do { - for (i=2;i<end-1;i++) + for (i=IMAX(2,start);i<end-1;i++) { opus_val16 x1, x2; - x1 = MAX16(0, newE[i]); + x1 = MAX16(0, newE[i + c*nbEBands]); x2 = MAX16(0, spread_old[i]); mean_diff = ADD32(mean_diff, EXTEND32(MAX16(0, SUB16(x1, x2)))); } } while (++c<C); - mean_diff = DIV32(mean_diff, C*(end-3)); + mean_diff = DIV32(mean_diff, C*(end-1-IMAX(2,start))); /*printf("%f %f %d\n", mean_diff, max_diff, count);*/ return mean_diff > QCONST16(1.f, DB_SHIFT); } @@ -417,9 +411,10 @@ int patch_transient_decision(opus_val16 *newE, opus_val16 *oldE, int nbEBands, /** Apply window and compute the MDCT for all sub-frames and all channels in a frame */ static void compute_mdcts(const CELTMode *mode, int shortBlocks, celt_sig * OPUS_RESTRICT in, - celt_sig * OPUS_RESTRICT out, int C, int CC, int LM, int upsample) + celt_sig * OPUS_RESTRICT out, int C, int CC, int LM, int upsample, + int arch) { - const int overlap = OVERLAP(mode); + const int overlap = mode->overlap; int N; int B; int shift; @@ -438,7 +433,9 @@ static void compute_mdcts(const CELTMode *mode, int shortBlocks, celt_sig * OPUS for (b=0;b<B;b++) { /* Interleaving the sub-frames while doing the MDCTs */ - clt_mdct_forward(&mode->mdct, in+c*(B*N+overlap)+b*N, &out[b+c*N*B], mode->window, overlap, shift, B); + clt_mdct_forward(&mode->mdct, in+c*(B*N+overlap)+b*N, + &out[b+c*N*B], mode->window, overlap, shift, B, + arch); } } while (++c<CC); if (CC==2&&C==1) @@ -453,8 +450,7 @@ static void compute_mdcts(const CELTMode *mode, int shortBlocks, celt_sig * OPUS int bound = B*N/upsample; for (i=0;i<bound;i++) out[c*B*N+i] *= upsample; - for (;i<B*N;i++) - out[c*B*N+i] = 0; + OPUS_CLEAR(&out[c*B*N+bound], B*N-bound); } while (++c<C); } } @@ -469,26 +465,30 @@ void celt_preemphasis(const opus_val16 * OPUS_RESTRICT pcmp, celt_sig * OPUS_RES int Nu; coef0 = coef[0]; + m = *mem; + /* Fast path for the normal 48kHz case and no clipping */ + if (coef[1] == 0 && upsample == 1 && !clip) + { + for (i=0;i<N;i++) + { + opus_val16 x; + x = SCALEIN(pcmp[CC*i]); + /* Apply pre-emphasis */ + inp[i] = SHL32(x, SIG_SHIFT) - m; + m = SHR32(MULT16_16(coef0, x), 15-SIG_SHIFT); + } + *mem = m; + return; + } Nu = N/upsample; if (upsample!=1) { - for (i=0;i<N;i++) - inp[i] = 0; + OPUS_CLEAR(inp, N); } for (i=0;i<Nu;i++) - { - celt_sig x; - - x = SCALEIN(pcmp[CC*i]); -#ifndef OPUS_FIXED_POINT - /* Replace NaNs with zeros */ - if (!(x==x)) - x = 0; -#endif - inp[i*upsample] = x; - } + inp[i*upsample] = SCALEIN(pcmp[CC*i]); #ifndef OPUS_FIXED_POINT if (clip) @@ -500,7 +500,6 @@ void celt_preemphasis(const opus_val16 * OPUS_RESTRICT pcmp, celt_sig * OPUS_RES #else (void)clip; /* Avoids a warning about clip being unused. */ #endif - m = *mem; #ifdef CUSTOM_MODES if (coef[1] != 0) { @@ -520,11 +519,11 @@ void celt_preemphasis(const opus_val16 * OPUS_RESTRICT pcmp, celt_sig * OPUS_RES { for (i=0;i<N;i++) { - celt_sig x; - x = SHL32(inp[i], SIG_SHIFT); + opus_val16 x; + x = inp[i]; /* Apply pre-emphasis */ - inp[i] = x + m; - m = - MULT16_32_Q15(coef0, x); + inp[i] = SHL32(x, SIG_SHIFT) - m; + m = SHR32(MULT16_16(coef0, x), 15-SIG_SHIFT); } } *mem = m; @@ -575,15 +574,14 @@ static int tf_analysis(const CELTMode *m, int len, int isTransient, *tf_sum = 0; for (i=0;i<len;i++) { - int j, k, N; + int k, N; int narrow; opus_val32 L1, best_L1; int best_level=0; N = (m->eBands[i+1]-m->eBands[i])<<LM; /* band is too narrow to be split down to LM=-1 */ narrow = (m->eBands[i+1]-m->eBands[i])==1; - for (j=0;j<N;j++) - tmp[j] = X[tf_chan*N0 + j+(m->eBands[i]<<LM)]; + OPUS_COPY(tmp, &X[tf_chan*N0 + (m->eBands[i]<<LM)], N); /* Just add the right channel if we're in stereo */ /*if (C==2) for (j=0;j<N;j++) @@ -593,8 +591,7 @@ static int tf_analysis(const CELTMode *m, int len, int isTransient, /* Check the -1 case for transients */ if (isTransient && !narrow) { - for (j=0;j<N;j++) - tmp_1[j] = tmp[j]; + OPUS_COPY(tmp_1, tmp, N); haar1(tmp_1, N>>LM, 1<<LM); L1 = l1_metric(tmp_1, N, LM+1, bias); if (L1<best_L1) @@ -754,12 +751,12 @@ static void tf_encode(int start, int end, int isTransient, int *tf_res, int LM, static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X, const opus_val16 *bandLogE, int end, int LM, int C, int N0, AnalysisInfo *analysis, opus_val16 *stereo_saving, opus_val16 tf_estimate, - int intensity, opus_val16 surround_trim) + int intensity, opus_val16 surround_trim, int arch) { int i; opus_val32 diff=0; int c; - int trim_index = 5; + int trim_index; opus_val16 trim = QCONST16(5.f, 8); opus_val16 logXC, logXC2; if (C==2) @@ -769,10 +766,9 @@ static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X, /* Compute inter-channel correlation for low frequencies */ for (i=0;i<8;i++) { - int j; - opus_val32 partial = 0; - for (j=m->eBands[i]<<LM;j<m->eBands[i+1]<<LM;j++) - partial = MAC16_16(partial, X[j], X[N0+j]); + opus_val32 partial; + partial = celt_inner_prod(&X[m->eBands[i]<<LM], &X[N0+(m->eBands[i]<<LM)], + (m->eBands[i+1]-m->eBands[i])<<LM, arch); sum = ADD16(sum, EXTRACT16(SHR32(partial, 18))); } sum = MULT16_16_Q15(QCONST16(1.f/8, 15), sum); @@ -780,22 +776,13 @@ static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X, minXC = sum; for (i=8;i<intensity;i++) { - int j; - opus_val32 partial = 0; - for (j=m->eBands[i]<<LM;j<m->eBands[i+1]<<LM;j++) - partial = MAC16_16(partial, X[j], X[N0+j]); + opus_val32 partial; + partial = celt_inner_prod(&X[m->eBands[i]<<LM], &X[N0+(m->eBands[i]<<LM)], + (m->eBands[i+1]-m->eBands[i])<<LM, arch); minXC = MIN16(minXC, ABS16(EXTRACT16(SHR32(partial, 18)))); } minXC = MIN16(QCONST16(1.f, 10), ABS16(minXC)); /*printf ("%f\n", sum);*/ - if (sum > QCONST16(.995f,10)) - trim_index-=4; - else if (sum > QCONST16(.92f,10)) - trim_index-=3; - else if (sum > QCONST16(.85f,10)) - trim_index-=2; - else if (sum > QCONST16(.8f,10)) - trim_index-=1; /* mid-side savings estimations based on the LF average*/ logXC = celt_log2(QCONST32(1.001f, 20)-MULT16_16(sum, sum)); /* mid-side savings estimations based on min correlation */ @@ -819,14 +806,6 @@ static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X, } while (++c<C); diff /= C*(end-1); /*printf("%f\n", diff);*/ - if (diff > QCONST16(2.f, DB_SHIFT)) - trim_index--; - if (diff > QCONST16(8.f, DB_SHIFT)) - trim_index--; - if (diff < -QCONST16(4.f, DB_SHIFT)) - trim_index++; - if (diff < -QCONST16(10.f, DB_SHIFT)) - trim_index++; trim -= MAX16(-QCONST16(2.f, 8), MIN16(QCONST16(2.f, 8), SHR16(diff+QCONST16(1.f, DB_SHIFT),DB_SHIFT-8)/6 )); trim -= SHR16(surround_trim, DB_SHIFT-8); trim -= 2*SHR16(tf_estimate, 14-8); @@ -836,6 +815,8 @@ static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X, trim -= MAX16(-QCONST16(2.f, 8), MIN16(QCONST16(2.f, 8), (opus_val16)(QCONST16(2.f, 8)*(analysis->tonality_slope+.05f)))); } +#else + (void)analysis; #endif #ifdef OPUS_FIXED_POINT @@ -843,10 +824,7 @@ static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X, #else trim_index = (int)floor(.5f+trim); #endif - if (trim_index<0) - trim_index = 0; - if (trim_index>10) - trim_index = 10; + trim_index = IMAX(0, IMIN(10, trim_index)); /*printf("%d\n", trim_index);*/ #ifdef FUZZING trim_index = rand()%11; @@ -886,6 +864,66 @@ static int stereo_analysis(const CELTMode *m, const celt_norm *X, > MULT16_32_Q15(m->eBands[13]<<(LM+1), sumLR); } +#define MSWAP(a,b) do {opus_val16 tmp = a;a=b;b=tmp;} while(0) +static opus_val16 median_of_5(const opus_val16 *x) +{ + opus_val16 t0, t1, t2, t3, t4; + t2 = x[2]; + if (x[0] > x[1]) + { + t0 = x[1]; + t1 = x[0]; + } else { + t0 = x[0]; + t1 = x[1]; + } + if (x[3] > x[4]) + { + t3 = x[4]; + t4 = x[3]; + } else { + t3 = x[3]; + t4 = x[4]; + } + if (t0 > t3) + { + MSWAP(t0, t3); + MSWAP(t1, t4); + } + if (t2 > t1) + { + if (t1 < t3) + return MIN16(t2, t3); + else + return MIN16(t4, t1); + } else { + if (t2 < t3) + return MIN16(t1, t3); + else + return MIN16(t2, t4); + } +} + +static opus_val16 median_of_3(const opus_val16 *x) +{ + opus_val16 t0, t1, t2; + if (x[0] > x[1]) + { + t0 = x[1]; + t1 = x[0]; + } else { + t0 = x[0]; + t1 = x[1]; + } + t2 = x[2]; + if (t1 < t2) + return t1; + else if (t0 < t2) + return t2; + else + return t0; +} + static opus_val16 dynalloc_analysis(const opus_val16 *bandLogE, const opus_val16 *bandLogE2, int nbEBands, int start, int end, int C, int *offsets, int lsb_depth, const opus_int16 *logN, int isTransient, int vbr, int constrained_vbr, const opus_int16 *eBands, int LM, @@ -899,8 +937,7 @@ static opus_val16 dynalloc_analysis(const opus_val16 *bandLogE, const opus_val16 SAVE_STACK; ALLOC(follower, C*nbEBands, opus_val16); ALLOC(noise_floor, C*nbEBands, opus_val16); - for (i=0;i<nbEBands;i++) - offsets[i] = 0; + OPUS_CLEAR(offsets, nbEBands); /* Dynamic allocation code */ maxDepth=-QCONST16(31.9f, DB_SHIFT); for (i=0;i<end;i++) @@ -922,7 +959,11 @@ static opus_val16 dynalloc_analysis(const opus_val16 *bandLogE, const opus_val16 int last=0; c=0;do { - follower[c*nbEBands] = bandLogE2[c*nbEBands]; + opus_val16 offset; + opus_val16 tmp; + opus_val16 *f; + f = &follower[c*nbEBands]; + f[0] = bandLogE2[c*nbEBands]; for (i=1;i<end;i++) { /* The last band to be at least 3 dB higher than the previous one @@ -930,12 +971,26 @@ static opus_val16 dynalloc_analysis(const opus_val16 *bandLogE, const opus_val16 bandlimited signals. */ if (bandLogE2[c*nbEBands+i] > bandLogE2[c*nbEBands+i-1]+QCONST16(.5f,DB_SHIFT)) last=i; - follower[c*nbEBands+i] = MIN16(follower[c*nbEBands+i-1]+QCONST16(1.5f,DB_SHIFT), bandLogE2[c*nbEBands+i]); + f[i] = MIN16(f[i-1]+QCONST16(1.5f,DB_SHIFT), bandLogE2[c*nbEBands+i]); } for (i=last-1;i>=0;i--) - follower[c*nbEBands+i] = MIN16(follower[c*nbEBands+i], MIN16(follower[c*nbEBands+i+1]+QCONST16(2.f,DB_SHIFT), bandLogE2[c*nbEBands+i])); + f[i] = MIN16(f[i], MIN16(f[i+1]+QCONST16(2.f,DB_SHIFT), bandLogE2[c*nbEBands+i])); + + /* Combine with a median filter to avoid dynalloc triggering unnecessarily. + The "offset" value controls how conservative we are -- a higher offset + reduces the impact of the median filter and makes dynalloc use more bits. */ + offset = QCONST16(1.f, DB_SHIFT); + for (i=2;i<end-2;i++) + f[i] = MAX16(f[i], median_of_5(&bandLogE2[c*nbEBands+i-2])-offset); + tmp = median_of_3(&bandLogE2[c*nbEBands])-offset; + f[0] = MAX16(f[0], tmp); + f[1] = MAX16(f[1], tmp); + tmp = median_of_3(&bandLogE2[c*nbEBands+end-3])-offset; + f[end-2] = MAX16(f[end-2], tmp); + f[end-1] = MAX16(f[end-1], tmp); + for (i=0;i<end;i++) - follower[c*nbEBands+i] = MAX16(follower[c*nbEBands+i], noise_floor[i]); + f[i] = MAX16(f[i], noise_floor[i]); } while (++c<C); if (C==2) { @@ -1016,9 +1071,11 @@ static int run_prefilter(CELTEncoder *st, celt_sig *in, celt_sig *prefilter_mem, opus_val16 pf_threshold; int pf_on; int qg; + int overlap; SAVE_STACK; mode = st->mode; + overlap = mode->overlap; ALLOC(_pre, CC*(N+COMBFILTER_MAXPERIOD), celt_sig); pre[0] = _pre; @@ -1027,7 +1084,7 @@ static int run_prefilter(CELTEncoder *st, celt_sig *in, celt_sig *prefilter_mem, c=0; do { OPUS_COPY(pre[c], prefilter_mem+c*COMBFILTER_MAXPERIOD, COMBFILTER_MAXPERIOD); - OPUS_COPY(pre[c]+COMBFILTER_MAXPERIOD, in+c*(N+st->overlap)+st->overlap, N); + OPUS_COPY(pre[c]+COMBFILTER_MAXPERIOD, in+c*(N+overlap)+overlap, N); } while (++c<CC); if (enabled) @@ -1044,7 +1101,7 @@ static int run_prefilter(CELTEncoder *st, celt_sig *in, celt_sig *prefilter_mem, pitch_index = COMBFILTER_MAXPERIOD-pitch_index; gain1 = remove_doubling(pitch_buf, COMBFILTER_MAXPERIOD, COMBFILTER_MINPERIOD, - N, &pitch_index, st->prefilter_period, st->prefilter_gain); + N, &pitch_index, st->prefilter_period, st->prefilter_gain, st->arch); if (pitch_index > COMBFILTER_MAXPERIOD-2) pitch_index = COMBFILTER_MAXPERIOD-2; gain1 = MULT16_16_Q15(QCONST16(.7f,15),gain1); @@ -1100,18 +1157,18 @@ static int run_prefilter(CELTEncoder *st, celt_sig *in, celt_sig *prefilter_mem, /*printf("%d %f\n", pitch_index, gain1);*/ c=0; do { - int offset = mode->shortMdctSize-st->overlap; + int offset = mode->shortMdctSize-overlap; st->prefilter_period=IMAX(st->prefilter_period, COMBFILTER_MINPERIOD); - OPUS_COPY(in+c*(N+st->overlap), st->in_mem+c*(st->overlap), st->overlap); + OPUS_COPY(in+c*(N+overlap), st->in_mem+c*(overlap), overlap); if (offset) - comb_filter(in+c*(N+st->overlap)+st->overlap, pre[c]+COMBFILTER_MAXPERIOD, + comb_filter(in+c*(N+overlap)+overlap, pre[c]+COMBFILTER_MAXPERIOD, st->prefilter_period, st->prefilter_period, offset, -st->prefilter_gain, -st->prefilter_gain, - st->prefilter_tapset, st->prefilter_tapset, NULL, 0); + st->prefilter_tapset, st->prefilter_tapset, NULL, 0, st->arch); - comb_filter(in+c*(N+st->overlap)+st->overlap+offset, pre[c]+COMBFILTER_MAXPERIOD+offset, + comb_filter(in+c*(N+overlap)+overlap+offset, pre[c]+COMBFILTER_MAXPERIOD+offset, st->prefilter_period, pitch_index, N-offset, -st->prefilter_gain, -gain1, - st->prefilter_tapset, prefilter_tapset, mode->window, st->overlap); - OPUS_COPY(st->in_mem+c*(st->overlap), in+c*(N+st->overlap)+N, st->overlap); + st->prefilter_tapset, prefilter_tapset, mode->window, overlap, st->arch); + OPUS_COPY(st->in_mem+c*(overlap), in+c*(N+overlap)+N, overlap); if (N>COMBFILTER_MAXPERIOD) { @@ -1196,6 +1253,9 @@ static int compute_vbr(const CELTMode *mode, AnalysisInfo *analysis, opus_int32 /*printf("%f %f ", analysis->tonality, tonal);*/ target = tonal_target; } +#else + (void)analysis; + (void)pitch_change; #endif if (has_surround_mask&&!lfe) @@ -1273,6 +1333,8 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, int LM, M; int tf_select; int nbFilledBytes, nbAvailableBytes; + int start; + int end; int effEnd; int codedBands; int tf_sum; @@ -1316,6 +1378,8 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, nbEBands = mode->nbEBands; overlap = mode->overlap; eBands = mode->eBands; + start = st->start; + end = st->end; tf_estimate = 0; if (nbCompressedBytes<2 || pcm==NULL) { @@ -1335,8 +1399,8 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, M=1<<LM; N = M*mode->shortMdctSize; - prefilter_mem = st->in_mem+CC*(st->overlap); - oldBandE = (opus_val16*)(st->in_mem+CC*(st->overlap+COMBFILTER_MAXPERIOD)); + prefilter_mem = st->in_mem+CC*(overlap); + oldBandE = (opus_val16*)(st->in_mem+CC*(overlap+COMBFILTER_MAXPERIOD)); oldLogE = oldBandE + CC*nbEBands; oldLogE2 = oldLogE + CC*nbEBands; @@ -1352,8 +1416,8 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, #ifdef CUSTOM_MODES if (st->signalling && enc==NULL) { - int tmp = (mode->effEBands-st->end)>>1; - st->end = IMAX(1, mode->effEBands-tmp); + int tmp = (mode->effEBands-end)>>1; + end = st->end = IMAX(1, mode->effEBands-tmp); compressed[0] = tmp<<5; compressed[0] |= LM<<3; compressed[0] |= (C==2)<<2; @@ -1436,11 +1500,11 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, } total_bits = nbCompressedBytes*8; - effEnd = st->end; + effEnd = end; if (effEnd > mode->effEBands) effEnd = mode->effEBands; - ALLOC(in, CC*(N+st->overlap), celt_sig); + ALLOC(in, CC*(N+overlap), celt_sig); sample_max=MAX32(st->overlap_max, celt_maxabs16(pcm, C*(N-overlap)/st->upsample)); st->overlap_max=celt_maxabs16(pcm+C*(N-overlap)/st->upsample, C*overlap/st->upsample); @@ -1474,8 +1538,12 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, enc->nbits_total+=tell-ec_tell(enc); } c=0; do { - celt_preemphasis(pcm+c, in+c*(N+st->overlap)+st->overlap, N, CC, st->upsample, - mode->preemph, st->preemph_memE+c, st->clip); + int need_clip=0; +#ifndef OPUS_FIXED_POINT + need_clip = st->clip && sample_max>65536.f; +#endif + celt_preemphasis(pcm+c, in+c*(N+overlap)+overlap, N, CC, st->upsample, + mode->preemph, st->preemph_memE+c, need_clip); } while (++c<CC); @@ -1484,7 +1552,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, { int enabled; int qg; - enabled = ((st->lfe&&nbAvailableBytes>3) || nbAvailableBytes>12*C) && st->start==0 && !silence && !st->disable_pf + enabled = ((st->lfe&&nbAvailableBytes>3) || nbAvailableBytes>12*C) && start==0 && !silence && !st->disable_pf && st->complexity >= 5 && !(st->consec_transient && LM!=3 && st->variable_duration==OPUS_FRAMESIZE_VARIABLE); prefilter_tapset = st->tapset_decision; @@ -1494,7 +1562,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, pitch_change = 1; if (pf_on==0) { - if(st->start==0 && tell+16<=total_bits) + if(start==0 && tell+16<=total_bits) ec_enc_bit_logp(enc, 0, 1); } else { /*This block is not gated by a total bits check only because @@ -1515,7 +1583,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, shortBlocks = 0; if (st->complexity >= 1 && !st->lfe) { - isTransient = transient_analysis(in, N+st->overlap, CC, + isTransient = transient_analysis(in, N+overlap, CC, &tf_estimate, &tf_chan); } if (LM>0 && ec_tell(enc)+3<=total_bits) @@ -1535,33 +1603,32 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, ALLOC(bandLogE2, C*nbEBands, opus_val16); if (secondMdct) { - compute_mdcts(mode, 0, in, freq, C, CC, LM, st->upsample); - compute_band_energies(mode, freq, bandE, effEnd, C, M); - amp2Log2(mode, effEnd, st->end, bandE, bandLogE2, C); + compute_mdcts(mode, 0, in, freq, C, CC, LM, st->upsample, st->arch); + compute_band_energies(mode, freq, bandE, effEnd, C, LM); + amp2Log2(mode, effEnd, end, bandE, bandLogE2, C); for (i=0;i<C*nbEBands;i++) bandLogE2[i] += HALF16(SHL16(LM, DB_SHIFT)); } - compute_mdcts(mode, shortBlocks, in, freq, C, CC, LM, st->upsample); + compute_mdcts(mode, shortBlocks, in, freq, C, CC, LM, st->upsample, st->arch); if (CC==2&&C==1) tf_chan = 0; - compute_band_energies(mode, freq, bandE, effEnd, C, M); + compute_band_energies(mode, freq, bandE, effEnd, C, LM); if (st->lfe) { - for (i=2;i<st->end;i++) + for (i=2;i<end;i++) { bandE[i] = IMIN(bandE[i], MULT16_32_Q15(QCONST16(1e-4f,15),bandE[0])); bandE[i] = MAX32(bandE[i], EPSILON); } } - amp2Log2(mode, effEnd, st->end, bandE, bandLogE, C); + amp2Log2(mode, effEnd, end, bandE, bandLogE, C); ALLOC(surround_dynalloc, C*nbEBands, opus_val16); - for(i=0;i<st->end;i++) - surround_dynalloc[i] = 0; + OPUS_CLEAR(surround_dynalloc, end); /* This computes how much masking takes place between surround channels */ - if (st->start==0&&st->energy_mask&&!st->lfe) + if (start==0&&st->energy_mask&&!st->lfe) { int mask_end; int midband; @@ -1584,6 +1651,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, diff += MULT16_16(mask, 1+2*i-mask_end); } } + celt_assert(count>0); mask_avg = DIV32_16(mask_avg,count); mask_avg += QCONST16(.2f, DB_SHIFT); diff = diff*6/(C*(mask_end-1)*(mask_end+1)*mask_end); @@ -1621,8 +1689,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, disabling masking. */ mask_avg = 0; diff = 0; - for(i=0;i<mask_end;i++) - surround_dynalloc[i] = 0; + OPUS_CLEAR(surround_dynalloc, mask_end); } else { for(i=0;i<mask_end;i++) surround_dynalloc[i] = MAX16(0, surround_dynalloc[i]-QCONST16(.25f, DB_SHIFT)); @@ -1640,14 +1707,14 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, opus_val16 follow=-QCONST16(10.0f,DB_SHIFT); opus_val32 frame_avg=0; opus_val16 offset = shortBlocks?HALF16(SHL16(LM, DB_SHIFT)):0; - for(i=st->start;i<st->end;i++) + for(i=start;i<end;i++) { follow = MAX16(follow-QCONST16(1.f, DB_SHIFT), bandLogE[i]-offset); if (C==2) follow = MAX16(follow, bandLogE[i+nbEBands]-offset); frame_avg += follow; } - frame_avg /= (st->end-st->start); + frame_avg /= (end-start); temporal_vbr = SUB16(frame_avg,st->spec_avg); temporal_vbr = MIN16(QCONST16(3.f, DB_SHIFT), MAX16(-QCONST16(1.5f, DB_SHIFT), temporal_vbr)); st->spec_avg += MULT16_16_Q15(QCONST16(.02f, 15), temporal_vbr); @@ -1658,21 +1725,20 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, if (!secondMdct) { - for (i=0;i<C*nbEBands;i++) - bandLogE2[i] = bandLogE[i]; + OPUS_COPY(bandLogE2, bandLogE, C*nbEBands); } /* Last chance to catch any transient we might have missed in the time-domain analysis */ if (LM>0 && ec_tell(enc)+3<=total_bits && !isTransient && st->complexity>=5 && !st->lfe) { - if (patch_transient_decision(bandLogE, oldBandE, nbEBands, st->end, C)) + if (patch_transient_decision(bandLogE, oldBandE, nbEBands, start, end, C)) { isTransient = 1; shortBlocks = M; - compute_mdcts(mode, shortBlocks, in, freq, C, CC, LM, st->upsample); - compute_band_energies(mode, freq, bandE, effEnd, C, M); - amp2Log2(mode, effEnd, st->end, bandE, bandLogE, C); + compute_mdcts(mode, shortBlocks, in, freq, C, CC, LM, st->upsample, st->arch); + compute_band_energies(mode, freq, bandE, effEnd, C, LM); + amp2Log2(mode, effEnd, end, bandE, bandLogE, C); /* Compensate for the scaling of short vs long mdcts */ for (i=0;i<C*nbEBands;i++) bandLogE2[i] += HALF16(SHL16(LM, DB_SHIFT)); @@ -1690,7 +1756,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, ALLOC(tf_res, nbEBands, int); /* Disable variable tf resolution for hybrid and at very low bitrate */ - if (effectiveBytes>=15*C && st->start==0 && st->complexity>=2 && !st->lfe) + if (effectiveBytes>=15*C && start==0 && st->complexity>=2 && !st->lfe) { int lambda; if (effectiveBytes<40) @@ -1703,22 +1769,22 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, lambda = 3; lambda*=2; tf_select = tf_analysis(mode, effEnd, isTransient, tf_res, lambda, X, N, LM, &tf_sum, tf_estimate, tf_chan); - for (i=effEnd;i<st->end;i++) + for (i=effEnd;i<end;i++) tf_res[i] = tf_res[effEnd-1]; } else { tf_sum = 0; - for (i=0;i<st->end;i++) + for (i=0;i<end;i++) tf_res[i] = isTransient; tf_select=0; } ALLOC(error, C*nbEBands, opus_val16); - quant_coarse_energy(mode, st->start, st->end, effEnd, bandLogE, + quant_coarse_energy(mode, start, end, effEnd, bandLogE, oldBandE, total_bits, error, enc, C, LM, nbAvailableBytes, st->force_intra, &st->delayedIntra, st->complexity >= 4, st->loss_rate, st->lfe); - tf_encode(st->start, st->end, isTransient, tf_res, LM, tf_select, enc); + tf_encode(start, end, isTransient, tf_res, LM, tf_select, enc); if (ec_tell(enc)+4<=total_bits) { @@ -1726,7 +1792,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, { st->tapset_decision = 0; st->spread_decision = SPREAD_NORMAL; - } else if (shortBlocks || st->complexity < 3 || nbAvailableBytes < 10*C || st->start != 0) + } else if (shortBlocks || st->complexity < 3 || nbAvailableBytes < 10*C || start != 0) { if (st->complexity == 0) st->spread_decision = SPREAD_NONE; @@ -1760,7 +1826,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, ALLOC(offsets, nbEBands, int); - maxDepth = dynalloc_analysis(bandLogE, bandLogE2, nbEBands, st->start, st->end, C, offsets, + maxDepth = dynalloc_analysis(bandLogE, bandLogE2, nbEBands, start, end, C, offsets, st->lsb_depth, mode->logN, isTransient, st->vbr, st->constrained_vbr, eBands, LM, effectiveBytes, &tot_boost, st->lfe, surround_dynalloc); /* For LFE, everything interesting is in the first band */ @@ -1773,7 +1839,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, total_bits<<=BITRES; total_boost = 0; tell = ec_tell_frac(enc); - for (i=st->start;i<st->end;i++) + for (i=start;i<end;i++) { int width, quanta; int dynalloc_loop_logp; @@ -1818,7 +1884,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, st->intensity = hysteresis_decision((opus_val16)(equiv_rate/1000), intensity_thresholds, intensity_histeresis, 21, st->intensity); - st->intensity = IMIN(st->end,IMAX(st->start, st->intensity)); + st->intensity = IMIN(end,IMAX(start, st->intensity)); } alloc_trim = 5; @@ -1828,7 +1894,8 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, alloc_trim = 5; else alloc_trim = alloc_trim_analysis(mode, X, bandLogE, - st->end, LM, C, N, &st->analysis, &st->stereo_saving, tf_estimate, st->intensity, surround_trim); + end, LM, C, N, &st->analysis, &st->stereo_saving, tf_estimate, + st->intensity, surround_trim, st->arch); ec_enc_icdf(enc, alloc_trim, trim_icdf, 7); tell = ec_tell_frac(enc); } @@ -1930,7 +1997,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, bits = (((opus_int32)nbCompressedBytes*8)<<BITRES) - ec_tell_frac(enc) - 1; anti_collapse_rsv = isTransient&&LM>=2&&bits>=((LM+2)<<BITRES) ? (1<<BITRES) : 0; bits -= anti_collapse_rsv; - signalBandwidth = st->end-1; + signalBandwidth = end-1; #ifndef DISABLE_FLOAT_API if (st->analysis.valid) { @@ -1950,7 +2017,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, #endif if (st->lfe) signalBandwidth = 1; - codedBands = compute_allocation(mode, st->start, st->end, offsets, cap, + codedBands = compute_allocation(mode, start, end, offsets, cap, alloc_trim, &st->intensity, &dual_stereo, bits, &balance, pulses, fine_quant, fine_priority, C, LM, enc, 1, st->lastCodedBands, signalBandwidth); if (st->lastCodedBands) @@ -1958,13 +2025,14 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, else st->lastCodedBands = codedBands; - quant_fine_energy(mode, st->start, st->end, oldBandE, error, fine_quant, enc, C); + quant_fine_energy(mode, start, end, oldBandE, error, fine_quant, enc, C); /* Residual quantisation */ ALLOC(collapse_masks, C*nbEBands, unsigned char); - quant_all_bands(1, mode, st->start, st->end, X, C==2 ? X+N : NULL, collapse_masks, - bandE, pulses, shortBlocks, st->spread_decision, dual_stereo, st->intensity, tf_res, - nbCompressedBytes*(8<<BITRES)-anti_collapse_rsv, balance, enc, LM, codedBands, &st->rng); + quant_all_bands(1, mode, start, end, X, C==2 ? X+N : NULL, collapse_masks, + bandE, pulses, shortBlocks, st->spread_decision, + dual_stereo, st->intensity, tf_res, nbCompressedBytes*(8<<BITRES)-anti_collapse_rsv, + balance, enc, LM, codedBands, &st->rng, st->arch); if (anti_collapse_rsv > 0) { @@ -1974,7 +2042,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, #endif ec_enc_bits(enc, anti_collapse_on, 1); } - quant_energy_finalise(mode, st->start, st->end, oldBandE, error, fine_quant, fine_priority, nbCompressedBytes*8-ec_tell(enc), enc, C); + quant_energy_finalise(mode, start, end, oldBandE, error, fine_quant, fine_priority, nbCompressedBytes*8-ec_tell(enc), enc, C); if (silence) { @@ -1990,40 +2058,26 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, if (anti_collapse_on) { anti_collapse(mode, X, collapse_masks, LM, C, N, - st->start, st->end, oldBandE, oldLogE, oldLogE2, pulses, st->rng); - } - - if (silence) - { - for (i=0;i<C*N;i++) - freq[i] = 0; - } else { - /* Synthesis */ - denormalise_bands(mode, X, freq, oldBandE, st->start, effEnd, C, M); + start, end, oldBandE, oldLogE, oldLogE2, pulses, st->rng); } c=0; do { OPUS_MOVE(st->syn_mem[c], st->syn_mem[c]+N, 2*MAX_PERIOD-N+overlap/2); } while (++c<CC); - if (CC==2&&C==1) - { - for (i=0;i<N;i++) - freq[N+i] = freq[i]; - } - c=0; do { out_mem[c] = st->syn_mem[c]+2*MAX_PERIOD-N; } while (++c<CC); - compute_inv_mdcts(mode, shortBlocks, freq, out_mem, CC, LM); + celt_synthesis(mode, X, out_mem, oldBandE, start, effEnd, + C, CC, isTransient, LM, st->upsample, silence, st->arch); c=0; do { st->prefilter_period=IMAX(st->prefilter_period, COMBFILTER_MINPERIOD); st->prefilter_period_old=IMAX(st->prefilter_period_old, COMBFILTER_MINPERIOD); comb_filter(out_mem[c], out_mem[c], st->prefilter_period_old, st->prefilter_period, mode->shortMdctSize, st->prefilter_gain_old, st->prefilter_gain, st->prefilter_tapset_old, st->prefilter_tapset, - mode->window, st->overlap); + mode->window, overlap); if (LM!=0) comb_filter(out_mem[c]+mode->shortMdctSize, out_mem[c]+mode->shortMdctSize, st->prefilter_period, pitch_index, N-mode->shortMdctSize, st->prefilter_gain, gain1, st->prefilter_tapset, prefilter_tapset, @@ -2031,7 +2085,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, } while (++c<CC); /* We reuse freq[] as scratch space for the de-emphasis */ - deemphasis(out_mem, (opus_val16*)pcm, N, CC, st->upsample, mode->preemph, st->preemph_memD, freq); + deemphasis(out_mem, (opus_val16*)pcm, N, CC, st->upsample, mode->preemph, st->preemph_memD); st->prefilter_period_old = st->prefilter_period; st->prefilter_gain_old = st->prefilter_gain; st->prefilter_tapset_old = st->prefilter_tapset; @@ -2051,16 +2105,13 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, #endif if (CC==2&&C==1) { - for (i=0;i<nbEBands;i++) - oldBandE[nbEBands+i]=oldBandE[i]; + OPUS_COPY(&oldBandE[nbEBands], oldBandE, nbEBands); } if (!isTransient) { - for (i=0;i<CC*nbEBands;i++) - oldLogE2[i] = oldLogE[i]; - for (i=0;i<CC*nbEBands;i++) - oldLogE[i] = oldBandE[i]; + OPUS_COPY(oldLogE2, oldLogE, CC*nbEBands); + OPUS_COPY(oldLogE, oldBandE, CC*nbEBands); } else { for (i=0;i<CC*nbEBands;i++) oldLogE[i] = MIN16(oldLogE[i], oldBandE[i]); @@ -2068,12 +2119,12 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, /* In case start or end were to change */ c=0; do { - for (i=0;i<st->start;i++) + for (i=0;i<start;i++) { oldBandE[c*nbEBands+i]=0; oldLogE[c*nbEBands+i]=oldLogE2[c*nbEBands+i]=-QCONST16(28.f,DB_SHIFT); } - for (i=st->end;i<nbEBands;i++) + for (i=end;i<nbEBands;i++) { oldBandE[c*nbEBands+i]=0; oldLogE[c*nbEBands+i]=oldLogE2[c*nbEBands+i]=-QCONST16(28.f,DB_SHIFT); @@ -2274,7 +2325,7 @@ int opus_custom_encoder_ctl(CELTEncoder * OPUS_RESTRICT st, int request, ...) { int i; opus_val16 *oldBandE, *oldLogE, *oldLogE2; - oldBandE = (opus_val16*)(st->in_mem+st->channels*(st->overlap+COMBFILTER_MAXPERIOD)); + oldBandE = (opus_val16*)(st->in_mem+st->channels*(st->mode->overlap+COMBFILTER_MAXPERIOD)); oldLogE = oldBandE + st->channels*st->mode->nbEBands; oldLogE2 = oldLogE + st->channels*st->mode->nbEBands; OPUS_CLEAR((char*)&st->ENCODER_RESET_START, |