diff options
author | Rémi Verschelde <rverschelde@gmail.com> | 2019-11-18 09:56:18 +0100 |
---|---|---|
committer | Rémi Verschelde <rverschelde@gmail.com> | 2019-11-18 09:56:48 +0100 |
commit | 46ae64cd60166ead412bacc1bf03e9c8f8965e2c (patch) | |
tree | 9e592667ffa91e55491a66733e5e3d7de0b666c9 /thirdparty | |
parent | 974646309bfe09c48c8a72bf751b0ea6ad8b5bc5 (diff) |
Revert "Update opus to 1.3.1 and opusfile to 0.11"
This reverts commit e00426c512a7905f5f925d382c443bab7a0ca693.
The way we handle platform-specific intrinsics is not good, so the
current state will not compile on armv8. This commit also requires
SSE4.1 support, which is likely not a good idea for portable binaries.
We'll have to redo this with more caution after 3.2 is released, or
we might simply drop opus as we're only using it as dependency for
theora right now.
Fixes #33606.
Diffstat (limited to 'thirdparty')
224 files changed, 6902 insertions, 10443 deletions
diff --git a/thirdparty/README.md b/thirdparty/README.md index 27cfe41238..9571ee49b9 100644 --- a/thirdparty/README.md +++ b/thirdparty/README.md @@ -397,19 +397,17 @@ Files extracted from the upstream source: ## opus - Upstream: https://opus-codec.org -- Version: 1.3.1 (opus) and 0.11 (opusfile) +- Version: 1.1.5 (opus) and 0.8 (opusfile) - License: BSD-3-Clause Files extracted from upstream source: -- Run `opus/configure` and copy/sync changes to `config.h` - (note that this file may have Godot-specific options enabled) - all .c and .h files in src/ (both opus and opusfile) - all .h files in include/ (both opus and opusfile) as opus/ - remove unused `opus_demo.c`, - remove `http.c`, `wincerts.c` and `winerrno.h` (part of unused libopusurl) -- celt/ and silk/ subfolders (minus tests folders) +- celt/ and silk/ subfolders - COPYING diff --git a/thirdparty/opus/analysis.c b/thirdparty/opus/analysis.c index cb46dec582..663431a436 100644 --- a/thirdparty/opus/analysis.c +++ b/thirdparty/opus/analysis.c @@ -29,29 +29,20 @@ #include "config.h" #endif -#define ANALYSIS_C - -#include <stdio.h> - -#include "mathops.h" #include "kiss_fft.h" #include "celt.h" #include "modes.h" #include "arch.h" #include "quant_bands.h" +#include <stdio.h> #include "analysis.h" #include "mlp.h" #include "stack_alloc.h" -#include "float_cast.h" #ifndef M_PI #define M_PI 3.141592653 #endif -#ifndef DISABLE_FLOAT_API - -#define TRANSITION_PENALTY 10 - static const float dct_table[128] = { 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, @@ -105,118 +96,52 @@ static const float analysis_window[240] = { }; static const int tbands[NB_TBANDS+1] = { - 4, 8, 12, 16, 20, 24, 28, 32, 40, 48, 56, 64, 80, 96, 112, 136, 160, 192, 240 + 2, 4, 6, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, 68, 80, 96, 120 }; -#define NB_TONAL_SKIP_BANDS 9 +static const int extra_bands[NB_TOT_BANDS+1] = { + 1, 2, 4, 6, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, 68, 80, 96, 120, 160, 200 +}; -static opus_val32 silk_resampler_down2_hp( - opus_val32 *S, /* I/O State vector [ 2 ] */ - opus_val32 *out, /* O Output signal [ floor(len/2) ] */ - const opus_val32 *in, /* I Input signal [ len ] */ - int inLen /* I Number of input samples */ -) -{ - int k, len2 = inLen/2; - opus_val32 in32, out32, out32_hp, Y, X; - opus_val64 hp_ener = 0; - /* Internal variables and state are in Q10 format */ - for( k = 0; k < len2; k++ ) { - /* Convert to Q10 */ - in32 = in[ 2 * k ]; - - /* All-pass section for even input sample */ - Y = SUB32( in32, S[ 0 ] ); - X = MULT16_32_Q15(QCONST16(0.6074371f, 15), Y); - out32 = ADD32( S[ 0 ], X ); - S[ 0 ] = ADD32( in32, X ); - out32_hp = out32; - /* Convert to Q10 */ - in32 = in[ 2 * k + 1 ]; - - /* All-pass section for odd input sample, and add to output of previous section */ - Y = SUB32( in32, S[ 1 ] ); - X = MULT16_32_Q15(QCONST16(0.15063f, 15), Y); - out32 = ADD32( out32, S[ 1 ] ); - out32 = ADD32( out32, X ); - S[ 1 ] = ADD32( in32, X ); - - Y = SUB32( -in32, S[ 2 ] ); - X = MULT16_32_Q15(QCONST16(0.15063f, 15), Y); - out32_hp = ADD32( out32_hp, S[ 2 ] ); - out32_hp = ADD32( out32_hp, X ); - S[ 2 ] = ADD32( -in32, X ); - - hp_ener += out32_hp*(opus_val64)out32_hp; - /* Add, convert back to int16 and store to output */ - out[ k ] = HALF32(out32); - } -#ifdef FIXED_POINT - /* len2 can be up to 480, so we shift by 8 more to make it fit. */ - hp_ener = hp_ener >> (2*SIG_SHIFT + 8); -#endif - return (opus_val32)hp_ener; -} +/*static const float tweight[NB_TBANDS+1] = { + .3, .4, .5, .6, .7, .8, .9, 1., 1., 1., 1., 1., 1., 1., .8, .7, .6, .5 +};*/ -static opus_val32 downmix_and_resample(downmix_func downmix, const void *_x, opus_val32 *y, opus_val32 S[3], int subframe, int offset, int c1, int c2, int C, int Fs) -{ - VARDECL(opus_val32, tmp); - opus_val32 scale; - int j; - opus_val32 ret = 0; - SAVE_STACK; - - if (subframe==0) return 0; - if (Fs == 48000) - { - subframe *= 2; - offset *= 2; - } else if (Fs == 16000) { - subframe = subframe*2/3; - offset = offset*2/3; - } - ALLOC(tmp, subframe, opus_val32); +#define NB_TONAL_SKIP_BANDS 9 - downmix(_x, tmp, subframe, offset, c1, c2, C); -#ifdef FIXED_POINT - scale = (1<<SIG_SHIFT); -#else - scale = 1.f/32768; -#endif - if (c2==-2) - scale /= C; - else if (c2>-1) - scale /= 2; - for (j=0;j<subframe;j++) - tmp[j] *= scale; - if (Fs == 48000) +#define cA 0.43157974f +#define cB 0.67848403f +#define cC 0.08595542f +#define cE ((float)M_PI/2) +static OPUS_INLINE float fast_atan2f(float y, float x) { + float x2, y2; + /* Should avoid underflow on the values we'll get */ + if (ABS16(x)+ABS16(y)<1e-9f) { - ret = silk_resampler_down2_hp(S, y, tmp, subframe); - } else if (Fs == 24000) { - OPUS_COPY(y, tmp, subframe); - } else if (Fs == 16000) { - VARDECL(opus_val32, tmp3x); - ALLOC(tmp3x, 3*subframe, opus_val32); - /* Don't do this at home! This resampler is horrible and it's only (barely) - usable for the purpose of the analysis because we don't care about all - the aliasing between 8 kHz and 12 kHz. */ - for (j=0;j<subframe;j++) - { - tmp3x[3*j] = tmp[j]; - tmp3x[3*j+1] = tmp[j]; - tmp3x[3*j+2] = tmp[j]; - } - silk_resampler_down2_hp(S, y, tmp3x, 3*subframe); + x*=1e12f; + y*=1e12f; + } + x2 = x*x; + y2 = y*y; + if(x2<y2){ + float den = (y2 + cB*x2) * (y2 + cC*x2); + if (den!=0) + return -x*y*(y2 + cA*x2) / den + (y<0 ? -cE : cE); + else + return (y<0 ? -cE : cE); + }else{ + float den = (x2 + cB*y2) * (x2 + cC*y2); + if (den!=0) + return x*y*(x2 + cA*y2) / den + (y<0 ? -cE : cE) - (x*y<0 ? -cE : cE); + else + return (y<0 ? -cE : cE) - (x*y<0 ? -cE : cE); } - RESTORE_STACK; - return ret; } -void tonality_analysis_init(TonalityAnalysisState *tonal, opus_int32 Fs) +void tonality_analysis_init(TonalityAnalysisState *tonal) { /* Initialize reusable fields. */ tonal->arch = opus_select_arch(); - tonal->Fs = Fs; /* Clear remaining fields. */ tonality_analysis_reset(tonal); } @@ -232,34 +157,15 @@ void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int { int pos; int curr_lookahead; - float tonality_max; - float tonality_avg; - int tonality_count; + float psum; int i; - int pos0; - float prob_avg; - float prob_count; - float prob_min, prob_max; - float vad_prob; - int mpos, vpos; - int bandwidth_span; pos = tonal->read_pos; curr_lookahead = tonal->write_pos-tonal->read_pos; if (curr_lookahead<0) curr_lookahead += DETECT_SIZE; - tonal->read_subframe += len/(tonal->Fs/400); - while (tonal->read_subframe>=8) - { - tonal->read_subframe -= 8; - tonal->read_pos++; - } - if (tonal->read_pos>=DETECT_SIZE) - tonal->read_pos-=DETECT_SIZE; - - /* On long frames, look at the second analysis window rather than the first. */ - if (len > tonal->Fs/50 && pos != tonal->write_pos) + if (len > 480 && pos != tonal->write_pos) { pos++; if (pos==DETECT_SIZE) @@ -269,177 +175,32 @@ void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int pos--; if (pos<0) pos = DETECT_SIZE-1; - pos0 = pos; OPUS_COPY(info_out, &tonal->info[pos], 1); - if (!info_out->valid) - return; - tonality_max = tonality_avg = info_out->tonality; - tonality_count = 1; - /* Look at the neighbouring frames and pick largest bandwidth found (to be safe). */ - bandwidth_span = 6; - /* If possible, look ahead for a tone to compensate for the delay in the tone detector. */ - for (i=0;i<3;i++) - { - pos++; - if (pos==DETECT_SIZE) - pos = 0; - if (pos == tonal->write_pos) - break; - tonality_max = MAX32(tonality_max, tonal->info[pos].tonality); - tonality_avg += tonal->info[pos].tonality; - tonality_count++; - info_out->bandwidth = IMAX(info_out->bandwidth, tonal->info[pos].bandwidth); - bandwidth_span--; - } - pos = pos0; - /* Look back in time to see if any has a wider bandwidth than the current frame. */ - for (i=0;i<bandwidth_span;i++) - { - pos--; - if (pos < 0) - pos = DETECT_SIZE-1; - if (pos == tonal->write_pos) - break; - info_out->bandwidth = IMAX(info_out->bandwidth, tonal->info[pos].bandwidth); - } - info_out->tonality = MAX32(tonality_avg/tonality_count, tonality_max-.2f); - - mpos = vpos = pos0; - /* If we have enough look-ahead, compensate for the ~5-frame delay in the music prob and - ~1 frame delay in the VAD prob. */ - if (curr_lookahead > 15) + tonal->read_subframe += len/120; + while (tonal->read_subframe>=4) { - mpos += 5; - if (mpos>=DETECT_SIZE) - mpos -= DETECT_SIZE; - vpos += 1; - if (vpos>=DETECT_SIZE) - vpos -= DETECT_SIZE; - } - - /* The following calculations attempt to minimize a "badness function" - for the transition. When switching from speech to music, the badness - of switching at frame k is - b_k = S*v_k + \sum_{i=0}^{k-1} v_i*(p_i - T) - where - v_i is the activity probability (VAD) at frame i, - p_i is the music probability at frame i - T is the probability threshold for switching - S is the penalty for switching during active audio rather than silence - the current frame has index i=0 - - Rather than apply badness to directly decide when to switch, what we compute - instead is the threshold for which the optimal switching point is now. When - considering whether to switch now (frame 0) or at frame k, we have: - S*v_0 = S*v_k + \sum_{i=0}^{k-1} v_i*(p_i - T) - which gives us: - T = ( \sum_{i=0}^{k-1} v_i*p_i + S*(v_k-v_0) ) / ( \sum_{i=0}^{k-1} v_i ) - We take the min threshold across all positive values of k (up to the maximum - amount of lookahead we have) to give us the threshold for which the current - frame is the optimal switch point. - - The last step is that we need to consider whether we want to switch at all. - For that we use the average of the music probability over the entire window. - If the threshold is higher than that average we're not going to - switch, so we compute a min with the average as well. The result of all these - min operations is music_prob_min, which gives the threshold for switching to music - if we're currently encoding for speech. - - We do the exact opposite to compute music_prob_max which is used for switching - from music to speech. - */ - prob_min = 1.f; - prob_max = 0.f; - vad_prob = tonal->info[vpos].activity_probability; - prob_count = MAX16(.1f, vad_prob); - prob_avg = MAX16(.1f, vad_prob)*tonal->info[mpos].music_prob; - while (1) - { - float pos_vad; - mpos++; - if (mpos==DETECT_SIZE) - mpos = 0; - if (mpos == tonal->write_pos) - break; - vpos++; - if (vpos==DETECT_SIZE) - vpos = 0; - if (vpos == tonal->write_pos) - break; - pos_vad = tonal->info[vpos].activity_probability; - prob_min = MIN16((prob_avg - TRANSITION_PENALTY*(vad_prob - pos_vad))/prob_count, prob_min); - prob_max = MAX16((prob_avg + TRANSITION_PENALTY*(vad_prob - pos_vad))/prob_count, prob_max); - prob_count += MAX16(.1f, pos_vad); - prob_avg += MAX16(.1f, pos_vad)*tonal->info[mpos].music_prob; - } - info_out->music_prob = prob_avg/prob_count; - prob_min = MIN16(prob_avg/prob_count, prob_min); - prob_max = MAX16(prob_avg/prob_count, prob_max); - prob_min = MAX16(prob_min, 0.f); - prob_max = MIN16(prob_max, 1.f); - - /* If we don't have enough look-ahead, do our best to make a decent decision. */ - if (curr_lookahead < 10) - { - float pmin, pmax; - pmin = prob_min; - pmax = prob_max; - pos = pos0; - /* Look for min/max in the past. */ - for (i=0;i<IMIN(tonal->count-1, 15);i++) - { - pos--; - if (pos < 0) - pos = DETECT_SIZE-1; - pmin = MIN16(pmin, tonal->info[pos].music_prob); - pmax = MAX16(pmax, tonal->info[pos].music_prob); - } - /* Bias against switching on active audio. */ - pmin = MAX16(0.f, pmin - .1f*vad_prob); - pmax = MIN16(1.f, pmax + .1f*vad_prob); - prob_min += (1.f-.1f*curr_lookahead)*(pmin - prob_min); - prob_max += (1.f-.1f*curr_lookahead)*(pmax - prob_max); + tonal->read_subframe -= 4; + tonal->read_pos++; } - info_out->music_prob_min = prob_min; - info_out->music_prob_max = prob_max; - - /* printf("%f %f %f %f %f\n", prob_min, prob_max, prob_avg/prob_count, vad_prob, info_out->music_prob); */ -} - -static const float std_feature_bias[9] = { - 5.684947f, 3.475288f, 1.770634f, 1.599784f, 3.773215f, - 2.163313f, 1.260756f, 1.116868f, 1.918795f -}; - -#define LEAKAGE_OFFSET 2.5f -#define LEAKAGE_SLOPE 2.f - -#ifdef FIXED_POINT -/* For fixed-point, the input is +/-2^15 shifted up by SIG_SHIFT, so we need to - compensate for that in the energy. */ -#define SCALE_COMPENS (1.f/((opus_int32)1<<(15+SIG_SHIFT))) -#define SCALE_ENER(e) ((SCALE_COMPENS*SCALE_COMPENS)*(e)) -#else -#define SCALE_ENER(e) (e) -#endif - -#ifdef FIXED_POINT -static int is_digital_silence32(const opus_val32* pcm, int frame_size, int channels, int lsb_depth) -{ - int silence = 0; - opus_val32 sample_max = 0; -#ifdef MLP_TRAINING - return 0; -#endif - sample_max = celt_maxabs32(pcm, frame_size*channels); + if (tonal->read_pos>=DETECT_SIZE) + tonal->read_pos-=DETECT_SIZE; - silence = (sample_max == 0); - (void)lsb_depth; - return silence; + /* Compensate for the delay in the features themselves. + FIXME: Need a better estimate the 10 I just made up */ + curr_lookahead = IMAX(curr_lookahead-10, 0); + + psum=0; + /* Summing the probability of transition patterns that involve music at + time (DETECT_SIZE-curr_lookahead-1) */ + for (i=0;i<DETECT_SIZE-curr_lookahead;i++) + psum += tonal->pmusic[i]; + for (;i<DETECT_SIZE;i++) + psum += tonal->pspeech[i]; + psum = psum*tonal->music_confidence + (1-psum)*tonal->speech_confidence; + /*printf("%f %f %f\n", psum, info_out->music_prob, info_out->tonality);*/ + + info_out->music_prob = psum; } -#else -#define is_digital_silence32(pcm, frame_size, channels, lsb_depth) is_digital_silence(pcm, frame_size, channels, lsb_depth) -#endif static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt_mode, const void *x, int len, int offset, int c1, int c2, int C, int lsb_depth, downmix_func downmix) { @@ -469,50 +230,24 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt float alpha, alphaE, alphaE2; float frame_loudness; float bandwidth_mask; - int is_masked[NB_TBANDS+1]; int bandwidth=0; float maxE = 0; float noise_floor; int remaining; AnalysisInfo *info; - float hp_ener; - float tonality2[240]; - float midE[8]; - float spec_variability=0; - float band_log2[NB_TBANDS+1]; - float leakage_from[NB_TBANDS+1]; - float leakage_to[NB_TBANDS+1]; - float layer_out[MAX_NEURONS]; - float below_max_pitch; - float above_max_pitch; - int is_silence; SAVE_STACK; - if (!tonal->initialized) - { - tonal->mem_fill = 240; - tonal->initialized = 1; - } - alpha = 1.f/IMIN(10, 1+tonal->count); - alphaE = 1.f/IMIN(25, 1+tonal->count); - /* Noise floor related decay for bandwidth detection: -2.2 dB/second */ - alphaE2 = 1.f/IMIN(100, 1+tonal->count); - if (tonal->count <= 1) alphaE2 = 1; - - if (tonal->Fs == 48000) - { - /* len and offset are now at 24 kHz. */ - len/= 2; - offset /= 2; - } else if (tonal->Fs == 16000) { - len = 3*len/2; - offset = 3*offset/2; - } + tonal->last_transition++; + alpha = 1.f/IMIN(20, 1+tonal->count); + alphaE = 1.f/IMIN(50, 1+tonal->count); + alphaE2 = 1.f/IMIN(1000, 1+tonal->count); + if (tonal->count<4) + tonal->music_prob = .5; kfft = celt_mode->mdct.kfft[0]; - tonal->hp_ener_accum += (float)downmix_and_resample(downmix, x, - &tonal->inmem[tonal->mem_fill], tonal->downmix_state, - IMIN(len, ANALYSIS_BUF_SIZE-tonal->mem_fill), offset, c1, c2, C, tonal->Fs); + if (tonal->count==0) + tonal->mem_fill = 240; + downmix(x, &tonal->inmem[tonal->mem_fill], IMIN(len, ANALYSIS_BUF_SIZE-tonal->mem_fill), offset, c1, c2, C); if (tonal->mem_fill+len < ANALYSIS_BUF_SIZE) { tonal->mem_fill += len; @@ -520,13 +255,10 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt RESTORE_STACK; return; } - hp_ener = tonal->hp_ener_accum; info = &tonal->info[tonal->write_pos++]; if (tonal->write_pos>=DETECT_SIZE) tonal->write_pos-=DETECT_SIZE; - is_silence = is_digital_silence32(tonal->inmem, ANALYSIS_BUF_SIZE, 1, lsb_depth); - ALLOC(in, 480, kiss_fft_cpx); ALLOC(out, 480, kiss_fft_cpx); ALLOC(tonality, 240, float); @@ -541,20 +273,8 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt } OPUS_MOVE(tonal->inmem, tonal->inmem+ANALYSIS_BUF_SIZE-240, 240); remaining = len - (ANALYSIS_BUF_SIZE-tonal->mem_fill); - tonal->hp_ener_accum = (float)downmix_and_resample(downmix, x, - &tonal->inmem[240], tonal->downmix_state, remaining, - offset+ANALYSIS_BUF_SIZE-tonal->mem_fill, c1, c2, C, tonal->Fs); + downmix(x, &tonal->inmem[240], remaining, offset+ANALYSIS_BUF_SIZE-tonal->mem_fill, c1, c2, C); tonal->mem_fill = 240 + remaining; - if (is_silence) - { - /* On silence, copy the previous analysis. */ - int prev_pos = tonal->write_pos-2; - if (prev_pos < 0) - prev_pos += DETECT_SIZE; - OPUS_COPY(info, &tonal->info[prev_pos], 1); - RESTORE_STACK; - return; - } opus_fft(kfft, in, out, tonal->arch); #ifndef FIXED_POINT /* If there's any NaN on the input, the entire output will be NaN, so we only need to check one value. */ @@ -585,31 +305,24 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt d_angle2 = angle2 - angle; d2_angle2 = d_angle2 - d_angle; - mod1 = d2_angle - (float)float2int(d2_angle); + mod1 = d2_angle - (float)floor(.5+d2_angle); noisiness[i] = ABS16(mod1); mod1 *= mod1; mod1 *= mod1; - mod2 = d2_angle2 - (float)float2int(d2_angle2); + mod2 = d2_angle2 - (float)floor(.5+d2_angle2); noisiness[i] += ABS16(mod2); mod2 *= mod2; mod2 *= mod2; - avg_mod = .25f*(d2A[i]+mod1+2*mod2); - /* This introduces an extra delay of 2 frames in the detection. */ + avg_mod = .25f*(d2A[i]+2.f*mod1+mod2); tonality[i] = 1.f/(1.f+40.f*16.f*pi4*avg_mod)-.015f; - /* No delay on this detection, but it's less reliable. */ - tonality2[i] = 1.f/(1.f+40.f*16.f*pi4*mod2)-.015f; A[i] = angle2; dA[i] = d_angle2; d2A[i] = mod2; } - for (i=2;i<N2-1;i++) - { - float tt = MIN32(tonality2[i], MAX32(tonality2[i-1], tonality2[i+1])); - tonality[i] = .9f*MAX32(tonality[i], tt-.1f); - } + frame_tonality = 0; max_frame_tonality = 0; /*tw_sum = 0;*/ @@ -626,22 +339,6 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt } relativeE = 0; frame_loudness = 0; - /* The energy of the very first band is special because of DC. */ - { - float E = 0; - float X1r, X2r; - X1r = 2*(float)out[0].r; - X2r = 2*(float)out[0].i; - E = X1r*X1r + X2r*X2r; - for (i=1;i<4;i++) - { - float binE = out[i].r*(float)out[i].r + out[N-i].r*(float)out[N-i].r - + out[i].i*(float)out[i].i + out[N-i].i*(float)out[N-i].i; - E += binE; - } - E = SCALE_ENER(E); - band_log2[0] = .5f*1.442695f*(float)log(E+1e-10f); - } for (b=0;b<NB_TBANDS;b++) { float E=0, tE=0, nE=0; @@ -651,9 +348,12 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt { float binE = out[i].r*(float)out[i].r + out[N-i].r*(float)out[N-i].r + out[i].i*(float)out[i].i + out[N-i].i*(float)out[N-i].i; - binE = SCALE_ENER(binE); +#ifdef FIXED_POINT + /* FIXME: It's probably best to change the BFCC filter initial state instead */ + binE *= 5.55e-17f; +#endif E += binE; - tE += binE*MAX32(0, tonality[i]); + tE += binE*tonality[i]; nE += binE*2.f*(.5f-noisiness[i]); } #ifndef FIXED_POINT @@ -671,27 +371,14 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt frame_loudness += (float)sqrt(E+1e-10f); logE[b] = (float)log(E+1e-10f); - band_log2[b+1] = .5f*1.442695f*(float)log(E+1e-10f); - tonal->logE[tonal->E_count][b] = logE[b]; - if (tonal->count==0) - tonal->highE[b] = tonal->lowE[b] = logE[b]; - if (tonal->highE[b] > tonal->lowE[b] + 7.5) + tonal->lowE[b] = MIN32(logE[b], tonal->lowE[b]+.01f); + tonal->highE[b] = MAX32(logE[b], tonal->highE[b]-.1f); + if (tonal->highE[b] < tonal->lowE[b]+1.f) { - if (tonal->highE[b] - logE[b] > logE[b] - tonal->lowE[b]) - tonal->highE[b] -= .01f; - else - tonal->lowE[b] += .01f; + tonal->highE[b]+=.5f; + tonal->lowE[b]-=.5f; } - if (logE[b] > tonal->highE[b]) - { - tonal->highE[b] = logE[b]; - tonal->lowE[b] = MAX32(tonal->highE[b]-15, tonal->lowE[b]); - } else if (logE[b] < tonal->lowE[b]) - { - tonal->lowE[b] = logE[b]; - tonal->highE[b] = MIN32(tonal->lowE[b]+15, tonal->highE[b]); - } - relativeE += (logE[b]-tonal->lowE[b])/(1e-5f + (tonal->highE[b]-tonal->lowE[b])); + relativeE += (logE[b]-tonal->lowE[b])/(1e-15f+tonal->highE[b]-tonal->lowE[b]); L1=L2=0; for (i=0;i<NB_FRAMES;i++) @@ -723,135 +410,45 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt tonal->prev_band_tonality[b] = band_tonality[b]; } - leakage_from[0] = band_log2[0]; - leakage_to[0] = band_log2[0] - LEAKAGE_OFFSET; - for (b=1;b<NB_TBANDS+1;b++) - { - float leak_slope = LEAKAGE_SLOPE*(tbands[b]-tbands[b-1])/4; - leakage_from[b] = MIN16(leakage_from[b-1]+leak_slope, band_log2[b]); - leakage_to[b] = MAX16(leakage_to[b-1]-leak_slope, band_log2[b]-LEAKAGE_OFFSET); - } - for (b=NB_TBANDS-2;b>=0;b--) - { - float leak_slope = LEAKAGE_SLOPE*(tbands[b+1]-tbands[b])/4; - leakage_from[b] = MIN16(leakage_from[b+1]+leak_slope, leakage_from[b]); - leakage_to[b] = MAX16(leakage_to[b+1]-leak_slope, leakage_to[b]); - } - celt_assert(NB_TBANDS+1 <= LEAK_BANDS); - for (b=0;b<NB_TBANDS+1;b++) - { - /* leak_boost[] is made up of two terms. The first, based on leakage_to[], - represents the boost needed to overcome the amount of analysis leakage - cause in a weaker band b by louder neighbouring bands. - The second, based on leakage_from[], applies to a loud band b for - which the quantization noise causes synthesis leakage to the weaker - neighbouring bands. */ - float boost = MAX16(0, leakage_to[b] - band_log2[b]) + - MAX16(0, band_log2[b] - (leakage_from[b]+LEAKAGE_OFFSET)); - info->leak_boost[b] = IMIN(255, (int)floor(.5 + 64.f*boost)); - } - for (;b<LEAK_BANDS;b++) info->leak_boost[b] = 0; - - for (i=0;i<NB_FRAMES;i++) - { - int j; - float mindist = 1e15f; - for (j=0;j<NB_FRAMES;j++) - { - int k; - float dist=0; - for (k=0;k<NB_TBANDS;k++) - { - float tmp; - tmp = tonal->logE[i][k] - tonal->logE[j][k]; - dist += tmp*tmp; - } - if (j!=i) - mindist = MIN32(mindist, dist); - } - spec_variability += mindist; - } - spec_variability = (float)sqrt(spec_variability/NB_FRAMES/NB_TBANDS); bandwidth_mask = 0; bandwidth = 0; maxE = 0; noise_floor = 5.7e-4f/(1<<(IMAX(0,lsb_depth-8))); +#ifdef FIXED_POINT + noise_floor *= 1<<(15+SIG_SHIFT); +#endif noise_floor *= noise_floor; - below_max_pitch=0; - above_max_pitch=0; - for (b=0;b<NB_TBANDS;b++) + for (b=0;b<NB_TOT_BANDS;b++) { float E=0; - float Em; int band_start, band_end; /* Keep a margin of 300 Hz for aliasing */ - band_start = tbands[b]; - band_end = tbands[b+1]; + band_start = extra_bands[b]; + band_end = extra_bands[b+1]; for (i=band_start;i<band_end;i++) { float binE = out[i].r*(float)out[i].r + out[N-i].r*(float)out[N-i].r + out[i].i*(float)out[i].i + out[N-i].i*(float)out[N-i].i; E += binE; } - E = SCALE_ENER(E); maxE = MAX32(maxE, E); - if (band_start < 64) - { - below_max_pitch += E; - } else { - above_max_pitch += E; - } tonal->meanE[b] = MAX32((1-alphaE2)*tonal->meanE[b], E); - Em = MAX32(E, tonal->meanE[b]); + E = MAX32(E, tonal->meanE[b]); + /* Use a simple follower with 13 dB/Bark slope for spreading function */ + bandwidth_mask = MAX32(.05f*bandwidth_mask, E); /* Consider the band "active" only if all these conditions are met: - 1) less than 90 dB below the peak band (maximal masking possible considering + 1) less than 10 dB below the simple follower + 2) less than 90 dB below the peak band (maximal masking possible considering both the ATH and the loudness-dependent slope of the spreading function) - 2) above the PCM quantization noise floor - We use b+1 because the first CELT band isn't included in tbands[] + 3) above the PCM quantization noise floor */ - if (E*1e9f > maxE && (Em > 3*noise_floor*(band_end-band_start) || E > noise_floor*(band_end-band_start))) - bandwidth = b+1; - /* Check if the band is masked (see below). */ - is_masked[b] = E < (tonal->prev_bandwidth >= b+1 ? .01f : .05f)*bandwidth_mask; - /* Use a simple follower with 13 dB/Bark slope for spreading function. */ - bandwidth_mask = MAX32(.05f*bandwidth_mask, E); + if (E>.1*bandwidth_mask && E*1e9f > maxE && E > noise_floor*(band_end-band_start)) + bandwidth = b; } - /* Special case for the last two bands, for which we don't have spectrum but only - the energy above 12 kHz. The difficulty here is that the high-pass we use - leaks some LF energy, so we need to increase the threshold without accidentally cutting - off the band. */ - if (tonal->Fs == 48000) { - float noise_ratio; - float Em; - float E = hp_ener*(1.f/(60*60)); - noise_ratio = tonal->prev_bandwidth==20 ? 10.f : 30.f; - -#ifdef FIXED_POINT - /* silk_resampler_down2_hp() shifted right by an extra 8 bits. */ - E *= 256.f*(1.f/Q15ONE)*(1.f/Q15ONE); -#endif - above_max_pitch += E; - tonal->meanE[b] = MAX32((1-alphaE2)*tonal->meanE[b], E); - Em = MAX32(E, tonal->meanE[b]); - if (Em > 3*noise_ratio*noise_floor*160 || E > noise_ratio*noise_floor*160) - bandwidth = 20; - /* Check if the band is masked (see below). */ - is_masked[b] = E < (tonal->prev_bandwidth == 20 ? .01f : .05f)*bandwidth_mask; - } - if (above_max_pitch > below_max_pitch) - info->max_pitch_ratio = below_max_pitch/above_max_pitch; - else - info->max_pitch_ratio = 1; - /* In some cases, resampling aliasing can create a small amount of energy in the first band - being cut. So if the last band is masked, we don't include it. */ - if (bandwidth == 20 && is_masked[NB_TBANDS]) - bandwidth-=2; - else if (bandwidth > 0 && bandwidth <= NB_TBANDS && is_masked[bandwidth-1]) - bandwidth--; if (tonal->count<=2) bandwidth = 20; frame_loudness = 20*(float)log10(frame_loudness); - tonal->Etracker = MAX32(tonal->Etracker-.003f, frame_loudness); + tonal->Etracker = MAX32(tonal->Etracker-.03f, frame_loudness); tonal->lowECount *= (1-alphaE); if (frame_loudness < tonal->Etracker-30) tonal->lowECount += alphaE; @@ -863,18 +460,11 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt sum += dct_table[i*16+b]*logE[b]; BFCC[i] = sum; } - for (i=0;i<8;i++) - { - float sum=0; - for (b=0;b<16;b++) - sum += dct_table[i*16+b]*.5f*(tonal->highE[b]+tonal->lowE[b]); - midE[i] = sum; - } frame_stationarity /= NB_TBANDS; relativeE /= NB_TBANDS; if (tonal->count<10) - relativeE = .5f; + relativeE = .5; frame_noisiness /= NB_TBANDS; #if 1 info->activity = frame_noisiness + (1-frame_noisiness)*relativeE; @@ -889,7 +479,7 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt info->tonality_slope = slope; tonal->E_count = (tonal->E_count+1)%NB_FRAMES; - tonal->count = IMIN(tonal->count+1, ANALYSIS_COUNT_MAX); + tonal->count++; info->tonality = frame_tonality; for (i=0;i<4;i++) @@ -908,8 +498,6 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt for (i=0;i<9;i++) tonal->std[i] = (1-alpha)*tonal->std[i] + alpha*features[i]*features[i]; } - for (i=0;i<4;i++) - features[i] = BFCC[i]-midE[i]; for (i=0;i<8;i++) { @@ -919,31 +507,136 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt tonal->mem[i] = BFCC[i]; } for (i=0;i<9;i++) - features[11+i] = (float)sqrt(tonal->std[i]) - std_feature_bias[i]; - features[18] = spec_variability - 0.78f; - features[20] = info->tonality - 0.154723f; - features[21] = info->activity - 0.724643f; - features[22] = frame_stationarity - 0.743717f; - features[23] = info->tonality_slope + 0.069216f; - features[24] = tonal->lowECount - 0.067930f; - - compute_dense(&layer0, layer_out, features); - compute_gru(&layer1, tonal->rnn_state, layer_out); - compute_dense(&layer2, frame_probs, tonal->rnn_state); - - /* Probability of speech or music vs noise */ - info->activity_probability = frame_probs[1]; - info->music_prob = frame_probs[0]; - - /*printf("%f %f %f\n", frame_probs[0], frame_probs[1], info->music_prob);*/ -#ifdef MLP_TRAINING - for (i=0;i<25;i++) - printf("%f ", features[i]); - printf("\n"); + features[11+i] = (float)sqrt(tonal->std[i]); + features[20] = info->tonality; + features[21] = info->activity; + features[22] = frame_stationarity; + features[23] = info->tonality_slope; + features[24] = tonal->lowECount; + +#ifndef DISABLE_FLOAT_API + mlp_process(&net, features, frame_probs); + frame_probs[0] = .5f*(frame_probs[0]+1); + /* Curve fitting between the MLP probability and the actual probability */ + frame_probs[0] = .01f + 1.21f*frame_probs[0]*frame_probs[0] - .23f*(float)pow(frame_probs[0], 10); + /* Probability of active audio (as opposed to silence) */ + frame_probs[1] = .5f*frame_probs[1]+.5f; + /* Consider that silence has a 50-50 probability. */ + frame_probs[0] = frame_probs[1]*frame_probs[0] + (1-frame_probs[1])*.5f; + + /*printf("%f %f ", frame_probs[0], frame_probs[1]);*/ + { + /* Probability of state transition */ + float tau; + /* Represents independence of the MLP probabilities, where + beta=1 means fully independent. */ + float beta; + /* Denormalized probability of speech (p0) and music (p1) after update */ + float p0, p1; + /* Probabilities for "all speech" and "all music" */ + float s0, m0; + /* Probability sum for renormalisation */ + float psum; + /* Instantaneous probability of speech and music, with beta pre-applied. */ + float speech0; + float music0; + float p, q; + + /* One transition every 3 minutes of active audio */ + tau = .00005f*frame_probs[1]; + /* Adapt beta based on how "unexpected" the new prob is */ + p = MAX16(.05f,MIN16(.95f,frame_probs[0])); + q = MAX16(.05f,MIN16(.95f,tonal->music_prob)); + beta = .01f+.05f*ABS16(p-q)/(p*(1-q)+q*(1-p)); + /* p0 and p1 are the probabilities of speech and music at this frame + using only information from previous frame and applying the + state transition model */ + p0 = (1-tonal->music_prob)*(1-tau) + tonal->music_prob *tau; + p1 = tonal->music_prob *(1-tau) + (1-tonal->music_prob)*tau; + /* We apply the current probability with exponent beta to work around + the fact that the probability estimates aren't independent. */ + p0 *= (float)pow(1-frame_probs[0], beta); + p1 *= (float)pow(frame_probs[0], beta); + /* Normalise the probabilities to get the Marokv probability of music. */ + tonal->music_prob = p1/(p0+p1); + info->music_prob = tonal->music_prob; + + /* This chunk of code deals with delayed decision. */ + psum=1e-20f; + /* Instantaneous probability of speech and music, with beta pre-applied. */ + speech0 = (float)pow(1-frame_probs[0], beta); + music0 = (float)pow(frame_probs[0], beta); + if (tonal->count==1) + { + tonal->pspeech[0]=.5; + tonal->pmusic [0]=.5; + } + /* Updated probability of having only speech (s0) or only music (m0), + before considering the new observation. */ + s0 = tonal->pspeech[0] + tonal->pspeech[1]; + m0 = tonal->pmusic [0] + tonal->pmusic [1]; + /* Updates s0 and m0 with instantaneous probability. */ + tonal->pspeech[0] = s0*(1-tau)*speech0; + tonal->pmusic [0] = m0*(1-tau)*music0; + /* Propagate the transition probabilities */ + for (i=1;i<DETECT_SIZE-1;i++) + { + tonal->pspeech[i] = tonal->pspeech[i+1]*speech0; + tonal->pmusic [i] = tonal->pmusic [i+1]*music0; + } + /* Probability that the latest frame is speech, when all the previous ones were music. */ + tonal->pspeech[DETECT_SIZE-1] = m0*tau*speech0; + /* Probability that the latest frame is music, when all the previous ones were speech. */ + tonal->pmusic [DETECT_SIZE-1] = s0*tau*music0; + + /* Renormalise probabilities to 1 */ + for (i=0;i<DETECT_SIZE;i++) + psum += tonal->pspeech[i] + tonal->pmusic[i]; + psum = 1.f/psum; + for (i=0;i<DETECT_SIZE;i++) + { + tonal->pspeech[i] *= psum; + tonal->pmusic [i] *= psum; + } + psum = tonal->pmusic[0]; + for (i=1;i<DETECT_SIZE;i++) + psum += tonal->pspeech[i]; + + /* Estimate our confidence in the speech/music decisions */ + if (frame_probs[1]>.75) + { + if (tonal->music_prob>.9) + { + float adapt; + adapt = 1.f/(++tonal->music_confidence_count); + tonal->music_confidence_count = IMIN(tonal->music_confidence_count, 500); + tonal->music_confidence += adapt*MAX16(-.2f,frame_probs[0]-tonal->music_confidence); + } + if (tonal->music_prob<.1) + { + float adapt; + adapt = 1.f/(++tonal->speech_confidence_count); + tonal->speech_confidence_count = IMIN(tonal->speech_confidence_count, 500); + tonal->speech_confidence += adapt*MIN16(.2f,frame_probs[0]-tonal->speech_confidence); + } + } else { + if (tonal->music_confidence_count==0) + tonal->music_confidence = .9f; + if (tonal->speech_confidence_count==0) + tonal->speech_confidence = .1f; + } + } + if (tonal->last_music != (tonal->music_prob>.5f)) + tonal->last_transition=0; + tonal->last_music = tonal->music_prob>.5f; +#else + info->music_prob = 0; #endif + /*for (i=0;i<25;i++) + printf("%f ", features[i]); + printf("\n");*/ info->bandwidth = bandwidth; - tonal->prev_bandwidth = bandwidth; /*printf("%d %d\n", info->bandwidth, info->opus_bandwidth);*/ info->noisiness = frame_noisiness; info->valid = 1; @@ -957,25 +650,23 @@ void run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, co int offset; int pcm_len; - analysis_frame_size -= analysis_frame_size&1; if (analysis_pcm != NULL) { /* Avoid overflow/wrap-around of the analysis buffer */ - analysis_frame_size = IMIN((DETECT_SIZE-5)*Fs/50, analysis_frame_size); + analysis_frame_size = IMIN((DETECT_SIZE-5)*Fs/100, analysis_frame_size); pcm_len = analysis_frame_size - analysis->analysis_offset; offset = analysis->analysis_offset; - while (pcm_len>0) { - tonality_analysis(analysis, celt_mode, analysis_pcm, IMIN(Fs/50, pcm_len), offset, c1, c2, C, lsb_depth, downmix); - offset += Fs/50; - pcm_len -= Fs/50; - } + do { + tonality_analysis(analysis, celt_mode, analysis_pcm, IMIN(480, pcm_len), offset, c1, c2, C, lsb_depth, downmix); + offset += 480; + pcm_len -= 480; + } while (pcm_len>0); analysis->analysis_offset = analysis_frame_size; analysis->analysis_offset -= frame_size; } + analysis_info->valid = 0; tonality_get_info(analysis, analysis_info, frame_size); } - -#endif /* DISABLE_FLOAT_API */ diff --git a/thirdparty/opus/analysis.h b/thirdparty/opus/analysis.h index 0b66555f21..9eae56a525 100644 --- a/thirdparty/opus/analysis.h +++ b/thirdparty/opus/analysis.h @@ -30,24 +30,16 @@ #include "celt.h" #include "opus_private.h" -#include "mlp.h" #define NB_FRAMES 8 #define NB_TBANDS 18 -#define ANALYSIS_BUF_SIZE 720 /* 30 ms at 24 kHz */ +#define NB_TOT_BANDS 21 +#define ANALYSIS_BUF_SIZE 720 /* 15 ms at 48 kHz */ -/* At that point we can stop counting frames because it no longer matters. */ -#define ANALYSIS_COUNT_MAX 10000 - -#define DETECT_SIZE 100 - -/* Uncomment this to print the MLP features on stdout. */ -/*#define MLP_TRAINING*/ +#define DETECT_SIZE 200 typedef struct { int arch; - int application; - opus_int32 Fs; #define TONALITY_ANALYSIS_RESET_START angle float angle[240]; float d_angle[240]; @@ -56,27 +48,35 @@ typedef struct { int mem_fill; /* number of usable samples in the buffer */ float prev_band_tonality[NB_TBANDS]; float prev_tonality; - int prev_bandwidth; float E[NB_FRAMES][NB_TBANDS]; - float logE[NB_FRAMES][NB_TBANDS]; float lowE[NB_TBANDS]; float highE[NB_TBANDS]; - float meanE[NB_TBANDS+1]; + float meanE[NB_TOT_BANDS]; float mem[32]; float cmean[8]; float std[9]; + float music_prob; float Etracker; float lowECount; int E_count; + int last_music; + int last_transition; int count; + float subframe_mem[3]; int analysis_offset; + /** Probability of having speech for time i to DETECT_SIZE-1 (and music before). + pspeech[0] is the probability that all frames in the window are speech. */ + float pspeech[DETECT_SIZE]; + /** Probability of having music for time i to DETECT_SIZE-1 (and speech before). + pmusic[0] is the probability that all frames in the window are music. */ + float pmusic[DETECT_SIZE]; + float speech_confidence; + float music_confidence; + int speech_confidence_count; + int music_confidence_count; int write_pos; int read_pos; int read_subframe; - float hp_ener_accum; - int initialized; - float rnn_state[MAX_NEURONS]; - opus_val32 downmix_state[3]; AnalysisInfo info[DETECT_SIZE]; } TonalityAnalysisState; @@ -86,7 +86,7 @@ typedef struct { * not be repeated every analysis step. No allocated memory is retained * by the state struct, so no cleanup call is required. */ -void tonality_analysis_init(TonalityAnalysisState *analysis, opus_int32 Fs); +void tonality_analysis_init(TonalityAnalysisState *analysis); /** Reset a TonalityAnalysisState stuct. * diff --git a/thirdparty/opus/celt/_kiss_fft_guts.h b/thirdparty/opus/celt/_kiss_fft_guts.h index 17392b3e90..5e3d58fd66 100644 --- a/thirdparty/opus/celt/_kiss_fft_guts.h +++ b/thirdparty/opus/celt/_kiss_fft_guts.h @@ -58,12 +58,12 @@ # define S_MUL(a,b) MULT16_32_Q15(b, a) # define C_MUL(m,a,b) \ - do{ (m).r = SUB32_ovflw(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \ - (m).i = ADD32_ovflw(S_MUL((a).r,(b).i) , S_MUL((a).i,(b).r)); }while(0) + do{ (m).r = SUB32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \ + (m).i = ADD32(S_MUL((a).r,(b).i) , S_MUL((a).i,(b).r)); }while(0) # define C_MULC(m,a,b) \ - do{ (m).r = ADD32_ovflw(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \ - (m).i = SUB32_ovflw(S_MUL((a).i,(b).r) , S_MUL((a).r,(b).i)); }while(0) + do{ (m).r = ADD32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \ + (m).i = SUB32(S_MUL((a).i,(b).r) , S_MUL((a).r,(b).i)); }while(0) # define C_MULBYSCALAR( c, s ) \ do{ (c).r = S_MUL( (c).r , s ) ;\ @@ -77,17 +77,17 @@ DIVSCALAR( (c).i , div); }while (0) #define C_ADD( res, a,b)\ - do {(res).r=ADD32_ovflw((a).r,(b).r); (res).i=ADD32_ovflw((a).i,(b).i); \ + do {(res).r=ADD32((a).r,(b).r); (res).i=ADD32((a).i,(b).i); \ }while(0) #define C_SUB( res, a,b)\ - do {(res).r=SUB32_ovflw((a).r,(b).r); (res).i=SUB32_ovflw((a).i,(b).i); \ + do {(res).r=SUB32((a).r,(b).r); (res).i=SUB32((a).i,(b).i); \ }while(0) #define C_ADDTO( res , a)\ - do {(res).r = ADD32_ovflw((res).r, (a).r); (res).i = ADD32_ovflw((res).i,(a).i);\ + do {(res).r = ADD32((res).r, (a).r); (res).i = ADD32((res).i,(a).i);\ }while(0) #define C_SUBFROM( res , a)\ - do {(res).r = ADD32_ovflw((res).r,(a).r); (res).i = SUB32_ovflw((res).i,(a).i); \ + do {(res).r = ADD32((res).r,(a).r); (res).i = SUB32((res).i,(a).i); \ }while(0) #if defined(OPUS_ARM_INLINE_ASM) diff --git a/thirdparty/opus/celt/arch.h b/thirdparty/opus/celt/arch.h index 08b07db598..8ceab5fe10 100644 --- a/thirdparty/opus/celt/arch.h +++ b/thirdparty/opus/celt/arch.h @@ -46,50 +46,25 @@ # endif # endif -#if OPUS_GNUC_PREREQ(3, 0) -#define opus_likely(x) (__builtin_expect(!!(x), 1)) -#define opus_unlikely(x) (__builtin_expect(!!(x), 0)) -#else -#define opus_likely(x) (!!(x)) -#define opus_unlikely(x) (!!(x)) -#endif - #define CELT_SIG_SCALE 32768.f -#define CELT_FATAL(str) celt_fatal(str, __FILE__, __LINE__); - -#if defined(ENABLE_ASSERTIONS) || defined(ENABLE_HARDENING) -#ifdef __GNUC__ -__attribute__((noreturn)) -#endif -void celt_fatal(const char *str, const char *file, int line); - -#if defined(CELT_C) && !defined(OVERRIDE_celt_fatal) +#define celt_fatal(str) _celt_fatal(str, __FILE__, __LINE__); +#ifdef ENABLE_ASSERTIONS #include <stdio.h> #include <stdlib.h> #ifdef __GNUC__ __attribute__((noreturn)) #endif -void celt_fatal(const char *str, const char *file, int line) +static OPUS_INLINE void _celt_fatal(const char *str, const char *file, int line) { fprintf (stderr, "Fatal (internal) error in %s, line %d: %s\n", file, line, str); abort(); } -#endif - -#define celt_assert(cond) {if (!(cond)) {CELT_FATAL("assertion failed: " #cond);}} -#define celt_assert2(cond, message) {if (!(cond)) {CELT_FATAL("assertion failed: " #cond "\n" message);}} -#define MUST_SUCCEED(call) celt_assert((call) == OPUS_OK) +#define celt_assert(cond) {if (!(cond)) {celt_fatal("assertion failed: " #cond);}} +#define celt_assert2(cond, message) {if (!(cond)) {celt_fatal("assertion failed: " #cond "\n" message);}} #else #define celt_assert(cond) #define celt_assert2(cond, message) -#define MUST_SUCCEED(call) do {if((call) != OPUS_OK) {RESTORE_STACK; return OPUS_INTERNAL_ERROR;} } while (0) -#endif - -#if defined(ENABLE_ASSERTIONS) -#define celt_sig_assert(cond) {if (!(cond)) {CELT_FATAL("signal assertion failed: " #cond);}} -#else -#define celt_sig_assert(cond) #endif #define IMUL32(a,b) ((a)*(b)) @@ -118,20 +93,14 @@ void celt_fatal(const char *str, const char *file, int line) typedef opus_int16 opus_val16; typedef opus_int32 opus_val32; -typedef opus_int64 opus_val64; typedef opus_val32 celt_sig; typedef opus_val16 celt_norm; typedef opus_val32 celt_ener; -#define celt_isnan(x) 0 - #define Q15ONE 32767 #define SIG_SHIFT 12 -/* Safe saturation value for 32-bit signals. Should be less than - 2^31*(1-0.85) to avoid blowing up on DC at deemphasis.*/ -#define SIG_SAT (300000000) #define NORM_SCALING 16384 @@ -160,7 +129,7 @@ static OPUS_INLINE opus_int16 SAT16(opus_int32 x) { #ifdef OPUS_ARM_PRESUME_AARCH64_NEON_INTR #include "arm/fixed_arm64.h" -#elif defined (OPUS_ARM_INLINE_EDSP) +#elif OPUS_ARM_INLINE_EDSP #include "arm/fixed_armv5e.h" #elif defined (OPUS_ARM_INLINE_ASM) #include "arm/fixed_armv4.h" @@ -178,7 +147,6 @@ static OPUS_INLINE opus_int16 SAT16(opus_int32 x) { typedef float opus_val16; typedef float opus_val32; -typedef float opus_val64; typedef float celt_sig; typedef float celt_norm; @@ -218,7 +186,6 @@ static OPUS_INLINE int celt_isnan(float x) #define NEG16(x) (-(x)) #define NEG32(x) (-(x)) -#define NEG32_ovflw(x) (-(x)) #define EXTRACT16(x) (x) #define EXTEND32(x) (x) #define SHR16(a,shift) (a) @@ -235,7 +202,6 @@ static OPUS_INLINE int celt_isnan(float x) #define SATURATE16(x) (x) #define ROUND16(a,shift) (a) -#define SROUND16(a,shift) (a) #define HALF16(x) (.5f*(x)) #define HALF32(x) (.5f*(x)) @@ -243,8 +209,6 @@ static OPUS_INLINE int celt_isnan(float x) #define SUB16(a,b) ((a)-(b)) #define ADD32(a,b) ((a)+(b)) #define SUB32(a,b) ((a)-(b)) -#define ADD32_ovflw(a,b) ((a)+(b)) -#define SUB32_ovflw(a,b) ((a)-(b)) #define MULT16_16_16(a,b) ((a)*(b)) #define MULT16_16(a,b) ((opus_val32)(a)*(opus_val32)(b)) #define MAC16_16(c,a,b) ((c)+(opus_val32)(a)*(opus_val32)(b)) @@ -279,9 +243,9 @@ static OPUS_INLINE int celt_isnan(float x) #ifndef GLOBAL_STACK_SIZE #ifdef FIXED_POINT -#define GLOBAL_STACK_SIZE 120000 +#define GLOBAL_STACK_SIZE 100000 #else -#define GLOBAL_STACK_SIZE 120000 +#define GLOBAL_STACK_SIZE 100000 #endif #endif diff --git a/thirdparty/opus/celt/arm/arm2gnu.pl b/thirdparty/opus/celt/arm/arm2gnu.pl index a2895f7445..6c922ac819 100755 --- a/thirdparty/opus/celt/arm/arm2gnu.pl +++ b/thirdparty/opus/celt/arm/arm2gnu.pl @@ -164,11 +164,11 @@ while (<>) { $prefix = ""; if ($proc) { - $prefix = $prefix.sprintf("\t.type\t%s, %%function", $proc) unless ($apple); + $prefix = $prefix.sprintf("\t.type\t%s, %%function; ",$proc) unless ($apple); # Make sure we $prefix isn't empty here (for the $apple case). # We handle mangling the label here, make sure it doesn't match # the label handling below (if $prefix would be empty). - $prefix = $prefix."; "; + $prefix = "; "; push(@proc_stack, $proc); s/^[A-Za-z_\.]\w+/$symprefix$&:/; } diff --git a/thirdparty/opus/celt/arm/arm_celt_map.c b/thirdparty/opus/celt/arm/arm_celt_map.c index ca988b66f5..4d4d069a86 100644 --- a/thirdparty/opus/celt/arm/arm_celt_map.c +++ b/thirdparty/opus/celt/arm/arm_celt_map.c @@ -35,29 +35,12 @@ #if defined(OPUS_HAVE_RTCD) -# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR) -opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *x, const opus_val16 *y, int N) = { - celt_inner_prod_c, /* ARMv4 */ - celt_inner_prod_c, /* EDSP */ - celt_inner_prod_c, /* Media */ - celt_inner_prod_neon /* NEON */ -}; - -void (*const DUAL_INNER_PROD_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02, - int N, opus_val32 *xy1, opus_val32 *xy2) = { - dual_inner_prod_c, /* ARMv4 */ - dual_inner_prod_c, /* EDSP */ - dual_inner_prod_c, /* Media */ - dual_inner_prod_neon /* NEON */ -}; -# endif - # if defined(FIXED_POINT) # if ((defined(OPUS_ARM_MAY_HAVE_NEON) && !defined(OPUS_ARM_PRESUME_NEON)) || \ (defined(OPUS_ARM_MAY_HAVE_MEDIA) && !defined(OPUS_ARM_PRESUME_MEDIA)) || \ (defined(OPUS_ARM_MAY_HAVE_EDSP) && !defined(OPUS_ARM_PRESUME_EDSP))) opus_val32 (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *, - const opus_val16 *, opus_val32 *, int, int, int) = { + const opus_val16 *, opus_val32 *, int , int) = { celt_pitch_xcorr_c, /* ARMv4 */ MAY_HAVE_EDSP(celt_pitch_xcorr), /* EDSP */ MAY_HAVE_MEDIA(celt_pitch_xcorr), /* Media */ @@ -68,7 +51,7 @@ opus_val32 (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *, # else /* !FIXED_POINT */ # if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR) void (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *, - const opus_val16 *, opus_val32 *, int, int, int) = { + const opus_val16 *, opus_val32 *, int, int) = { celt_pitch_xcorr_c, /* ARMv4 */ celt_pitch_xcorr_c, /* EDSP */ celt_pitch_xcorr_c, /* Media */ diff --git a/thirdparty/opus/celt/arm/celt_fft_ne10.c b/thirdparty/opus/celt/arm/celt_ne10_fft.c index ea5fd7808b..42d96a7117 100644 --- a/thirdparty/opus/celt/arm/celt_fft_ne10.c +++ b/thirdparty/opus/celt/arm/celt_ne10_fft.c @@ -1,7 +1,7 @@ /* Copyright (c) 2015 Xiph.Org Foundation Written by Viswanath Puttagunta */ /** - @file celt_fft_ne10.c + @file celt_ne10_fft.c @brief ARM Neon optimizations for fft using NE10 library */ @@ -36,6 +36,7 @@ #endif #endif +#include <NE10_init.h> #include <NE10_dsp.h> #include "os_support.h" #include "kiss_fft.h" diff --git a/thirdparty/opus/celt/arm/celt_mdct_ne10.c b/thirdparty/opus/celt/arm/celt_ne10_mdct.c index 3531d02d10..293c3efd7a 100644 --- a/thirdparty/opus/celt/arm/celt_mdct_ne10.c +++ b/thirdparty/opus/celt/arm/celt_ne10_mdct.c @@ -1,7 +1,7 @@ /* Copyright (c) 2015 Xiph.Org Foundation Written by Viswanath Puttagunta */ /** - @file celt_mdct_ne10.c + @file celt_ne10_mdct.c @brief ARM Neon optimizations for mdct using NE10 library */ diff --git a/thirdparty/opus/celt/arm/celt_neon_intr.c b/thirdparty/opus/celt/arm/celt_neon_intr.c index effda769d0..47bbe3dc22 100644 --- a/thirdparty/opus/celt/arm/celt_neon_intr.c +++ b/thirdparty/opus/celt/arm/celt_neon_intr.c @@ -191,21 +191,121 @@ static void xcorr_kernel_neon_float(const float32_t *x, const float32_t *y, vst1q_f32(sum, SUMM); } +/* + * Function: xcorr_kernel_neon_float_process1 + * --------------------------------- + * Computes single correlation values and stores in *sum + */ +static void xcorr_kernel_neon_float_process1(const float32_t *x, + const float32_t *y, float32_t *sum, int len) { + float32x4_t XX[4]; + float32x4_t YY[4]; + float32x2_t XX_2; + float32x2_t YY_2; + float32x4_t SUMM; + float32x2_t SUMM_2[2]; + const float32_t *xi = x; + const float32_t *yi = y; + + SUMM = vdupq_n_f32(0); + + /* Work on 16 values per iteration */ + while (len >= 16) { + XX[0] = vld1q_f32(xi); + xi += 4; + XX[1] = vld1q_f32(xi); + xi += 4; + XX[2] = vld1q_f32(xi); + xi += 4; + XX[3] = vld1q_f32(xi); + xi += 4; + + YY[0] = vld1q_f32(yi); + yi += 4; + YY[1] = vld1q_f32(yi); + yi += 4; + YY[2] = vld1q_f32(yi); + yi += 4; + YY[3] = vld1q_f32(yi); + yi += 4; + + SUMM = vmlaq_f32(SUMM, YY[0], XX[0]); + SUMM = vmlaq_f32(SUMM, YY[1], XX[1]); + SUMM = vmlaq_f32(SUMM, YY[2], XX[2]); + SUMM = vmlaq_f32(SUMM, YY[3], XX[3]); + len -= 16; + } + + /* Work on 8 values */ + if (len >= 8) { + XX[0] = vld1q_f32(xi); + xi += 4; + XX[1] = vld1q_f32(xi); + xi += 4; + + YY[0] = vld1q_f32(yi); + yi += 4; + YY[1] = vld1q_f32(yi); + yi += 4; + + SUMM = vmlaq_f32(SUMM, YY[0], XX[0]); + SUMM = vmlaq_f32(SUMM, YY[1], XX[1]); + len -= 8; + } + + /* Work on 4 values */ + if (len >= 4) { + XX[0] = vld1q_f32(xi); + xi += 4; + YY[0] = vld1q_f32(yi); + yi += 4; + SUMM = vmlaq_f32(SUMM, YY[0], XX[0]); + len -= 4; + } + + /* Start accumulating results */ + SUMM_2[0] = vget_low_f32(SUMM); + if (len >= 2) { + /* While at it, consume 2 more values if available */ + XX_2 = vld1_f32(xi); + xi += 2; + YY_2 = vld1_f32(yi); + yi += 2; + SUMM_2[0] = vmla_f32(SUMM_2[0], YY_2, XX_2); + len -= 2; + } + SUMM_2[1] = vget_high_f32(SUMM); + SUMM_2[0] = vadd_f32(SUMM_2[0], SUMM_2[1]); + SUMM_2[0] = vpadd_f32(SUMM_2[0], SUMM_2[0]); + /* Ok, now we have result accumulated in SUMM_2[0].0 */ + + if (len > 0) { + /* Case when you have one value left */ + XX_2 = vld1_dup_f32(xi); + YY_2 = vld1_dup_f32(yi); + SUMM_2[0] = vmla_f32(SUMM_2[0], XX_2, YY_2); + } + + vst1_lane_f32(sum, SUMM_2[0], 0); +} + void celt_pitch_xcorr_float_neon(const opus_val16 *_x, const opus_val16 *_y, - opus_val32 *xcorr, int len, int max_pitch, int arch) { + opus_val32 *xcorr, int len, int max_pitch) { int i; - (void)arch; celt_assert(max_pitch > 0); - celt_sig_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0); + celt_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0); for (i = 0; i < (max_pitch-3); i += 4) { xcorr_kernel_neon_float((const float32_t *)_x, (const float32_t *)_y+i, (float32_t *)xcorr+i, len); } - /* In case max_pitch isn't a multiple of 4, do non-unrolled version. */ + /* In case max_pitch isn't multiple of 4 + * compute single correlation value per iteration + */ for (; i < max_pitch; i++) { - xcorr[i] = celt_inner_prod_neon(_x, _y+i, len); + xcorr_kernel_neon_float_process1((const float32_t *)_x, + (const float32_t *)_y+i, (float32_t *)xcorr+i, len); } } #endif diff --git a/thirdparty/opus/celt/arm/celt_pitch_xcorr_arm-gnu.S b/thirdparty/opus/celt/arm/celt_pitch_xcorr_arm-gnu.S new file mode 100644 index 0000000000..5b2ee55a10 --- /dev/null +++ b/thirdparty/opus/celt/arm/celt_pitch_xcorr_arm-gnu.S @@ -0,0 +1,551 @@ + .syntax unified +@ Copyright (c) 2007-2008 CSIRO +@ Copyright (c) 2007-2009 Xiph.Org Foundation +@ Copyright (c) 2013 Parrot +@ Written by Aurélien Zanelli +@ +@ Redistribution and use in source and binary forms, with or without +@ modification, are permitted provided that the following conditions +@ are met: +@ +@ - Redistributions of source code must retain the above copyright +@ notice, this list of conditions and the following disclaimer. +@ +@ - Redistributions in binary form must reproduce the above copyright +@ notice, this list of conditions and the following disclaimer in the +@ documentation and/or other materials provided with the distribution. +@ +@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +@ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +@ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +@ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER +@ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +@ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +@ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +@ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +@ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +@ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + .text; .p2align 2; .arch armv7-a + .fpu neon + .object_arch armv4t + + .include "celt/arm/armopts-gnu.S" + + .if OPUS_ARM_MAY_HAVE_EDSP + .global celt_pitch_xcorr_edsp + .endif + + .if OPUS_ARM_MAY_HAVE_NEON + .global celt_pitch_xcorr_neon + .endif + + .if OPUS_ARM_MAY_HAVE_NEON + +@ Compute sum[k]=sum(x[j]*y[j+k],j=0...len-1), k=0...3 +; xcorr_kernel_neon: @ PROC +xcorr_kernel_neon_start: + @ input: + @ r3 = int len + @ r4 = opus_val16 *x + @ r5 = opus_val16 *y + @ q0 = opus_val32 sum[4] + @ output: + @ q0 = opus_val32 sum[4] + @ preserved: r0-r3, r6-r11, d2, q4-q7, q9-q15 + @ internal usage: + @ r12 = int j + @ d3 = y_3|y_2|y_1|y_0 + @ q2 = y_B|y_A|y_9|y_8|y_7|y_6|y_5|y_4 + @ q3 = x_7|x_6|x_5|x_4|x_3|x_2|x_1|x_0 + @ q8 = scratch + @ + @ Load y[0...3] + @ This requires len>0 to always be valid (which we assert in the C code). + VLD1.16 {d5}, [r5]! + SUBS r12, r3, #8 + BLE xcorr_kernel_neon_process4 +@ Process 8 samples at a time. +@ This loop loads one y value more than we actually need. Therefore we have to +@ stop as soon as there are 8 or fewer samples left (instead of 7), to avoid +@ reading past the end of the array. +xcorr_kernel_neon_process8: + @ This loop has 19 total instructions (10 cycles to issue, minimum), with + @ - 2 cycles of ARM insrtuctions, + @ - 10 cycles of load/store/byte permute instructions, and + @ - 9 cycles of data processing instructions. + @ On a Cortex A8, we dual-issue the maximum amount (9 cycles) between the + @ latter two categories, meaning the whole loop should run in 10 cycles per + @ iteration, barring cache misses. + @ + @ Load x[0...7] + VLD1.16 {d6, d7}, [r4]! + @ Unlike VMOV, VAND is a data processsing instruction (and doesn't get + @ assembled to VMOV, like VORR would), so it dual-issues with the prior VLD1. + VAND d3, d5, d5 + SUBS r12, r12, #8 + @ Load y[4...11] + VLD1.16 {d4, d5}, [r5]! + VMLAL.S16 q0, d3, d6[0] + VEXT.16 d16, d3, d4, #1 + VMLAL.S16 q0, d4, d7[0] + VEXT.16 d17, d4, d5, #1 + VMLAL.S16 q0, d16, d6[1] + VEXT.16 d16, d3, d4, #2 + VMLAL.S16 q0, d17, d7[1] + VEXT.16 d17, d4, d5, #2 + VMLAL.S16 q0, d16, d6[2] + VEXT.16 d16, d3, d4, #3 + VMLAL.S16 q0, d17, d7[2] + VEXT.16 d17, d4, d5, #3 + VMLAL.S16 q0, d16, d6[3] + VMLAL.S16 q0, d17, d7[3] + BGT xcorr_kernel_neon_process8 +@ Process 4 samples here if we have > 4 left (still reading one extra y value). +xcorr_kernel_neon_process4: + ADDS r12, r12, #4 + BLE xcorr_kernel_neon_process2 + @ Load x[0...3] + VLD1.16 d6, [r4]! + @ Use VAND since it's a data processing instruction again. + VAND d4, d5, d5 + SUB r12, r12, #4 + @ Load y[4...7] + VLD1.16 d5, [r5]! + VMLAL.S16 q0, d4, d6[0] + VEXT.16 d16, d4, d5, #1 + VMLAL.S16 q0, d16, d6[1] + VEXT.16 d16, d4, d5, #2 + VMLAL.S16 q0, d16, d6[2] + VEXT.16 d16, d4, d5, #3 + VMLAL.S16 q0, d16, d6[3] +@ Process 2 samples here if we have > 2 left (still reading one extra y value). +xcorr_kernel_neon_process2: + ADDS r12, r12, #2 + BLE xcorr_kernel_neon_process1 + @ Load x[0...1] + VLD2.16 {d6[],d7[]}, [r4]! + @ Use VAND since it's a data processing instruction again. + VAND d4, d5, d5 + SUB r12, r12, #2 + @ Load y[4...5] + VLD1.32 {d5[]}, [r5]! + VMLAL.S16 q0, d4, d6 + VEXT.16 d16, d4, d5, #1 + @ Replace bottom copy of {y5,y4} in d5 with {y3,y2} from d4, using VSRI + @ instead of VEXT, since it's a data-processing instruction. + VSRI.64 d5, d4, #32 + VMLAL.S16 q0, d16, d7 +@ Process 1 sample using the extra y value we loaded above. +xcorr_kernel_neon_process1: + @ Load next *x + VLD1.16 {d6[]}, [r4]! + ADDS r12, r12, #1 + @ y[0...3] are left in d5 from prior iteration(s) (if any) + VMLAL.S16 q0, d5, d6 + MOVLE pc, lr +@ Now process 1 last sample, not reading ahead. + @ Load last *y + VLD1.16 {d4[]}, [r5]! + VSRI.64 d4, d5, #16 + @ Load last *x + VLD1.16 {d6[]}, [r4]! + VMLAL.S16 q0, d4, d6 + MOV pc, lr + .size xcorr_kernel_neon, .-xcorr_kernel_neon @ ENDP + +@ opus_val32 celt_pitch_xcorr_neon(opus_val16 *_x, opus_val16 *_y, +@ opus_val32 *xcorr, int len, int max_pitch) +; celt_pitch_xcorr_neon: @ PROC + @ input: + @ r0 = opus_val16 *_x + @ r1 = opus_val16 *_y + @ r2 = opus_val32 *xcorr + @ r3 = int len + @ output: + @ r0 = int maxcorr + @ internal usage: + @ r4 = opus_val16 *x (for xcorr_kernel_neon()) + @ r5 = opus_val16 *y (for xcorr_kernel_neon()) + @ r6 = int max_pitch + @ r12 = int j + @ q15 = int maxcorr[4] (q15 is not used by xcorr_kernel_neon()) + STMFD sp!, {r4-r6, lr} + LDR r6, [sp, #16] + VMOV.S32 q15, #1 + @ if (max_pitch < 4) goto celt_pitch_xcorr_neon_process4_done + SUBS r6, r6, #4 + BLT celt_pitch_xcorr_neon_process4_done +celt_pitch_xcorr_neon_process4: + @ xcorr_kernel_neon parameters: + @ r3 = len, r4 = _x, r5 = _y, q0 = {0, 0, 0, 0} + MOV r4, r0 + MOV r5, r1 + VEOR q0, q0, q0 + @ xcorr_kernel_neon only modifies r4, r5, r12, and q0...q3. + @ So we don't save/restore any other registers. + BL xcorr_kernel_neon_start + SUBS r6, r6, #4 + VST1.32 {q0}, [r2]! + @ _y += 4 + ADD r1, r1, #8 + VMAX.S32 q15, q15, q0 + @ if (max_pitch < 4) goto celt_pitch_xcorr_neon_process4_done + BGE celt_pitch_xcorr_neon_process4 +@ We have less than 4 sums left to compute. +celt_pitch_xcorr_neon_process4_done: + ADDS r6, r6, #4 + @ Reduce maxcorr to a single value + VMAX.S32 d30, d30, d31 + VPMAX.S32 d30, d30, d30 + @ if (max_pitch <= 0) goto celt_pitch_xcorr_neon_done + BLE celt_pitch_xcorr_neon_done +@ Now compute each remaining sum one at a time. +celt_pitch_xcorr_neon_process_remaining: + MOV r4, r0 + MOV r5, r1 + VMOV.I32 q0, #0 + SUBS r12, r3, #8 + BLT celt_pitch_xcorr_neon_process_remaining4 +@ Sum terms 8 at a time. +celt_pitch_xcorr_neon_process_remaining_loop8: + @ Load x[0...7] + VLD1.16 {q1}, [r4]! + @ Load y[0...7] + VLD1.16 {q2}, [r5]! + SUBS r12, r12, #8 + VMLAL.S16 q0, d4, d2 + VMLAL.S16 q0, d5, d3 + BGE celt_pitch_xcorr_neon_process_remaining_loop8 +@ Sum terms 4 at a time. +celt_pitch_xcorr_neon_process_remaining4: + ADDS r12, r12, #4 + BLT celt_pitch_xcorr_neon_process_remaining4_done + @ Load x[0...3] + VLD1.16 {d2}, [r4]! + @ Load y[0...3] + VLD1.16 {d3}, [r5]! + SUB r12, r12, #4 + VMLAL.S16 q0, d3, d2 +celt_pitch_xcorr_neon_process_remaining4_done: + @ Reduce the sum to a single value. + VADD.S32 d0, d0, d1 + VPADDL.S32 d0, d0 + ADDS r12, r12, #4 + BLE celt_pitch_xcorr_neon_process_remaining_loop_done +@ Sum terms 1 at a time. +celt_pitch_xcorr_neon_process_remaining_loop1: + VLD1.16 {d2[]}, [r4]! + VLD1.16 {d3[]}, [r5]! + SUBS r12, r12, #1 + VMLAL.S16 q0, d2, d3 + BGT celt_pitch_xcorr_neon_process_remaining_loop1 +celt_pitch_xcorr_neon_process_remaining_loop_done: + VST1.32 {d0[0]}, [r2]! + VMAX.S32 d30, d30, d0 + SUBS r6, r6, #1 + @ _y++ + ADD r1, r1, #2 + @ if (--max_pitch > 0) goto celt_pitch_xcorr_neon_process_remaining + BGT celt_pitch_xcorr_neon_process_remaining +celt_pitch_xcorr_neon_done: + VMOV.32 r0, d30[0] + LDMFD sp!, {r4-r6, pc} + .size celt_pitch_xcorr_neon, .-celt_pitch_xcorr_neon @ ENDP + + .endif + + .if OPUS_ARM_MAY_HAVE_EDSP + +@ This will get used on ARMv7 devices without NEON, so it has been optimized +@ to take advantage of dual-issuing where possible. +; xcorr_kernel_edsp: @ PROC +xcorr_kernel_edsp_start: + @ input: + @ r3 = int len + @ r4 = opus_val16 *_x (must be 32-bit aligned) + @ r5 = opus_val16 *_y (must be 32-bit aligned) + @ r6...r9 = opus_val32 sum[4] + @ output: + @ r6...r9 = opus_val32 sum[4] + @ preserved: r0-r5 + @ internal usage + @ r2 = int j + @ r12,r14 = opus_val16 x[4] + @ r10,r11 = opus_val16 y[4] + STMFD sp!, {r2,r4,r5,lr} + LDR r10, [r5], #4 @ Load y[0...1] + SUBS r2, r3, #4 @ j = len-4 + LDR r11, [r5], #4 @ Load y[2...3] + BLE xcorr_kernel_edsp_process4_done + LDR r12, [r4], #4 @ Load x[0...1] + @ Stall +xcorr_kernel_edsp_process4: + @ The multiplies must issue from pipeline 0, and can't dual-issue with each + @ other. Every other instruction here dual-issues with a multiply, and is + @ thus "free". There should be no stalls in the body of the loop. + SMLABB r6, r12, r10, r6 @ sum[0] = MAC16_16(sum[0],x_0,y_0) + LDR r14, [r4], #4 @ Load x[2...3] + SMLABT r7, r12, r10, r7 @ sum[1] = MAC16_16(sum[1],x_0,y_1) + SUBS r2, r2, #4 @ j-=4 + SMLABB r8, r12, r11, r8 @ sum[2] = MAC16_16(sum[2],x_0,y_2) + SMLABT r9, r12, r11, r9 @ sum[3] = MAC16_16(sum[3],x_0,y_3) + SMLATT r6, r12, r10, r6 @ sum[0] = MAC16_16(sum[0],x_1,y_1) + LDR r10, [r5], #4 @ Load y[4...5] + SMLATB r7, r12, r11, r7 @ sum[1] = MAC16_16(sum[1],x_1,y_2) + SMLATT r8, r12, r11, r8 @ sum[2] = MAC16_16(sum[2],x_1,y_3) + SMLATB r9, r12, r10, r9 @ sum[3] = MAC16_16(sum[3],x_1,y_4) + LDRGT r12, [r4], #4 @ Load x[0...1] + SMLABB r6, r14, r11, r6 @ sum[0] = MAC16_16(sum[0],x_2,y_2) + SMLABT r7, r14, r11, r7 @ sum[1] = MAC16_16(sum[1],x_2,y_3) + SMLABB r8, r14, r10, r8 @ sum[2] = MAC16_16(sum[2],x_2,y_4) + SMLABT r9, r14, r10, r9 @ sum[3] = MAC16_16(sum[3],x_2,y_5) + SMLATT r6, r14, r11, r6 @ sum[0] = MAC16_16(sum[0],x_3,y_3) + LDR r11, [r5], #4 @ Load y[6...7] + SMLATB r7, r14, r10, r7 @ sum[1] = MAC16_16(sum[1],x_3,y_4) + SMLATT r8, r14, r10, r8 @ sum[2] = MAC16_16(sum[2],x_3,y_5) + SMLATB r9, r14, r11, r9 @ sum[3] = MAC16_16(sum[3],x_3,y_6) + BGT xcorr_kernel_edsp_process4 +xcorr_kernel_edsp_process4_done: + ADDS r2, r2, #4 + BLE xcorr_kernel_edsp_done + LDRH r12, [r4], #2 @ r12 = *x++ + SUBS r2, r2, #1 @ j-- + @ Stall + SMLABB r6, r12, r10, r6 @ sum[0] = MAC16_16(sum[0],x,y_0) + LDRHGT r14, [r4], #2 @ r14 = *x++ + SMLABT r7, r12, r10, r7 @ sum[1] = MAC16_16(sum[1],x,y_1) + SMLABB r8, r12, r11, r8 @ sum[2] = MAC16_16(sum[2],x,y_2) + SMLABT r9, r12, r11, r9 @ sum[3] = MAC16_16(sum[3],x,y_3) + BLE xcorr_kernel_edsp_done + SMLABT r6, r14, r10, r6 @ sum[0] = MAC16_16(sum[0],x,y_1) + SUBS r2, r2, #1 @ j-- + SMLABB r7, r14, r11, r7 @ sum[1] = MAC16_16(sum[1],x,y_2) + LDRH r10, [r5], #2 @ r10 = y_4 = *y++ + SMLABT r8, r14, r11, r8 @ sum[2] = MAC16_16(sum[2],x,y_3) + LDRHGT r12, [r4], #2 @ r12 = *x++ + SMLABB r9, r14, r10, r9 @ sum[3] = MAC16_16(sum[3],x,y_4) + BLE xcorr_kernel_edsp_done + SMLABB r6, r12, r11, r6 @ sum[0] = MAC16_16(sum[0],tmp,y_2) + CMP r2, #1 @ j-- + SMLABT r7, r12, r11, r7 @ sum[1] = MAC16_16(sum[1],tmp,y_3) + LDRH r2, [r5], #2 @ r2 = y_5 = *y++ + SMLABB r8, r12, r10, r8 @ sum[2] = MAC16_16(sum[2],tmp,y_4) + LDRHGT r14, [r4] @ r14 = *x + SMLABB r9, r12, r2, r9 @ sum[3] = MAC16_16(sum[3],tmp,y_5) + BLE xcorr_kernel_edsp_done + SMLABT r6, r14, r11, r6 @ sum[0] = MAC16_16(sum[0],tmp,y_3) + LDRH r11, [r5] @ r11 = y_6 = *y + SMLABB r7, r14, r10, r7 @ sum[1] = MAC16_16(sum[1],tmp,y_4) + SMLABB r8, r14, r2, r8 @ sum[2] = MAC16_16(sum[2],tmp,y_5) + SMLABB r9, r14, r11, r9 @ sum[3] = MAC16_16(sum[3],tmp,y_6) +xcorr_kernel_edsp_done: + LDMFD sp!, {r2,r4,r5,pc} + .size xcorr_kernel_edsp, .-xcorr_kernel_edsp @ ENDP + +; celt_pitch_xcorr_edsp: @ PROC + @ input: + @ r0 = opus_val16 *_x (must be 32-bit aligned) + @ r1 = opus_val16 *_y (only needs to be 16-bit aligned) + @ r2 = opus_val32 *xcorr + @ r3 = int len + @ output: + @ r0 = maxcorr + @ internal usage + @ r4 = opus_val16 *x + @ r5 = opus_val16 *y + @ r6 = opus_val32 sum0 + @ r7 = opus_val32 sum1 + @ r8 = opus_val32 sum2 + @ r9 = opus_val32 sum3 + @ r1 = int max_pitch + @ r12 = int j + STMFD sp!, {r4-r11, lr} + MOV r5, r1 + LDR r1, [sp, #36] + MOV r4, r0 + TST r5, #3 + @ maxcorr = 1 + MOV r0, #1 + BEQ celt_pitch_xcorr_edsp_process1u_done +@ Compute one sum at the start to make y 32-bit aligned. + SUBS r12, r3, #4 + @ r14 = sum = 0 + MOV r14, #0 + LDRH r8, [r5], #2 + BLE celt_pitch_xcorr_edsp_process1u_loop4_done + LDR r6, [r4], #4 + MOV r8, r8, LSL #16 +celt_pitch_xcorr_edsp_process1u_loop4: + LDR r9, [r5], #4 + SMLABT r14, r6, r8, r14 @ sum = MAC16_16(sum, x_0, y_0) + LDR r7, [r4], #4 + SMLATB r14, r6, r9, r14 @ sum = MAC16_16(sum, x_1, y_1) + LDR r8, [r5], #4 + SMLABT r14, r7, r9, r14 @ sum = MAC16_16(sum, x_2, y_2) + SUBS r12, r12, #4 @ j-=4 + SMLATB r14, r7, r8, r14 @ sum = MAC16_16(sum, x_3, y_3) + LDRGT r6, [r4], #4 + BGT celt_pitch_xcorr_edsp_process1u_loop4 + MOV r8, r8, LSR #16 +celt_pitch_xcorr_edsp_process1u_loop4_done: + ADDS r12, r12, #4 +celt_pitch_xcorr_edsp_process1u_loop1: + LDRHGE r6, [r4], #2 + @ Stall + SMLABBGE r14, r6, r8, r14 @ sum = MAC16_16(sum, *x, *y) + SUBSGE r12, r12, #1 + LDRHGT r8, [r5], #2 + BGT celt_pitch_xcorr_edsp_process1u_loop1 + @ Restore _x + SUB r4, r4, r3, LSL #1 + @ Restore and advance _y + SUB r5, r5, r3, LSL #1 + @ maxcorr = max(maxcorr, sum) + CMP r0, r14 + ADD r5, r5, #2 + MOVLT r0, r14 + SUBS r1, r1, #1 + @ xcorr[i] = sum + STR r14, [r2], #4 + BLE celt_pitch_xcorr_edsp_done +celt_pitch_xcorr_edsp_process1u_done: + @ if (max_pitch < 4) goto celt_pitch_xcorr_edsp_process2 + SUBS r1, r1, #4 + BLT celt_pitch_xcorr_edsp_process2 +celt_pitch_xcorr_edsp_process4: + @ xcorr_kernel_edsp parameters: + @ r3 = len, r4 = _x, r5 = _y, r6...r9 = sum[4] = {0, 0, 0, 0} + MOV r6, #0 + MOV r7, #0 + MOV r8, #0 + MOV r9, #0 + BL xcorr_kernel_edsp_start @ xcorr_kernel_edsp(_x, _y+i, xcorr+i, len) + @ maxcorr = max(maxcorr, sum0, sum1, sum2, sum3) + CMP r0, r6 + @ _y+=4 + ADD r5, r5, #8 + MOVLT r0, r6 + CMP r0, r7 + MOVLT r0, r7 + CMP r0, r8 + MOVLT r0, r8 + CMP r0, r9 + MOVLT r0, r9 + STMIA r2!, {r6-r9} + SUBS r1, r1, #4 + BGE celt_pitch_xcorr_edsp_process4 +celt_pitch_xcorr_edsp_process2: + ADDS r1, r1, #2 + BLT celt_pitch_xcorr_edsp_process1a + SUBS r12, r3, #4 + @ {r10, r11} = {sum0, sum1} = {0, 0} + MOV r10, #0 + MOV r11, #0 + LDR r8, [r5], #4 + BLE celt_pitch_xcorr_edsp_process2_loop_done + LDR r6, [r4], #4 + LDR r9, [r5], #4 +celt_pitch_xcorr_edsp_process2_loop4: + SMLABB r10, r6, r8, r10 @ sum0 = MAC16_16(sum0, x_0, y_0) + LDR r7, [r4], #4 + SMLABT r11, r6, r8, r11 @ sum1 = MAC16_16(sum1, x_0, y_1) + SUBS r12, r12, #4 @ j-=4 + SMLATT r10, r6, r8, r10 @ sum0 = MAC16_16(sum0, x_1, y_1) + LDR r8, [r5], #4 + SMLATB r11, r6, r9, r11 @ sum1 = MAC16_16(sum1, x_1, y_2) + LDRGT r6, [r4], #4 + SMLABB r10, r7, r9, r10 @ sum0 = MAC16_16(sum0, x_2, y_2) + SMLABT r11, r7, r9, r11 @ sum1 = MAC16_16(sum1, x_2, y_3) + SMLATT r10, r7, r9, r10 @ sum0 = MAC16_16(sum0, x_3, y_3) + LDRGT r9, [r5], #4 + SMLATB r11, r7, r8, r11 @ sum1 = MAC16_16(sum1, x_3, y_4) + BGT celt_pitch_xcorr_edsp_process2_loop4 +celt_pitch_xcorr_edsp_process2_loop_done: + ADDS r12, r12, #2 + BLE celt_pitch_xcorr_edsp_process2_1 + LDR r6, [r4], #4 + @ Stall + SMLABB r10, r6, r8, r10 @ sum0 = MAC16_16(sum0, x_0, y_0) + LDR r9, [r5], #4 + SMLABT r11, r6, r8, r11 @ sum1 = MAC16_16(sum1, x_0, y_1) + SUB r12, r12, #2 + SMLATT r10, r6, r8, r10 @ sum0 = MAC16_16(sum0, x_1, y_1) + MOV r8, r9 + SMLATB r11, r6, r9, r11 @ sum1 = MAC16_16(sum1, x_1, y_2) +celt_pitch_xcorr_edsp_process2_1: + LDRH r6, [r4], #2 + ADDS r12, r12, #1 + @ Stall + SMLABB r10, r6, r8, r10 @ sum0 = MAC16_16(sum0, x_0, y_0) + LDRHGT r7, [r4], #2 + SMLABT r11, r6, r8, r11 @ sum1 = MAC16_16(sum1, x_0, y_1) + BLE celt_pitch_xcorr_edsp_process2_done + LDRH r9, [r5], #2 + SMLABT r10, r7, r8, r10 @ sum0 = MAC16_16(sum0, x_0, y_1) + SMLABB r11, r7, r9, r11 @ sum1 = MAC16_16(sum1, x_0, y_2) +celt_pitch_xcorr_edsp_process2_done: + @ Restore _x + SUB r4, r4, r3, LSL #1 + @ Restore and advance _y + SUB r5, r5, r3, LSL #1 + @ maxcorr = max(maxcorr, sum0) + CMP r0, r10 + ADD r5, r5, #2 + MOVLT r0, r10 + SUB r1, r1, #2 + @ maxcorr = max(maxcorr, sum1) + CMP r0, r11 + @ xcorr[i] = sum + STR r10, [r2], #4 + MOVLT r0, r11 + STR r11, [r2], #4 +celt_pitch_xcorr_edsp_process1a: + ADDS r1, r1, #1 + BLT celt_pitch_xcorr_edsp_done + SUBS r12, r3, #4 + @ r14 = sum = 0 + MOV r14, #0 + BLT celt_pitch_xcorr_edsp_process1a_loop_done + LDR r6, [r4], #4 + LDR r8, [r5], #4 + LDR r7, [r4], #4 + LDR r9, [r5], #4 +celt_pitch_xcorr_edsp_process1a_loop4: + SMLABB r14, r6, r8, r14 @ sum = MAC16_16(sum, x_0, y_0) + SUBS r12, r12, #4 @ j-=4 + SMLATT r14, r6, r8, r14 @ sum = MAC16_16(sum, x_1, y_1) + LDRGE r6, [r4], #4 + SMLABB r14, r7, r9, r14 @ sum = MAC16_16(sum, x_2, y_2) + LDRGE r8, [r5], #4 + SMLATT r14, r7, r9, r14 @ sum = MAC16_16(sum, x_3, y_3) + LDRGE r7, [r4], #4 + LDRGE r9, [r5], #4 + BGE celt_pitch_xcorr_edsp_process1a_loop4 +celt_pitch_xcorr_edsp_process1a_loop_done: + ADDS r12, r12, #2 + LDRGE r6, [r4], #4 + LDRGE r8, [r5], #4 + @ Stall + SMLABBGE r14, r6, r8, r14 @ sum = MAC16_16(sum, x_0, y_0) + SUBGE r12, r12, #2 + SMLATTGE r14, r6, r8, r14 @ sum = MAC16_16(sum, x_1, y_1) + ADDS r12, r12, #1 + LDRHGE r6, [r4], #2 + LDRHGE r8, [r5], #2 + @ Stall + SMLABBGE r14, r6, r8, r14 @ sum = MAC16_16(sum, *x, *y) + @ maxcorr = max(maxcorr, sum) + CMP r0, r14 + @ xcorr[i] = sum + STR r14, [r2], #4 + MOVLT r0, r14 +celt_pitch_xcorr_edsp_done: + LDMFD sp!, {r4-r11, pc} + .size celt_pitch_xcorr_edsp, .-celt_pitch_xcorr_edsp @ ENDP + + .endif + +@ END: + .section .note.GNU-stack,"",%progbits diff --git a/thirdparty/opus/celt/arm/celt_pitch_xcorr_arm.s b/thirdparty/opus/celt/arm/celt_pitch_xcorr_arm.s index 6e873afc37..f96e0a88bb 100644 --- a/thirdparty/opus/celt/arm/celt_pitch_xcorr_arm.s +++ b/thirdparty/opus/celt/arm/celt_pitch_xcorr_arm.s @@ -153,7 +153,7 @@ xcorr_kernel_neon_process1 ENDP ; opus_val32 celt_pitch_xcorr_neon(opus_val16 *_x, opus_val16 *_y, -; opus_val32 *xcorr, int len, int max_pitch, int arch) +; opus_val32 *xcorr, int len, int max_pitch) celt_pitch_xcorr_neon PROC ; input: ; r0 = opus_val16 *_x @@ -168,8 +168,6 @@ celt_pitch_xcorr_neon PROC ; r6 = int max_pitch ; r12 = int j ; q15 = int maxcorr[4] (q15 is not used by xcorr_kernel_neon()) - ; ignored: - ; int arch STMFD sp!, {r4-r6, lr} LDR r6, [sp, #16] VMOV.S32 q15, #1 @@ -360,8 +358,6 @@ celt_pitch_xcorr_edsp PROC ; r9 = opus_val32 sum3 ; r1 = int max_pitch ; r12 = int j - ; ignored: - ; int arch STMFD sp!, {r4-r11, lr} MOV r5, r1 LDR r1, [sp, #36] diff --git a/thirdparty/opus/celt/arm/fft_arm.h b/thirdparty/opus/celt/arm/fft_arm.h index 0b78175f3a..0cb55d8e22 100644 --- a/thirdparty/opus/celt/arm/fft_arm.h +++ b/thirdparty/opus/celt/arm/fft_arm.h @@ -34,6 +34,7 @@ #if !defined(FFT_ARM_H) #define FFT_ARM_H +#include "config.h" #include "kiss_fft.h" #if defined(HAVE_ARM_NE10) diff --git a/thirdparty/opus/celt/arm/fixed_armv4.h b/thirdparty/opus/celt/arm/fixed_armv4.h index d84888a772..efb3b1896a 100644 --- a/thirdparty/opus/celt/arm/fixed_armv4.h +++ b/thirdparty/opus/celt/arm/fixed_armv4.h @@ -37,7 +37,7 @@ static OPUS_INLINE opus_val32 MULT16_32_Q16_armv4(opus_val16 a, opus_val32 b) "#MULT16_32_Q16\n\t" "smull %0, %1, %2, %3\n\t" : "=&r"(rd_lo), "=&r"(rd_hi) - : "%r"(b),"r"(SHL32(a,16)) + : "%r"(b),"r"(a<<16) ); return rd_hi; } @@ -54,10 +54,10 @@ static OPUS_INLINE opus_val32 MULT16_32_Q15_armv4(opus_val16 a, opus_val32 b) "#MULT16_32_Q15\n\t" "smull %0, %1, %2, %3\n\t" : "=&r"(rd_lo), "=&r"(rd_hi) - : "%r"(b), "r"(SHL32(a,16)) + : "%r"(b), "r"(a<<16) ); /*We intentionally don't OR in the high bit of rd_lo for speed.*/ - return SHL32(rd_hi,1); + return rd_hi<<1; } #define MULT16_32_Q15(a, b) (MULT16_32_Q15_armv4(a, b)) diff --git a/thirdparty/opus/celt/arm/fixed_armv5e.h b/thirdparty/opus/celt/arm/fixed_armv5e.h index 6bf73cbace..36a6321101 100644 --- a/thirdparty/opus/celt/arm/fixed_armv5e.h +++ b/thirdparty/opus/celt/arm/fixed_armv5e.h @@ -59,7 +59,7 @@ static OPUS_INLINE opus_val32 MULT16_32_Q15_armv5e(opus_val16 a, opus_val32 b) : "=r"(res) : "r"(b), "r"(a) ); - return SHL32(res,1); + return res<<1; } #define MULT16_32_Q15(a, b) (MULT16_32_Q15_armv5e(a, b)) @@ -76,7 +76,7 @@ static OPUS_INLINE opus_val32 MAC16_32_Q15_armv5e(opus_val32 c, opus_val16 a, "#MAC16_32_Q15\n\t" "smlawb %0, %1, %2, %3;\n" : "=r"(res) - : "r"(SHL32(b,1)), "r"(a), "r"(c) + : "r"(b<<1), "r"(a), "r"(c) ); return res; } diff --git a/thirdparty/opus/celt/arm/mdct_arm.h b/thirdparty/opus/celt/arm/mdct_arm.h index 14200bac4b..49cbb44576 100644 --- a/thirdparty/opus/celt/arm/mdct_arm.h +++ b/thirdparty/opus/celt/arm/mdct_arm.h @@ -33,6 +33,7 @@ #if !defined(MDCT_ARM_H) #define MDCT_ARM_H +#include "config.h" #include "mdct.h" #if defined(HAVE_ARM_NE10) diff --git a/thirdparty/opus/celt/arm/pitch_arm.h b/thirdparty/opus/celt/arm/pitch_arm.h index bed8b04eac..14331169ee 100644 --- a/thirdparty/opus/celt/arm/pitch_arm.h +++ b/thirdparty/opus/celt/arm/pitch_arm.h @@ -30,47 +30,11 @@ # include "armcpu.h" -# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) -opus_val32 celt_inner_prod_neon(const opus_val16 *x, const opus_val16 *y, int N); -void dual_inner_prod_neon(const opus_val16 *x, const opus_val16 *y01, - const opus_val16 *y02, int N, opus_val32 *xy1, opus_val32 *xy2); - -# if !defined(OPUS_HAVE_RTCD) && defined(OPUS_ARM_PRESUME_NEON) -# define OVERRIDE_CELT_INNER_PROD (1) -# define OVERRIDE_DUAL_INNER_PROD (1) -# define celt_inner_prod(x, y, N, arch) ((void)(arch), PRESUME_NEON(celt_inner_prod)(x, y, N)) -# define dual_inner_prod(x, y01, y02, N, xy1, xy2, arch) ((void)(arch), PRESUME_NEON(dual_inner_prod)(x, y01, y02, N, xy1, xy2)) -# endif -# endif - -# if !defined(OVERRIDE_CELT_INNER_PROD) -# if defined(OPUS_HAVE_RTCD) && (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR)) -extern opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *x, const opus_val16 *y, int N); -# define OVERRIDE_CELT_INNER_PROD (1) -# define celt_inner_prod(x, y, N, arch) ((*CELT_INNER_PROD_IMPL[(arch)&OPUS_ARCHMASK])(x, y, N)) -# elif defined(OPUS_ARM_PRESUME_NEON_INTR) -# define OVERRIDE_CELT_INNER_PROD (1) -# define celt_inner_prod(x, y, N, arch) ((void)(arch), celt_inner_prod_neon(x, y, N)) -# endif -# endif - -# if !defined(OVERRIDE_DUAL_INNER_PROD) -# if defined(OPUS_HAVE_RTCD) && (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR)) -extern void (*const DUAL_INNER_PROD_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *x, - const opus_val16 *y01, const opus_val16 *y02, int N, opus_val32 *xy1, opus_val32 *xy2); -# define OVERRIDE_DUAL_INNER_PROD (1) -# define dual_inner_prod(x, y01, y02, N, xy1, xy2, arch) ((*DUAL_INNER_PROD_IMPL[(arch)&OPUS_ARCHMASK])(x, y01, y02, N, xy1, xy2)) -# elif defined(OPUS_ARM_PRESUME_NEON_INTR) -# define OVERRIDE_DUAL_INNER_PROD (1) -# define dual_inner_prod(x, y01, y02, N, xy1, xy2, arch) ((void)(arch), dual_inner_prod_neon(x, y01, y02, N, xy1, xy2)) -# endif -# endif - # if defined(FIXED_POINT) # if defined(OPUS_ARM_MAY_HAVE_NEON) opus_val32 celt_pitch_xcorr_neon(const opus_val16 *_x, const opus_val16 *_y, - opus_val32 *xcorr, int len, int max_pitch, int arch); + opus_val32 *xcorr, int len, int max_pitch); # endif # if defined(OPUS_ARM_MAY_HAVE_MEDIA) @@ -79,7 +43,7 @@ opus_val32 celt_pitch_xcorr_neon(const opus_val16 *_x, const opus_val16 *_y, # if defined(OPUS_ARM_MAY_HAVE_EDSP) opus_val32 celt_pitch_xcorr_edsp(const opus_val16 *_x, const opus_val16 *_y, - opus_val32 *xcorr, int len, int max_pitch, int arch); + opus_val32 *xcorr, int len, int max_pitch); # endif # if defined(OPUS_HAVE_RTCD) && \ @@ -88,17 +52,18 @@ opus_val32 celt_pitch_xcorr_edsp(const opus_val16 *_x, const opus_val16 *_y, (defined(OPUS_ARM_MAY_HAVE_EDSP) && !defined(OPUS_ARM_PRESUME_EDSP))) extern opus_val32 (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *, - const opus_val16 *, opus_val32 *, int, int, int); + const opus_val16 *, opus_val32 *, int, int); # define OVERRIDE_PITCH_XCORR (1) # define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \ ((*CELT_PITCH_XCORR_IMPL[(arch)&OPUS_ARCHMASK])(_x, _y, \ - xcorr, len, max_pitch, arch)) + xcorr, len, max_pitch)) # elif defined(OPUS_ARM_PRESUME_EDSP) || \ defined(OPUS_ARM_PRESUME_MEDIA) || \ defined(OPUS_ARM_PRESUME_NEON) # define OVERRIDE_PITCH_XCORR (1) -# define celt_pitch_xcorr (PRESUME_NEON(celt_pitch_xcorr)) +# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \ + ((void)(arch),PRESUME_NEON(celt_pitch_xcorr)(_x, _y, xcorr, len, max_pitch)) # endif @@ -134,24 +99,25 @@ extern void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])( /* Float case */ #if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) void celt_pitch_xcorr_float_neon(const opus_val16 *_x, const opus_val16 *_y, - opus_val32 *xcorr, int len, int max_pitch, int arch); + opus_val32 *xcorr, int len, int max_pitch); #endif # if defined(OPUS_HAVE_RTCD) && \ (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR)) extern void (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *, - const opus_val16 *, opus_val32 *, int, int, int); + const opus_val16 *, opus_val32 *, int, int); # define OVERRIDE_PITCH_XCORR (1) # define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \ ((*CELT_PITCH_XCORR_IMPL[(arch)&OPUS_ARCHMASK])(_x, _y, \ - xcorr, len, max_pitch, arch)) + xcorr, len, max_pitch)) # elif defined(OPUS_ARM_PRESUME_NEON_INTR) # define OVERRIDE_PITCH_XCORR (1) -# define celt_pitch_xcorr celt_pitch_xcorr_float_neon +# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \ + ((void)(arch),celt_pitch_xcorr_float_neon(_x, _y, xcorr, len, max_pitch)) # endif diff --git a/thirdparty/opus/celt/arm/pitch_neon_intr.c b/thirdparty/opus/celt/arm/pitch_neon_intr.c deleted file mode 100644 index 1ac38c433a..0000000000 --- a/thirdparty/opus/celt/arm/pitch_neon_intr.c +++ /dev/null @@ -1,290 +0,0 @@ -/*********************************************************************** -Copyright (c) 2017 Google Inc. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include <arm_neon.h> -#include "pitch.h" - -#ifdef FIXED_POINT - -opus_val32 celt_inner_prod_neon(const opus_val16 *x, const opus_val16 *y, int N) -{ - int i; - opus_val32 xy; - int16x8_t x_s16x8, y_s16x8; - int32x4_t xy_s32x4 = vdupq_n_s32(0); - int64x2_t xy_s64x2; - int64x1_t xy_s64x1; - - for (i = 0; i < N - 7; i += 8) { - x_s16x8 = vld1q_s16(&x[i]); - y_s16x8 = vld1q_s16(&y[i]); - xy_s32x4 = vmlal_s16(xy_s32x4, vget_low_s16 (x_s16x8), vget_low_s16 (y_s16x8)); - xy_s32x4 = vmlal_s16(xy_s32x4, vget_high_s16(x_s16x8), vget_high_s16(y_s16x8)); - } - - if (N - i >= 4) { - const int16x4_t x_s16x4 = vld1_s16(&x[i]); - const int16x4_t y_s16x4 = vld1_s16(&y[i]); - xy_s32x4 = vmlal_s16(xy_s32x4, x_s16x4, y_s16x4); - i += 4; - } - - xy_s64x2 = vpaddlq_s32(xy_s32x4); - xy_s64x1 = vadd_s64(vget_low_s64(xy_s64x2), vget_high_s64(xy_s64x2)); - xy = vget_lane_s32(vreinterpret_s32_s64(xy_s64x1), 0); - - for (; i < N; i++) { - xy = MAC16_16(xy, x[i], y[i]); - } - -#ifdef OPUS_CHECK_ASM - celt_assert(celt_inner_prod_c(x, y, N) == xy); -#endif - - return xy; -} - -void dual_inner_prod_neon(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02, - int N, opus_val32 *xy1, opus_val32 *xy2) -{ - int i; - opus_val32 xy01, xy02; - int16x8_t x_s16x8, y01_s16x8, y02_s16x8; - int32x4_t xy01_s32x4 = vdupq_n_s32(0); - int32x4_t xy02_s32x4 = vdupq_n_s32(0); - int64x2_t xy01_s64x2, xy02_s64x2; - int64x1_t xy01_s64x1, xy02_s64x1; - - for (i = 0; i < N - 7; i += 8) { - x_s16x8 = vld1q_s16(&x[i]); - y01_s16x8 = vld1q_s16(&y01[i]); - y02_s16x8 = vld1q_s16(&y02[i]); - xy01_s32x4 = vmlal_s16(xy01_s32x4, vget_low_s16 (x_s16x8), vget_low_s16 (y01_s16x8)); - xy02_s32x4 = vmlal_s16(xy02_s32x4, vget_low_s16 (x_s16x8), vget_low_s16 (y02_s16x8)); - xy01_s32x4 = vmlal_s16(xy01_s32x4, vget_high_s16(x_s16x8), vget_high_s16(y01_s16x8)); - xy02_s32x4 = vmlal_s16(xy02_s32x4, vget_high_s16(x_s16x8), vget_high_s16(y02_s16x8)); - } - - if (N - i >= 4) { - const int16x4_t x_s16x4 = vld1_s16(&x[i]); - const int16x4_t y01_s16x4 = vld1_s16(&y01[i]); - const int16x4_t y02_s16x4 = vld1_s16(&y02[i]); - xy01_s32x4 = vmlal_s16(xy01_s32x4, x_s16x4, y01_s16x4); - xy02_s32x4 = vmlal_s16(xy02_s32x4, x_s16x4, y02_s16x4); - i += 4; - } - - xy01_s64x2 = vpaddlq_s32(xy01_s32x4); - xy02_s64x2 = vpaddlq_s32(xy02_s32x4); - xy01_s64x1 = vadd_s64(vget_low_s64(xy01_s64x2), vget_high_s64(xy01_s64x2)); - xy02_s64x1 = vadd_s64(vget_low_s64(xy02_s64x2), vget_high_s64(xy02_s64x2)); - xy01 = vget_lane_s32(vreinterpret_s32_s64(xy01_s64x1), 0); - xy02 = vget_lane_s32(vreinterpret_s32_s64(xy02_s64x1), 0); - - for (; i < N; i++) { - xy01 = MAC16_16(xy01, x[i], y01[i]); - xy02 = MAC16_16(xy02, x[i], y02[i]); - } - *xy1 = xy01; - *xy2 = xy02; - -#ifdef OPUS_CHECK_ASM - { - opus_val32 xy1_c, xy2_c; - dual_inner_prod_c(x, y01, y02, N, &xy1_c, &xy2_c); - celt_assert(xy1_c == *xy1); - celt_assert(xy2_c == *xy2); - } -#endif -} - -#else /* !FIXED_POINT */ - -/* ========================================================================== */ - -#ifdef OPUS_CHECK_ASM - -/* This part of code simulates floating-point NEON operations. */ - -/* celt_inner_prod_neon_float_c_simulation() simulates the floating-point */ -/* operations of celt_inner_prod_neon(), and both functions should have bit */ -/* exact output. */ -static opus_val32 celt_inner_prod_neon_float_c_simulation(const opus_val16 *x, const opus_val16 *y, int N) -{ - int i; - opus_val32 xy, xy0 = 0, xy1 = 0, xy2 = 0, xy3 = 0; - for (i = 0; i < N - 3; i += 4) { - xy0 = MAC16_16(xy0, x[i + 0], y[i + 0]); - xy1 = MAC16_16(xy1, x[i + 1], y[i + 1]); - xy2 = MAC16_16(xy2, x[i + 2], y[i + 2]); - xy3 = MAC16_16(xy3, x[i + 3], y[i + 3]); - } - xy0 += xy2; - xy1 += xy3; - xy = xy0 + xy1; - for (; i < N; i++) { - xy = MAC16_16(xy, x[i], y[i]); - } - return xy; -} - -/* dual_inner_prod_neon_float_c_simulation() simulates the floating-point */ -/* operations of dual_inner_prod_neon(), and both functions should have bit */ -/* exact output. */ -static void dual_inner_prod_neon_float_c_simulation(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02, - int N, opus_val32 *xy1, opus_val32 *xy2) -{ - int i; - opus_val32 xy01, xy02, xy01_0 = 0, xy01_1 = 0, xy01_2 = 0, xy01_3 = 0, xy02_0 = 0, xy02_1 = 0, xy02_2 = 0, xy02_3 = 0; - for (i = 0; i < N - 3; i += 4) { - xy01_0 = MAC16_16(xy01_0, x[i + 0], y01[i + 0]); - xy01_1 = MAC16_16(xy01_1, x[i + 1], y01[i + 1]); - xy01_2 = MAC16_16(xy01_2, x[i + 2], y01[i + 2]); - xy01_3 = MAC16_16(xy01_3, x[i + 3], y01[i + 3]); - xy02_0 = MAC16_16(xy02_0, x[i + 0], y02[i + 0]); - xy02_1 = MAC16_16(xy02_1, x[i + 1], y02[i + 1]); - xy02_2 = MAC16_16(xy02_2, x[i + 2], y02[i + 2]); - xy02_3 = MAC16_16(xy02_3, x[i + 3], y02[i + 3]); - } - xy01_0 += xy01_2; - xy02_0 += xy02_2; - xy01_1 += xy01_3; - xy02_1 += xy02_3; - xy01 = xy01_0 + xy01_1; - xy02 = xy02_0 + xy02_1; - for (; i < N; i++) { - xy01 = MAC16_16(xy01, x[i], y01[i]); - xy02 = MAC16_16(xy02, x[i], y02[i]); - } - *xy1 = xy01; - *xy2 = xy02; -} - -#endif /* OPUS_CHECK_ASM */ - -/* ========================================================================== */ - -opus_val32 celt_inner_prod_neon(const opus_val16 *x, const opus_val16 *y, int N) -{ - int i; - opus_val32 xy; - float32x4_t xy_f32x4 = vdupq_n_f32(0); - float32x2_t xy_f32x2; - - for (i = 0; i < N - 7; i += 8) { - float32x4_t x_f32x4, y_f32x4; - x_f32x4 = vld1q_f32(&x[i]); - y_f32x4 = vld1q_f32(&y[i]); - xy_f32x4 = vmlaq_f32(xy_f32x4, x_f32x4, y_f32x4); - x_f32x4 = vld1q_f32(&x[i + 4]); - y_f32x4 = vld1q_f32(&y[i + 4]); - xy_f32x4 = vmlaq_f32(xy_f32x4, x_f32x4, y_f32x4); - } - - if (N - i >= 4) { - const float32x4_t x_f32x4 = vld1q_f32(&x[i]); - const float32x4_t y_f32x4 = vld1q_f32(&y[i]); - xy_f32x4 = vmlaq_f32(xy_f32x4, x_f32x4, y_f32x4); - i += 4; - } - - xy_f32x2 = vadd_f32(vget_low_f32(xy_f32x4), vget_high_f32(xy_f32x4)); - xy_f32x2 = vpadd_f32(xy_f32x2, xy_f32x2); - xy = vget_lane_f32(xy_f32x2, 0); - - for (; i < N; i++) { - xy = MAC16_16(xy, x[i], y[i]); - } - -#ifdef OPUS_CHECK_ASM - celt_assert(ABS32(celt_inner_prod_neon_float_c_simulation(x, y, N) - xy) <= VERY_SMALL); -#endif - - return xy; -} - -void dual_inner_prod_neon(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02, - int N, opus_val32 *xy1, opus_val32 *xy2) -{ - int i; - opus_val32 xy01, xy02; - float32x4_t xy01_f32x4 = vdupq_n_f32(0); - float32x4_t xy02_f32x4 = vdupq_n_f32(0); - float32x2_t xy01_f32x2, xy02_f32x2; - - for (i = 0; i < N - 7; i += 8) { - float32x4_t x_f32x4, y01_f32x4, y02_f32x4; - x_f32x4 = vld1q_f32(&x[i]); - y01_f32x4 = vld1q_f32(&y01[i]); - y02_f32x4 = vld1q_f32(&y02[i]); - xy01_f32x4 = vmlaq_f32(xy01_f32x4, x_f32x4, y01_f32x4); - xy02_f32x4 = vmlaq_f32(xy02_f32x4, x_f32x4, y02_f32x4); - x_f32x4 = vld1q_f32(&x[i + 4]); - y01_f32x4 = vld1q_f32(&y01[i + 4]); - y02_f32x4 = vld1q_f32(&y02[i + 4]); - xy01_f32x4 = vmlaq_f32(xy01_f32x4, x_f32x4, y01_f32x4); - xy02_f32x4 = vmlaq_f32(xy02_f32x4, x_f32x4, y02_f32x4); - } - - if (N - i >= 4) { - const float32x4_t x_f32x4 = vld1q_f32(&x[i]); - const float32x4_t y01_f32x4 = vld1q_f32(&y01[i]); - const float32x4_t y02_f32x4 = vld1q_f32(&y02[i]); - xy01_f32x4 = vmlaq_f32(xy01_f32x4, x_f32x4, y01_f32x4); - xy02_f32x4 = vmlaq_f32(xy02_f32x4, x_f32x4, y02_f32x4); - i += 4; - } - - xy01_f32x2 = vadd_f32(vget_low_f32(xy01_f32x4), vget_high_f32(xy01_f32x4)); - xy02_f32x2 = vadd_f32(vget_low_f32(xy02_f32x4), vget_high_f32(xy02_f32x4)); - xy01_f32x2 = vpadd_f32(xy01_f32x2, xy01_f32x2); - xy02_f32x2 = vpadd_f32(xy02_f32x2, xy02_f32x2); - xy01 = vget_lane_f32(xy01_f32x2, 0); - xy02 = vget_lane_f32(xy02_f32x2, 0); - - for (; i < N; i++) { - xy01 = MAC16_16(xy01, x[i], y01[i]); - xy02 = MAC16_16(xy02, x[i], y02[i]); - } - *xy1 = xy01; - *xy2 = xy02; - -#ifdef OPUS_CHECK_ASM - { - opus_val32 xy1_c, xy2_c; - dual_inner_prod_neon_float_c_simulation(x, y01, y02, N, &xy1_c, &xy2_c); - celt_assert(ABS32(xy1_c - *xy1) <= VERY_SMALL); - celt_assert(ABS32(xy2_c - *xy2) <= VERY_SMALL); - } -#endif -} - -#endif /* FIXED_POINT */ diff --git a/thirdparty/opus/celt/bands.c b/thirdparty/opus/celt/bands.c index 2702963c37..87eaa6c031 100644 --- a/thirdparty/opus/celt/bands.c +++ b/thirdparty/opus/celt/bands.c @@ -65,19 +65,19 @@ opus_uint32 celt_lcg_rand(opus_uint32 seed) /* This is a cos() approximation designed to be bit-exact on any platform. Bit exactness with this approximation is important because it has an impact on the bit allocation */ -opus_int16 bitexact_cos(opus_int16 x) +static opus_int16 bitexact_cos(opus_int16 x) { opus_int32 tmp; opus_int16 x2; tmp = (4096+((opus_int32)(x)*(x)))>>13; - celt_sig_assert(tmp<=32767); + celt_assert(tmp<=32767); x2 = tmp; x2 = (32767-x2) + FRAC_MUL16(x2, (-7651 + FRAC_MUL16(x2, (8277 + FRAC_MUL16(-626, x2))))); - celt_sig_assert(x2<=32766); + celt_assert(x2<=32766); return 1+x2; } -int bitexact_log2tan(int isin,int icos) +static int bitexact_log2tan(int isin,int icos) { int lc; int ls; @@ -92,11 +92,10 @@ int bitexact_log2tan(int isin,int icos) #ifdef FIXED_POINT /* Compute the amplitude (sqrt energy) in each of the bands */ -void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM, int arch) +void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM) { int i, c, N; const opus_int16 *eBands = m->eBands; - (void)arch; N = m->shortMdctSize<<LM; c=0; do { for (i=0;i<end;i++) @@ -156,7 +155,7 @@ void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, cel #else /* FIXED_POINT */ /* Compute the amplitude (sqrt energy) in each of the bands */ -void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM, int arch) +void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM) { int i, c, N; const opus_int16 *eBands = m->eBands; @@ -165,7 +164,7 @@ void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *band for (i=0;i<end;i++) { opus_val32 sum; - sum = 1e-27f + celt_inner_prod(&X[c*N+(eBands[i]<<LM)], &X[c*N+(eBands[i]<<LM)], (eBands[i+1]-eBands[i])<<LM, arch); + sum = 1e-27f + celt_inner_prod_c(&X[c*N+(eBands[i]<<LM)], &X[c*N+(eBands[i]<<LM)], (eBands[i+1]-eBands[i])<<LM); bandE[i+c*m->nbEBands] = celt_sqrt(sum); /*printf ("%f ", bandE[i+c*m->nbEBands]);*/ } @@ -225,9 +224,9 @@ void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X, #endif j=M*eBands[i]; band_end = M*eBands[i+1]; - lg = SATURATE16(ADD32(bandLogE[i], SHL32((opus_val32)eMeans[i],6))); + lg = ADD16(bandLogE[i], SHL16((opus_val16)eMeans[i],6)); #ifndef FIXED_POINT - g = celt_exp2(MIN32(32.f, lg)); + g = celt_exp2(lg); #else /* Handle the integer part of the log energy */ shift = 16-(lg>>DB_SHIFT); @@ -242,12 +241,12 @@ void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X, /* Handle extreme gains with negative shift. */ if (shift<0) { - /* For shift <= -2 and g > 16384 we'd be likely to overflow, so we're - capping the gain here, which is equivalent to a cap of 18 on lg. - This shouldn't trigger unless the bitstream is already corrupted. */ - if (shift <= -2) + /* For shift < -2 we'd be likely to overflow, so we're capping + the gain here. This shouldn't happen unless the bitstream is + already corrupted. */ + if (shift < -2) { - g = 16384; + g = 32767; shift = -2; } do { @@ -282,7 +281,7 @@ void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_mas N0 = m->eBands[i+1]-m->eBands[i]; /* depth in 1/8 bits */ - celt_sig_assert(pulses[i]>=0); + celt_assert(pulses[i]>=0); depth = celt_udiv(1+pulses[i], (m->eBands[i+1]-m->eBands[i]))>>LM; #ifdef FIXED_POINT @@ -361,30 +360,6 @@ void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_mas } } -/* Compute the weights to use for optimizing normalized distortion across - channels. We use the amplitude to weight square distortion, which means - that we use the square root of the value we would have been using if we - wanted to minimize the MSE in the non-normalized domain. This roughly - corresponds to some quick-and-dirty perceptual experiments I ran to - measure inter-aural masking (there doesn't seem to be any published data - on the topic). */ -static void compute_channel_weights(celt_ener Ex, celt_ener Ey, opus_val16 w[2]) -{ - celt_ener minE; -#ifdef FIXED_POINT - int shift; -#endif - minE = MIN32(Ex, Ey); - /* Adjustment to make the weights a bit more conservative. */ - Ex = ADD32(Ex, minE/3); - Ey = ADD32(Ey, minE/3); -#ifdef FIXED_POINT - shift = celt_ilog2(EPSILON+MAX32(Ex, Ey))-14; -#endif - w[0] = VSHR32(Ex, shift); - w[1] = VSHR32(Ey, shift); -} - static void intensity_stereo(const CELTMode *m, celt_norm * OPUS_RESTRICT X, const celt_norm * OPUS_RESTRICT Y, const celt_ener *bandE, int bandID, int N) { int i = bandID; @@ -478,7 +453,7 @@ static void stereo_merge(celt_norm * OPUS_RESTRICT X, celt_norm * OPUS_RESTRICT /* Decide whether we should spread the pulses in the current frame */ int spreading_decision(const CELTMode *m, const celt_norm *X, int *average, int last_decision, int *hf_average, int *tapset_decision, int update_hf, - int end, int C, int M, const int *spread_weight) + int end, int C, int M) { int i, c, N0; int sum = 0, nbBands=0; @@ -519,8 +494,8 @@ int spreading_decision(const CELTMode *m, const celt_norm *X, int *average, if (i>m->nbEBands-4) hf_sum += celt_udiv(32*(tcount[1]+tcount[0]), N); tmp = (2*tcount[2] >= N) + (2*tcount[1] >= N) + (2*tcount[0] >= N); - sum += tmp*spread_weight[i]; - nbBands+=spread_weight[i]; + sum += tmp*256; + nbBands++; } } while (++c<C); @@ -544,7 +519,7 @@ int spreading_decision(const CELTMode *m, const celt_norm *X, int *average, /*printf("%d %d %d\n", hf_sum, *hf_average, *tapset_decision);*/ celt_assert(nbBands>0); /* end has to be non-zero */ celt_assert(sum>=0); - sum = celt_udiv((opus_int32)sum<<8, nbBands); + sum = celt_udiv(sum, nbBands); /* Recursive averaging */ sum = (sum+*average)>>1; *average = sum; @@ -672,7 +647,6 @@ static int compute_qn(int N, int b, int offset, int pulse_cap, int stereo) struct band_ctx { int encode; - int resynth; const CELTMode *m; int i; int intensity; @@ -683,9 +657,6 @@ struct band_ctx { const celt_ener *bandE; opus_uint32 seed; int arch; - int theta_round; - int disable_inv; - int avoid_split_noise; }; struct split_ctx { @@ -743,35 +714,8 @@ static void compute_theta(struct band_ctx *ctx, struct split_ctx *sctx, if (qn!=1) { if (encode) - { - if (!stereo || ctx->theta_round == 0) - { - itheta = (itheta*(opus_int32)qn+8192)>>14; - if (!stereo && ctx->avoid_split_noise && itheta > 0 && itheta < qn) - { - /* Check if the selected value of theta will cause the bit allocation - to inject noise on one side. If so, make sure the energy of that side - is zero. */ - int unquantized = celt_udiv((opus_int32)itheta*16384, qn); - imid = bitexact_cos((opus_int16)unquantized); - iside = bitexact_cos((opus_int16)(16384-unquantized)); - delta = FRAC_MUL16((N-1)<<7,bitexact_log2tan(iside,imid)); - if (delta > *b) - itheta = qn; - else if (delta < -*b) - itheta = 0; - } - } else { - int down; - /* Bias quantization towards itheta=0 and itheta=16384. */ - int bias = itheta > 8192 ? 32767/qn : -32767/qn; - down = IMIN(qn-1, IMAX(0, (itheta*(opus_int32)qn + bias)>>14)); - if (ctx->theta_round < 0) - itheta = down; - else - itheta = down+1; - } - } + itheta = (itheta*(opus_int32)qn+8192)>>14; + /* Entropy coding of the angle. We use a uniform pdf for the time split, a step for stereo, and a triangular one for the rest. */ if (stereo && N>2) @@ -849,7 +793,7 @@ static void compute_theta(struct band_ctx *ctx, struct split_ctx *sctx, } else if (stereo) { if (encode) { - inv = itheta > 8192 && !ctx->disable_inv; + inv = itheta > 8192; if (inv) { int j; @@ -866,9 +810,6 @@ static void compute_theta(struct band_ctx *ctx, struct split_ctx *sctx, inv = ec_dec_bit_logp(ec, 2); } else inv = 0; - /* inv flag override to avoid problems with downmixing. */ - if (ctx->disable_inv) - inv = 0; itheta = 0; } qalloc = ec_tell_frac(ec) - tell; @@ -904,6 +845,11 @@ static void compute_theta(struct band_ctx *ctx, struct split_ctx *sctx, static unsigned quant_band_n1(struct band_ctx *ctx, celt_norm *X, celt_norm *Y, int b, celt_norm *lowband_out) { +#ifdef RESYNTH + int resynth = 1; +#else + int resynth = !ctx->encode; +#endif int c; int stereo; celt_norm *x = X; @@ -928,7 +874,7 @@ static unsigned quant_band_n1(struct band_ctx *ctx, celt_norm *X, celt_norm *Y, ctx->remaining_bits -= 1<<BITRES; b-=1<<BITRES; } - if (ctx->resynth) + if (resynth) x[0] = sign ? -NORM_SCALING : NORM_SCALING; x = Y; } while (++c<1+stereo); @@ -953,6 +899,11 @@ static unsigned quant_partition(struct band_ctx *ctx, celt_norm *X, int B0=B; opus_val16 mid=0, side=0; unsigned cm=0; +#ifdef RESYNTH + int resynth = 1; +#else + int resynth = !ctx->encode; +#endif celt_norm *Y=NULL; int encode; const CELTMode *m; @@ -984,7 +935,8 @@ static unsigned quant_partition(struct band_ctx *ctx, celt_norm *X, fill = (fill&1)|(fill<<1); B = (B+1)>>1; - compute_theta(ctx, &sctx, X, Y, N, &b, B, B0, LM, 0, &fill); + compute_theta(ctx, &sctx, X, Y, N, &b, B, B0, + LM, 0, &fill); imid = sctx.imid; iside = sctx.iside; delta = sctx.delta; @@ -1018,20 +970,24 @@ static unsigned quant_partition(struct band_ctx *ctx, celt_norm *X, rebalance = ctx->remaining_bits; if (mbits >= sbits) { - cm = quant_partition(ctx, X, N, mbits, B, lowband, LM, + cm = quant_partition(ctx, X, N, mbits, B, + lowband, LM, MULT16_16_P15(gain,mid), fill); rebalance = mbits - (rebalance-ctx->remaining_bits); if (rebalance > 3<<BITRES && itheta!=0) sbits += rebalance - (3<<BITRES); - cm |= quant_partition(ctx, Y, N, sbits, B, next_lowband2, LM, + cm |= quant_partition(ctx, Y, N, sbits, B, + next_lowband2, LM, MULT16_16_P15(gain,side), fill>>B)<<(B0>>1); } else { - cm = quant_partition(ctx, Y, N, sbits, B, next_lowband2, LM, + cm = quant_partition(ctx, Y, N, sbits, B, + next_lowband2, LM, MULT16_16_P15(gain,side), fill>>B)<<(B0>>1); rebalance = sbits - (rebalance-ctx->remaining_bits); if (rebalance > 3<<BITRES && itheta!=16384) mbits += rebalance - (3<<BITRES); - cm |= quant_partition(ctx, X, N, mbits, B, lowband, LM, + cm |= quant_partition(ctx, X, N, mbits, B, + lowband, LM, MULT16_16_P15(gain,mid), fill); } } else { @@ -1056,14 +1012,18 @@ static unsigned quant_partition(struct band_ctx *ctx, celt_norm *X, /* Finally do the actual quantization */ if (encode) { - cm = alg_quant(X, N, K, spread, B, ec, gain, ctx->resynth, ctx->arch); + cm = alg_quant(X, N, K, spread, B, ec +#ifdef RESYNTH + , gain +#endif + ); } else { cm = alg_unquant(X, N, K, spread, B, ec, gain); } } else { /* If there's no pulse, fill the band anyway */ int j; - if (ctx->resynth) + if (resynth) { unsigned cm_mask; /* B can be as large as 16, so this shift might overflow an int on a @@ -1120,6 +1080,11 @@ static unsigned quant_band(struct band_ctx *ctx, celt_norm *X, int recombine=0; int longBlocks; unsigned cm=0; +#ifdef RESYNTH + int resynth = 1; +#else + int resynth = !ctx->encode; +#endif int k; int encode; int tf_change; @@ -1186,10 +1151,11 @@ static unsigned quant_band(struct band_ctx *ctx, celt_norm *X, deinterleave_hadamard(lowband, N_B>>recombine, B0<<recombine, longBlocks); } - cm = quant_partition(ctx, X, N, b, B, lowband, LM, gain, fill); + cm = quant_partition(ctx, X, N, b, B, lowband, + LM, gain, fill); /* This code is used by the decoder and by the resynthesis-enabled encoder */ - if (ctx->resynth) + if (resynth) { /* Undo the sample reorganization going from time order to frequency order */ if (B0>1) @@ -1242,6 +1208,11 @@ static unsigned quant_band_stereo(struct band_ctx *ctx, celt_norm *X, celt_norm int inv = 0; opus_val16 mid=0, side=0; unsigned cm=0; +#ifdef RESYNTH + int resynth = 1; +#else + int resynth = !ctx->encode; +#endif int mbits, sbits, delta; int itheta; int qalloc; @@ -1261,7 +1232,8 @@ static unsigned quant_band_stereo(struct band_ctx *ctx, celt_norm *X, celt_norm orig_fill = fill; - compute_theta(ctx, &sctx, X, Y, N, &b, B, B, LM, 1, &fill); + compute_theta(ctx, &sctx, X, Y, N, &b, B, B, + LM, 1, &fill); inv = sctx.inv; imid = sctx.imid; iside = sctx.iside; @@ -1309,13 +1281,13 @@ static unsigned quant_band_stereo(struct band_ctx *ctx, celt_norm *X, celt_norm sign = 1-2*sign; /* We use orig_fill here because we want to fold the side, but if itheta==16384, we'll have cleared the low bits of fill. */ - cm = quant_band(ctx, x2, N, mbits, B, lowband, LM, lowband_out, Q15ONE, - lowband_scratch, orig_fill); + cm = quant_band(ctx, x2, N, mbits, B, lowband, + LM, lowband_out, Q15ONE, lowband_scratch, orig_fill); /* We don't split N=2 bands, so cm is either 1 or 0 (for a fold-collapse), and there's no need to worry about mixing with the other channel. */ y2[0] = -sign*x2[1]; y2[1] = sign*x2[0]; - if (ctx->resynth) + if (resynth) { celt_norm tmp; X[0] = MULT16_16_Q15(mid, X[0]); @@ -1342,32 +1314,38 @@ static unsigned quant_band_stereo(struct band_ctx *ctx, celt_norm *X, celt_norm { /* In stereo mode, we do not apply a scaling to the mid because we need the normalized mid for folding later. */ - cm = quant_band(ctx, X, N, mbits, B, lowband, LM, lowband_out, Q15ONE, - lowband_scratch, fill); + cm = quant_band(ctx, X, N, mbits, B, + lowband, LM, lowband_out, + Q15ONE, lowband_scratch, fill); rebalance = mbits - (rebalance-ctx->remaining_bits); if (rebalance > 3<<BITRES && itheta!=0) sbits += rebalance - (3<<BITRES); /* For a stereo split, the high bits of fill are always zero, so no folding will be done to the side. */ - cm |= quant_band(ctx, Y, N, sbits, B, NULL, LM, NULL, side, NULL, fill>>B); + cm |= quant_band(ctx, Y, N, sbits, B, + NULL, LM, NULL, + side, NULL, fill>>B); } else { /* For a stereo split, the high bits of fill are always zero, so no folding will be done to the side. */ - cm = quant_band(ctx, Y, N, sbits, B, NULL, LM, NULL, side, NULL, fill>>B); + cm = quant_band(ctx, Y, N, sbits, B, + NULL, LM, NULL, + side, NULL, fill>>B); rebalance = sbits - (rebalance-ctx->remaining_bits); if (rebalance > 3<<BITRES && itheta!=16384) mbits += rebalance - (3<<BITRES); /* In stereo mode, we do not apply a scaling to the mid because we need the normalized mid for folding later. */ - cm |= quant_band(ctx, X, N, mbits, B, lowband, LM, lowband_out, Q15ONE, - lowband_scratch, fill); + cm |= quant_band(ctx, X, N, mbits, B, + lowband, LM, lowband_out, + Q15ONE, lowband_scratch, fill); } } /* This code is used by the decoder and by the resynthesis-enabled encoder */ - if (ctx->resynth) + if (resynth) { if (N!=2) stereo_merge(X, Y, mid, N, ctx->arch); @@ -1381,38 +1359,19 @@ static unsigned quant_band_stereo(struct band_ctx *ctx, celt_norm *X, celt_norm return cm; } -static void special_hybrid_folding(const CELTMode *m, celt_norm *norm, celt_norm *norm2, int start, int M, int dual_stereo) -{ - int n1, n2; - const opus_int16 * OPUS_RESTRICT eBands = m->eBands; - n1 = M*(eBands[start+1]-eBands[start]); - n2 = M*(eBands[start+2]-eBands[start+1]); - /* Duplicate enough of the first band folding data to be able to fold the second band. - Copies no data for CELT-only mode. */ - OPUS_COPY(&norm[n1], &norm[2*n1 - n2], n2-n1); - if (dual_stereo) - OPUS_COPY(&norm2[n1], &norm2[2*n1 - n2], n2-n1); -} void quant_all_bands(int encode, const CELTMode *m, int start, int end, celt_norm *X_, celt_norm *Y_, unsigned char *collapse_masks, const celt_ener *bandE, int *pulses, int shortBlocks, int spread, int dual_stereo, int intensity, int *tf_res, opus_int32 total_bits, opus_int32 balance, ec_ctx *ec, int LM, int codedBands, - opus_uint32 *seed, int complexity, int arch, int disable_inv) + opus_uint32 *seed, int arch) { int i; opus_int32 remaining_bits; const opus_int16 * OPUS_RESTRICT eBands = m->eBands; celt_norm * OPUS_RESTRICT norm, * OPUS_RESTRICT norm2; VARDECL(celt_norm, _norm); - VARDECL(celt_norm, _lowband_scratch); - VARDECL(celt_norm, X_save); - VARDECL(celt_norm, Y_save); - VARDECL(celt_norm, X_save2); - VARDECL(celt_norm, Y_save2); - VARDECL(celt_norm, norm_save2); - int resynth_alloc; celt_norm *lowband_scratch; int B; int M; @@ -1420,11 +1379,10 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end, int update_lowband = 1; int C = Y_ != NULL ? 2 : 1; int norm_offset; - int theta_rdo = encode && Y_!=NULL && !dual_stereo && complexity>=8; #ifdef RESYNTH int resynth = 1; #else - int resynth = !encode || theta_rdo; + int resynth = !encode; #endif struct band_ctx ctx; SAVE_STACK; @@ -1437,24 +1395,9 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end, ALLOC(_norm, C*(M*eBands[m->nbEBands-1]-norm_offset), celt_norm); norm = _norm; norm2 = norm + M*eBands[m->nbEBands-1]-norm_offset; - - /* For decoding, we can use the last band as scratch space because we don't need that - scratch space for the last band and we don't care about the data there until we're - decoding the last band. */ - if (encode && resynth) - resynth_alloc = M*(eBands[m->nbEBands]-eBands[m->nbEBands-1]); - else - resynth_alloc = ALLOC_NONE; - ALLOC(_lowband_scratch, resynth_alloc, celt_norm); - if (encode && resynth) - lowband_scratch = _lowband_scratch; - else - lowband_scratch = X_+M*eBands[m->nbEBands-1]; - ALLOC(X_save, resynth_alloc, celt_norm); - ALLOC(Y_save, resynth_alloc, celt_norm); - ALLOC(X_save2, resynth_alloc, celt_norm); - ALLOC(Y_save2, resynth_alloc, celt_norm); - ALLOC(norm_save2, resynth_alloc, celt_norm); + /* We can use the last band as scratch space because we don't need that + scratch space for the last band. */ + lowband_scratch = X_+M*eBands[m->nbEBands-1]; lowband_offset = 0; ctx.bandE = bandE; @@ -1465,11 +1408,6 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end, ctx.seed = *seed; ctx.spread = spread; ctx.arch = arch; - ctx.disable_inv = disable_inv; - ctx.resynth = resynth; - ctx.theta_round = 0; - /* Avoid injecting noise in the first band on transients. */ - ctx.avoid_split_noise = B > 1; for (i=start;i<end;i++) { opus_int32 tell; @@ -1492,7 +1430,6 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end, else Y = NULL; N = M*eBands[i+1]-M*eBands[i]; - celt_assert(N > 0); tell = ec_tell_frac(ec); /* Compute how many bits we want to allocate to this band */ @@ -1508,15 +1445,8 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end, b = 0; } -#ifndef DISABLE_UPDATE_DRAFT - if (resynth && (M*eBands[i]-N >= M*eBands[start] || i==start+1) && (update_lowband || lowband_offset==0)) - lowband_offset = i; - if (i == start+1) - special_hybrid_folding(m, norm, norm2, start, M, dual_stereo); -#else if (resynth && M*eBands[i]-N >= M*eBands[start] && (update_lowband || lowband_offset==0)) lowband_offset = i; -#endif tf_change = tf_res[i]; ctx.tf_change = tf_change; @@ -1527,7 +1457,7 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end, Y = norm; lowband_scratch = NULL; } - if (last && !theta_rdo) + if (i==end-1) lowband_scratch = NULL; /* Get a conservative estimate of the collapse_mask's for the bands we're @@ -1542,11 +1472,7 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end, fold_start = lowband_offset; while(M*eBands[--fold_start] > effective_lowband+norm_offset); fold_end = lowband_offset-1; -#ifndef DISABLE_UPDATE_DRAFT - while(++fold_end < i && M*eBands[fold_end] < effective_lowband+norm_offset+N); -#else while(M*eBands[++fold_end] < effective_lowband+norm_offset+N); -#endif x_cm = y_cm = 0; fold_i = fold_start; do { x_cm |= collapse_masks[fold_i*C+0]; @@ -1579,79 +1505,13 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end, } else { if (Y!=NULL) { - if (theta_rdo && i < intensity) - { - ec_ctx ec_save, ec_save2; - struct band_ctx ctx_save, ctx_save2; - opus_val32 dist0, dist1; - unsigned cm, cm2; - int nstart_bytes, nend_bytes, save_bytes; - unsigned char *bytes_buf; - unsigned char bytes_save[1275]; - opus_val16 w[2]; - compute_channel_weights(bandE[i], bandE[i+m->nbEBands], w); - /* Make a copy. */ - cm = x_cm|y_cm; - ec_save = *ec; - ctx_save = ctx; - OPUS_COPY(X_save, X, N); - OPUS_COPY(Y_save, Y, N); - /* Encode and round down. */ - ctx.theta_round = -1; - x_cm = quant_band_stereo(&ctx, X, Y, N, b, B, - effective_lowband != -1 ? norm+effective_lowband : NULL, LM, - last?NULL:norm+M*eBands[i]-norm_offset, lowband_scratch, cm); - dist0 = MULT16_32_Q15(w[0], celt_inner_prod(X_save, X, N, arch)) + MULT16_32_Q15(w[1], celt_inner_prod(Y_save, Y, N, arch)); - - /* Save first result. */ - cm2 = x_cm; - ec_save2 = *ec; - ctx_save2 = ctx; - OPUS_COPY(X_save2, X, N); - OPUS_COPY(Y_save2, Y, N); - if (!last) - OPUS_COPY(norm_save2, norm+M*eBands[i]-norm_offset, N); - nstart_bytes = ec_save.offs; - nend_bytes = ec_save.storage; - bytes_buf = ec_save.buf+nstart_bytes; - save_bytes = nend_bytes-nstart_bytes; - OPUS_COPY(bytes_save, bytes_buf, save_bytes); - - /* Restore */ - *ec = ec_save; - ctx = ctx_save; - OPUS_COPY(X, X_save, N); - OPUS_COPY(Y, Y_save, N); -#ifndef DISABLE_UPDATE_DRAFT - if (i == start+1) - special_hybrid_folding(m, norm, norm2, start, M, dual_stereo); -#endif - /* Encode and round up. */ - ctx.theta_round = 1; - x_cm = quant_band_stereo(&ctx, X, Y, N, b, B, - effective_lowband != -1 ? norm+effective_lowband : NULL, LM, - last?NULL:norm+M*eBands[i]-norm_offset, lowband_scratch, cm); - dist1 = MULT16_32_Q15(w[0], celt_inner_prod(X_save, X, N, arch)) + MULT16_32_Q15(w[1], celt_inner_prod(Y_save, Y, N, arch)); - if (dist0 >= dist1) { - x_cm = cm2; - *ec = ec_save2; - ctx = ctx_save2; - OPUS_COPY(X, X_save2, N); - OPUS_COPY(Y, Y_save2, N); - if (!last) - OPUS_COPY(norm+M*eBands[i]-norm_offset, norm_save2, N); - OPUS_COPY(bytes_buf, bytes_save, save_bytes); - } - } else { - ctx.theta_round = 0; - x_cm = quant_band_stereo(&ctx, X, Y, N, b, B, - effective_lowband != -1 ? norm+effective_lowband : NULL, LM, - last?NULL:norm+M*eBands[i]-norm_offset, lowband_scratch, x_cm|y_cm); - } + x_cm = quant_band_stereo(&ctx, X, Y, N, b, B, + effective_lowband != -1 ? norm+effective_lowband : NULL, LM, + last?NULL:norm+M*eBands[i]-norm_offset, lowband_scratch, x_cm|y_cm); } else { x_cm = quant_band(&ctx, X, N, b, B, effective_lowband != -1 ? norm+effective_lowband : NULL, LM, - last?NULL:norm+M*eBands[i]-norm_offset, Q15ONE, lowband_scratch, x_cm|y_cm); + last?NULL:norm+M*eBands[i]-norm_offset, Q15ONE, lowband_scratch, x_cm|y_cm); } y_cm = x_cm; } @@ -1661,9 +1521,6 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end, /* Update the folding position only as long as we have 1 bit/sample depth. */ update_lowband = b>(N<<BITRES); - /* We only need to avoid noise on a split for the first band. After that, we - have folding. */ - ctx.avoid_split_noise = 0; } *seed = ctx.seed; diff --git a/thirdparty/opus/celt/bands.h b/thirdparty/opus/celt/bands.h index 422b32cf75..e8bef4bad0 100644 --- a/thirdparty/opus/celt/bands.h +++ b/thirdparty/opus/celt/bands.h @@ -36,15 +36,12 @@ #include "entdec.h" #include "rate.h" -opus_int16 bitexact_cos(opus_int16 x); -int bitexact_log2tan(int isin,int icos); - /** Compute the amplitude (sqrt energy) in each of the bands * @param m Mode data * @param X Spectrum * @param bandE Square root of the energy for each band (returned) */ -void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM, int arch); +void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM); /*void compute_noise_energies(const CELTMode *m, const celt_sig *X, const opus_val16 *tonality, celt_ener *bandE);*/ @@ -72,7 +69,7 @@ void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X, int spreading_decision(const CELTMode *m, const celt_norm *X, int *average, int last_decision, int *hf_average, int *tapset_decision, int update_hf, - int end, int C, int M, const int *spread_weight); + int end, int C, int M); #ifdef MEASURE_NORM_MSE void measure_norm_mse(const CELTMode *m, float *X, float *X0, float *bandE, float *bandE0, int M, int N, int C); @@ -108,7 +105,7 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end, const celt_ener *bandE, int *pulses, int shortBlocks, int spread, int dual_stereo, int intensity, int *tf_res, opus_int32 total_bits, opus_int32 balance, ec_ctx *ec, int M, int codedBands, opus_uint32 *seed, - int complexity, int arch, int disable_inv); + int arch); void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_masks, int LM, int C, int size, int start, diff --git a/thirdparty/opus/celt/celt.c b/thirdparty/opus/celt/celt.c index 9ce234695c..b121c51a1f 100644 --- a/thirdparty/opus/celt/celt.c +++ b/thirdparty/opus/celt/celt.c @@ -111,31 +111,26 @@ void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N, t = MAC16_32_Q16(x[i], g10, x2); t = MAC16_32_Q16(t, g11, ADD32(x1,x3)); t = MAC16_32_Q16(t, g12, ADD32(x0,x4)); - t = SATURATE(t, SIG_SAT); y[i] = t; x4=SHL32(x[i-T+3],1); t = MAC16_32_Q16(x[i+1], g10, x1); t = MAC16_32_Q16(t, g11, ADD32(x0,x2)); t = MAC16_32_Q16(t, g12, ADD32(x4,x3)); - t = SATURATE(t, SIG_SAT); y[i+1] = t; x3=SHL32(x[i-T+4],1); t = MAC16_32_Q16(x[i+2], g10, x0); t = MAC16_32_Q16(t, g11, ADD32(x4,x1)); t = MAC16_32_Q16(t, g12, ADD32(x3,x2)); - t = SATURATE(t, SIG_SAT); y[i+2] = t; x2=SHL32(x[i-T+5],1); t = MAC16_32_Q16(x[i+3], g10, x4); t = MAC16_32_Q16(t, g11, ADD32(x3,x0)); t = MAC16_32_Q16(t, g12, ADD32(x2,x1)); - t = SATURATE(t, SIG_SAT); y[i+3] = t; x1=SHL32(x[i-T+6],1); t = MAC16_32_Q16(x[i+4], g10, x3); t = MAC16_32_Q16(t, g11, ADD32(x2,x4)); t = MAC16_32_Q16(t, g12, ADD32(x1,x0)); - t = SATURATE(t, SIG_SAT); y[i+4] = t; } #ifdef CUSTOM_MODES @@ -146,7 +141,6 @@ void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N, t = MAC16_32_Q16(x[i], g10, x2); t = MAC16_32_Q16(t, g11, ADD32(x1,x3)); t = MAC16_32_Q16(t, g12, ADD32(x0,x4)); - t = SATURATE(t, SIG_SAT); y[i] = t; x4=x3; x3=x2; @@ -175,7 +169,6 @@ void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N, + MULT16_32_Q15(g10,x2) + MULT16_32_Q15(g11,ADD32(x1,x3)) + MULT16_32_Q15(g12,ADD32(x0,x4)); - y[i] = SATURATE(y[i], SIG_SAT); x4=x3; x3=x2; x2=x1; @@ -207,10 +200,6 @@ void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N, OPUS_MOVE(y, x, N); return; } - /* When the gain is zero, T0 and/or T1 is set to zero. We need - to have then be at least 2 to avoid processing garbage data. */ - T0 = IMAX(T0, COMBFILTER_MINPERIOD); - T1 = IMAX(T1, COMBFILTER_MINPERIOD); g00 = MULT16_16_P15(g0, gains[tapset0][0]); g01 = MULT16_16_P15(g0, gains[tapset0][1]); g02 = MULT16_16_P15(g0, gains[tapset0][2]); @@ -236,7 +225,6 @@ void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N, + MULT16_32_Q15(MULT16_16_Q15(f,g10),x2) + MULT16_32_Q15(MULT16_16_Q15(f,g11),ADD32(x1,x3)) + MULT16_32_Q15(MULT16_16_Q15(f,g12),ADD32(x0,x4)); - y[i] = SATURATE(y[i], SIG_SAT); x4=x3; x3=x2; x2=x1; @@ -256,16 +244,11 @@ void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N, } #endif /* OVERRIDE_comb_filter */ -/* TF change table. Positive values mean better frequency resolution (longer - effective window), whereas negative values mean better time resolution - (shorter effective window). The second index is computed as: - 4*isTransient + 2*tf_select + per_band_flag */ const signed char tf_select_table[4][8] = { - /*isTransient=0 isTransient=1 */ - {0, -1, 0, -1, 0,-1, 0,-1}, /* 2.5 ms */ - {0, -1, 0, -2, 1, 0, 1,-1}, /* 5 ms */ - {0, -2, 0, -3, 2, 0, 1,-1}, /* 10 ms */ - {0, -2, 0, -3, 3, 0, 1,-1}, /* 20 ms */ + {0, -1, 0, -1, 0,-1, 0,-1}, + {0, -1, 0, -2, 1, 0, 1,-1}, + {0, -2, 0, -3, 2, 0, 1,-1}, + {0, -2, 0, -3, 3, 0, 1,-1}, }; diff --git a/thirdparty/opus/celt/celt.h b/thirdparty/opus/celt/celt.h index 24b6b2b520..d1f7eb690d 100644 --- a/thirdparty/opus/celt/celt.h +++ b/thirdparty/opus/celt/celt.h @@ -50,8 +50,6 @@ extern "C" { #define CELTDecoder OpusCustomDecoder #define CELTMode OpusCustomMode -#define LEAK_BANDS 19 - typedef struct { int valid; float tonality; @@ -59,28 +57,18 @@ typedef struct { float noisiness; float activity; float music_prob; - float music_prob_min; - float music_prob_max; - int bandwidth; - float activity_probability; - float max_pitch_ratio; - /* Store as Q6 char to save space. */ - unsigned char leak_boost[LEAK_BANDS]; -} AnalysisInfo; - -typedef struct { - int signalType; - int offset; -} SILKInfo; + int bandwidth; +}AnalysisInfo; #define __celt_check_mode_ptr_ptr(ptr) ((ptr) + ((ptr) - (const CELTMode**)(ptr))) #define __celt_check_analysis_ptr(ptr) ((ptr) + ((ptr) - (const AnalysisInfo*)(ptr))) -#define __celt_check_silkinfo_ptr(ptr) ((ptr) + ((ptr) - (const SILKInfo*)(ptr))) - /* Encoder/decoder Requests */ +/* Expose this option again when variable framesize actually works */ +#define OPUS_FRAMESIZE_VARIABLE 5010 /**< Optimize the frame size dynamically */ + #define CELT_SET_PREDICTION_REQUEST 10002 /** Controls the use of interframe prediction. @@ -128,9 +116,6 @@ typedef struct { #define OPUS_SET_ENERGY_MASK_REQUEST 10026 #define OPUS_SET_ENERGY_MASK(x) OPUS_SET_ENERGY_MASK_REQUEST, __opus_check_val16_ptr(x) -#define CELT_SET_SILK_INFO_REQUEST 10028 -#define CELT_SET_SILK_INFO(x) CELT_SET_SILK_INFO_REQUEST, __celt_check_silkinfo_ptr(x) - /* Encoder stuff */ int celt_encoder_get_size(int channels); @@ -209,13 +194,6 @@ static OPUS_INLINE int fromOpus(unsigned char c) extern const signed char tf_select_table[4][8]; -#if defined(ENABLE_HARDENING) || defined(ENABLE_ASSERTIONS) -void validate_celt_decoder(CELTDecoder *st); -#define VALIDATE_CELT_DECODER(st) validate_celt_decoder(st) -#else -#define VALIDATE_CELT_DECODER(st) -#endif - int resampling_factor(opus_int32 rate); void celt_preemphasis(const opus_val16 * OPUS_RESTRICT pcmp, celt_sig * OPUS_RESTRICT inp, diff --git a/thirdparty/opus/celt/celt_decoder.c b/thirdparty/opus/celt/celt_decoder.c index e6efce9358..b978bb34d1 100644 --- a/thirdparty/opus/celt/celt_decoder.c +++ b/thirdparty/opus/celt/celt_decoder.c @@ -51,14 +51,6 @@ #include "celt_lpc.h" #include "vq.h" -/* The maximum pitch lag to allow in the pitch-based PLC. It's possible to save - CPU time in the PLC pitch search by making this smaller than MAX_PERIOD. The - current value corresponds to a pitch of 66.67 Hz. */ -#define PLC_PITCH_LAG_MAX (720) -/* The minimum pitch lag to allow in the pitch-based PLC. This corresponds to a - pitch of 480 Hz. */ -#define PLC_PITCH_LAG_MIN (100) - #if defined(SMALL_FOOTPRINT) && defined(FIXED_POINT) #define NORM_ALIASING_HACK #endif @@ -81,7 +73,6 @@ struct OpusCustomDecoder { int downsample; int start, end; int signalling; - int disable_inv; int arch; /* Everything beyond this point gets cleared on a reset */ @@ -109,38 +100,6 @@ struct OpusCustomDecoder { /* opus_val16 backgroundLogE[], Size = 2*mode->nbEBands */ }; -#if defined(ENABLE_HARDENING) || defined(ENABLE_ASSERTIONS) -/* Make basic checks on the CELT state to ensure we don't end - up writing all over memory. */ -void validate_celt_decoder(CELTDecoder *st) -{ -#ifndef CUSTOM_MODES - celt_assert(st->mode == opus_custom_mode_create(48000, 960, NULL)); - celt_assert(st->overlap == 120); -#endif - celt_assert(st->channels == 1 || st->channels == 2); - celt_assert(st->stream_channels == 1 || st->stream_channels == 2); - celt_assert(st->downsample > 0); - celt_assert(st->start == 0 || st->start == 17); - celt_assert(st->start < st->end); - celt_assert(st->end <= 21); -#ifdef OPUS_ARCHMASK - celt_assert(st->arch >= 0); - celt_assert(st->arch <= OPUS_ARCHMASK); -#endif - celt_assert(st->last_pitch_index <= PLC_PITCH_LAG_MAX); - celt_assert(st->last_pitch_index >= PLC_PITCH_LAG_MIN || st->last_pitch_index == 0); - celt_assert(st->postfilter_period < MAX_PERIOD); - celt_assert(st->postfilter_period >= COMBFILTER_MINPERIOD || st->postfilter_period == 0); - celt_assert(st->postfilter_period_old < MAX_PERIOD); - celt_assert(st->postfilter_period_old >= COMBFILTER_MINPERIOD || st->postfilter_period_old == 0); - celt_assert(st->postfilter_tapset <= 2); - celt_assert(st->postfilter_tapset >= 0); - celt_assert(st->postfilter_tapset_old <= 2); - celt_assert(st->postfilter_tapset_old >= 0); -} -#endif - int celt_decoder_get_size(int channels) { const CELTMode *mode = opus_custom_mode_create(48000, 960, NULL); @@ -204,11 +163,6 @@ OPUS_CUSTOM_NOSTATIC int opus_custom_decoder_init(CELTDecoder *st, const CELTMod st->start = 0; st->end = st->mode->effEBands; st->signalling = 1; -#ifndef DISABLE_UPDATE_DRAFT - st->disable_inv = channels == 1; -#else - st->disable_inv = 0; -#endif st->arch = opus_select_arch(); opus_custom_decoder_ctl(st, OPUS_RESET_STATE); @@ -223,36 +177,6 @@ void opus_custom_decoder_destroy(CELTDecoder *st) } #endif /* CUSTOM_MODES */ -#ifndef CUSTOM_MODES -/* Special case for stereo with no downsampling and no accumulation. This is - quite common and we can make it faster by processing both channels in the - same loop, reducing overhead due to the dependency loop in the IIR filter. */ -static void deemphasis_stereo_simple(celt_sig *in[], opus_val16 *pcm, int N, const opus_val16 coef0, - celt_sig *mem) -{ - celt_sig * OPUS_RESTRICT x0; - celt_sig * OPUS_RESTRICT x1; - celt_sig m0, m1; - int j; - x0=in[0]; - x1=in[1]; - m0 = mem[0]; - m1 = mem[1]; - for (j=0;j<N;j++) - { - celt_sig tmp0, tmp1; - /* Add VERY_SMALL to x[] first to reduce dependency chain. */ - tmp0 = x0[j] + VERY_SMALL + m0; - tmp1 = x1[j] + VERY_SMALL + m1; - m0 = MULT16_32_Q15(coef0, tmp0); - m1 = MULT16_32_Q15(coef0, tmp1); - pcm[2*j ] = SCALEOUT(SIG2WORD16(tmp0)); - pcm[2*j+1] = SCALEOUT(SIG2WORD16(tmp1)); - } - mem[0] = m0; - mem[1] = m1; -} -#endif #ifndef RESYNTH static @@ -266,14 +190,6 @@ void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, c opus_val16 coef0; VARDECL(celt_sig, scratch); SAVE_STACK; -#ifndef CUSTOM_MODES - /* Short version for common case. */ - if (downsample == 1 && C == 2 && !accum) - { - deemphasis_stereo_simple(in, pcm, N, coef[0], mem); - return; - } -#endif #ifndef FIXED_POINT (void)accum; celt_assert(accum==0); @@ -309,7 +225,7 @@ void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, c /* Shortcut for the standard (non-custom modes) case */ for (j=0;j<N;j++) { - celt_sig tmp = x[j] + VERY_SMALL + m; + celt_sig tmp = x[j] + m + VERY_SMALL; m = MULT16_32_Q15(coef0, tmp); scratch[j] = tmp; } @@ -330,7 +246,7 @@ void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, c { for (j=0;j<N;j++) { - celt_sig tmp = x[j] + VERY_SMALL + m; + celt_sig tmp = x[j] + m + VERY_SMALL; m = MULT16_32_Q15(coef0, tmp); y[j*C] = SCALEOUT(SIG2WORD16(tmp)); } @@ -417,7 +333,7 @@ void celt_synthesis(const CELTMode *mode, celt_norm *X, celt_sig * out_syn[], denormalise_bands(mode, X+N, freq2, oldBandE+nbEBands, start, effEnd, M, downsample, silence); for (i=0;i<N;i++) - freq[i] = ADD32(HALF32(freq[i]), HALF32(freq2[i])); + freq[i] = HALF32(ADD32(freq[i],freq2[i])); for (b=0;b<B;b++) clt_mdct_backward(&mode->mdct, &freq[b], out_syn[0]+NB*b, mode->window, overlap, shift, B, arch); } else { @@ -429,12 +345,6 @@ void celt_synthesis(const CELTMode *mode, celt_norm *X, celt_sig * out_syn[], clt_mdct_backward(&mode->mdct, &freq[b], out_syn[c]+NB*b, mode->window, overlap, shift, B, arch); } while (++c<CC); } - /* Saturate IMDCT output so that we can't overflow in the pitch postfilter - or in the */ - c=0; do { - for (i=0;i<N;i++) - out_syn[c][i] = SATURATE(out_syn[c][i], SIG_SAT); - } while (++c<CC); RESTORE_STACK; } @@ -477,6 +387,14 @@ static void tf_decode(int start, int end, int isTransient, int *tf_res, int LM, } } +/* The maximum pitch lag to allow in the pitch-based PLC. It's possible to save + CPU time in the PLC pitch search by making this smaller than MAX_PERIOD. The + current value corresponds to a pitch of 66.67 Hz. */ +#define PLC_PITCH_LAG_MAX (720) +/* The minimum pitch lag to allow in the pitch-based PLC. This corresponds to a + pitch of 480 Hz. */ +#define PLC_PITCH_LAG_MIN (100) + static int celt_plc_pitch_search(celt_sig *decode_mem[2], int C, int arch) { int pitch_index; @@ -586,15 +504,12 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM) celt_synthesis(mode, X, out_syn, oldBandE, start, effEnd, C, C, 0, LM, st->downsample, 0, st->arch); } else { - int exc_length; /* Pitch-based PLC */ const opus_val16 *window; - opus_val16 *exc; opus_val16 fade = Q15ONE; int pitch_index; VARDECL(opus_val32, etmp); - VARDECL(opus_val16, _exc); - VARDECL(opus_val16, fir_tmp); + VARDECL(opus_val16, exc); if (loss_count == 0) { @@ -604,14 +519,8 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM) fade = QCONST16(.8f,15); } - /* We want the excitation for 2 pitch periods in order to look for a - decaying signal, but we can't get more than MAX_PERIOD. */ - exc_length = IMIN(2*pitch_index, MAX_PERIOD); - ALLOC(etmp, overlap, opus_val32); - ALLOC(_exc, MAX_PERIOD+LPC_ORDER, opus_val16); - ALLOC(fir_tmp, exc_length, opus_val16); - exc = _exc+LPC_ORDER; + ALLOC(exc, MAX_PERIOD, opus_val16); window = mode->window; c=0; do { opus_val16 decay; @@ -620,11 +529,13 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM) celt_sig *buf; int extrapolation_offset; int extrapolation_len; + int exc_length; int j; buf = decode_mem[c]; - for (i=0;i<MAX_PERIOD+LPC_ORDER;i++) - exc[i-LPC_ORDER] = ROUND16(buf[DECODE_BUFFER_SIZE-MAX_PERIOD-LPC_ORDER+i], SIG_SHIFT); + for (i=0;i<MAX_PERIOD;i++) { + exc[i] = ROUND16(buf[DECODE_BUFFER_SIZE-MAX_PERIOD+i], SIG_SHIFT); + } if (loss_count == 0) { @@ -650,32 +561,22 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM) #endif } _celt_lpc(lpc+c*LPC_ORDER, ac, LPC_ORDER); -#ifdef FIXED_POINT - /* For fixed-point, apply bandwidth expansion until we can guarantee that - no overflow can happen in the IIR filter. This means: - 32768*sum(abs(filter)) < 2^31 */ - while (1) { - opus_val16 tmp=Q15ONE; - opus_val32 sum=QCONST16(1., SIG_SHIFT); - for (i=0;i<LPC_ORDER;i++) - sum += ABS16(lpc[c*LPC_ORDER+i]); - if (sum < 65535) break; - for (i=0;i<LPC_ORDER;i++) - { - tmp = MULT16_16_Q15(QCONST16(.99f,15), tmp); - lpc[c*LPC_ORDER+i] = MULT16_16_Q15(lpc[c*LPC_ORDER+i], tmp); - } - } -#endif } + /* We want the excitation for 2 pitch periods in order to look for a + decaying signal, but we can't get more than MAX_PERIOD. */ + exc_length = IMIN(2*pitch_index, MAX_PERIOD); /* Initialize the LPC history with the samples just before the start of the region for which we're computing the excitation. */ { - /* Compute the excitation for exc_length samples before the loss. We need the copy - because celt_fir() cannot filter in-place. */ + opus_val16 lpc_mem[LPC_ORDER]; + for (i=0;i<LPC_ORDER;i++) + { + lpc_mem[i] = + ROUND16(buf[DECODE_BUFFER_SIZE-exc_length-1-i], SIG_SHIFT); + } + /* Compute the excitation for exc_length samples before the loss. */ celt_fir(exc+MAX_PERIOD-exc_length, lpc+c*LPC_ORDER, - fir_tmp, exc_length, LPC_ORDER, st->arch); - OPUS_COPY(exc+MAX_PERIOD-exc_length, fir_tmp, exc_length); + exc+MAX_PERIOD-exc_length, exc_length, LPC_ORDER, lpc_mem, st->arch); } /* Check if the waveform is decaying, and if so how fast. @@ -729,8 +630,9 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM) tmp = ROUND16( buf[DECODE_BUFFER_SIZE-MAX_PERIOD-N+extrapolation_offset+j], SIG_SHIFT); - S1 += SHR32(MULT16_16(tmp, tmp), 10); + S1 += SHR32(MULT16_16(tmp, tmp), 8); } + { opus_val16 lpc_mem[LPC_ORDER]; /* Copy the last decoded samples (prior to the overlap region) to @@ -742,10 +644,6 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM) celt_iir(buf+DECODE_BUFFER_SIZE-N, lpc+c*LPC_ORDER, buf+DECODE_BUFFER_SIZE-N, extrapolation_len, LPC_ORDER, lpc_mem, st->arch); -#ifdef FIXED_POINT - for (i=0; i < extrapolation_len; i++) - buf[DECODE_BUFFER_SIZE-N+i] = SATURATE(buf[DECODE_BUFFER_SIZE-N+i], SIG_SAT); -#endif } /* Check if the synthesis energy is higher than expected, which can @@ -756,7 +654,7 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM) for (i=0;i<extrapolation_len;i++) { opus_val16 tmp = ROUND16(buf[DECODE_BUFFER_SIZE-N+i], SIG_SHIFT); - S2 += SHR32(MULT16_16(tmp, tmp), 10); + S2 += SHR32(MULT16_16(tmp, tmp), 8); } /* This checks for an "explosion" in the synthesis. */ #ifdef FIXED_POINT @@ -864,7 +762,6 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat const opus_int16 *eBands; ALLOC_STACK; - VALIDATE_CELT_DECODER(st); mode = st->mode; nbEBands = mode->nbEBands; overlap = mode->overlap; @@ -1059,7 +956,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat ALLOC(pulses, nbEBands, int); ALLOC(fine_priority, nbEBands, int); - codedBands = clt_compute_allocation(mode, start, end, offsets, cap, + codedBands = compute_allocation(mode, start, end, offsets, cap, alloc_trim, &intensity, &dual_stereo, bits, &balance, pulses, fine_quant, fine_priority, C, LM, dec, 0, 0, 0); @@ -1082,8 +979,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat quant_all_bands(0, mode, start, end, X, C==2 ? X+N : NULL, collapse_masks, NULL, pulses, shortBlocks, spread_decision, dual_stereo, intensity, tf_res, - len*(8<<BITRES)-anti_collapse_rsv, balance, dec, LM, codedBands, &st->rng, 0, - st->arch, st->disable_inv); + len*(8<<BITRES)-anti_collapse_rsv, balance, dec, LM, codedBands, &st->rng, st->arch); if (anti_collapse_rsv > 0) { @@ -1338,26 +1234,6 @@ int opus_custom_decoder_ctl(CELTDecoder * OPUS_RESTRICT st, int request, ...) *value=st->rng; } break; - case OPUS_SET_PHASE_INVERSION_DISABLED_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - if(value<0 || value>1) - { - goto bad_arg; - } - st->disable_inv = value; - } - break; - case OPUS_GET_PHASE_INVERSION_DISABLED_REQUEST: - { - opus_int32 *value = va_arg(ap, opus_int32*); - if (!value) - { - goto bad_arg; - } - *value = st->disable_inv; - } - break; default: goto bad_request; } diff --git a/thirdparty/opus/celt/celt_encoder.c b/thirdparty/opus/celt/celt_encoder.c index 44cb0850ab..3ee7a4d3f7 100644 --- a/thirdparty/opus/celt/celt_encoder.c +++ b/thirdparty/opus/celt/celt_encoder.c @@ -73,8 +73,8 @@ struct OpusCustomEncoder { int constrained_vbr; /* If zero, VBR can do whatever it likes with the rate */ int loss_rate; int lsb_depth; + int variable_duration; int lfe; - int disable_inv; int arch; /* Everything beyond this point gets cleared on a reset */ @@ -98,7 +98,6 @@ struct OpusCustomEncoder { #endif int consec_transient; AnalysisInfo analysis; - SILKInfo silk_info; opus_val32 preemph_memE[2]; opus_val32 preemph_memD[2]; @@ -124,7 +123,6 @@ struct OpusCustomEncoder { /* opus_val16 oldBandE[], Size = channels*mode->nbEBands */ /* opus_val16 oldLogE[], Size = channels*mode->nbEBands */ /* opus_val16 oldLogE2[], Size = channels*mode->nbEBands */ - /* opus_val16 energyError[], Size = channels*mode->nbEBands */ }; int celt_encoder_get_size(int channels) @@ -138,10 +136,9 @@ OPUS_CUSTOM_NOSTATIC int opus_custom_encoder_get_size(const CELTMode *mode, int int size = sizeof(struct CELTEncoder) + (channels*mode->overlap-1)*sizeof(celt_sig) /* celt_sig in_mem[channels*mode->overlap]; */ + channels*COMBFILTER_MAXPERIOD*sizeof(celt_sig) /* celt_sig prefilter_mem[channels*COMBFILTER_MAXPERIOD]; */ - + 4*channels*mode->nbEBands*sizeof(opus_val16); /* opus_val16 oldBandE[channels*mode->nbEBands]; */ + + 3*channels*mode->nbEBands*sizeof(opus_val16); /* opus_val16 oldBandE[channels*mode->nbEBands]; */ /* opus_val16 oldLogE[channels*mode->nbEBands]; */ /* opus_val16 oldLogE2[channels*mode->nbEBands]; */ - /* opus_val16 energyError[channels*mode->nbEBands]; */ return size; } @@ -181,6 +178,7 @@ static int opus_custom_encoder_init_arch(CELTEncoder *st, const CELTMode *mode, st->start = 0; st->end = st->mode->effEBands; st->signalling = 1; + st->arch = arch; st->constrained_vbr = 1; @@ -225,8 +223,7 @@ void opus_custom_encoder_destroy(CELTEncoder *st) static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int C, - opus_val16 *tf_estimate, int *tf_chan, int allow_weak_transients, - int *weak_transient) + opus_val16 *tf_estimate, int *tf_chan) { int i; VARDECL(opus_val16, tmp); @@ -236,12 +233,6 @@ static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int int c; opus_val16 tf_max; int len2; - /* Forward masking: 6.7 dB/ms. */ -#ifdef FIXED_POINT - int forward_shift = 4; -#else - opus_val16 forward_decay = QCONST16(.0625f,15); -#endif /* Table of 6*64/x, trained on real data to minimize the average error */ static const unsigned char inv_table[128] = { 255,255,156,110, 86, 70, 59, 51, 45, 40, 37, 33, 31, 28, 26, 25, @@ -256,19 +247,6 @@ static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int SAVE_STACK; ALLOC(tmp, len, opus_val16); - *weak_transient = 0; - /* For lower bitrates, let's be more conservative and have a forward masking - decay of 3.3 dB/ms. This avoids having to code transients at very low - bitrate (mostly for hybrid), which can result in unstable energy and/or - partial collapse. */ - if (allow_weak_transients) - { -#ifdef FIXED_POINT - forward_shift = 5; -#else - forward_decay = QCONST16(.03125f,15); -#endif - } len2=len/2; for (c=0;c<C;c++) { @@ -291,7 +269,7 @@ static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int mem0 = mem1 + y - 2*x; mem1 = x - .5f*y; #endif - tmp[i] = SROUND16(y, 2); + tmp[i] = EXTRACT16(SHR32(y,2)); /*printf("%f ", tmp[i]);*/ } /*printf("\n");*/ @@ -302,7 +280,7 @@ static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int /* Normalize tmp to max range */ { int shift=0; - shift = 14-celt_ilog2(MAX16(1, celt_maxabs16(tmp, len))); + shift = 14-celt_ilog2(1+celt_maxabs16(tmp, len)); if (shift!=0) { for (i=0;i<len;i++) @@ -321,9 +299,9 @@ static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int mean += x2; #ifdef FIXED_POINT /* FIXME: Use PSHR16() instead */ - tmp[i] = mem0 + PSHR32(x2-mem0,forward_shift); + tmp[i] = mem0 + PSHR32(x2-mem0,4); #else - tmp[i] = mem0 + MULT16_16_P15(forward_decay,x2-mem0); + tmp[i] = mem0 + MULT16_16_P15(QCONST16(.0625f,15),x2-mem0); #endif mem0 = tmp[i]; } @@ -333,7 +311,6 @@ static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int /* Backward pass to compute the pre-echo threshold */ for (i=len2-1;i>=0;i--) { - /* Backward masking: 13.9 dB/ms. */ #ifdef FIXED_POINT /* FIXME: Use PSHR16() instead */ tmp[i] = mem0 + PSHR32(tmp[i]-mem0,3); @@ -362,12 +339,6 @@ static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int /* Compute harmonic mean discarding the unreliable boundaries The data is smooth, so we only take 1/4th of the samples */ unmask=0; - /* We should never see NaNs here. If we find any, then something really bad happened and we better abort - before it does any damage later on. If these asserts are disabled (no hardening), then the table - lookup a few lines below (id = ...) is likely to crash dur to an out-of-bounds read. DO NOT FIX - that crash on NaN since it could result in a worse issue later on. */ - celt_assert(!celt_isnan(tmp[0])); - celt_assert(!celt_isnan(norm)); for (i=12;i<len2-5;i+=4) { int id; @@ -388,12 +359,7 @@ static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int } } is_transient = mask_metric>200; - /* For low bitrates, define "weak transients" that need to be - handled differently to avoid partial collapse. */ - if (allow_weak_transients && is_transient && mask_metric<600) { - is_transient = 0; - *weak_transient = 1; - } + /* Arbitrary metric for VBR boost */ tf_max = MAX16(0,celt_sqrt(27*mask_metric)-42); /* *tf_estimate = 1 + MIN16(1, sqrt(MAX16(0, tf_max-30))/20); */ @@ -583,7 +549,7 @@ static opus_val32 l1_metric(const celt_norm *tmp, int N, int LM, opus_val16 bias static int tf_analysis(const CELTMode *m, int len, int isTransient, int *tf_res, int lambda, celt_norm *X, int N0, int LM, - opus_val16 tf_estimate, int tf_chan, int *importance) + int *tf_sum, opus_val16 tf_estimate, int tf_chan) { int i; VARDECL(int, metric); @@ -608,6 +574,7 @@ static int tf_analysis(const CELTMode *m, int len, int isTransient, ALLOC(path0, len, int); ALLOC(path1, len, int); + *tf_sum = 0; for (i=0;i<len;i++) { int k, N; @@ -662,26 +629,27 @@ static int tf_analysis(const CELTMode *m, int len, int isTransient, metric[i] = 2*best_level; else metric[i] = -2*best_level; + *tf_sum += (isTransient ? LM : 0) - metric[i]/2; /* For bands that can't be split to -1, set the metric to the half-way point to avoid biasing the decision */ if (narrow && (metric[i]==0 || metric[i]==-2*LM)) metric[i]-=1; - /*printf("%d ", metric[i]/2 + (!isTransient)*LM);*/ + /*printf("%d ", metric[i]);*/ } /*printf("\n");*/ /* Search for the optimal tf resolution, including tf_select */ tf_select = 0; for (sel=0;sel<2;sel++) { - cost0 = importance[0]*abs(metric[0]-2*tf_select_table[LM][4*isTransient+2*sel+0]); - cost1 = importance[0]*abs(metric[0]-2*tf_select_table[LM][4*isTransient+2*sel+1]) + (isTransient ? 0 : lambda); + cost0 = 0; + cost1 = isTransient ? 0 : lambda; for (i=1;i<len;i++) { int curr0, curr1; curr0 = IMIN(cost0, cost1 + lambda); curr1 = IMIN(cost0 + lambda, cost1); - cost0 = curr0 + importance[i]*abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*sel+0]); - cost1 = curr1 + importance[i]*abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*sel+1]); + cost0 = curr0 + abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*sel+0]); + cost1 = curr1 + abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*sel+1]); } cost0 = IMIN(cost0, cost1); selcost[sel]=cost0; @@ -690,8 +658,8 @@ static int tf_analysis(const CELTMode *m, int len, int isTransient, * If tests confirm it's useful for non-transients, we could allow it. */ if (selcost[1]<selcost[0] && isTransient) tf_select=1; - cost0 = importance[0]*abs(metric[0]-2*tf_select_table[LM][4*isTransient+2*tf_select+0]); - cost1 = importance[0]*abs(metric[0]-2*tf_select_table[LM][4*isTransient+2*tf_select+1]) + (isTransient ? 0 : lambda); + cost0 = 0; + cost1 = isTransient ? 0 : lambda; /* Viterbi forward pass */ for (i=1;i<len;i++) { @@ -719,8 +687,8 @@ static int tf_analysis(const CELTMode *m, int len, int isTransient, curr1 = from1; path1[i]= 1; } - cost0 = curr0 + importance[i]*abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*tf_select+0]); - cost1 = curr1 + importance[i]*abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*tf_select+1]); + cost0 = curr0 + abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*tf_select+0]); + cost1 = curr1 + abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*tf_select+1]); } tf_res[len-1] = cost0 < cost1 ? 0 : 1; /* Viterbi backward pass to check the decisions */ @@ -786,7 +754,7 @@ static void tf_encode(int start, int end, int isTransient, int *tf_res, int LM, static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X, const opus_val16 *bandLogE, int end, int LM, int C, int N0, AnalysisInfo *analysis, opus_val16 *stereo_saving, opus_val16 tf_estimate, - int intensity, opus_val16 surround_trim, opus_int32 equiv_rate, int arch) + int intensity, opus_val16 surround_trim, int arch) { int i; opus_val32 diff=0; @@ -794,14 +762,6 @@ static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X, int trim_index; opus_val16 trim = QCONST16(5.f, 8); opus_val16 logXC, logXC2; - /* At low bitrate, reducing the trim seems to help. At higher bitrates, it's less - clear what's best, so we're keeping it as it was before, at least for now. */ - if (equiv_rate < 64000) { - trim = QCONST16(4.f, 8); - } else if (equiv_rate < 80000) { - opus_int32 frac = (equiv_rate-64000) >> 10; - trim = QCONST16(4.f, 8) + QCONST16(1.f/16.f, 8)*frac; - } if (C==2) { opus_val16 sum = 0; /* Q10 */ @@ -849,7 +809,7 @@ static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X, } while (++c<C); diff /= C*(end-1); /*printf("%f\n", diff);*/ - trim -= MAX32(-QCONST16(2.f, 8), MIN32(QCONST16(2.f, 8), SHR32(diff+QCONST16(1.f, DB_SHIFT),DB_SHIFT-8)/6 )); + trim -= MAX16(-QCONST16(2.f, 8), MIN16(QCONST16(2.f, 8), SHR16(diff+QCONST16(1.f, DB_SHIFT),DB_SHIFT-8)/6 )); trim -= SHR16(surround_trim, DB_SHIFT-8); trim -= 2*SHR16(tf_estimate, 14-8); #ifndef DISABLE_FLOAT_API @@ -970,8 +930,7 @@ static opus_val16 median_of_3(const opus_val16 *x) static opus_val16 dynalloc_analysis(const opus_val16 *bandLogE, const opus_val16 *bandLogE2, int nbEBands, int start, int end, int C, int *offsets, int lsb_depth, const opus_int16 *logN, int isTransient, int vbr, int constrained_vbr, const opus_int16 *eBands, int LM, - int effectiveBytes, opus_int32 *tot_boost_, int lfe, opus_val16 *surround_dynalloc, - AnalysisInfo *analysis, int *importance, int *spread_weight) + int effectiveBytes, opus_int32 *tot_boost_, int lfe, opus_val16 *surround_dynalloc) { int i, c; opus_int32 tot_boost=0; @@ -997,42 +956,6 @@ static opus_val16 dynalloc_analysis(const opus_val16 *bandLogE, const opus_val16 for (i=0;i<end;i++) maxDepth = MAX16(maxDepth, bandLogE[c*nbEBands+i]-noise_floor[i]); } while (++c<C); - { - /* Compute a really simple masking model to avoid taking into account completely masked - bands when computing the spreading decision. */ - VARDECL(opus_val16, mask); - VARDECL(opus_val16, sig); - ALLOC(mask, nbEBands, opus_val16); - ALLOC(sig, nbEBands, opus_val16); - for (i=0;i<end;i++) - mask[i] = bandLogE[i]-noise_floor[i]; - if (C==2) - { - for (i=0;i<end;i++) - mask[i] = MAX16(mask[i], bandLogE[nbEBands+i]-noise_floor[i]); - } - OPUS_COPY(sig, mask, end); - for (i=1;i<end;i++) - mask[i] = MAX16(mask[i], mask[i-1] - QCONST16(2.f, DB_SHIFT)); - for (i=end-2;i>=0;i--) - mask[i] = MAX16(mask[i], mask[i+1] - QCONST16(3.f, DB_SHIFT)); - for (i=0;i<end;i++) - { - /* Compute SMR: Mask is never more than 72 dB below the peak and never below the noise floor.*/ - opus_val16 smr = sig[i]-MAX16(MAX16(0, maxDepth-QCONST16(12.f, DB_SHIFT)), mask[i]); - /* Clamp SMR to make sure we're not shifting by something negative or too large. */ -#ifdef FIXED_POINT - /* FIXME: Use PSHR16() instead */ - int shift = -PSHR32(MAX16(-QCONST16(5.f, DB_SHIFT), MIN16(0, smr)), DB_SHIFT); -#else - int shift = IMIN(5, IMAX(0, -(int)floor(.5f + smr))); -#endif - spread_weight[i] = 32 >> shift; - } - /*for (i=0;i<end;i++) - printf("%d ", spread_weight[i]); - printf("\n");*/ - } /* Make sure that dynamic allocation can't make us bust the budget */ if (effectiveBytes > 50 && LM>=1 && !lfe) { @@ -1089,14 +1012,6 @@ static opus_val16 dynalloc_analysis(const opus_val16 *bandLogE, const opus_val16 } for (i=start;i<end;i++) follower[i] = MAX16(follower[i], surround_dynalloc[i]); - for (i=start;i<end;i++) - { -#ifdef FIXED_POINT - importance[i] = PSHR32(13*celt_exp2(MIN16(follower[i], QCONST16(4.f, DB_SHIFT))), 16); -#else - importance[i] = (int)floor(.5f+13*celt_exp2(MIN16(follower[i], QCONST16(4.f, DB_SHIFT)))); -#endif - } /* For non-transient CBR/CVBR frames, halve the dynalloc contribution */ if ((!vbr || constrained_vbr)&&!isTransient) { @@ -1105,26 +1020,14 @@ static opus_val16 dynalloc_analysis(const opus_val16 *bandLogE, const opus_val16 } for (i=start;i<end;i++) { - if (i<8) - follower[i] *= 2; - if (i>=12) - follower[i] = HALF16(follower[i]); - } -#ifdef DISABLE_FLOAT_API - (void)analysis; -#else - if (analysis->valid) - { - for (i=start;i<IMIN(LEAK_BANDS, end);i++) - follower[i] = follower[i] + QCONST16(1.f/64.f, DB_SHIFT)*analysis->leak_boost[i]; - } -#endif - for (i=start;i<end;i++) - { int width; int boost; int boost_bits; + if (i<8) + follower[i] *= 2; + if (i>=12) + follower[i] = HALF16(follower[i]); follower[i] = MIN16(follower[i], QCONST16(4, DB_SHIFT)); width = C*(eBands[i+1]-eBands[i])<<LM; @@ -1139,11 +1042,11 @@ static opus_val16 dynalloc_analysis(const opus_val16 *bandLogE, const opus_val16 boost = (int)SHR32(EXTEND32(follower[i])*width/6,DB_SHIFT); boost_bits = boost*6<<BITRES; } - /* For CBR and non-transient CVBR frames, limit dynalloc to 2/3 of the bits */ + /* For CBR and non-transient CVBR frames, limit dynalloc to 1/4 of the bits */ if ((!vbr || (constrained_vbr&&!isTransient)) - && (tot_boost+boost_bits)>>BITRES>>3 > 2*effectiveBytes/3) + && (tot_boost+boost_bits)>>BITRES>>3 > effectiveBytes/4) { - opus_int32 cap = ((2*effectiveBytes/3)<<BITRES<<3); + opus_int32 cap = ((effectiveBytes/4)<<BITRES<<3); offsets[i] = cap-tot_boost; tot_boost = cap; break; @@ -1152,9 +1055,6 @@ static opus_val16 dynalloc_analysis(const opus_val16 *bandLogE, const opus_val16 tot_boost += boost_bits; } } - } else { - for (i=start;i<end;i++) - importance[i] = 13; } *tot_boost_ = tot_boost; RESTORE_STACK; @@ -1163,7 +1063,7 @@ static opus_val16 dynalloc_analysis(const opus_val16 *bandLogE, const opus_val16 static int run_prefilter(CELTEncoder *st, celt_sig *in, celt_sig *prefilter_mem, int CC, int N, - int prefilter_tapset, int *pitch, opus_val16 *gain, int *qgain, int enabled, int nbAvailableBytes, AnalysisInfo *analysis) + int prefilter_tapset, int *pitch, opus_val16 *gain, int *qgain, int enabled, int nbAvailableBytes) { int c; VARDECL(celt_sig, _pre); @@ -1219,12 +1119,7 @@ static int run_prefilter(CELTEncoder *st, celt_sig *in, celt_sig *prefilter_mem, gain1 = 0; pitch_index = COMBFILTER_MINPERIOD; } -#ifndef DISABLE_FLOAT_API - if (analysis->valid) - gain1 = (opus_val16)(gain1 * analysis->max_pitch_ratio); -#else - (void)analysis; -#endif + /* Gain threshold for enabling the prefilter/postfilter */ pf_threshold = QCONST16(.2f,15); @@ -1298,7 +1193,7 @@ static int compute_vbr(const CELTMode *mode, AnalysisInfo *analysis, opus_int32 int LM, opus_int32 bitrate, int lastCodedBands, int C, int intensity, int constrained_vbr, opus_val16 stereo_saving, int tot_boost, opus_val16 tf_estimate, int pitch_change, opus_val16 maxDepth, - int lfe, int has_surround_mask, opus_val16 surround_masking, + int variable_duration, int lfe, int has_surround_mask, opus_val16 surround_masking, opus_val16 temporal_vbr) { /* The target rate in 8th bits per frame */ @@ -1340,9 +1235,10 @@ static int compute_vbr(const CELTMode *mode, AnalysisInfo *analysis, opus_int32 SHR32(MULT16_16(stereo_saving-QCONST16(0.1f,8),(coded_stereo_dof<<BITRES)),8)); } /* Boost the rate according to dynalloc (minus the dynalloc average for calibration). */ - target += tot_boost-(19<<LM); + target += tot_boost-(16<<LM); /* Apply transient boost, compensating for average boost. */ - tf_calibration = QCONST16(0.044f,14); + tf_calibration = variable_duration==OPUS_FRAMESIZE_VARIABLE ? + QCONST16(0.02f,14) : QCONST16(0.04f,14); target += (opus_int32)SHL32(MULT16_32_Q15(tf_estimate-tf_calibration, target),1); #ifndef DISABLE_FLOAT_API @@ -1353,7 +1249,7 @@ static int compute_vbr(const CELTMode *mode, AnalysisInfo *analysis, opus_int32 float tonal; /* Tonality boost (compensating for the average). */ - tonal = MAX16(0.f,analysis->tonality-.15f)-0.12f; + tonal = MAX16(0.f,analysis->tonality-.15f)-0.09f; tonal_target = target + (opus_int32)((coded_bins<<BITRES)*1.2f*tonal); if (pitch_change) tonal_target += (opus_int32)((coded_bins<<BITRES)*.8f); @@ -1383,11 +1279,21 @@ static int compute_vbr(const CELTMode *mode, AnalysisInfo *analysis, opus_int32 /*printf("%f %d\n", maxDepth, floor_depth);*/ } - /* Make VBR less aggressive for constrained VBR because we can't keep a higher bitrate - for long. Needs tuning. */ - if ((!has_surround_mask||lfe) && constrained_vbr) + if ((!has_surround_mask||lfe) && (constrained_vbr || bitrate<64000)) { - target = base_target + (opus_int32)MULT16_32_Q15(QCONST16(0.67f, 15), target-base_target); + opus_val16 rate_factor = Q15ONE; + if (bitrate < 64000) + { +#ifdef FIXED_POINT + rate_factor = MAX16(0,(bitrate-32000)); +#else + rate_factor = MAX16(0,(1.f/32768)*(bitrate-32000)); +#endif + } + if (constrained_vbr) + rate_factor = MIN16(rate_factor, QCONST16(0.67f, 15)); + target = base_target + (opus_int32)MULT16_32_Q15(rate_factor, target-base_target); + } if (!has_surround_mask && tf_estimate < QCONST16(.2f, 14)) @@ -1421,13 +1327,11 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, VARDECL(int, pulses); VARDECL(int, cap); VARDECL(int, offsets); - VARDECL(int, importance); - VARDECL(int, spread_weight); VARDECL(int, fine_priority); VARDECL(int, tf_res); VARDECL(unsigned char, collapse_masks); celt_sig *prefilter_mem; - opus_val16 *oldBandE, *oldLogE, *oldLogE2, *energyError; + opus_val16 *oldBandE, *oldLogE, *oldLogE2; int shortBlocks=0; int isTransient=0; const int CC = st->channels; @@ -1439,6 +1343,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, int end; int effEnd; int codedBands; + int tf_sum; int alloc_trim; int pitch_index=COMBFILTER_MINPERIOD; opus_val16 gain1 = 0; @@ -1450,7 +1355,6 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, opus_int32 total_boost; opus_int32 balance; opus_int32 tell; - opus_int32 tell0_frac; int prefilter_tapset=0; int pf_on; int anti_collapse_rsv; @@ -1472,10 +1376,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, opus_val16 surround_masking=0; opus_val16 temporal_vbr=0; opus_val16 surround_trim = 0; - opus_int32 equiv_rate; - int hybrid; - int weak_transient = 0; - int enable_tf_analysis; + opus_int32 equiv_rate = 510000; VARDECL(opus_val16, surround_dynalloc); ALLOC_STACK; @@ -1485,7 +1386,6 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, eBands = mode->eBands; start = st->start; end = st->end; - hybrid = start != 0; tf_estimate = 0; if (nbCompressedBytes<2 || pcm==NULL) { @@ -1509,14 +1409,12 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, oldBandE = (opus_val16*)(st->in_mem+CC*(overlap+COMBFILTER_MAXPERIOD)); oldLogE = oldBandE + CC*nbEBands; oldLogE2 = oldLogE + CC*nbEBands; - energyError = oldLogE2 + CC*nbEBands; if (enc==NULL) { - tell0_frac=tell=1; + tell=1; nbFilledBytes=0; } else { - tell0_frac=ec_tell_frac(enc); tell=ec_tell(enc); nbFilledBytes=(tell+4)>>3; } @@ -1569,11 +1467,10 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, if (st->bitrate!=OPUS_BITRATE_MAX) nbCompressedBytes = IMAX(2, IMIN(nbCompressedBytes, (tmp+4*mode->Fs)/(8*mode->Fs)-!!st->signalling)); - effectiveBytes = nbCompressedBytes - nbFilledBytes; + effectiveBytes = nbCompressedBytes; } - equiv_rate = ((opus_int32)nbCompressedBytes*8*50 >> (3-LM)) - (40*C+20)*((400>>LM) - 50); if (st->bitrate != OPUS_BITRATE_MAX) - equiv_rate = IMIN(equiv_rate, st->bitrate - (40*C+20)*((400>>LM) - 50)); + equiv_rate = st->bitrate - (40*C+20)*((400>>LM) - 50); if (enc==NULL) { @@ -1661,17 +1558,17 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, { int enabled; int qg; - enabled = ((st->lfe&&nbAvailableBytes>3) || nbAvailableBytes>12*C) && !hybrid && !silence && !st->disable_pf - && st->complexity >= 5; + enabled = ((st->lfe&&nbAvailableBytes>3) || nbAvailableBytes>12*C) && start==0 && !silence && !st->disable_pf + && st->complexity >= 5 && !(st->consec_transient && LM!=3 && st->variable_duration==OPUS_FRAMESIZE_VARIABLE); prefilter_tapset = st->tapset_decision; - pf_on = run_prefilter(st, in, prefilter_mem, CC, N, prefilter_tapset, &pitch_index, &gain1, &qg, enabled, nbAvailableBytes, &st->analysis); + pf_on = run_prefilter(st, in, prefilter_mem, CC, N, prefilter_tapset, &pitch_index, &gain1, &qg, enabled, nbAvailableBytes); if ((gain1 > QCONST16(.4f,15) || st->prefilter_gain > QCONST16(.4f,15)) && (!st->analysis.valid || st->analysis.tonality > .3) && (pitch_index > 1.26*st->prefilter_period || pitch_index < .79*st->prefilter_period)) pitch_change = 1; if (pf_on==0) { - if(!hybrid && tell+16<=total_bits) + if(start==0 && tell+16<=total_bits) ec_enc_bit_logp(enc, 0, 1); } else { /*This block is not gated by a total bits check only because @@ -1692,12 +1589,8 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, shortBlocks = 0; if (st->complexity >= 1 && !st->lfe) { - /* Reduces the likelihood of energy instability on fricatives at low bitrate - in hybrid mode. It seems like we still want to have real transients on vowels - though (small SILK quantization offset value). */ - int allow_weak_transients = hybrid && effectiveBytes<15 && st->silk_info.signalType != 2; isTransient = transient_analysis(in, N+overlap, CC, - &tf_estimate, &tf_chan, allow_weak_transients, &weak_transient); + &tf_estimate, &tf_chan); } if (LM>0 && ec_tell(enc)+3<=total_bits) { @@ -1717,19 +1610,16 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, if (secondMdct) { compute_mdcts(mode, 0, in, freq, C, CC, LM, st->upsample, st->arch); - compute_band_energies(mode, freq, bandE, effEnd, C, LM, st->arch); + compute_band_energies(mode, freq, bandE, effEnd, C, LM); amp2Log2(mode, effEnd, end, bandE, bandLogE2, C); for (i=0;i<C*nbEBands;i++) bandLogE2[i] += HALF16(SHL16(LM, DB_SHIFT)); } compute_mdcts(mode, shortBlocks, in, freq, C, CC, LM, st->upsample, st->arch); - /* This should catch any NaN in the CELT input. Since we're not supposed to see any (they're filtered - at the Opus layer), just abort. */ - celt_assert(!celt_isnan(freq[0]) && (C==1 || !celt_isnan(freq[N]))); if (CC==2&&C==1) tf_chan = 0; - compute_band_energies(mode, freq, bandE, effEnd, C, LM, st->arch); + compute_band_energies(mode, freq, bandE, effEnd, C, LM); if (st->lfe) { @@ -1744,7 +1634,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, ALLOC(surround_dynalloc, C*nbEBands, opus_val16); OPUS_CLEAR(surround_dynalloc, end); /* This computes how much masking takes place between surround channels */ - if (!hybrid&&st->energy_mask&&!st->lfe) + if (start==0&&st->energy_mask&&!st->lfe) { int mask_end; int midband; @@ -1846,14 +1736,14 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, /* Last chance to catch any transient we might have missed in the time-domain analysis */ - if (LM>0 && ec_tell(enc)+3<=total_bits && !isTransient && st->complexity>=5 && !st->lfe && !hybrid) + if (LM>0 && ec_tell(enc)+3<=total_bits && !isTransient && st->complexity>=5 && !st->lfe) { if (patch_transient_decision(bandLogE, oldBandE, nbEBands, start, end, C)) { isTransient = 1; shortBlocks = M; compute_mdcts(mode, shortBlocks, in, freq, C, CC, LM, st->upsample, st->arch); - compute_band_energies(mode, freq, bandE, effEnd, C, LM, st->arch); + compute_band_energies(mode, freq, bandE, effEnd, C, LM); amp2Log2(mode, effEnd, end, bandE, bandLogE, C); /* Compensate for the scaling of short vs long mdcts */ for (i=0;i<C*nbEBands;i++) @@ -1870,59 +1760,31 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, /* Band normalisation */ normalise_bands(mode, freq, X, bandE, effEnd, C, M); - enable_tf_analysis = effectiveBytes>=15*C && !hybrid && st->complexity>=2 && !st->lfe; - - ALLOC(offsets, nbEBands, int); - ALLOC(importance, nbEBands, int); - ALLOC(spread_weight, nbEBands, int); - - maxDepth = dynalloc_analysis(bandLogE, bandLogE2, nbEBands, start, end, C, offsets, - st->lsb_depth, mode->logN, isTransient, st->vbr, st->constrained_vbr, - eBands, LM, effectiveBytes, &tot_boost, st->lfe, surround_dynalloc, &st->analysis, importance, spread_weight); - ALLOC(tf_res, nbEBands, int); /* Disable variable tf resolution for hybrid and at very low bitrate */ - if (enable_tf_analysis) + if (effectiveBytes>=15*C && start==0 && st->complexity>=2 && !st->lfe) { int lambda; - lambda = IMAX(80, 20480/effectiveBytes + 2); - tf_select = tf_analysis(mode, effEnd, isTransient, tf_res, lambda, X, N, LM, tf_estimate, tf_chan, importance); + if (effectiveBytes<40) + lambda = 12; + else if (effectiveBytes<60) + lambda = 6; + else if (effectiveBytes<100) + lambda = 4; + else + lambda = 3; + lambda*=2; + tf_select = tf_analysis(mode, effEnd, isTransient, tf_res, lambda, X, N, LM, &tf_sum, tf_estimate, tf_chan); for (i=effEnd;i<end;i++) tf_res[i] = tf_res[effEnd-1]; - } else if (hybrid && weak_transient) - { - /* For weak transients, we rely on the fact that improving time resolution using - TF on a long window is imperfect and will not result in an energy collapse at - low bitrate. */ - for (i=0;i<end;i++) - tf_res[i] = 1; - tf_select=0; - } else if (hybrid && effectiveBytes<15 && st->silk_info.signalType != 2) - { - /* For low bitrate hybrid, we force temporal resolution to 5 ms rather than 2.5 ms. */ - for (i=0;i<end;i++) - tf_res[i] = 0; - tf_select=isTransient; } else { + tf_sum = 0; for (i=0;i<end;i++) tf_res[i] = isTransient; tf_select=0; } ALLOC(error, C*nbEBands, opus_val16); - c=0; - do { - for (i=start;i<end;i++) - { - /* When the energy is stable, slightly bias energy quantization towards - the previous error to make the gain more stable (a constant offset is - better than fluctuations). */ - if (ABS32(SUB32(bandLogE[i+c*nbEBands], oldBandE[i+c*nbEBands])) < QCONST16(2.f, DB_SHIFT)) - { - bandLogE[i+c*nbEBands] -= MULT16_16_Q15(energyError[i+c*nbEBands], QCONST16(0.25f, 15)); - } - } - } while (++c < C); quant_coarse_energy(mode, start, end, effEnd, bandLogE, oldBandE, total_bits, error, enc, C, LM, nbAvailableBytes, st->force_intra, @@ -1936,15 +1798,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, { st->tapset_decision = 0; st->spread_decision = SPREAD_NORMAL; - } else if (hybrid) - { - if (st->complexity == 0) - st->spread_decision = SPREAD_NONE; - else if (isTransient) - st->spread_decision = SPREAD_NORMAL; - else - st->spread_decision = SPREAD_AGGRESSIVE; - } else if (shortBlocks || st->complexity < 3 || nbAvailableBytes < 10*C) + } else if (shortBlocks || st->complexity < 3 || nbAvailableBytes < 10*C || start != 0) { if (st->complexity == 0) st->spread_decision = SPREAD_NONE; @@ -1968,7 +1822,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, { st->spread_decision = spreading_decision(mode, X, &st->tonal_average, st->spread_decision, &st->hf_average, - &st->tapset_decision, pf_on&&!shortBlocks, effEnd, C, M, spread_weight); + &st->tapset_decision, pf_on&&!shortBlocks, effEnd, C, M); } /*printf("%d %d\n", st->tapset_decision, st->spread_decision);*/ /*printf("%f %d %f %d\n\n", st->analysis.tonality, st->spread_decision, st->analysis.tonality_slope, st->tapset_decision);*/ @@ -1976,6 +1830,11 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, ec_enc_icdf(enc, st->spread_decision, spread_icdf, 5); } + ALLOC(offsets, nbEBands, int); + + maxDepth = dynalloc_analysis(bandLogE, bandLogE2, nbEBands, start, end, C, offsets, + st->lsb_depth, mode->logN, isTransient, st->vbr, st->constrained_vbr, + eBands, LM, effectiveBytes, &tot_boost, st->lfe, surround_dynalloc); /* For LFE, everything interesting is in the first band */ if (st->lfe) offsets[0] = IMIN(8, effectiveBytes/3); @@ -2037,15 +1896,12 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, alloc_trim = 5; if (tell+(6<<BITRES) <= total_bits - total_boost) { - if (start > 0 || st->lfe) - { - st->stereo_saving = 0; + if (st->lfe) alloc_trim = 5; - } else { + else alloc_trim = alloc_trim_analysis(mode, X, bandLogE, end, LM, C, N, &st->analysis, &st->stereo_saving, tf_estimate, - st->intensity, surround_trim, equiv_rate, st->arch); - } + st->intensity, surround_trim, st->arch); ec_enc_icdf(enc, alloc_trim, trim_icdf, 7); tell = ec_tell_frac(enc); } @@ -2063,36 +1919,17 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, /* Don't attempt to use more than 510 kb/s, even for frames smaller than 20 ms. The CELT allocator will just not be able to use more than that anyway. */ nbCompressedBytes = IMIN(nbCompressedBytes,1275>>(3-LM)); - if (!hybrid) - { - base_target = vbr_rate - ((40*C+20)<<BITRES); - } else { - base_target = IMAX(0, vbr_rate - ((9*C+4)<<BITRES)); - } + base_target = vbr_rate - ((40*C+20)<<BITRES); if (st->constrained_vbr) base_target += (st->vbr_offset>>lm_diff); - if (!hybrid) - { - target = compute_vbr(mode, &st->analysis, base_target, LM, equiv_rate, + target = compute_vbr(mode, &st->analysis, base_target, LM, equiv_rate, st->lastCodedBands, C, st->intensity, st->constrained_vbr, st->stereo_saving, tot_boost, tf_estimate, pitch_change, maxDepth, - st->lfe, st->energy_mask!=NULL, surround_masking, + st->variable_duration, st->lfe, st->energy_mask!=NULL, surround_masking, temporal_vbr); - } else { - target = base_target; - /* Tonal frames (offset<100) need more bits than noisy (offset>100) ones. */ - if (st->silk_info.offset < 100) target += 12 << BITRES >> (3-LM); - if (st->silk_info.offset > 100) target -= 18 << BITRES >> (3-LM); - /* Boosting bitrate on transients and vowels with significant temporal - spikes. */ - target += (opus_int32)MULT16_16_Q14(tf_estimate-QCONST16(.25f,14), (50<<BITRES)); - /* If we have a strong transient, let's make sure it has enough bits to code - the first two bands, so that it can use folding rather than noise. */ - if (tf_estimate > QCONST16(.7f,14)) - target = IMAX(target, 50<<BITRES); - } + /* The current offset is removed from the target and the space used so far is added*/ target=target+tell; @@ -2100,16 +1937,11 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, result in the encoder running out of bits. The margin of 2 bytes ensures that none of the bust-prevention logic in the decoder will have triggered so far. */ - min_allowed = ((tell+total_boost+(1<<(BITRES+3))-1)>>(BITRES+3)) + 2; - /* Take into account the 37 bits we need to have left in the packet to - signal a redundant frame in hybrid mode. Creating a shorter packet would - create an entropy coder desync. */ - if (hybrid) - min_allowed = IMAX(min_allowed, (tell0_frac+(37<<BITRES)+total_boost+(1<<(BITRES+3))-1)>>(BITRES+3)); + min_allowed = ((tell+total_boost+(1<<(BITRES+3))-1)>>(BITRES+3)) + 2 - nbFilledBytes; nbAvailableBytes = (target+(1<<(BITRES+2)))>>(BITRES+3); nbAvailableBytes = IMAX(min_allowed,nbAvailableBytes); - nbAvailableBytes = IMIN(nbCompressedBytes,nbAvailableBytes); + nbAvailableBytes = IMIN(nbCompressedBytes,nbAvailableBytes+nbFilledBytes) - nbFilledBytes; /* By how much did we "miss" the target on that frame */ delta = target - vbr_rate; @@ -2156,7 +1988,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, st->vbr_reservoir = 0; /*printf ("+%d\n", adjust);*/ } - nbCompressedBytes = IMIN(nbCompressedBytes,nbAvailableBytes); + nbCompressedBytes = IMIN(nbCompressedBytes,nbAvailableBytes+nbFilledBytes); /*printf("%d\n", nbCompressedBytes*50*8);*/ /* This moves the raw bits to take into account the new compressed size */ ec_enc_shrink(enc, nbCompressedBytes); @@ -2191,7 +2023,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, #endif if (st->lfe) signalBandwidth = 1; - codedBands = clt_compute_allocation(mode, start, end, offsets, cap, + codedBands = compute_allocation(mode, start, end, offsets, cap, alloc_trim, &st->intensity, &dual_stereo, bits, &balance, pulses, fine_quant, fine_priority, C, LM, enc, 1, st->lastCodedBands, signalBandwidth); if (st->lastCodedBands) @@ -2206,7 +2038,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, quant_all_bands(1, mode, start, end, X, C==2 ? X+N : NULL, collapse_masks, bandE, pulses, shortBlocks, st->spread_decision, dual_stereo, st->intensity, tf_res, nbCompressedBytes*(8<<BITRES)-anti_collapse_rsv, - balance, enc, LM, codedBands, &st->rng, st->complexity, st->arch, st->disable_inv); + balance, enc, LM, codedBands, &st->rng, st->arch); if (anti_collapse_rsv > 0) { @@ -2217,14 +2049,6 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, ec_enc_bits(enc, anti_collapse_on, 1); } quant_energy_finalise(mode, start, end, oldBandE, error, fine_quant, fine_priority, nbCompressedBytes*8-ec_tell(enc), enc, C); - OPUS_CLEAR(energyError, nbEBands*CC); - c=0; - do { - for (i=start;i<end;i++) - { - energyError[i+c*nbEBands] = MAX16(-QCONST16(0.5f, 15), MIN16(QCONST16(0.5f, 15), error[i+c*nbEBands])); - } - } while (++c < C); if (silence) { @@ -2497,24 +2321,10 @@ int opus_custom_encoder_ctl(CELTEncoder * OPUS_RESTRICT st, int request, ...) *value=st->lsb_depth; } break; - case OPUS_SET_PHASE_INVERSION_DISABLED_REQUEST: + case OPUS_SET_EXPERT_FRAME_DURATION_REQUEST: { opus_int32 value = va_arg(ap, opus_int32); - if(value<0 || value>1) - { - goto bad_arg; - } - st->disable_inv = value; - } - break; - case OPUS_GET_PHASE_INVERSION_DISABLED_REQUEST: - { - opus_int32 *value = va_arg(ap, opus_int32*); - if (!value) - { - goto bad_arg; - } - *value = st->disable_inv; + st->variable_duration = value; } break; case OPUS_RESET_STATE: @@ -2558,13 +2368,6 @@ int opus_custom_encoder_ctl(CELTEncoder * OPUS_RESTRICT st, int request, ...) OPUS_COPY(&st->analysis, info, 1); } break; - case CELT_SET_SILK_INFO_REQUEST: - { - SILKInfo *info = va_arg(ap, SILKInfo *); - if (info) - OPUS_COPY(&st->silk_info, info, 1); - } - break; case CELT_GET_MODE_REQUEST: { const CELTMode ** value = va_arg(ap, const CELTMode**); diff --git a/thirdparty/opus/celt/celt_lpc.c b/thirdparty/opus/celt/celt_lpc.c index 8ecb693ee9..b410a21c5f 100644 --- a/thirdparty/opus/celt/celt_lpc.c +++ b/thirdparty/opus/celt/celt_lpc.c @@ -89,40 +89,58 @@ int p void celt_fir_c( - const opus_val16 *x, + const opus_val16 *_x, const opus_val16 *num, - opus_val16 *y, + opus_val16 *_y, int N, int ord, + opus_val16 *mem, int arch) { int i,j; VARDECL(opus_val16, rnum); + VARDECL(opus_val16, x); SAVE_STACK; - celt_assert(x != y); + ALLOC(rnum, ord, opus_val16); + ALLOC(x, N+ord, opus_val16); for(i=0;i<ord;i++) rnum[i] = num[ord-i-1]; + for(i=0;i<ord;i++) + x[i] = mem[ord-i-1]; + for (i=0;i<N;i++) + x[i+ord]=_x[i]; + for(i=0;i<ord;i++) + mem[i] = _x[N-i-1]; +#ifdef SMALL_FOOTPRINT + (void)arch; + for (i=0;i<N;i++) + { + opus_val32 sum = SHL32(EXTEND32(_x[i]), SIG_SHIFT); + for (j=0;j<ord;j++) + { + sum = MAC16_16(sum,rnum[j],x[i+j]); + } + _y[i] = SATURATE16(PSHR32(sum, SIG_SHIFT)); + } +#else for (i=0;i<N-3;i+=4) { - opus_val32 sum[4]; - sum[0] = SHL32(EXTEND32(x[i ]), SIG_SHIFT); - sum[1] = SHL32(EXTEND32(x[i+1]), SIG_SHIFT); - sum[2] = SHL32(EXTEND32(x[i+2]), SIG_SHIFT); - sum[3] = SHL32(EXTEND32(x[i+3]), SIG_SHIFT); - xcorr_kernel(rnum, x+i-ord, sum, ord, arch); - y[i ] = ROUND16(sum[0], SIG_SHIFT); - y[i+1] = ROUND16(sum[1], SIG_SHIFT); - y[i+2] = ROUND16(sum[2], SIG_SHIFT); - y[i+3] = ROUND16(sum[3], SIG_SHIFT); + opus_val32 sum[4]={0,0,0,0}; + xcorr_kernel(rnum, x+i, sum, ord, arch); + _y[i ] = SATURATE16(ADD32(EXTEND32(_x[i ]), PSHR32(sum[0], SIG_SHIFT))); + _y[i+1] = SATURATE16(ADD32(EXTEND32(_x[i+1]), PSHR32(sum[1], SIG_SHIFT))); + _y[i+2] = SATURATE16(ADD32(EXTEND32(_x[i+2]), PSHR32(sum[2], SIG_SHIFT))); + _y[i+3] = SATURATE16(ADD32(EXTEND32(_x[i+3]), PSHR32(sum[3], SIG_SHIFT))); } for (;i<N;i++) { - opus_val32 sum = SHL32(EXTEND32(x[i]), SIG_SHIFT); + opus_val32 sum = 0; for (j=0;j<ord;j++) - sum = MAC16_16(sum,rnum[j],x[i+j-ord]); - y[i] = ROUND16(sum, SIG_SHIFT); + sum = MAC16_16(sum,rnum[j],x[i+j]); + _y[i] = SATURATE16(ADD32(EXTEND32(_x[i]), PSHR32(sum, SIG_SHIFT))); } +#endif RESTORE_STACK; } @@ -148,7 +166,7 @@ void celt_iir(const opus_val32 *_x, { mem[j]=mem[j-1]; } - mem[0] = SROUND16(sum, SIG_SHIFT); + mem[0] = ROUND16(sum,SIG_SHIFT); _y[i] = sum; } #else @@ -177,20 +195,20 @@ void celt_iir(const opus_val32 *_x, xcorr_kernel(rden, y+i, sum, ord, arch); /* Patch up the result to compensate for the fact that this is an IIR */ - y[i+ord ] = -SROUND16(sum[0],SIG_SHIFT); + y[i+ord ] = -ROUND16(sum[0],SIG_SHIFT); _y[i ] = sum[0]; sum[1] = MAC16_16(sum[1], y[i+ord ], den[0]); - y[i+ord+1] = -SROUND16(sum[1],SIG_SHIFT); + y[i+ord+1] = -ROUND16(sum[1],SIG_SHIFT); _y[i+1] = sum[1]; sum[2] = MAC16_16(sum[2], y[i+ord+1], den[0]); sum[2] = MAC16_16(sum[2], y[i+ord ], den[1]); - y[i+ord+2] = -SROUND16(sum[2],SIG_SHIFT); + y[i+ord+2] = -ROUND16(sum[2],SIG_SHIFT); _y[i+2] = sum[2]; sum[3] = MAC16_16(sum[3], y[i+ord+2], den[0]); sum[3] = MAC16_16(sum[3], y[i+ord+1], den[1]); sum[3] = MAC16_16(sum[3], y[i+ord ], den[2]); - y[i+ord+3] = -SROUND16(sum[3],SIG_SHIFT); + y[i+ord+3] = -ROUND16(sum[3],SIG_SHIFT); _y[i+3] = sum[3]; } for (;i<N;i++) @@ -198,7 +216,7 @@ void celt_iir(const opus_val32 *_x, opus_val32 sum = _x[i]; for (j=0;j<ord;j++) sum -= MULT16_16(rden[j],y[i+j]); - y[i+ord] = SROUND16(sum,SIG_SHIFT); + y[i+ord] = ROUND16(sum,SIG_SHIFT); _y[i] = sum; } for(i=0;i<ord;i++) diff --git a/thirdparty/opus/celt/celt_lpc.h b/thirdparty/opus/celt/celt_lpc.h index a4c5fd6ea5..323459eb1a 100644 --- a/thirdparty/opus/celt/celt_lpc.h +++ b/thirdparty/opus/celt/celt_lpc.h @@ -45,11 +45,12 @@ void celt_fir_c( opus_val16 *y, int N, int ord, + opus_val16 *mem, int arch); #if !defined(OVERRIDE_CELT_FIR) -#define celt_fir(x, num, y, N, ord, arch) \ - (celt_fir_c(x, num, y, N, ord, arch)) +#define celt_fir(x, num, y, N, ord, mem, arch) \ + (celt_fir_c(x, num, y, N, ord, mem, arch)) #endif void celt_iir(const opus_val32 *x, diff --git a/thirdparty/opus/celt/cwrs.c b/thirdparty/opus/celt/cwrs.c index a552e4f0fb..9722f0ac86 100644 --- a/thirdparty/opus/celt/cwrs.c +++ b/thirdparty/opus/celt/cwrs.c @@ -482,7 +482,7 @@ static opus_val32 cwrsi(int _n,int _k,opus_uint32 _i,int *_y){ k0=_k; q=row[_n]; if(q>_i){ - celt_sig_assert(p>q); + celt_assert(p>q); _k=_n; do p=CELT_PVQ_U_ROW[--_k][_n]; while(p>_i); diff --git a/thirdparty/opus/celt/entcode.h b/thirdparty/opus/celt/entcode.h index 3763e3f284..13d6c84ef0 100644 --- a/thirdparty/opus/celt/entcode.h +++ b/thirdparty/opus/celt/entcode.h @@ -122,7 +122,7 @@ opus_uint32 ec_tell_frac(ec_ctx *_this); /* Tested exhaustively for all n and for 1<=d<=256 */ static OPUS_INLINE opus_uint32 celt_udiv(opus_uint32 n, opus_uint32 d) { - celt_sig_assert(d>0); + celt_assert(d>0); #ifdef USE_SMALL_DIV_TABLE if (d>256) return n/d; @@ -138,7 +138,7 @@ static OPUS_INLINE opus_uint32 celt_udiv(opus_uint32 n, opus_uint32 d) { } static OPUS_INLINE opus_int32 celt_sudiv(opus_int32 n, opus_int32 d) { - celt_sig_assert(d>0); + celt_assert(d>0); #ifdef USE_SMALL_DIV_TABLE if (n<0) return -(opus_int32)celt_udiv(-n, d); diff --git a/thirdparty/opus/celt/entdec.h b/thirdparty/opus/celt/entdec.h index 025fc1870d..d8ab318730 100644 --- a/thirdparty/opus/celt/entdec.h +++ b/thirdparty/opus/celt/entdec.h @@ -85,7 +85,7 @@ int ec_dec_icdf(ec_dec *_this,const unsigned char *_icdf,unsigned _ftb); The bits must have been encoded with ec_enc_uint(). No call to ec_dec_update() is necessary after this call. _ft: The number of integers that can be decoded (one more than the max). - This must be at least 2, and no more than 2**32-1. + This must be at least one, and no more than 2**32-1. Return: The decoded bits.*/ opus_uint32 ec_dec_uint(ec_dec *_this,opus_uint32 _ft); diff --git a/thirdparty/opus/celt/entenc.h b/thirdparty/opus/celt/entenc.h index f502eaf662..796bc4d572 100644 --- a/thirdparty/opus/celt/entenc.h +++ b/thirdparty/opus/celt/entenc.h @@ -67,7 +67,7 @@ void ec_enc_icdf(ec_enc *_this,int _s,const unsigned char *_icdf,unsigned _ftb); /*Encodes a raw unsigned integer in the stream. _fl: The integer to encode. _ft: The number of integers that can be encoded (one more than the max). - This must be at least 2, and no more than 2**32-1.*/ + This must be at least one, and no more than 2**32-1.*/ void ec_enc_uint(ec_enc *_this,opus_uint32 _fl,opus_uint32 _ft); /*Encodes a sequence of raw bits in the stream. diff --git a/thirdparty/opus/celt/fixed_c5x.h b/thirdparty/opus/celt/fixed_c5x.h deleted file mode 100644 index ea95a998c3..0000000000 --- a/thirdparty/opus/celt/fixed_c5x.h +++ /dev/null @@ -1,79 +0,0 @@ -/* Copyright (C) 2003 Jean-Marc Valin */ -/** - @file fixed_c5x.h - @brief Fixed-point operations for the TI C5x DSP family -*/ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef FIXED_C5X_H -#define FIXED_C5X_H - -#include "dsplib.h" - -#undef IMUL32 -static OPUS_INLINE long IMUL32(long i, long j) -{ - long ac0, ac1; - ac0 = _lmpy(i>>16,j); - ac1 = ac0 + _lmpy(i,j>>16); - return _lmpyu(i,j) + (ac1<<16); -} - -#undef MAX16 -#define MAX16(a,b) _max(a,b) - -#undef MIN16 -#define MIN16(a,b) _min(a,b) - -#undef MAX32 -#define MAX32(a,b) _lmax(a,b) - -#undef MIN32 -#define MIN32(a,b) _lmin(a,b) - -#undef VSHR32 -#define VSHR32(a, shift) _lshl(a,-(shift)) - -#undef MULT16_16_Q15 -#define MULT16_16_Q15(a,b) (_smpy(a,b)) - -#undef MULT16_16SU -#define MULT16_16SU(a,b) _lmpysu(a,b) - -#undef MULT_16_16 -#define MULT_16_16(a,b) _lmpy(a,b) - -/* FIXME: This is technically incorrect and is bound to cause problems. Is there any cleaner solution? */ -#undef MULT16_32_Q15 -#define MULT16_32_Q15(a,b) ADD32(SHL(MULT16_16((a),SHR((b),16)),1), SHR(MULT16_16SU((a),(b)),15)) - -#define celt_ilog2(x) (30 - _lnorm(x)) -#define OVERRIDE_CELT_ILOG2 - -#define celt_maxabs16(x, len) MAX32(EXTEND32(maxval((DATA *)x, len)),-EXTEND32(minval((DATA *)x, len))) -#define OVERRIDE_CELT_MAXABS16 - -#endif /* FIXED_C5X_H */ diff --git a/thirdparty/opus/celt/fixed_c6x.h b/thirdparty/opus/celt/fixed_c6x.h deleted file mode 100644 index bb6ad92780..0000000000 --- a/thirdparty/opus/celt/fixed_c6x.h +++ /dev/null @@ -1,70 +0,0 @@ -/* Copyright (C) 2008 CSIRO */ -/** - @file fixed_c6x.h - @brief Fixed-point operations for the TI C6x DSP family -*/ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef FIXED_C6X_H -#define FIXED_C6X_H - -#undef MULT16_16SU -#define MULT16_16SU(a,b) _mpysu(a,b) - -#undef MULT_16_16 -#define MULT_16_16(a,b) _mpy(a,b) - -#define celt_ilog2(x) (30 - _norm(x)) -#define OVERRIDE_CELT_ILOG2 - -#undef MULT16_32_Q15 -#define MULT16_32_Q15(a,b) (_mpylill(a, b) >> 15) - -#if 0 -#include "dsplib.h" - -#undef MAX16 -#define MAX16(a,b) _max(a,b) - -#undef MIN16 -#define MIN16(a,b) _min(a,b) - -#undef MAX32 -#define MAX32(a,b) _lmax(a,b) - -#undef MIN32 -#define MIN32(a,b) _lmin(a,b) - -#undef VSHR32 -#define VSHR32(a, shift) _lshl(a,-(shift)) - -#undef MULT16_16_Q15 -#define MULT16_16_Q15(a,b) (_smpy(a,b)) - -#define celt_maxabs16(x, len) MAX32(EXTEND32(maxval((DATA *)x, len)),-EXTEND32(minval((DATA *)x, len))) -#define OVERRIDE_CELT_MAXABS16 - -#endif /* FIXED_C6X_H */ diff --git a/thirdparty/opus/celt/fixed_debug.h b/thirdparty/opus/celt/fixed_debug.h index f435295234..d28227f5dc 100644 --- a/thirdparty/opus/celt/fixed_debug.h +++ b/thirdparty/opus/celt/fixed_debug.h @@ -59,14 +59,6 @@ extern opus_int64 celt_mips; #define SHR(a,b) SHR32(a,b) #define PSHR(a,b) PSHR32(a,b) -/** Add two 32-bit values, ignore any overflows */ -#define ADD32_ovflw(a,b) (celt_mips+=2,(opus_val32)((opus_uint32)(a)+(opus_uint32)(b))) -/** Subtract two 32-bit values, ignore any overflows */ -#define SUB32_ovflw(a,b) (celt_mips+=2,(opus_val32)((opus_uint32)(a)-(opus_uint32)(b))) -/* Avoid MSVC warning C4146: unary minus operator applied to unsigned type */ -/** Negate 32-bit value, ignore any overflows */ -#define NEG32_ovflw(a) (celt_mips+=2,(opus_val32)(0-(opus_uint32)(a))) - static OPUS_INLINE short NEG16(int x) { int res; @@ -235,11 +227,12 @@ static OPUS_INLINE int SHL32_(opus_int64 a, int shift, char *file, int line) #define VSHR32(a, shift) (((shift)>0) ? SHR32(a, shift) : SHL32(a, -(shift))) #define ROUND16(x,a) (celt_mips--,EXTRACT16(PSHR32((x),(a)))) -#define SROUND16(x,a) (celt_mips--,EXTRACT16(SATURATE(PSHR32(x,a), 32767))); - #define HALF16(x) (SHR16(x,1)) #define HALF32(x) (SHR32(x,1)) +//#define SHR(a,shift) ((a) >> (shift)) +//#define SHL(a,shift) ((a) << (shift)) + #define ADD16(a, b) ADD16_(a, b, __FILE__, __LINE__) static OPUS_INLINE short ADD16_(int a, int b, char *file, int line) { diff --git a/thirdparty/opus/celt/fixed_generic.h b/thirdparty/opus/celt/fixed_generic.h index 5f4abda76e..1cfd6d6989 100644 --- a/thirdparty/opus/celt/fixed_generic.h +++ b/thirdparty/opus/celt/fixed_generic.h @@ -104,9 +104,6 @@ /** Shift by a and round-to-neareast 32-bit value. Result is a 16-bit value */ #define ROUND16(x,a) (EXTRACT16(PSHR32((x),(a)))) -/** Shift by a and round-to-neareast 32-bit value. Result is a saturated 16-bit value */ -#define SROUND16(x,a) EXTRACT16(SATURATE(PSHR32(x,a), 32767)); - /** Divide by two */ #define HALF16(x) (SHR16(x,1)) #define HALF32(x) (SHR32(x,1)) @@ -120,14 +117,6 @@ /** Subtract two 32-bit values */ #define SUB32(a,b) ((opus_val32)(a)-(opus_val32)(b)) -/** Add two 32-bit values, ignore any overflows */ -#define ADD32_ovflw(a,b) ((opus_val32)((opus_uint32)(a)+(opus_uint32)(b))) -/** Subtract two 32-bit values, ignore any overflows */ -#define SUB32_ovflw(a,b) ((opus_val32)((opus_uint32)(a)-(opus_uint32)(b))) -/* Avoid MSVC warning C4146: unary minus operator applied to unsigned type */ -/** Negate 32-bit value, ignore any overflows */ -#define NEG32_ovflw(a) ((opus_val32)(0-(opus_uint32)(a))) - /** 16x16 multiplication where the result fits in 16 bits */ #define MULT16_16_16(a,b) ((((opus_val16)(a))*((opus_val16)(b)))) diff --git a/thirdparty/opus/celt/float_cast.h b/thirdparty/opus/celt/float_cast.h index 889dae965f..ed5a39b543 100644 --- a/thirdparty/opus/celt/float_cast.h +++ b/thirdparty/opus/celt/float_cast.h @@ -61,13 +61,7 @@ ** the config.h file. */ -/* With GCC, when SSE is available, the fastest conversion is cvtss2si. */ -#if defined(__GNUC__) && defined(__SSE__) - -#include <xmmintrin.h> -static OPUS_INLINE opus_int32 float2int(float x) {return _mm_cvt_ss2si(_mm_set_ss(x));} - -#elif defined(HAVE_LRINTF) +#if (HAVE_LRINTF) /* These defines enable functionality introduced with the 1999 ISO C ** standard. They must be defined before the inclusion of math.h to @@ -96,10 +90,10 @@ static OPUS_INLINE opus_int32 float2int(float x) {return _mm_cvt_ss2si(_mm_set_s #include <math.h> #define float2int(x) lrint(x) -#elif (defined(_MSC_VER) && _MSC_VER >= 1400) && (defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1)) +#elif (defined(_MSC_VER) && _MSC_VER >= 1400) && defined (_M_X64) #include <xmmintrin.h> - static __inline long int float2int(float value) + __inline long int float2int(float value) { return _mm_cvtss_si32(_mm_load_ss(&value)); } @@ -110,7 +104,7 @@ static OPUS_INLINE opus_int32 float2int(float x) {return _mm_cvt_ss2si(_mm_set_s ** Therefore implement OPUS_INLINE versions of these functions here. */ - static __inline long int + __inline long int float2int (float flt) { int intgr; diff --git a/thirdparty/opus/celt/kiss_fft.c b/thirdparty/opus/celt/kiss_fft.c index 83775165d8..1f8fd05321 100644 --- a/thirdparty/opus/celt/kiss_fft.c +++ b/thirdparty/opus/celt/kiss_fft.c @@ -82,8 +82,8 @@ static void kf_bfly2( C_SUB( Fout2[0] , Fout[0] , t ); C_ADDTO( Fout[0] , t ); - t.r = S_MUL(ADD32_ovflw(Fout2[1].r, Fout2[1].i), tw); - t.i = S_MUL(SUB32_ovflw(Fout2[1].i, Fout2[1].r), tw); + t.r = S_MUL(Fout2[1].r+Fout2[1].i, tw); + t.i = S_MUL(Fout2[1].i-Fout2[1].r, tw); C_SUB( Fout2[1] , Fout[1] , t ); C_ADDTO( Fout[1] , t ); @@ -92,8 +92,8 @@ static void kf_bfly2( C_SUB( Fout2[2] , Fout[2] , t ); C_ADDTO( Fout[2] , t ); - t.r = S_MUL(SUB32_ovflw(Fout2[3].i, Fout2[3].r), tw); - t.i = S_MUL(NEG32_ovflw(ADD32_ovflw(Fout2[3].i, Fout2[3].r)), tw); + t.r = S_MUL(Fout2[3].i-Fout2[3].r, tw); + t.i = S_MUL(-Fout2[3].i-Fout2[3].r, tw); C_SUB( Fout2[3] , Fout[3] , t ); C_ADDTO( Fout[3] , t ); Fout += 8; @@ -126,10 +126,10 @@ static void kf_bfly4( C_ADDTO( *Fout , scratch1 ); C_SUB( scratch1 , Fout[1] , Fout[3] ); - Fout[1].r = ADD32_ovflw(scratch0.r, scratch1.i); - Fout[1].i = SUB32_ovflw(scratch0.i, scratch1.r); - Fout[3].r = SUB32_ovflw(scratch0.r, scratch1.i); - Fout[3].i = ADD32_ovflw(scratch0.i, scratch1.r); + Fout[1].r = scratch0.r + scratch1.i; + Fout[1].i = scratch0.i - scratch1.r; + Fout[3].r = scratch0.r - scratch1.i; + Fout[3].i = scratch0.i + scratch1.r; Fout+=4; } } else { @@ -160,10 +160,10 @@ static void kf_bfly4( tw3 += fstride*3; C_ADDTO( *Fout , scratch[3] ); - Fout[m].r = ADD32_ovflw(scratch[5].r, scratch[4].i); - Fout[m].i = SUB32_ovflw(scratch[5].i, scratch[4].r); - Fout[m3].r = SUB32_ovflw(scratch[5].r, scratch[4].i); - Fout[m3].i = ADD32_ovflw(scratch[5].i, scratch[4].r); + Fout[m].r = scratch[5].r + scratch[4].i; + Fout[m].i = scratch[5].i - scratch[4].r; + Fout[m3].r = scratch[5].r - scratch[4].i; + Fout[m3].i = scratch[5].i + scratch[4].r; ++Fout; } } @@ -212,18 +212,18 @@ static void kf_bfly3( tw1 += fstride; tw2 += fstride*2; - Fout[m].r = SUB32_ovflw(Fout->r, HALF_OF(scratch[3].r)); - Fout[m].i = SUB32_ovflw(Fout->i, HALF_OF(scratch[3].i)); + Fout[m].r = Fout->r - HALF_OF(scratch[3].r); + Fout[m].i = Fout->i - HALF_OF(scratch[3].i); C_MULBYSCALAR( scratch[0] , epi3.i ); C_ADDTO(*Fout,scratch[3]); - Fout[m2].r = ADD32_ovflw(Fout[m].r, scratch[0].i); - Fout[m2].i = SUB32_ovflw(Fout[m].i, scratch[0].r); + Fout[m2].r = Fout[m].r + scratch[0].i; + Fout[m2].i = Fout[m].i - scratch[0].r; - Fout[m].r = SUB32_ovflw(Fout[m].r, scratch[0].i); - Fout[m].i = ADD32_ovflw(Fout[m].i, scratch[0].r); + Fout[m].r -= scratch[0].i; + Fout[m].i += scratch[0].r; ++Fout; } while(--k); @@ -282,22 +282,22 @@ static void kf_bfly5( C_ADD( scratch[8],scratch[2],scratch[3]); C_SUB( scratch[9],scratch[2],scratch[3]); - Fout0->r = ADD32_ovflw(Fout0->r, ADD32_ovflw(scratch[7].r, scratch[8].r)); - Fout0->i = ADD32_ovflw(Fout0->i, ADD32_ovflw(scratch[7].i, scratch[8].i)); + Fout0->r += scratch[7].r + scratch[8].r; + Fout0->i += scratch[7].i + scratch[8].i; - scratch[5].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,ya.r), S_MUL(scratch[8].r,yb.r))); - scratch[5].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,ya.r), S_MUL(scratch[8].i,yb.r))); + scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r); + scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r); - scratch[6].r = ADD32_ovflw(S_MUL(scratch[10].i,ya.i), S_MUL(scratch[9].i,yb.i)); - scratch[6].i = NEG32_ovflw(ADD32_ovflw(S_MUL(scratch[10].r,ya.i), S_MUL(scratch[9].r,yb.i))); + scratch[6].r = S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i); + scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i); C_SUB(*Fout1,scratch[5],scratch[6]); C_ADD(*Fout4,scratch[5],scratch[6]); - scratch[11].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,yb.r), S_MUL(scratch[8].r,ya.r))); - scratch[11].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,yb.r), S_MUL(scratch[8].i,ya.r))); - scratch[12].r = SUB32_ovflw(S_MUL(scratch[9].i,ya.i), S_MUL(scratch[10].i,yb.i)); - scratch[12].i = SUB32_ovflw(S_MUL(scratch[10].r,yb.i), S_MUL(scratch[9].r,ya.i)); + scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r); + scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r); + scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i); + scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i); C_ADD(*Fout2,scratch[11],scratch[12]); C_SUB(*Fout3,scratch[11],scratch[12]); diff --git a/thirdparty/opus/celt/mathops.c b/thirdparty/opus/celt/mathops.c index 6ee9b9e101..21a01f52e4 100644 --- a/thirdparty/opus/celt/mathops.c +++ b/thirdparty/opus/celt/mathops.c @@ -38,8 +38,7 @@ #include "mathops.h" /*Compute floor(sqrt(_val)) with exact arithmetic. - _val must be greater than 0. - This has been tested on all possible 32-bit inputs greater than 0.*/ + This has been tested on all possible 32-bit inputs.*/ unsigned isqrt32(opus_uint32 _val){ unsigned b; unsigned g; @@ -183,7 +182,7 @@ opus_val32 celt_rcp(opus_val32 x) int i; opus_val16 n; opus_val16 r; - celt_sig_assert(x>0); + celt_assert2(x>0, "celt_rcp() only defined for positive values"); i = celt_ilog2(x); /* n is Q15 with range [0,1). */ n = VSHR32(x,i-15)-32768; diff --git a/thirdparty/opus/celt/mathops.h b/thirdparty/opus/celt/mathops.h index 5e86ff0dd2..a0525a9610 100644 --- a/thirdparty/opus/celt/mathops.h +++ b/thirdparty/opus/celt/mathops.h @@ -38,44 +38,11 @@ #include "entcode.h" #include "os_support.h" -#define PI 3.141592653f - /* Multiplies two 16-bit fractional values. Bit-exactness of this macro is important */ #define FRAC_MUL16(a,b) ((16384+((opus_int32)(opus_int16)(a)*(opus_int16)(b)))>>15) unsigned isqrt32(opus_uint32 _val); -/* CELT doesn't need it for fixed-point, by analysis.c does. */ -#if !defined(FIXED_POINT) || defined(ANALYSIS_C) -#define cA 0.43157974f -#define cB 0.67848403f -#define cC 0.08595542f -#define cE ((float)PI/2) -static OPUS_INLINE float fast_atan2f(float y, float x) { - float x2, y2; - x2 = x*x; - y2 = y*y; - /* For very small values, we don't care about the answer, so - we can just return 0. */ - if (x2 + y2 < 1e-18f) - { - return 0; - } - if(x2<y2){ - float den = (y2 + cB*x2) * (y2 + cC*x2); - return -x*y*(y2 + cA*x2) / den + (y<0 ? -cE : cE); - }else{ - float den = (x2 + cB*y2) * (x2 + cC*y2); - return x*y*(x2 + cA*y2) / den + (y<0 ? -cE : cE) - (x*y<0 ? -cE : cE); - } -} -#undef cA -#undef cB -#undef cC -#undef cE -#endif - - #ifndef OVERRIDE_CELT_MAXABS16 static OPUS_INLINE opus_val32 celt_maxabs16(const opus_val16 *x, int len) { @@ -113,6 +80,7 @@ static OPUS_INLINE opus_val32 celt_maxabs32(const opus_val32 *x, int len) #ifndef FIXED_POINT +#define PI 3.141592653f #define celt_sqrt(x) ((float)sqrt(x)) #define celt_rsqrt(x) (1.f/celt_sqrt(x)) #define celt_rsqrt_norm(x) (celt_rsqrt(x)) @@ -179,7 +147,7 @@ static OPUS_INLINE float celt_exp2(float x) /** Integer log in base2. Undefined for zero and negative numbers */ static OPUS_INLINE opus_int16 celt_ilog2(opus_int32 x) { - celt_sig_assert(x>0); + celt_assert2(x>0, "celt_ilog2() only defined for strictly positive numbers"); return EC_ILOG(x)-1; } #endif diff --git a/thirdparty/opus/celt/mdct.c b/thirdparty/opus/celt/mdct.c index 5c6dab5b75..5315ad11a3 100644 --- a/thirdparty/opus/celt/mdct.c +++ b/thirdparty/opus/celt/mdct.c @@ -270,8 +270,8 @@ void clt_mdct_backward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_sca int rev; kiss_fft_scalar yr, yi; rev = *bitrev++; - yr = ADD32_ovflw(S_MUL(*xp2, t[i]), S_MUL(*xp1, t[N4+i])); - yi = SUB32_ovflw(S_MUL(*xp1, t[i]), S_MUL(*xp2, t[N4+i])); + yr = S_MUL(*xp2, t[i]) + S_MUL(*xp1, t[N4+i]); + yi = S_MUL(*xp1, t[i]) - S_MUL(*xp2, t[N4+i]); /* We swap real and imag because we use an FFT instead of an IFFT. */ yp[2*rev+1] = yr; yp[2*rev] = yi; @@ -301,8 +301,8 @@ void clt_mdct_backward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_sca t0 = t[i]; t1 = t[N4+i]; /* We'd scale up by 2 here, but instead it's done when mixing the windows */ - yr = ADD32_ovflw(S_MUL(re,t0), S_MUL(im,t1)); - yi = SUB32_ovflw(S_MUL(re,t1), S_MUL(im,t0)); + yr = S_MUL(re,t0) + S_MUL(im,t1); + yi = S_MUL(re,t1) - S_MUL(im,t0); /* We swap real and imag because we're using an FFT instead of an IFFT. */ re = yp1[1]; im = yp1[0]; @@ -312,8 +312,8 @@ void clt_mdct_backward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_sca t0 = t[(N4-i-1)]; t1 = t[(N2-i-1)]; /* We'd scale up by 2 here, but instead it's done when mixing the windows */ - yr = ADD32_ovflw(S_MUL(re,t0), S_MUL(im,t1)); - yi = SUB32_ovflw(S_MUL(re,t1), S_MUL(im,t0)); + yr = S_MUL(re,t0) + S_MUL(im,t1); + yi = S_MUL(re,t1) - S_MUL(im,t0); yp1[0] = yr; yp0[1] = yi; yp0 += 2; @@ -333,8 +333,8 @@ void clt_mdct_backward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_sca kiss_fft_scalar x1, x2; x1 = *xp1; x2 = *yp1; - *yp1++ = SUB32_ovflw(MULT16_32_Q15(*wp2, x2), MULT16_32_Q15(*wp1, x1)); - *xp1-- = ADD32_ovflw(MULT16_32_Q15(*wp1, x2), MULT16_32_Q15(*wp2, x1)); + *yp1++ = MULT16_32_Q15(*wp2, x2) - MULT16_32_Q15(*wp1, x1); + *xp1-- = MULT16_32_Q15(*wp1, x2) + MULT16_32_Q15(*wp2, x1); wp1++; wp2--; } diff --git a/thirdparty/opus/celt/mips/celt_mipsr1.h b/thirdparty/opus/celt/mips/celt_mipsr1.h index c332fe0471..e85661a661 100644 --- a/thirdparty/opus/celt/mips/celt_mipsr1.h +++ b/thirdparty/opus/celt/mips/celt_mipsr1.h @@ -53,7 +53,6 @@ #include "celt_lpc.h" #include "vq.h" -#define OVERRIDE_COMB_FILTER_CONST #define OVERRIDE_comb_filter void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N, opus_val16 g0, opus_val16 g1, int tapset0, int tapset1, diff --git a/thirdparty/opus/celt/mips/vq_mipsr1.h b/thirdparty/opus/celt/mips/vq_mipsr1.h index f26a33e755..54cef86133 100644 --- a/thirdparty/opus/celt/mips/vq_mipsr1.h +++ b/thirdparty/opus/celt/mips/vq_mipsr1.h @@ -36,6 +36,11 @@ #include "mathops.h" #include "arch.h" +static unsigned extract_collapse_mask(int *iy, int N, int B); +static void normalise_residual(int * OPUS_RESTRICT iy, celt_norm * OPUS_RESTRICT X, int N, opus_val32 Ryy, opus_val16 gain); +static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread); +static void renormalise_vector_mips(celt_norm *X, int N, opus_val16 gain, int arch); + #define OVERRIDE_vq_exp_rotation1 static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s) { @@ -64,7 +69,11 @@ static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_ } #define OVERRIDE_renormalise_vector -void renormalise_vector(celt_norm *X, int N, opus_val16 gain, int arch) + +#define renormalise_vector(X, N, gain, arch) \ + (renormalise_vector_mips(X, N, gain, arch)) + +void renormalise_vector_mips(celt_norm *X, int N, opus_val16 gain, int arch) { int i; #ifdef FIXED_POINT diff --git a/thirdparty/opus/celt/modes.c b/thirdparty/opus/celt/modes.c index 390c5e8aeb..911686e905 100644 --- a/thirdparty/opus/celt/modes.c +++ b/thirdparty/opus/celt/modes.c @@ -427,7 +427,7 @@ void opus_custom_mode_destroy(CELTMode *mode) } #endif /* CUSTOM_MODES_ONLY */ opus_free((opus_int16*)mode->eBands); - opus_free((unsigned char*)mode->allocVectors); + opus_free((opus_int16*)mode->allocVectors); opus_free((opus_val16*)mode->window); opus_free((opus_int16*)mode->logN); diff --git a/thirdparty/opus/celt/pitch.c b/thirdparty/opus/celt/pitch.c index 872582a48a..bf46e7d562 100644 --- a/thirdparty/opus/celt/pitch.c +++ b/thirdparty/opus/celt/pitch.c @@ -102,9 +102,11 @@ static void find_best_pitch(opus_val32 *xcorr, opus_val16 *y, int len, } } -static void celt_fir5(opus_val16 *x, +static void celt_fir5(const opus_val16 *x, const opus_val16 *num, - int N) + opus_val16 *y, + int N, + opus_val16 *mem) { int i; opus_val16 num0, num1, num2, num3, num4; @@ -114,11 +116,11 @@ static void celt_fir5(opus_val16 *x, num2=num[2]; num3=num[3]; num4=num[4]; - mem0=0; - mem1=0; - mem2=0; - mem3=0; - mem4=0; + mem0=mem[0]; + mem1=mem[1]; + mem2=mem[2]; + mem3=mem[3]; + mem4=mem[4]; for (i=0;i<N;i++) { opus_val32 sum = SHL32(EXTEND32(x[i]), SIG_SHIFT); @@ -132,8 +134,13 @@ static void celt_fir5(opus_val16 *x, mem2 = mem1; mem1 = mem0; mem0 = x[i]; - x[i] = ROUND16(sum, SIG_SHIFT); + y[i] = ROUND16(sum, SIG_SHIFT); } + mem[0]=mem0; + mem[1]=mem1; + mem[2]=mem2; + mem[3]=mem3; + mem[4]=mem4; } @@ -143,7 +150,7 @@ void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x int i; opus_val32 ac[5]; opus_val16 tmp=Q15ONE; - opus_val16 lpc[4]; + opus_val16 lpc[4], mem[5]={0,0,0,0,0}; opus_val16 lpc2[5]; opus_val16 c1 = QCONST16(.8f,15); #ifdef FIXED_POINT @@ -204,7 +211,7 @@ void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x lpc2[2] = lpc[2] + MULT16_16_Q15(c1,lpc[1]); lpc2[3] = lpc[3] + MULT16_16_Q15(c1,lpc[2]); lpc2[4] = MULT16_16_Q15(c1,lpc[3]); - celt_fir5(x_lp, lpc2, len>>1); + celt_fir5(x_lp, lpc2, x_lp, len>>1, mem); } /* Pure C implementation. */ @@ -213,8 +220,13 @@ opus_val32 #else void #endif +#if defined(OVERRIDE_PITCH_XCORR) celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y, + opus_val32 *xcorr, int len, int max_pitch) +#else +celt_pitch_xcorr(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch, int arch) +#endif { #if 0 /* This is a simple version of the pitch correlation that should work @@ -249,11 +261,15 @@ celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y, opus_val32 maxcorr=1; #endif celt_assert(max_pitch>0); - celt_sig_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0); + celt_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0); for (i=0;i<max_pitch-3;i+=4) { opus_val32 sum[4]={0,0,0,0}; +#if defined(OVERRIDE_PITCH_XCORR) + xcorr_kernel_c(_x, _y+i, sum, len); +#else xcorr_kernel(_x, _y+i, sum, len, arch); +#endif xcorr[i]=sum[0]; xcorr[i+1]=sum[1]; xcorr[i+2]=sum[2]; @@ -269,7 +285,11 @@ celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y, for (;i<max_pitch;i++) { opus_val32 sum; +#if defined(OVERRIDE_PITCH_XCORR) + sum = celt_inner_prod_c(_x, _y+i, len); +#else sum = celt_inner_prod(_x, _y+i, len, arch); +#endif xcorr[i] = sum; #ifdef FIXED_POINT maxcorr = MAX32(maxcorr, sum); @@ -358,7 +378,7 @@ void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTR for (j=0;j<len>>1;j++) sum += SHR32(MULT16_16(x_lp[j],y[i+j]), shift); #else - sum = celt_inner_prod(x_lp, y+i, len>>1, arch); + sum = celt_inner_prod_c(x_lp, y+i, len>>1); #endif xcorr[i] = MAX32(-1, sum); #ifdef FIXED_POINT @@ -404,7 +424,7 @@ static opus_val16 compute_pitch_gain(opus_val32 xy, opus_val32 xx, opus_val32 yy sx = celt_ilog2(xx)-14; sy = celt_ilog2(yy)-14; shift = sx + sy; - x2y2 = SHR32(MULT16_16(VSHR32(xx, sx), VSHR32(yy, sy)), 14); + x2y2 = MULT16_16_Q14(VSHR32(xx, sx), VSHR32(yy, sy)); if (shift & 1) { if (x2y2 < 32768) { diff --git a/thirdparty/opus/celt/pitch.h b/thirdparty/opus/celt/pitch.h index e425f56aea..d3503532a0 100644 --- a/thirdparty/opus/celt/pitch.h +++ b/thirdparty/opus/celt/pitch.h @@ -46,7 +46,8 @@ #include "mips/pitch_mipsr1.h" #endif -#if (defined(OPUS_ARM_ASM) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)) +#if ((defined(OPUS_ARM_ASM) && defined(FIXED_POINT)) \ + || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)) # include "arm/pitch_arm.h" #endif @@ -183,10 +184,17 @@ opus_val32 void #endif celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y, + opus_val32 *xcorr, int len, int max_pitch); + +#if !defined(OVERRIDE_PITCH_XCORR) +#ifdef FIXED_POINT +opus_val32 +#else +void +#endif +celt_pitch_xcorr(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch, int arch); -#ifndef OVERRIDE_PITCH_XCORR -# define celt_pitch_xcorr celt_pitch_xcorr_c #endif #endif diff --git a/thirdparty/opus/celt/quant_bands.c b/thirdparty/opus/celt/quant_bands.c index 39a221eda5..95076e0af2 100644 --- a/thirdparty/opus/celt/quant_bands.c +++ b/thirdparty/opus/celt/quant_bands.c @@ -418,7 +418,6 @@ void quant_energy_finalise(const CELTMode *m, int start, int end, opus_val16 *ol offset = (q2-.5f)*(1<<(14-fine_quant[i]-1))*(1.f/16384); #endif oldEBands[i+c*m->nbEBands] += offset; - error[i+c*m->nbEBands] -= offset; bits_left--; } while (++c < C); } @@ -457,7 +456,7 @@ void unquant_coarse_energy(const CELTMode *m, int start, int end, opus_val16 *ol /* It would be better to express this invariant as a test on C at function entry, but that isn't enough to make the static analyzer happy. */ - celt_sig_assert(c<2); + celt_assert(c<2); tell = ec_tell(dec); if(budget-tell>=15) { @@ -548,15 +547,9 @@ void amp2Log2(const CELTMode *m, int effEnd, int end, c=0; do { for (i=0;i<effEnd;i++) - { bandLogE[i+c*m->nbEBands] = - celt_log2(bandE[i+c*m->nbEBands]) + celt_log2(SHL32(bandE[i+c*m->nbEBands],2)) - SHL16((opus_val16)eMeans[i],6); -#ifdef FIXED_POINT - /* Compensate for bandE[] being Q12 but celt_log2() taking a Q14 input. */ - bandLogE[i+c*m->nbEBands] += QCONST16(2.f, DB_SHIFT); -#endif - } for (i=effEnd;i<end;i++) bandLogE[c*m->nbEBands+i] = -QCONST16(14.f,DB_SHIFT); } while (++c < C); diff --git a/thirdparty/opus/celt/rate.c b/thirdparty/opus/celt/rate.c index 465e1ba26c..7dfa5be8a6 100644 --- a/thirdparty/opus/celt/rate.c +++ b/thirdparty/opus/celt/rate.c @@ -348,17 +348,12 @@ static OPUS_INLINE int interp_bits2pulses(const CELTMode *m, int start, int end, /*This if() block is the only part of the allocation function that is not a mandatory part of the bitstream: any bands we choose to skip here must be explicitly signaled.*/ - int depth_threshold; - /*We choose a threshold with some hysteresis to keep bands from - fluctuating in and out, but we try not to fold below a certain point. */ - if (codedBands > 17) - depth_threshold = j<prev ? 7 : 9; - else - depth_threshold = 0; + /*Choose a threshold with some hysteresis to keep bands from + fluctuating in and out.*/ #ifdef FUZZING if ((rand()&0x1) == 0) #else - if (codedBands<=start+2 || (band_bits > (depth_threshold*band_width<<LM<<BITRES)>>4 && j<=signalBandwidth)) + if (codedBands<=start+2 || (band_bits > ((j<prev?7:9)*band_width<<LM<<BITRES)>>4 && j<=signalBandwidth)) #endif { ec_enc_bit_logp(ec, 1, 1); @@ -529,7 +524,7 @@ static OPUS_INLINE int interp_bits2pulses(const CELTMode *m, int start, int end, return codedBands; } -int clt_compute_allocation(const CELTMode *m, int start, int end, const int *offsets, const int *cap, int alloc_trim, int *intensity, int *dual_stereo, +int compute_allocation(const CELTMode *m, int start, int end, const int *offsets, const int *cap, int alloc_trim, int *intensity, int *dual_stereo, opus_int32 total, opus_int32 *balance, int *pulses, int *ebits, int *fine_priority, int C, int LM, ec_ctx *ec, int encode, int prev, int signalBandwidth) { int lo, hi, len, j; diff --git a/thirdparty/opus/celt/rate.h b/thirdparty/opus/celt/rate.h index fad5e412da..515f7687ce 100644 --- a/thirdparty/opus/celt/rate.h +++ b/thirdparty/opus/celt/rate.h @@ -95,7 +95,7 @@ static OPUS_INLINE int pulses2bits(const CELTMode *m, int band, int LM, int puls @param pulses Number of pulses per band (returned) @return Total number of bits allocated */ -int clt_compute_allocation(const CELTMode *m, int start, int end, const int *offsets, const int *cap, int alloc_trim, int *intensity, int *dual_stereo, +int compute_allocation(const CELTMode *m, int start, int end, const int *offsets, const int *cap, int alloc_trim, int *intensity, int *dual_stero, opus_int32 total, opus_int32 *balance, int *pulses, int *ebits, int *fine_priority, int C, int LM, ec_ctx *ec, int encode, int prev, int signalBandwidth); #endif diff --git a/thirdparty/opus/celt/static_modes_fixed_arm_ne10.h b/thirdparty/opus/celt/static_modes_fixed_arm_ne10.h index 7623092192..b8ef0cee98 100644 --- a/thirdparty/opus/celt/static_modes_fixed_arm_ne10.h +++ b/thirdparty/opus/celt/static_modes_fixed_arm_ne10.h @@ -1,7 +1,7 @@ /* The contents of this file was automatically generated by * dump_mode_arm_ne10.c with arguments: 48000 960 * It contains static definitions for some pre-defined modes. */ -#include <NE10_types.h> +#include <NE10_init.h> #ifndef NE10_FFT_PARAMS48000_960 #define NE10_FFT_PARAMS48000_960 diff --git a/thirdparty/opus/celt/static_modes_float_arm_ne10.h b/thirdparty/opus/celt/static_modes_float_arm_ne10.h index 66e1abb101..934a82a420 100644 --- a/thirdparty/opus/celt/static_modes_float_arm_ne10.h +++ b/thirdparty/opus/celt/static_modes_float_arm_ne10.h @@ -1,7 +1,7 @@ /* The contents of this file was automatically generated by * dump_mode_arm_ne10.c with arguments: 48000 960 * It contains static definitions for some pre-defined modes. */ -#include <NE10_types.h> +#include <NE10_init.h> #ifndef NE10_FFT_PARAMS48000_960 #define NE10_FFT_PARAMS48000_960 diff --git a/thirdparty/opus/celt/tests/test_unit_cwrs32.c b/thirdparty/opus/celt/tests/test_unit_cwrs32.c new file mode 100644 index 0000000000..36dd8af5f5 --- /dev/null +++ b/thirdparty/opus/celt/tests/test_unit_cwrs32.c @@ -0,0 +1,161 @@ +/* Copyright (c) 2008-2011 Xiph.Org Foundation, Mozilla Corporation, + Gregory Maxwell + Written by Jean-Marc Valin, Gregory Maxwell, and Timothy B. Terriberry */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdio.h> +#include <string.h> + +#ifndef CUSTOM_MODES +#define CUSTOM_MODES +#else +#define TEST_CUSTOM_MODES +#endif + +#define CELT_C +#include "stack_alloc.h" +#include "entenc.c" +#include "entdec.c" +#include "entcode.c" +#include "cwrs.c" +#include "mathops.c" +#include "rate.h" + +#define NMAX (240) +#define KMAX (128) + +#ifdef TEST_CUSTOM_MODES + +#define NDIMS (44) +static const int pn[NDIMS]={ + 2, 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 18, 20, 22, + 24, 26, 28, 30, 32, 36, 40, 44, 48, + 52, 56, 60, 64, 72, 80, 88, 96, 104, + 112, 120, 128, 144, 160, 176, 192, 208 +}; +static const int pkmax[NDIMS]={ + 128, 128, 128, 128, 88, 52, 36, 26, 22, + 18, 16, 15, 13, 12, 12, 11, 10, 9, + 9, 8, 8, 7, 7, 7, 7, 6, 6, + 6, 6, 6, 5, 5, 5, 5, 5, 5, + 4, 4, 4, 4, 4, 4, 4, 4 +}; + +#else /* TEST_CUSTOM_MODES */ + +#define NDIMS (22) +static const int pn[NDIMS]={ + 2, 3, 4, 6, 8, 9, 11, 12, 16, + 18, 22, 24, 32, 36, 44, 48, 64, 72, + 88, 96, 144, 176 +}; +static const int pkmax[NDIMS]={ + 128, 128, 128, 88, 36, 26, 18, 16, 12, + 11, 9, 9, 7, 7, 6, 6, 5, 5, + 5, 5, 4, 4 +}; + +#endif + +int main(void){ + int t; + int n; + ALLOC_STACK; + for(t=0;t<NDIMS;t++){ + int pseudo; + n=pn[t]; + for(pseudo=1;pseudo<41;pseudo++) + { + int k; +#if defined(SMALL_FOOTPRINT) + opus_uint32 uu[KMAX+2U]; +#endif + opus_uint32 inc; + opus_uint32 nc; + opus_uint32 i; + k=get_pulses(pseudo); + if (k>pkmax[t])break; + printf("Testing CWRS with N=%i, K=%i...\n",n,k); +#if defined(SMALL_FOOTPRINT) + nc=ncwrs_urow(n,k,uu); +#else + nc=CELT_PVQ_V(n,k); +#endif + inc=nc/20000; + if(inc<1)inc=1; + for(i=0;i<nc;i+=inc){ +#if defined(SMALL_FOOTPRINT) + opus_uint32 u[KMAX+2U]; +#endif + int y[NMAX]; + int sy; + opus_uint32 v; + opus_uint32 ii; + int j; +#if defined(SMALL_FOOTPRINT) + memcpy(u,uu,(k+2U)*sizeof(*u)); + cwrsi(n,k,i,y,u); +#else + cwrsi(n,k,i,y); +#endif + sy=0; + for(j=0;j<n;j++)sy+=abs(y[j]); + if(sy!=k){ + fprintf(stderr,"N=%d Pulse count mismatch in cwrsi (%d!=%d).\n", + n,sy,k); + return 99; + } + /*printf("%6u of %u:",i,nc); + for(j=0;j<n;j++)printf(" %+3i",y[j]); + printf(" ->");*/ +#if defined(SMALL_FOOTPRINT) + ii=icwrs(n,k,&v,y,u); +#else + ii=icwrs(n,y); + v=CELT_PVQ_V(n,k); +#endif + if(ii!=i){ + fprintf(stderr,"Combination-index mismatch (%lu!=%lu).\n", + (long)ii,(long)i); + return 1; + } + if(v!=nc){ + fprintf(stderr,"Combination count mismatch (%lu!=%lu).\n", + (long)v,(long)nc); + return 2; + } + /*printf(" %6u\n",i);*/ + } + /*printf("\n");*/ + } + } + return 0; +} diff --git a/thirdparty/opus/celt/tests/test_unit_dft.c b/thirdparty/opus/celt/tests/test_unit_dft.c new file mode 100644 index 0000000000..6166eb0e4f --- /dev/null +++ b/thirdparty/opus/celt/tests/test_unit_dft.c @@ -0,0 +1,189 @@ +/* Copyright (c) 2008 Xiph.Org Foundation + Written by Jean-Marc Valin */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#define SKIP_CONFIG_H + +#ifndef CUSTOM_MODES +#define CUSTOM_MODES +#endif + +#include <stdio.h> + +#define CELT_C +#define TEST_UNIT_DFT_C +#include "stack_alloc.h" +#include "kiss_fft.h" +#include "kiss_fft.c" +#include "mathops.c" +#include "entcode.c" + +#if defined(OPUS_X86_MAY_HAVE_SSE2) || defined(OPUS_X86_MAY_HAVE_SSE4_1) +# include "x86/x86cpu.c" +#elif defined(OPUS_ARM_ASM) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR) +# include "arm/armcpu.c" +# include "celt_lpc.c" +# include "pitch.c" +# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) +# include "arm/celt_neon_intr.c" +# if defined(HAVE_ARM_NE10) +# include "mdct.c" +# include "arm/celt_ne10_fft.c" +# include "arm/celt_ne10_mdct.c" +# endif +# endif +# include "arm/arm_celt_map.c" +#endif + +#ifndef M_PI +#define M_PI 3.141592653 +#endif + +int ret = 0; + +void check(kiss_fft_cpx * in,kiss_fft_cpx * out,int nfft,int isinverse) +{ + int bin,k; + double errpow=0,sigpow=0, snr; + + for (bin=0;bin<nfft;++bin) { + double ansr = 0; + double ansi = 0; + double difr; + double difi; + + for (k=0;k<nfft;++k) { + double phase = -2*M_PI*bin*k/nfft; + double re = cos(phase); + double im = sin(phase); + if (isinverse) + im = -im; + + if (!isinverse) + { + re /= nfft; + im /= nfft; + } + + ansr += in[k].r * re - in[k].i * im; + ansi += in[k].r * im + in[k].i * re; + } + /*printf ("%d %d ", (int)ansr, (int)ansi);*/ + difr = ansr - out[bin].r; + difi = ansi - out[bin].i; + errpow += difr*difr + difi*difi; + sigpow += ansr*ansr+ansi*ansi; + } + snr = 10*log10(sigpow/errpow); + printf("nfft=%d inverse=%d,snr = %f\n",nfft,isinverse,snr ); + if (snr<60) { + printf( "** poor snr: %f ** \n", snr); + ret = 1; + } +} + +void test1d(int nfft,int isinverse,int arch) +{ + size_t buflen = sizeof(kiss_fft_cpx)*nfft; + + kiss_fft_cpx * in = (kiss_fft_cpx*)malloc(buflen); + kiss_fft_cpx * out= (kiss_fft_cpx*)malloc(buflen); + kiss_fft_state *cfg = opus_fft_alloc(nfft,0,0,arch); + int k; + + for (k=0;k<nfft;++k) { + in[k].r = (rand() % 32767) - 16384; + in[k].i = (rand() % 32767) - 16384; + } + + for (k=0;k<nfft;++k) { + in[k].r *= 32768; + in[k].i *= 32768; + } + + if (isinverse) + { + for (k=0;k<nfft;++k) { + in[k].r /= nfft; + in[k].i /= nfft; + } + } + + /*for (k=0;k<nfft;++k) printf("%d %d ", in[k].r, in[k].i);printf("\n");*/ + + if (isinverse) + opus_ifft(cfg,in,out, arch); + else + opus_fft(cfg,in,out, arch); + + /*for (k=0;k<nfft;++k) printf("%d %d ", out[k].r, out[k].i);printf("\n");*/ + + check(in,out,nfft,isinverse); + + free(in); + free(out); + opus_fft_free(cfg, arch); +} + +int main(int argc,char ** argv) +{ + ALLOC_STACK; + int arch = opus_select_arch(); + + if (argc>1) { + int k; + for (k=1;k<argc;++k) { + test1d(atoi(argv[k]),0,arch); + test1d(atoi(argv[k]),1,arch); + } + }else{ + test1d(32,0,arch); + test1d(32,1,arch); + test1d(128,0,arch); + test1d(128,1,arch); + test1d(256,0,arch); + test1d(256,1,arch); +#ifndef RADIX_TWO_ONLY + test1d(36,0,arch); + test1d(36,1,arch); + test1d(50,0,arch); + test1d(50,1,arch); + test1d(60,0,arch); + test1d(60,1,arch); + test1d(120,0,arch); + test1d(120,1,arch); + test1d(240,0,arch); + test1d(240,1,arch); + test1d(480,0,arch); + test1d(480,1,arch); +#endif + } + return ret; +} diff --git a/thirdparty/opus/celt/tests/test_unit_entropy.c b/thirdparty/opus/celt/tests/test_unit_entropy.c new file mode 100644 index 0000000000..ff9265864c --- /dev/null +++ b/thirdparty/opus/celt/tests/test_unit_entropy.c @@ -0,0 +1,382 @@ +/* Copyright (c) 2007-2011 Xiph.Org Foundation, Mozilla Corporation, + Gregory Maxwell + Written by Jean-Marc Valin, Gregory Maxwell, and Timothy B. Terriberry */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdlib.h> +#include <stdio.h> +#include <math.h> +#include <time.h> +#include "entcode.h" +#include "entenc.h" +#include "entdec.h" +#include <string.h> + +#include "entenc.c" +#include "entdec.c" +#include "entcode.c" + +#ifndef M_LOG2E +# define M_LOG2E 1.4426950408889634074 +#endif +#define DATA_SIZE 10000000 +#define DATA_SIZE2 10000 + +int main(int _argc,char **_argv){ + ec_enc enc; + ec_dec dec; + long nbits; + long nbits2; + double entropy; + int ft; + int ftb; + int sz; + int i; + int ret; + unsigned int sym; + unsigned int seed; + unsigned char *ptr; + const char *env_seed; + ret=0; + entropy=0; + if (_argc > 2) { + fprintf(stderr, "Usage: %s [<seed>]\n", _argv[0]); + return 1; + } + env_seed = getenv("SEED"); + if (_argc > 1) + seed = atoi(_argv[1]); + else if (env_seed) + seed = atoi(env_seed); + else + seed = time(NULL); + /*Testing encoding of raw bit values.*/ + ptr = (unsigned char *)malloc(DATA_SIZE); + ec_enc_init(&enc,ptr, DATA_SIZE); + for(ft=2;ft<1024;ft++){ + for(i=0;i<ft;i++){ + entropy+=log(ft)*M_LOG2E; + ec_enc_uint(&enc,i,ft); + } + } + /*Testing encoding of raw bit values.*/ + for(ftb=1;ftb<16;ftb++){ + for(i=0;i<(1<<ftb);i++){ + entropy+=ftb; + nbits=ec_tell(&enc); + ec_enc_bits(&enc,i,ftb); + nbits2=ec_tell(&enc); + if(nbits2-nbits!=ftb){ + fprintf(stderr,"Used %li bits to encode %i bits directly.\n", + nbits2-nbits,ftb); + ret=-1; + } + } + } + nbits=ec_tell_frac(&enc); + ec_enc_done(&enc); + fprintf(stderr, + "Encoded %0.2lf bits of entropy to %0.2lf bits (%0.3lf%% wasted).\n", + entropy,ldexp(nbits,-3),100*(nbits-ldexp(entropy,3))/nbits); + fprintf(stderr,"Packed to %li bytes.\n",(long)ec_range_bytes(&enc)); + ec_dec_init(&dec,ptr,DATA_SIZE); + for(ft=2;ft<1024;ft++){ + for(i=0;i<ft;i++){ + sym=ec_dec_uint(&dec,ft); + if(sym!=(unsigned)i){ + fprintf(stderr,"Decoded %i instead of %i with ft of %i.\n",sym,i,ft); + ret=-1; + } + } + } + for(ftb=1;ftb<16;ftb++){ + for(i=0;i<(1<<ftb);i++){ + sym=ec_dec_bits(&dec,ftb); + if(sym!=(unsigned)i){ + fprintf(stderr,"Decoded %i instead of %i with ftb of %i.\n",sym,i,ftb); + ret=-1; + } + } + } + nbits2=ec_tell_frac(&dec); + if(nbits!=nbits2){ + fprintf(stderr, + "Reported number of bits used was %0.2lf, should be %0.2lf.\n", + ldexp(nbits2,-3),ldexp(nbits,-3)); + ret=-1; + } + /*Testing an encoder bust prefers range coder data over raw bits. + This isn't a general guarantee, will only work for data that is buffered in + the encoder state and not yet stored in the user buffer, and should never + get used in practice. + It's mostly here for code coverage completeness.*/ + /*Start with a 16-bit buffer.*/ + ec_enc_init(&enc,ptr,2); + /*Write 7 raw bits.*/ + ec_enc_bits(&enc,0x55,7); + /*Write 12.3 bits of range coder data.*/ + ec_enc_uint(&enc,1,2); + ec_enc_uint(&enc,1,3); + ec_enc_uint(&enc,1,4); + ec_enc_uint(&enc,1,5); + ec_enc_uint(&enc,2,6); + ec_enc_uint(&enc,6,7); + ec_enc_done(&enc); + ec_dec_init(&dec,ptr,2); + if(!enc.error + /*The raw bits should have been overwritten by the range coder data.*/ + ||ec_dec_bits(&dec,7)!=0x05 + /*And all the range coder data should have been encoded correctly.*/ + ||ec_dec_uint(&dec,2)!=1 + ||ec_dec_uint(&dec,3)!=1 + ||ec_dec_uint(&dec,4)!=1 + ||ec_dec_uint(&dec,5)!=1 + ||ec_dec_uint(&dec,6)!=2 + ||ec_dec_uint(&dec,7)!=6){ + fprintf(stderr,"Encoder bust overwrote range coder data with raw bits.\n"); + ret=-1; + } + srand(seed); + fprintf(stderr,"Testing random streams... Random seed: %u (%.4X)\n", seed, rand() % 65536); + for(i=0;i<409600;i++){ + unsigned *data; + unsigned *tell; + unsigned tell_bits; + int j; + int zeros; + ft=rand()/((RAND_MAX>>(rand()%11U))+1U)+10; + sz=rand()/((RAND_MAX>>(rand()%9U))+1U); + data=(unsigned *)malloc(sz*sizeof(*data)); + tell=(unsigned *)malloc((sz+1)*sizeof(*tell)); + ec_enc_init(&enc,ptr,DATA_SIZE2); + zeros = rand()%13==0; + tell[0]=ec_tell_frac(&enc); + for(j=0;j<sz;j++){ + if (zeros) + data[j]=0; + else + data[j]=rand()%ft; + ec_enc_uint(&enc,data[j],ft); + tell[j+1]=ec_tell_frac(&enc); + } + if (rand()%2==0) + while(ec_tell(&enc)%8 != 0) + ec_enc_uint(&enc, rand()%2, 2); + tell_bits = ec_tell(&enc); + ec_enc_done(&enc); + if(tell_bits!=(unsigned)ec_tell(&enc)){ + fprintf(stderr,"ec_tell() changed after ec_enc_done(): %i instead of %i (Random seed: %u)\n", + ec_tell(&enc),tell_bits,seed); + ret=-1; + } + if ((tell_bits+7)/8 < ec_range_bytes(&enc)) + { + fprintf (stderr, "ec_tell() lied, there's %i bytes instead of %d (Random seed: %u)\n", + ec_range_bytes(&enc), (tell_bits+7)/8,seed); + ret=-1; + } + ec_dec_init(&dec,ptr,DATA_SIZE2); + if(ec_tell_frac(&dec)!=tell[0]){ + fprintf(stderr, + "Tell mismatch between encoder and decoder at symbol %i: %i instead of %i (Random seed: %u).\n", + 0,ec_tell_frac(&dec),tell[0],seed); + } + for(j=0;j<sz;j++){ + sym=ec_dec_uint(&dec,ft); + if(sym!=data[j]){ + fprintf(stderr, + "Decoded %i instead of %i with ft of %i at position %i of %i (Random seed: %u).\n", + sym,data[j],ft,j,sz,seed); + ret=-1; + } + if(ec_tell_frac(&dec)!=tell[j+1]){ + fprintf(stderr, + "Tell mismatch between encoder and decoder at symbol %i: %i instead of %i (Random seed: %u).\n", + j+1,ec_tell_frac(&dec),tell[j+1],seed); + } + } + free(tell); + free(data); + } + /*Test compatibility between multiple different encode/decode routines.*/ + for(i=0;i<409600;i++){ + unsigned *logp1; + unsigned *data; + unsigned *tell; + unsigned *enc_method; + int j; + sz=rand()/((RAND_MAX>>(rand()%9U))+1U); + logp1=(unsigned *)malloc(sz*sizeof(*logp1)); + data=(unsigned *)malloc(sz*sizeof(*data)); + tell=(unsigned *)malloc((sz+1)*sizeof(*tell)); + enc_method=(unsigned *)malloc(sz*sizeof(*enc_method)); + ec_enc_init(&enc,ptr,DATA_SIZE2); + tell[0]=ec_tell_frac(&enc); + for(j=0;j<sz;j++){ + data[j]=rand()/((RAND_MAX>>1)+1); + logp1[j]=(rand()%15)+1; + enc_method[j]=rand()/((RAND_MAX>>2)+1); + switch(enc_method[j]){ + case 0:{ + ec_encode(&enc,data[j]?(1<<logp1[j])-1:0, + (1<<logp1[j])-(data[j]?0:1),1<<logp1[j]); + }break; + case 1:{ + ec_encode_bin(&enc,data[j]?(1<<logp1[j])-1:0, + (1<<logp1[j])-(data[j]?0:1),logp1[j]); + }break; + case 2:{ + ec_enc_bit_logp(&enc,data[j],logp1[j]); + }break; + case 3:{ + unsigned char icdf[2]; + icdf[0]=1; + icdf[1]=0; + ec_enc_icdf(&enc,data[j],icdf,logp1[j]); + }break; + } + tell[j+1]=ec_tell_frac(&enc); + } + ec_enc_done(&enc); + if((ec_tell(&enc)+7U)/8U<ec_range_bytes(&enc)){ + fprintf(stderr,"tell() lied, there's %i bytes instead of %d (Random seed: %u)\n", + ec_range_bytes(&enc),(ec_tell(&enc)+7)/8,seed); + ret=-1; + } + ec_dec_init(&dec,ptr,DATA_SIZE2); + if(ec_tell_frac(&dec)!=tell[0]){ + fprintf(stderr, + "Tell mismatch between encoder and decoder at symbol %i: %i instead of %i (Random seed: %u).\n", + 0,ec_tell_frac(&dec),tell[0],seed); + } + for(j=0;j<sz;j++){ + int fs; + int dec_method; + dec_method=rand()/((RAND_MAX>>2)+1); + switch(dec_method){ + case 0:{ + fs=ec_decode(&dec,1<<logp1[j]); + sym=fs>=(1<<logp1[j])-1; + ec_dec_update(&dec,sym?(1<<logp1[j])-1:0, + (1<<logp1[j])-(sym?0:1),1<<logp1[j]); + }break; + case 1:{ + fs=ec_decode_bin(&dec,logp1[j]); + sym=fs>=(1<<logp1[j])-1; + ec_dec_update(&dec,sym?(1<<logp1[j])-1:0, + (1<<logp1[j])-(sym?0:1),1<<logp1[j]); + }break; + case 2:{ + sym=ec_dec_bit_logp(&dec,logp1[j]); + }break; + case 3:{ + unsigned char icdf[2]; + icdf[0]=1; + icdf[1]=0; + sym=ec_dec_icdf(&dec,icdf,logp1[j]); + }break; + } + if(sym!=data[j]){ + fprintf(stderr, + "Decoded %i instead of %i with logp1 of %i at position %i of %i (Random seed: %u).\n", + sym,data[j],logp1[j],j,sz,seed); + fprintf(stderr,"Encoding method: %i, decoding method: %i\n", + enc_method[j],dec_method); + ret=-1; + } + if(ec_tell_frac(&dec)!=tell[j+1]){ + fprintf(stderr, + "Tell mismatch between encoder and decoder at symbol %i: %i instead of %i (Random seed: %u).\n", + j+1,ec_tell_frac(&dec),tell[j+1],seed); + } + } + free(enc_method); + free(tell); + free(data); + free(logp1); + } + ec_enc_init(&enc,ptr,DATA_SIZE2); + ec_enc_bit_logp(&enc,0,1); + ec_enc_bit_logp(&enc,0,1); + ec_enc_bit_logp(&enc,0,1); + ec_enc_bit_logp(&enc,0,1); + ec_enc_bit_logp(&enc,0,2); + ec_enc_patch_initial_bits(&enc,3,2); + if(enc.error){ + fprintf(stderr,"patch_initial_bits failed"); + ret=-1; + } + ec_enc_patch_initial_bits(&enc,0,5); + if(!enc.error){ + fprintf(stderr,"patch_initial_bits didn't fail when it should have"); + ret=-1; + } + ec_enc_done(&enc); + if(ec_range_bytes(&enc)!=1||ptr[0]!=192){ + fprintf(stderr,"Got %d when expecting 192 for patch_initial_bits",ptr[0]); + ret=-1; + } + ec_enc_init(&enc,ptr,DATA_SIZE2); + ec_enc_bit_logp(&enc,0,1); + ec_enc_bit_logp(&enc,0,1); + ec_enc_bit_logp(&enc,1,6); + ec_enc_bit_logp(&enc,0,2); + ec_enc_patch_initial_bits(&enc,0,2); + if(enc.error){ + fprintf(stderr,"patch_initial_bits failed"); + ret=-1; + } + ec_enc_done(&enc); + if(ec_range_bytes(&enc)!=2||ptr[0]!=63){ + fprintf(stderr,"Got %d when expecting 63 for patch_initial_bits",ptr[0]); + ret=-1; + } + ec_enc_init(&enc,ptr,2); + ec_enc_bit_logp(&enc,0,2); + for(i=0;i<48;i++){ + ec_enc_bits(&enc,0,1); + } + ec_enc_done(&enc); + if(!enc.error){ + fprintf(stderr,"Raw bits overfill didn't fail when it should have"); + ret=-1; + } + ec_enc_init(&enc,ptr,2); + for(i=0;i<17;i++){ + ec_enc_bits(&enc,0,1); + } + ec_enc_done(&enc); + if(!enc.error){ + fprintf(stderr,"17 raw bits encoded in two bytes"); + ret=-1; + } + free(ptr); + return ret; +} diff --git a/thirdparty/opus/celt/tests/test_unit_laplace.c b/thirdparty/opus/celt/tests/test_unit_laplace.c new file mode 100644 index 0000000000..22951e29ee --- /dev/null +++ b/thirdparty/opus/celt/tests/test_unit_laplace.c @@ -0,0 +1,93 @@ +/* Copyright (c) 2008-2011 Xiph.Org Foundation, Mozilla Corporation + Written by Jean-Marc Valin and Timothy B. Terriberry */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdio.h> +#include <stdlib.h> +#include "laplace.h" +#define CELT_C +#include "stack_alloc.h" + +#include "entenc.c" +#include "entdec.c" +#include "entcode.c" +#include "laplace.c" + +#define DATA_SIZE 40000 + +int ec_laplace_get_start_freq(int decay) +{ + opus_uint32 ft = 32768 - LAPLACE_MINP*(2*LAPLACE_NMIN+1); + int fs = (ft*(16384-decay))/(16384+decay); + return fs+LAPLACE_MINP; +} + +int main(void) +{ + int i; + int ret = 0; + ec_enc enc; + ec_dec dec; + unsigned char *ptr; + int val[10000], decay[10000]; + ALLOC_STACK; + ptr = (unsigned char *)malloc(DATA_SIZE); + ec_enc_init(&enc,ptr,DATA_SIZE); + + val[0] = 3; decay[0] = 6000; + val[1] = 0; decay[1] = 5800; + val[2] = -1; decay[2] = 5600; + for (i=3;i<10000;i++) + { + val[i] = rand()%15-7; + decay[i] = rand()%11000+5000; + } + for (i=0;i<10000;i++) + ec_laplace_encode(&enc, &val[i], + ec_laplace_get_start_freq(decay[i]), decay[i]); + + ec_enc_done(&enc); + + ec_dec_init(&dec,ec_get_buffer(&enc),ec_range_bytes(&enc)); + + for (i=0;i<10000;i++) + { + int d = ec_laplace_decode(&dec, + ec_laplace_get_start_freq(decay[i]), decay[i]); + if (d != val[i]) + { + fprintf (stderr, "Got %d instead of %d\n", d, val[i]); + ret = 1; + } + } + + free(ptr); + return ret; +} diff --git a/thirdparty/opus/celt/tests/test_unit_mathops.c b/thirdparty/opus/celt/tests/test_unit_mathops.c new file mode 100644 index 0000000000..fd3319da91 --- /dev/null +++ b/thirdparty/opus/celt/tests/test_unit_mathops.c @@ -0,0 +1,304 @@ +/* Copyright (c) 2008-2011 Xiph.Org Foundation, Mozilla Corporation, + Gregory Maxwell + Written by Jean-Marc Valin, Gregory Maxwell, and Timothy B. Terriberry */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#ifndef CUSTOM_MODES +#define CUSTOM_MODES +#endif + +#define CELT_C + +#include <stdio.h> +#include <math.h> +#include "mathops.c" +#include "entenc.c" +#include "entdec.c" +#include "entcode.c" +#include "bands.c" +#include "quant_bands.c" +#include "laplace.c" +#include "vq.c" +#include "cwrs.c" +#include "pitch.c" +#include "celt_lpc.c" +#include "celt.c" + +#if defined(OPUS_X86_MAY_HAVE_SSE) || defined(OPUS_X86_MAY_HAVE_SSE2) || defined(OPUS_X86_MAY_HAVE_SSE4_1) +# if defined(OPUS_X86_MAY_HAVE_SSE) +# include "x86/pitch_sse.c" +# endif +# if defined(OPUS_X86_MAY_HAVE_SSE2) +# include "x86/pitch_sse2.c" +# endif +# if defined(OPUS_X86_MAY_HAVE_SSE4_1) +# include "x86/pitch_sse4_1.c" +# include "x86/celt_lpc_sse.c" +# endif +# include "x86/x86_celt_map.c" +#elif defined(OPUS_ARM_ASM) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR) +# include "arm/armcpu.c" +# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) +# include "arm/celt_neon_intr.c" +# if defined(HAVE_ARM_NE10) +# include "kiss_fft.c" +# include "mdct.c" +# include "arm/celt_ne10_fft.c" +# include "arm/celt_ne10_mdct.c" +# endif +# endif +# include "arm/arm_celt_map.c" +#endif + +#ifdef FIXED_POINT +#define WORD "%d" +#else +#define WORD "%f" +#endif + +int ret = 0; + +void testdiv(void) +{ + opus_int32 i; + for (i=1;i<=327670;i++) + { + double prod; + opus_val32 val; + val = celt_rcp(i); +#ifdef FIXED_POINT + prod = (1./32768./65526.)*val*i; +#else + prod = val*i; +#endif + if (fabs(prod-1) > .00025) + { + fprintf (stderr, "div failed: 1/%d="WORD" (product = %f)\n", i, val, prod); + ret = 1; + } + } +} + +void testsqrt(void) +{ + opus_int32 i; + for (i=1;i<=1000000000;i++) + { + double ratio; + opus_val16 val; + val = celt_sqrt(i); + ratio = val/sqrt(i); + if (fabs(ratio - 1) > .0005 && fabs(val-sqrt(i)) > 2) + { + fprintf (stderr, "sqrt failed: sqrt(%d)="WORD" (ratio = %f)\n", i, val, ratio); + ret = 1; + } + i+= i>>10; + } +} + +void testbitexactcos(void) +{ + int i; + opus_int32 min_d,max_d,last,chk; + chk=max_d=0; + last=min_d=32767; + for(i=64;i<=16320;i++) + { + opus_int32 d; + opus_int32 q=bitexact_cos(i); + chk ^= q*i; + d = last - q; + if (d>max_d)max_d=d; + if (d<min_d)min_d=d; + last = q; + } + if ((chk!=89408644)||(max_d!=5)||(min_d!=0)||(bitexact_cos(64)!=32767)|| + (bitexact_cos(16320)!=200)||(bitexact_cos(8192)!=23171)) + { + fprintf (stderr, "bitexact_cos failed\n"); + ret = 1; + } +} + +void testbitexactlog2tan(void) +{ + int i,fail; + opus_int32 min_d,max_d,last,chk; + fail=chk=max_d=0; + last=min_d=15059; + for(i=64;i<8193;i++) + { + opus_int32 d; + opus_int32 mid=bitexact_cos(i); + opus_int32 side=bitexact_cos(16384-i); + opus_int32 q=bitexact_log2tan(mid,side); + chk ^= q*i; + d = last - q; + if (q!=-1*bitexact_log2tan(side,mid)) + fail = 1; + if (d>max_d)max_d=d; + if (d<min_d)min_d=d; + last = q; + } + if ((chk!=15821257)||(max_d!=61)||(min_d!=-2)||fail|| + (bitexact_log2tan(32767,200)!=15059)||(bitexact_log2tan(30274,12540)!=2611)|| + (bitexact_log2tan(23171,23171)!=0)) + { + fprintf (stderr, "bitexact_log2tan failed\n"); + ret = 1; + } +} + +#ifndef FIXED_POINT +void testlog2(void) +{ + float x; + for (x=0.001;x<1677700.0;x+=(x/8.0)) + { + float error = fabs((1.442695040888963387*log(x))-celt_log2(x)); + if (error>0.0009) + { + fprintf (stderr, "celt_log2 failed: fabs((1.442695040888963387*log(x))-celt_log2(x))>0.001 (x = %f, error = %f)\n", x,error); + ret = 1; + } + } +} + +void testexp2(void) +{ + float x; + for (x=-11.0;x<24.0;x+=0.0007) + { + float error = fabs(x-(1.442695040888963387*log(celt_exp2(x)))); + if (error>0.0002) + { + fprintf (stderr, "celt_exp2 failed: fabs(x-(1.442695040888963387*log(celt_exp2(x))))>0.0005 (x = %f, error = %f)\n", x,error); + ret = 1; + } + } +} + +void testexp2log2(void) +{ + float x; + for (x=-11.0;x<24.0;x+=0.0007) + { + float error = fabs(x-(celt_log2(celt_exp2(x)))); + if (error>0.001) + { + fprintf (stderr, "celt_log2/celt_exp2 failed: fabs(x-(celt_log2(celt_exp2(x))))>0.001 (x = %f, error = %f)\n", x,error); + ret = 1; + } + } +} +#else +void testlog2(void) +{ + opus_val32 x; + for (x=8;x<1073741824;x+=(x>>3)) + { + float error = fabs((1.442695040888963387*log(x/16384.0))-celt_log2(x)/1024.0); + if (error>0.003) + { + fprintf (stderr, "celt_log2 failed: x = %ld, error = %f\n", (long)x,error); + ret = 1; + } + } +} + +void testexp2(void) +{ + opus_val16 x; + for (x=-32768;x<15360;x++) + { + float error1 = fabs(x/1024.0-(1.442695040888963387*log(celt_exp2(x)/65536.0))); + float error2 = fabs(exp(0.6931471805599453094*x/1024.0)-celt_exp2(x)/65536.0); + if (error1>0.0002&&error2>0.00004) + { + fprintf (stderr, "celt_exp2 failed: x = "WORD", error1 = %f, error2 = %f\n", x,error1,error2); + ret = 1; + } + } +} + +void testexp2log2(void) +{ + opus_val32 x; + for (x=8;x<65536;x+=(x>>3)) + { + float error = fabs(x-0.25*celt_exp2(celt_log2(x)))/16384; + if (error>0.004) + { + fprintf (stderr, "celt_log2/celt_exp2 failed: fabs(x-(celt_exp2(celt_log2(x))))>0.001 (x = %ld, error = %f)\n", (long)x,error); + ret = 1; + } + } +} + +void testilog2(void) +{ + opus_val32 x; + for (x=1;x<=268435455;x+=127) + { + opus_val32 lg; + opus_val32 y; + + lg = celt_ilog2(x); + if (lg<0 || lg>=31) + { + printf("celt_ilog2 failed: 0<=celt_ilog2(x)<31 (x = %d, celt_ilog2(x) = %d)\n",x,lg); + ret = 1; + } + y = 1<<lg; + + if (x<y || (x>>1)>=y) + { + printf("celt_ilog2 failed: 2**celt_ilog2(x)<=x<2**(celt_ilog2(x)+1) (x = %d, 2**celt_ilog2(x) = %d)\n",x,y); + ret = 1; + } + } +} +#endif + +int main(void) +{ + testbitexactcos(); + testbitexactlog2tan(); + testdiv(); + testsqrt(); + testlog2(); + testexp2(); + testexp2log2(); +#ifdef FIXED_POINT + testilog2(); +#endif + return ret; +} diff --git a/thirdparty/opus/celt/tests/test_unit_mdct.c b/thirdparty/opus/celt/tests/test_unit_mdct.c new file mode 100644 index 0000000000..8dbb9caa2e --- /dev/null +++ b/thirdparty/opus/celt/tests/test_unit_mdct.c @@ -0,0 +1,230 @@ +/* Copyright (c) 2008-2011 Xiph.Org Foundation + Written by Jean-Marc Valin */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#define SKIP_CONFIG_H + +#ifndef CUSTOM_MODES +#define CUSTOM_MODES +#endif + +#include <stdio.h> + +#define CELT_C +#include "mdct.h" +#include "stack_alloc.h" + +#include "kiss_fft.c" +#include "mdct.c" +#include "mathops.c" +#include "entcode.c" + +#if defined(OPUS_X86_MAY_HAVE_SSE2) || defined(OPUS_X86_MAY_HAVE_SSE4_1) +# include "x86/x86cpu.c" +#elif defined(OPUS_ARM_ASM) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR) +# include "arm/armcpu.c" +# include "pitch.c" +# include "celt_lpc.c" +# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) +# include "arm/celt_neon_intr.c" +# if defined(HAVE_ARM_NE10) +# include "arm/celt_ne10_fft.c" +# include "arm/celt_ne10_mdct.c" +# endif +# endif +# include "arm/arm_celt_map.c" +#endif + +#ifndef M_PI +#define M_PI 3.141592653 +#endif + +int ret = 0; +void check(kiss_fft_scalar * in,kiss_fft_scalar * out,int nfft,int isinverse) +{ + int bin,k; + double errpow=0,sigpow=0; + double snr; + for (bin=0;bin<nfft/2;++bin) { + double ansr = 0; + double difr; + + for (k=0;k<nfft;++k) { + double phase = 2*M_PI*(k+.5+.25*nfft)*(bin+.5)/nfft; + double re = cos(phase); + + re /= nfft/4; + + ansr += in[k] * re; + } + /*printf ("%f %f\n", ansr, out[bin]);*/ + difr = ansr - out[bin]; + errpow += difr*difr; + sigpow += ansr*ansr; + } + snr = 10*log10(sigpow/errpow); + printf("nfft=%d inverse=%d,snr = %f\n",nfft,isinverse,snr ); + if (snr<60) { + printf( "** poor snr: %f **\n", snr); + ret = 1; + } +} + +void check_inv(kiss_fft_scalar * in,kiss_fft_scalar * out,int nfft,int isinverse) +{ + int bin,k; + double errpow=0,sigpow=0; + double snr; + for (bin=0;bin<nfft;++bin) { + double ansr = 0; + double difr; + + for (k=0;k<nfft/2;++k) { + double phase = 2*M_PI*(bin+.5+.25*nfft)*(k+.5)/nfft; + double re = cos(phase); + + /*re *= 2;*/ + + ansr += in[k] * re; + } + /*printf ("%f %f\n", ansr, out[bin]);*/ + difr = ansr - out[bin]; + errpow += difr*difr; + sigpow += ansr*ansr; + } + snr = 10*log10(sigpow/errpow); + printf("nfft=%d inverse=%d,snr = %f\n",nfft,isinverse,snr ); + if (snr<60) { + printf( "** poor snr: %f **\n", snr); + ret = 1; + } +} + + +void test1d(int nfft,int isinverse,int arch) +{ + mdct_lookup cfg; + size_t buflen = sizeof(kiss_fft_scalar)*nfft; + + kiss_fft_scalar * in = (kiss_fft_scalar*)malloc(buflen); + kiss_fft_scalar * in_copy = (kiss_fft_scalar*)malloc(buflen); + kiss_fft_scalar * out= (kiss_fft_scalar*)malloc(buflen); + opus_val16 * window= (opus_val16*)malloc(sizeof(opus_val16)*nfft/2); + int k; + + clt_mdct_init(&cfg, nfft, 0, arch); + for (k=0;k<nfft;++k) { + in[k] = (rand() % 32768) - 16384; + } + + for (k=0;k<nfft/2;++k) { + window[k] = Q15ONE; + } + for (k=0;k<nfft;++k) { + in[k] *= 32768; + } + + if (isinverse) + { + for (k=0;k<nfft;++k) { + in[k] /= nfft; + } + } + + for (k=0;k<nfft;++k) + in_copy[k] = in[k]; + /*for (k=0;k<nfft;++k) printf("%d %d ", in[k].r, in[k].i);printf("\n");*/ + + if (isinverse) + { + for (k=0;k<nfft;++k) + out[k] = 0; + clt_mdct_backward(&cfg,in,out, window, nfft/2, 0, 1, arch); + /* apply TDAC because clt_mdct_backward() no longer does that */ + for (k=0;k<nfft/4;++k) + out[nfft-k-1] = out[nfft/2+k]; + check_inv(in,out,nfft,isinverse); + } else { + clt_mdct_forward(&cfg,in,out,window, nfft/2, 0, 1, arch); + check(in_copy,out,nfft,isinverse); + } + /*for (k=0;k<nfft;++k) printf("%d %d ", out[k].r, out[k].i);printf("\n");*/ + + + free(in); + free(in_copy); + free(out); + free(window); + clt_mdct_clear(&cfg, arch); +} + +int main(int argc,char ** argv) +{ + ALLOC_STACK; + int arch = opus_select_arch(); + + if (argc>1) { + int k; + for (k=1;k<argc;++k) { + test1d(atoi(argv[k]),0,arch); + test1d(atoi(argv[k]),1,arch); + } + }else{ + test1d(32,0,arch); + test1d(32,1,arch); + test1d(256,0,arch); + test1d(256,1,arch); + test1d(512,0,arch); + test1d(512,1,arch); + test1d(1024,0,arch); + test1d(1024,1,arch); + test1d(2048,0,arch); + test1d(2048,1,arch); +#ifndef RADIX_TWO_ONLY + test1d(36,0,arch); + test1d(36,1,arch); + test1d(40,0,arch); + test1d(40,1,arch); + test1d(60,0,arch); + test1d(60,1,arch); + test1d(120,0,arch); + test1d(120,1,arch); + test1d(240,0,arch); + test1d(240,1,arch); + test1d(480,0,arch); + test1d(480,1,arch); + test1d(960,0,arch); + test1d(960,1,arch); + test1d(1920,0,arch); + test1d(1920,1,arch); +#endif + } + return ret; +} diff --git a/thirdparty/opus/celt/tests/test_unit_rotation.c b/thirdparty/opus/celt/tests/test_unit_rotation.c new file mode 100644 index 0000000000..1080c2085d --- /dev/null +++ b/thirdparty/opus/celt/tests/test_unit_rotation.c @@ -0,0 +1,120 @@ +/* Copyright (c) 2008-2011 Xiph.Org Foundation + Written by Jean-Marc Valin */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#ifndef CUSTOM_MODES +#define CUSTOM_MODES +#endif + +#define CELT_C + +#include <stdio.h> +#include <stdlib.h> +#include "vq.c" +#include "cwrs.c" +#include "entcode.c" +#include "entenc.c" +#include "entdec.c" +#include "mathops.c" +#include "bands.h" +#include "pitch.c" +#include "celt_lpc.c" +#include "celt.c" +#include <math.h> + +#if defined(OPUS_X86_MAY_HAVE_SSE) || defined(OPUS_X86_MAY_HAVE_SSE2) || defined(OPUS_X86_MAY_HAVE_SSE4_1) +# if defined(OPUS_X86_MAY_HAVE_SSE) +# include "x86/pitch_sse.c" +# endif +# if defined(OPUS_X86_MAY_HAVE_SSE2) +# include "x86/pitch_sse2.c" +# endif +# if defined(OPUS_X86_MAY_HAVE_SSE4_1) +# include "x86/pitch_sse4_1.c" +# include "x86/celt_lpc_sse.c" +# endif +# include "x86/x86_celt_map.c" +#elif defined(OPUS_ARM_ASM) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR) +# include "arm/armcpu.c" +# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) +# include "arm/celt_neon_intr.c" +# if defined(HAVE_ARM_NE10) +# include "kiss_fft.c" +# include "mdct.c" +# include "arm/celt_ne10_fft.c" +# include "arm/celt_ne10_mdct.c" +# endif +# endif +# include "arm/arm_celt_map.c" +#endif + +#define MAX_SIZE 100 + +int ret=0; +void test_rotation(int N, int K) +{ + int i; + double err = 0, ener = 0, snr, snr0; + opus_val16 x0[MAX_SIZE]; + opus_val16 x1[MAX_SIZE]; + for (i=0;i<N;i++) + x1[i] = x0[i] = rand()%32767-16384; + exp_rotation(x1, N, 1, 1, K, SPREAD_NORMAL); + for (i=0;i<N;i++) + { + err += (x0[i]-(double)x1[i])*(x0[i]-(double)x1[i]); + ener += x0[i]*(double)x0[i]; + } + snr0 = 20*log10(ener/err); + err = ener = 0; + exp_rotation(x1, N, -1, 1, K, SPREAD_NORMAL); + for (i=0;i<N;i++) + { + err += (x0[i]-(double)x1[i])*(x0[i]-(double)x1[i]); + ener += x0[i]*(double)x0[i]; + } + snr = 20*log10(ener/err); + printf ("SNR for size %d (%d pulses) is %f (was %f without inverse)\n", N, K, snr, snr0); + if (snr < 60 || snr0 > 20) + { + fprintf(stderr, "FAIL!\n"); + ret = 1; + } +} + +int main(void) +{ + ALLOC_STACK; + test_rotation(15, 3); + test_rotation(23, 5); + test_rotation(50, 3); + test_rotation(80, 1); + return ret; +} diff --git a/thirdparty/opus/celt/x86/vq_sse.h b/thirdparty/opus/celt/tests/test_unit_types.c index b4efe8f249..67a0fb8ed3 100644 --- a/thirdparty/opus/celt/x86/vq_sse.h +++ b/thirdparty/opus/celt/tests/test_unit_types.c @@ -1,4 +1,5 @@ -/* Copyright (c) 2016 Jean-Marc Valin */ +/* Copyright (c) 2008-2011 Xiph.Org Foundation + Written by Jean-Marc Valin */ /* Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions @@ -24,27 +25,26 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#ifndef VQ_SSE_H -#define VQ_SSE_H - -#if defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(FIXED_POINT) -#define OVERRIDE_OP_PVQ_SEARCH - -opus_val16 op_pvq_search_sse2(celt_norm *_X, int *iy, int K, int N, int arch); - -#if defined(OPUS_X86_PRESUME_SSE2) -#define op_pvq_search(x, iy, K, N, arch) \ - (op_pvq_search_sse2(x, iy, K, N, arch)) - -#else - -extern opus_val16 (*const OP_PVQ_SEARCH_IMPL[OPUS_ARCHMASK + 1])( - celt_norm *_X, int *iy, int K, int N, int arch); - -# define op_pvq_search(X, iy, K, N, arch) \ - ((*OP_PVQ_SEARCH_IMPL[(arch) & OPUS_ARCHMASK])(X, iy, K, N, arch)) - -#endif +#ifdef HAVE_CONFIG_H +#include "config.h" #endif -#endif +#include "opus_types.h" +#include <stdio.h> + +int main(void) +{ + opus_int16 i = 1; + i <<= 14; + if (i>>14 != 1) + { + fprintf(stderr, "opus_int16 isn't 16 bits\n"); + return 1; + } + if (sizeof(opus_int16)*2 != sizeof(opus_int32)) + { + fprintf(stderr, "16*2 != 32\n"); + return 1; + } + return 0; +} diff --git a/thirdparty/opus/celt/vq.c b/thirdparty/opus/celt/vq.c index 8011e22548..d29f38fd8e 100644 --- a/thirdparty/opus/celt/vq.c +++ b/thirdparty/opus/celt/vq.c @@ -39,10 +39,6 @@ #include "rate.h" #include "pitch.h" -#if defined(MIPSr1_ASM) -#include "mips/vq_mipsr1.h" -#endif - #ifndef OVERRIDE_vq_exp_rotation1 static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s) { @@ -71,7 +67,7 @@ static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_ } #endif /* OVERRIDE_vq_exp_rotation1 */ -void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread) +static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread) { static const int SPREAD_FACTOR[3]={15,10,5}; int i; @@ -162,27 +158,42 @@ static unsigned extract_collapse_mask(int *iy, int N, int B) return collapse_mask; } -opus_val16 op_pvq_search_c(celt_norm *X, int *iy, int K, int N, int arch) +unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc +#ifdef RESYNTH + , opus_val16 gain +#endif + ) { VARDECL(celt_norm, y); - VARDECL(int, signx); + VARDECL(int, iy); + VARDECL(opus_val16, signx); int i, j; + opus_val16 s; int pulsesLeft; opus_val32 sum; opus_val32 xy; opus_val16 yy; + unsigned collapse_mask; SAVE_STACK; - (void)arch; + celt_assert2(K>0, "alg_quant() needs at least one pulse"); + celt_assert2(N>1, "alg_quant() needs at least two dimensions"); + ALLOC(y, N, celt_norm); - ALLOC(signx, N, int); + ALLOC(iy, N, int); + ALLOC(signx, N, opus_val16); + + exp_rotation(X, N, 1, B, K, spread); /* Get rid of the sign */ sum = 0; j=0; do { - signx[j] = X[j]<0; - /* OPT: Make sure the compiler doesn't use a branch on ABS16(). */ - X[j] = ABS16(X[j]); + if (X[j]>0) + signx[j]=1; + else { + signx[j]=-1; + X[j]=-X[j]; + } iy[j] = 0; y[j] = 0; } while (++j<N); @@ -214,12 +225,7 @@ opus_val16 op_pvq_search_c(celt_norm *X, int *iy, int K, int N, int arch) while (++j<N); sum = QCONST16(1.f,14); } -#ifdef FIXED_POINT - rcp = EXTRACT16(MULT16_32_Q16(K, celt_rcp(sum))); -#else - /* Using K+e with e < 1 guarantees we cannot get more than K pulses. */ - rcp = EXTRACT16(MULT16_32_Q16(K+0.8f, celt_rcp(sum))); -#endif + rcp = EXTRACT16(MULT16_32_Q16(K-1, celt_rcp(sum))); j=0; do { #ifdef FIXED_POINT /* It's really important to round *towards zero* here */ @@ -234,12 +240,12 @@ opus_val16 op_pvq_search_c(celt_norm *X, int *iy, int K, int N, int arch) pulsesLeft -= iy[j]; } while (++j<N); } - celt_sig_assert(pulsesLeft>=0); + celt_assert2(pulsesLeft>=1, "Allocated too many pulses in the quick pass"); /* This should never happen, but just in case it does (e.g. on silence) we fill the first bin with pulses. */ #ifdef FIXED_POINT_DEBUG - celt_sig_assert(pulsesLeft<=N+3); + celt_assert2(pulsesLeft<=N+3, "Not enough pulses in the quick pass"); #endif if (pulsesLeft > N+3) { @@ -250,12 +256,12 @@ opus_val16 op_pvq_search_c(celt_norm *X, int *iy, int K, int N, int arch) pulsesLeft=0; } + s = 1; for (i=0;i<pulsesLeft;i++) { - opus_val16 Rxy, Ryy; int best_id; - opus_val32 best_num; - opus_val16 best_den; + opus_val32 best_num = -VERY_LARGE16; + opus_val16 best_den = 0; #ifdef FIXED_POINT int rshift; #endif @@ -266,22 +272,9 @@ opus_val16 op_pvq_search_c(celt_norm *X, int *iy, int K, int N, int arch) /* The squared magnitude term gets added anyway, so we might as well add it outside the loop */ yy = ADD16(yy, 1); - - /* Calculations for position 0 are out of the loop, in part to reduce - mispredicted branches (since the if condition is usually false) - in the loop. */ - /* Temporary sums of the new pulse(s) */ - Rxy = EXTRACT16(SHR32(ADD32(xy, EXTEND32(X[0])),rshift)); - /* We're multiplying y[j] by two so we don't have to do it here */ - Ryy = ADD16(yy, y[0]); - - /* Approximate score: we maximise Rxy/sqrt(Ryy) (we're guaranteed that - Rxy is positive because the sign is pre-computed) */ - Rxy = MULT16_16_Q15(Rxy,Rxy); - best_den = Ryy; - best_num = Rxy; - j=1; + j=0; do { + opus_val16 Rxy, Ryy; /* Temporary sums of the new pulse(s) */ Rxy = EXTRACT16(SHR32(ADD32(xy, EXTEND32(X[j])),rshift)); /* We're multiplying y[j] by two so we don't have to do it here */ @@ -292,11 +285,8 @@ opus_val16 op_pvq_search_c(celt_norm *X, int *iy, int K, int N, int arch) Rxy = MULT16_16_Q15(Rxy,Rxy); /* The idea is to check for num/den >= best_num/best_den, but that way we can do it without any division */ - /* OPT: It's not clear whether a cmov is faster than a branch here - since the condition is more often false than true and using - a cmov introduces data dependencies across iterations. The optimal - choice may be architecture-dependent. */ - if (opus_unlikely(MULT16_16(best_den, Rxy) > MULT16_16(Ryy, best_num))) + /* OPT: Make sure to use conditional moves here */ + if (MULT16_16(best_den, Rxy) > MULT16_16(Ryy, best_num)) { best_den = Ryy; best_num = Rxy; @@ -311,47 +301,23 @@ opus_val16 op_pvq_search_c(celt_norm *X, int *iy, int K, int N, int arch) /* Only now that we've made the final choice, update y/iy */ /* Multiplying y[j] by 2 so we don't have to do it everywhere else */ - y[best_id] += 2; + y[best_id] += 2*s; iy[best_id]++; } /* Put the original sign back */ j=0; do { - /*iy[j] = signx[j] ? -iy[j] : iy[j];*/ - /* OPT: The is more likely to be compiled without a branch than the code above - but has the same performance otherwise. */ - iy[j] = (iy[j]^-signx[j]) + signx[j]; + X[j] = MULT16_16(signx[j],X[j]); + if (signx[j] < 0) + iy[j] = -iy[j]; } while (++j<N); - RESTORE_STACK; - return yy; -} - -unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc, - opus_val16 gain, int resynth, int arch) -{ - VARDECL(int, iy); - opus_val16 yy; - unsigned collapse_mask; - SAVE_STACK; - - celt_assert2(K>0, "alg_quant() needs at least one pulse"); - celt_assert2(N>1, "alg_quant() needs at least two dimensions"); - - /* Covers vectorization by up to 4. */ - ALLOC(iy, N+3, int); - - exp_rotation(X, N, 1, B, K, spread); - - yy = op_pvq_search(X, iy, K, N, arch); - encode_pulses(iy, N, K, enc); - if (resynth) - { - normalise_residual(iy, X, N, yy, gain); - exp_rotation(X, N, -1, B, K, spread); - } +#ifdef RESYNTH + normalise_residual(iy, X, N, yy, gain); + exp_rotation(X, N, -1, B, K, spread); +#endif collapse_mask = extract_collapse_mask(iy, N, B); RESTORE_STACK; @@ -435,7 +401,7 @@ int stereo_itheta(const celt_norm *X, const celt_norm *Y, int stereo, int N, int /* 0.63662 = 2/pi */ itheta = MULT16_16_Q15(QCONST16(0.63662f,15),celt_atan2p(side, mid)); #else - itheta = (int)floor(.5f+16384*0.63662f*fast_atan2f(side,mid)); + itheta = (int)floor(.5f+16384*0.63662f*atan2(side,mid)); #endif return itheta; diff --git a/thirdparty/opus/celt/vq.h b/thirdparty/opus/celt/vq.h index 45ec55918e..5cfcbe50ea 100644 --- a/thirdparty/opus/celt/vq.h +++ b/thirdparty/opus/celt/vq.h @@ -37,18 +37,10 @@ #include "entdec.h" #include "modes.h" -#if (defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(FIXED_POINT)) -#include "x86/vq_sse.h" +#if defined(MIPSr1_ASM) +#include "mips/vq_mipsr1.h" #endif -void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread); - -opus_val16 op_pvq_search_c(celt_norm *X, int *iy, int K, int N, int arch); - -#if !defined(OVERRIDE_OP_PVQ_SEARCH) -#define op_pvq_search(x, iy, K, N, arch) \ - (op_pvq_search_c(x, iy, K, N, arch)) -#endif /** Algebraic pulse-vector quantiser. The signal x is replaced by the sum of * the pitch and a combination of pulses such that its norm is still equal @@ -59,8 +51,12 @@ opus_val16 op_pvq_search_c(celt_norm *X, int *iy, int K, int N, int arch); * @param enc Entropy encoder state * @ret A mask indicating which blocks in the band received pulses */ -unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc, - opus_val16 gain, int resynth, int arch); +unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, + ec_enc *enc +#ifdef RESYNTH + , opus_val16 gain +#endif + ); /** Algebraic pulse decoder * @param X Decoded normalised spectrum (returned) diff --git a/thirdparty/opus/celt/x86/celt_lpc_sse4_1.c b/thirdparty/opus/celt/x86/celt_lpc_sse.c index 5478568849..67e5592acf 100644 --- a/thirdparty/opus/celt/x86/celt_lpc_sse4_1.c +++ b/thirdparty/opus/celt/x86/celt_lpc_sse.c @@ -40,23 +40,65 @@ #if defined(FIXED_POINT) -void celt_fir_sse4_1(const opus_val16 *x, +void celt_fir_sse4_1(const opus_val16 *_x, const opus_val16 *num, - opus_val16 *y, + opus_val16 *_y, int N, int ord, + opus_val16 *mem, int arch) { int i,j; VARDECL(opus_val16, rnum); + VARDECL(opus_val16, x); __m128i vecNoA; opus_int32 noA ; SAVE_STACK; ALLOC(rnum, ord, opus_val16); + ALLOC(x, N+ord, opus_val16); for(i=0;i<ord;i++) rnum[i] = num[ord-i-1]; + for(i=0;i<ord;i++) + x[i] = mem[ord-i-1]; + + for (i=0;i<N-7;i+=8) + { + x[i+ord ]=_x[i ]; + x[i+ord+1]=_x[i+1]; + x[i+ord+2]=_x[i+2]; + x[i+ord+3]=_x[i+3]; + x[i+ord+4]=_x[i+4]; + x[i+ord+5]=_x[i+5]; + x[i+ord+6]=_x[i+6]; + x[i+ord+7]=_x[i+7]; + } + + for (;i<N-3;i+=4) + { + x[i+ord ]=_x[i ]; + x[i+ord+1]=_x[i+1]; + x[i+ord+2]=_x[i+2]; + x[i+ord+3]=_x[i+3]; + } + + for (;i<N;i++) + x[i+ord]=_x[i]; + + for(i=0;i<ord;i++) + mem[i] = _x[N-i-1]; +#ifdef SMALL_FOOTPRINT + for (i=0;i<N;i++) + { + opus_val32 sum = SHL32(EXTEND32(_x[i]), SIG_SHIFT); + for (j=0;j<ord;j++) + { + sum = MAC16_16(sum,rnum[j],x[i+j]); + } + _y[i] = SATURATE16(PSHR32(sum, SIG_SHIFT)); + } +#else noA = EXTEND32(1) << SIG_SHIFT >> 1; vecNoA = _mm_set_epi32(noA, noA, noA, noA); @@ -65,24 +107,25 @@ void celt_fir_sse4_1(const opus_val16 *x, opus_val32 sums[4] = {0}; __m128i vecSum, vecX; - xcorr_kernel(rnum, x+i-ord, sums, ord, arch); + xcorr_kernel(rnum, x+i, sums, ord, arch); vecSum = _mm_loadu_si128((__m128i *)sums); vecSum = _mm_add_epi32(vecSum, vecNoA); vecSum = _mm_srai_epi32(vecSum, SIG_SHIFT); - vecX = OP_CVTEPI16_EPI32_M64(x + i); + vecX = OP_CVTEPI16_EPI32_M64(_x + i); vecSum = _mm_add_epi32(vecSum, vecX); vecSum = _mm_packs_epi32(vecSum, vecSum); - _mm_storel_epi64((__m128i *)(y + i), vecSum); + _mm_storel_epi64((__m128i *)(_y + i), vecSum); } for (;i<N;i++) { opus_val32 sum = 0; for (j=0;j<ord;j++) - sum = MAC16_16(sum, rnum[j], x[i+j-ord]); - y[i] = SATURATE16(ADD32(EXTEND32(x[i]), PSHR32(sum, SIG_SHIFT))); + sum = MAC16_16(sum, rnum[j], x[i + j]); + _y[i] = SATURATE16(ADD32(EXTEND32(_x[i]), PSHR32(sum, SIG_SHIFT))); } +#endif RESTORE_STACK; } diff --git a/thirdparty/opus/celt/x86/celt_lpc_sse.h b/thirdparty/opus/celt/x86/celt_lpc_sse.h index 7d1ecf7533..c5ec796ed5 100644 --- a/thirdparty/opus/celt/x86/celt_lpc_sse.h +++ b/thirdparty/opus/celt/x86/celt_lpc_sse.h @@ -41,11 +41,12 @@ void celt_fir_sse4_1( opus_val16 *y, int N, int ord, + opus_val16 *mem, int arch); #if defined(OPUS_X86_PRESUME_SSE4_1) -#define celt_fir(x, num, y, N, ord, arch) \ - ((void)arch, celt_fir_sse4_1(x, num, y, N, ord, arch)) +#define celt_fir(x, num, y, N, ord, mem, arch) \ + ((void)arch, celt_fir_sse4_1(x, num, y, N, ord, mem, arch)) #else @@ -55,10 +56,11 @@ extern void (*const CELT_FIR_IMPL[OPUS_ARCHMASK + 1])( opus_val16 *y, int N, int ord, + opus_val16 *mem, int arch); -# define celt_fir(x, num, y, N, ord, arch) \ - ((*CELT_FIR_IMPL[(arch) & OPUS_ARCHMASK])(x, num, y, N, ord, arch)) +# define celt_fir(x, num, y, N, ord, mem, arch) \ + ((*CELT_FIR_IMPL[(arch) & OPUS_ARCHMASK])(x, num, y, N, ord, mem, arch)) #endif #endif diff --git a/thirdparty/opus/celt/x86/vq_sse2.c b/thirdparty/opus/celt/x86/vq_sse2.c deleted file mode 100644 index 775042860d..0000000000 --- a/thirdparty/opus/celt/x86/vq_sse2.c +++ /dev/null @@ -1,217 +0,0 @@ -/* Copyright (c) 2007-2008 CSIRO - Copyright (c) 2007-2009 Xiph.Org Foundation - Copyright (c) 2007-2016 Jean-Marc Valin */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include <xmmintrin.h> -#include <emmintrin.h> -#include "celt_lpc.h" -#include "stack_alloc.h" -#include "mathops.h" -#include "vq.h" -#include "x86cpu.h" - - -#ifndef FIXED_POINT - -opus_val16 op_pvq_search_sse2(celt_norm *_X, int *iy, int K, int N, int arch) -{ - int i, j; - int pulsesLeft; - float xy, yy; - VARDECL(celt_norm, y); - VARDECL(celt_norm, X); - VARDECL(float, signy); - __m128 signmask; - __m128 sums; - __m128i fours; - SAVE_STACK; - - (void)arch; - /* All bits set to zero, except for the sign bit. */ - signmask = _mm_set_ps1(-0.f); - fours = _mm_set_epi32(4, 4, 4, 4); - ALLOC(y, N+3, celt_norm); - ALLOC(X, N+3, celt_norm); - ALLOC(signy, N+3, float); - - OPUS_COPY(X, _X, N); - X[N] = X[N+1] = X[N+2] = 0; - sums = _mm_setzero_ps(); - for (j=0;j<N;j+=4) - { - __m128 x4, s4; - x4 = _mm_loadu_ps(&X[j]); - s4 = _mm_cmplt_ps(x4, _mm_setzero_ps()); - /* Get rid of the sign */ - x4 = _mm_andnot_ps(signmask, x4); - sums = _mm_add_ps(sums, x4); - /* Clear y and iy in case we don't do the projection. */ - _mm_storeu_ps(&y[j], _mm_setzero_ps()); - _mm_storeu_si128((__m128i*)&iy[j], _mm_setzero_si128()); - _mm_storeu_ps(&X[j], x4); - _mm_storeu_ps(&signy[j], s4); - } - sums = _mm_add_ps(sums, _mm_shuffle_ps(sums, sums, _MM_SHUFFLE(1, 0, 3, 2))); - sums = _mm_add_ps(sums, _mm_shuffle_ps(sums, sums, _MM_SHUFFLE(2, 3, 0, 1))); - - xy = yy = 0; - - pulsesLeft = K; - - /* Do a pre-search by projecting on the pyramid */ - if (K > (N>>1)) - { - __m128i pulses_sum; - __m128 yy4, xy4; - __m128 rcp4; - opus_val32 sum = _mm_cvtss_f32(sums); - /* If X is too small, just replace it with a pulse at 0 */ - /* Prevents infinities and NaNs from causing too many pulses - to be allocated. 64 is an approximation of infinity here. */ - if (!(sum > EPSILON && sum < 64)) - { - X[0] = QCONST16(1.f,14); - j=1; do - X[j]=0; - while (++j<N); - sums = _mm_set_ps1(1.f); - } - /* Using K+e with e < 1 guarantees we cannot get more than K pulses. */ - rcp4 = _mm_mul_ps(_mm_set_ps1((float)(K+.8)), _mm_rcp_ps(sums)); - xy4 = yy4 = _mm_setzero_ps(); - pulses_sum = _mm_setzero_si128(); - for (j=0;j<N;j+=4) - { - __m128 rx4, x4, y4; - __m128i iy4; - x4 = _mm_loadu_ps(&X[j]); - rx4 = _mm_mul_ps(x4, rcp4); - iy4 = _mm_cvttps_epi32(rx4); - pulses_sum = _mm_add_epi32(pulses_sum, iy4); - _mm_storeu_si128((__m128i*)&iy[j], iy4); - y4 = _mm_cvtepi32_ps(iy4); - xy4 = _mm_add_ps(xy4, _mm_mul_ps(x4, y4)); - yy4 = _mm_add_ps(yy4, _mm_mul_ps(y4, y4)); - /* double the y[] vector so we don't have to do it in the search loop. */ - _mm_storeu_ps(&y[j], _mm_add_ps(y4, y4)); - } - pulses_sum = _mm_add_epi32(pulses_sum, _mm_shuffle_epi32(pulses_sum, _MM_SHUFFLE(1, 0, 3, 2))); - pulses_sum = _mm_add_epi32(pulses_sum, _mm_shuffle_epi32(pulses_sum, _MM_SHUFFLE(2, 3, 0, 1))); - pulsesLeft -= _mm_cvtsi128_si32(pulses_sum); - xy4 = _mm_add_ps(xy4, _mm_shuffle_ps(xy4, xy4, _MM_SHUFFLE(1, 0, 3, 2))); - xy4 = _mm_add_ps(xy4, _mm_shuffle_ps(xy4, xy4, _MM_SHUFFLE(2, 3, 0, 1))); - xy = _mm_cvtss_f32(xy4); - yy4 = _mm_add_ps(yy4, _mm_shuffle_ps(yy4, yy4, _MM_SHUFFLE(1, 0, 3, 2))); - yy4 = _mm_add_ps(yy4, _mm_shuffle_ps(yy4, yy4, _MM_SHUFFLE(2, 3, 0, 1))); - yy = _mm_cvtss_f32(yy4); - } - X[N] = X[N+1] = X[N+2] = -100; - y[N] = y[N+1] = y[N+2] = 100; - celt_sig_assert(pulsesLeft>=0); - - /* This should never happen, but just in case it does (e.g. on silence) - we fill the first bin with pulses. */ - if (pulsesLeft > N+3) - { - opus_val16 tmp = (opus_val16)pulsesLeft; - yy = MAC16_16(yy, tmp, tmp); - yy = MAC16_16(yy, tmp, y[0]); - iy[0] += pulsesLeft; - pulsesLeft=0; - } - - for (i=0;i<pulsesLeft;i++) - { - int best_id; - __m128 xy4, yy4; - __m128 max, max2; - __m128i count; - __m128i pos; - /* The squared magnitude term gets added anyway, so we might as well - add it outside the loop */ - yy = ADD16(yy, 1); - xy4 = _mm_load1_ps(&xy); - yy4 = _mm_load1_ps(&yy); - max = _mm_setzero_ps(); - pos = _mm_setzero_si128(); - count = _mm_set_epi32(3, 2, 1, 0); - for (j=0;j<N;j+=4) - { - __m128 x4, y4, r4; - x4 = _mm_loadu_ps(&X[j]); - y4 = _mm_loadu_ps(&y[j]); - x4 = _mm_add_ps(x4, xy4); - y4 = _mm_add_ps(y4, yy4); - y4 = _mm_rsqrt_ps(y4); - r4 = _mm_mul_ps(x4, y4); - /* Update the index of the max. */ - pos = _mm_max_epi16(pos, _mm_and_si128(count, _mm_castps_si128(_mm_cmpgt_ps(r4, max)))); - /* Update the max. */ - max = _mm_max_ps(max, r4); - /* Update the indices (+4) */ - count = _mm_add_epi32(count, fours); - } - /* Horizontal max */ - max2 = _mm_max_ps(max, _mm_shuffle_ps(max, max, _MM_SHUFFLE(1, 0, 3, 2))); - max2 = _mm_max_ps(max2, _mm_shuffle_ps(max2, max2, _MM_SHUFFLE(2, 3, 0, 1))); - /* Now that max2 contains the max at all positions, look at which value(s) of the - partial max is equal to the global max. */ - pos = _mm_and_si128(pos, _mm_castps_si128(_mm_cmpeq_ps(max, max2))); - pos = _mm_max_epi16(pos, _mm_unpackhi_epi64(pos, pos)); - pos = _mm_max_epi16(pos, _mm_shufflelo_epi16(pos, _MM_SHUFFLE(1, 0, 3, 2))); - best_id = _mm_cvtsi128_si32(pos); - - /* Updating the sums of the new pulse(s) */ - xy = ADD32(xy, EXTEND32(X[best_id])); - /* We're multiplying y[j] by two so we don't have to do it here */ - yy = ADD16(yy, y[best_id]); - - /* Only now that we've made the final choice, update y/iy */ - /* Multiplying y[j] by 2 so we don't have to do it everywhere else */ - y[best_id] += 2; - iy[best_id]++; - } - - /* Put the original sign back */ - for (j=0;j<N;j+=4) - { - __m128i y4; - __m128i s4; - y4 = _mm_loadu_si128((__m128i*)&iy[j]); - s4 = _mm_castps_si128(_mm_loadu_ps(&signy[j])); - y4 = _mm_xor_si128(_mm_add_epi32(y4, s4), s4); - _mm_storeu_si128((__m128i*)&iy[j], y4); - } - RESTORE_STACK; - return yy; -} - -#endif diff --git a/thirdparty/opus/celt/x86/x86_celt_map.c b/thirdparty/opus/celt/x86/x86_celt_map.c index d39d88edec..47ba41b9ee 100644 --- a/thirdparty/opus/celt/x86/x86_celt_map.c +++ b/thirdparty/opus/celt/x86/x86_celt_map.c @@ -33,7 +33,6 @@ #include "celt_lpc.h" #include "pitch.h" #include "pitch_sse.h" -#include "vq.h" #if defined(OPUS_HAVE_RTCD) @@ -47,6 +46,7 @@ void (*const CELT_FIR_IMPL[OPUS_ARCHMASK + 1])( opus_val16 *y, int N, int ord, + opus_val16 *mem, int arch ) = { celt_fir_c, /* non-sse */ @@ -151,17 +151,5 @@ void (*const COMB_FILTER_CONST_IMPL[OPUS_ARCHMASK + 1])( #endif -#if defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2) -opus_val16 (*const OP_PVQ_SEARCH_IMPL[OPUS_ARCHMASK + 1])( - celt_norm *_X, int *iy, int K, int N, int arch -) = { - op_pvq_search_c, /* non-sse */ - op_pvq_search_c, - MAY_HAVE_SSE2(op_pvq_search), - MAY_HAVE_SSE2(op_pvq_search), - MAY_HAVE_SSE2(op_pvq_search) -}; -#endif - #endif #endif diff --git a/thirdparty/opus/celt/x86/x86cpu.h b/thirdparty/opus/celt/x86/x86cpu.h index 1e2bf17b9b..04fd48aac4 100644 --- a/thirdparty/opus/celt/x86/x86cpu.h +++ b/thirdparty/opus/celt/x86/x86cpu.h @@ -82,9 +82,7 @@ int opus_select_arch(void); (_mm_cvtepi8_epi32(*(__m128i *)(x))) #endif -/* similar reasoning about the instruction sequence as in the 32-bit macro above, - */ -# if defined(__clang__) || !defined(__OPTIMIZE__) +# if !defined(__OPTIMIZE__) # define OP_CVTEPI16_EPI32_M64(x) \ (_mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i *)(x)))) # else diff --git a/thirdparty/opus/config.h b/thirdparty/opus/config.h index bb935619eb..7b9c92c6a8 100644 --- a/thirdparty/opus/config.h +++ b/thirdparty/opus/config.h @@ -1,44 +1,5 @@ -/* config.h. Generated from config.h.in by configure. */ -/* config.h.in. Generated from configure.ac by autoheader. */ - -/* Get CPU Info by asm method */ -#define CPU_INFO_BY_ASM 1 - -/* Get CPU Info by c method */ -/* #undef CPU_INFO_BY_C */ - -/* Custom modes */ -/* #undef CUSTOM_MODES */ - -/* Do not build the float API */ -/* #undef DISABLE_FLOAT_API */ - -/* Disable bitstream fixes from RFC 8251 */ -/* #undef DISABLE_UPDATE_DRAFT */ - -/* Assertions */ -/* #undef ENABLE_ASSERTIONS */ - -/* Hardening */ -#define ENABLE_HARDENING 1 - -/* Debug fixed-point implementation */ -/* #undef FIXED_DEBUG */ - -/* Compile as fixed-point (for machines without a fast enough FPU) */ -/* #undef FIXED_POINT */ - -/* Float approximations */ -/* #undef FLOAT_APPROX */ - -/* Fuzzing */ -/* #undef FUZZING */ - -/* Define to 1 if you have the <alloca.h> header file. */ -/* #undef HAVE_ALLOCA_H */ - -/* NE10 library is installed on host. Make sure it is on target! */ -/* #undef HAVE_ARM_NE10 */ +/* Opus configuration header */ +/* Based on the output of libopus configure script */ /* Define to 1 if you have the <dlfcn.h> header file. */ #define HAVE_DLFCN_H 1 @@ -80,9 +41,6 @@ /* Define to 1 if you have the <unistd.h> header file. */ #define HAVE_UNISTD_H 1 -/* Define to 1 if you have the `__malloc_hook' function. */ -#define HAVE___MALLOC_HOOK 1 - /* Define to the sub-directory in which libtool stores uninstalled libraries. */ #define LT_OBJDIR ".libs/" @@ -134,80 +92,9 @@ #endif // OPUS_ARM64_OPT -/* Define if binary requires Aarch64 Neon Intrinsics */ -/* #undef OPUS_ARM_PRESUME_AARCH64_NEON_INTR */ - -/* Define if binary requires EDSP instruction support */ -/* #undef OPUS_ARM_PRESUME_EDSP */ - -/* Define if binary requires ARMv6 media instruction support */ -/* #undef OPUS_ARM_PRESUME_MEDIA */ - -/* Define if binary requires NEON instruction support */ -/* #undef OPUS_ARM_PRESUME_NEON */ - -/* Define if binary requires NEON intrinsics support */ -/* #undef OPUS_ARM_PRESUME_NEON_INTR */ - /* This is a build of OPUS */ #define OPUS_BUILD /**/ -/* Run bit-exactness checks between optimized and c implementations */ -/* #undef OPUS_CHECK_ASM */ - -#ifndef OPUS_ARM_OPT -/* Use run-time CPU capabilities detection */ -#define OPUS_HAVE_RTCD 1 -#endif - -/* Compiler supports X86 AVX Intrinsics */ -/* #define OPUS_X86_MAY_HAVE_AVX */ - -/* Compiler supports X86 SSE Intrinsics */ -/* #define OPUS_X86_MAY_HAVE_SSE */ - -/* Compiler supports X86 SSE2 Intrinsics */ -/* #define OPUS_X86_MAY_HAVE_SSE2 */ - -/* Compiler supports X86 SSE4.1 Intrinsics */ -/* #define OPUS_X86_MAY_HAVE_SSE4_1 */ - -/* Define if binary requires AVX intrinsics support */ -/* #undef OPUS_X86_PRESUME_AVX */ - -/* Define if binary requires SSE intrinsics support */ -#define OPUS_X86_PRESUME_SSE 1 - -/* Define if binary requires SSE2 intrinsics support */ -#define OPUS_X86_PRESUME_SSE2 1 - -/* Define if binary requires SSE4.1 intrinsics support */ -#define OPUS_X86_PRESUME_SSE4_1 1 - -/* Define to the address where bug reports for this package should be sent. */ -#define PACKAGE_BUGREPORT "opus@xiph.org" - -/* Define to the full name of this package. */ -#define PACKAGE_NAME "opus" - -/* Define to the full name and version of this package. */ -#define PACKAGE_STRING "opus unknown" - -/* Define to the one symbol short name of this package. */ -#define PACKAGE_TARNAME "opus" - -/* Define to the home page for this package. */ -#define PACKAGE_URL "" - -/* Define to the version of this package. */ -#define PACKAGE_VERSION "unknown" - -/* Define to 1 if you have the ANSI C header files. */ -#define STDC_HEADERS 1 - -/* Make use of alloca */ -/* #undef USE_ALLOCA */ - #ifndef WIN32 /* Use C99 variable-size arrays */ #define VAR_ARRAYS 1 @@ -216,13 +103,11 @@ #define USE_ALLOCA 1 #endif -/* Define to empty if `const' does not conform to ANSI C. */ -/* #undef const */ - #ifndef OPUS_FIXED_POINT #define FLOAT_APPROX 1 #endif + /* Define to `__inline__' or `__inline' if that's what the C compiler calls it, or to nothing if 'inline' is not supported under any name. */ #ifndef __cplusplus diff --git a/thirdparty/opus/info.c b/thirdparty/opus/info.c index 3a1a5bf75b..c36f9a9ee1 100644 --- a/thirdparty/opus/info.c +++ b/thirdparty/opus/info.c @@ -107,32 +107,26 @@ static int op_tags_ensure_capacity(OpusTags *_tags,size_t _ncomments){ char **user_comments; int *comment_lengths; int cur_ncomments; + char *binary_suffix_data; + int binary_suffix_len; size_t size; if(OP_UNLIKELY(_ncomments>=(size_t)INT_MAX))return OP_EFAULT; size=sizeof(*_tags->comment_lengths)*(_ncomments+1); if(size/sizeof(*_tags->comment_lengths)!=_ncomments+1)return OP_EFAULT; cur_ncomments=_tags->comments; - /*We only support growing. - Trimming requires cleaning up the allocated strings in the old space, and - is best handled separately if it's ever needed.*/ - OP_ASSERT(_ncomments>=(size_t)cur_ncomments); + comment_lengths=_tags->comment_lengths; + binary_suffix_len=comment_lengths==NULL?0:comment_lengths[cur_ncomments]; comment_lengths=(int *)_ogg_realloc(_tags->comment_lengths,size); if(OP_UNLIKELY(comment_lengths==NULL))return OP_EFAULT; - if(_tags->comment_lengths==NULL){ - OP_ASSERT(cur_ncomments==0); - comment_lengths[cur_ncomments]=0; - } - comment_lengths[_ncomments]=comment_lengths[cur_ncomments]; + comment_lengths[_ncomments]=binary_suffix_len; _tags->comment_lengths=comment_lengths; size=sizeof(*_tags->user_comments)*(_ncomments+1); if(size/sizeof(*_tags->user_comments)!=_ncomments+1)return OP_EFAULT; + user_comments=_tags->user_comments; + binary_suffix_data=user_comments==NULL?NULL:user_comments[cur_ncomments]; user_comments=(char **)_ogg_realloc(_tags->user_comments,size); if(OP_UNLIKELY(user_comments==NULL))return OP_EFAULT; - if(_tags->user_comments==NULL){ - OP_ASSERT(cur_ncomments==0); - user_comments[cur_ncomments]=NULL; - } - user_comments[_ncomments]=user_comments[cur_ncomments]; + user_comments[_ncomments]=binary_suffix_data; _tags->user_comments=user_comments; return 0; } @@ -281,30 +275,28 @@ int opus_tags_copy(OpusTags *_dst,const OpusTags *_src){ ret=opus_tags_copy_impl(&dst,_src); if(OP_UNLIKELY(ret<0))opus_tags_clear(&dst); else *_dst=*&dst; - return ret; + return 0; } int opus_tags_add(OpusTags *_tags,const char *_tag,const char *_value){ - char *comment; - size_t tag_len; - size_t value_len; - int ncomments; - int ret; + char *comment; + int tag_len; + int value_len; + int ncomments; + int ret; ncomments=_tags->comments; ret=op_tags_ensure_capacity(_tags,ncomments+1); if(OP_UNLIKELY(ret<0))return ret; tag_len=strlen(_tag); value_len=strlen(_value); /*+2 for '=' and '\0'.*/ - if(tag_len+value_len<tag_len)return OP_EFAULT; - if(tag_len+value_len>(size_t)INT_MAX-2)return OP_EFAULT; comment=(char *)_ogg_malloc(sizeof(*comment)*(tag_len+value_len+2)); if(OP_UNLIKELY(comment==NULL))return OP_EFAULT; memcpy(comment,_tag,sizeof(*comment)*tag_len); comment[tag_len]='='; memcpy(comment+tag_len+1,_value,sizeof(*comment)*(value_len+1)); _tags->user_comments[ncomments]=comment; - _tags->comment_lengths[ncomments]=(int)(tag_len+value_len+1); + _tags->comment_lengths[ncomments]=tag_len+value_len+1; _tags->comments=ncomments+1; return 0; } @@ -345,10 +337,7 @@ int opus_tags_set_binary_suffix(OpusTags *_tags, } int opus_tagcompare(const char *_tag_name,const char *_comment){ - size_t tag_len; - tag_len=strlen(_tag_name); - if(OP_UNLIKELY(tag_len>(size_t)INT_MAX))return -1; - return opus_tagncompare(_tag_name,(int)tag_len,_comment); + return opus_tagncompare(_tag_name,strlen(_tag_name),_comment); } int opus_tagncompare(const char *_tag_name,int _tag_len,const char *_comment){ @@ -359,18 +348,17 @@ int opus_tagncompare(const char *_tag_name,int _tag_len,const char *_comment){ } const char *opus_tags_query(const OpusTags *_tags,const char *_tag,int _count){ - char **user_comments; - size_t tag_len; - int found; - int ncomments; - int ci; + char **user_comments; + int tag_len; + int found; + int ncomments; + int ci; tag_len=strlen(_tag); - if(OP_UNLIKELY(tag_len>(size_t)INT_MAX))return NULL; ncomments=_tags->comments; user_comments=_tags->user_comments; found=0; for(ci=0;ci<ncomments;ci++){ - if(!opus_tagncompare(_tag,(int)tag_len,user_comments[ci])){ + if(!opus_tagncompare(_tag,tag_len,user_comments[ci])){ /*We return a pointer to the data, not a copy.*/ if(_count==found++)return user_comments[ci]+tag_len+1; } @@ -380,18 +368,17 @@ const char *opus_tags_query(const OpusTags *_tags,const char *_tag,int _count){ } int opus_tags_query_count(const OpusTags *_tags,const char *_tag){ - char **user_comments; - size_t tag_len; - int found; - int ncomments; - int ci; + char **user_comments; + int tag_len; + int found; + int ncomments; + int ci; tag_len=strlen(_tag); - if(OP_UNLIKELY(tag_len>(size_t)INT_MAX))return 0; ncomments=_tags->comments; user_comments=_tags->user_comments; found=0; for(ci=0;ci<ncomments;ci++){ - if(!opus_tagncompare(_tag,(int)tag_len,user_comments[ci]))found++; + if(!opus_tagncompare(_tag,tag_len,user_comments[ci]))found++; } return found; } @@ -416,8 +403,7 @@ static int opus_tags_get_gain(const OpusTags *_tags,int *_gain_q8, ncomments=_tags->comments; /*Look for the first valid tag with the name _tag_name and use that.*/ for(ci=0;ci<ncomments;ci++){ - OP_ASSERT(_tag_len<=(size_t)INT_MAX); - if(opus_tagncompare(_tag_name,(int)_tag_len,comments[ci])==0){ + if(opus_tagncompare(_tag_name,_tag_len,comments[ci])==0){ char *p; opus_int32 gain_q8; int negative; @@ -453,7 +439,8 @@ int opus_tags_get_track_gain(const OpusTags *_tags,int *_gain_q8){ } static int op_is_jpeg(const unsigned char *_buf,size_t _buf_sz){ - return _buf_sz>=3&&memcmp(_buf,"\xFF\xD8\xFF",3)==0; + return _buf_sz>=11&&memcmp(_buf,"\xFF\xD8\xFF\xE0",4)==0 + &&(_buf[4]<<8|_buf[5])>=16&&memcmp(_buf+6,"JFIF",5)==0; } /*Tries to extract the width, height, bits per pixel, and palette size of a diff --git a/thirdparty/opus/internal.h b/thirdparty/opus/internal.h index 9ac17e028f..ee48ea34c9 100644 --- a/thirdparty/opus/internal.h +++ b/thirdparty/opus/internal.h @@ -136,9 +136,6 @@ struct OggOpusLink{ that end-trimming calculations work properly. This is only valid for seekable sources.*/ opus_int64 end_offset; - /*The total duration of all prior links. - This is always zero for non-seekable sources.*/ - ogg_int64_t pcm_file_offset; /*The granule position of the last sample. This is only valid for seekable sources.*/ ogg_int64_t pcm_end; @@ -153,25 +150,23 @@ struct OggOpusLink{ }; struct OggOpusFile{ - /*The callbacks used to access the stream.*/ + /*The callbacks used to access the data source.*/ OpusFileCallbacks callbacks; - /*A FILE *, memory buffer, etc.*/ - void *stream; - /*Whether or not we can seek with this stream.*/ + /*A FILE *, memory bufer, etc.*/ + void *source; + /*Whether or not we can seek with this data source.*/ int seekable; /*The number of links in this chained Ogg Opus file.*/ int nlinks; /*The cached information from each link in a chained Ogg Opus file. - If stream isn't seekable (e.g., it's a pipe), only the current link + If source isn't seekable (e.g., it's a pipe), only the current link appears.*/ OggOpusLink *links; /*The number of serial numbers from a single link.*/ int nserialnos; /*The capacity of the list of serial numbers from a single link.*/ int cserialnos; - /*Storage for the list of serial numbers from a single link. - This is a scratch buffer used when scanning the BOS pages at the start of - each link.*/ + /*Storage for the list of serial numbers from a single link.*/ ogg_uint32_t *serialnos; /*This is the current offset of the data processed by the ogg_sync_state. After a seek, this should be set to the target offset so that we can track @@ -179,9 +174,9 @@ struct OggOpusFile{ After a call to op_get_next_page(), this will point to the first byte after that page.*/ opus_int64 offset; - /*The total size of this stream, or -1 if it's unseekable.*/ + /*The total size of this data source, or -1 if it's unseekable.*/ opus_int64 end; - /*Used to locate pages in the stream.*/ + /*Used to locate pages in the data source.*/ ogg_sync_state oy; /*One of OP_NOTOPEN, OP_PARTOPEN, OP_OPENED, OP_STREAMSET, OP_INITSET.*/ int ready_state; @@ -232,7 +227,7 @@ struct OggOpusFile{ /*The number of valid samples in the decoded buffer.*/ int od_buffer_size; /*The type of gain offset to apply. - One of OP_HEADER_GAIN, OP_ALBUM_GAIN, OP_TRACK_GAIN, or OP_ABSOLUTE_GAIN.*/ + One of OP_HEADER_GAIN, OP_TRACK_GAIN, or OP_ABSOLUTE_GAIN.*/ int gain_type; /*The offset to apply to the gain.*/ opus_int32 gain_offset_q8; diff --git a/thirdparty/opus/mapping_matrix.c b/thirdparty/opus/mapping_matrix.c deleted file mode 100644 index 31298af057..0000000000 --- a/thirdparty/opus/mapping_matrix.c +++ /dev/null @@ -1,378 +0,0 @@ -/* Copyright (c) 2017 Google Inc. - Written by Andrew Allen */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "arch.h" -#include "float_cast.h" -#include "opus_private.h" -#include "opus_defines.h" -#include "mapping_matrix.h" - -#define MATRIX_INDEX(nb_rows, row, col) (nb_rows * col + row) - -opus_int32 mapping_matrix_get_size(int rows, int cols) -{ - opus_int32 size; - - /* Mapping Matrix must only support up to 255 channels in or out. - * Additionally, the total cell count must be <= 65004 octets in order - * for the matrix to be stored in an OGG header. - */ - if (rows > 255 || cols > 255) - return 0; - size = rows * (opus_int32)cols * sizeof(opus_int16); - if (size > 65004) - return 0; - - return align(sizeof(MappingMatrix)) + align(size); -} - -opus_int16 *mapping_matrix_get_data(const MappingMatrix *matrix) -{ - /* void* cast avoids clang -Wcast-align warning */ - return (opus_int16*)(void*)((char*)matrix + align(sizeof(MappingMatrix))); -} - -void mapping_matrix_init(MappingMatrix * const matrix, - int rows, int cols, int gain, const opus_int16 *data, opus_int32 data_size) -{ - int i; - opus_int16 *ptr; - -#if !defined(ENABLE_ASSERTIONS) - (void)data_size; -#endif - celt_assert(align(data_size) == align(rows * cols * sizeof(opus_int16))); - - matrix->rows = rows; - matrix->cols = cols; - matrix->gain = gain; - ptr = mapping_matrix_get_data(matrix); - for (i = 0; i < rows * cols; i++) - { - ptr[i] = data[i]; - } -} - -#ifndef DISABLE_FLOAT_API -void mapping_matrix_multiply_channel_in_float( - const MappingMatrix *matrix, - const float *input, - int input_rows, - opus_val16 *output, - int output_row, - int output_rows, - int frame_size) -{ - /* Matrix data is ordered col-wise. */ - opus_int16* matrix_data; - int i, col; - - celt_assert(input_rows <= matrix->cols && output_rows <= matrix->rows); - - matrix_data = mapping_matrix_get_data(matrix); - - for (i = 0; i < frame_size; i++) - { - float tmp = 0; - for (col = 0; col < input_rows; col++) - { - tmp += - matrix_data[MATRIX_INDEX(matrix->rows, output_row, col)] * - input[MATRIX_INDEX(input_rows, col, i)]; - } -#if defined(FIXED_POINT) - output[output_rows * i] = FLOAT2INT16((1/32768.f)*tmp); -#else - output[output_rows * i] = (1/32768.f)*tmp; -#endif - } -} - -void mapping_matrix_multiply_channel_out_float( - const MappingMatrix *matrix, - const opus_val16 *input, - int input_row, - int input_rows, - float *output, - int output_rows, - int frame_size -) -{ - /* Matrix data is ordered col-wise. */ - opus_int16* matrix_data; - int i, row; - float input_sample; - - celt_assert(input_rows <= matrix->cols && output_rows <= matrix->rows); - - matrix_data = mapping_matrix_get_data(matrix); - - for (i = 0; i < frame_size; i++) - { -#if defined(FIXED_POINT) - input_sample = (1/32768.f)*input[input_rows * i]; -#else - input_sample = input[input_rows * i]; -#endif - for (row = 0; row < output_rows; row++) - { - float tmp = - (1/32768.f)*matrix_data[MATRIX_INDEX(matrix->rows, row, input_row)] * - input_sample; - output[MATRIX_INDEX(output_rows, row, i)] += tmp; - } - } -} -#endif /* DISABLE_FLOAT_API */ - -void mapping_matrix_multiply_channel_in_short( - const MappingMatrix *matrix, - const opus_int16 *input, - int input_rows, - opus_val16 *output, - int output_row, - int output_rows, - int frame_size) -{ - /* Matrix data is ordered col-wise. */ - opus_int16* matrix_data; - int i, col; - - celt_assert(input_rows <= matrix->cols && output_rows <= matrix->rows); - - matrix_data = mapping_matrix_get_data(matrix); - - for (i = 0; i < frame_size; i++) - { - opus_val32 tmp = 0; - for (col = 0; col < input_rows; col++) - { -#if defined(FIXED_POINT) - tmp += - ((opus_int32)matrix_data[MATRIX_INDEX(matrix->rows, output_row, col)] * - (opus_int32)input[MATRIX_INDEX(input_rows, col, i)]) >> 8; -#else - tmp += - matrix_data[MATRIX_INDEX(matrix->rows, output_row, col)] * - input[MATRIX_INDEX(input_rows, col, i)]; -#endif - } -#if defined(FIXED_POINT) - output[output_rows * i] = (opus_int16)((tmp + 64) >> 7); -#else - output[output_rows * i] = (1/(32768.f*32768.f))*tmp; -#endif - } -} - -void mapping_matrix_multiply_channel_out_short( - const MappingMatrix *matrix, - const opus_val16 *input, - int input_row, - int input_rows, - opus_int16 *output, - int output_rows, - int frame_size) -{ - /* Matrix data is ordered col-wise. */ - opus_int16* matrix_data; - int i, row; - opus_int32 input_sample; - - celt_assert(input_rows <= matrix->cols && output_rows <= matrix->rows); - - matrix_data = mapping_matrix_get_data(matrix); - - for (i = 0; i < frame_size; i++) - { -#if defined(FIXED_POINT) - input_sample = (opus_int32)input[input_rows * i]; -#else - input_sample = (opus_int32)FLOAT2INT16(input[input_rows * i]); -#endif - for (row = 0; row < output_rows; row++) - { - opus_int32 tmp = - (opus_int32)matrix_data[MATRIX_INDEX(matrix->rows, row, input_row)] * - input_sample; - output[MATRIX_INDEX(output_rows, row, i)] += (tmp + 16384) >> 15; - } - } -} - -const MappingMatrix mapping_matrix_foa_mixing = { 6, 6, 0 }; -const opus_int16 mapping_matrix_foa_mixing_data[36] = { - 16384, 0, -16384, 23170, 0, 0, 16384, 23170, - 16384, 0, 0, 0, 16384, 0, -16384, -23170, - 0, 0, 16384, -23170, 16384, 0, 0, 0, - 0, 0, 0, 0, 32767, 0, 0, 0, - 0, 0, 0, 32767 -}; - -const MappingMatrix mapping_matrix_soa_mixing = { 11, 11, 0 }; -const opus_int16 mapping_matrix_soa_mixing_data[121] = { - 10923, 7723, 13377, -13377, 11585, 9459, 7723, -16384, - -6689, 0, 0, 10923, 7723, 13377, 13377, -11585, - 9459, 7723, 16384, -6689, 0, 0, 10923, -15447, - 13377, 0, 0, -18919, 7723, 0, 13377, 0, - 0, 10923, 7723, -13377, -13377, 11585, -9459, 7723, - 16384, -6689, 0, 0, 10923, -7723, 0, 13377, - -16384, 0, -15447, 0, 9459, 0, 0, 10923, - -7723, 0, -13377, 16384, 0, -15447, 0, 9459, - 0, 0, 10923, 15447, 0, 0, 0, 0, - -15447, 0, -18919, 0, 0, 10923, 7723, -13377, - 13377, -11585, -9459, 7723, -16384, -6689, 0, 0, - 10923, -15447, -13377, 0, 0, 18919, 7723, 0, - 13377, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 32767, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 32767 -}; - -const MappingMatrix mapping_matrix_toa_mixing = { 18, 18, 0 }; -const opus_int16 mapping_matrix_toa_mixing_data[324] = { - 8208, 0, -881, 14369, 0, 0, -8192, -4163, - 13218, 0, 0, 0, 11095, -8836, -6218, 14833, - 0, 0, 8208, -10161, 881, 10161, -13218, -2944, - -8192, 2944, 0, -10488, -6218, 6248, -11095, -6248, - 0, -10488, 0, 0, 8208, 10161, 881, -10161, - -13218, 2944, -8192, -2944, 0, 10488, -6218, -6248, - -11095, 6248, 0, 10488, 0, 0, 8176, 5566, - -11552, 5566, 9681, -11205, 8192, -11205, 0, 4920, - -15158, 9756, -3334, 9756, 0, -4920, 0, 0, - 8176, 7871, 11552, 0, 0, 15846, 8192, 0, - -9681, -6958, 0, 13797, 3334, 0, -15158, 0, - 0, 0, 8176, 0, 11552, 7871, 0, 0, - 8192, 15846, 9681, 0, 0, 0, 3334, 13797, - 15158, 6958, 0, 0, 8176, 5566, -11552, -5566, - -9681, -11205, 8192, 11205, 0, 4920, 15158, 9756, - -3334, -9756, 0, 4920, 0, 0, 8208, 14369, - -881, 0, 0, -4163, -8192, 0, -13218, -14833, - 0, -8836, 11095, 0, 6218, 0, 0, 0, - 8208, 10161, 881, 10161, 13218, 2944, -8192, 2944, - 0, 10488, 6218, -6248, -11095, -6248, 0, -10488, - 0, 0, 8208, -14369, -881, 0, 0, 4163, - -8192, 0, -13218, 14833, 0, 8836, 11095, 0, - 6218, 0, 0, 0, 8208, 0, -881, -14369, - 0, 0, -8192, 4163, 13218, 0, 0, 0, - 11095, 8836, -6218, -14833, 0, 0, 8176, -5566, - -11552, 5566, -9681, 11205, 8192, -11205, 0, -4920, - 15158, -9756, -3334, 9756, 0, -4920, 0, 0, - 8176, 0, 11552, -7871, 0, 0, 8192, -15846, - 9681, 0, 0, 0, 3334, -13797, 15158, -6958, - 0, 0, 8176, -7871, 11552, 0, 0, -15846, - 8192, 0, -9681, 6958, 0, -13797, 3334, 0, - -15158, 0, 0, 0, 8176, -5566, -11552, -5566, - 9681, 11205, 8192, 11205, 0, -4920, -15158, -9756, - -3334, -9756, 0, 4920, 0, 0, 8208, -10161, - 881, -10161, 13218, -2944, -8192, -2944, 0, -10488, - 6218, 6248, -11095, 6248, 0, 10488, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 32767, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 32767 -}; - -const MappingMatrix mapping_matrix_foa_demixing = { 6, 6, 0 }; -const opus_int16 mapping_matrix_foa_demixing_data[36] = { - 16384, 16384, 16384, 16384, 0, 0, 0, 23170, - 0, -23170, 0, 0, -16384, 16384, -16384, 16384, - 0, 0, 23170, 0, -23170, 0, 0, 0, - 0, 0, 0, 0, 32767, 0, 0, 0, - 0, 0, 0, 32767 -}; - -const MappingMatrix mapping_matrix_soa_demixing = { 11, 11, 3050 }; -const opus_int16 mapping_matrix_soa_demixing_data[121] = { - 2771, 2771, 2771, 2771, 2771, 2771, 2771, 2771, - 2771, 0, 0, 10033, 10033, -20066, 10033, 14189, - 14189, -28378, 10033, -20066, 0, 0, 3393, 3393, - 3393, -3393, 0, 0, 0, -3393, -3393, 0, - 0, -17378, 17378, 0, -17378, -24576, 24576, 0, - 17378, 0, 0, 0, -14189, 14189, 0, -14189, - -28378, 28378, 0, 14189, 0, 0, 0, 2399, - 2399, -4799, -2399, 0, 0, 0, -2399, 4799, - 0, 0, 1959, 1959, 1959, 1959, -3918, -3918, - -3918, 1959, 1959, 0, 0, -4156, 4156, 0, - 4156, 0, 0, 0, -4156, 0, 0, 0, - 8192, 8192, -16384, 8192, 16384, 16384, -32768, 8192, - -16384, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 8312, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 8312 -}; - -const MappingMatrix mapping_matrix_toa_demixing = { 18, 18, 0 }; -const opus_int16 mapping_matrix_toa_demixing_data[324] = { - 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, - 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, - 0, 0, 0, -9779, 9779, 6263, 8857, 0, - 6263, 13829, 9779, -13829, 0, -6263, 0, -8857, - -6263, -9779, 0, 0, -3413, 3413, 3413, -11359, - 11359, 11359, -11359, -3413, 3413, -3413, -3413, -11359, - 11359, 11359, -11359, 3413, 0, 0, 13829, 9779, - -9779, 6263, 0, 8857, -6263, 0, 9779, 0, - -13829, 6263, -8857, 0, -6263, -9779, 0, 0, - 0, -15617, -15617, 6406, 0, 0, -6406, 0, - 15617, 0, 0, -6406, 0, 0, 6406, 15617, - 0, 0, 0, -5003, 5003, -10664, 15081, 0, - -10664, -7075, 5003, 7075, 0, 10664, 0, -15081, - 10664, -5003, 0, 0, -8176, -8176, -8176, 8208, - 8208, 8208, 8208, -8176, -8176, -8176, -8176, 8208, - 8208, 8208, 8208, -8176, 0, 0, -7075, 5003, - -5003, -10664, 0, 15081, 10664, 0, 5003, 0, - 7075, -10664, -15081, 0, 10664, -5003, 0, 0, - 15617, 0, 0, 0, -6406, 6406, 0, -15617, - 0, -15617, 15617, 0, 6406, -6406, 0, 0, - 0, 0, 0, -11393, 11393, 2993, -4233, 0, - 2993, -16112, 11393, 16112, 0, -2993, 0, 4233, - -2993, -11393, 0, 0, 0, -9974, -9974, -13617, - 0, 0, 13617, 0, 9974, 0, 0, 13617, - 0, 0, -13617, 9974, 0, 0, 0, 5579, - -5579, 10185, 14403, 0, 10185, -7890, -5579, 7890, - 0, -10185, 0, -14403, -10185, 5579, 0, 0, - 11826, -11826, -11826, -901, 901, 901, -901, 11826, - -11826, 11826, 11826, -901, 901, 901, -901, -11826, - 0, 0, -7890, -5579, 5579, 10185, 0, 14403, - -10185, 0, -5579, 0, 7890, 10185, -14403, 0, - -10185, 5579, 0, 0, -9974, 0, 0, 0, - -13617, 13617, 0, 9974, 0, 9974, -9974, 0, - 13617, -13617, 0, 0, 0, 0, 16112, -11393, - 11393, -2993, 0, 4233, 2993, 0, -11393, 0, - -16112, -2993, -4233, 0, 2993, 11393, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 32767, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 32767 -}; - diff --git a/thirdparty/opus/mapping_matrix.h b/thirdparty/opus/mapping_matrix.h deleted file mode 100644 index 98bc82df3e..0000000000 --- a/thirdparty/opus/mapping_matrix.h +++ /dev/null @@ -1,133 +0,0 @@ -/* Copyright (c) 2017 Google Inc. - Written by Andrew Allen */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -/** - * @file mapping_matrix.h - * @brief Opus reference implementation mapping matrix API - */ - -#ifndef MAPPING_MATRIX_H -#define MAPPING_MATRIX_H - -#include "opus_types.h" -#include "opus_projection.h" - -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct MappingMatrix -{ - int rows; /* number of channels outputted from matrix. */ - int cols; /* number of channels inputted to matrix. */ - int gain; /* in dB. S7.8-format. */ - /* Matrix cell data goes here using col-wise ordering. */ -} MappingMatrix; - -opus_int32 mapping_matrix_get_size(int rows, int cols); - -opus_int16 *mapping_matrix_get_data(const MappingMatrix *matrix); - -void mapping_matrix_init( - MappingMatrix * const matrix, - int rows, - int cols, - int gain, - const opus_int16 *data, - opus_int32 data_size -); - -#ifndef DISABLE_FLOAT_API -void mapping_matrix_multiply_channel_in_float( - const MappingMatrix *matrix, - const float *input, - int input_rows, - opus_val16 *output, - int output_row, - int output_rows, - int frame_size -); - -void mapping_matrix_multiply_channel_out_float( - const MappingMatrix *matrix, - const opus_val16 *input, - int input_row, - int input_rows, - float *output, - int output_rows, - int frame_size -); -#endif /* DISABLE_FLOAT_API */ - -void mapping_matrix_multiply_channel_in_short( - const MappingMatrix *matrix, - const opus_int16 *input, - int input_rows, - opus_val16 *output, - int output_row, - int output_rows, - int frame_size -); - -void mapping_matrix_multiply_channel_out_short( - const MappingMatrix *matrix, - const opus_val16 *input, - int input_row, - int input_rows, - opus_int16 *output, - int output_rows, - int frame_size -); - -/* Pre-computed mixing and demixing matrices for 1st to 3rd-order ambisonics. - * foa: first-order ambisonics - * soa: second-order ambisonics - * toa: third-order ambisonics - */ -extern const MappingMatrix mapping_matrix_foa_mixing; -extern const opus_int16 mapping_matrix_foa_mixing_data[36]; - -extern const MappingMatrix mapping_matrix_soa_mixing; -extern const opus_int16 mapping_matrix_soa_mixing_data[121]; - -extern const MappingMatrix mapping_matrix_toa_mixing; -extern const opus_int16 mapping_matrix_toa_mixing_data[324]; - -extern const MappingMatrix mapping_matrix_foa_demixing; -extern const opus_int16 mapping_matrix_foa_demixing_data[36]; - -extern const MappingMatrix mapping_matrix_soa_demixing; -extern const opus_int16 mapping_matrix_soa_demixing_data[121]; - -extern const MappingMatrix mapping_matrix_toa_demixing; -extern const opus_int16 mapping_matrix_toa_demixing_data[324]; - -#ifdef __cplusplus -} -#endif - -#endif /* MAPPING_MATRIX_H */ diff --git a/thirdparty/opus/mlp.c b/thirdparty/opus/mlp.c index 964c6a98f6..ff9e50df47 100644 --- a/thirdparty/opus/mlp.c +++ b/thirdparty/opus/mlp.c @@ -1,5 +1,5 @@ /* Copyright (c) 2008-2011 Octasic Inc. - 2012-2017 Jean-Marc Valin */ + Written by Jean-Marc Valin */ /* Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions @@ -29,13 +29,42 @@ #include "config.h" #endif -#include <math.h> #include "opus_types.h" #include "opus_defines.h" + +#include <math.h> +#include "mlp.h" #include "arch.h" #include "tansig_table.h" -#include "mlp.h" +#define MAX_NEURONS 100 +#if 0 +static OPUS_INLINE opus_val16 tansig_approx(opus_val32 _x) /* Q19 */ +{ + int i; + opus_val16 xx; /* Q11 */ + /*double x, y;*/ + opus_val16 dy, yy; /* Q14 */ + /*x = 1.9073e-06*_x;*/ + if (_x>=QCONST32(8,19)) + return QCONST32(1.,14); + if (_x<=-QCONST32(8,19)) + return -QCONST32(1.,14); + xx = EXTRACT16(SHR32(_x, 8)); + /*i = lrint(25*x);*/ + i = SHR32(ADD32(1024,MULT16_16(25, xx)),11); + /*x -= .04*i;*/ + xx -= EXTRACT16(SHR32(MULT16_16(20972,i),8)); + /*x = xx*(1./2048);*/ + /*y = tansig_table[250+i];*/ + yy = tansig_table[250+i]; + /*y = yy*(1./16384);*/ + dy = 16384-MULT16_16_Q14(yy,yy); + yy = yy + MULT16_16_Q14(MULT16_16_Q11(xx,dy),(16384 - MULT16_16_Q11(yy,xx))); + return yy; +} +#else +/*extern const float tansig_table[501];*/ static OPUS_INLINE float tansig_approx(float x) { int i; @@ -63,82 +92,54 @@ static OPUS_INLINE float tansig_approx(float x) y = y + x*dy*(1 - y*x); return sign*y; } +#endif -static OPUS_INLINE float sigmoid_approx(float x) -{ - return .5f + .5f*tansig_approx(.5f*x); -} - -static void gemm_accum(float *out, const opus_int8 *weights, int rows, int cols, int col_stride, const float *x) -{ - int i, j; - for (i=0;i<rows;i++) - { - for (j=0;j<cols;j++) - out[i] += weights[j*col_stride + i]*x[j]; - } -} - -void compute_dense(const DenseLayer *layer, float *output, const float *input) +#if 0 +void mlp_process(const MLP *m, const opus_val16 *in, opus_val16 *out) { - int i; - int N, M; - int stride; - M = layer->nb_inputs; - N = layer->nb_neurons; - stride = N; - for (i=0;i<N;i++) - output[i] = layer->bias[i]; - gemm_accum(output, layer->input_weights, N, M, stride, input); - for (i=0;i<N;i++) - output[i] *= WEIGHTS_SCALE; - if (layer->sigmoid) { - for (i=0;i<N;i++) - output[i] = sigmoid_approx(output[i]); - } else { - for (i=0;i<N;i++) - output[i] = tansig_approx(output[i]); - } + int j; + opus_val16 hidden[MAX_NEURONS]; + const opus_val16 *W = m->weights; + /* Copy to tmp_in */ + for (j=0;j<m->topo[1];j++) + { + int k; + opus_val32 sum = SHL32(EXTEND32(*W++),8); + for (k=0;k<m->topo[0];k++) + sum = MAC16_16(sum, in[k],*W++); + hidden[j] = tansig_approx(sum); + } + for (j=0;j<m->topo[2];j++) + { + int k; + opus_val32 sum = SHL32(EXTEND32(*W++),14); + for (k=0;k<m->topo[1];k++) + sum = MAC16_16(sum, hidden[k], *W++); + out[j] = tansig_approx(EXTRACT16(PSHR32(sum,17))); + } } - -void compute_gru(const GRULayer *gru, float *state, const float *input) +#else +void mlp_process(const MLP *m, const float *in, float *out) { - int i; - int N, M; - int stride; - float tmp[MAX_NEURONS]; - float z[MAX_NEURONS]; - float r[MAX_NEURONS]; - float h[MAX_NEURONS]; - M = gru->nb_inputs; - N = gru->nb_neurons; - stride = 3*N; - /* Compute update gate. */ - for (i=0;i<N;i++) - z[i] = gru->bias[i]; - gemm_accum(z, gru->input_weights, N, M, stride, input); - gemm_accum(z, gru->recurrent_weights, N, N, stride, state); - for (i=0;i<N;i++) - z[i] = sigmoid_approx(WEIGHTS_SCALE*z[i]); - - /* Compute reset gate. */ - for (i=0;i<N;i++) - r[i] = gru->bias[N + i]; - gemm_accum(r, &gru->input_weights[N], N, M, stride, input); - gemm_accum(r, &gru->recurrent_weights[N], N, N, stride, state); - for (i=0;i<N;i++) - r[i] = sigmoid_approx(WEIGHTS_SCALE*r[i]); - - /* Compute output. */ - for (i=0;i<N;i++) - h[i] = gru->bias[2*N + i]; - for (i=0;i<N;i++) - tmp[i] = state[i] * r[i]; - gemm_accum(h, &gru->input_weights[2*N], N, M, stride, input); - gemm_accum(h, &gru->recurrent_weights[2*N], N, N, stride, tmp); - for (i=0;i<N;i++) - h[i] = z[i]*state[i] + (1-z[i])*tansig_approx(WEIGHTS_SCALE*h[i]); - for (i=0;i<N;i++) - state[i] = h[i]; + int j; + float hidden[MAX_NEURONS]; + const float *W = m->weights; + /* Copy to tmp_in */ + for (j=0;j<m->topo[1];j++) + { + int k; + float sum = *W++; + for (k=0;k<m->topo[0];k++) + sum = sum + in[k]**W++; + hidden[j] = tansig_approx(sum); + } + for (j=0;j<m->topo[2];j++) + { + int k; + float sum = *W++; + for (k=0;k<m->topo[1];k++) + sum = sum + hidden[k]**W++; + out[j] = tansig_approx(sum); + } } - +#endif diff --git a/thirdparty/opus/mlp.h b/thirdparty/opus/mlp.h index d7670550fd..618e246e2c 100644 --- a/thirdparty/opus/mlp.h +++ b/thirdparty/opus/mlp.h @@ -1,4 +1,5 @@ -/* Copyright (c) 2017 Jean-Marc Valin */ +/* Copyright (c) 2008-2011 Octasic Inc. + Written by Jean-Marc Valin */ /* Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions @@ -27,34 +28,16 @@ #ifndef _MLP_H_ #define _MLP_H_ -#include "opus_types.h" - -#define WEIGHTS_SCALE (1.f/128) - -#define MAX_NEURONS 32 +#include "arch.h" typedef struct { - const opus_int8 *bias; - const opus_int8 *input_weights; - int nb_inputs; - int nb_neurons; - int sigmoid; -} DenseLayer; - -typedef struct { - const opus_int8 *bias; - const opus_int8 *input_weights; - const opus_int8 *recurrent_weights; - int nb_inputs; - int nb_neurons; -} GRULayer; - -extern const DenseLayer layer0; -extern const GRULayer layer1; -extern const DenseLayer layer2; + int layers; + const int *topo; + const float *weights; +} MLP; -void compute_dense(const DenseLayer *layer, float *output, const float *input); +extern const MLP net; -void compute_gru(const GRULayer *gru, float *state, const float *input); +void mlp_process(const MLP *m, const float *in, float *out); #endif /* _MLP_H_ */ diff --git a/thirdparty/opus/mlp_data.c b/thirdparty/opus/mlp_data.c index ae4178df76..c2fda4e2e5 100644 --- a/thirdparty/opus/mlp_data.c +++ b/thirdparty/opus/mlp_data.c @@ -1,4 +1,5 @@ -/*This file is automatically generated from a Keras model*/ +/* The contents of this file was automatically generated by mlp_train.c + It contains multi-layer perceptron (MLP) weights. */ #ifdef HAVE_CONFIG_H #include "config.h" @@ -6,667 +7,103 @@ #include "mlp.h" -static const opus_int8 layer0_weights[800] = { - -30, -9, 2, -12, 5, -1, 8, 9, - 9, 8, -13, 18, -17, -34, -5, 17, - -11, 0, -4, 10, 2, 10, 15, -8, - 2, -1, 0, 5, 13, -3, -16, 1, - -5, 3, 7, -28, -13, 6, 36, -3, - 19, -60, -17, -28, 7, -11, -30, -7, - 2, -42, -21, -3, 6, -22, 33, -9, - 7, -30, 21, -14, 24, -11, -20, -18, - -5, -12, 12, -49, -50, -49, 16, 9, - -37, -1, 9, 34, -13, -31, -31, 12, - 16, 44, -42, 2, -9, 8, -18, -6, - 9, 36, 19, 11, 13, 12, -21, 3, - -28, -12, 3, 33, 25, -14, 11, 1, - -94, -39, 18, -12, -11, -15, -7, 49, - 52, 10, -43, 9, 57, 8, 21, -6, - 14, -15, 44, -8, 7, -30, -13, -2, - -9, 25, -2, -127, 18, -11, -52, 26, - -27, 27, 10, -10, 7, 43, 6, -24, - 41, 10, -18, -27, 10, 17, 9, 10, - -17, -10, 20, -6, 22, 55, 35, -80, - 36, 25, -24, -36, 15, 9, -19, 88, - 19, 64, -51, -35, 17, 0, -7, 41, - -16, 27, 4, 15, -1, 18, -16, 47, - -39, -54, -8, 13, -25, -20, 102, -18, - -5, 44, 11, -28, 71, 2, -51, -5, - 5, 2, -83, -9, -29, 8, 21, -53, - 58, -37, -7, 13, 38, 9, 34, -1, - -41, 21, 4, -24, -36, -33, -21, 32, - 75, -2, 1, -68, -1, 47, -29, 32, - 20, 12, -65, -87, 5, 16, -12, 24, - 40, 15, 7, 19, -26, -17, 17, 6, - -2, -37, -30, -9, 32, -127, -39, 0, - -31, -27, 4, -22, 23, -6, -77, 35, - -61, 32, -37, -24, 13, -11, -1, -40, - -3, 17, -7, 13, 11, 59, -19, 10, - 6, -18, 0, 13, 3, -6, -23, 19, - 11, -17, 13, -1, -80, 40, -53, 69, - -29, -54, 0, -4, 33, -25, -2, 38, - 35, 36, -15, 46, 2, -13, -16, -8, - -8, 12, -24, -9, -55, -5, -9, 32, - 11, 7, 12, -18, -10, -86, -38, 54, - 37, -25, 18, -43, 7, -27, -27, -54, - 13, 9, 22, 70, 6, 35, -7, 23, - -15, -44, -6, 7, -66, -85, 32, 40, - -19, -9, -7, 12, -15, 7, 2, 6, - -35, 11, 28, 0, 26, 14, 1, 1, - 4, 12, 18, 35, 22, -18, -3, 14, - -1, 7, 14, -8, -14, -3, 4, -3, - -19, -7, -1, -25, -27, 25, -26, -2, - 33, -22, -27, -25, 4, -9, 7, 21, - 26, -30, 10, -9, -20, 11, 27, 10, - 5, -18, 14, -4, 2, -17, -5, -7, - -9, -13, 15, 29, 1, -10, -16, -10, - 35, 36, -7, -22, -44, 17, 30, 22, - 21, -1, 22, -11, 32, -8, -7, 5, - -10, 5, 30, -20, 29, -20, -34, 12, - -4, -6, 6, -13, 10, -5, -68, -1, - 24, 9, 19, -24, -64, 31, 19, 27, - -26, 75, -45, 41, 39, -42, 8, 6, - 23, -30, 16, -25, 30, 34, 8, -38, - -3, 18, 16, -31, 22, -4, -9, 1, - 20, 9, 38, -32, 0, -45, 0, -6, - -13, 11, -25, -32, -22, 31, -24, -11, - -11, -4, -4, 20, -34, 22, 20, 9, - -25, 27, -5, 28, -29, 29, 6, 21, - -6, -18, 54, 4, -46, 23, 21, -14, - -31, 36, -41, -24, 4, 22, 10, 11, - 7, 36, -32, -13, -52, -17, 24, 28, - -37, -36, -1, 24, 9, -38, 35, 48, - 18, 2, -1, 45, 10, 39, 24, -38, - 13, 8, -16, 8, 25, 11, 7, -29, - -11, 7, 20, -30, -38, -45, 14, -18, - -28, -9, 65, 61, 22, -53, -38, -16, - 36, 46, 20, -39, 32, -61, -6, -6, - -36, -33, -18, -28, 56, 101, 45, 11, - -28, -23, -29, -61, 20, -47, 2, 48, - 27, -17, 1, 40, 1, 3, -51, 15, - 35, 28, 22, 35, 53, -61, -29, 12, - -6, -21, 10, 3, -20, 2, -25, 1, - -6, 31, 11, -3, 1, -10, -52, 6, - 126, -105, 122, 127, -128, 127, 127, -128, - 127, 108, 12, 127, 48, -128, -36, -128, - 127, 127, -128, -128, 127, 89, -128, 127, - -128, -128, -128, 127, 127, -128, -128, -93, - -82, 20, 125, 65, -82, 127, 38, -74, - 81, 88, -88, 79, 51, -47, -111, -26, - 14, 83, -88, -112, 24, 35, -101, 98, - -99, -48, -45, 46, 83, -60, -79, 45, - -20, -41, 9, 4, 52, 54, 93, -10, - 4, 13, 3, 123, 6, 94, -111, -69, - -14, -31, 10, 12, 53, -79, -11, -21, - -2, -44, -72, 92, 65, -57, 56, -38, - 127, -56, -128, 127, 127, -128, 86, 117, - -75, -128, 127, -19, -99, -112, 127, -128, - 127, -48, 114, 118, -128, -128, 117, -17, - -6, 121, -128, 127, -128, 82, 54, -106, - 127, 127, -33, 100, -39, -23, 18, -78, - -34, -29, -1, -30, 127, -26, 127, -128, - 126, -128, 27, -23, -79, -120, -127, 127, - 72, 66, 29, 7, -66, -56, -117, -128 -}; - -static const opus_int8 layer0_bias[32] = { - 51, -16, 1, 13, -5, -6, -16, -7, - 11, -6, 106, 26, 28, -14, 21, -29, - 7, 18, -18, -17, 21, -17, -9, 20, - -25, -3, -34, 48, 11, -13, -31, -20 -}; - -static const opus_int8 layer1_weights[2304] = { - 22, -1, -7, 7, 29, -27, -31, -17, - -13, 33, 44, -8, 11, 33, 24, 78, - 15, 19, 30, -2, -24, 5, 49, 5, - 36, 29, -14, -11, -48, -33, 21, -42, - -38, -12, 55, -37, 54, -8, 1, 36, - 17, 0, 51, 31, 59, 7, -12, 53, - 4, 32, -14, 48, 5, -10, -16, -8, - 1, -16, -56, -24, -6, 18, -2, 23, - 6, 46, -6, -10, 20, 35, -44, -15, - -49, 36, 16, 5, -7, -79, -67, 12, - 70, -3, -79, -54, -85, -24, 47, -22, - 33, 21, 69, -1, 11, 22, 14, -16, - -16, -22, -28, -11, 11, -41, 31, -26, - -33, -19, -4, 27, 32, -50, 5, -10, - -38, -22, -8, 35, -31, 1, -41, -15, - -11, 44, 28, -17, -41, -23, 17, 2, - -23, -26, -13, -13, -17, 6, 14, -31, - -25, 9, -19, 39, -8, 4, 31, -1, - -45, -11, -28, -92, -46, -15, 21, 118, - -22, 45, -51, 11, -20, -20, -15, 13, - -21, -97, -29, -32, -23, -42, 94, 1, - 23, -8, 63, -3, -46, 19, -26, 32, - -40, -74, -26, 26, -4, -13, 30, -20, - -30, -25, -14, -31, -45, -43, 4, -60, - -48, -12, -34, 2, 2, 3, 13, 15, - 11, 16, 5, 46, -9, -55, -16, -57, - 29, 14, 38, -50, -2, -44, -11, -8, - 52, -27, -38, -7, 20, 47, 17, -59, - 0, 47, 46, -63, 35, -17, 19, 33, - 68, -19, 2, 15, -16, 28, -16, -103, - 26, -35, 47, -39, -60, 30, 31, -23, - -52, -13, 116, 47, -25, 30, 40, 30, - -22, 2, 12, -27, -18, 31, -10, 27, - -8, -66, 12, 14, 4, -26, -28, -13, - 3, 13, -26, -51, 37, 5, 2, -21, - 47, 3, 13, 25, -41, -27, -8, -4, - 5, -76, -33, 28, 10, 9, -46, -74, - 19, 28, 25, 31, 54, -55, 68, 38, - -24, -32, 2, 4, 68, 11, -1, 99, - 5, 16, -2, -74, 40, 26, -26, 33, - 31, -1, -68, 14, -6, 25, 9, 29, - 60, 61, 7, -7, 0, -24, 7, 77, - 4, -1, 16, -7, 13, -15, -19, 28, - -31, -24, -16, 37, 24, 13, 30, 10, - -30, 11, 11, -10, 22, 60, 28, 45, - -3, -40, -62, -5, -102, 9, -32, -27, - -54, 21, 15, -5, 37, -43, -11, 37, - -19, 47, -64, -128, -27, -114, 21, -66, - 59, 46, -3, -12, -87, -9, 4, 19, - -113, -36, 78, 57, -26, -38, -77, -10, - 6, 6, -75, 25, -97, -11, 33, -46, - 1, 13, -21, -33, -20, 16, -6, -3, - -11, -4, -27, 38, 8, -41, -2, -33, - 18, 19, -26, 1, -29, -22, -4, -14, - -55, -11, -80, -3, 11, 34, 90, 51, - 11, 17, 43, 36, 127, -32, 29, 103, - 9, 27, 13, 64, 56, 70, -14, 3, - -12, 10, 37, 3, 12, -22, -10, 46, - 28, 10, 20, 26, -24, 18, 9, 7, - 14, 34, -5, -7, 31, -14, -56, 11, - -18, -8, -17, -7, -10, -40, 10, -33, - -32, -43, 5, 9, 11, -4, 10, 50, - -12, -5, 46, 9, 7, 1, 11, 15, - 91, -17, 7, -50, 23, 6, -30, -99, - 0, -17, 14, 8, -10, -25, -30, -69, - -62, 31, 127, 114, -23, 101, -5, -54, - -6, -22, 7, -56, 39, 18, -29, 0, - 46, 8, -79, 4, -21, 18, -32, 62, - -12, -8, -12, -58, 31, -32, 17, 6, - -24, 25, 24, 9, -4, -19, 45, 6, - 17, -14, 5, -27, 16, -4, -41, 25, - -36, 5, 15, 12, 50, 27, 25, 23, - -44, -69, -9, -19, -48, -8, 4, 12, - -6, 13, -19, -30, -36, 26, 37, -1, - -3, -30, -42, -14, -10, -20, 26, -54, - -27, -44, 4, 73, -26, 90, 32, -69, - -29, -16, 3, 103, 15, -17, 37, 24, - -23, -31, 33, -37, -64, 25, 13, -81, - -28, -32, 27, 5, -35, -23, 15, -22, - 19, -7, 9, 30, 19, -23, 27, -13, - 43, 29, -29, -6, 9, -40, -33, -33, - -32, 9, 11, -48, -8, -23, -52, 46, - 17, -22, -42, 35, -15, -41, 16, 34, - 31, -42, -19, -11, 55, 7, -39, 89, - -11, -33, 20, -14, 22, 32, 3, -17, - -6, 14, 34, 1, 55, -21, -90, -8, - 18, 27, 13, -29, 21, 15, -33, -51, - -9, -11, 4, -16, -18, 23, -4, -4, - 48, 1, 7, 29, -14, -12, -16, 17, - 35, 8, 0, -7, -2, 9, 8, 17, - -6, 53, -32, -21, -50, 5, 99, -60, - -5, -53, 10, -31, 12, -5, 7, 80, - 36, 18, -31, 9, 98, 36, -63, -35, - 4, -13, -28, -24, 28, -13, 18, 16, - -1, -18, -34, 10, 20, 7, 4, 29, - 11, 25, -7, 36, 14, 45, 24, 1, - -16, 30, 6, 35, -6, -11, -24, 13, - -1, 27, 39, 20, 48, -11, -4, -13, - 28, 11, -31, -18, 31, -29, 22, -2, - -20, -16, 5, 30, -12, -28, -3, 93, - -16, 23, 18, -29, 6, -54, -37, 28, - -3, -3, -47, -3, -36, -55, -3, 41, - -10, 47, -2, 23, 42, -7, -71, -27, - 83, -64, 7, -24, 8, 26, -17, 15, - 12, 31, -30, -38, -13, -33, -56, 4, - -17, 20, 18, 1, -30, -5, -6, -31, - -14, -37, 0, 22, 10, -30, 37, -17, - 18, 6, 5, 23, -36, -32, 14, 18, - -13, -61, -52, -69, 44, -30, 16, 18, - -4, -25, 14, 81, 26, -8, -23, -59, - 52, -104, 17, 119, -32, 26, 17, 1, - 23, 45, 29, -64, -57, -14, 73, 21, - -13, -13, 9, -68, -7, -52, 3, 24, - -39, 44, -15, 27, 14, 19, -9, -28, - -11, 5, 3, -34, -2, 2, 22, -6, - -23, 4, 3, 13, -22, -13, -10, -18, - 29, 6, 44, -13, -24, -8, 2, 30, - 14, 43, 6, 17, -73, -6, -7, 20, - -80, -7, -7, -28, 15, -69, -38, -5, - -100, -35, 15, -79, 23, 29, -18, -27, - 21, -66, -37, 8, -22, -39, 48, 4, - -13, 1, -9, 11, -29, 22, 6, -49, - 32, -14, 47, -18, -4, 44, -52, -74, - 43, 30, 23, -14, 5, 0, -27, 4, - -7, 10, -4, 10, 1, -16, 11, -18, - -2, -5, 2, -11, 0, -20, -4, 38, - 74, 59, 39, 64, -10, 26, -3, -40, - -68, 3, -30, -51, 8, -19, -27, -46, - 51, 52, 54, 36, 90, 92, 14, 13, - -5, 0, 16, -62, 16, 11, -47, -37, - -6, -5, 21, 54, -57, 32, 42, -6, - 62, -9, 16, 21, 24, 9, -10, -4, - 33, 50, 13, -15, 1, -35, -48, 18, - -11, -17, -67, -13, 21, 38, -44, 36, - -16, 29, 17, 5, -10, 18, 17, -32, - 2, 8, 22, -56, -15, -32, 40, 43, - 19, 46, -7, -100, -96, 19, 53, 24, - 21, -26, -48, -101, -82, 61, 38, -85, - -28, -34, -1, 63, -5, -5, 39, 39, - -38, 32, -12, -28, 20, 40, -8, 2, - 31, 12, -35, -13, 20, -25, 30, 8, - 3, -13, -9, -20, 2, -13, 24, 37, - -10, 33, 6, 20, -16, -24, -6, -6, - -19, -5, 22, 21, 10, 11, -4, -39, - -1, 6, 49, 41, -15, -57, 21, -62, - 77, -69, -13, 0, -74, 1, -7, -38, - -8, 6, 63, 28, 4, 26, -52, 82, - 63, 13, 45, -33, 44, -52, -65, -21, - -46, -49, 64, -17, 32, 24, 68, -39, - -16, -5, -26, 28, 5, -61, -28, 2, - 24, 11, -12, -33, 9, -37, -3, -28, - 22, -37, -12, 19, 0, -18, -2, 14, - 1, 4, 8, -9, -2, 43, -17, -2, - -66, -31, 56, -40, -87, -36, -2, -4, - -42, -45, -1, 31, -43, -15, 27, 63, - -11, 32, -10, -33, 27, -19, 4, 15, - -26, -34, 29, -4, -39, -65, 14, -20, - -21, -17, -36, 13, 59, 47, -38, -33, - 13, -37, -8, -37, -7, -6, -76, -31, - -12, -46, 7, 24, -21, -30, -14, 9, - 15, -12, -13, 47, -27, -25, -1, -39, - 0, 20, -9, 6, 7, 4, 3, 7, - 39, 50, 22, -7, 14, -20, 1, 70, - -28, 29, -41, 10, -16, -5, -28, -2, - -37, 32, -18, 17, 62, -11, -20, -50, - 36, 21, -62, -12, -56, 52, 50, 17, - 3, 48, 44, -41, -25, 3, 16, -3, - 0, 33, -6, 15, 27, 34, -25, 22, - 9, 17, -11, 36, 16, -2, 12, 21, - -52, 45, -2, -10, 46, 21, -18, 67, - -28, -13, 30, 37, 42, 16, -9, 11, - 75, 7, -64, -40, -10, 29, 57, -23, - 5, 53, -77, 3, -17, -5, 47, -55, - -35, -36, -13, 52, -53, -71, 52, -111, - -23, -26, -28, 29, -43, 55, -19, 43, - -19, 54, -12, -33, -44, -39, -19, -10, - -31, -10, 21, 38, -57, -20, 2, -25, - 8, -6, 50, 12, 15, 25, -25, 15, - -30, -6, 9, 25, 37, 19, -4, 31, - -22, 2, 4, 2, 36, 7, 3, -34, - -80, 36, -10, -2, -5, 31, -36, 49, - -70, 20, -36, 21, 24, 25, -46, -51, - 36, -58, -48, -40, -10, 55, 71, 47, - 10, -1, 1, 2, -46, -68, 16, 13, - 0, -74, -29, 73, -52, -18, -11, 7, - -44, -82, -32, -70, -28, -1, -39, -68, - -6, -41, 12, -22, -16, 40, -11, -25, - 51, -9, 21, 4, 4, -34, 7, -78, - 16, 6, -38, -30, -2, -44, 32, 0, - 22, 64, 5, -72, -2, -14, -10, -16, - -8, -25, 12, 102, -58, 37, -10, -23, - 15, 49, 7, -7, 2, -20, -32, 45, - -6, 48, 28, 30, 33, -1, 22, -6, - 30, 65, -17, 29, 74, 37, -26, -10, - 15, -24, 19, -66, 22, -10, -31, -1, - -18, -9, 11, 37, -4, 45, 5, 41, - 17, 1, 1, 24, -58, 41, 5, -51, - 14, 8, 43, 16, -10, -1, 45, 32, - -64, 3, -33, -25, -3, -27, -68, 12, - 23, -11, -13, -37, -40, 4, -21, -12, - 32, -23, -19, 76, 41, -23, -24, -44, - -65, -1, -15, 1, 71, 63, 5, 20, - -3, 21, -23, 31, -32, 18, -2, 27, - 31, 46, -5, -39, -5, -35, 18, -18, - -40, -10, 3, 12, 2, -2, -22, 40, - 5, -6, 60, 36, 3, 29, -27, 10, - 25, -54, 5, 26, 39, 35, -24, -37, - 30, -91, 28, -4, -21, -27, -39, -6, - 5, 12, -128, 38, -16, 29, -95, -29, - 82, -2, 35, 2, 12, 8, -22, 10, - 80, -47, 2, -25, -73, -79, 16, -30, - -32, -66, 48, 21, -45, -11, -47, 14, - -27, -17, -7, 15, -44, -14, -44, -26, - -32, 26, -23, 17, -7, -28, 26, -6, - 28, 6, -26, 2, 13, -14, -23, -14, - 19, 46, 16, 2, -33, -21, 28, -17, - -42, 44, -37, 1, -39, 28, 84, -46, - 15, 10, 13, -44, 72, -26, 26, 32, - -28, -12, -83, 2, 10, -30, -44, -10, - -28, 53, 45, 65, 0, -25, 57, 36, - -33, 6, 29, 44, -53, 11, 19, -2, - -27, 35, 32, 49, 4, 23, 38, 36, - 24, 10, 51, -39, 4, -7, 26, 37, - -35, 11, -47, -18, 28, 16, -35, 42, - 17, -21, -41, 28, 14, -12, 11, -45, - 7, -43, -15, 18, -5, 38, -40, -50, - -30, -21, 9, -98, 13, 12, 23, 75, - -56, -7, -3, -4, -1, -34, 12, -49, - 11, 26, -18, -28, -17, 33, 13, -14, - 40, 24, -72, -37, 10, 17, -6, 22, - 16, 16, -6, -12, -30, -14, 10, 40, - -23, 12, 15, -3, -15, 13, -56, -4, - -30, 1, -3, -17, 27, 50, -5, 64, - -36, -19, 7, 29, 22, 25, 9, -16, - -58, -69, -40, -61, -71, -14, 42, 93, - 26, 11, -6, -58, -11, 70, -52, 19, - 9, -30, -33, 11, -37, -47, -21, -22, - -40, 10, 47, 4, -23, 17, 48, 41, - -48, 14, 10, 15, 34, -23, -2, -47, - 23, -32, -13, -10, -26, -26, -4, 16, - 38, -14, 0, -12, -7, -7, 20, 44, - -1, -32, -27, -16, 4, -6, -18, 14, - 5, 4, -29, 28, 7, -7, 15, -11, - -20, -45, -36, 16, 84, 34, -59, -30, - 22, 126, 8, 68, 79, -17, 21, -68, - 37, 5, 15, 63, 49, 127, -90, 85, - 43, 7, 16, 9, 6, -45, -57, -43, - 57, 11, -23, -11, -29, 60, -26, 0, - 7, 42, -24, 10, 23, -25, 8, -7, - -40, 19, -17, 35, 4, 27, -39, -91, - 27, -36, 34, 2, 16, -24, 25, 7, - -21, 5, 17, 10, -22, -30, 9, -17, - -61, -26, 33, 21, 58, -51, -14, 69, - -38, 20, 7, 80, -4, -65, -6, -27, - 53, -12, 47, -1, -15, 1, 60, 102, - -79, -4, 12, 9, 22, 37, -8, -4, - 37, 2, -3, -15, -16, -11, -5, 19, - -6, -43, 20, -25, -18, 10, -27, 0, - -28, -27, -11, 10, -18, -2, -4, -16, - 26, 14, -6, 7, -6, 1, 53, -2, - -29, 23, 9, -30, -6, -4, -6, 56, - 70, 0, -33, -20, -17, -9, -24, 46, - -5, -105, 47, -46, -51, 20, 20, -53, - -81, -1, -7, 75, -5, -21, -65, 12, - -52, 22, -50, -12, 49, 54, 76, -81, - 10, 45, -41, -59, 18, -19, 25, 14, - -31, -53, -5, 12, 31, 84, -23, 2, - 7, 2, 10, -32, 39, -2, -12, 1, - -9, 0, -10, -11, 9, 15, -8, -2, - 2, -1, 10, 14, -5, -40, 19, -7, - -7, 26, -4, 2, 1, -27, 35, 32, - 21, -31, 26, 43, -9, 4, -32, 40, - -62, -52, 36, 22, 38, 22, 36, -96, - 6, -10, -23, -49, 15, -33, -18, -3, - 0, 41, 21, -19, 21, 23, -39, -23, - -6, 6, 47, 56, 4, 74, 0, -98, - 29, -47, -14, -36, 21, -22, 22, 16, - 13, 12, 16, -5, 13, 17, -13, -15, - 1, -34, -26, 26, 12, 32, 27, 13, - -67, 27, 2, 8, 10, 18, 16, 20, - -17, -17, 57, -64, 5, 14, 19, 31, - -18, -44, -46, -16, 4, -25, 17, -126, - -24, 39, 4, 8, 55, -25, -34, 39, - -16, 3, 9, 71, 72, -31, -55, 6, - 10, -25, 32, -85, -21, 18, -8, 15, - 12, -27, -7, 1, -21, -2, -5, 48, - -16, 18, 1, -22, -26, 16, 14, -31, - 27, -6, -15, -21, 4, -14, 18, -36 -}; - -static const opus_int8 layer1_recur_weights[1728] = { - 20, 67, -99, 12, 41, -25, 49, -44, - 35, 81, 110, 47, 34, -66, -14, 14, - -60, 34, 29, -73, 10, 41, 35, 89, - 7, -35, 22, 7, 27, -20, -6, 56, - 26, 66, 6, 33, -55, 53, 1, -21, - 14, 17, 68, 55, 59, 0, 18, -9, - 5, -41, 6, -5, -114, -12, 29, 42, - -23, 10, 81, -27, 20, -53, -30, -62, - 40, 95, 25, -4, 3, 18, -8, -15, - -29, -82, 2, -57, -3, -61, -29, -29, - 49, 2, -55, 5, -69, -99, -49, -51, - 6, -25, 12, 89, 44, -33, 5, 41, - 1, 23, -37, -37, -28, -48, 3, 4, - -41, -30, -57, -35, -39, -1, -13, -56, - -5, 50, 49, 41, -4, -4, 33, -22, - -1, 33, 34, 18, 40, -42, 12, 1, - -6, -2, 18, 17, 39, 44, 11, 65, - -60, -45, 10, 91, 21, 9, -62, -11, - 8, 69, 37, 24, -30, 21, 26, -27, - 1, -28, 24, 66, -8, 6, -71, 34, - 24, 44, 58, -78, -19, 57, 17, -60, - 1, 12, -3, -1, -40, 22, 11, -5, - 25, 12, 1, 72, 79, 7, -50, 23, - 18, 13, 21, -11, -20, 5, 77, -94, - 24, 15, 57, -51, 3, 36, 53, -1, - 4, 14, 30, -31, 22, 40, 32, -11, - -34, -36, -59, 58, 25, 21, -54, -23, - 40, 46, 18, 0, 12, 54, -96, -99, - -59, 5, 119, -38, 50, 55, 12, -16, - 67, 0, 34, 35, 39, 35, -1, 69, - 24, 27, -30, -35, -4, -70, 2, -44, - -7, -6, 19, -9, 60, 44, -21, -10, - 37, 43, -16, -3, 30, -15, -65, 31, - -55, 18, -98, 76, 64, 25, 24, -18, - -7, -68, -10, 38, 27, -60, 36, 33, - 16, 30, 34, -39, -37, 31, 12, 53, - -54, 14, -26, -49, -128, -13, -5, -22, - -11, -85, 55, -8, -51, -11, -33, -10, - -31, -76, -41, 23, 44, -40, -54, -127, - -101, 19, -23, -15, 15, 27, 58, -60, - 8, 14, -33, 1, 48, -9, -11, -123, - 3, 53, 23, 4, -28, 22, 2, -29, - -67, 36, 12, 7, 55, -21, 88, 20, - -1, -21, -17, 3, 41, 32, -10, -14, - -5, -57, 67, 57, 21, 23, -2, -27, - -73, -24, 120, 21, 18, -35, 42, -7, - 3, -45, -25, 76, -34, 50, 11, -54, - -91, 3, -113, -20, -5, 47, 15, -47, - 17, 27, -3, -26, -7, 10, 7, 74, - -40, 64, -7, -5, -24, -49, -24, -3, - -10, 27, -17, -8, -3, 14, -27, 33, - 13, 39, 28, -7, -38, 29, 16, 44, - 19, 55, -3, 9, -13, -57, 43, 43, - 31, 0, -93, -17, 19, -56, 4, -12, - -25, 37, -85, -13, -118, 33, -17, 56, - 71, -80, -4, 6, -11, -18, 47, -52, - 25, 9, 48, -107, 1, 21, 20, -3, - 10, -16, -4, 24, 17, 31, -61, -18, - -50, 24, -10, 12, 71, 26, 11, -3, - 4, 1, 0, -7, -40, 18, 38, -34, - 38, 17, 8, -34, 2, 21, 123, -32, - -26, 43, 14, -34, -1, -9, 37, -16, - 6, -17, -62, 68, 22, 17, 11, -75, - 33, -80, 62, -9, -75, 76, 36, -41, - -8, -40, -11, -71, 40, -39, 62, -49, - -81, 16, -9, -52, 52, 61, 17, -103, - -27, -10, -8, -54, -57, 21, 23, -16, - -52, 36, 18, 10, -5, 8, 15, -29, - 5, -19, -37, 8, -53, 6, 19, -37, - 38, -17, 48, 10, 0, 81, 46, 70, - -29, 101, 11, 44, -44, -3, 24, 11, - 3, 14, -9, 11, 14, -45, 13, 46, - -3, -57, 68, 44, 63, 98, 25, -28, - -23, 15, 32, -10, 53, -6, -2, -9, - -6, 16, -107, -11, -11, -28, 59, 57, - -22, 38, 42, 83, 27, 5, 29, -30, - 12, -21, -13, 31, 38, -21, 58, -10, - -10, -15, -2, -5, 11, 12, -73, -28, - -38, 22, 2, -25, 73, -52, -12, -55, - 32, -63, 21, 51, 33, 52, -26, 55, - -26, -26, 57, -32, -4, -52, -61, 21, - -33, -91, -51, 69, -90, -53, -38, -44, - 12, -76, -20, 77, -45, -7, 86, 43, - -109, -33, -105, -40, -121, -10, 0, -72, - 45, -51, -75, -49, -38, -1, -62, 18, - -1, 30, -44, -14, -10, -67, 40, -10, - -34, 46, -64, -32, 29, -13, 33, 3, - -32, -5, 28, -27, -25, 93, 24, 68, - -40, 57, 23, -3, -21, -58, 17, -39, - -17, -22, -89, 11, 18, -46, 27, 24, - 46, 127, 61, 87, 31, 127, -36, 47, - -23, 47, 127, -24, 110, 122, 30, 100, - 0, 96, -12, 6, 50, 44, -13, 73, - 4, 55, -11, -15, 49, 42, -6, 20, - -35, 58, 18, 38, 42, 72, 19, -21, - 11, 9, -37, 7, 29, 31, 16, -17, - 13, -50, 19, 5, -23, 51, -16, -5, - 4, -24, 76, 10, -53, -28, -7, -65, - 74, 40, -16, -29, 32, -16, -49, -35, - -3, 59, -96, -50, -43, -43, -61, -15, - -8, -36, -34, -33, -14, 11, -3, -39, - 4, -114, -123, -11, -49, -21, 14, -56, - 1, 43, -63, 26, 40, 18, -10, -26, - -14, -15, -35, -35, -11, 32, -44, -67, - 2, 22, 7, 3, -9, -30, -51, -28, - 28, 6, -22, 16, 34, -25, -52, -54, - -8, -6, 5, 8, 20, -16, -17, -44, - 27, 3, 31, -5, -48, -1, -3, 116, - 11, 71, -31, -47, 109, 50, -22, -12, - -57, 32, 66, 8, -25, -93, -54, -10, - 19, -76, -34, 97, 48, -36, -18, -30, - -39, -26, -12, 28, 14, 12, -12, -31, - 38, 2, 10, 4, -40, 20, 16, -61, - 2, 64, 39, 5, 15, 33, 40, -61, - -49, 93, -10, 33, 28, -11, -27, -18, - 39, -62, -6, -6, 62, 11, -8, 38, - -67, 12, 27, 39, -27, 123, -18, -6, - -65, 83, -64, 20, 19, -11, 33, 24, - 17, 56, 78, 7, -15, 54, -101, -9, - 115, -96, 50, 51, 35, 34, 27, 37, - -40, -11, 8, -36, 42, -45, 2, -23, - 0, 67, -8, -9, -13, 50, -14, -27, - 4, 0, -8, -14, 30, -9, 29, 15, - 9, -38, 37, -8, 50, -46, 54, 41, - -11, -8, -11, -26, 39, 45, 14, -26, - -17, -27, 69, 38, 39, 98, 66, 0, - 42, 123, -101, -19, -83, 117, -32, 56, - 10, 12, -88, 79, -53, 56, 63, 95, - -62, 9, 36, -13, -79, -16, 37, -46, - 35, -34, 14, 17, -54, 5, 21, -7, - 7, 63, 56, 15, 27, -76, -25, 4, - -26, -63, 28, -67, -52, 43, -47, -70, - 40, -12, 40, -66, -37, 0, 35, 37, - -53, 4, -17, -51, 11, 21, 14, -34, - -4, 24, -42, 29, 22, 7, 28, 12, - 37, 39, -39, -19, 65, -60, -50, -2, - 1, 82, 39, 19, -23, -43, -22, -67, - -35, -34, 32, 102, 81, 127, 36, 67, - -45, 1, -67, -52, -4, 35, 20, 28, - 71, 86, -35, -9, -83, -34, 12, 9, - -23, 2, 14, 28, -23, 7, -25, 45, - 7, 17, -37, 0, -19, 31, 26, 40, - -27, -16, 17, 5, -21, 23, 24, 96, - -55, 52, -19, -14, -6, 1, 50, -34, - 86, -53, 38, 2, -52, -36, -13, 60, - -85, -120, 32, 7, -12, 22, 70, -7, - -94, 38, -76, -31, -20, 15, -28, 7, - 6, 40, 53, 88, 3, 38, 18, -8, - -22, -23, 51, 37, -9, 13, -32, 25, - -21, 27, 31, 20, 18, -9, -13, 1, - 21, -24, -13, 39, 15, -11, -29, -36, - 18, 15, 8, 27, 21, -94, -1, -22, - 49, 66, -1, 6, -3, -40, -18, 6, - 28, 12, 33, -59, 62, 60, -48, 90, - -1, 108, 9, 18, -2, 27, 77, -65, - 82, -48, -38, -19, -11, 127, 50, 66, - 18, -13, -22, 60, -38, 40, -14, -26, - -13, 38, 67, 57, 30, 33, 26, 36, - 38, -17, 27, -28, 20, 12, -64, 18, - 5, -33, -27, 13, -26, 32, 35, -5, - -48, -14, 92, 43, -47, -14, 40, 11, - 51, 66, 22, -63, -16, -61, 4, -28, - 27, 20, -33, -30, -21, -29, -53, 31, - -40, 24, 43, -4, -19, 21, 67, 20, - 100, -16, -93, 78, -6, -18, -52, -37, - -9, 66, -31, -8, 26, 18, 4, 24, - -22, 17, -2, -13, 27, 0, 8, -18, - -25, 5, -21, -24, -7, 18, -93, 21, - 7, 2, -75, 69, 50, -5, -15, -17, - 60, -42, 55, 1, -4, 3, 10, 46, - 16, -13, 45, -7, -10, -44, -108, 49, - 2, -15, -64, -12, -72, 32, -38, -45, - 10, -54, 13, -13, -27, -36, -64, 58, - -62, -101, 88, -86, -71, -39, -9, -128, - 32, 15, -4, 54, -16, -39, -26, -36, - 46, 48, -64, -10, 19, 30, -13, 34, - -8, 50, 60, -22, -6, -11, -30, 5, - 50, 32, 56, 0, 25, 6, 68, 11, - -29, 45, -9, -12, 4, 1, 18, -49, - 0, -38, -19, 90, 29, 35, 51, 8, - -48, 96, -1, -12, -9, -32, -63, -65, - -7, 38, 89, 28, -85, -28, -23, -25, - -128, 56, 79, -36, 99, -6, -37, 7, - -13, -69, -46, -29, 25, 64, -21, 17, - 1, 42, -66, 1, 80, 26, -32, 21, - 15, 15, 6, 6, -10, 15, 127, 5, - 38, 27, 87, -57, -25, 11, 72, -21, - -5, 11, -13, -66, 78, 36, -3, 41, - -21, 8, -33, 23, 73, 28, 57, -25, - -5, 4, -22, -47, 15, 4, -57, -72, - 33, 1, 18, 2, 53, -71, -99, -21, - -3, -111, 108, 71, -14, 82, 25, 61, - -48, 5, 9, -51, -20, -25, -3, 14, - -33, 14, -3, -34, 22, 12, -19, -38, - -16, 2, 21, 16, 26, -31, 75, 44, - -31, 16, 26, 66, 17, -9, -22, -22, - 22, -44, 22, 27, 2, 58, -14, 10, - -73, -42, 55, -25, -61, 72, -1, 30, - -58, -25, 63, 26, -48, -40, 26, -30, - 60, 8, -17, -1, -18, -20, 43, -20, - -4, -28, 127, -106, 29, 70, 64, -27, - 39, -33, -5, -88, -40, -52, 26, 44, - -17, 23, 2, -49, 22, -9, -8, 86, - 49, -43, -60, 1, 10, 45, 36, -53, - -4, 33, 38, 48, -72, 1, 19, 21, - -65, 4, -5, -62, 27, -25, 17, -6, - 6, -45, -39, -46, 4, 26, 127, -9, - 18, -33, -18, -3, 33, 2, -5, 15, - -26, -22, -117, -63, -17, -59, 61, -74, - 7, -47, -58, -128, -67, 15, -16, -128, - 12, 2, 20, 9, -48, -40, 43, 3, - -40, -16, -38, -6, -22, -28, -16, -59, - -22, 6, -5, 11, -12, -66, -40, 27, - -62, -44, -19, 38, -3, 39, -8, 40, - -24, 13, 21, 50, -60, -22, 53, -29, - -6, 1, 22, -59, 0, 17, -39, 115 -}; +/* RMS error was 0.138320, seed was 1361535663 */ -static const opus_int8 layer1_bias[72] = { - -42, 20, 16, 0, 105, 60, 1, -97, - 24, 60, 18, 13, 62, 25, 127, 34, - 79, 55, 118, 127, 95, 31, -4, 87, - 21, 12, 2, -14, 18, 23, 8, 17, - -1, -8, 5, 4, 24, 37, 21, 13, - 36, 13, 17, 18, 37, 30, 33, 1, - 8, -16, -11, -5, -31, -3, -5, 0, - 6, 3, 58, -7, -1, -16, 5, -13, - 16, 10, -2, -14, 11, -4, 3, -11 -}; +static const float weights[422] = { -static const opus_int8 layer2_weights[48] = { - -113, -88, 31, -128, -126, -61, 85, -35, - 118, -128, -61, 127, -128, -17, -128, 127, - 104, -9, -128, 33, 45, 127, 5, 83, - 84, -128, -85, -128, -45, 48, -53, -128, - 46, 127, -17, 125, 117, -41, -117, -91, - -127, -68, -1, -89, -80, 32, 106, 7 -}; +/* hidden layer */ +-0.0941125f, -0.302976f, -0.603555f, -0.19393f, -0.185983f, +-0.601617f, -0.0465317f, -0.114563f, -0.103599f, -0.618938f, +-0.317859f, -0.169949f, -0.0702885f, 0.148065f, 0.409524f, +0.548432f, 0.367649f, -0.494393f, 0.764306f, -1.83957f, +0.170849f, 12.786f, -1.08848f, -1.27284f, -16.2606f, +24.1773f, -5.57454f, -0.17276f, -0.163388f, -0.224421f, +-0.0948944f, -0.0728695f, -0.26557f, -0.100283f, -0.0515459f, +-0.146142f, -0.120674f, -0.180655f, 0.12857f, 0.442138f, +-0.493735f, 0.167767f, 0.206699f, -0.197567f, 0.417999f, +1.50364f, -0.773341f, -10.0401f, 0.401872f, 2.97966f, +15.2165f, -1.88905f, -1.19254f, 0.0285397f, -0.00405139f, +0.0707565f, 0.00825699f, -0.0927269f, -0.010393f, -0.00428882f, +-0.00489743f, -0.0709731f, -0.00255992f, 0.0395619f, 0.226424f, +0.0325231f, 0.162175f, -0.100118f, 0.485789f, 0.12697f, +0.285937f, 0.0155637f, 0.10546f, 3.05558f, 1.15059f, +-1.00904f, -1.83088f, 3.31766f, -3.42516f, -0.119135f, +-0.0405654f, 0.00690068f, 0.0179877f, -0.0382487f, 0.00597941f, +-0.0183611f, 0.00190395f, -0.144322f, -0.0435671f, 0.000990594f, +0.221087f, 0.142405f, 0.484066f, 0.404395f, 0.511955f, +-0.237255f, 0.241742f, 0.35045f, -0.699428f, 10.3993f, +2.6507f, -2.43459f, -4.18838f, 1.05928f, 1.71067f, +0.00667811f, -0.0721335f, -0.0397346f, 0.0362704f, -0.11496f, +-0.0235776f, 0.0082161f, -0.0141741f, -0.0329699f, -0.0354253f, +0.00277404f, -0.290654f, -1.14767f, -0.319157f, -0.686544f, +0.36897f, 0.478899f, 0.182579f, -0.411069f, 0.881104f, +-4.60683f, 1.4697f, 0.335845f, -1.81905f, -30.1699f, +5.55225f, 0.0019508f, -0.123576f, -0.0727332f, -0.0641597f, +-0.0534458f, -0.108166f, -0.0937368f, -0.0697883f, -0.0275475f, +-0.192309f, -0.110074f, 0.285375f, -0.405597f, 0.0926724f, +-0.287881f, -0.851193f, -0.099493f, -0.233764f, -1.2852f, +1.13611f, 3.12168f, -0.0699f, -1.86216f, 2.65292f, +-7.31036f, 2.44776f, -0.00111802f, -0.0632786f, -0.0376296f, +-0.149851f, 0.142963f, 0.184368f, 0.123433f, 0.0756158f, +0.117312f, 0.0933395f, 0.0692163f, 0.0842592f, 0.0704683f, +0.0589963f, 0.0942205f, -0.448862f, 0.0262677f, 0.270352f, +-0.262317f, 0.172586f, 2.00227f, -0.159216f, 0.038422f, +10.2073f, 4.15536f, -2.3407f, -0.0550265f, 0.00964792f, +-0.141336f, 0.0274501f, 0.0343921f, -0.0487428f, 0.0950172f, +-0.00775017f, -0.0372492f, -0.00548121f, -0.0663695f, 0.0960506f, +-0.200008f, -0.0412827f, 0.58728f, 0.0515787f, 0.337254f, +0.855024f, 0.668371f, -0.114904f, -3.62962f, -0.467477f, +-0.215472f, 2.61537f, 0.406117f, -1.36373f, 0.0425394f, +0.12208f, 0.0934502f, 0.123055f, 0.0340935f, -0.142466f, +0.035037f, -0.0490666f, 0.0733208f, 0.0576672f, 0.123984f, +-0.0517194f, -0.253018f, 0.590565f, 0.145849f, 0.315185f, +0.221534f, -0.149081f, 0.216161f, -0.349575f, 24.5664f, +-0.994196f, 0.614289f, -18.7905f, -2.83277f, -0.716801f, +-0.347201f, 0.479515f, -0.246027f, 0.0758683f, 0.137293f, +-0.17781f, 0.118751f, -0.00108329f, -0.237334f, 0.355732f, +-0.12991f, -0.0547627f, -0.318576f, -0.325524f, 0.180494f, +-0.0625604f, 0.141219f, 0.344064f, 0.37658f, -0.591772f, +5.8427f, -0.38075f, 0.221894f, -1.41934f, -1.87943e+06f, +1.34114f, 0.0283355f, -0.0447856f, -0.0211466f, -0.0256927f, +0.0139618f, 0.0207934f, -0.0107666f, 0.0110969f, 0.0586069f, +-0.0253545f, -0.0328433f, 0.11872f, -0.216943f, 0.145748f, +0.119808f, -0.0915211f, -0.120647f, -0.0787719f, -0.143644f, +-0.595116f, -1.152f, -1.25335f, -1.17092f, 4.34023f, +-975268.f, -1.37033f, -0.0401123f, 0.210602f, -0.136656f, +0.135962f, -0.0523293f, 0.0444604f, 0.0143928f, 0.00412666f, +-0.0193003f, 0.218452f, -0.110204f, -2.02563f, 0.918238f, +-2.45362f, 1.19542f, -0.061362f, -1.92243f, 0.308111f, +0.49764f, 0.912356f, 0.209272f, -2.34525f, 2.19326f, +-6.47121f, 1.69771f, -0.725123f, 0.0118929f, 0.0377944f, +0.0554003f, 0.0226452f, -0.0704421f, -0.0300309f, 0.0122978f, +-0.0041782f, -0.0686612f, 0.0313115f, 0.039111f, 0.364111f, +-0.0945548f, 0.0229876f, -0.17414f, 0.329795f, 0.114714f, +0.30022f, 0.106997f, 0.132355f, 5.79932f, 0.908058f, +-0.905324f, -3.3561f, 0.190647f, 0.184211f, -0.673648f, +0.231807f, -0.0586222f, 0.230752f, -0.438277f, 0.245857f, +-0.17215f, 0.0876383f, -0.720512f, 0.162515f, 0.0170571f, +0.101781f, 0.388477f, 1.32931f, 1.08548f, -0.936301f, +-2.36958f, -6.71988f, -3.44376f, 2.13818f, 14.2318f, +4.91459f, -3.09052f, -9.69191f, -0.768234f, 1.79604f, +0.0549653f, 0.163399f, 0.0797025f, 0.0343933f, -0.0555876f, +-0.00505673f, 0.0187258f, 0.0326628f, 0.0231486f, 0.15573f, +0.0476223f, -0.254824f, 1.60155f, -0.801221f, 2.55496f, +0.737629f, -1.36249f, -0.695463f, -2.44301f, -1.73188f, +3.95279f, 1.89068f, 0.486087f, -11.3343f, 3.9416e+06f, -static const opus_int8 layer2_bias[2] = { - 14, 117 -}; +/* output layer */ +-0.381439f, 0.12115f, -0.906927f, 2.93878f, 1.6388f, +0.882811f, 0.874344f, 1.21726f, -0.874545f, 0.321706f, +0.785055f, 0.946558f, -0.575066f, -3.46553f, 0.884905f, +0.0924047f, -9.90712f, 0.391338f, 0.160103f, -2.04954f, +4.1455f, 0.0684029f, -0.144761f, -0.285282f, 0.379244f, +-1.1584f, -0.0277241f, -9.85f, -4.82386f, 3.71333f, +3.87308f, 3.52558f}; -const DenseLayer layer0 = { - layer0_bias, - layer0_weights, - 25, 32, 0 -}; +static const int topo[3] = {25, 15, 2}; -const GRULayer layer1 = { - layer1_bias, - layer1_weights, - layer1_recur_weights, - 32, 24 +const MLP net = { + 3, + topo, + weights }; - -const DenseLayer layer2 = { - layer2_bias, - layer2_weights, - 24, 2, 1 -}; - diff --git a/thirdparty/opus/opus.c b/thirdparty/opus/opus.c index 538b5ea74e..f76f125cfa 100644 --- a/thirdparty/opus/opus.c +++ b/thirdparty/opus/opus.c @@ -107,7 +107,7 @@ OPUS_EXPORT void opus_pcm_soft_clip(float *_x, int N, int C, float *declip_mem) /* Slightly boost "a" by 2^-22. This is just enough to ensure -ffast-math does not cause output values larger than +/-1, but small enough not to matter even for 24-bit output. */ - a += a*2.4e-7f; + a += a*2.4e-7; if (x[i*C]>0) a = -a; /* Apply soft clipping */ @@ -252,7 +252,7 @@ int opus_packet_parse_impl(const unsigned char *data, opus_int32 len, /* Number of frames encoded in bits 0 to 5 */ ch = *data++; count = ch&0x3F; - if (count <= 0 || framesize*(opus_int32)count > 5760) + if (count <= 0 || framesize*count > 5760) return OPUS_INVALID_PACKET; len--; /* Padding flag is bit 6 */ diff --git a/thirdparty/opus/opus/opus.h b/thirdparty/opus/opus/opus.h index d282f21d25..5be73ddf4e 100644 --- a/thirdparty/opus/opus/opus.h +++ b/thirdparty/opus/opus/opus.h @@ -531,7 +531,7 @@ OPUS_EXPORT int opus_packet_parse( const unsigned char *frames[48], opus_int16 size[48], int *payload_offset -) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(5); +) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(4); /** Gets the bandwidth of an Opus packet. * @param [in] data <tt>char*</tt>: Opus packet diff --git a/thirdparty/opus/opus/opus_defines.h b/thirdparty/opus/opus/opus_defines.h index d141418b21..315412dd1d 100644 --- a/thirdparty/opus/opus/opus_defines.h +++ b/thirdparty/opus/opus/opus_defines.h @@ -165,13 +165,8 @@ extern "C" { #define OPUS_GET_EXPERT_FRAME_DURATION_REQUEST 4041 #define OPUS_SET_PREDICTION_DISABLED_REQUEST 4042 #define OPUS_GET_PREDICTION_DISABLED_REQUEST 4043 -/* Don't use 4045, it's already taken by OPUS_GET_GAIN_REQUEST */ -#define OPUS_SET_PHASE_INVERSION_DISABLED_REQUEST 4046 -#define OPUS_GET_PHASE_INVERSION_DISABLED_REQUEST 4047 -#define OPUS_GET_IN_DTX_REQUEST 4049 -/** Defines for the presence of extended APIs. */ -#define OPUS_HAVE_OPUS_PROJECTION_H +/* Don't use 4045, it's already taken by OPUS_GET_GAIN_REQUEST */ /* Macros to trigger compilation errors when the wrong types are provided to a CTL */ #define __opus_check_int(x) (((void)((x) == (opus_int32)0)), (opus_int32)(x)) @@ -213,9 +208,6 @@ extern "C" { #define OPUS_FRAMESIZE_20_MS 5004 /**< Use 20 ms frames */ #define OPUS_FRAMESIZE_40_MS 5005 /**< Use 40 ms frames */ #define OPUS_FRAMESIZE_60_MS 5006 /**< Use 60 ms frames */ -#define OPUS_FRAMESIZE_80_MS 5007 /**< Use 80 ms frames */ -#define OPUS_FRAMESIZE_100_MS 5008 /**< Use 100 ms frames */ -#define OPUS_FRAMESIZE_120_MS 5009 /**< Use 120 ms frames */ /**@}*/ @@ -574,9 +566,7 @@ extern "C" { * <dt>OPUS_FRAMESIZE_20_MS</dt><dd>Use 20 ms frames.</dd> * <dt>OPUS_FRAMESIZE_40_MS</dt><dd>Use 40 ms frames.</dd> * <dt>OPUS_FRAMESIZE_60_MS</dt><dd>Use 60 ms frames.</dd> - * <dt>OPUS_FRAMESIZE_80_MS</dt><dd>Use 80 ms frames.</dd> - * <dt>OPUS_FRAMESIZE_100_MS</dt><dd>Use 100 ms frames.</dd> - * <dt>OPUS_FRAMESIZE_120_MS</dt><dd>Use 120 ms frames.</dd> + * <dt>OPUS_FRAMESIZE_VARIABLE</dt><dd>Optimize the frame size dynamically.</dd> * </dl> * @hideinitializer */ #define OPUS_SET_EXPERT_FRAME_DURATION(x) OPUS_SET_EXPERT_FRAME_DURATION_REQUEST, __opus_check_int(x) @@ -591,9 +581,7 @@ extern "C" { * <dt>OPUS_FRAMESIZE_20_MS</dt><dd>Use 20 ms frames.</dd> * <dt>OPUS_FRAMESIZE_40_MS</dt><dd>Use 40 ms frames.</dd> * <dt>OPUS_FRAMESIZE_60_MS</dt><dd>Use 60 ms frames.</dd> - * <dt>OPUS_FRAMESIZE_80_MS</dt><dd>Use 80 ms frames.</dd> - * <dt>OPUS_FRAMESIZE_100_MS</dt><dd>Use 100 ms frames.</dd> - * <dt>OPUS_FRAMESIZE_120_MS</dt><dd>Use 120 ms frames.</dd> + * <dt>OPUS_FRAMESIZE_VARIABLE</dt><dd>Optimize the frame size dynamically.</dd> * </dl> * @hideinitializer */ #define OPUS_GET_EXPERT_FRAME_DURATION(x) OPUS_GET_EXPERT_FRAME_DURATION_REQUEST, __opus_check_int_ptr(x) @@ -693,40 +681,6 @@ extern "C" { */ #define OPUS_GET_SAMPLE_RATE(x) OPUS_GET_SAMPLE_RATE_REQUEST, __opus_check_int_ptr(x) -/** If set to 1, disables the use of phase inversion for intensity stereo, - * improving the quality of mono downmixes, but slightly reducing normal - * stereo quality. Disabling phase inversion in the decoder does not comply - * with RFC 6716, although it does not cause any interoperability issue and - * is expected to become part of the Opus standard once RFC 6716 is updated - * by draft-ietf-codec-opus-update. - * @see OPUS_GET_PHASE_INVERSION_DISABLED - * @param[in] x <tt>opus_int32</tt>: Allowed values: - * <dl> - * <dt>0</dt><dd>Enable phase inversion (default).</dd> - * <dt>1</dt><dd>Disable phase inversion.</dd> - * </dl> - * @hideinitializer */ -#define OPUS_SET_PHASE_INVERSION_DISABLED(x) OPUS_SET_PHASE_INVERSION_DISABLED_REQUEST, __opus_check_int(x) -/** Gets the encoder's configured phase inversion status. - * @see OPUS_SET_PHASE_INVERSION_DISABLED - * @param[out] x <tt>opus_int32 *</tt>: Returns one of the following values: - * <dl> - * <dt>0</dt><dd>Stereo phase inversion enabled (default).</dd> - * <dt>1</dt><dd>Stereo phase inversion disabled.</dd> - * </dl> - * @hideinitializer */ -#define OPUS_GET_PHASE_INVERSION_DISABLED(x) OPUS_GET_PHASE_INVERSION_DISABLED_REQUEST, __opus_check_int_ptr(x) -/** Gets the DTX state of the encoder. - * Returns whether the last encoded frame was either a comfort noise update - * during DTX or not encoded because of DTX. - * @param[out] x <tt>opus_int32 *</tt>: Returns one of the following values: - * <dl> - * <dt>0</dt><dd>The encoder is not in DTX.</dd> - * <dt>1</dt><dd>The encoder is in DTX.</dd> - * </dl> - * @hideinitializer */ -#define OPUS_GET_IN_DTX(x) OPUS_GET_IN_DTX_REQUEST, __opus_check_int_ptr(x) - /**@}*/ /** @defgroup opus_decoderctls Decoder related CTLs diff --git a/thirdparty/opus/opus/opus_multistream.h b/thirdparty/opus/opus/opus_multistream.h index babcee6905..3622e009fb 100644 --- a/thirdparty/opus/opus/opus_multistream.h +++ b/thirdparty/opus/opus/opus_multistream.h @@ -273,7 +273,7 @@ OPUS_EXPORT OPUS_WARN_UNUSED_RESULT OpusMSEncoder *opus_multistream_surround_enc unsigned char *mapping, int application, int *error -) OPUS_ARG_NONNULL(4) OPUS_ARG_NONNULL(5) OPUS_ARG_NONNULL(6); +) OPUS_ARG_NONNULL(5); /** Initialize a previously allocated multistream encoder state. * The memory pointed to by \a st must be at least the size returned by @@ -342,7 +342,7 @@ OPUS_EXPORT int opus_multistream_surround_encoder_init( int *coupled_streams, unsigned char *mapping, int application -) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(5) OPUS_ARG_NONNULL(6) OPUS_ARG_NONNULL(7); +) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(6); /** Encodes a multistream Opus frame. * @param st <tt>OpusMSEncoder*</tt>: Multistream encoder state. diff --git a/thirdparty/opus/opus/opus_projection.h b/thirdparty/opus/opus/opus_projection.h deleted file mode 100644 index 9dabf4e85c..0000000000 --- a/thirdparty/opus/opus/opus_projection.h +++ /dev/null @@ -1,568 +0,0 @@ -/* Copyright (c) 2017 Google Inc. - Written by Andrew Allen */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -/** - * @file opus_projection.h - * @brief Opus projection reference API - */ - -#ifndef OPUS_PROJECTION_H -#define OPUS_PROJECTION_H - -#include "opus_multistream.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/** @cond OPUS_INTERNAL_DOC */ - -/** These are the actual encoder and decoder CTL ID numbers. - * They should not be used directly by applications.c - * In general, SETs should be even and GETs should be odd.*/ -/**@{*/ -#define OPUS_PROJECTION_GET_DEMIXING_MATRIX_GAIN_REQUEST 6001 -#define OPUS_PROJECTION_GET_DEMIXING_MATRIX_SIZE_REQUEST 6003 -#define OPUS_PROJECTION_GET_DEMIXING_MATRIX_REQUEST 6005 -/**@}*/ - - -/** @endcond */ - -/** @defgroup opus_projection_ctls Projection specific encoder and decoder CTLs - * - * These are convenience macros that are specific to the - * opus_projection_encoder_ctl() and opus_projection_decoder_ctl() - * interface. - * The CTLs from @ref opus_genericctls, @ref opus_encoderctls, - * @ref opus_decoderctls, and @ref opus_multistream_ctls may be applied to a - * projection encoder or decoder as well. - */ -/**@{*/ - -/** Gets the gain (in dB. S7.8-format) of the demixing matrix from the encoder. - * @param[out] x <tt>opus_int32 *</tt>: Returns the gain (in dB. S7.8-format) - * of the demixing matrix. - * @hideinitializer - */ -#define OPUS_PROJECTION_GET_DEMIXING_MATRIX_GAIN(x) OPUS_PROJECTION_GET_DEMIXING_MATRIX_GAIN_REQUEST, __opus_check_int_ptr(x) - - -/** Gets the size in bytes of the demixing matrix from the encoder. - * @param[out] x <tt>opus_int32 *</tt>: Returns the size in bytes of the - * demixing matrix. - * @hideinitializer - */ -#define OPUS_PROJECTION_GET_DEMIXING_MATRIX_SIZE(x) OPUS_PROJECTION_GET_DEMIXING_MATRIX_SIZE_REQUEST, __opus_check_int_ptr(x) - - -/** Copies the demixing matrix to the supplied pointer location. - * @param[out] x <tt>unsigned char *</tt>: Returns the demixing matrix to the - * supplied pointer location. - * @param y <tt>opus_int32</tt>: The size in bytes of the reserved memory at the - * pointer location. - * @hideinitializer - */ -#define OPUS_PROJECTION_GET_DEMIXING_MATRIX(x,y) OPUS_PROJECTION_GET_DEMIXING_MATRIX_REQUEST, x, __opus_check_int(y) - - -/**@}*/ - -/** Opus projection encoder state. - * This contains the complete state of a projection Opus encoder. - * It is position independent and can be freely copied. - * @see opus_projection_ambisonics_encoder_create - */ -typedef struct OpusProjectionEncoder OpusProjectionEncoder; - - -/** Opus projection decoder state. - * This contains the complete state of a projection Opus decoder. - * It is position independent and can be freely copied. - * @see opus_projection_decoder_create - * @see opus_projection_decoder_init - */ -typedef struct OpusProjectionDecoder OpusProjectionDecoder; - - -/**\name Projection encoder functions */ -/**@{*/ - -/** Gets the size of an OpusProjectionEncoder structure. - * @param channels <tt>int</tt>: The total number of input channels to encode. - * This must be no more than 255. - * @param mapping_family <tt>int</tt>: The mapping family to use for selecting - * the appropriate projection. - * @returns The size in bytes on success, or a negative error code - * (see @ref opus_errorcodes) on error. - */ -OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_projection_ambisonics_encoder_get_size( - int channels, - int mapping_family -); - - -/** Allocates and initializes a projection encoder state. - * Call opus_projection_encoder_destroy() to release - * this object when finished. - * @param Fs <tt>opus_int32</tt>: Sampling rate of the input signal (in Hz). - * This must be one of 8000, 12000, 16000, - * 24000, or 48000. - * @param channels <tt>int</tt>: Number of channels in the input signal. - * This must be at most 255. - * It may be greater than the number of - * coded channels (<code>streams + - * coupled_streams</code>). - * @param mapping_family <tt>int</tt>: The mapping family to use for selecting - * the appropriate projection. - * @param[out] streams <tt>int *</tt>: The total number of streams that will - * be encoded from the input. - * @param[out] coupled_streams <tt>int *</tt>: Number of coupled (2 channel) - * streams that will be encoded from the input. - * @param application <tt>int</tt>: The target encoder application. - * This must be one of the following: - * <dl> - * <dt>#OPUS_APPLICATION_VOIP</dt> - * <dd>Process signal for improved speech intelligibility.</dd> - * <dt>#OPUS_APPLICATION_AUDIO</dt> - * <dd>Favor faithfulness to the original input.</dd> - * <dt>#OPUS_APPLICATION_RESTRICTED_LOWDELAY</dt> - * <dd>Configure the minimum possible coding delay by disabling certain modes - * of operation.</dd> - * </dl> - * @param[out] error <tt>int *</tt>: Returns #OPUS_OK on success, or an error - * code (see @ref opus_errorcodes) on - * failure. - */ -OPUS_EXPORT OPUS_WARN_UNUSED_RESULT OpusProjectionEncoder *opus_projection_ambisonics_encoder_create( - opus_int32 Fs, - int channels, - int mapping_family, - int *streams, - int *coupled_streams, - int application, - int *error -) OPUS_ARG_NONNULL(4) OPUS_ARG_NONNULL(5); - - -/** Initialize a previously allocated projection encoder state. - * The memory pointed to by \a st must be at least the size returned by - * opus_projection_ambisonics_encoder_get_size(). - * This is intended for applications which use their own allocator instead of - * malloc. - * To reset a previously initialized state, use the #OPUS_RESET_STATE CTL. - * @see opus_projection_ambisonics_encoder_create - * @see opus_projection_ambisonics_encoder_get_size - * @param st <tt>OpusProjectionEncoder*</tt>: Projection encoder state to initialize. - * @param Fs <tt>opus_int32</tt>: Sampling rate of the input signal (in Hz). - * This must be one of 8000, 12000, 16000, - * 24000, or 48000. - * @param channels <tt>int</tt>: Number of channels in the input signal. - * This must be at most 255. - * It may be greater than the number of - * coded channels (<code>streams + - * coupled_streams</code>). - * @param streams <tt>int</tt>: The total number of streams to encode from the - * input. - * This must be no more than the number of channels. - * @param coupled_streams <tt>int</tt>: Number of coupled (2 channel) streams - * to encode. - * This must be no larger than the total - * number of streams. - * Additionally, The total number of - * encoded channels (<code>streams + - * coupled_streams</code>) must be no - * more than the number of input channels. - * @param application <tt>int</tt>: The target encoder application. - * This must be one of the following: - * <dl> - * <dt>#OPUS_APPLICATION_VOIP</dt> - * <dd>Process signal for improved speech intelligibility.</dd> - * <dt>#OPUS_APPLICATION_AUDIO</dt> - * <dd>Favor faithfulness to the original input.</dd> - * <dt>#OPUS_APPLICATION_RESTRICTED_LOWDELAY</dt> - * <dd>Configure the minimum possible coding delay by disabling certain modes - * of operation.</dd> - * </dl> - * @returns #OPUS_OK on success, or an error code (see @ref opus_errorcodes) - * on failure. - */ -OPUS_EXPORT int opus_projection_ambisonics_encoder_init( - OpusProjectionEncoder *st, - opus_int32 Fs, - int channels, - int mapping_family, - int *streams, - int *coupled_streams, - int application -) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(5) OPUS_ARG_NONNULL(6); - - -/** Encodes a projection Opus frame. - * @param st <tt>OpusProjectionEncoder*</tt>: Projection encoder state. - * @param[in] pcm <tt>const opus_int16*</tt>: The input signal as interleaved - * samples. - * This must contain - * <code>frame_size*channels</code> - * samples. - * @param frame_size <tt>int</tt>: Number of samples per channel in the input - * signal. - * This must be an Opus frame size for the - * encoder's sampling rate. - * For example, at 48 kHz the permitted values - * are 120, 240, 480, 960, 1920, and 2880. - * Passing in a duration of less than 10 ms - * (480 samples at 48 kHz) will prevent the - * encoder from using the LPC or hybrid modes. - * @param[out] data <tt>unsigned char*</tt>: Output payload. - * This must contain storage for at - * least \a max_data_bytes. - * @param [in] max_data_bytes <tt>opus_int32</tt>: Size of the allocated - * memory for the output - * payload. This may be - * used to impose an upper limit on - * the instant bitrate, but should - * not be used as the only bitrate - * control. Use #OPUS_SET_BITRATE to - * control the bitrate. - * @returns The length of the encoded packet (in bytes) on success or a - * negative error code (see @ref opus_errorcodes) on failure. - */ -OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_projection_encode( - OpusProjectionEncoder *st, - const opus_int16 *pcm, - int frame_size, - unsigned char *data, - opus_int32 max_data_bytes -) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2) OPUS_ARG_NONNULL(4); - - -/** Encodes a projection Opus frame from floating point input. - * @param st <tt>OpusProjectionEncoder*</tt>: Projection encoder state. - * @param[in] pcm <tt>const float*</tt>: The input signal as interleaved - * samples with a normal range of - * +/-1.0. - * Samples with a range beyond +/-1.0 - * are supported but will be clipped by - * decoders using the integer API and - * should only be used if it is known - * that the far end supports extended - * dynamic range. - * This must contain - * <code>frame_size*channels</code> - * samples. - * @param frame_size <tt>int</tt>: Number of samples per channel in the input - * signal. - * This must be an Opus frame size for the - * encoder's sampling rate. - * For example, at 48 kHz the permitted values - * are 120, 240, 480, 960, 1920, and 2880. - * Passing in a duration of less than 10 ms - * (480 samples at 48 kHz) will prevent the - * encoder from using the LPC or hybrid modes. - * @param[out] data <tt>unsigned char*</tt>: Output payload. - * This must contain storage for at - * least \a max_data_bytes. - * @param [in] max_data_bytes <tt>opus_int32</tt>: Size of the allocated - * memory for the output - * payload. This may be - * used to impose an upper limit on - * the instant bitrate, but should - * not be used as the only bitrate - * control. Use #OPUS_SET_BITRATE to - * control the bitrate. - * @returns The length of the encoded packet (in bytes) on success or a - * negative error code (see @ref opus_errorcodes) on failure. - */ -OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_projection_encode_float( - OpusProjectionEncoder *st, - const float *pcm, - int frame_size, - unsigned char *data, - opus_int32 max_data_bytes -) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2) OPUS_ARG_NONNULL(4); - - -/** Frees an <code>OpusProjectionEncoder</code> allocated by - * opus_projection_ambisonics_encoder_create(). - * @param st <tt>OpusProjectionEncoder*</tt>: Projection encoder state to be freed. - */ -OPUS_EXPORT void opus_projection_encoder_destroy(OpusProjectionEncoder *st); - - -/** Perform a CTL function on a projection Opus encoder. - * - * Generally the request and subsequent arguments are generated by a - * convenience macro. - * @param st <tt>OpusProjectionEncoder*</tt>: Projection encoder state. - * @param request This and all remaining parameters should be replaced by one - * of the convenience macros in @ref opus_genericctls, - * @ref opus_encoderctls, @ref opus_multistream_ctls, or - * @ref opus_projection_ctls - * @see opus_genericctls - * @see opus_encoderctls - * @see opus_multistream_ctls - * @see opus_projection_ctls - */ -OPUS_EXPORT int opus_projection_encoder_ctl(OpusProjectionEncoder *st, int request, ...) OPUS_ARG_NONNULL(1); - - -/**@}*/ - -/**\name Projection decoder functions */ -/**@{*/ - -/** Gets the size of an <code>OpusProjectionDecoder</code> structure. - * @param channels <tt>int</tt>: The total number of output channels. - * This must be no more than 255. - * @param streams <tt>int</tt>: The total number of streams coded in the - * input. - * This must be no more than 255. - * @param coupled_streams <tt>int</tt>: Number streams to decode as coupled - * (2 channel) streams. - * This must be no larger than the total - * number of streams. - * Additionally, The total number of - * coded channels (<code>streams + - * coupled_streams</code>) must be no - * more than 255. - * @returns The size in bytes on success, or a negative error code - * (see @ref opus_errorcodes) on error. - */ -OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_projection_decoder_get_size( - int channels, - int streams, - int coupled_streams -); - - -/** Allocates and initializes a projection decoder state. - * Call opus_projection_decoder_destroy() to release - * this object when finished. - * @param Fs <tt>opus_int32</tt>: Sampling rate to decode at (in Hz). - * This must be one of 8000, 12000, 16000, - * 24000, or 48000. - * @param channels <tt>int</tt>: Number of channels to output. - * This must be at most 255. - * It may be different from the number of coded - * channels (<code>streams + - * coupled_streams</code>). - * @param streams <tt>int</tt>: The total number of streams coded in the - * input. - * This must be no more than 255. - * @param coupled_streams <tt>int</tt>: Number of streams to decode as coupled - * (2 channel) streams. - * This must be no larger than the total - * number of streams. - * Additionally, The total number of - * coded channels (<code>streams + - * coupled_streams</code>) must be no - * more than 255. - * @param[in] demixing_matrix <tt>const unsigned char[demixing_matrix_size]</tt>: Demixing matrix - * that mapping from coded channels to output channels, - * as described in @ref opus_projection and - * @ref opus_projection_ctls. - * @param demixing_matrix_size <tt>opus_int32</tt>: The size in bytes of the - * demixing matrix, as - * described in @ref - * opus_projection_ctls. - * @param[out] error <tt>int *</tt>: Returns #OPUS_OK on success, or an error - * code (see @ref opus_errorcodes) on - * failure. - */ -OPUS_EXPORT OPUS_WARN_UNUSED_RESULT OpusProjectionDecoder *opus_projection_decoder_create( - opus_int32 Fs, - int channels, - int streams, - int coupled_streams, - unsigned char *demixing_matrix, - opus_int32 demixing_matrix_size, - int *error -) OPUS_ARG_NONNULL(5); - - -/** Intialize a previously allocated projection decoder state object. - * The memory pointed to by \a st must be at least the size returned by - * opus_projection_decoder_get_size(). - * This is intended for applications which use their own allocator instead of - * malloc. - * To reset a previously initialized state, use the #OPUS_RESET_STATE CTL. - * @see opus_projection_decoder_create - * @see opus_projection_deocder_get_size - * @param st <tt>OpusProjectionDecoder*</tt>: Projection encoder state to initialize. - * @param Fs <tt>opus_int32</tt>: Sampling rate to decode at (in Hz). - * This must be one of 8000, 12000, 16000, - * 24000, or 48000. - * @param channels <tt>int</tt>: Number of channels to output. - * This must be at most 255. - * It may be different from the number of coded - * channels (<code>streams + - * coupled_streams</code>). - * @param streams <tt>int</tt>: The total number of streams coded in the - * input. - * This must be no more than 255. - * @param coupled_streams <tt>int</tt>: Number of streams to decode as coupled - * (2 channel) streams. - * This must be no larger than the total - * number of streams. - * Additionally, The total number of - * coded channels (<code>streams + - * coupled_streams</code>) must be no - * more than 255. - * @param[in] demixing_matrix <tt>const unsigned char[demixing_matrix_size]</tt>: Demixing matrix - * that mapping from coded channels to output channels, - * as described in @ref opus_projection and - * @ref opus_projection_ctls. - * @param demixing_matrix_size <tt>opus_int32</tt>: The size in bytes of the - * demixing matrix, as - * described in @ref - * opus_projection_ctls. - * @returns #OPUS_OK on success, or an error code (see @ref opus_errorcodes) - * on failure. - */ -OPUS_EXPORT int opus_projection_decoder_init( - OpusProjectionDecoder *st, - opus_int32 Fs, - int channels, - int streams, - int coupled_streams, - unsigned char *demixing_matrix, - opus_int32 demixing_matrix_size -) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(6); - - -/** Decode a projection Opus packet. - * @param st <tt>OpusProjectionDecoder*</tt>: Projection decoder state. - * @param[in] data <tt>const unsigned char*</tt>: Input payload. - * Use a <code>NULL</code> - * pointer to indicate packet - * loss. - * @param len <tt>opus_int32</tt>: Number of bytes in payload. - * @param[out] pcm <tt>opus_int16*</tt>: Output signal, with interleaved - * samples. - * This must contain room for - * <code>frame_size*channels</code> - * samples. - * @param frame_size <tt>int</tt>: The number of samples per channel of - * available space in \a pcm. - * If this is less than the maximum packet duration - * (120 ms; 5760 for 48kHz), this function will not be capable - * of decoding some packets. In the case of PLC (data==NULL) - * or FEC (decode_fec=1), then frame_size needs to be exactly - * the duration of audio that is missing, otherwise the - * decoder will not be in the optimal state to decode the - * next incoming packet. For the PLC and FEC cases, frame_size - * <b>must</b> be a multiple of 2.5 ms. - * @param decode_fec <tt>int</tt>: Flag (0 or 1) to request that any in-band - * forward error correction data be decoded. - * If no such data is available, the frame is - * decoded as if it were lost. - * @returns Number of samples decoded on success or a negative error code - * (see @ref opus_errorcodes) on failure. - */ -OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_projection_decode( - OpusProjectionDecoder *st, - const unsigned char *data, - opus_int32 len, - opus_int16 *pcm, - int frame_size, - int decode_fec -) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(4); - - -/** Decode a projection Opus packet with floating point output. - * @param st <tt>OpusProjectionDecoder*</tt>: Projection decoder state. - * @param[in] data <tt>const unsigned char*</tt>: Input payload. - * Use a <code>NULL</code> - * pointer to indicate packet - * loss. - * @param len <tt>opus_int32</tt>: Number of bytes in payload. - * @param[out] pcm <tt>opus_int16*</tt>: Output signal, with interleaved - * samples. - * This must contain room for - * <code>frame_size*channels</code> - * samples. - * @param frame_size <tt>int</tt>: The number of samples per channel of - * available space in \a pcm. - * If this is less than the maximum packet duration - * (120 ms; 5760 for 48kHz), this function will not be capable - * of decoding some packets. In the case of PLC (data==NULL) - * or FEC (decode_fec=1), then frame_size needs to be exactly - * the duration of audio that is missing, otherwise the - * decoder will not be in the optimal state to decode the - * next incoming packet. For the PLC and FEC cases, frame_size - * <b>must</b> be a multiple of 2.5 ms. - * @param decode_fec <tt>int</tt>: Flag (0 or 1) to request that any in-band - * forward error correction data be decoded. - * If no such data is available, the frame is - * decoded as if it were lost. - * @returns Number of samples decoded on success or a negative error code - * (see @ref opus_errorcodes) on failure. - */ -OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_projection_decode_float( - OpusProjectionDecoder *st, - const unsigned char *data, - opus_int32 len, - float *pcm, - int frame_size, - int decode_fec -) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(4); - - -/** Perform a CTL function on a projection Opus decoder. - * - * Generally the request and subsequent arguments are generated by a - * convenience macro. - * @param st <tt>OpusProjectionDecoder*</tt>: Projection decoder state. - * @param request This and all remaining parameters should be replaced by one - * of the convenience macros in @ref opus_genericctls, - * @ref opus_decoderctls, @ref opus_multistream_ctls, or - * @ref opus_projection_ctls. - * @see opus_genericctls - * @see opus_decoderctls - * @see opus_multistream_ctls - * @see opus_projection_ctls - */ -OPUS_EXPORT int opus_projection_decoder_ctl(OpusProjectionDecoder *st, int request, ...) OPUS_ARG_NONNULL(1); - - -/** Frees an <code>OpusProjectionDecoder</code> allocated by - * opus_projection_decoder_create(). - * @param st <tt>OpusProjectionDecoder</tt>: Projection decoder state to be freed. - */ -OPUS_EXPORT void opus_projection_decoder_destroy(OpusProjectionDecoder *st); - - -/**@}*/ - -/**@}*/ - -#ifdef __cplusplus -} -#endif - -#endif /* OPUS_PROJECTION_H */ diff --git a/thirdparty/opus/opus/opus_types.h b/thirdparty/opus/opus/opus_types.h index 7cf675580f..b28e03aea2 100644 --- a/thirdparty/opus/opus/opus_types.h +++ b/thirdparty/opus/opus/opus_types.h @@ -33,29 +33,14 @@ #ifndef OPUS_TYPES_H #define OPUS_TYPES_H -#define opus_int int /* used for counters etc; at least 16 bits */ -#define opus_int64 long long -#define opus_int8 signed char - -#define opus_uint unsigned int /* used for counters etc; at least 16 bits */ -#define opus_uint64 unsigned long long -#define opus_uint8 unsigned char - /* Use the real stdint.h if it's there (taken from Paul Hsieh's pstdint.h) */ -#if (defined(__STDC__) && __STDC__ && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || (defined(__GNUC__) && (defined(_STDINT_H) || defined(_STDINT_H_)) || defined (HAVE_STDINT_H)) +#if (defined(__STDC__) && __STDC__ && __STDC_VERSION__ >= 199901L) || (defined(__GNUC__) && (defined(_STDINT_H) || defined(_STDINT_H_)) || defined (HAVE_STDINT_H)) #include <stdint.h> -# undef opus_int64 -# undef opus_int8 -# undef opus_uint64 -# undef opus_uint8 - typedef int8_t opus_int8; - typedef uint8_t opus_uint8; + typedef int16_t opus_int16; typedef uint16_t opus_uint16; typedef int32_t opus_int32; typedef uint32_t opus_uint32; - typedef int64_t opus_int64; - typedef uint64_t opus_uint64; #elif defined(_WIN32) # if defined(__CYGWIN__) @@ -163,4 +148,12 @@ #endif +#define opus_int int /* used for counters etc; at least 16 bits */ +#define opus_int64 long long +#define opus_int8 signed char + +#define opus_uint unsigned int /* used for counters etc; at least 16 bits */ +#define opus_uint64 unsigned long long +#define opus_uint8 unsigned char + #endif /* OPUS_TYPES_H */ diff --git a/thirdparty/opus/opus/opusfile.h b/thirdparty/opus/opus/opusfile.h index e3a3dc8389..4bf2fba926 100644 --- a/thirdparty/opus/opus/opusfile.h +++ b/thirdparty/opus/opus/opusfile.h @@ -239,8 +239,7 @@ struct OpusHead{ -32768...32767. The <tt>libopusfile</tt> API will automatically apply this gain to the decoded output before returning it, scaling it by - <code>pow(10,output_gain/(20.0*256))</code>. - You can adjust this behavior with op_set_gain_offset().*/ + <code>pow(10,output_gain/(20.0*256))</code>.*/ int output_gain; /**The channel mapping family, in the range 0...255. Channel mapping family 0 covers mono or stereo in a single stream. @@ -1155,18 +1154,16 @@ OP_WARN_UNUSED_RESULT OggOpusFile *op_open_url(const char *_url, int *_error,...) OP_ARG_NONNULL(1); /**Open a stream using the given set of callbacks to access it. - \param _stream The stream to read from (e.g., a <code>FILE *</code>). - This value will be passed verbatim as the first - argument to all of the callbacks. + \param _source The stream to read from (e.g., a <code>FILE *</code>). \param _cb The callbacks with which to access the stream. <code><a href="#op_read_func">read()</a></code> must be implemented. <code><a href="#op_seek_func">seek()</a></code> and <code><a href="#op_tell_func">tell()</a></code> may be <code>NULL</code>, or may always return -1 to - indicate a stream is unseekable, but if + indicate a source is unseekable, but if <code><a href="#op_seek_func">seek()</a></code> is - implemented and succeeds on a particular stream, then + implemented and succeeds on a particular source, then <code><a href="#op_tell_func">tell()</a></code> must also. <code><a href="#op_close_func">close()</a></code> may @@ -1229,11 +1226,11 @@ OP_WARN_UNUSED_RESULT OggOpusFile *op_open_url(const char *_url, basic validity checks.</dd> </dl> \return A freshly opened \c OggOpusFile, or <code>NULL</code> on error. - <tt>libopusfile</tt> does <em>not</em> take ownership of the stream + <tt>libopusfile</tt> does <em>not</em> take ownership of the source if the call fails. - The calling application is responsible for closing the stream if + The calling application is responsible for closing the source if this call returns an error.*/ -OP_WARN_UNUSED_RESULT OggOpusFile *op_open_callbacks(void *_stream, +OP_WARN_UNUSED_RESULT OggOpusFile *op_open_callbacks(void *_source, const OpusFileCallbacks *_cb,const unsigned char *_initial_data, size_t _initial_bytes,int *_error) OP_ARG_NONNULL(2); @@ -1335,20 +1332,18 @@ OP_WARN_UNUSED_RESULT OggOpusFile *op_test_url(const char *_url, For new code, you are likely better off using op_test() instead, which is less resource-intensive, requires less data to succeed, and imposes a hard limit on the amount of data it examines (important for unseekable - streams, where all such data must be buffered until you are sure of the + sources, where all such data must be buffered until you are sure of the stream type). - \param _stream The stream to read from (e.g., a <code>FILE *</code>). - This value will be passed verbatim as the first - argument to all of the callbacks. + \param _source The stream to read from (e.g., a <code>FILE *</code>). \param _cb The callbacks with which to access the stream. <code><a href="#op_read_func">read()</a></code> must be implemented. <code><a href="#op_seek_func">seek()</a></code> and <code><a href="#op_tell_func">tell()</a></code> may be <code>NULL</code>, or may always return -1 to - indicate a stream is unseekable, but if + indicate a source is unseekable, but if <code><a href="#op_seek_func">seek()</a></code> is - implemented and succeeds on a particular stream, then + implemented and succeeds on a particular source, then <code><a href="#op_tell_func">tell()</a></code> must also. <code><a href="#op_close_func">close()</a></code> may @@ -1378,11 +1373,11 @@ OP_WARN_UNUSED_RESULT OggOpusFile *op_test_url(const char *_url, See op_open_callbacks() for a full list of failure codes. \return A partially opened \c OggOpusFile, or <code>NULL</code> on error. - <tt>libopusfile</tt> does <em>not</em> take ownership of the stream + <tt>libopusfile</tt> does <em>not</em> take ownership of the source if the call fails. - The calling application is responsible for closing the stream if + The calling application is responsible for closing the source if this call returns an error.*/ -OP_WARN_UNUSED_RESULT OggOpusFile *op_test_callbacks(void *_stream, +OP_WARN_UNUSED_RESULT OggOpusFile *op_test_callbacks(void *_source, const OpusFileCallbacks *_cb,const unsigned char *_initial_data, size_t _initial_bytes,int *_error) OP_ARG_NONNULL(2); @@ -1439,7 +1434,7 @@ void op_free(OggOpusFile *_of); Their documention will indicate so explicitly.*/ /*@{*/ -/**Returns whether or not the stream being read is seekable. +/**Returns whether or not the data source being read is seekable. This is true if <ol> <li>The <code><a href="#op_seek_func">seek()</a></code> and @@ -1460,9 +1455,9 @@ int op_seekable(const OggOpusFile *_of) OP_ARG_NONNULL(1); return 1. The actual number of links is not known until the stream is fully opened. \param _of The \c OggOpusFile from which to retrieve the link count. - \return For fully-open seekable streams, this returns the total number of + \return For fully-open seekable sources, this returns the total number of links in the whole stream, which will be at least 1. - For partially-open or unseekable streams, this always returns 1.*/ + For partially-open or unseekable sources, this always returns 1.*/ int op_link_count(const OggOpusFile *_of) OP_ARG_NONNULL(1); /**Get the serial number of the given link in a (possibly-chained) Ogg Opus @@ -1476,7 +1471,7 @@ int op_link_count(const OggOpusFile *_of) OP_ARG_NONNULL(1); \return The serial number of the given link. If \a _li is greater than the total number of links, this returns the serial number of the last link. - If the stream is not seekable, this always returns the serial number + If the source is not seekable, this always returns the serial number of the current link.*/ opus_uint32 op_serialno(const OggOpusFile *_of,int _li) OP_ARG_NONNULL(1); @@ -1493,7 +1488,7 @@ opus_uint32 op_serialno(const OggOpusFile *_of,int _li) OP_ARG_NONNULL(1); \return The channel count of the given link. If \a _li is greater than the total number of links, this returns the channel count of the last link. - If the stream is not seekable, this always returns the channel count + If the source is not seekable, this always returns the channel count of the current link.*/ int op_channel_count(const OggOpusFile *_of,int _li) OP_ARG_NONNULL(1); @@ -1512,9 +1507,9 @@ int op_channel_count(const OggOpusFile *_of,int _li) OP_ARG_NONNULL(1); compressed size of link \a _li if it is non-negative, or a negative value on error. The compressed size of the entire stream may be smaller than that - of the underlying stream if trailing garbage was detected in the + of the underlying source if trailing garbage was detected in the file. - \retval #OP_EINVAL The stream is not seekable (so we can't know the length), + \retval #OP_EINVAL The source is not seekable (so we can't know the length), \a _li wasn't less than the total number of links in the stream, or the stream was only partially open.*/ opus_int64 op_raw_total(const OggOpusFile *_of,int _li) OP_ARG_NONNULL(1); @@ -1532,7 +1527,7 @@ opus_int64 op_raw_total(const OggOpusFile *_of,int _li) OP_ARG_NONNULL(1); \return The PCM length of the entire stream if \a _li is negative, the PCM length of link \a _li if it is non-negative, or a negative value on error. - \retval #OP_EINVAL The stream is not seekable (so we can't know the length), + \retval #OP_EINVAL The source is not seekable (so we can't know the length), \a _li wasn't less than the total number of links in the stream, or the stream was only partially open.*/ ogg_int64_t op_pcm_total(const OggOpusFile *_of,int _li) OP_ARG_NONNULL(1); @@ -1580,8 +1575,8 @@ const OpusTags *op_tags(const OggOpusFile *_of,int _li) OP_ARG_NONNULL(1); \param _of The \c OggOpusFile from which to retrieve the current link index. \return The index of the current link on success, or a negative value on failure. - For seekable streams, this is a number between 0 (inclusive) and the - value returned by op_link_count() (exclusive). + For seekable streams, this is a number between 0 and the value + returned by op_link_count(). For unseekable streams, this value starts at 0 and increments by one each time a new link is encountered (even though op_link_count() always returns 1). @@ -1645,10 +1640,10 @@ ogg_int64_t op_pcm_tell(const OggOpusFile *_of) OP_ARG_NONNULL(1); /*@{*/ /**\name Functions for seeking in Opus streams - These functions let you seek in Opus streams, if the underlying stream + These functions let you seek in Opus streams, if the underlying source support it. Seeking is implemented for all built-in stream I/O routines, though some - individual streams may not be seekable (pipes, live HTTP streams, or HTTP + individual sources may not be seekable (pipes, live HTTP streams, or HTTP streams from a server that does not support <code>Range</code> requests). op_raw_seek() is the fastest: it is guaranteed to perform at most one @@ -1675,8 +1670,6 @@ ogg_int64_t op_pcm_tell(const OggOpusFile *_of) OP_ARG_NONNULL(1); packets out of the tail of the link to which it seeks. \param _of The \c OggOpusFile in which to seek. \param _byte_offset The byte position to seek to. - This must be between 0 and #op_raw_total(\a _of,\c -1) - (inclusive). \return 0 on success, or a negative error code on failure. \retval #OP_EREAD The underlying seek operation failed. \retval #OP_EINVAL The stream was only partially open, or the target was diff --git a/thirdparty/opus/opus_compare.c b/thirdparty/opus/opus_compare.c index 1956e08fa5..06c67d752f 100644 --- a/thirdparty/opus/opus_compare.c +++ b/thirdparty/opus/opus_compare.c @@ -363,9 +363,6 @@ int main(int _argc,const char **_argv){ Ef*=Ef; err+=Ef*Ef; } - free(xb); - free(X); - free(Y); err=pow(err/nframes,1.0/16); Q=100*(1-0.5*log(1+err)/log(1.13)); if(Q<0){ diff --git a/thirdparty/opus/opus_decoder.c b/thirdparty/opus/opus_decoder.c index 9113638a00..080bec5072 100644 --- a/thirdparty/opus/opus_decoder.c +++ b/thirdparty/opus/opus_decoder.c @@ -78,26 +78,6 @@ struct OpusDecoder { opus_uint32 rangeFinal; }; -#if defined(ENABLE_HARDENING) || defined(ENABLE_ASSERTIONS) -static void validate_opus_decoder(OpusDecoder *st) -{ - celt_assert(st->channels == 1 || st->channels == 2); - celt_assert(st->Fs == 48000 || st->Fs == 24000 || st->Fs == 16000 || st->Fs == 12000 || st->Fs == 8000); - celt_assert(st->DecControl.API_sampleRate == st->Fs); - celt_assert(st->DecControl.internalSampleRate == 0 || st->DecControl.internalSampleRate == 16000 || st->DecControl.internalSampleRate == 12000 || st->DecControl.internalSampleRate == 8000); - celt_assert(st->DecControl.nChannelsAPI == st->channels); - celt_assert(st->DecControl.nChannelsInternal == 0 || st->DecControl.nChannelsInternal == 1 || st->DecControl.nChannelsInternal == 2); - celt_assert(st->DecControl.payloadSize_ms == 0 || st->DecControl.payloadSize_ms == 10 || st->DecControl.payloadSize_ms == 20 || st->DecControl.payloadSize_ms == 40 || st->DecControl.payloadSize_ms == 60); -#ifdef OPUS_ARCHMASK - celt_assert(st->arch >= 0); - celt_assert(st->arch <= OPUS_ARCHMASK); -#endif - celt_assert(st->stream_channels == 1 || st->stream_channels == 2); -} -#define VALIDATE_OPUS_DECODER(st) validate_opus_decoder(st) -#else -#define VALIDATE_OPUS_DECODER(st) -#endif int opus_decoder_get_size(int channels) { @@ -124,7 +104,7 @@ int opus_decoder_init(OpusDecoder *st, opus_int32 Fs, int channels) return OPUS_BAD_ARG; OPUS_CLEAR((char*)st, opus_decoder_get_size(channels)); - /* Initialize SILK decoder */ + /* Initialize SILK encoder */ ret = silk_Get_Decoder_Size(&silkDecSizeBytes); if (ret) return OPUS_INTERNAL_ERROR; @@ -237,7 +217,6 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data, int audiosize; int mode; - int bandwidth; int transition=0; int start_band; int redundancy=0; @@ -274,12 +253,10 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data, { audiosize = st->frame_size; mode = st->mode; - bandwidth = st->bandwidth; ec_dec_init(&dec,(unsigned char*)data,len); } else { audiosize = frame_size; mode = st->prev_mode; - bandwidth = 0; if (mode == 0) { @@ -378,15 +355,15 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data, { st->DecControl.nChannelsInternal = st->stream_channels; if( mode == MODE_SILK_ONLY ) { - if( bandwidth == OPUS_BANDWIDTH_NARROWBAND ) { + if( st->bandwidth == OPUS_BANDWIDTH_NARROWBAND ) { st->DecControl.internalSampleRate = 8000; - } else if( bandwidth == OPUS_BANDWIDTH_MEDIUMBAND ) { + } else if( st->bandwidth == OPUS_BANDWIDTH_MEDIUMBAND ) { st->DecControl.internalSampleRate = 12000; - } else if( bandwidth == OPUS_BANDWIDTH_WIDEBAND ) { + } else if( st->bandwidth == OPUS_BANDWIDTH_WIDEBAND ) { st->DecControl.internalSampleRate = 16000; } else { st->DecControl.internalSampleRate = 16000; - celt_assert( 0 ); + silk_assert( 0 ); } } else { /* Hybrid mode */ @@ -450,26 +427,10 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data, if (mode != MODE_CELT_ONLY) start_band = 17; - if (redundancy) - { - transition = 0; - pcm_transition_silk_size=ALLOC_NONE; - } - - ALLOC(pcm_transition_silk, pcm_transition_silk_size, opus_val16); - - if (transition && mode != MODE_CELT_ONLY) - { - pcm_transition = pcm_transition_silk; - opus_decode_frame(st, NULL, 0, pcm_transition, IMIN(F5, audiosize), 0); - } - - - if (bandwidth) { int endband=21; - switch(bandwidth) + switch(st->bandwidth) { case OPUS_BANDWIDTH_NARROWBAND: endband = 13; @@ -484,13 +445,24 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data, case OPUS_BANDWIDTH_FULLBAND: endband = 21; break; - default: - celt_assert(0); - break; } - MUST_SUCCEED(celt_decoder_ctl(celt_dec, CELT_SET_END_BAND(endband))); + celt_decoder_ctl(celt_dec, CELT_SET_END_BAND(endband)); + celt_decoder_ctl(celt_dec, CELT_SET_CHANNELS(st->stream_channels)); + } + + if (redundancy) + { + transition = 0; + pcm_transition_silk_size=ALLOC_NONE; + } + + ALLOC(pcm_transition_silk, pcm_transition_silk_size, opus_val16); + + if (transition && mode != MODE_CELT_ONLY) + { + pcm_transition = pcm_transition_silk; + opus_decode_frame(st, NULL, 0, pcm_transition, IMIN(F5, audiosize), 0); } - MUST_SUCCEED(celt_decoder_ctl(celt_dec, CELT_SET_CHANNELS(st->stream_channels))); /* Only allocation memory for redundancy if/when needed */ redundant_audio_size = redundancy ? F5*st->channels : ALLOC_NONE; @@ -499,21 +471,21 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data, /* 5 ms redundant frame for CELT->SILK*/ if (redundancy && celt_to_silk) { - MUST_SUCCEED(celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(0))); + celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(0)); celt_decode_with_ec(celt_dec, data+len, redundancy_bytes, redundant_audio, F5, NULL, 0); - MUST_SUCCEED(celt_decoder_ctl(celt_dec, OPUS_GET_FINAL_RANGE(&redundant_rng))); + celt_decoder_ctl(celt_dec, OPUS_GET_FINAL_RANGE(&redundant_rng)); } /* MUST be after PLC */ - MUST_SUCCEED(celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(start_band))); + celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(start_band)); if (mode != MODE_SILK_ONLY) { int celt_frame_size = IMIN(F20, frame_size); /* Make sure to discard any previous CELT state */ if (mode != st->prev_mode && st->prev_mode > 0 && !st->prev_redundancy) - MUST_SUCCEED(celt_decoder_ctl(celt_dec, OPUS_RESET_STATE)); + celt_decoder_ctl(celt_dec, OPUS_RESET_STATE); /* Decode CELT */ celt_ret = celt_decode_with_ec(celt_dec, decode_fec ? NULL : data, len, pcm, celt_frame_size, &dec, celt_accum); @@ -528,7 +500,7 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data, do a fade-out by decoding a silence frame */ if (st->prev_mode == MODE_HYBRID && !(redundancy && celt_to_silk && st->prev_redundancy) ) { - MUST_SUCCEED(celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(0))); + celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(0)); celt_decode_with_ec(celt_dec, silence, 2, pcm, F2_5, NULL, celt_accum); } } @@ -546,18 +518,18 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data, { const CELTMode *celt_mode; - MUST_SUCCEED(celt_decoder_ctl(celt_dec, CELT_GET_MODE(&celt_mode))); + celt_decoder_ctl(celt_dec, CELT_GET_MODE(&celt_mode)); window = celt_mode->window; } /* 5 ms redundant frame for SILK->CELT */ if (redundancy && !celt_to_silk) { - MUST_SUCCEED(celt_decoder_ctl(celt_dec, OPUS_RESET_STATE)); - MUST_SUCCEED(celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(0))); + celt_decoder_ctl(celt_dec, OPUS_RESET_STATE); + celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(0)); celt_decode_with_ec(celt_dec, data+len, redundancy_bytes, redundant_audio, F5, NULL, 0); - MUST_SUCCEED(celt_decoder_ctl(celt_dec, OPUS_GET_FINAL_RANGE(&redundant_rng))); + celt_decoder_ctl(celt_dec, OPUS_GET_FINAL_RANGE(&redundant_rng)); smooth_fade(pcm+st->channels*(frame_size-F2_5), redundant_audio+st->channels*F2_5, pcm+st->channels*(frame_size-F2_5), F2_5, st->channels, window, st->Fs); } @@ -633,7 +605,6 @@ int opus_decode_native(OpusDecoder *st, const unsigned char *data, int packet_frame_size, packet_bandwidth, packet_mode, packet_stream_channels; /* 48 x 2.5 ms = 120 ms */ opus_int16 size[48]; - VALIDATE_OPUS_DECODER(st); if (decode_fec<0 || decode_fec>1) return OPUS_BAD_ARG; /* For FEC/PLC, frame_size has to be to have a multiple of 2.5 ms */ @@ -769,7 +740,6 @@ int opus_decode_float(OpusDecoder *st, const unsigned char *data, else return OPUS_INVALID_PACKET; } - celt_assert(st->channels == 1 || st->channels == 2); ALLOC(out, frame_size*st->channels, opus_int16); ret = opus_decode_native(st, data, len, out, frame_size, decode_fec, 0, NULL, 0); @@ -807,7 +777,6 @@ int opus_decode(OpusDecoder *st, const unsigned char *data, else return OPUS_INVALID_PACKET; } - celt_assert(st->channels == 1 || st->channels == 2); ALLOC(out, frame_size*st->channels, float); ret = opus_decode_native(st, data, len, out, frame_size, decode_fec, 0, NULL, 1); @@ -895,7 +864,7 @@ int opus_decoder_ctl(OpusDecoder *st, int request, ...) goto bad_arg; } if (st->prev_mode == MODE_CELT_ONLY) - ret = celt_decoder_ctl(celt_dec, OPUS_GET_PITCH(value)); + celt_decoder_ctl(celt_dec, OPUS_GET_PITCH(value)); else *value = st->DecControl.prevPitchLag; } @@ -922,7 +891,7 @@ int opus_decoder_ctl(OpusDecoder *st, int request, ...) break; case OPUS_GET_LAST_PACKET_DURATION_REQUEST: { - opus_int32 *value = va_arg(ap, opus_int32*); + opus_uint32 *value = va_arg(ap, opus_uint32*); if (!value) { goto bad_arg; @@ -930,26 +899,6 @@ int opus_decoder_ctl(OpusDecoder *st, int request, ...) *value = st->last_packet_duration; } break; - case OPUS_SET_PHASE_INVERSION_DISABLED_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - if(value<0 || value>1) - { - goto bad_arg; - } - ret = celt_decoder_ctl(celt_dec, OPUS_SET_PHASE_INVERSION_DISABLED(value)); - } - break; - case OPUS_GET_PHASE_INVERSION_DISABLED_REQUEST: - { - opus_int32 *value = va_arg(ap, opus_int32*); - if (!value) - { - goto bad_arg; - } - ret = celt_decoder_ctl(celt_dec, OPUS_GET_PHASE_INVERSION_DISABLED(value)); - } - break; default: /*fprintf(stderr, "unknown opus_decoder_ctl() request: %d", request);*/ ret = OPUS_UNIMPLEMENTED; diff --git a/thirdparty/opus/opus_encoder.c b/thirdparty/opus/opus_encoder.c index e98ac5b8d0..9a516a884a 100644 --- a/thirdparty/opus/opus_encoder.c +++ b/thirdparty/opus/opus_encoder.c @@ -53,10 +53,6 @@ #define MAX_ENCODER_BUFFER 480 -#ifndef DISABLE_FLOAT_API -#define PSEUDO_SNR_THRESHOLD 316.23f /* 10^(25/10) */ -#endif - typedef struct { opus_val32 XX, XY, YY; opus_val16 smoothed_width; @@ -86,7 +82,6 @@ struct OpusEncoder { int encoder_buffer; int lfe; int arch; - int use_dtx; /* general DTX for both SILK and CELT */ #ifndef DISABLE_FLOAT_API TonalityAnalysisState analysis; #endif @@ -102,8 +97,6 @@ struct OpusEncoder { int prev_channels; int prev_framesize; int bandwidth; - /* Bandwidth determined automatically from the rate (before any other adjustment) */ - int auto_bandwidth; int silk_bw_switch; /* Sampling rate (at the API level) */ int first; @@ -112,10 +105,7 @@ struct OpusEncoder { opus_val16 delay_buffer[MAX_ENCODER_BUFFER*2]; #ifndef DISABLE_FLOAT_API int detected_bandwidth; - int nb_no_activity_frames; - opus_val32 peak_signal_energy; #endif - int nonfinal_frame; /* current frame is not the final in a packet */ opus_uint32 rangeFinal; }; @@ -123,46 +113,38 @@ struct OpusEncoder { middle (memoriless) threshold. The second column is the hysteresis (difference with the middle) */ static const opus_int32 mono_voice_bandwidth_thresholds[8] = { - 9000, 700, /* NB<->MB */ - 9000, 700, /* MB<->WB */ - 13500, 1000, /* WB<->SWB */ - 14000, 2000, /* SWB<->FB */ + 11000, 1000, /* NB<->MB */ + 14000, 1000, /* MB<->WB */ + 17000, 1000, /* WB<->SWB */ + 21000, 2000, /* SWB<->FB */ }; static const opus_int32 mono_music_bandwidth_thresholds[8] = { - 9000, 700, /* NB<->MB */ - 9000, 700, /* MB<->WB */ - 11000, 1000, /* WB<->SWB */ - 12000, 2000, /* SWB<->FB */ + 12000, 1000, /* NB<->MB */ + 15000, 1000, /* MB<->WB */ + 18000, 2000, /* WB<->SWB */ + 22000, 2000, /* SWB<->FB */ }; static const opus_int32 stereo_voice_bandwidth_thresholds[8] = { - 9000, 700, /* NB<->MB */ - 9000, 700, /* MB<->WB */ - 13500, 1000, /* WB<->SWB */ - 14000, 2000, /* SWB<->FB */ + 11000, 1000, /* NB<->MB */ + 14000, 1000, /* MB<->WB */ + 21000, 2000, /* WB<->SWB */ + 28000, 2000, /* SWB<->FB */ }; static const opus_int32 stereo_music_bandwidth_thresholds[8] = { - 9000, 700, /* NB<->MB */ - 9000, 700, /* MB<->WB */ - 11000, 1000, /* WB<->SWB */ - 12000, 2000, /* SWB<->FB */ + 12000, 1000, /* NB<->MB */ + 18000, 2000, /* MB<->WB */ + 21000, 2000, /* WB<->SWB */ + 30000, 2000, /* SWB<->FB */ }; /* Threshold bit-rates for switching between mono and stereo */ -static const opus_int32 stereo_voice_threshold = 19000; -static const opus_int32 stereo_music_threshold = 17000; +static const opus_int32 stereo_voice_threshold = 30000; +static const opus_int32 stereo_music_threshold = 30000; /* Threshold bit-rate for switching between SILK/hybrid and CELT-only */ static const opus_int32 mode_thresholds[2][2] = { /* voice */ /* music */ - { 64000, 10000}, /* mono */ - { 44000, 10000}, /* stereo */ -}; - -static const opus_int32 fec_thresholds[] = { - 12000, 1000, /* NB */ - 14000, 1000, /* MB */ - 16000, 1000, /* WB */ - 20000, 1000, /* SWB */ - 22000, 1000, /* FB */ + { 64000, 16000}, /* mono */ + { 36000, 16000}, /* stereo */ }; int opus_encoder_get_size(int channels) @@ -263,8 +245,7 @@ int opus_encoder_init(OpusEncoder* st, opus_int32 Fs, int channels, int applicat st->bandwidth = OPUS_BANDWIDTH_FULLBAND; #ifndef DISABLE_FLOAT_API - tonality_analysis_init(&st->analysis, st->Fs); - st->analysis.application = st->application; + tonality_analysis_init(&st->analysis); #endif return OPUS_OK; @@ -342,11 +323,10 @@ static void silk_biquad_float( } #endif -static void hp_cutoff(const opus_val16 *in, opus_int32 cutoff_Hz, opus_val16 *out, opus_val32 *hp_mem, int len, int channels, opus_int32 Fs, int arch) +static void hp_cutoff(const opus_val16 *in, opus_int32 cutoff_Hz, opus_val16 *out, opus_val32 *hp_mem, int len, int channels, opus_int32 Fs) { opus_int32 B_Q28[ 3 ], A_Q28[ 2 ]; opus_int32 Fc_Q19, r_Q28, r_Q22; - (void)arch; silk_assert( cutoff_Hz <= silk_int32_MAX / SILK_FIX_CONST( 1.5 * 3.14159 / 1000, 19 ) ); Fc_Q19 = silk_DIV32_16( silk_SMULBB( SILK_FIX_CONST( 1.5 * 3.14159 / 1000, 19 ), cutoff_Hz ), Fs/1000 ); @@ -366,10 +346,9 @@ static void hp_cutoff(const opus_val16 *in, opus_int32 cutoff_Hz, opus_val16 *ou A_Q28[ 1 ] = silk_SMULWW( r_Q22, r_Q22 ); #ifdef FIXED_POINT - if( channels == 1 ) { - silk_biquad_alt_stride1( in, B_Q28, A_Q28, hp_mem, out, len ); - } else { - silk_biquad_alt_stride2( in, B_Q28, A_Q28, hp_mem, out, len, arch ); + silk_biquad_alt( in, B_Q28, A_Q28, hp_mem, out, len, channels ); + if( channels == 2 ) { + silk_biquad_alt( in+1, B_Q28, A_Q28, hp_mem+2, out+1, len, channels ); } #else silk_biquad_float( in, B_Q28, A_Q28, hp_mem, out, len, channels ); @@ -385,17 +364,21 @@ static void dc_reject(const opus_val16 *in, opus_int32 cutoff_Hz, opus_val16 *ou int c, i; int shift; - /* Approximates -round(log2(6.3*cutoff_Hz/Fs)) */ - shift=celt_ilog2(Fs/(cutoff_Hz*4)); + /* Approximates -round(log2(4.*cutoff_Hz/Fs)) */ + shift=celt_ilog2(Fs/(cutoff_Hz*3)); for (c=0;c<channels;c++) { for (i=0;i<len;i++) { - opus_val32 x, y; - x = SHL32(EXTEND32(in[channels*i+c]), 14); - y = x-hp_mem[2*c]; + opus_val32 x, tmp, y; + x = SHL32(EXTEND32(in[channels*i+c]), 15); + /* First stage */ + tmp = x-hp_mem[2*c]; hp_mem[2*c] = hp_mem[2*c] + PSHR32(x - hp_mem[2*c], shift); - out[channels*i+c] = EXTRACT16(SATURATE(PSHR32(y, 14), 32767)); + /* Second stage */ + y = tmp - hp_mem[2*c+1]; + hp_mem[2*c+1] = hp_mem[2*c+1] + PSHR32(tmp - hp_mem[2*c+1], shift); + out[channels*i+c] = EXTRACT16(SATURATE(PSHR32(y, 15), 32767)); } } } @@ -403,41 +386,24 @@ static void dc_reject(const opus_val16 *in, opus_int32 cutoff_Hz, opus_val16 *ou #else static void dc_reject(const opus_val16 *in, opus_int32 cutoff_Hz, opus_val16 *out, opus_val32 *hp_mem, int len, int channels, opus_int32 Fs) { - int i; - float coef, coef2; - coef = 6.3f*cutoff_Hz/Fs; - coef2 = 1-coef; - if (channels==2) + int c, i; + float coef; + + coef = 4.0f*cutoff_Hz/Fs; + for (c=0;c<channels;c++) { - float m0, m2; - m0 = hp_mem[0]; - m2 = hp_mem[2]; for (i=0;i<len;i++) { - opus_val32 x0, x1, out0, out1; - x0 = in[2*i+0]; - x1 = in[2*i+1]; - out0 = x0-m0; - out1 = x1-m2; - m0 = coef*x0 + VERY_SMALL + coef2*m0; - m2 = coef*x1 + VERY_SMALL + coef2*m2; - out[2*i+0] = out0; - out[2*i+1] = out1; + opus_val32 x, tmp, y; + x = in[channels*i+c]; + /* First stage */ + tmp = x-hp_mem[2*c]; + hp_mem[2*c] = hp_mem[2*c] + coef*(x - hp_mem[2*c]) + VERY_SMALL; + /* Second stage */ + y = tmp - hp_mem[2*c+1]; + hp_mem[2*c+1] = hp_mem[2*c+1] + coef*(tmp - hp_mem[2*c+1]) + VERY_SMALL; + out[channels*i+c] = y; } - hp_mem[0] = m0; - hp_mem[2] = m2; - } else { - float m0; - m0 = hp_mem[0]; - for (i=0;i<len;i++) - { - opus_val32 x, y; - x = in[i]; - y = x-m0; - m0 = coef*x + VERY_SMALL + coef2*m0; - out[i] = y; - } - hp_mem[0] = m0; } } #endif @@ -555,57 +521,287 @@ static opus_int32 user_bitrate_to_bitrate(OpusEncoder *st, int frame_size, int m } #ifndef DISABLE_FLOAT_API +/* Don't use more than 60 ms for the frame size analysis */ +#define MAX_DYNAMIC_FRAMESIZE 24 +/* Estimates how much the bitrate will be boosted based on the sub-frame energy */ +static float transient_boost(const float *E, const float *E_1, int LM, int maxM) +{ + int i; + int M; + float sumE=0, sumE_1=0; + float metric; + + M = IMIN(maxM, (1<<LM)+1); + for (i=0;i<M;i++) + { + sumE += E[i]; + sumE_1 += E_1[i]; + } + metric = sumE*sumE_1/(M*M); + /*if (LM==3) + printf("%f\n", metric);*/ + /*return metric>10 ? 1 : 0;*/ + /*return MAX16(0,1-exp(-.25*(metric-2.)));*/ + return MIN16(1,(float)sqrt(MAX16(0,.05f*(metric-2)))); +} + +/* Viterbi decoding trying to find the best frame size combination using look-ahead + + State numbering: + 0: unused + 1: 2.5 ms + 2: 5 ms (#1) + 3: 5 ms (#2) + 4: 10 ms (#1) + 5: 10 ms (#2) + 6: 10 ms (#3) + 7: 10 ms (#4) + 8: 20 ms (#1) + 9: 20 ms (#2) + 10: 20 ms (#3) + 11: 20 ms (#4) + 12: 20 ms (#5) + 13: 20 ms (#6) + 14: 20 ms (#7) + 15: 20 ms (#8) +*/ +static int transient_viterbi(const float *E, const float *E_1, int N, int frame_cost, int rate) +{ + int i; + float cost[MAX_DYNAMIC_FRAMESIZE][16]; + int states[MAX_DYNAMIC_FRAMESIZE][16]; + float best_cost; + int best_state; + float factor; + /* Take into account that we damp VBR in the 32 kb/s to 64 kb/s range. */ + if (rate<80) + factor=0; + else if (rate>160) + factor=1; + else + factor = (rate-80.f)/80.f; + /* Makes variable framesize less aggressive at lower bitrates, but I can't + find any valid theoretical justification for this (other than it seems + to help) */ + for (i=0;i<16;i++) + { + /* Impossible state */ + states[0][i] = -1; + cost[0][i] = 1e10; + } + for (i=0;i<4;i++) + { + cost[0][1<<i] = (frame_cost + rate*(1<<i))*(1+factor*transient_boost(E, E_1, i, N+1)); + states[0][1<<i] = i; + } + for (i=1;i<N;i++) + { + int j; + + /* Follow continuations */ + for (j=2;j<16;j++) + { + cost[i][j] = cost[i-1][j-1]; + states[i][j] = j-1; + } + + /* New frames */ + for(j=0;j<4;j++) + { + int k; + float min_cost; + float curr_cost; + states[i][1<<j] = 1; + min_cost = cost[i-1][1]; + for(k=1;k<4;k++) + { + float tmp = cost[i-1][(1<<(k+1))-1]; + if (tmp < min_cost) + { + states[i][1<<j] = (1<<(k+1))-1; + min_cost = tmp; + } + } + curr_cost = (frame_cost + rate*(1<<j))*(1+factor*transient_boost(E+i, E_1+i, j, N-i+1)); + cost[i][1<<j] = min_cost; + /* If part of the frame is outside the analysis window, only count part of the cost */ + if (N-i < (1<<j)) + cost[i][1<<j] += curr_cost*(float)(N-i)/(1<<j); + else + cost[i][1<<j] += curr_cost; + } + } + + best_state=1; + best_cost = cost[N-1][1]; + /* Find best end state (doesn't force a frame to end at N-1) */ + for (i=2;i<16;i++) + { + if (cost[N-1][i]<best_cost) + { + best_cost = cost[N-1][i]; + best_state = i; + } + } + + /* Follow transitions back */ + for (i=N-1;i>=0;i--) + { + /*printf("%d ", best_state);*/ + best_state = states[i][best_state]; + } + /*printf("%d\n", best_state);*/ + return best_state; +} + +static int optimize_framesize(const void *x, int len, int C, opus_int32 Fs, + int bitrate, opus_val16 tonality, float *mem, int buffering, + downmix_func downmix) +{ + int N; + int i; + float e[MAX_DYNAMIC_FRAMESIZE+4]; + float e_1[MAX_DYNAMIC_FRAMESIZE+3]; + opus_val32 memx; + int bestLM=0; + int subframe; + int pos; + int offset; + VARDECL(opus_val32, sub); + + subframe = Fs/400; + ALLOC(sub, subframe, opus_val32); + e[0]=mem[0]; + e_1[0]=1.f/(EPSILON+mem[0]); + if (buffering) + { + /* Consider the CELT delay when not in restricted-lowdelay */ + /* We assume the buffering is between 2.5 and 5 ms */ + offset = 2*subframe - buffering; + celt_assert(offset>=0 && offset <= subframe); + len -= offset; + e[1]=mem[1]; + e_1[1]=1.f/(EPSILON+mem[1]); + e[2]=mem[2]; + e_1[2]=1.f/(EPSILON+mem[2]); + pos = 3; + } else { + pos=1; + offset=0; + } + N=IMIN(len/subframe, MAX_DYNAMIC_FRAMESIZE); + /* Just silencing a warning, it's really initialized later */ + memx = 0; + for (i=0;i<N;i++) + { + float tmp; + opus_val32 tmpx; + int j; + tmp=EPSILON; + + downmix(x, sub, subframe, i*subframe+offset, 0, -2, C); + if (i==0) + memx = sub[0]; + for (j=0;j<subframe;j++) + { + tmpx = sub[j]; + tmp += (tmpx-memx)*(float)(tmpx-memx); + memx = tmpx; + } + e[i+pos] = tmp; + e_1[i+pos] = 1.f/tmp; + } + /* Hack to get 20 ms working with APPLICATION_AUDIO + The real problem is that the corresponding memory needs to use 1.5 ms + from this frame and 1 ms from the next frame */ + e[i+pos] = e[i+pos-1]; + if (buffering) + N=IMIN(MAX_DYNAMIC_FRAMESIZE, N+2); + bestLM = transient_viterbi(e, e_1, N, (int)((1.f+.5f*tonality)*(60*C+40)), bitrate/400); + mem[0] = e[1<<bestLM]; + if (buffering) + { + mem[1] = e[(1<<bestLM)+1]; + mem[2] = e[(1<<bestLM)+2]; + } + return bestLM; +} + +#endif + +#ifndef DISABLE_FLOAT_API #ifdef FIXED_POINT #define PCM2VAL(x) FLOAT2INT16(x) #else #define PCM2VAL(x) SCALEIN(x) #endif - -void downmix_float(const void *_x, opus_val32 *y, int subframe, int offset, int c1, int c2, int C) +void downmix_float(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C) { const float *x; + opus_val32 scale; int j; - x = (const float *)_x; for (j=0;j<subframe;j++) - y[j] = PCM2VAL(x[(j+offset)*C+c1]); + sub[j] = PCM2VAL(x[(j+offset)*C+c1]); if (c2>-1) { for (j=0;j<subframe;j++) - y[j] += PCM2VAL(x[(j+offset)*C+c2]); + sub[j] += PCM2VAL(x[(j+offset)*C+c2]); } else if (c2==-2) { int c; for (c=1;c<C;c++) { for (j=0;j<subframe;j++) - y[j] += PCM2VAL(x[(j+offset)*C+c]); + sub[j] += PCM2VAL(x[(j+offset)*C+c]); } } +#ifdef FIXED_POINT + scale = (1<<SIG_SHIFT); +#else + scale = 1.f; +#endif + if (C==-2) + scale /= C; + else + scale /= 2; + for (j=0;j<subframe;j++) + sub[j] *= scale; } #endif -void downmix_int(const void *_x, opus_val32 *y, int subframe, int offset, int c1, int c2, int C) +void downmix_int(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C) { const opus_int16 *x; + opus_val32 scale; int j; - x = (const opus_int16 *)_x; for (j=0;j<subframe;j++) - y[j] = x[(j+offset)*C+c1]; + sub[j] = x[(j+offset)*C+c1]; if (c2>-1) { for (j=0;j<subframe;j++) - y[j] += x[(j+offset)*C+c2]; + sub[j] += x[(j+offset)*C+c2]; } else if (c2==-2) { int c; for (c=1;c<C;c++) { for (j=0;j<subframe;j++) - y[j] += x[(j+offset)*C+c]; + sub[j] += x[(j+offset)*C+c]; } } +#ifdef FIXED_POINT + scale = (1<<SIG_SHIFT); +#else + scale = 1.f/32768; +#endif + if (C==-2) + scale /= C; + else + scale /= 2; + for (j=0;j<subframe;j++) + sub[j] *= scale; } opus_int32 frame_size_select(opus_int32 frame_size, int variable_duration, opus_int32 Fs) @@ -615,24 +811,53 @@ opus_int32 frame_size_select(opus_int32 frame_size, int variable_duration, opus_ return -1; if (variable_duration == OPUS_FRAMESIZE_ARG) new_size = frame_size; - else if (variable_duration >= OPUS_FRAMESIZE_2_5_MS && variable_duration <= OPUS_FRAMESIZE_120_MS) - { - if (variable_duration <= OPUS_FRAMESIZE_40_MS) - new_size = (Fs/400)<<(variable_duration-OPUS_FRAMESIZE_2_5_MS); - else - new_size = (variable_duration-OPUS_FRAMESIZE_2_5_MS-2)*Fs/50; - } + else if (variable_duration == OPUS_FRAMESIZE_VARIABLE) + new_size = Fs/50; + else if (variable_duration >= OPUS_FRAMESIZE_2_5_MS && variable_duration <= OPUS_FRAMESIZE_60_MS) + new_size = IMIN(3*Fs/50, (Fs/400)<<(variable_duration-OPUS_FRAMESIZE_2_5_MS)); else return -1; if (new_size>frame_size) return -1; - if (400*new_size!=Fs && 200*new_size!=Fs && 100*new_size!=Fs && - 50*new_size!=Fs && 25*new_size!=Fs && 50*new_size!=3*Fs && - 50*new_size!=4*Fs && 50*new_size!=5*Fs && 50*new_size!=6*Fs) + if (400*new_size!=Fs && 200*new_size!=Fs && 100*new_size!=Fs && + 50*new_size!=Fs && 25*new_size!=Fs && 50*new_size!=3*Fs) return -1; return new_size; } +opus_int32 compute_frame_size(const void *analysis_pcm, int frame_size, + int variable_duration, int C, opus_int32 Fs, int bitrate_bps, + int delay_compensation, downmix_func downmix +#ifndef DISABLE_FLOAT_API + , float *subframe_mem +#endif + ) +{ +#ifndef DISABLE_FLOAT_API + if (variable_duration == OPUS_FRAMESIZE_VARIABLE && frame_size >= Fs/200) + { + int LM = 3; + LM = optimize_framesize(analysis_pcm, frame_size, C, Fs, bitrate_bps, + 0, subframe_mem, delay_compensation, downmix); + while ((Fs/400<<LM)>frame_size) + LM--; + frame_size = (Fs/400<<LM); + } else +#else + (void)analysis_pcm; + (void)C; + (void)bitrate_bps; + (void)delay_compensation; + (void)downmix; +#endif + { + frame_size = frame_size_select(frame_size, variable_duration, Fs); + } + if (frame_size<0) + return -1; + return frame_size; +} + opus_val16 compute_stereo_width(const opus_val16 *pcm, int frame_size, opus_int32 Fs, StereoWidthState *mem) { opus_val32 xx, xy, yy; @@ -679,12 +904,6 @@ opus_val16 compute_stereo_width(const opus_val16 *pcm, int frame_size, opus_int3 xy += SHR32(pxy, 10); yy += SHR32(pyy, 10); } -#ifndef FIXED_POINT - if (!(xx < 1e9f) || celt_isnan(xx) || !(yy < 1e9f) || celt_isnan(yy)) - { - xy = xx = yy = 0; - } -#endif mem->XX += MULT16_32_Q15(short_alpha, xx-mem->XX); mem->XY += MULT16_32_Q15(short_alpha, xy-mem->XY); mem->YY += MULT16_32_Q15(short_alpha, yy-mem->YY); @@ -715,354 +934,6 @@ opus_val16 compute_stereo_width(const opus_val16 *pcm, int frame_size, opus_int3 return EXTRACT16(MIN32(Q15ONE, MULT16_16(20, mem->max_follower))); } -static int decide_fec(int useInBandFEC, int PacketLoss_perc, int last_fec, int mode, int *bandwidth, opus_int32 rate) -{ - int orig_bandwidth; - if (!useInBandFEC || PacketLoss_perc == 0 || mode == MODE_CELT_ONLY) - return 0; - orig_bandwidth = *bandwidth; - for (;;) - { - opus_int32 hysteresis; - opus_int32 LBRR_rate_thres_bps; - /* Compute threshold for using FEC at the current bandwidth setting */ - LBRR_rate_thres_bps = fec_thresholds[2*(*bandwidth - OPUS_BANDWIDTH_NARROWBAND)]; - hysteresis = fec_thresholds[2*(*bandwidth - OPUS_BANDWIDTH_NARROWBAND) + 1]; - if (last_fec == 1) LBRR_rate_thres_bps -= hysteresis; - if (last_fec == 0) LBRR_rate_thres_bps += hysteresis; - LBRR_rate_thres_bps = silk_SMULWB( silk_MUL( LBRR_rate_thres_bps, - 125 - silk_min( PacketLoss_perc, 25 ) ), SILK_FIX_CONST( 0.01, 16 ) ); - /* If loss <= 5%, we look at whether we have enough rate to enable FEC. - If loss > 5%, we decrease the bandwidth until we can enable FEC. */ - if (rate > LBRR_rate_thres_bps) - return 1; - else if (PacketLoss_perc <= 5) - return 0; - else if (*bandwidth > OPUS_BANDWIDTH_NARROWBAND) - (*bandwidth)--; - else - break; - } - /* Couldn't find any bandwidth to enable FEC, keep original bandwidth. */ - *bandwidth = orig_bandwidth; - return 0; -} - -static int compute_silk_rate_for_hybrid(int rate, int bandwidth, int frame20ms, int vbr, int fec, int channels) { - int entry; - int i; - int N; - int silk_rate; - static int rate_table[][5] = { - /* |total| |-------- SILK------------| - |-- No FEC -| |--- FEC ---| - 10ms 20ms 10ms 20ms */ - { 0, 0, 0, 0, 0}, - {12000, 10000, 10000, 11000, 11000}, - {16000, 13500, 13500, 15000, 15000}, - {20000, 16000, 16000, 18000, 18000}, - {24000, 18000, 18000, 21000, 21000}, - {32000, 22000, 22000, 28000, 28000}, - {64000, 38000, 38000, 50000, 50000} - }; - /* Do the allocation per-channel. */ - rate /= channels; - entry = 1 + frame20ms + 2*fec; - N = sizeof(rate_table)/sizeof(rate_table[0]); - for (i=1;i<N;i++) - { - if (rate_table[i][0] > rate) break; - } - if (i == N) - { - silk_rate = rate_table[i-1][entry]; - /* For now, just give 50% of the extra bits to SILK. */ - silk_rate += (rate-rate_table[i-1][0])/2; - } else { - opus_int32 lo, hi, x0, x1; - lo = rate_table[i-1][entry]; - hi = rate_table[i][entry]; - x0 = rate_table[i-1][0]; - x1 = rate_table[i][0]; - silk_rate = (lo*(x1-rate) + hi*(rate-x0))/(x1-x0); - } - if (!vbr) - { - /* Tiny boost to SILK for CBR. We should probably tune this better. */ - silk_rate += 100; - } - if (bandwidth==OPUS_BANDWIDTH_SUPERWIDEBAND) - silk_rate += 300; - silk_rate *= channels; - /* Small adjustment for stereo (calibrated for 32 kb/s, haven't tried other bitrates). */ - if (channels == 2 && rate >= 12000) - silk_rate -= 1000; - return silk_rate; -} - -/* Returns the equivalent bitrate corresponding to 20 ms frames, - complexity 10 VBR operation. */ -static opus_int32 compute_equiv_rate(opus_int32 bitrate, int channels, - int frame_rate, int vbr, int mode, int complexity, int loss) -{ - opus_int32 equiv; - equiv = bitrate; - /* Take into account overhead from smaller frames. */ - if (frame_rate > 50) - equiv -= (40*channels+20)*(frame_rate - 50); - /* CBR is about a 8% penalty for both SILK and CELT. */ - if (!vbr) - equiv -= equiv/12; - /* Complexity makes about 10% difference (from 0 to 10) in general. */ - equiv = equiv * (90+complexity)/100; - if (mode == MODE_SILK_ONLY || mode == MODE_HYBRID) - { - /* SILK complexity 0-1 uses the non-delayed-decision NSQ, which - costs about 20%. */ - if (complexity<2) - equiv = equiv*4/5; - equiv -= equiv*loss/(6*loss + 10); - } else if (mode == MODE_CELT_ONLY) { - /* CELT complexity 0-4 doesn't have the pitch filter, which costs - about 10%. */ - if (complexity<5) - equiv = equiv*9/10; - } else { - /* Mode not known yet */ - /* Half the SILK loss*/ - equiv -= equiv*loss/(12*loss + 20); - } - return equiv; -} - -#ifndef DISABLE_FLOAT_API - -int is_digital_silence(const opus_val16* pcm, int frame_size, int channels, int lsb_depth) -{ - int silence = 0; - opus_val32 sample_max = 0; -#ifdef MLP_TRAINING - return 0; -#endif - sample_max = celt_maxabs16(pcm, frame_size*channels); - -#ifdef FIXED_POINT - silence = (sample_max == 0); - (void)lsb_depth; -#else - silence = (sample_max <= (opus_val16) 1 / (1 << lsb_depth)); -#endif - - return silence; -} - -#ifdef FIXED_POINT -static opus_val32 compute_frame_energy(const opus_val16 *pcm, int frame_size, int channels, int arch) -{ - int i; - opus_val32 sample_max; - int max_shift; - int shift; - opus_val32 energy = 0; - int len = frame_size*channels; - (void)arch; - /* Max amplitude in the signal */ - sample_max = celt_maxabs16(pcm, len); - - /* Compute the right shift required in the MAC to avoid an overflow */ - max_shift = celt_ilog2(len); - shift = IMAX(0, (celt_ilog2(sample_max) << 1) + max_shift - 28); - - /* Compute the energy */ - for (i=0; i<len; i++) - energy += SHR32(MULT16_16(pcm[i], pcm[i]), shift); - - /* Normalize energy by the frame size and left-shift back to the original position */ - energy /= len; - energy = SHL32(energy, shift); - - return energy; -} -#else -static opus_val32 compute_frame_energy(const opus_val16 *pcm, int frame_size, int channels, int arch) -{ - int len = frame_size*channels; - return celt_inner_prod(pcm, pcm, len, arch)/len; -} -#endif - -/* Decides if DTX should be turned on (=1) or off (=0) */ -static int decide_dtx_mode(float activity_probability, /* probability that current frame contains speech/music */ - int *nb_no_activity_frames, /* number of consecutive frames with no activity */ - opus_val32 peak_signal_energy, /* peak energy of desired signal detected so far */ - const opus_val16 *pcm, /* input pcm signal */ - int frame_size, /* frame size */ - int channels, - int is_silence, /* only digital silence detected in this frame */ - int arch - ) -{ - opus_val32 noise_energy; - - if (!is_silence) - { - if (activity_probability < DTX_ACTIVITY_THRESHOLD) /* is noise */ - { - noise_energy = compute_frame_energy(pcm, frame_size, channels, arch); - - /* but is sufficiently quiet */ - is_silence = peak_signal_energy >= (PSEUDO_SNR_THRESHOLD * noise_energy); - } - } - - if (is_silence) - { - /* The number of consecutive DTX frames should be within the allowed bounds */ - (*nb_no_activity_frames)++; - - if (*nb_no_activity_frames > NB_SPEECH_FRAMES_BEFORE_DTX) - { - if (*nb_no_activity_frames <= (NB_SPEECH_FRAMES_BEFORE_DTX + MAX_CONSECUTIVE_DTX)) - /* Valid frame for DTX! */ - return 1; - else - (*nb_no_activity_frames) = NB_SPEECH_FRAMES_BEFORE_DTX; - } - } else - (*nb_no_activity_frames) = 0; - - return 0; -} - -#endif - -static opus_int32 encode_multiframe_packet(OpusEncoder *st, - const opus_val16 *pcm, - int nb_frames, - int frame_size, - unsigned char *data, - opus_int32 out_data_bytes, - int to_celt, - int lsb_depth, - int float_api) -{ - int i; - int ret = 0; - VARDECL(unsigned char, tmp_data); - int bak_mode, bak_bandwidth, bak_channels, bak_to_mono; - VARDECL(OpusRepacketizer, rp); - int max_header_bytes; - opus_int32 bytes_per_frame; - opus_int32 cbr_bytes; - opus_int32 repacketize_len; - int tmp_len; - ALLOC_STACK; - - /* Worst cases: - * 2 frames: Code 2 with different compressed sizes - * >2 frames: Code 3 VBR */ - max_header_bytes = nb_frames == 2 ? 3 : (2+(nb_frames-1)*2); - - if (st->use_vbr || st->user_bitrate_bps==OPUS_BITRATE_MAX) - repacketize_len = out_data_bytes; - else { - cbr_bytes = 3*st->bitrate_bps/(3*8*st->Fs/(frame_size*nb_frames)); - repacketize_len = IMIN(cbr_bytes, out_data_bytes); - } - bytes_per_frame = IMIN(1276, 1+(repacketize_len-max_header_bytes)/nb_frames); - - ALLOC(tmp_data, nb_frames*bytes_per_frame, unsigned char); - ALLOC(rp, 1, OpusRepacketizer); - opus_repacketizer_init(rp); - - bak_mode = st->user_forced_mode; - bak_bandwidth = st->user_bandwidth; - bak_channels = st->force_channels; - - st->user_forced_mode = st->mode; - st->user_bandwidth = st->bandwidth; - st->force_channels = st->stream_channels; - - bak_to_mono = st->silk_mode.toMono; - if (bak_to_mono) - st->force_channels = 1; - else - st->prev_channels = st->stream_channels; - - for (i=0;i<nb_frames;i++) - { - st->silk_mode.toMono = 0; - st->nonfinal_frame = i<(nb_frames-1); - - /* When switching from SILK/Hybrid to CELT, only ask for a switch at the last frame */ - if (to_celt && i==nb_frames-1) - st->user_forced_mode = MODE_CELT_ONLY; - - tmp_len = opus_encode_native(st, pcm+i*(st->channels*frame_size), frame_size, - tmp_data+i*bytes_per_frame, bytes_per_frame, lsb_depth, NULL, 0, 0, 0, 0, - NULL, float_api); - - if (tmp_len<0) - { - RESTORE_STACK; - return OPUS_INTERNAL_ERROR; - } - - ret = opus_repacketizer_cat(rp, tmp_data+i*bytes_per_frame, tmp_len); - - if (ret<0) - { - RESTORE_STACK; - return OPUS_INTERNAL_ERROR; - } - } - - ret = opus_repacketizer_out_range_impl(rp, 0, nb_frames, data, repacketize_len, 0, !st->use_vbr); - - if (ret<0) - { - RESTORE_STACK; - return OPUS_INTERNAL_ERROR; - } - - /* Discard configs that were forced locally for the purpose of repacketization */ - st->user_forced_mode = bak_mode; - st->user_bandwidth = bak_bandwidth; - st->force_channels = bak_channels; - st->silk_mode.toMono = bak_to_mono; - - RESTORE_STACK; - return ret; -} - -static int compute_redundancy_bytes(opus_int32 max_data_bytes, opus_int32 bitrate_bps, int frame_rate, int channels) -{ - int redundancy_bytes_cap; - int redundancy_bytes; - opus_int32 redundancy_rate; - int base_bits; - opus_int32 available_bits; - base_bits = (40*channels+20); - - /* Equivalent rate for 5 ms frames. */ - redundancy_rate = bitrate_bps + base_bits*(200 - frame_rate); - /* For VBR, further increase the bitrate if we can afford it. It's pretty short - and we'll avoid artefacts. */ - redundancy_rate = 3*redundancy_rate/2; - redundancy_bytes = redundancy_rate/1600; - - /* Compute the max rate we can use given CBR or VBR with cap. */ - available_bits = max_data_bytes*8 - 2*base_bits; - redundancy_bytes_cap = (available_bits*240/(240+48000/frame_rate) + base_bits)/8; - redundancy_bytes = IMIN(redundancy_bytes, redundancy_bytes_cap); - /* It we can't get enough bits for redundancy to be worth it, rely on the decoder PLC. */ - if (redundancy_bytes > 4 + 8*channels) - redundancy_bytes = IMIN(257, redundancy_bytes); - else - redundancy_bytes = 0; - return redundancy_bytes; -} - opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_size, unsigned char *data, opus_int32 out_data_bytes, int lsb_depth, const void *analysis_pcm, opus_int32 analysis_size, int c1, int c2, @@ -1100,7 +971,6 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ AnalysisInfo analysis_info; int analysis_read_pos_bak=-1; int analysis_read_subframe_bak=-1; - int is_silence = 0; #endif VARDECL(opus_val16, tmp_prefill); @@ -1109,19 +979,15 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ max_data_bytes = IMIN(1276, out_data_bytes); st->rangeFinal = 0; - if (frame_size <= 0 || max_data_bytes <= 0) + if ((!st->variable_duration && 400*frame_size != st->Fs && 200*frame_size != st->Fs && 100*frame_size != st->Fs && + 50*frame_size != st->Fs && 25*frame_size != st->Fs && 50*frame_size != 3*st->Fs) + || (400*frame_size < st->Fs) + || max_data_bytes<=0 + ) { RESTORE_STACK; return OPUS_BAD_ARG; } - - /* Cannot encode 100 ms in 1 byte */ - if (max_data_bytes==1 && st->Fs==(frame_size*10)) - { - RESTORE_STACK; - return OPUS_BUFFER_TOO_SMALL; - } - silk_enc = (char*)st+st->silk_enc_offset; celt_enc = (CELTEncoder*)((char*)st+st->celt_enc_offset); if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY) @@ -1135,55 +1001,31 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ #ifndef DISABLE_FLOAT_API analysis_info.valid = 0; #ifdef FIXED_POINT - if (st->silk_mode.complexity >= 10 && st->Fs>=16000) + if (st->silk_mode.complexity >= 10 && st->Fs==48000) #else - if (st->silk_mode.complexity >= 7 && st->Fs>=16000) + if (st->silk_mode.complexity >= 7 && st->Fs==48000) #endif { - is_silence = is_digital_silence(pcm, frame_size, st->channels, lsb_depth); analysis_read_pos_bak = st->analysis.read_pos; analysis_read_subframe_bak = st->analysis.read_subframe; run_analysis(&st->analysis, celt_mode, analysis_pcm, analysis_size, frame_size, c1, c2, analysis_channels, st->Fs, lsb_depth, downmix, &analysis_info); - - /* Track the peak signal energy */ - if (!is_silence && analysis_info.activity_probability > DTX_ACTIVITY_THRESHOLD) - st->peak_signal_energy = MAX32(MULT16_32_Q15(QCONST16(0.999f, 15), st->peak_signal_energy), - compute_frame_energy(pcm, frame_size, st->channels, st->arch)); - } else if (st->analysis.initialized) { - tonality_analysis_reset(&st->analysis); } #else (void)analysis_pcm; (void)analysis_size; - (void)c1; - (void)c2; - (void)analysis_channels; - (void)downmix; #endif -#ifndef DISABLE_FLOAT_API - /* Reset voice_ratio if this frame is not silent or if analysis is disabled. - * Otherwise, preserve voice_ratio from the last non-silent frame */ - if (!is_silence) - st->voice_ratio = -1; + st->voice_ratio = -1; +#ifndef DISABLE_FLOAT_API st->detected_bandwidth = 0; if (analysis_info.valid) { int analysis_bandwidth; if (st->signal_type == OPUS_AUTO) - { - float prob; - if (st->prev_mode == 0) - prob = analysis_info.music_prob; - else if (st->prev_mode == MODE_CELT_ONLY) - prob = analysis_info.music_prob_max; - else - prob = analysis_info.music_prob_min; - st->voice_ratio = (int)floor(.5+100*(1-prob)); - } + st->voice_ratio = (int)floor(.5+100*(1-analysis_info.music_prob)); analysis_bandwidth = analysis_info.bandwidth; if (analysis_bandwidth<=12) @@ -1197,8 +1039,6 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ else st->detected_bandwidth = OPUS_BANDWIDTH_FULLBAND; } -#else - st->voice_ratio = -1; #endif if (st->channels==2 && st->force_channels!=1) @@ -1212,13 +1052,12 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ if (!st->use_vbr) { int cbrBytes; - /* Multiply by 12 to make sure the division is exact. */ - int frame_rate12 = 12*st->Fs/frame_size; + /* Multiply by 3 to make sure the division is exact. */ + int frame_rate3 = 3*st->Fs/frame_size; /* We need to make sure that "int" values always fit in 16 bits. */ - cbrBytes = IMIN( (12*st->bitrate_bps/8 + frame_rate12/2)/frame_rate12, max_data_bytes); - st->bitrate_bps = cbrBytes*(opus_int32)frame_rate12*8/12; - /* Make sure we provide at least one byte to avoid failing. */ - max_data_bytes = IMAX(1, cbrBytes); + cbrBytes = IMIN( (3*st->bitrate_bps/8 + frame_rate3/2)/frame_rate3, max_data_bytes); + st->bitrate_bps = cbrBytes*(opus_int32)frame_rate3*8/3; + max_data_bytes = cbrBytes; } if (max_data_bytes<3 || st->bitrate_bps < 3*frame_rate*8 || (frame_rate<50 && (max_data_bytes*frame_rate<300 || st->bitrate_bps < 2400))) @@ -1226,63 +1065,25 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ /*If the space is too low to do something useful, emit 'PLC' frames.*/ int tocmode = st->mode; int bw = st->bandwidth == 0 ? OPUS_BANDWIDTH_NARROWBAND : st->bandwidth; - int packet_code = 0; - int num_multiframes = 0; - if (tocmode==0) tocmode = MODE_SILK_ONLY; if (frame_rate>100) tocmode = MODE_CELT_ONLY; - /* 40 ms -> 2 x 20 ms if in CELT_ONLY or HYBRID mode */ - if (frame_rate==25 && tocmode!=MODE_SILK_ONLY) - { - frame_rate = 50; - packet_code = 1; - } - - /* >= 60 ms frames */ - if (frame_rate<=16) - { - /* 1 x 60 ms, 2 x 40 ms, 2 x 60 ms */ - if (out_data_bytes==1 || (tocmode==MODE_SILK_ONLY && frame_rate!=10)) - { - tocmode = MODE_SILK_ONLY; - - packet_code = frame_rate <= 12; - frame_rate = frame_rate == 12 ? 25 : 16; - } - else - { - num_multiframes = 50/frame_rate; - frame_rate = 50; - packet_code = 3; - } - } - + if (frame_rate < 50) + tocmode = MODE_SILK_ONLY; if(tocmode==MODE_SILK_ONLY&&bw>OPUS_BANDWIDTH_WIDEBAND) bw=OPUS_BANDWIDTH_WIDEBAND; else if (tocmode==MODE_CELT_ONLY&&bw==OPUS_BANDWIDTH_MEDIUMBAND) bw=OPUS_BANDWIDTH_NARROWBAND; else if (tocmode==MODE_HYBRID&&bw<=OPUS_BANDWIDTH_SUPERWIDEBAND) bw=OPUS_BANDWIDTH_SUPERWIDEBAND; - data[0] = gen_toc(tocmode, frame_rate, bw, st->stream_channels); - data[0] |= packet_code; - - ret = packet_code <= 1 ? 1 : 2; - - max_data_bytes = IMAX(max_data_bytes, ret); - - if (packet_code==3) - data[1] = num_multiframes; - + ret = 1; if (!st->use_vbr) { ret = opus_packet_pad(data, ret, max_data_bytes); if (ret == OPUS_OK) ret = max_data_bytes; - else - ret = OPUS_INTERNAL_ERROR; } RESTORE_STACK; return ret; @@ -1290,8 +1091,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ max_rate = frame_rate*max_data_bytes*8; /* Equivalent 20-ms rate for mode/channel/bandwidth decisions */ - equiv_rate = compute_equiv_rate(st->bitrate_bps, st->channels, st->Fs/frame_size, - st->use_vbr, 0, st->silk_mode.complexity, st->silk_mode.packetLossPercentage); + equiv_rate = st->bitrate_bps - (40*st->channels+20)*(st->Fs/frame_size - 50); if (st->signal_type == OPUS_SIGNAL_VOICE) voice_est = 127; @@ -1332,17 +1132,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ } #endif } - /* Update equivalent rate for channels decision. */ - equiv_rate = compute_equiv_rate(st->bitrate_bps, st->stream_channels, st->Fs/frame_size, - st->use_vbr, 0, st->silk_mode.complexity, st->silk_mode.packetLossPercentage); - - /* Allow SILK DTX if DTX is enabled but the generalized DTX cannot be used, - e.g. because of the complexity setting or sample rate. */ -#ifndef DISABLE_FLOAT_API - st->silk_mode.useDTX = st->use_dtx && !(analysis_info.valid || is_silence); -#else - st->silk_mode.useDTX = st->use_dtx; -#endif + equiv_rate = st->bitrate_bps - (40*st->stream_channels+20)*(st->Fs/frame_size - 50); /* Mode selection depending on application and signal type */ if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY) @@ -1391,15 +1181,10 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ /* When FEC is enabled and there's enough packet loss, use SILK */ if (st->silk_mode.useInBandFEC && st->silk_mode.packetLossPercentage > (128-voice_est)>>4) st->mode = MODE_SILK_ONLY; - /* When encoding voice and DTX is enabled but the generalized DTX cannot be used, - use SILK in order to make use of its DTX. */ + /* When encoding voice and DTX is enabled, set the encoder to SILK mode (at least for now) */ if (st->silk_mode.useDTX && voice_est > 100) st->mode = MODE_SILK_ONLY; #endif - - /* If max_data_bytes represents less than 6 kb/s, switch to CELT-only mode */ - if (max_data_bytes < (frame_rate > 50 ? 9000 : 6000)*frame_size / (st->Fs * 8)) - st->mode = MODE_CELT_ONLY; } else { st->mode = st->user_forced_mode; } @@ -1409,6 +1194,19 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ st->mode = MODE_CELT_ONLY; if (st->lfe) st->mode = MODE_CELT_ONLY; + /* If max_data_bytes represents less than 8 kb/s, switch to CELT-only mode */ + if (max_data_bytes < (frame_rate > 50 ? 12000 : 8000)*frame_size / (st->Fs * 8)) + st->mode = MODE_CELT_ONLY; + + if (st->stream_channels == 1 && st->prev_channels ==2 && st->silk_mode.toMono==0 + && st->mode != MODE_CELT_ONLY && st->prev_mode != MODE_CELT_ONLY) + { + /* Delay stereo->mono transition by two frames so that SILK can do a smooth downmix */ + st->silk_mode.toMono = 1; + st->stream_channels = 2; + } else { + st->silk_mode.toMono = 0; + } if (st->prev_mode > 0 && ((st->mode != MODE_CELT_ONLY && st->prev_mode == MODE_CELT_ONLY) || @@ -1428,22 +1226,23 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ } } } - - /* When encoding multiframes, we can ask for a switch to CELT only in the last frame. This switch - * is processed above as the requested mode shouldn't interrupt stereo->mono transition. */ - if (st->stream_channels == 1 && st->prev_channels ==2 && st->silk_mode.toMono==0 - && st->mode != MODE_CELT_ONLY && st->prev_mode != MODE_CELT_ONLY) + /* For the first frame at a new SILK bandwidth */ + if (st->silk_bw_switch) { - /* Delay stereo->mono transition by two frames so that SILK can do a smooth downmix */ - st->silk_mode.toMono = 1; - st->stream_channels = 2; - } else { - st->silk_mode.toMono = 0; + redundancy = 1; + celt_to_silk = 1; + st->silk_bw_switch = 0; + prefill=1; } - /* Update equivalent rate with mode decision. */ - equiv_rate = compute_equiv_rate(st->bitrate_bps, st->stream_channels, st->Fs/frame_size, - st->use_vbr, st->mode, st->silk_mode.complexity, st->silk_mode.packetLossPercentage); + if (redundancy) + { + /* Fair share of the max size allowed */ + redundancy_bytes = IMIN(257, max_data_bytes*(opus_int32)(st->Fs/200)/(frame_size+st->Fs/200)); + /* For VBR, target the actual bitrate (subject to the limit above) */ + if (st->use_vbr) + redundancy_bytes = IMIN(redundancy_bytes, st->bitrate_bps/1600); + } if (st->mode != MODE_CELT_ONLY && st->prev_mode == MODE_CELT_ONLY) { @@ -1458,7 +1257,17 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ const opus_int32 *voice_bandwidth_thresholds, *music_bandwidth_thresholds; opus_int32 bandwidth_thresholds[8]; int bandwidth = OPUS_BANDWIDTH_FULLBAND; + opus_int32 equiv_rate2; + equiv_rate2 = equiv_rate; + if (st->mode != MODE_CELT_ONLY) + { + /* Adjust the threshold +/- 10% depending on complexity */ + equiv_rate2 = equiv_rate2 * (45+st->silk_mode.complexity)/50; + /* CBR is less efficient by ~1 kb/s */ + if (!st->use_vbr) + equiv_rate2 -= 1000; + } if (st->channels==2 && st->force_channels!=1) { voice_bandwidth_thresholds = stereo_voice_bandwidth_thresholds; @@ -1479,19 +1288,15 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ hysteresis = bandwidth_thresholds[2*(bandwidth-OPUS_BANDWIDTH_MEDIUMBAND)+1]; if (!st->first) { - if (st->auto_bandwidth >= bandwidth) + if (st->bandwidth >= bandwidth) threshold -= hysteresis; else threshold += hysteresis; } - if (equiv_rate >= threshold) + if (equiv_rate2 >= threshold) break; } while (--bandwidth>OPUS_BANDWIDTH_NARROWBAND); - /* We don't use mediumband anymore, except when explicitly requested or during - mode transitions. */ - if (bandwidth == OPUS_BANDWIDTH_MEDIUMBAND) - bandwidth = OPUS_BANDWIDTH_WIDEBAND; - st->bandwidth = st->auto_bandwidth = bandwidth; + st->bandwidth = bandwidth; /* Prevents any transition to SWB/FB until the SILK layer has fully switched to WB mode and turned the variable LP filter off */ if (!st->first && st->mode != MODE_CELT_ONLY && !st->silk_mode.inWBmodeWithoutVariableLP && st->bandwidth > OPUS_BANDWIDTH_WIDEBAND) @@ -1544,8 +1349,6 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ st->bandwidth = IMIN(st->bandwidth, st->detected_bandwidth); } #endif - st->silk_mode.LBRR_coded = decide_fec(st->silk_mode.useInBandFEC, st->silk_mode.packetLossPercentage, - st->silk_mode.LBRR_coded, st->mode, &st->bandwidth, equiv_rate); celt_encoder_ctl(celt_enc, OPUS_SET_LSB_DEPTH(lsb_depth)); /* CELT mode doesn't support mediumband, use wideband instead */ @@ -1554,34 +1357,15 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ if (st->lfe) st->bandwidth = OPUS_BANDWIDTH_NARROWBAND; - curr_bandwidth = st->bandwidth; - - /* Chooses the appropriate mode for speech - *NEVER* switch to/from CELT-only mode here as this will invalidate some assumptions */ - if (st->mode == MODE_SILK_ONLY && curr_bandwidth > OPUS_BANDWIDTH_WIDEBAND) - st->mode = MODE_HYBRID; - if (st->mode == MODE_HYBRID && curr_bandwidth <= OPUS_BANDWIDTH_WIDEBAND) - st->mode = MODE_SILK_ONLY; - - /* Can't support higher than >60 ms frames, and >20 ms when in Hybrid or CELT-only modes */ - if ((frame_size > st->Fs/50 && (st->mode != MODE_SILK_ONLY)) || frame_size > 3*st->Fs/50) + /* Can't support higher than wideband for >20 ms frames */ + if (frame_size > st->Fs/50 && (st->mode == MODE_CELT_ONLY || st->bandwidth > OPUS_BANDWIDTH_WIDEBAND)) { - int enc_frame_size; + VARDECL(unsigned char, tmp_data); int nb_frames; - - if (st->mode == MODE_SILK_ONLY) - { - if (frame_size == 2*st->Fs/25) /* 80 ms -> 2x 40 ms */ - enc_frame_size = st->Fs/25; - else if (frame_size == 3*st->Fs/25) /* 120 ms -> 2x 60 ms */ - enc_frame_size = 3*st->Fs/50; - else /* 100 ms -> 5x 20 ms */ - enc_frame_size = st->Fs/50; - } - else - enc_frame_size = st->Fs/50; - - nb_frames = frame_size/enc_frame_size; + int bak_mode, bak_bandwidth, bak_channels, bak_to_mono; + VARDECL(OpusRepacketizer, rp); + opus_int32 bytes_per_frame; + opus_int32 repacketize_len; #ifndef DISABLE_FLOAT_API if (analysis_read_pos_bak!= -1) @@ -1591,34 +1375,74 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ } #endif - ret = encode_multiframe_packet(st, pcm, nb_frames, enc_frame_size, data, - out_data_bytes, to_celt, lsb_depth, float_api); + nb_frames = frame_size > st->Fs/25 ? 3 : 2; + bytes_per_frame = IMIN(1276,(out_data_bytes-3)/nb_frames); - RESTORE_STACK; - return ret; - } + ALLOC(tmp_data, nb_frames*bytes_per_frame, unsigned char); - /* For the first frame at a new SILK bandwidth */ - if (st->silk_bw_switch) - { - redundancy = 1; - celt_to_silk = 1; - st->silk_bw_switch = 0; - /* Do a prefill without reseting the sampling rate control. */ - prefill=2; - } + ALLOC(rp, 1, OpusRepacketizer); + opus_repacketizer_init(rp); - /* If we decided to go with CELT, make sure redundancy is off, no matter what - we decided earlier. */ - if (st->mode == MODE_CELT_ONLY) - redundancy = 0; + bak_mode = st->user_forced_mode; + bak_bandwidth = st->user_bandwidth; + bak_channels = st->force_channels; - if (redundancy) - { - redundancy_bytes = compute_redundancy_bytes(max_data_bytes, st->bitrate_bps, frame_rate, st->stream_channels); - if (redundancy_bytes == 0) - redundancy = 0; + st->user_forced_mode = st->mode; + st->user_bandwidth = st->bandwidth; + st->force_channels = st->stream_channels; + bak_to_mono = st->silk_mode.toMono; + + if (bak_to_mono) + st->force_channels = 1; + else + st->prev_channels = st->stream_channels; + for (i=0;i<nb_frames;i++) + { + int tmp_len; + st->silk_mode.toMono = 0; + /* When switching from SILK/Hybrid to CELT, only ask for a switch at the last frame */ + if (to_celt && i==nb_frames-1) + st->user_forced_mode = MODE_CELT_ONLY; + tmp_len = opus_encode_native(st, pcm+i*(st->channels*st->Fs/50), st->Fs/50, + tmp_data+i*bytes_per_frame, bytes_per_frame, lsb_depth, + NULL, 0, c1, c2, analysis_channels, downmix, float_api); + if (tmp_len<0) + { + RESTORE_STACK; + return OPUS_INTERNAL_ERROR; + } + ret = opus_repacketizer_cat(rp, tmp_data+i*bytes_per_frame, tmp_len); + if (ret<0) + { + RESTORE_STACK; + return OPUS_INTERNAL_ERROR; + } + } + if (st->use_vbr) + repacketize_len = out_data_bytes; + else + repacketize_len = IMIN(3*st->bitrate_bps/(3*8*50/nb_frames), out_data_bytes); + ret = opus_repacketizer_out_range_impl(rp, 0, nb_frames, data, repacketize_len, 0, !st->use_vbr); + if (ret<0) + { + RESTORE_STACK; + return OPUS_INTERNAL_ERROR; + } + st->user_forced_mode = bak_mode; + st->user_bandwidth = bak_bandwidth; + st->force_channels = bak_channels; + st->silk_mode.toMono = bak_to_mono; + RESTORE_STACK; + return ret; } + curr_bandwidth = st->bandwidth; + + /* Chooses the appropriate mode for speech + *NEVER* switch to/from CELT-only mode here as this will invalidate some assumptions */ + if (st->mode == MODE_SILK_ONLY && curr_bandwidth > OPUS_BANDWIDTH_WIDEBAND) + st->mode = MODE_HYBRID; + if (st->mode == MODE_HYBRID && curr_bandwidth <= OPUS_BANDWIDTH_WIDEBAND) + st->mode = MODE_SILK_ONLY; /* printf("%d %d %d %d\n", st->bitrate_bps, st->stream_channels, st->mode, curr_bandwidth); */ bytes_target = IMIN(max_data_bytes-redundancy_bytes, st->bitrate_bps * frame_size / (st->Fs * 8)) - 1; @@ -1643,7 +1467,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ if (st->application == OPUS_APPLICATION_VOIP) { - hp_cutoff(pcm, cutoff_Hz, &pcm_buf[total_buffer*st->channels], st->hp_mem, frame_size, st->channels, st->Fs, st->arch); + hp_cutoff(pcm, cutoff_Hz, &pcm_buf[total_buffer*st->channels], st->hp_mem, frame_size, st->channels, st->Fs); } else { dc_reject(pcm, 3, &pcm_buf[total_buffer*st->channels], st->hp_mem, frame_size, st->channels, st->Fs); } @@ -1668,7 +1492,6 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ if (st->mode != MODE_CELT_ONLY) { opus_int32 total_bitRate, celt_rate; - opus_int activity; #ifdef FIXED_POINT const opus_int16 *pcm_silk; #else @@ -1676,26 +1499,30 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ ALLOC(pcm_silk, st->channels*frame_size, opus_int16); #endif - activity = VAD_NO_DECISION; -#ifndef DISABLE_FLOAT_API - if( analysis_info.valid ) { - /* Inform SILK about the Opus VAD decision */ - activity = ( analysis_info.activity_probability >= DTX_ACTIVITY_THRESHOLD ); - } -#endif - /* Distribute bits between SILK and CELT */ total_bitRate = 8 * bytes_target * frame_rate; if( st->mode == MODE_HYBRID ) { + int HB_gain_ref; /* Base rate for SILK */ - st->silk_mode.bitRate = compute_silk_rate_for_hybrid(total_bitRate, - curr_bandwidth, st->Fs == 50 * frame_size, st->use_vbr, st->silk_mode.LBRR_coded, - st->stream_channels); + st->silk_mode.bitRate = st->stream_channels * ( 5000 + 1000 * ( st->Fs == 100 * frame_size ) ); + if( curr_bandwidth == OPUS_BANDWIDTH_SUPERWIDEBAND ) { + /* SILK gets 2/3 of the remaining bits */ + st->silk_mode.bitRate += ( total_bitRate - st->silk_mode.bitRate ) * 2 / 3; + } else { /* FULLBAND */ + /* SILK gets 3/5 of the remaining bits */ + st->silk_mode.bitRate += ( total_bitRate - st->silk_mode.bitRate ) * 3 / 5; + } + /* Don't let SILK use more than 80% */ + if( st->silk_mode.bitRate > total_bitRate * 4/5 ) { + st->silk_mode.bitRate = total_bitRate * 4/5; + } if (!st->energy_masking) { /* Increasingly attenuate high band when it gets allocated fewer bits */ celt_rate = total_bitRate - st->silk_mode.bitRate; - HB_gain = Q15ONE - SHR32(celt_exp2(-celt_rate * QCONST16(1.f/1024, 10)), 1); + HB_gain_ref = (curr_bandwidth == OPUS_BANDWIDTH_SUPERWIDEBAND) ? 3000 : 3600; + HB_gain = SHL32((opus_val32)celt_rate, 9) / SHR32((opus_val32)celt_rate + st->stream_channels * HB_gain_ref, 6); + HB_gain = HB_gain < (opus_val32)Q15ONE*6/7 ? HB_gain + Q15ONE/7 : Q15ONE; } } else { /* SILK gets all bits */ @@ -1742,6 +1569,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ st->silk_mode.bitRate += 3*rate_offset/5; else st->silk_mode.bitRate += rate_offset; + bytes_target += rate_offset * frame_size / (8 * st->Fs); } st->silk_mode.payloadSize_ms = 1000 * frame_size / st->Fs; @@ -1752,7 +1580,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ } else if (curr_bandwidth == OPUS_BANDWIDTH_MEDIUMBAND) { st->silk_mode.desiredInternalSampleRate = 12000; } else { - celt_assert( st->mode == MODE_HYBRID || curr_bandwidth == OPUS_BANDWIDTH_WIDEBAND ); + silk_assert( st->mode == MODE_HYBRID || curr_bandwidth == OPUS_BANDWIDTH_WIDEBAND ); st->silk_mode.desiredInternalSampleRate = 16000; } if( st->mode == MODE_HYBRID ) { @@ -1762,53 +1590,40 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ st->silk_mode.minInternalSampleRate = 8000; } - st->silk_mode.maxInternalSampleRate = 16000; if (st->mode == MODE_SILK_ONLY) { opus_int32 effective_max_rate = max_rate; + st->silk_mode.maxInternalSampleRate = 16000; if (frame_rate > 50) effective_max_rate = effective_max_rate*2/3; - if (effective_max_rate < 8000) + if (effective_max_rate < 13000) { st->silk_mode.maxInternalSampleRate = 12000; st->silk_mode.desiredInternalSampleRate = IMIN(12000, st->silk_mode.desiredInternalSampleRate); } - if (effective_max_rate < 7000) + if (effective_max_rate < 9600) { st->silk_mode.maxInternalSampleRate = 8000; st->silk_mode.desiredInternalSampleRate = IMIN(8000, st->silk_mode.desiredInternalSampleRate); } + } else { + st->silk_mode.maxInternalSampleRate = 16000; } st->silk_mode.useCBR = !st->use_vbr; /* Call SILK encoder for the low band */ + nBytes = IMIN(1275, max_data_bytes-1-redundancy_bytes); - /* Max bits for SILK, counting ToC, redundancy bytes, and optionally redundancy. */ - st->silk_mode.maxBits = (max_data_bytes-1)*8; - if (redundancy && redundancy_bytes >= 2) - { - /* Counting 1 bit for redundancy position and 20 bits for flag+size (only for hybrid). */ - st->silk_mode.maxBits -= redundancy_bytes*8 + 1; - if (st->mode == MODE_HYBRID) - st->silk_mode.maxBits -= 20; - } + st->silk_mode.maxBits = nBytes*8; + /* Only allow up to 90% of the bits for hybrid mode*/ + if (st->mode == MODE_HYBRID) + st->silk_mode.maxBits = (opus_int32)st->silk_mode.maxBits*9/10; if (st->silk_mode.useCBR) { - if (st->mode == MODE_HYBRID) - { - st->silk_mode.maxBits = IMIN(st->silk_mode.maxBits, st->silk_mode.bitRate * frame_size / st->Fs); - } - } else { - /* Constrained VBR. */ - if (st->mode == MODE_HYBRID) - { - /* Compute SILK bitrate corresponding to the max total bits available */ - opus_int32 maxBitRate = compute_silk_rate_for_hybrid(st->silk_mode.maxBits*st->Fs / frame_size, - curr_bandwidth, st->Fs == 50 * frame_size, st->use_vbr, st->silk_mode.LBRR_coded, - st->stream_channels); - st->silk_mode.maxBits = maxBitRate * frame_size / st->Fs; - } + st->silk_mode.maxBits = (st->silk_mode.bitRate * frame_size / (st->Fs * 8))*8; + /* Reduce the initial target to make it easier to reach the CBR rate */ + st->silk_mode.bitRate = IMAX(1, st->silk_mode.bitRate-2000); } if (prefill) @@ -1831,9 +1646,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ for (i=0;i<st->encoder_buffer*st->channels;i++) pcm_silk[i] = FLOAT2INT16(st->delay_buffer[i]); #endif - silk_Encode( silk_enc, &st->silk_mode, pcm_silk, st->encoder_buffer, NULL, &zero, prefill, activity ); - /* Prevent a second switch in the real encode call. */ - st->silk_mode.opusCanSwitch = 0; + silk_Encode( silk_enc, &st->silk_mode, pcm_silk, st->encoder_buffer, NULL, &zero, 1 ); } #ifdef FIXED_POINT @@ -1842,14 +1655,20 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ for (i=0;i<frame_size*st->channels;i++) pcm_silk[i] = FLOAT2INT16(pcm_buf[total_buffer*st->channels + i]); #endif - ret = silk_Encode( silk_enc, &st->silk_mode, pcm_silk, frame_size, &enc, &nBytes, 0, activity ); + ret = silk_Encode( silk_enc, &st->silk_mode, pcm_silk, frame_size, &enc, &nBytes, 0 ); if( ret ) { /*fprintf (stderr, "SILK encode error: %d\n", ret);*/ /* Handle error */ RESTORE_STACK; return OPUS_INTERNAL_ERROR; } - + if (nBytes==0) + { + st->rangeFinal = 0; + data[-1] = gen_toc(st->mode, st->Fs/frame_size, curr_bandwidth, st->stream_channels); + RESTORE_STACK; + return 1; + } /* Extract SILK internal bandwidth for signaling in first byte */ if( st->mode == MODE_SILK_ONLY ) { if( st->silk_mode.internalSampleRate == 8000 ) { @@ -1860,24 +1679,14 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ curr_bandwidth = OPUS_BANDWIDTH_WIDEBAND; } } else { - celt_assert( st->silk_mode.internalSampleRate == 16000 ); - } - - st->silk_mode.opusCanSwitch = st->silk_mode.switchReady && !st->nonfinal_frame; - - if (nBytes==0) - { - st->rangeFinal = 0; - data[-1] = gen_toc(st->mode, st->Fs/frame_size, curr_bandwidth, st->stream_channels); - RESTORE_STACK; - return 1; + silk_assert( st->silk_mode.internalSampleRate == 16000 ); } + st->silk_mode.opusCanSwitch = st->silk_mode.switchReady; /* FIXME: How do we allocate the redundancy for CBR? */ if (st->silk_mode.opusCanSwitch) { - redundancy_bytes = compute_redundancy_bytes(max_data_bytes, st->bitrate_bps, frame_rate, st->stream_channels); - redundancy = (redundancy_bytes != 0); + redundancy = 1; celt_to_silk = 0; st->silk_bw_switch = 1; } @@ -1918,18 +1727,40 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ if (st->mode == MODE_HYBRID) { + int len; + + len = (ec_tell(&enc)+7)>>3; + if (redundancy) + len += st->mode == MODE_HYBRID ? 3 : 1; if( st->use_vbr ) { - celt_encoder_ctl(celt_enc, OPUS_SET_BITRATE(st->bitrate_bps-st->silk_mode.bitRate)); - celt_encoder_ctl(celt_enc, OPUS_SET_VBR_CONSTRAINT(0)); + nb_compr_bytes = len + bytes_target - (st->silk_mode.bitRate * frame_size) / (8 * st->Fs); + } else { + /* check if SILK used up too much */ + nb_compr_bytes = len > bytes_target ? len : bytes_target; } } else { if (st->use_vbr) { + opus_int32 bonus=0; +#ifndef DISABLE_FLOAT_API + if (st->variable_duration==OPUS_FRAMESIZE_VARIABLE && frame_size != st->Fs/50) + { + bonus = (60*st->stream_channels+40)*(st->Fs/frame_size-50); + if (analysis_info.valid) + bonus = (opus_int32)(bonus*(1.f+.5f*analysis_info.tonality)); + } +#endif celt_encoder_ctl(celt_enc, OPUS_SET_VBR(1)); celt_encoder_ctl(celt_enc, OPUS_SET_VBR_CONSTRAINT(st->vbr_constraint)); - celt_encoder_ctl(celt_enc, OPUS_SET_BITRATE(st->bitrate_bps)); + celt_encoder_ctl(celt_enc, OPUS_SET_BITRATE(st->bitrate_bps+bonus)); + nb_compr_bytes = max_data_bytes-1-redundancy_bytes; + } else { + nb_compr_bytes = bytes_target; } } + + } else { + nb_compr_bytes = 0; } ALLOC(tmp_prefill, st->channels*st->Fs/400, opus_val16); @@ -1955,14 +1786,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ } st->prev_HB_gain = HB_gain; if (st->mode != MODE_HYBRID || st->stream_channels==1) - { - if (equiv_rate > 32000) - st->silk_mode.stereoWidth_Q14 = 16384; - else if (equiv_rate < 16000) - st->silk_mode.stereoWidth_Q14 = 0; - else - st->silk_mode.stereoWidth_Q14 = 16384 - 2048*(opus_int32)(32000-equiv_rate)/(equiv_rate-14000); - } + st->silk_mode.stereoWidth_Q14 = IMIN((1<<14),2*IMAX(0,equiv_rate-30000)); if( !st->energy_masking && st->channels == 2 ) { /* Apply stereo width reduction (at low bitrates) */ if( st->hybrid_stereo_width_Q14 < (1 << 14) || st->silk_mode.stereoWidth_Q14 < (1 << 14) ) { @@ -1985,23 +1809,19 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ if ( st->mode != MODE_CELT_ONLY && ec_tell(&enc)+17+20*(st->mode == MODE_HYBRID) <= 8*(max_data_bytes-1)) { /* For SILK mode, the redundancy is inferred from the length */ - if (st->mode == MODE_HYBRID) + if (st->mode == MODE_HYBRID && (redundancy || ec_tell(&enc)+37 <= 8*nb_compr_bytes)) ec_enc_bit_logp(&enc, redundancy, 12); if (redundancy) { int max_redundancy; ec_enc_bit_logp(&enc, celt_to_silk, 1); if (st->mode == MODE_HYBRID) - { - /* Reserve the 8 bits needed for the redundancy length, - and at least a few bits for CELT if possible */ - max_redundancy = (max_data_bytes-1)-((ec_tell(&enc)+8+3+7)>>3); - } + max_redundancy = (max_data_bytes-1)-nb_compr_bytes; else max_redundancy = (max_data_bytes-1)-((ec_tell(&enc)+7)>>3); /* Target the same bit-rate for redundancy as for the rest, up to a max of 257 bytes */ - redundancy_bytes = IMIN(max_redundancy, redundancy_bytes); + redundancy_bytes = IMIN(max_redundancy, st->bitrate_bps/1600); redundancy_bytes = IMIN(257, IMAX(2, redundancy_bytes)); if (st->mode == MODE_HYBRID) ec_enc_uint(&enc, redundancy_bytes-2, 256); @@ -2023,7 +1843,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ ec_enc_done(&enc); nb_compr_bytes = ret; } else { - nb_compr_bytes = (max_data_bytes-1)-redundancy_bytes; + nb_compr_bytes = IMIN((max_data_bytes-1)-redundancy_bytes, nb_compr_bytes); ec_enc_shrink(&enc, nb_compr_bytes); } @@ -2031,12 +1851,6 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ if (redundancy || st->mode != MODE_SILK_ONLY) celt_encoder_ctl(celt_enc, CELT_SET_ANALYSIS(&analysis_info)); #endif - if (st->mode == MODE_HYBRID) { - SILKInfo info; - info.signalType = st->silk_mode.signalType; - info.offset = st->silk_mode.offset; - celt_encoder_ctl(celt_enc, CELT_SET_SILK_INFO(&info)); - } /* 5 ms redundant frame for CELT->SILK */ if (redundancy && celt_to_silk) @@ -2044,7 +1858,6 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ int err; celt_encoder_ctl(celt_enc, CELT_SET_START_BAND(0)); celt_encoder_ctl(celt_enc, OPUS_SET_VBR(0)); - celt_encoder_ctl(celt_enc, OPUS_SET_BITRATE(OPUS_BITRATE_MAX)); err = celt_encode_with_ec(celt_enc, pcm_buf, st->Fs/200, data+nb_compr_bytes, redundancy_bytes, NULL); if (err < 0) { @@ -2068,25 +1881,15 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ celt_encode_with_ec(celt_enc, tmp_prefill, st->Fs/400, dummy, 2, NULL); celt_encoder_ctl(celt_enc, CELT_SET_PREDICTION(0)); } - /* If false, we already busted the budget and we'll end up with a "PLC frame" */ + /* If false, we already busted the budget and we'll end up with a "PLC packet" */ if (ec_tell(&enc) <= 8*nb_compr_bytes) { - /* Set the bitrate again if it was overridden in the redundancy code above*/ - if (redundancy && celt_to_silk && st->mode==MODE_HYBRID && st->use_vbr) - celt_encoder_ctl(celt_enc, OPUS_SET_BITRATE(st->bitrate_bps-st->silk_mode.bitRate)); - celt_encoder_ctl(celt_enc, OPUS_SET_VBR(st->use_vbr)); ret = celt_encode_with_ec(celt_enc, pcm_buf, frame_size, NULL, nb_compr_bytes, &enc); if (ret < 0) { RESTORE_STACK; return OPUS_INTERNAL_ERROR; } - /* Put CELT->SILK redundancy data in the right place. */ - if (redundancy && celt_to_silk && st->mode==MODE_HYBRID && st->use_vbr) - { - OPUS_MOVE(data+ret, data+nb_compr_bytes, redundancy_bytes); - nb_compr_bytes = nb_compr_bytes+redundancy_bytes; - } } } @@ -2102,15 +1905,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ celt_encoder_ctl(celt_enc, OPUS_RESET_STATE); celt_encoder_ctl(celt_enc, CELT_SET_START_BAND(0)); celt_encoder_ctl(celt_enc, CELT_SET_PREDICTION(0)); - celt_encoder_ctl(celt_enc, OPUS_SET_VBR(0)); - celt_encoder_ctl(celt_enc, OPUS_SET_BITRATE(OPUS_BITRATE_MAX)); - if (st->mode == MODE_HYBRID) - { - /* Shrink packet to what the encoder actually used. */ - nb_compr_bytes = ret; - ec_enc_shrink(&enc, nb_compr_bytes); - } /* NOTE: We could speed this up slightly (at the expense of code size) by just adding a function that prefills the buffer */ celt_encode_with_ec(celt_enc, pcm_buf+st->channels*(frame_size-N2-N4), N4, dummy, 2, NULL); @@ -2140,23 +1935,6 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ st->first = 0; - /* DTX decision */ -#ifndef DISABLE_FLOAT_API - if (st->use_dtx && (analysis_info.valid || is_silence)) - { - if (decide_dtx_mode(analysis_info.activity_probability, &st->nb_no_activity_frames, - st->peak_signal_energy, pcm, frame_size, st->channels, is_silence, st->arch)) - { - st->rangeFinal = 0; - data[0] = gen_toc(st->mode, st->Fs/frame_size, curr_bandwidth, st->stream_channels); - RESTORE_STACK; - return 1; - } - } else { - st->nb_no_activity_frames = 0; - } -#endif - /* In the unlikely case that the SILK encoder busted its target, tell the decoder to call the PLC */ if (ec_tell(&enc) > (max_data_bytes-1)*8) @@ -2184,6 +1962,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ if (!st->use_vbr) { if (opus_packet_pad(data, ret, max_data_bytes) != OPUS_OK) + { RESTORE_STACK; return OPUS_INTERNAL_ERROR; @@ -2202,15 +1981,18 @@ opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int analysis_fra { int i, ret; int frame_size; + int delay_compensation; VARDECL(opus_int16, in); ALLOC_STACK; - frame_size = frame_size_select(analysis_frame_size, st->variable_duration, st->Fs); - if (frame_size <= 0) - { - RESTORE_STACK; - return OPUS_BAD_ARG; - } + if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY) + delay_compensation = 0; + else + delay_compensation = st->delay_compensation; + frame_size = compute_frame_size(pcm, analysis_frame_size, + st->variable_duration, st->channels, st->Fs, st->bitrate_bps, + delay_compensation, downmix_float, st->analysis.subframe_mem); + ALLOC(in, frame_size*st->channels, opus_int16); for (i=0;i<frame_size*st->channels;i++) @@ -2226,7 +2008,18 @@ opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int analysis_fram unsigned char *data, opus_int32 out_data_bytes) { int frame_size; - frame_size = frame_size_select(analysis_frame_size, st->variable_duration, st->Fs); + int delay_compensation; + if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY) + delay_compensation = 0; + else + delay_compensation = st->delay_compensation; + frame_size = compute_frame_size(pcm, analysis_frame_size, + st->variable_duration, st->channels, st->Fs, st->bitrate_bps, + delay_compensation, downmix_int +#ifndef DISABLE_FLOAT_API + , st->analysis.subframe_mem +#endif + ); return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 16, pcm, analysis_frame_size, 0, -2, st->channels, downmix_int, 0); } @@ -2237,15 +2030,18 @@ opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int analysis_fram { int i, ret; int frame_size; + int delay_compensation; VARDECL(float, in); ALLOC_STACK; - frame_size = frame_size_select(analysis_frame_size, st->variable_duration, st->Fs); - if (frame_size <= 0) - { - RESTORE_STACK; - return OPUS_BAD_ARG; - } + if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY) + delay_compensation = 0; + else + delay_compensation = st->delay_compensation; + frame_size = compute_frame_size(pcm, analysis_frame_size, + st->variable_duration, st->channels, st->Fs, st->bitrate_bps, + delay_compensation, downmix_int, st->analysis.subframe_mem); + ALLOC(in, frame_size*st->channels, float); for (i=0;i<frame_size*st->channels;i++) @@ -2259,7 +2055,14 @@ opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int analysis_fra unsigned char *data, opus_int32 out_data_bytes) { int frame_size; - frame_size = frame_size_select(analysis_frame_size, st->variable_duration, st->Fs); + int delay_compensation; + if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY) + delay_compensation = 0; + else + delay_compensation = st->delay_compensation; + frame_size = compute_frame_size(pcm, analysis_frame_size, + st->variable_duration, st->channels, st->Fs, st->bitrate_bps, + delay_compensation, downmix_float, st->analysis.subframe_mem); return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 24, pcm, analysis_frame_size, 0, -2, st->channels, downmix_float, 1); } @@ -2290,9 +2093,6 @@ int opus_encoder_ctl(OpusEncoder *st, int request, ...) break; } st->application = value; -#ifndef DISABLE_FLOAT_API - st->analysis.application = value; -#endif } break; case OPUS_GET_APPLICATION_REQUEST: @@ -2411,7 +2211,7 @@ int opus_encoder_ctl(OpusEncoder *st, int request, ...) { goto bad_arg; } - st->use_dtx = value; + st->silk_mode.useDTX = value; } break; case OPUS_GET_DTX_REQUEST: @@ -2421,7 +2221,7 @@ int opus_encoder_ctl(OpusEncoder *st, int request, ...) { goto bad_arg; } - *value = st->use_dtx; + *value = st->silk_mode.useDTX; } break; case OPUS_SET_COMPLEXITY_REQUEST: @@ -2622,15 +2422,15 @@ int opus_encoder_ctl(OpusEncoder *st, int request, ...) case OPUS_SET_EXPERT_FRAME_DURATION_REQUEST: { opus_int32 value = va_arg(ap, opus_int32); - if (value != OPUS_FRAMESIZE_ARG && value != OPUS_FRAMESIZE_2_5_MS && - value != OPUS_FRAMESIZE_5_MS && value != OPUS_FRAMESIZE_10_MS && - value != OPUS_FRAMESIZE_20_MS && value != OPUS_FRAMESIZE_40_MS && - value != OPUS_FRAMESIZE_60_MS && value != OPUS_FRAMESIZE_80_MS && - value != OPUS_FRAMESIZE_100_MS && value != OPUS_FRAMESIZE_120_MS) + if (value != OPUS_FRAMESIZE_ARG && value != OPUS_FRAMESIZE_2_5_MS && + value != OPUS_FRAMESIZE_5_MS && value != OPUS_FRAMESIZE_10_MS && + value != OPUS_FRAMESIZE_20_MS && value != OPUS_FRAMESIZE_40_MS && + value != OPUS_FRAMESIZE_60_MS && value != OPUS_FRAMESIZE_VARIABLE) { goto bad_arg; } st->variable_duration = value; + celt_encoder_ctl(celt_enc, OPUS_SET_EXPERT_FRAME_DURATION(value)); } break; case OPUS_GET_EXPERT_FRAME_DURATION_REQUEST: @@ -2659,26 +2459,6 @@ int opus_encoder_ctl(OpusEncoder *st, int request, ...) *value = st->silk_mode.reducedDependency; } break; - case OPUS_SET_PHASE_INVERSION_DISABLED_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - if(value<0 || value>1) - { - goto bad_arg; - } - celt_encoder_ctl(celt_enc, OPUS_SET_PHASE_INVERSION_DISABLED(value)); - } - break; - case OPUS_GET_PHASE_INVERSION_DISABLED_REQUEST: - { - opus_int32 *value = va_arg(ap, opus_int32*); - if (!value) - { - goto bad_arg; - } - celt_encoder_ctl(celt_enc, OPUS_GET_PHASE_INVERSION_DISABLED(value)); - } - break; case OPUS_RESET_STATE: { void *silk_enc; @@ -2727,33 +2507,6 @@ int opus_encoder_ctl(OpusEncoder *st, int request, ...) ret = celt_encoder_ctl(celt_enc, OPUS_SET_ENERGY_MASK(value)); } break; - case OPUS_GET_IN_DTX_REQUEST: - { - opus_int32 *value = va_arg(ap, opus_int32*); - if (!value) - { - goto bad_arg; - } - if (st->silk_mode.useDTX && (st->prev_mode == MODE_SILK_ONLY || st->prev_mode == MODE_HYBRID)) { - /* DTX determined by Silk. */ - int n; - void *silk_enc = (char*)st+st->silk_enc_offset; - *value = 1; - for (n=0;n<st->silk_mode.nChannelsInternal;n++) { - *value = *value && ((silk_encoder*)silk_enc)->state_Fxx[n].sCmn.noSpeechCounter >= NB_SPEECH_FRAMES_BEFORE_DTX; - } - } -#ifndef DISABLE_FLOAT_API - else if (st->use_dtx) { - /* DTX determined by Opus. */ - *value = st->nb_no_activity_frames >= NB_SPEECH_FRAMES_BEFORE_DTX; - } -#endif - else { - *value = 0; - } - } - break; case CELT_GET_MODE_REQUEST: { diff --git a/thirdparty/opus/opus_multistream_decoder.c b/thirdparty/opus/opus_multistream_decoder.c index 0018517aeb..b95eaa6eac 100644 --- a/thirdparty/opus/opus_multistream_decoder.c +++ b/thirdparty/opus/opus_multistream_decoder.c @@ -37,19 +37,16 @@ #include "float_cast.h" #include "os_support.h" -/* DECODER */ +struct OpusMSDecoder { + ChannelLayout layout; + /* Decoder states go here */ +}; -#if defined(ENABLE_HARDENING) || defined(ENABLE_ASSERTIONS) -static void validate_ms_decoder(OpusMSDecoder *st) -{ - validate_layout(&st->layout); -} -#define VALIDATE_MS_DECODER(st) validate_ms_decoder(st) -#else -#define VALIDATE_MS_DECODER(st) -#endif + +/* DECODER */ + opus_int32 opus_multistream_decoder_get_size(int nb_streams, int nb_coupled_streams) { int coupled_size; @@ -146,6 +143,15 @@ OpusMSDecoder *opus_multistream_decoder_create( return st; } +typedef void (*opus_copy_channel_out_func)( + void *dst, + int dst_stride, + int dst_channel, + const opus_val16 *src, + int src_stride, + int frame_size +); + static int opus_multistream_packet_validate(const unsigned char *data, opus_int32 len, int nb_streams, opus_int32 Fs) { @@ -175,7 +181,7 @@ static int opus_multistream_packet_validate(const unsigned char *data, return samples; } -int opus_multistream_decode_native( +static int opus_multistream_decode_native( OpusMSDecoder *st, const unsigned char *data, opus_int32 len, @@ -183,8 +189,7 @@ int opus_multistream_decode_native( opus_copy_channel_out_func copy_channel_out, int frame_size, int decode_fec, - int soft_clip, - void *user_data + int soft_clip ) { opus_int32 Fs; @@ -196,14 +201,8 @@ int opus_multistream_decode_native( VARDECL(opus_val16, buf); ALLOC_STACK; - VALIDATE_MS_DECODER(st); - if (frame_size <= 0) - { - RESTORE_STACK; - return OPUS_BAD_ARG; - } /* Limit frame_size to avoid excessive stack allocations. */ - MUST_SUCCEED(opus_multistream_decoder_ctl(st, OPUS_GET_SAMPLE_RATE(&Fs))); + opus_multistream_decoder_ctl(st, OPUS_GET_SAMPLE_RATE(&Fs)); frame_size = IMIN(frame_size, Fs/25*3); ALLOC(buf, 2*frame_size, opus_val16); ptr = (char*)st + align(sizeof(OpusMSDecoder)); @@ -238,8 +237,7 @@ int opus_multistream_decode_native( for (s=0;s<st->layout.nb_streams;s++) { OpusDecoder *dec; - opus_int32 packet_offset; - int ret; + int packet_offset, ret; dec = (OpusDecoder*)ptr; ptr += (s < st->layout.nb_coupled_streams) ? align(coupled_size) : align(mono_size); @@ -267,7 +265,7 @@ int opus_multistream_decode_native( while ( (chan = get_left_channel(&st->layout, s, prev)) != -1) { (*copy_channel_out)(pcm, st->layout.nb_channels, chan, - buf, 2, frame_size, user_data); + buf, 2, frame_size); prev = chan; } prev = -1; @@ -275,7 +273,7 @@ int opus_multistream_decode_native( while ( (chan = get_right_channel(&st->layout, s, prev)) != -1) { (*copy_channel_out)(pcm, st->layout.nb_channels, chan, - buf+1, 2, frame_size, user_data); + buf+1, 2, frame_size); prev = chan; } } else { @@ -285,7 +283,7 @@ int opus_multistream_decode_native( while ( (chan = get_mono_channel(&st->layout, s, prev)) != -1) { (*copy_channel_out)(pcm, st->layout.nb_channels, chan, - buf, 1, frame_size, user_data); + buf, 1, frame_size); prev = chan; } } @@ -296,7 +294,7 @@ int opus_multistream_decode_native( if (st->layout.mapping[c] == 255) { (*copy_channel_out)(pcm, st->layout.nb_channels, c, - NULL, 0, frame_size, user_data); + NULL, 0, frame_size); } } RESTORE_STACK; @@ -310,13 +308,11 @@ static void opus_copy_channel_out_float( int dst_channel, const opus_val16 *src, int src_stride, - int frame_size, - void *user_data + int frame_size ) { float *float_dst; opus_int32 i; - (void)user_data; float_dst = (float*)dst; if (src != NULL) { @@ -341,13 +337,11 @@ static void opus_copy_channel_out_short( int dst_channel, const opus_val16 *src, int src_stride, - int frame_size, - void *user_data + int frame_size ) { opus_int16 *short_dst; opus_int32 i; - (void)user_data; short_dst = (opus_int16*)dst; if (src != NULL) { @@ -378,7 +372,7 @@ int opus_multistream_decode( ) { return opus_multistream_decode_native(st, data, len, - pcm, opus_copy_channel_out_short, frame_size, decode_fec, 0, NULL); + pcm, opus_copy_channel_out_short, frame_size, decode_fec, 0); } #ifndef DISABLE_FLOAT_API @@ -386,7 +380,7 @@ int opus_multistream_decode_float(OpusMSDecoder *st, const unsigned char *data, opus_int32 len, float *pcm, int frame_size, int decode_fec) { return opus_multistream_decode_native(st, data, len, - pcm, opus_copy_channel_out_float, frame_size, decode_fec, 0, NULL); + pcm, opus_copy_channel_out_float, frame_size, decode_fec, 0); } #endif @@ -396,30 +390,32 @@ int opus_multistream_decode(OpusMSDecoder *st, const unsigned char *data, opus_int32 len, opus_int16 *pcm, int frame_size, int decode_fec) { return opus_multistream_decode_native(st, data, len, - pcm, opus_copy_channel_out_short, frame_size, decode_fec, 1, NULL); + pcm, opus_copy_channel_out_short, frame_size, decode_fec, 1); } int opus_multistream_decode_float( OpusMSDecoder *st, const unsigned char *data, opus_int32 len, - opus_val16 *pcm, + float *pcm, int frame_size, int decode_fec ) { return opus_multistream_decode_native(st, data, len, - pcm, opus_copy_channel_out_float, frame_size, decode_fec, 0, NULL); + pcm, opus_copy_channel_out_float, frame_size, decode_fec, 0); } #endif -int opus_multistream_decoder_ctl_va_list(OpusMSDecoder *st, int request, - va_list ap) +int opus_multistream_decoder_ctl(OpusMSDecoder *st, int request, ...) { + va_list ap; int coupled_size, mono_size; char *ptr; int ret = OPUS_OK; + va_start(ap, request); + coupled_size = opus_decoder_get_size(2); mono_size = opus_decoder_get_size(1); ptr = (char*)st + align(sizeof(OpusMSDecoder)); @@ -429,7 +425,6 @@ int opus_multistream_decoder_ctl_va_list(OpusMSDecoder *st, int request, case OPUS_GET_SAMPLE_RATE_REQUEST: case OPUS_GET_GAIN_REQUEST: case OPUS_GET_LAST_PACKET_DURATION_REQUEST: - case OPUS_GET_PHASE_INVERSION_DISABLED_REQUEST: { OpusDecoder *dec; /* For int32* GET params, just query the first stream */ @@ -487,7 +482,7 @@ int opus_multistream_decoder_ctl_va_list(OpusMSDecoder *st, int request, OpusDecoder **value; stream_id = va_arg(ap, opus_int32); if (stream_id<0 || stream_id >= st->layout.nb_streams) - goto bad_arg; + ret = OPUS_BAD_ARG; value = va_arg(ap, OpusDecoder**); if (!value) { @@ -504,7 +499,6 @@ int opus_multistream_decoder_ctl_va_list(OpusMSDecoder *st, int request, } break; case OPUS_SET_GAIN_REQUEST: - case OPUS_SET_PHASE_INVERSION_DISABLED_REQUEST: { int s; /* This works for int32 params */ @@ -528,20 +522,14 @@ int opus_multistream_decoder_ctl_va_list(OpusMSDecoder *st, int request, ret = OPUS_UNIMPLEMENTED; break; } + + va_end(ap); return ret; bad_arg: + va_end(ap); return OPUS_BAD_ARG; } -int opus_multistream_decoder_ctl(OpusMSDecoder *st, int request, ...) -{ - int ret; - va_list ap; - va_start(ap, request); - ret = opus_multistream_decoder_ctl_va_list(st, request, ap); - va_end(ap); - return ret; -} void opus_multistream_decoder_destroy(OpusMSDecoder *st) { diff --git a/thirdparty/opus/opus_multistream_encoder.c b/thirdparty/opus/opus_multistream_encoder.c index 93204a14c1..1698223a16 100644 --- a/thirdparty/opus/opus_multistream_encoder.c +++ b/thirdparty/opus/opus_multistream_encoder.c @@ -61,6 +61,38 @@ static const VorbisLayout vorbis_mappings[8] = { {5, 3, {0, 6, 1, 2, 3, 4, 5, 7}}, /* 8: 7.1 surround */ }; +typedef void (*opus_copy_channel_in_func)( + opus_val16 *dst, + int dst_stride, + const void *src, + int src_stride, + int src_channel, + int frame_size +); + +typedef enum { + MAPPING_TYPE_NONE, + MAPPING_TYPE_SURROUND +#ifdef ENABLE_EXPERIMENTAL_AMBISONICS + , /* Do not include comma at end of enumerator list */ + MAPPING_TYPE_AMBISONICS +#endif +} MappingType; + +struct OpusMSEncoder { + ChannelLayout layout; + int arch; + int lfe_stream; + int application; + int variable_duration; + MappingType mapping_type; + opus_int32 bitrate_bps; + float subframe_mem[3]; + /* Encoder states go here */ + /* then opus_val32 window_mem[channels*120]; */ + /* then opus_val32 preemph_mem[channels]; */ +}; + static opus_val32 *ms_get_preemph_mem(OpusMSEncoder *st) { int s; @@ -101,29 +133,6 @@ static opus_val32 *ms_get_window_mem(OpusMSEncoder *st) return (opus_val32*)(void*)ptr; } -static int validate_ambisonics(int nb_channels, int *nb_streams, int *nb_coupled_streams) -{ - int order_plus_one; - int acn_channels; - int nondiegetic_channels; - - if (nb_channels < 1 || nb_channels > 227) - return 0; - - order_plus_one = isqrt32(nb_channels); - acn_channels = order_plus_one * order_plus_one; - nondiegetic_channels = nb_channels - acn_channels; - - if (nondiegetic_channels != 0 && nondiegetic_channels != 2) - return 0; - - if (nb_streams) - *nb_streams = acn_channels + (nondiegetic_channels != 0); - if (nb_coupled_streams) - *nb_coupled_streams = nondiegetic_channels != 0; - return 1; -} - static int validate_encoder_layout(const ChannelLayout *layout) { int s; @@ -231,7 +240,6 @@ void surround_analysis(const CELTMode *celt_mode, const void *pcm, opus_val16 *b int pos[8] = {0}; int upsample; int frame_size; - int freq_size; opus_val16 channel_offset; opus_val32 bandE[21]; opus_val16 maskLogE[3][21]; @@ -242,7 +250,6 @@ void surround_analysis(const CELTMode *celt_mode, const void *pcm, opus_val16 *b upsample = resampling_factor(rate); frame_size = len*upsample; - freq_size = IMIN(960, frame_size); /* LM = log2(frame_size / 120) */ for (LM=0;LM<celt_mode->maxLM;LM++) @@ -251,7 +258,7 @@ void surround_analysis(const CELTMode *celt_mode, const void *pcm, opus_val16 *b ALLOC(in, frame_size+overlap, opus_val32); ALLOC(x, len, opus_val16); - ALLOC(freq, freq_size, opus_val32); + ALLOC(freq, frame_size, opus_val32); channel_pos(channels, pos); @@ -261,11 +268,8 @@ void surround_analysis(const CELTMode *celt_mode, const void *pcm, opus_val16 *b for (c=0;c<channels;c++) { - int frame; - int nb_frames = frame_size/freq_size; - celt_assert(nb_frames*freq_size == frame_size); OPUS_COPY(in, mem+c*overlap, overlap); - (*copy_channel_in)(x, 1, pcm, channels, c, len, NULL); + (*copy_channel_in)(x, 1, pcm, channels, c, len); celt_preemphasis(x, in+overlap, frame_size, 1, upsample, celt_mode->preemph, preemph_mem+c, 0); #ifndef FIXED_POINT { @@ -280,26 +284,18 @@ void surround_analysis(const CELTMode *celt_mode, const void *pcm, opus_val16 *b } } #endif - OPUS_CLEAR(bandE, 21); - for (frame=0;frame<nb_frames;frame++) + clt_mdct_forward(&celt_mode->mdct, in, freq, celt_mode->window, + overlap, celt_mode->maxLM-LM, 1, arch); + if (upsample != 1) { - opus_val32 tmpE[21]; - clt_mdct_forward(&celt_mode->mdct, in+960*frame, freq, celt_mode->window, - overlap, celt_mode->maxLM-LM, 1, arch); - if (upsample != 1) - { - int bound = freq_size/upsample; - for (i=0;i<bound;i++) - freq[i] *= upsample; - for (;i<freq_size;i++) - freq[i] = 0; - } - - compute_band_energies(celt_mode, freq, tmpE, 21, 1, LM, arch); - /* If we have multiple frames, take the max energy. */ - for (i=0;i<21;i++) - bandE[i] = MAX32(bandE[i], tmpE[i]); + int bound = len; + for (i=0;i<bound;i++) + freq[i] *= upsample; + for (;i<frame_size;i++) + freq[i] = 0; } + + compute_band_energies(celt_mode, freq, bandE, 21, 1, LM); amp2Log2(celt_mode, 21, 21, bandE, bandLogE+21*c, 1); /* Apply spreading function with -6 dB/band going up and -12 dB/band going down. */ for (i=1;i<21;i++) @@ -412,10 +408,12 @@ opus_int32 opus_multistream_surround_encoder_get_size(int channels, int mapping_ { nb_streams=channels; nb_coupled_streams=0; - } else if (mapping_family==2) +#ifdef ENABLE_EXPERIMENTAL_AMBISONICS + } else if (mapping_family==254) { - if (!validate_ambisonics(channels, &nb_streams, &nb_coupled_streams)) - return 0; + nb_streams=channels; + nb_coupled_streams=0; +#endif } else return 0; size = opus_multistream_encoder_get_size(nb_streams, nb_coupled_streams); @@ -450,6 +448,7 @@ static int opus_multistream_encoder_init_impl( st->layout.nb_channels = channels; st->layout.nb_streams = streams; st->layout.nb_coupled_streams = coupled_streams; + st->subframe_mem[0]=st->subframe_mem[1]=st->subframe_mem[2]=0; if (mapping_type != MAPPING_TYPE_SURROUND) st->lfe_stream = -1; st->bitrate_bps = OPUS_AUTO; @@ -457,13 +456,7 @@ static int opus_multistream_encoder_init_impl( st->variable_duration = OPUS_FRAMESIZE_ARG; for (i=0;i<st->layout.nb_channels;i++) st->layout.mapping[i] = mapping[i]; - if (!validate_layout(&st->layout)) - return OPUS_BAD_ARG; - if (mapping_type == MAPPING_TYPE_SURROUND && - !validate_encoder_layout(&st->layout)) - return OPUS_BAD_ARG; - if (mapping_type == MAPPING_TYPE_AMBISONICS && - !validate_ambisonics(st->layout.nb_channels, NULL, NULL)) + if (!validate_layout(&st->layout) || !validate_encoder_layout(&st->layout)) return OPUS_BAD_ARG; ptr = (char*)st + align(sizeof(OpusMSEncoder)); coupled_size = opus_encoder_get_size(2); @@ -556,23 +549,25 @@ int opus_multistream_surround_encoder_init( *coupled_streams=0; for(i=0;i<channels;i++) mapping[i] = i; - } else if (mapping_family==2) +#ifdef ENABLE_EXPERIMENTAL_AMBISONICS + } else if (mapping_family==254) { int i; - if (!validate_ambisonics(channels, streams, coupled_streams)) - return OPUS_BAD_ARG; - for(i = 0; i < (*streams - *coupled_streams); i++) - mapping[i] = i + (*coupled_streams * 2); - for(i = 0; i < *coupled_streams * 2; i++) - mapping[i + (*streams - *coupled_streams)] = i; + *streams=channels; + *coupled_streams=0; + for(i=0;i<channels;i++) + mapping[i] = i; +#endif } else return OPUS_UNIMPLEMENTED; if (channels>2 && mapping_family==1) { mapping_type = MAPPING_TYPE_SURROUND; - } else if (mapping_family==2) +#ifdef ENABLE_EXPERIMENTAL_AMBISONICS + } else if (mapping_family==254) { mapping_type = MAPPING_TYPE_AMBISONICS; +#endif } else { mapping_type = MAPPING_TYPE_NONE; @@ -677,62 +672,62 @@ static void surround_rate_allocation( int lfe_offset; int coupled_ratio; /* Q8 */ int lfe_ratio; /* Q8 */ - int nb_lfe; - int nb_uncoupled; - int nb_coupled; - int nb_normal; - opus_int32 channel_offset; - opus_int32 bitrate; - int total; - - nb_lfe = (st->lfe_stream!=-1); - nb_coupled = st->layout.nb_coupled_streams; - nb_uncoupled = st->layout.nb_streams-nb_coupled-nb_lfe; - nb_normal = 2*nb_coupled + nb_uncoupled; - - /* Give each non-LFE channel enough bits per channel for coding band energy. */ - channel_offset = 40*IMAX(50, Fs/frame_size); + if (st->bitrate_bps > st->layout.nb_channels*40000) + stream_offset = 20000; + else + stream_offset = st->bitrate_bps/st->layout.nb_channels/2; + stream_offset += 60*(Fs/frame_size-50); + /* We start by giving each stream (coupled or uncoupled) the same bitrate. + This models the main saving of coupled channels over uncoupled. */ + /* The LFE stream is an exception to the above and gets fewer bits. */ + lfe_offset = 3500 + 60*(Fs/frame_size-50); + /* Coupled streams get twice the mono rate after the first 20 kb/s. */ + coupled_ratio = 512; + /* Should depend on the bitrate, for now we assume LFE gets 1/8 the bits of mono */ + lfe_ratio = 32; + + /* Compute bitrate allocation between streams */ if (st->bitrate_bps==OPUS_AUTO) { - bitrate = nb_normal*(channel_offset + Fs + 10000) + 8000*nb_lfe; + channel_rate = Fs+60*Fs/frame_size; } else if (st->bitrate_bps==OPUS_BITRATE_MAX) { - bitrate = nb_normal*300000 + nb_lfe*128000; + channel_rate = 300000; } else { - bitrate = st->bitrate_bps; + int nb_lfe; + int nb_uncoupled; + int nb_coupled; + int total; + nb_lfe = (st->lfe_stream!=-1); + nb_coupled = st->layout.nb_coupled_streams; + nb_uncoupled = st->layout.nb_streams-nb_coupled-nb_lfe; + total = (nb_uncoupled<<8) /* mono */ + + coupled_ratio*nb_coupled /* stereo */ + + nb_lfe*lfe_ratio; + channel_rate = 256*(st->bitrate_bps-lfe_offset*nb_lfe-stream_offset*(nb_coupled+nb_uncoupled))/total; } - - /* Give LFE some basic stream_channel allocation but never exceed 1/20 of the - total rate for the non-energy part to avoid problems at really low rate. */ - lfe_offset = IMIN(bitrate/20, 3000) + 15*IMAX(50, Fs/frame_size); - - /* We give each stream (coupled or uncoupled) a starting bitrate. - This models the main saving of coupled channels over uncoupled. */ - stream_offset = (bitrate - channel_offset*nb_normal - lfe_offset*nb_lfe)/nb_normal/2; - stream_offset = IMAX(0, IMIN(20000, stream_offset)); - - /* Coupled streams get twice the mono rate after the offset is allocated. */ - coupled_ratio = 512; - /* Should depend on the bitrate, for now we assume LFE gets 1/8 the bits of mono */ - lfe_ratio = 32; - - total = (nb_uncoupled<<8) /* mono */ - + coupled_ratio*nb_coupled /* stereo */ - + nb_lfe*lfe_ratio; - channel_rate = 256*(opus_int64)(bitrate - lfe_offset*nb_lfe - stream_offset*(nb_coupled+nb_uncoupled) - channel_offset*nb_normal)/total; +#ifndef FIXED_POINT + if (st->variable_duration==OPUS_FRAMESIZE_VARIABLE && frame_size != Fs/50) + { + opus_int32 bonus; + bonus = 60*(Fs/frame_size-50); + channel_rate += bonus; + } +#endif for (i=0;i<st->layout.nb_streams;i++) { if (i<st->layout.nb_coupled_streams) - rate[i] = 2*channel_offset + IMAX(0, stream_offset+(channel_rate*coupled_ratio>>8)); + rate[i] = stream_offset+(channel_rate*coupled_ratio>>8); else if (i!=st->lfe_stream) - rate[i] = channel_offset + IMAX(0, stream_offset + channel_rate); + rate[i] = stream_offset+channel_rate; else - rate[i] = IMAX(0, lfe_offset+(channel_rate*lfe_ratio>>8)); + rate[i] = lfe_offset+(channel_rate*lfe_ratio>>8); } } +#ifdef ENABLE_EXPERIMENTAL_AMBISONICS static void ambisonics_rate_allocation( OpusMSEncoder *st, opus_int32 *rate, @@ -741,31 +736,50 @@ static void ambisonics_rate_allocation( ) { int i; - opus_int32 total_rate; - opus_int32 per_stream_rate; + int non_mono_rate; + int total_rate; - const int nb_channels = st->layout.nb_streams + st->layout.nb_coupled_streams; + /* The mono channel gets (rate_ratio_num / rate_ratio_den) times as many bits + * as all other channels */ + const int rate_ratio_num = 4; + const int rate_ratio_den = 3; + const int num_channels = st->layout.nb_streams; if (st->bitrate_bps==OPUS_AUTO) { - total_rate = (st->layout.nb_coupled_streams + st->layout.nb_streams) * - (Fs+60*Fs/frame_size) + st->layout.nb_streams * (opus_int32)15000; + total_rate = num_channels * (20000 + st->layout.nb_streams*(Fs+60*Fs/frame_size)); } else if (st->bitrate_bps==OPUS_BITRATE_MAX) { - total_rate = nb_channels * 320000; - } else - { + total_rate = num_channels * 320000; + } else { total_rate = st->bitrate_bps; } - /* Allocate equal number of bits to Ambisonic (uncoupled) and non-diegetic - * (coupled) streams */ - per_stream_rate = total_rate / st->layout.nb_streams; - for (i = 0; i < st->layout.nb_streams; i++) + /* Let y be the non-mono rate and let p, q be integers such that the mono + * channel rate is (p/q) * y. + * Also let T be the total bitrate to allocate. Then + * (n - 1) y + (p/q) y = T + * y = (T q) / (qn - q + p) + */ + non_mono_rate = + total_rate * rate_ratio_den + / (rate_ratio_den*num_channels + rate_ratio_num - rate_ratio_den); + +#ifndef FIXED_POINT + if (st->variable_duration==OPUS_FRAMESIZE_VARIABLE && frame_size != Fs/50) { - rate[i] = per_stream_rate; + opus_int32 bonus = 60*(Fs/frame_size-50); + non_mono_rate += bonus; + } +#endif + + rate[0] = total_rate - (num_channels - 1) * non_mono_rate; + for (i=1;i<st->layout.nb_streams;i++) + { + rate[i] = non_mono_rate; } } +#endif /* ENABLE_EXPERIMENTAL_AMBISONICS */ static opus_int32 rate_allocation( OpusMSEncoder *st, @@ -781,9 +795,11 @@ static opus_int32 rate_allocation( ptr = (char*)st + align(sizeof(OpusMSEncoder)); opus_encoder_ctl((OpusEncoder*)ptr, OPUS_GET_SAMPLE_RATE(&Fs)); +#ifdef ENABLE_EXPERIMENTAL_AMBISONICS if (st->mapping_type == MAPPING_TYPE_AMBISONICS) { ambisonics_rate_allocation(st, rate, frame_size, Fs); } else +#endif { surround_rate_allocation(st, rate, frame_size, Fs); } @@ -796,9 +812,9 @@ static opus_int32 rate_allocation( return rate_sum; } -/* Max size in case the encoder decides to return six frames (6 x 20 ms = 120 ms) */ -#define MS_FRAME_TMP (6*1275+12) -int opus_multistream_encode_native +/* Max size in case the encoder decides to return three frames */ +#define MS_FRAME_TMP (3*1275+7) +static int opus_multistream_encode_native ( OpusMSEncoder *st, opus_copy_channel_in_func copy_channel_in, @@ -808,8 +824,7 @@ int opus_multistream_encode_native opus_int32 max_data_bytes, int lsb_depth, downmix_func downmix, - int float_api, - void *user_data + int float_api ) { opus_int32 Fs; @@ -844,8 +859,32 @@ int opus_multistream_encode_native opus_encoder_ctl((OpusEncoder*)ptr, OPUS_GET_VBR(&vbr)); opus_encoder_ctl((OpusEncoder*)ptr, CELT_GET_MODE(&celt_mode)); - frame_size = frame_size_select(analysis_frame_size, st->variable_duration, Fs); - if (frame_size <= 0) + { + opus_int32 delay_compensation; + int channels; + + channels = st->layout.nb_streams + st->layout.nb_coupled_streams; + opus_encoder_ctl((OpusEncoder*)ptr, OPUS_GET_LOOKAHEAD(&delay_compensation)); + delay_compensation -= Fs/400; + frame_size = compute_frame_size(pcm, analysis_frame_size, + st->variable_duration, channels, Fs, st->bitrate_bps, + delay_compensation, downmix +#ifndef DISABLE_FLOAT_API + , st->subframe_mem +#endif + ); + } + + if (400*frame_size < Fs) + { + RESTORE_STACK; + return OPUS_BAD_ARG; + } + /* Validate frame_size before using it to allocate stack space. + This mirrors the checks in opus_encode[_float](). */ + if (400*frame_size != Fs && 200*frame_size != Fs && + 100*frame_size != Fs && 50*frame_size != Fs && + 25*frame_size != Fs && 50*frame_size != 3*Fs) { RESTORE_STACK; return OPUS_BAD_ARG; @@ -853,9 +892,6 @@ int opus_multistream_encode_native /* Smallest packet the encoder can produce. */ smallest_packet = st->layout.nb_streams*2-1; - /* 100 ms needs an extra byte per stream for the ToC. */ - if (Fs/frame_size == 10) - smallest_packet += st->layout.nb_streams; if (max_data_bytes < smallest_packet) { RESTORE_STACK; @@ -916,9 +952,11 @@ int opus_multistream_encode_native opus_encoder_ctl(enc, OPUS_SET_FORCE_CHANNELS(2)); } } +#ifdef ENABLE_EXPERIMENTAL_AMBISONICS else if (st->mapping_type == MAPPING_TYPE_AMBISONICS) { opus_encoder_ctl(enc, OPUS_SET_FORCE_MODE(MODE_CELT_ONLY)); } +#endif } ptr = (char*)st + align(sizeof(OpusMSEncoder)); @@ -941,9 +979,9 @@ int opus_multistream_encode_native left = get_left_channel(&st->layout, s, -1); right = get_right_channel(&st->layout, s, -1); (*copy_channel_in)(buf, 2, - pcm, st->layout.nb_channels, left, frame_size, user_data); + pcm, st->layout.nb_channels, left, frame_size); (*copy_channel_in)(buf+1, 2, - pcm, st->layout.nb_channels, right, frame_size, user_data); + pcm, st->layout.nb_channels, right, frame_size); ptr += align(coupled_size); if (st->mapping_type == MAPPING_TYPE_SURROUND) { @@ -959,7 +997,7 @@ int opus_multistream_encode_native int i; int chan = get_mono_channel(&st->layout, s, -1); (*copy_channel_in)(buf, 1, - pcm, st->layout.nb_channels, chan, frame_size, user_data); + pcm, st->layout.nb_channels, chan, frame_size); ptr += align(mono_size); if (st->mapping_type == MAPPING_TYPE_SURROUND) { @@ -975,9 +1013,6 @@ int opus_multistream_encode_native curr_max = max_data_bytes - tot_size; /* Reserve one byte for the last stream and two for the others */ curr_max -= IMAX(0,2*(st->layout.nb_streams-s-1)-1); - /* For 100 ms, reserve an extra byte per stream for the ToC */ - if (Fs/frame_size == 10) - curr_max -= st->layout.nb_streams-s-1; curr_max = IMIN(curr_max,MS_FRAME_TMP); /* Repacketizer will add one or two bytes for self-delimited frames */ if (s != st->layout.nb_streams-1) curr_max -= curr_max>253 ? 2 : 1; @@ -1018,13 +1053,11 @@ static void opus_copy_channel_in_float( const void *src, int src_stride, int src_channel, - int frame_size, - void *user_data + int frame_size ) { const float *float_src; opus_int32 i; - (void)user_data; float_src = (const float *)src; for (i=0;i<frame_size;i++) #if defined(FIXED_POINT) @@ -1041,13 +1074,11 @@ static void opus_copy_channel_in_short( const void *src, int src_stride, int src_channel, - int frame_size, - void *user_data + int frame_size ) { const opus_int16 *short_src; opus_int32 i; - (void)user_data; short_src = (const opus_int16 *)src; for (i=0;i<frame_size;i++) #if defined(FIXED_POINT) @@ -1068,7 +1099,7 @@ int opus_multistream_encode( ) { return opus_multistream_encode_native(st, opus_copy_channel_in_short, - pcm, frame_size, data, max_data_bytes, 16, downmix_int, 0, NULL); + pcm, frame_size, data, max_data_bytes, 16, downmix_int, 0); } #ifndef DISABLE_FLOAT_API @@ -1081,7 +1112,7 @@ int opus_multistream_encode_float( ) { return opus_multistream_encode_native(st, opus_copy_channel_in_float, - pcm, frame_size, data, max_data_bytes, 16, downmix_float, 1, NULL); + pcm, frame_size, data, max_data_bytes, 16, downmix_float, 1); } #endif @@ -1097,7 +1128,7 @@ int opus_multistream_encode_float ) { return opus_multistream_encode_native(st, opus_copy_channel_in_float, - pcm, frame_size, data, max_data_bytes, 24, downmix_float, 1, NULL); + pcm, frame_size, data, max_data_bytes, 24, downmix_float, 1); } int opus_multistream_encode( @@ -1109,17 +1140,19 @@ int opus_multistream_encode( ) { return opus_multistream_encode_native(st, opus_copy_channel_in_short, - pcm, frame_size, data, max_data_bytes, 16, downmix_int, 0, NULL); + pcm, frame_size, data, max_data_bytes, 16, downmix_int, 0); } #endif -int opus_multistream_encoder_ctl_va_list(OpusMSEncoder *st, int request, - va_list ap) +int opus_multistream_encoder_ctl(OpusMSEncoder *st, int request, ...) { + va_list ap; int coupled_size, mono_size; char *ptr; int ret = OPUS_OK; + va_start(ap, request); + coupled_size = opus_encoder_get_size(2); mono_size = opus_encoder_get_size(1); ptr = (char*)st + align(sizeof(OpusMSEncoder)); @@ -1128,11 +1161,9 @@ int opus_multistream_encoder_ctl_va_list(OpusMSEncoder *st, int request, case OPUS_SET_BITRATE_REQUEST: { opus_int32 value = va_arg(ap, opus_int32); - if (value != OPUS_AUTO && value != OPUS_BITRATE_MAX) + if (value<0 && value!=OPUS_AUTO && value!=OPUS_BITRATE_MAX) { - if (value <= 0) - goto bad_arg; - value = IMIN(300000*st->layout.nb_channels, IMAX(500*st->layout.nb_channels, value)); + goto bad_arg; } st->bitrate_bps = value; } @@ -1175,7 +1206,6 @@ int opus_multistream_encoder_ctl_va_list(OpusMSEncoder *st, int request, case OPUS_GET_INBAND_FEC_REQUEST: case OPUS_GET_FORCE_CHANNELS_REQUEST: case OPUS_GET_PREDICTION_DISABLED_REQUEST: - case OPUS_GET_PHASE_INVERSION_DISABLED_REQUEST: { OpusEncoder *enc; /* For int32* GET params, just query the first stream */ @@ -1222,7 +1252,6 @@ int opus_multistream_encoder_ctl_va_list(OpusMSEncoder *st, int request, case OPUS_SET_FORCE_MODE_REQUEST: case OPUS_SET_FORCE_CHANNELS_REQUEST: case OPUS_SET_PREDICTION_DISABLED_REQUEST: - case OPUS_SET_PHASE_INVERSION_DISABLED_REQUEST: { int s; /* This works for int32 params */ @@ -1249,7 +1278,7 @@ int opus_multistream_encoder_ctl_va_list(OpusMSEncoder *st, int request, OpusEncoder **value; stream_id = va_arg(ap, opus_int32); if (stream_id<0 || stream_id >= st->layout.nb_streams) - goto bad_arg; + ret = OPUS_BAD_ARG; value = va_arg(ap, OpusEncoder**); if (!value) { @@ -1284,6 +1313,7 @@ int opus_multistream_encoder_ctl_va_list(OpusMSEncoder *st, int request, case OPUS_RESET_STATE: { int s; + st->subframe_mem[0] = st->subframe_mem[1] = st->subframe_mem[2] = 0; if (st->mapping_type == MAPPING_TYPE_SURROUND) { OPUS_CLEAR(ms_get_preemph_mem(st), st->layout.nb_channels); @@ -1307,19 +1337,12 @@ int opus_multistream_encoder_ctl_va_list(OpusMSEncoder *st, int request, ret = OPUS_UNIMPLEMENTED; break; } - return ret; -bad_arg: - return OPUS_BAD_ARG; -} -int opus_multistream_encoder_ctl(OpusMSEncoder *st, int request, ...) -{ - int ret; - va_list ap; - va_start(ap, request); - ret = opus_multistream_encoder_ctl_va_list(st, request, ap); va_end(ap); return ret; +bad_arg: + va_end(ap); + return OPUS_BAD_ARG; } void opus_multistream_encoder_destroy(OpusMSEncoder *st) diff --git a/thirdparty/opus/opus_private.h b/thirdparty/opus/opus_private.h index 5e2463f546..3b62eed096 100644 --- a/thirdparty/opus/opus_private.h +++ b/thirdparty/opus/opus_private.h @@ -33,7 +33,6 @@ #include "opus.h" #include "celt.h" -#include <stdarg.h> /* va_list */ #include <stddef.h> /* offsetof */ struct OpusRepacketizer { @@ -51,59 +50,12 @@ typedef struct ChannelLayout { unsigned char mapping[256]; } ChannelLayout; -typedef enum { - MAPPING_TYPE_NONE, - MAPPING_TYPE_SURROUND, - MAPPING_TYPE_AMBISONICS -} MappingType; - -struct OpusMSEncoder { - ChannelLayout layout; - int arch; - int lfe_stream; - int application; - int variable_duration; - MappingType mapping_type; - opus_int32 bitrate_bps; - /* Encoder states go here */ - /* then opus_val32 window_mem[channels*120]; */ - /* then opus_val32 preemph_mem[channels]; */ -}; - -struct OpusMSDecoder { - ChannelLayout layout; - /* Decoder states go here */ -}; - -int opus_multistream_encoder_ctl_va_list(struct OpusMSEncoder *st, int request, - va_list ap); -int opus_multistream_decoder_ctl_va_list(struct OpusMSDecoder *st, int request, - va_list ap); - int validate_layout(const ChannelLayout *layout); int get_left_channel(const ChannelLayout *layout, int stream_id, int prev); int get_right_channel(const ChannelLayout *layout, int stream_id, int prev); int get_mono_channel(const ChannelLayout *layout, int stream_id, int prev); -typedef void (*opus_copy_channel_in_func)( - opus_val16 *dst, - int dst_stride, - const void *src, - int src_stride, - int src_channel, - int frame_size, - void *user_data -); - -typedef void (*opus_copy_channel_out_func)( - void *dst, - int dst_stride, - int dst_channel, - const opus_val16 *src, - int src_stride, - int frame_size, - void *user_data -); + #define MODE_SILK_ONLY 1000 #define MODE_HYBRID 1001 @@ -135,12 +87,19 @@ typedef void (*opus_copy_channel_out_func)( typedef void (*downmix_func)(const void *, opus_val32 *, int, int, int, int, int); void downmix_float(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C); void downmix_int(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C); -int is_digital_silence(const opus_val16* pcm, int frame_size, int channels, int lsb_depth); int encode_size(int size, unsigned char *data); opus_int32 frame_size_select(opus_int32 frame_size, int variable_duration, opus_int32 Fs); +opus_int32 compute_frame_size(const void *analysis_pcm, int frame_size, + int variable_duration, int C, opus_int32 Fs, int bitrate_bps, + int delay_compensation, downmix_func downmix +#ifndef DISABLE_FLOAT_API + , float *subframe_mem +#endif + ); + opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_size, unsigned char *data, opus_int32 out_data_bytes, int lsb_depth, const void *analysis_pcm, opus_int32 analysis_size, int c1, int c2, @@ -172,30 +131,4 @@ opus_int32 opus_repacketizer_out_range_impl(OpusRepacketizer *rp, int begin, int int pad_frame(unsigned char *data, opus_int32 len, opus_int32 new_len); -int opus_multistream_encode_native -( - struct OpusMSEncoder *st, - opus_copy_channel_in_func copy_channel_in, - const void *pcm, - int analysis_frame_size, - unsigned char *data, - opus_int32 max_data_bytes, - int lsb_depth, - downmix_func downmix, - int float_api, - void *user_data -); - -int opus_multistream_decode_native( - struct OpusMSDecoder *st, - const unsigned char *data, - opus_int32 len, - void *pcm, - opus_copy_channel_out_func copy_channel_out, - int frame_size, - int decode_fec, - int soft_clip, - void *user_data -); - #endif /* OPUS_PRIVATE_H */ diff --git a/thirdparty/opus/opus_projection_decoder.c b/thirdparty/opus/opus_projection_decoder.c deleted file mode 100644 index c2e07d5bcf..0000000000 --- a/thirdparty/opus/opus_projection_decoder.c +++ /dev/null @@ -1,258 +0,0 @@ -/* Copyright (c) 2017 Google Inc. - Written by Andrew Allen */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "mathops.h" -#include "os_support.h" -#include "opus_private.h" -#include "opus_defines.h" -#include "opus_projection.h" -#include "opus_multistream.h" -#include "mapping_matrix.h" -#include "stack_alloc.h" - -struct OpusProjectionDecoder -{ - opus_int32 demixing_matrix_size_in_bytes; - /* Encoder states go here */ -}; - -#if !defined(DISABLE_FLOAT_API) -static void opus_projection_copy_channel_out_float( - void *dst, - int dst_stride, - int dst_channel, - const opus_val16 *src, - int src_stride, - int frame_size, - void *user_data) -{ - float *float_dst; - const MappingMatrix *matrix; - float_dst = (float *)dst; - matrix = (const MappingMatrix *)user_data; - - if (dst_channel == 0) - OPUS_CLEAR(float_dst, frame_size * dst_stride); - - if (src != NULL) - mapping_matrix_multiply_channel_out_float(matrix, src, dst_channel, - src_stride, float_dst, dst_stride, frame_size); -} -#endif - -static void opus_projection_copy_channel_out_short( - void *dst, - int dst_stride, - int dst_channel, - const opus_val16 *src, - int src_stride, - int frame_size, - void *user_data) -{ - opus_int16 *short_dst; - const MappingMatrix *matrix; - short_dst = (opus_int16 *)dst; - matrix = (const MappingMatrix *)user_data; - if (dst_channel == 0) - OPUS_CLEAR(short_dst, frame_size * dst_stride); - - if (src != NULL) - mapping_matrix_multiply_channel_out_short(matrix, src, dst_channel, - src_stride, short_dst, dst_stride, frame_size); -} - -static MappingMatrix *get_dec_demixing_matrix(OpusProjectionDecoder *st) -{ - /* void* cast avoids clang -Wcast-align warning */ - return (MappingMatrix*)(void*)((char*)st + - align(sizeof(OpusProjectionDecoder))); -} - -static OpusMSDecoder *get_multistream_decoder(OpusProjectionDecoder *st) -{ - /* void* cast avoids clang -Wcast-align warning */ - return (OpusMSDecoder*)(void*)((char*)st + - align(sizeof(OpusProjectionDecoder) + - st->demixing_matrix_size_in_bytes)); -} - -opus_int32 opus_projection_decoder_get_size(int channels, int streams, - int coupled_streams) -{ - opus_int32 matrix_size; - opus_int32 decoder_size; - - matrix_size = - mapping_matrix_get_size(streams + coupled_streams, channels); - if (!matrix_size) - return 0; - - decoder_size = opus_multistream_decoder_get_size(streams, coupled_streams); - if (!decoder_size) - return 0; - - return align(sizeof(OpusProjectionDecoder)) + matrix_size + decoder_size; -} - -int opus_projection_decoder_init(OpusProjectionDecoder *st, opus_int32 Fs, - int channels, int streams, int coupled_streams, - unsigned char *demixing_matrix, opus_int32 demixing_matrix_size) -{ - int nb_input_streams; - opus_int32 expected_matrix_size; - int i, ret; - unsigned char mapping[255]; - VARDECL(opus_int16, buf); - ALLOC_STACK; - - /* Verify supplied matrix size. */ - nb_input_streams = streams + coupled_streams; - expected_matrix_size = nb_input_streams * channels * sizeof(opus_int16); - if (expected_matrix_size != demixing_matrix_size) - { - RESTORE_STACK; - return OPUS_BAD_ARG; - } - - /* Convert demixing matrix input into internal format. */ - ALLOC(buf, nb_input_streams * channels, opus_int16); - for (i = 0; i < nb_input_streams * channels; i++) - { - int s = demixing_matrix[2*i + 1] << 8 | demixing_matrix[2*i]; - s = ((s & 0xFFFF) ^ 0x8000) - 0x8000; - buf[i] = (opus_int16)s; - } - - /* Assign demixing matrix. */ - st->demixing_matrix_size_in_bytes = - mapping_matrix_get_size(channels, nb_input_streams); - if (!st->demixing_matrix_size_in_bytes) - { - RESTORE_STACK; - return OPUS_BAD_ARG; - } - - mapping_matrix_init(get_dec_demixing_matrix(st), channels, nb_input_streams, 0, - buf, demixing_matrix_size); - - /* Set trivial mapping so each input channel pairs with a matrix column. */ - for (i = 0; i < channels; i++) - mapping[i] = i; - - ret = opus_multistream_decoder_init( - get_multistream_decoder(st), Fs, channels, streams, coupled_streams, mapping); - RESTORE_STACK; - return ret; -} - -OpusProjectionDecoder *opus_projection_decoder_create( - opus_int32 Fs, int channels, int streams, int coupled_streams, - unsigned char *demixing_matrix, opus_int32 demixing_matrix_size, int *error) -{ - int size; - int ret; - OpusProjectionDecoder *st; - - /* Allocate space for the projection decoder. */ - size = opus_projection_decoder_get_size(channels, streams, coupled_streams); - if (!size) { - if (error) - *error = OPUS_ALLOC_FAIL; - return NULL; - } - st = (OpusProjectionDecoder *)opus_alloc(size); - if (!st) - { - if (error) - *error = OPUS_ALLOC_FAIL; - return NULL; - } - - /* Initialize projection decoder with provided settings. */ - ret = opus_projection_decoder_init(st, Fs, channels, streams, coupled_streams, - demixing_matrix, demixing_matrix_size); - if (ret != OPUS_OK) - { - opus_free(st); - st = NULL; - } - if (error) - *error = ret; - return st; -} - -#ifdef FIXED_POINT -int opus_projection_decode(OpusProjectionDecoder *st, const unsigned char *data, - opus_int32 len, opus_int16 *pcm, int frame_size, - int decode_fec) -{ - return opus_multistream_decode_native(get_multistream_decoder(st), data, len, - pcm, opus_projection_copy_channel_out_short, frame_size, decode_fec, 0, - get_dec_demixing_matrix(st)); -} -#else -int opus_projection_decode(OpusProjectionDecoder *st, const unsigned char *data, - opus_int32 len, opus_int16 *pcm, int frame_size, - int decode_fec) -{ - return opus_multistream_decode_native(get_multistream_decoder(st), data, len, - pcm, opus_projection_copy_channel_out_short, frame_size, decode_fec, 1, - get_dec_demixing_matrix(st)); -} -#endif - -#ifndef DISABLE_FLOAT_API -int opus_projection_decode_float(OpusProjectionDecoder *st, const unsigned char *data, - opus_int32 len, float *pcm, int frame_size, int decode_fec) -{ - return opus_multistream_decode_native(get_multistream_decoder(st), data, len, - pcm, opus_projection_copy_channel_out_float, frame_size, decode_fec, 0, - get_dec_demixing_matrix(st)); -} -#endif - -int opus_projection_decoder_ctl(OpusProjectionDecoder *st, int request, ...) -{ - va_list ap; - int ret = OPUS_OK; - - va_start(ap, request); - ret = opus_multistream_decoder_ctl_va_list(get_multistream_decoder(st), - request, ap); - va_end(ap); - return ret; -} - -void opus_projection_decoder_destroy(OpusProjectionDecoder *st) -{ - opus_free(st); -} - diff --git a/thirdparty/opus/opus_projection_encoder.c b/thirdparty/opus/opus_projection_encoder.c deleted file mode 100644 index 06fb2d2526..0000000000 --- a/thirdparty/opus/opus_projection_encoder.c +++ /dev/null @@ -1,468 +0,0 @@ -/* Copyright (c) 2017 Google Inc. - Written by Andrew Allen */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "mathops.h" -#include "os_support.h" -#include "opus_private.h" -#include "opus_defines.h" -#include "opus_projection.h" -#include "opus_multistream.h" -#include "stack_alloc.h" -#include "mapping_matrix.h" - -struct OpusProjectionEncoder -{ - opus_int32 mixing_matrix_size_in_bytes; - opus_int32 demixing_matrix_size_in_bytes; - /* Encoder states go here */ -}; - -#if !defined(DISABLE_FLOAT_API) -static void opus_projection_copy_channel_in_float( - opus_val16 *dst, - int dst_stride, - const void *src, - int src_stride, - int src_channel, - int frame_size, - void *user_data -) -{ - mapping_matrix_multiply_channel_in_float((const MappingMatrix*)user_data, - (const float*)src, src_stride, dst, src_channel, dst_stride, frame_size); -} -#endif - -static void opus_projection_copy_channel_in_short( - opus_val16 *dst, - int dst_stride, - const void *src, - int src_stride, - int src_channel, - int frame_size, - void *user_data -) -{ - mapping_matrix_multiply_channel_in_short((const MappingMatrix*)user_data, - (const opus_int16*)src, src_stride, dst, src_channel, dst_stride, frame_size); -} - -static int get_order_plus_one_from_channels(int channels, int *order_plus_one) -{ - int order_plus_one_; - int acn_channels; - int nondiegetic_channels; - - /* Allowed numbers of channels: - * (1 + n)^2 + 2j, for n = 0...14 and j = 0 or 1. - */ - if (channels < 1 || channels > 227) - return OPUS_BAD_ARG; - - order_plus_one_ = isqrt32(channels); - acn_channels = order_plus_one_ * order_plus_one_; - nondiegetic_channels = channels - acn_channels; - if (nondiegetic_channels != 0 && nondiegetic_channels != 2) - return OPUS_BAD_ARG; - - if (order_plus_one) - *order_plus_one = order_plus_one_; - return OPUS_OK; -} - -static int get_streams_from_channels(int channels, int mapping_family, - int *streams, int *coupled_streams, - int *order_plus_one) -{ - if (mapping_family == 3) - { - if (get_order_plus_one_from_channels(channels, order_plus_one) != OPUS_OK) - return OPUS_BAD_ARG; - if (streams) - *streams = (channels + 1) / 2; - if (coupled_streams) - *coupled_streams = channels / 2; - return OPUS_OK; - } - return OPUS_BAD_ARG; -} - -static MappingMatrix *get_mixing_matrix(OpusProjectionEncoder *st) -{ - /* void* cast avoids clang -Wcast-align warning */ - return (MappingMatrix *)(void*)((char*)st + - align(sizeof(OpusProjectionEncoder))); -} - -static MappingMatrix *get_enc_demixing_matrix(OpusProjectionEncoder *st) -{ - /* void* cast avoids clang -Wcast-align warning */ - return (MappingMatrix *)(void*)((char*)st + - align(sizeof(OpusProjectionEncoder) + - st->mixing_matrix_size_in_bytes)); -} - -static OpusMSEncoder *get_multistream_encoder(OpusProjectionEncoder *st) -{ - /* void* cast avoids clang -Wcast-align warning */ - return (OpusMSEncoder *)(void*)((char*)st + - align(sizeof(OpusProjectionEncoder) + - st->mixing_matrix_size_in_bytes + - st->demixing_matrix_size_in_bytes)); -} - -opus_int32 opus_projection_ambisonics_encoder_get_size(int channels, - int mapping_family) -{ - int nb_streams; - int nb_coupled_streams; - int order_plus_one; - int mixing_matrix_rows, mixing_matrix_cols; - int demixing_matrix_rows, demixing_matrix_cols; - opus_int32 mixing_matrix_size, demixing_matrix_size; - opus_int32 encoder_size; - int ret; - - ret = get_streams_from_channels(channels, mapping_family, &nb_streams, - &nb_coupled_streams, &order_plus_one); - if (ret != OPUS_OK) - return 0; - - if (order_plus_one == 2) - { - mixing_matrix_rows = mapping_matrix_foa_mixing.rows; - mixing_matrix_cols = mapping_matrix_foa_mixing.cols; - demixing_matrix_rows = mapping_matrix_foa_demixing.rows; - demixing_matrix_cols = mapping_matrix_foa_demixing.cols; - } - else if (order_plus_one == 3) - { - mixing_matrix_rows = mapping_matrix_soa_mixing.rows; - mixing_matrix_cols = mapping_matrix_soa_mixing.cols; - demixing_matrix_rows = mapping_matrix_soa_demixing.rows; - demixing_matrix_cols = mapping_matrix_soa_demixing.cols; - } - else if (order_plus_one == 4) - { - mixing_matrix_rows = mapping_matrix_toa_mixing.rows; - mixing_matrix_cols = mapping_matrix_toa_mixing.cols; - demixing_matrix_rows = mapping_matrix_toa_demixing.rows; - demixing_matrix_cols = mapping_matrix_toa_demixing.cols; - } - else - return 0; - - mixing_matrix_size = - mapping_matrix_get_size(mixing_matrix_rows, mixing_matrix_cols); - if (!mixing_matrix_size) - return 0; - - demixing_matrix_size = - mapping_matrix_get_size(demixing_matrix_rows, demixing_matrix_cols); - if (!demixing_matrix_size) - return 0; - - encoder_size = - opus_multistream_encoder_get_size(nb_streams, nb_coupled_streams); - if (!encoder_size) - return 0; - - return align(sizeof(OpusProjectionEncoder)) + - mixing_matrix_size + demixing_matrix_size + encoder_size; -} - -int opus_projection_ambisonics_encoder_init(OpusProjectionEncoder *st, opus_int32 Fs, - int channels, int mapping_family, - int *streams, int *coupled_streams, - int application) -{ - MappingMatrix *mixing_matrix; - MappingMatrix *demixing_matrix; - OpusMSEncoder *ms_encoder; - int i; - int ret; - int order_plus_one; - unsigned char mapping[255]; - - if (streams == NULL || coupled_streams == NULL) { - return OPUS_BAD_ARG; - } - - if (get_streams_from_channels(channels, mapping_family, streams, - coupled_streams, &order_plus_one) != OPUS_OK) - return OPUS_BAD_ARG; - - if (mapping_family == 3) - { - /* Assign mixing matrix based on available pre-computed matrices. */ - mixing_matrix = get_mixing_matrix(st); - if (order_plus_one == 2) - { - mapping_matrix_init(mixing_matrix, mapping_matrix_foa_mixing.rows, - mapping_matrix_foa_mixing.cols, mapping_matrix_foa_mixing.gain, - mapping_matrix_foa_mixing_data, - sizeof(mapping_matrix_foa_mixing_data)); - } - else if (order_plus_one == 3) - { - mapping_matrix_init(mixing_matrix, mapping_matrix_soa_mixing.rows, - mapping_matrix_soa_mixing.cols, mapping_matrix_soa_mixing.gain, - mapping_matrix_soa_mixing_data, - sizeof(mapping_matrix_soa_mixing_data)); - } - else if (order_plus_one == 4) - { - mapping_matrix_init(mixing_matrix, mapping_matrix_toa_mixing.rows, - mapping_matrix_toa_mixing.cols, mapping_matrix_toa_mixing.gain, - mapping_matrix_toa_mixing_data, - sizeof(mapping_matrix_toa_mixing_data)); - } - else - return OPUS_BAD_ARG; - - st->mixing_matrix_size_in_bytes = mapping_matrix_get_size( - mixing_matrix->rows, mixing_matrix->cols); - if (!st->mixing_matrix_size_in_bytes) - return OPUS_BAD_ARG; - - /* Assign demixing matrix based on available pre-computed matrices. */ - demixing_matrix = get_enc_demixing_matrix(st); - if (order_plus_one == 2) - { - mapping_matrix_init(demixing_matrix, mapping_matrix_foa_demixing.rows, - mapping_matrix_foa_demixing.cols, mapping_matrix_foa_demixing.gain, - mapping_matrix_foa_demixing_data, - sizeof(mapping_matrix_foa_demixing_data)); - } - else if (order_plus_one == 3) - { - mapping_matrix_init(demixing_matrix, mapping_matrix_soa_demixing.rows, - mapping_matrix_soa_demixing.cols, mapping_matrix_soa_demixing.gain, - mapping_matrix_soa_demixing_data, - sizeof(mapping_matrix_soa_demixing_data)); - } - else if (order_plus_one == 4) - { - mapping_matrix_init(demixing_matrix, mapping_matrix_toa_demixing.rows, - mapping_matrix_toa_demixing.cols, mapping_matrix_toa_demixing.gain, - mapping_matrix_toa_demixing_data, - sizeof(mapping_matrix_toa_demixing_data)); - } - else - return OPUS_BAD_ARG; - - st->demixing_matrix_size_in_bytes = mapping_matrix_get_size( - demixing_matrix->rows, demixing_matrix->cols); - if (!st->demixing_matrix_size_in_bytes) - return OPUS_BAD_ARG; - } - else - return OPUS_UNIMPLEMENTED; - - /* Ensure matrices are large enough for desired coding scheme. */ - if (*streams + *coupled_streams > mixing_matrix->rows || - channels > mixing_matrix->cols || - channels > demixing_matrix->rows || - *streams + *coupled_streams > demixing_matrix->cols) - return OPUS_BAD_ARG; - - /* Set trivial mapping so each input channel pairs with a matrix column. */ - for (i = 0; i < channels; i++) - mapping[i] = i; - - /* Initialize multistream encoder with provided settings. */ - ms_encoder = get_multistream_encoder(st); - ret = opus_multistream_encoder_init(ms_encoder, Fs, channels, *streams, - *coupled_streams, mapping, application); - return ret; -} - -OpusProjectionEncoder *opus_projection_ambisonics_encoder_create( - opus_int32 Fs, int channels, int mapping_family, int *streams, - int *coupled_streams, int application, int *error) -{ - int size; - int ret; - OpusProjectionEncoder *st; - - /* Allocate space for the projection encoder. */ - size = opus_projection_ambisonics_encoder_get_size(channels, mapping_family); - if (!size) { - if (error) - *error = OPUS_ALLOC_FAIL; - return NULL; - } - st = (OpusProjectionEncoder *)opus_alloc(size); - if (!st) - { - if (error) - *error = OPUS_ALLOC_FAIL; - return NULL; - } - - /* Initialize projection encoder with provided settings. */ - ret = opus_projection_ambisonics_encoder_init(st, Fs, channels, - mapping_family, streams, coupled_streams, application); - if (ret != OPUS_OK) - { - opus_free(st); - st = NULL; - } - if (error) - *error = ret; - return st; -} - -int opus_projection_encode(OpusProjectionEncoder *st, const opus_int16 *pcm, - int frame_size, unsigned char *data, - opus_int32 max_data_bytes) -{ - return opus_multistream_encode_native(get_multistream_encoder(st), - opus_projection_copy_channel_in_short, pcm, frame_size, data, - max_data_bytes, 16, downmix_int, 0, get_mixing_matrix(st)); -} - -#ifndef DISABLE_FLOAT_API -#ifdef FIXED_POINT -int opus_projection_encode_float(OpusProjectionEncoder *st, const float *pcm, - int frame_size, unsigned char *data, - opus_int32 max_data_bytes) -{ - return opus_multistream_encode_native(get_multistream_encoder(st), - opus_projection_copy_channel_in_float, pcm, frame_size, data, - max_data_bytes, 16, downmix_float, 1, get_mixing_matrix(st)); -} -#else -int opus_projection_encode_float(OpusProjectionEncoder *st, const float *pcm, - int frame_size, unsigned char *data, - opus_int32 max_data_bytes) -{ - return opus_multistream_encode_native(get_multistream_encoder(st), - opus_projection_copy_channel_in_float, pcm, frame_size, data, - max_data_bytes, 24, downmix_float, 1, get_mixing_matrix(st)); -} -#endif -#endif - -void opus_projection_encoder_destroy(OpusProjectionEncoder *st) -{ - opus_free(st); -} - -int opus_projection_encoder_ctl(OpusProjectionEncoder *st, int request, ...) -{ - va_list ap; - MappingMatrix *demixing_matrix; - OpusMSEncoder *ms_encoder; - int ret = OPUS_OK; - - ms_encoder = get_multistream_encoder(st); - demixing_matrix = get_enc_demixing_matrix(st); - - va_start(ap, request); - switch(request) - { - case OPUS_PROJECTION_GET_DEMIXING_MATRIX_SIZE_REQUEST: - { - opus_int32 *value = va_arg(ap, opus_int32*); - if (!value) - { - goto bad_arg; - } - *value = - ms_encoder->layout.nb_channels * (ms_encoder->layout.nb_streams - + ms_encoder->layout.nb_coupled_streams) * sizeof(opus_int16); - } - break; - case OPUS_PROJECTION_GET_DEMIXING_MATRIX_GAIN_REQUEST: - { - opus_int32 *value = va_arg(ap, opus_int32*); - if (!value) - { - goto bad_arg; - } - *value = demixing_matrix->gain; - } - break; - case OPUS_PROJECTION_GET_DEMIXING_MATRIX_REQUEST: - { - int i, j, k, l; - int nb_input_streams; - int nb_output_streams; - unsigned char *external_char; - opus_int16 *internal_short; - opus_int32 external_size; - opus_int32 internal_size; - - /* (I/O is in relation to the decoder's perspective). */ - nb_input_streams = ms_encoder->layout.nb_streams + - ms_encoder->layout.nb_coupled_streams; - nb_output_streams = ms_encoder->layout.nb_channels; - - external_char = va_arg(ap, unsigned char *); - external_size = va_arg(ap, opus_int32); - if (!external_char) - { - goto bad_arg; - } - internal_short = mapping_matrix_get_data(demixing_matrix); - internal_size = nb_input_streams * nb_output_streams * sizeof(opus_int16); - if (external_size != internal_size) - { - goto bad_arg; - } - - /* Copy demixing matrix subset to output destination. */ - l = 0; - for (i = 0; i < nb_input_streams; i++) { - for (j = 0; j < nb_output_streams; j++) { - k = demixing_matrix->rows * i + j; - external_char[2*l] = (unsigned char)internal_short[k]; - external_char[2*l+1] = (unsigned char)(internal_short[k] >> 8); - l++; - } - } - } - break; - default: - { - ret = opus_multistream_encoder_ctl_va_list(ms_encoder, request, ap); - } - break; - } - va_end(ap); - return ret; - -bad_arg: - va_end(ap); - return OPUS_BAD_ARG; -} - diff --git a/thirdparty/opus/opusfile.c b/thirdparty/opus/opusfile.c index 8b000a2c58..b8b3a354cf 100644 --- a/thirdparty/opus/opusfile.c +++ b/thirdparty/opus/opusfile.c @@ -86,15 +86,14 @@ int op_test(OpusHead *_head, This is to prevent us spending a lot of time allocating memory and looking for Ogg pages in non-Ogg files.*/ if(memcmp(_initial_data,"OggS",4)!=0)return OP_ENOTFORMAT; - if(OP_UNLIKELY(_initial_bytes>(size_t)LONG_MAX))return OP_EFAULT; ogg_sync_init(&oy); - data=ogg_sync_buffer(&oy,(long)_initial_bytes); + data=ogg_sync_buffer(&oy,_initial_bytes); if(data!=NULL){ ogg_stream_state os; ogg_page og; int ret; memcpy(data,_initial_data,_initial_bytes); - ogg_sync_wrote(&oy,(long)_initial_bytes); + ogg_sync_wrote(&oy,_initial_bytes); ogg_stream_init(&os,-1); err=OP_FALSE; do{ @@ -148,7 +147,7 @@ static int op_get_data(OggOpusFile *_of,int _nbytes){ int nbytes; OP_ASSERT(_nbytes>0); buffer=(unsigned char *)ogg_sync_buffer(&_of->oy,_nbytes); - nbytes=(int)(*_of->callbacks.read)(_of->stream,buffer,_nbytes); + nbytes=(int)(*_of->callbacks.read)(_of->source,buffer,_nbytes); OP_ASSERT(nbytes<=_nbytes); if(OP_LIKELY(nbytes>0))ogg_sync_wrote(&_of->oy,nbytes); return nbytes; @@ -158,7 +157,7 @@ static int op_get_data(OggOpusFile *_of,int _nbytes){ static int op_seek_helper(OggOpusFile *_of,opus_int64 _offset){ if(_offset==_of->offset)return 0; if(_of->callbacks.seek==NULL - ||(*_of->callbacks.seek)(_of->stream,_offset,SEEK_SET)){ + ||(*_of->callbacks.seek)(_of->source,_offset,SEEK_SET)){ return OP_EREAD; } _of->offset=_offset; @@ -166,7 +165,7 @@ static int op_seek_helper(OggOpusFile *_of,opus_int64 _offset){ return 0; } -/*Get the current position indicator of the underlying stream. +/*Get the current position indicator of the underlying source. This should be the same as the value reported by tell().*/ static opus_int64 op_position(const OggOpusFile *_of){ /*The current position indicator is _not_ simply offset. @@ -370,7 +369,7 @@ static int op_get_prev_page_serial(OggOpusFile *_of,OpusSeekRecord *_sr, search_start=llret+1; } /*We started from the beginning of the stream and found nothing. - This should be impossible unless the contents of the stream changed out + This should be impossible unless the contents of the source changed out from under us after we read from it.*/ if(OP_UNLIKELY(!begin)&&OP_UNLIKELY(_offset<0))return OP_EBADLINK; /*Bump up the chunk size. @@ -456,7 +455,7 @@ static opus_int64 op_get_last_page(OggOpusFile *_of,ogg_int64_t *_gp, } } /*We started from at or before the beginning of the link and found nothing. - This should be impossible unless the contents of the stream changed out + This should be impossible unless the contents of the source changed out from under us after we read from it.*/ if((OP_UNLIKELY(left_link)||OP_UNLIKELY(!begin))&&OP_UNLIKELY(_offset<0)){ return OP_EBADLINK; @@ -856,7 +855,6 @@ static int op_find_initial_pcm_offset(OggOpusFile *_of, /*Fail if the pre-skip is non-zero, since it's asking us to skip more samples than exist.*/ if(_link->head.pre_skip>0)return OP_EBADTIMESTAMP; - _link->pcm_file_offset=0; /*Set pcm_end and end_offset so we can skip the call to op_find_final_pcm_offset().*/ _link->pcm_start=_link->pcm_end=0; @@ -868,8 +866,7 @@ static int op_find_initial_pcm_offset(OggOpusFile *_of, if(_link->head.pre_skip>0)return OP_EBADTIMESTAMP; /*Set pcm_end and end_offset so we can skip the call to op_find_final_pcm_offset().*/ - _link->pcm_file_offset=0; - _link->pcm_start=_link->pcm_end=0; + _link->pcm_end=_link->pcm_start=0; _link->end_offset=_link->data_offset; /*Tell the caller we've got a buffered page for them.*/ return 1; @@ -954,7 +951,6 @@ static int op_find_initial_pcm_offset(OggOpusFile *_of, /*Update the packet count after end-trimming.*/ _of->op_count=pi; _of->cur_discard_count=_link->head.pre_skip; - _link->pcm_file_offset=0; _of->prev_packet_gp=_link->pcm_start=pcm_start; _of->prev_page_offset=page_offset; return 0; @@ -1275,7 +1271,6 @@ static int op_bisect_forward_serialno(OggOpusFile *_of, always starts with a seek.*/ ret=op_find_initial_pcm_offset(_of,links+nlinks,NULL); if(OP_UNLIKELY(ret<0))return ret; - links[nlinks].pcm_file_offset=total_duration; _searched=_of->offset; /*Mark the current link count so it can be cleaned up on error.*/ _of->nlinks=++nlinks; @@ -1395,8 +1390,8 @@ static int op_open_seekable2_impl(OggOpusFile *_of){ opus_int64 data_offset; int ret; /*We can seek, so set out learning all about this file.*/ - (*_of->callbacks.seek)(_of->stream,0,SEEK_END); - _of->offset=_of->end=(*_of->callbacks.tell)(_of->stream); + (*_of->callbacks.seek)(_of->source,0,SEEK_END); + _of->offset=_of->end=(*_of->callbacks.tell)(_of->source); if(OP_UNLIKELY(_of->end<0))return OP_EREAD; data_offset=_of->links[0].data_offset; if(OP_UNLIKELY(_of->end<data_offset))return OP_EBADLINK; @@ -1441,7 +1436,7 @@ static int op_open_seekable2(OggOpusFile *_of){ prev_page_offset=_of->prev_page_offset; start_offset=_of->offset; memcpy(op_start,_of->op,sizeof(*op_start)*start_op_count); - OP_ASSERT((*_of->callbacks.tell)(_of->stream)==op_position(_of)); + OP_ASSERT((*_of->callbacks.tell)(_of->source)==op_position(_of)); ogg_sync_init(&_of->oy); ogg_stream_init(&_of->os,-1); ret=op_open_seekable2_impl(_of); @@ -1459,7 +1454,7 @@ static int op_open_seekable2(OggOpusFile *_of){ _of->cur_discard_count=_of->links[0].head.pre_skip; if(OP_UNLIKELY(ret<0))return ret; /*And restore the position indicator.*/ - ret=(*_of->callbacks.seek)(_of->stream,op_position(_of),SEEK_SET); + ret=(*_of->callbacks.seek)(_of->source,op_position(_of),SEEK_SET); return OP_UNLIKELY(ret<0)?OP_EREAD:0; } @@ -1498,20 +1493,19 @@ static void op_clear(OggOpusFile *_of){ _ogg_free(_of->serialnos); ogg_stream_clear(&_of->os); ogg_sync_clear(&_of->oy); - if(_of->callbacks.close!=NULL)(*_of->callbacks.close)(_of->stream); + if(_of->callbacks.close!=NULL)(*_of->callbacks.close)(_of->source); } static int op_open1(OggOpusFile *_of, - void *_stream,const OpusFileCallbacks *_cb, + void *_source,const OpusFileCallbacks *_cb, const unsigned char *_initial_data,size_t _initial_bytes){ ogg_page og; ogg_page *pog; int seekable; int ret; memset(_of,0,sizeof(*_of)); - if(OP_UNLIKELY(_initial_bytes>(size_t)LONG_MAX))return OP_EFAULT; _of->end=-1; - _of->stream=_stream; + _of->source=_source; *&_of->callbacks=*_cb; /*At a minimum, we need to be able to read data.*/ if(OP_UNLIKELY(_of->callbacks.read==NULL))return OP_EREAD; @@ -1526,18 +1520,18 @@ static int op_open1(OggOpusFile *_of, decoding entire files from RAM.*/ if(_initial_bytes>0){ char *buffer; - buffer=ogg_sync_buffer(&_of->oy,(long)_initial_bytes); + buffer=ogg_sync_buffer(&_of->oy,_initial_bytes); memcpy(buffer,_initial_data,_initial_bytes*sizeof(*buffer)); - ogg_sync_wrote(&_of->oy,(long)_initial_bytes); + ogg_sync_wrote(&_of->oy,_initial_bytes); } /*Can we seek? Stevens suggests the seek test is portable.*/ - seekable=_cb->seek!=NULL&&(*_cb->seek)(_stream,0,SEEK_CUR)!=-1; + seekable=_cb->seek!=NULL&&(*_cb->seek)(_source,0,SEEK_CUR)!=-1; /*If seek is implemented, tell must also be implemented.*/ if(seekable){ opus_int64 pos; if(OP_UNLIKELY(_of->callbacks.tell==NULL))return OP_EINVAL; - pos=(*_of->callbacks.tell)(_of->stream); + pos=(*_of->callbacks.tell)(_of->source); /*If the current position is not equal to the initial bytes consumed, absolute seeking will not work.*/ if(OP_UNLIKELY(pos!=(opus_int64)_initial_bytes))return OP_EINVAL; @@ -1596,14 +1590,14 @@ static int op_open2(OggOpusFile *_of){ return ret; } -OggOpusFile *op_test_callbacks(void *_stream,const OpusFileCallbacks *_cb, +OggOpusFile *op_test_callbacks(void *_source,const OpusFileCallbacks *_cb, const unsigned char *_initial_data,size_t _initial_bytes,int *_error){ OggOpusFile *of; int ret; of=(OggOpusFile *)_ogg_malloc(sizeof(*of)); ret=OP_EFAULT; if(OP_LIKELY(of!=NULL)){ - ret=op_open1(of,_stream,_cb,_initial_data,_initial_bytes); + ret=op_open1(of,_source,_cb,_initial_data,_initial_bytes); if(OP_LIKELY(ret>=0)){ if(_error!=NULL)*_error=0; return of; @@ -1617,10 +1611,10 @@ OggOpusFile *op_test_callbacks(void *_stream,const OpusFileCallbacks *_cb, return NULL; } -OggOpusFile *op_open_callbacks(void *_stream,const OpusFileCallbacks *_cb, +OggOpusFile *op_open_callbacks(void *_source,const OpusFileCallbacks *_cb, const unsigned char *_initial_data,size_t _initial_bytes,int *_error){ OggOpusFile *of; - of=op_test_callbacks(_stream,_cb,_initial_data,_initial_bytes,_error); + of=op_test_callbacks(_source,_cb,_initial_data,_initial_bytes,_error); if(OP_LIKELY(of!=NULL)){ int ret; ret=op_open2(of); @@ -1633,15 +1627,15 @@ OggOpusFile *op_open_callbacks(void *_stream,const OpusFileCallbacks *_cb, /*Convenience routine to clean up from failure for the open functions that create their own streams.*/ -static OggOpusFile *op_open_close_on_failure(void *_stream, +static OggOpusFile *op_open_close_on_failure(void *_source, const OpusFileCallbacks *_cb,int *_error){ OggOpusFile *of; - if(OP_UNLIKELY(_stream==NULL)){ + if(OP_UNLIKELY(_source==NULL)){ if(_error!=NULL)*_error=OP_EFAULT; return NULL; } - of=op_open_callbacks(_stream,_cb,NULL,0,_error); - if(OP_UNLIKELY(of==NULL))(*_cb->close)(_stream); + of=op_open_callbacks(_source,_cb,NULL,0,_error); + if(OP_UNLIKELY(of==NULL))(*_cb->close)(_source); return of; } @@ -1659,15 +1653,15 @@ OggOpusFile *op_open_memory(const unsigned char *_data,size_t _size, /*Convenience routine to clean up from failure for the open functions that create their own streams.*/ -static OggOpusFile *op_test_close_on_failure(void *_stream, +static OggOpusFile *op_test_close_on_failure(void *_source, const OpusFileCallbacks *_cb,int *_error){ OggOpusFile *of; - if(OP_UNLIKELY(_stream==NULL)){ + if(OP_UNLIKELY(_source==NULL)){ if(_error!=NULL)*_error=OP_EFAULT; return NULL; } - of=op_test_callbacks(_stream,_cb,NULL,0,_error); - if(OP_UNLIKELY(of==NULL))(*_cb->close)(_stream); + of=op_test_callbacks(_source,_cb,NULL,0,_error); + if(OP_UNLIKELY(of==NULL))(*_cb->close)(_source); return of; } @@ -1708,7 +1702,7 @@ int op_link_count(const OggOpusFile *_of){ return _of->nlinks; } -opus_uint32 op_serialno(const OggOpusFile *_of,int _li){ +ogg_uint32_t op_serialno(const OggOpusFile *_of,int _li){ if(OP_UNLIKELY(_li>=_of->nlinks))_li=_of->nlinks-1; if(!_of->seekable)_li=0; return _of->links[_li<0?_of->cur_link:_li].serialno; @@ -1724,14 +1718,13 @@ opus_int64 op_raw_total(const OggOpusFile *_of,int _li){ ||OP_UNLIKELY(_li>=_of->nlinks)){ return OP_EINVAL; } - if(_li<0)return _of->end; + if(_li<0)return _of->end-_of->links[0].offset; return (_li+1>=_of->nlinks?_of->end:_of->links[_li+1].offset) - -(_li>0?_of->links[_li].offset:0); + -_of->links[_li].offset; } ogg_int64_t op_pcm_total(const OggOpusFile *_of,int _li){ OggOpusLink *links; - ogg_int64_t pcm_total; ogg_int64_t diff; int nlinks; nlinks=_of->nlinks; @@ -1744,14 +1737,20 @@ ogg_int64_t op_pcm_total(const OggOpusFile *_of,int _li){ /*We verify that the granule position differences are larger than the pre-skip and that the total duration does not overflow during link enumeration, so we don't have to check here.*/ - pcm_total=0; if(_li<0){ - pcm_total=links[nlinks-1].pcm_file_offset; - _li=nlinks-1; + ogg_int64_t pcm_total; + int li; + pcm_total=0; + for(li=0;li<nlinks;li++){ + OP_ALWAYS_TRUE(!op_granpos_diff(&diff, + links[li].pcm_end,links[li].pcm_start)); + pcm_total+=diff-links[li].head.pre_skip; + } + return pcm_total; } OP_ALWAYS_TRUE(!op_granpos_diff(&diff, links[_li].pcm_end,links[_li].pcm_start)); - return pcm_total+diff-links[_li].head.pre_skip; + return diff-links[_li].head.pre_skip; } const OpusHead *op_head(const OggOpusFile *_of,int _li){ @@ -1821,34 +1820,6 @@ opus_int32 op_bitrate_instant(OggOpusFile *_of){ return ret; } -/*Given a serialno, find a link with a corresponding Opus stream, if it exists. - Return: The index of the link to which the page belongs, or a negative number - if it was not a desired Opus bitstream section.*/ -static int op_get_link_from_serialno(const OggOpusFile *_of,int _cur_link, - opus_int64 _page_offset,ogg_uint32_t _serialno){ - const OggOpusLink *links; - int nlinks; - int li_lo; - int li_hi; - OP_ASSERT(_of->seekable); - links=_of->links; - nlinks=_of->nlinks; - li_lo=0; - /*Start off by guessing we're just a multiplexed page in the current link.*/ - li_hi=_cur_link+1<nlinks&&_page_offset<links[_cur_link+1].offset? - _cur_link+1:nlinks; - do{ - if(_page_offset>=links[_cur_link].offset)li_lo=_cur_link; - else li_hi=_cur_link; - _cur_link=li_lo+(li_hi-li_lo>>1); - } - while(li_hi-li_lo>1); - /*We've identified the link that should contain this page. - Make sure it's a page we care about.*/ - if(links[_cur_link].serialno!=_serialno)return OP_FALSE; - return _cur_link; -} - /*Fetch and process a page. This handles the case where we're at a bitstream boundary and dumps the decoding machine. @@ -1905,28 +1876,19 @@ static int op_fetch_and_process_page(OggOpusFile *_of, if(OP_UNLIKELY(_of->ready_state<OP_STREAMSET)){ if(seekable){ ogg_uint32_t serialno; + int nlinks; + int li; serialno=ogg_page_serialno(&og); - /*Match the serialno to bitstream section.*/ - OP_ASSERT(cur_link>=0&&cur_link<_of->nlinks); - if(links[cur_link].serialno!=serialno){ - /*It wasn't a page from the current link. - Is it from the next one?*/ - if(OP_LIKELY(cur_link+1<_of->nlinks&&links[cur_link+1].serialno== - serialno)){ - cur_link++; - } - else{ - int new_link; - new_link= - op_get_link_from_serialno(_of,cur_link,_page_offset,serialno); - /*Not a desired Opus bitstream section. - Keep trying.*/ - if(new_link<0)continue; - cur_link=new_link; - } - } + /*Match the serialno to bitstream section. + We use this rather than offset positions to avoid problems near + logical bitstream boundaries.*/ + nlinks=_of->nlinks; + for(li=0;li<nlinks&&links[li].serialno!=serialno;li++); + /*Not a desired Opus bitstream section. + Keep trying.*/ + if(li>=nlinks)continue; cur_serialno=serialno; - _of->cur_link=cur_link; + _of->cur_link=cur_link=li; ogg_stream_reset_serialno(&_of->os,serialno); _of->ready_state=OP_STREAMSET; /*If we're at the start of this link, initialize the granule position @@ -1980,32 +1942,13 @@ static int op_fetch_and_process_page(OggOpusFile *_of, opus_int32 total_duration; int durations[255]; int op_count; - int report_hole; - report_hole=0; total_duration=op_collect_audio_packets(_of,durations); if(OP_UNLIKELY(total_duration<0)){ - /*libogg reported a hole (a gap in the page sequence numbers). - Drain the packets from the page anyway. - If we don't, they'll still be there when we fetch the next page. - Then, when we go to pull out packets, we might get more than 255, - which would overrun our packet buffer.*/ + /*Drain the packets from the page anyway.*/ total_duration=op_collect_audio_packets(_of,durations); OP_ASSERT(total_duration>=0); - if(!_ignore_holes){ - /*Report the hole to the caller after we finish timestamping the - packets.*/ - report_hole=1; - /*We had lost or damaged pages, so reset our granule position - tracking. - This makes holes behave the same as a small raw seek. - If the next page is the EOS page, we'll discard it (because we - can't perform end trimming properly), and we'll always discard at - least 80 ms of audio (to allow decoder state to re-converge). - We could try to fill in the gap with PLC by looking at timestamps - in the non-EOS case, but that's complicated and error prone and we - can't rely on the timestamps being valid.*/ - _of->prev_packet_gp=-1; - } + /*Report holes to the caller.*/ + if(!_ignore_holes)return OP_HOLE; } op_count=_of->op_count; /*If we found at least one audio data packet, compute per-packet granule @@ -2032,7 +1975,6 @@ static int op_fetch_and_process_page(OggOpusFile *_of, Proceed to the next link, rather than risk playing back some samples that shouldn't have been played.*/ _of->op_count=0; - if(report_hole)return OP_HOLE; continue; } /*By default discard 80 ms of data after a seek, unless we seek @@ -2078,11 +2020,7 @@ static int op_fetch_and_process_page(OggOpusFile *_of, &&OP_LIKELY(diff<total_duration)){ cur_packet_gp=prev_packet_gp; for(pi=0;pi<op_count;pi++){ - /*Check for overflow.*/ - if(diff<0&&OP_UNLIKELY(OP_INT64_MAX+diff<durations[pi])){ - diff=durations[pi]+1; - } - else diff=durations[pi]-diff; + diff=durations[pi]-diff; /*If we have samples to trim...*/ if(diff>0){ /*If we trimmed the entire packet, stop (the spec says encoders @@ -2138,11 +2076,10 @@ static int op_fetch_and_process_page(OggOpusFile *_of, } _of->prev_packet_gp=prev_packet_gp; _of->prev_page_offset=_page_offset; - _of->op_count=op_count=pi; + _of->op_count=pi; + /*If end-trimming didn't trim all the packets, we're done.*/ + if(OP_LIKELY(pi>0))return 0; } - if(report_hole)return OP_HOLE; - /*If end-trimming didn't trim all the packets, we're done.*/ - if(op_count>0)return 0; } } } @@ -2180,41 +2117,35 @@ static ogg_int64_t op_get_granulepos(const OggOpusFile *_of, ogg_int64_t _pcm_offset,int *_li){ const OggOpusLink *links; ogg_int64_t duration; - ogg_int64_t pcm_start; - opus_int32 pre_skip; int nlinks; - int li_lo; - int li_hi; + int li; OP_ASSERT(_pcm_offset>=0); nlinks=_of->nlinks; links=_of->links; - li_lo=0; - li_hi=nlinks; - do{ - int li; - li=li_lo+(li_hi-li_lo>>1); - if(links[li].pcm_file_offset<=_pcm_offset)li_lo=li; - else li_hi=li; - } - while(li_hi-li_lo>1); - _pcm_offset-=links[li_lo].pcm_file_offset; - pcm_start=links[li_lo].pcm_start; - pre_skip=links[li_lo].head.pre_skip; - OP_ALWAYS_TRUE(!op_granpos_diff(&duration,links[li_lo].pcm_end,pcm_start)); - duration-=pre_skip; - if(_pcm_offset>=duration)return -1; - _pcm_offset+=pre_skip; - if(OP_UNLIKELY(pcm_start>OP_INT64_MAX-_pcm_offset)){ - /*Adding this amount to the granule position would overflow the positive - half of its 64-bit range. - Since signed overflow is undefined in C, do it in a way the compiler - isn't allowed to screw up.*/ - _pcm_offset-=OP_INT64_MAX-pcm_start+1; - pcm_start=OP_INT64_MIN; - } - pcm_start+=_pcm_offset; - *_li=li_lo; - return pcm_start; + for(li=0;OP_LIKELY(li<nlinks);li++){ + ogg_int64_t pcm_start; + opus_int32 pre_skip; + pcm_start=links[li].pcm_start; + pre_skip=links[li].head.pre_skip; + OP_ALWAYS_TRUE(!op_granpos_diff(&duration,links[li].pcm_end,pcm_start)); + duration-=pre_skip; + if(_pcm_offset<duration){ + _pcm_offset+=pre_skip; + if(OP_UNLIKELY(pcm_start>OP_INT64_MAX-_pcm_offset)){ + /*Adding this amount to the granule position would overflow the positive + half of its 64-bit range. + Since signed overflow is undefined in C, do it in a way the compiler + isn't allowed to screw up.*/ + _pcm_offset-=OP_INT64_MAX-pcm_start+1; + pcm_start=OP_INT64_MIN; + } + pcm_start+=_pcm_offset; + *_li=li; + return pcm_start; + } + _pcm_offset-=duration; + } + return -1; } /*A small helper to determine if an Ogg page contains data that continues onto @@ -2601,14 +2532,15 @@ int op_pcm_seek(OggOpusFile *_of,ogg_int64_t _pcm_offset){ ogg_int64_t gp; gp=_of->prev_packet_gp; if(OP_LIKELY(gp!=-1)){ - ogg_int64_t discard_count; - int nbuffered; + int nbuffered; nbuffered=OP_MAX(_of->od_buffer_size-_of->od_buffer_pos,0); OP_ALWAYS_TRUE(!op_granpos_add(&gp,gp,-nbuffered)); /*We do _not_ add cur_discard_count to gp. Otherwise the total amount to discard could grow without bound, and it would be better just to do a full seek.*/ - if(OP_LIKELY(!op_granpos_diff(&discard_count,target_gp,gp))){ + if(OP_LIKELY(!op_granpos_diff(&diff,gp,pcm_start))){ + ogg_int64_t discard_count; + discard_count=_pcm_offset-diff; /*We use a threshold of 90 ms instead of 80, since 80 ms is the _minimum_ we would have discarded after a full seek. Assuming 20 ms frames (the default), we'd discard 90 ms on average.*/ @@ -2674,14 +2606,22 @@ static ogg_int64_t op_get_pcm_offset(const OggOpusFile *_of, ogg_int64_t _gp,int _li){ const OggOpusLink *links; ogg_int64_t pcm_offset; + ogg_int64_t delta; + int li; links=_of->links; - OP_ASSERT(_li>=0&&_li<_of->nlinks); - pcm_offset=links[_li].pcm_file_offset; + pcm_offset=0; + OP_ASSERT(_li<_of->nlinks); + for(li=0;li<_li;li++){ + OP_ALWAYS_TRUE(!op_granpos_diff(&delta, + links[li].pcm_end,links[li].pcm_start)); + delta-=links[li].head.pre_skip; + pcm_offset+=delta; + } + OP_ASSERT(_li>=0); if(_of->seekable&&OP_UNLIKELY(op_granpos_cmp(_gp,links[_li].pcm_end)>0)){ _gp=links[_li].pcm_end; } if(OP_LIKELY(op_granpos_cmp(_gp,links[_li].pcm_start)>0)){ - ogg_int64_t delta; if(OP_UNLIKELY(op_granpos_diff(&delta,_gp,links[_li].pcm_start)<0)){ /*This means an unseekable stream claimed to have a page from more than 2 billion days after we joined.*/ diff --git a/thirdparty/opus/repacketizer.c b/thirdparty/opus/repacketizer.c index bda44a148a..c80ee7f001 100644 --- a/thirdparty/opus/repacketizer.c +++ b/thirdparty/opus/repacketizer.c @@ -213,8 +213,7 @@ opus_int32 opus_repacketizer_out_range_impl(OpusRepacketizer *rp, int begin, int { /* Using OPUS_MOVE() instead of OPUS_COPY() in case we're doing in-place padding from opus_packet_pad or opus_packet_unpad(). */ - /* assert disabled because it's not valid in C. */ - /* celt_assert(frames[i] + len[i] <= data || ptr <= frames[i]); */ + celt_assert(frames[i] + len[i] <= data || ptr <= frames[i]); OPUS_MOVE(ptr, frames[i], len[i]); ptr += len[i]; } diff --git a/thirdparty/opus/silk/A2NLSF.c b/thirdparty/opus/silk/A2NLSF.c index b487686ff9..b6e9e5ffcc 100644 --- a/thirdparty/opus/silk/A2NLSF.c +++ b/thirdparty/opus/silk/A2NLSF.c @@ -40,7 +40,7 @@ POSSIBILITY OF SUCH DAMAGE. /* Number of binary divisions, when not in low complexity mode */ #define BIN_DIV_STEPS_A2NLSF_FIX 3 /* must be no higher than 16 - log2( LSF_COS_TAB_SZ_FIX ) */ -#define MAX_ITERATIONS_A2NLSF_FIX 16 +#define MAX_ITERATIONS_A2NLSF_FIX 30 /* Helper function for A2NLSF(..) */ /* Transforms polynomials from cos(n*f) to cos(f)^n */ @@ -130,7 +130,7 @@ void silk_A2NLSF( const opus_int d /* I Filter order (must be even) */ ) { - opus_int i, k, m, dd, root_ix, ffrac; + opus_int i, k, m, dd, root_ix, ffrac; opus_int32 xlo, xhi, xmid; opus_int32 ylo, yhi, ymid, thr; opus_int32 nom, den; @@ -239,13 +239,13 @@ void silk_A2NLSF( /* Set NLSFs to white spectrum and exit */ NLSF[ 0 ] = (opus_int16)silk_DIV32_16( 1 << 15, d + 1 ); for( k = 1; k < d; k++ ) { - NLSF[ k ] = (opus_int16)silk_ADD16( NLSF[ k-1 ], NLSF[ 0 ] ); + NLSF[ k ] = (opus_int16)silk_SMULBB( k + 1, NLSF[ 0 ] ); } return; } /* Error: Apply progressively more bandwidth expansion and run again */ - silk_bwexpander_32( a_Q16, d, 65536 - silk_LSHIFT( 1, i ) ); + silk_bwexpander_32( a_Q16, d, 65536 - silk_SMULBB( 10 + i, i ) ); /* 10_Q16 = 0.00015*/ silk_A2NLSF_init( a_Q16, P, Q, dd ); p = P; /* Pointer to polynomial */ diff --git a/thirdparty/opus/silk/API.h b/thirdparty/opus/silk/API.h index 4d90ff9aa3..0131acbb08 100644 --- a/thirdparty/opus/silk/API.h +++ b/thirdparty/opus/silk/API.h @@ -80,8 +80,7 @@ opus_int silk_Encode( /* O Returns error co opus_int nSamplesIn, /* I Number of samples in input vector */ ec_enc *psRangeEnc, /* I/O Compressor data structure */ opus_int32 *nBytesOut, /* I/O Number of bytes in payload (input: Max bytes) */ - const opus_int prefillFlag, /* I Flag to indicate prefilling buffers no coding */ - int activity /* I Decision of Opus voice activity detector */ + const opus_int prefillFlag /* I Flag to indicate prefilling buffers no coding */ ); /****************************************/ diff --git a/thirdparty/opus/silk/CNG.c b/thirdparty/opus/silk/CNG.c index ef8e38df9f..8443ad63bb 100644 --- a/thirdparty/opus/silk/CNG.c +++ b/thirdparty/opus/silk/CNG.c @@ -138,16 +138,16 @@ void silk_CNG( gain_Q16 = silk_LSHIFT32( silk_SQRT_APPROX( gain_Q16 ), 8 ); } gain_Q10 = silk_RSHIFT( gain_Q16, 6 ); - + silk_CNG_exc( CNG_sig_Q14 + MAX_LPC_ORDER, psCNG->CNG_exc_buf_Q14, length, &psCNG->rand_seed ); /* Convert CNG NLSF to filter representation */ - silk_NLSF2A( A_Q12, psCNG->CNG_smth_NLSF_Q15, psDec->LPC_order, psDec->arch ); + silk_NLSF2A( A_Q12, psCNG->CNG_smth_NLSF_Q15, psDec->LPC_order ); /* Generate CNG signal, by synthesis filtering */ silk_memcpy( CNG_sig_Q14, psCNG->CNG_synth_state, MAX_LPC_ORDER * sizeof( opus_int32 ) ); - celt_assert( psDec->LPC_order == 10 || psDec->LPC_order == 16 ); for( i = 0; i < length; i++ ) { + silk_assert( psDec->LPC_order == 10 || psDec->LPC_order == 16 ); /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */ LPC_pred_Q10 = silk_RSHIFT( psDec->LPC_order, 1 ); LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 1 ], A_Q12[ 0 ] ); @@ -170,11 +170,11 @@ void silk_CNG( } /* Update states */ - CNG_sig_Q14[ MAX_LPC_ORDER + i ] = silk_ADD_SAT32( CNG_sig_Q14[ MAX_LPC_ORDER + i ], silk_LSHIFT_SAT32( LPC_pred_Q10, 4 ) ); - + CNG_sig_Q14[ MAX_LPC_ORDER + i ] = silk_ADD_LSHIFT( CNG_sig_Q14[ MAX_LPC_ORDER + i ], LPC_pred_Q10, 4 ); + /* Scale with Gain and add to input signal */ frame[ i ] = (opus_int16)silk_ADD_SAT16( frame[ i ], silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( CNG_sig_Q14[ MAX_LPC_ORDER + i ], gain_Q10 ), 8 ) ) ); - + } silk_memcpy( psCNG->CNG_synth_state, &CNG_sig_Q14[ length ], MAX_LPC_ORDER * sizeof( opus_int32 ) ); } else { diff --git a/thirdparty/opus/silk/LPC_analysis_filter.c b/thirdparty/opus/silk/LPC_analysis_filter.c index d34b5eb709..20906673ff 100644 --- a/thirdparty/opus/silk/LPC_analysis_filter.c +++ b/thirdparty/opus/silk/LPC_analysis_filter.c @@ -39,13 +39,6 @@ POSSIBILITY OF SUCH DAMAGE. /* first d output samples are set to zero */ /*******************************************/ -/* OPT: Using celt_fir() for this function should be faster, but it may cause - integer overflows in intermediate values (not final results), which the - current implementation silences by casting to unsigned. Enabling - this should be safe in pretty much all cases, even though it is not technically - C89-compliant. */ -#define USE_CELT_FIR 0 - void silk_LPC_analysis_filter( opus_int16 *out, /* O Output signal */ const opus_int16 *in, /* I Input signal */ @@ -56,7 +49,8 @@ void silk_LPC_analysis_filter( ) { opus_int j; -#if defined(FIXED_POINT) && USE_CELT_FIR +#ifdef FIXED_POINT + opus_int16 mem[SILK_MAX_ORDER_LPC]; opus_int16 num[SILK_MAX_ORDER_LPC]; #else int ix; @@ -64,16 +58,19 @@ void silk_LPC_analysis_filter( const opus_int16 *in_ptr; #endif - celt_assert( d >= 6 ); - celt_assert( (d & 1) == 0 ); - celt_assert( d <= len ); + silk_assert( d >= 6 ); + silk_assert( (d & 1) == 0 ); + silk_assert( d <= len ); -#if defined(FIXED_POINT) && USE_CELT_FIR - celt_assert( d <= SILK_MAX_ORDER_LPC ); +#ifdef FIXED_POINT + silk_assert( d <= SILK_MAX_ORDER_LPC ); for ( j = 0; j < d; j++ ) { num[ j ] = -B[ j ]; } - celt_fir( in + d, num, out + d, len - d, d, arch ); + for (j=0;j<d;j++) { + mem[ j ] = in[ d - j - 1 ]; + } + celt_fir( in + d, num, out + d, len - d, d, mem, arch ); for ( j = 0; j < d; j++ ) { out[ j ] = 0; } diff --git a/thirdparty/opus/silk/LPC_fit.c b/thirdparty/opus/silk/LPC_fit.c deleted file mode 100644 index cdea4f3abc..0000000000 --- a/thirdparty/opus/silk/LPC_fit.c +++ /dev/null @@ -1,81 +0,0 @@ -/*********************************************************************** -Copyright (c) 2013, Koen Vos. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "SigProc_FIX.h" - -/* Convert int32 coefficients to int16 coefs and make sure there's no wrap-around */ -void silk_LPC_fit( - opus_int16 *a_QOUT, /* O Output signal */ - opus_int32 *a_QIN, /* I/O Input signal */ - const opus_int QOUT, /* I Input Q domain */ - const opus_int QIN, /* I Input Q domain */ - const opus_int d /* I Filter order */ -) -{ - opus_int i, k, idx = 0; - opus_int32 maxabs, absval, chirp_Q16; - - /* Limit the maximum absolute value of the prediction coefficients, so that they'll fit in int16 */ - for( i = 0; i < 10; i++ ) { - /* Find maximum absolute value and its index */ - maxabs = 0; - for( k = 0; k < d; k++ ) { - absval = silk_abs( a_QIN[k] ); - if( absval > maxabs ) { - maxabs = absval; - idx = k; - } - } - maxabs = silk_RSHIFT_ROUND( maxabs, QIN - QOUT ); - - if( maxabs > silk_int16_MAX ) { - /* Reduce magnitude of prediction coefficients */ - maxabs = silk_min( maxabs, 163838 ); /* ( silk_int32_MAX >> 14 ) + silk_int16_MAX = 163838 */ - chirp_Q16 = SILK_FIX_CONST( 0.999, 16 ) - silk_DIV32( silk_LSHIFT( maxabs - silk_int16_MAX, 14 ), - silk_RSHIFT32( silk_MUL( maxabs, idx + 1), 2 ) ); - silk_bwexpander_32( a_QIN, d, chirp_Q16 ); - } else { - break; - } - } - - if( i == 10 ) { - /* Reached the last iteration, clip the coefficients */ - for( k = 0; k < d; k++ ) { - a_QOUT[ k ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( a_QIN[ k ], QIN - QOUT ) ); - a_QIN[ k ] = silk_LSHIFT( (opus_int32)a_QOUT[ k ], QIN - QOUT ); - } - } else { - for( k = 0; k < d; k++ ) { - a_QOUT[ k ] = (opus_int16)silk_RSHIFT_ROUND( a_QIN[ k ], QIN - QOUT ); - } - } -} diff --git a/thirdparty/opus/silk/LPC_inv_pred_gain.c b/thirdparty/opus/silk/LPC_inv_pred_gain.c index a3746a6ef9..4af89aa5fa 100644 --- a/thirdparty/opus/silk/LPC_inv_pred_gain.c +++ b/thirdparty/opus/silk/LPC_inv_pred_gain.c @@ -30,7 +30,6 @@ POSSIBILITY OF SUCH DAMAGE. #endif #include "SigProc_FIX.h" -#include "define.h" #define QA 24 #define A_LIMIT SILK_FIX_CONST( 0.99975, QA ) @@ -39,103 +38,117 @@ POSSIBILITY OF SUCH DAMAGE. /* Compute inverse of LPC prediction gain, and */ /* test if LPC coefficients are stable (all poles within unit circle) */ -static opus_int32 LPC_inverse_pred_gain_QA_c( /* O Returns inverse prediction gain in energy domain, Q30 */ - opus_int32 A_QA[ SILK_MAX_ORDER_LPC ], /* I Prediction coefficients */ +static opus_int32 LPC_inverse_pred_gain_QA( /* O Returns inverse prediction gain in energy domain, Q30 */ + opus_int32 A_QA[ 2 ][ SILK_MAX_ORDER_LPC ], /* I Prediction coefficients */ const opus_int order /* I Prediction order */ ) { opus_int k, n, mult2Q; - opus_int32 invGain_Q30, rc_Q31, rc_mult1_Q30, rc_mult2, tmp1, tmp2; + opus_int32 invGain_Q30, rc_Q31, rc_mult1_Q30, rc_mult2, tmp_QA; + opus_int32 *Aold_QA, *Anew_QA; - invGain_Q30 = SILK_FIX_CONST( 1, 30 ); + Anew_QA = A_QA[ order & 1 ]; + + invGain_Q30 = (opus_int32)1 << 30; for( k = order - 1; k > 0; k-- ) { /* Check for stability */ - if( ( A_QA[ k ] > A_LIMIT ) || ( A_QA[ k ] < -A_LIMIT ) ) { + if( ( Anew_QA[ k ] > A_LIMIT ) || ( Anew_QA[ k ] < -A_LIMIT ) ) { return 0; } /* Set RC equal to negated AR coef */ - rc_Q31 = -silk_LSHIFT( A_QA[ k ], 31 - QA ); + rc_Q31 = -silk_LSHIFT( Anew_QA[ k ], 31 - QA ); /* rc_mult1_Q30 range: [ 1 : 2^30 ] */ - rc_mult1_Q30 = silk_SUB32( SILK_FIX_CONST( 1, 30 ), silk_SMMUL( rc_Q31, rc_Q31 ) ); + rc_mult1_Q30 = ( (opus_int32)1 << 30 ) - silk_SMMUL( rc_Q31, rc_Q31 ); silk_assert( rc_mult1_Q30 > ( 1 << 15 ) ); /* reduce A_LIMIT if fails */ silk_assert( rc_mult1_Q30 <= ( 1 << 30 ) ); + /* rc_mult2 range: [ 2^30 : silk_int32_MAX ] */ + mult2Q = 32 - silk_CLZ32( silk_abs( rc_mult1_Q30 ) ); + rc_mult2 = silk_INVERSE32_varQ( rc_mult1_Q30, mult2Q + 30 ); + /* Update inverse gain */ /* invGain_Q30 range: [ 0 : 2^30 ] */ invGain_Q30 = silk_LSHIFT( silk_SMMUL( invGain_Q30, rc_mult1_Q30 ), 2 ); silk_assert( invGain_Q30 >= 0 ); silk_assert( invGain_Q30 <= ( 1 << 30 ) ); - if( invGain_Q30 < SILK_FIX_CONST( 1.0f / MAX_PREDICTION_POWER_GAIN, 30 ) ) { - return 0; - } - /* rc_mult2 range: [ 2^30 : silk_int32_MAX ] */ - mult2Q = 32 - silk_CLZ32( silk_abs( rc_mult1_Q30 ) ); - rc_mult2 = silk_INVERSE32_varQ( rc_mult1_Q30, mult2Q + 30 ); + /* Swap pointers */ + Aold_QA = Anew_QA; + Anew_QA = A_QA[ k & 1 ]; /* Update AR coefficient */ - for( n = 0; n < (k + 1) >> 1; n++ ) { - opus_int64 tmp64; - tmp1 = A_QA[ n ]; - tmp2 = A_QA[ k - n - 1 ]; - tmp64 = silk_RSHIFT_ROUND64( silk_SMULL( silk_SUB_SAT32(tmp1, - MUL32_FRAC_Q( tmp2, rc_Q31, 31 ) ), rc_mult2 ), mult2Q); - if( tmp64 > silk_int32_MAX || tmp64 < silk_int32_MIN ) { - return 0; - } - A_QA[ n ] = ( opus_int32 )tmp64; - tmp64 = silk_RSHIFT_ROUND64( silk_SMULL( silk_SUB_SAT32(tmp2, - MUL32_FRAC_Q( tmp1, rc_Q31, 31 ) ), rc_mult2), mult2Q); - if( tmp64 > silk_int32_MAX || tmp64 < silk_int32_MIN ) { - return 0; - } - A_QA[ k - n - 1 ] = ( opus_int32 )tmp64; + for( n = 0; n < k; n++ ) { + tmp_QA = Aold_QA[ n ] - MUL32_FRAC_Q( Aold_QA[ k - n - 1 ], rc_Q31, 31 ); + Anew_QA[ n ] = MUL32_FRAC_Q( tmp_QA, rc_mult2 , mult2Q ); } } /* Check for stability */ - if( ( A_QA[ k ] > A_LIMIT ) || ( A_QA[ k ] < -A_LIMIT ) ) { + if( ( Anew_QA[ 0 ] > A_LIMIT ) || ( Anew_QA[ 0 ] < -A_LIMIT ) ) { return 0; } /* Set RC equal to negated AR coef */ - rc_Q31 = -silk_LSHIFT( A_QA[ 0 ], 31 - QA ); + rc_Q31 = -silk_LSHIFT( Anew_QA[ 0 ], 31 - QA ); /* Range: [ 1 : 2^30 ] */ - rc_mult1_Q30 = silk_SUB32( SILK_FIX_CONST( 1, 30 ), silk_SMMUL( rc_Q31, rc_Q31 ) ); + rc_mult1_Q30 = ( (opus_int32)1 << 30 ) - silk_SMMUL( rc_Q31, rc_Q31 ); /* Update inverse gain */ /* Range: [ 0 : 2^30 ] */ invGain_Q30 = silk_LSHIFT( silk_SMMUL( invGain_Q30, rc_mult1_Q30 ), 2 ); - silk_assert( invGain_Q30 >= 0 ); - silk_assert( invGain_Q30 <= ( 1 << 30 ) ); - if( invGain_Q30 < SILK_FIX_CONST( 1.0f / MAX_PREDICTION_POWER_GAIN, 30 ) ) { - return 0; - } + silk_assert( invGain_Q30 >= 0 ); + silk_assert( invGain_Q30 <= 1<<30 ); return invGain_Q30; } /* For input in Q12 domain */ -opus_int32 silk_LPC_inverse_pred_gain_c( /* O Returns inverse prediction gain in energy domain, Q30 */ +opus_int32 silk_LPC_inverse_pred_gain( /* O Returns inverse prediction gain in energy domain, Q30 */ const opus_int16 *A_Q12, /* I Prediction coefficients, Q12 [order] */ const opus_int order /* I Prediction order */ ) { opus_int k; - opus_int32 Atmp_QA[ SILK_MAX_ORDER_LPC ]; + opus_int32 Atmp_QA[ 2 ][ SILK_MAX_ORDER_LPC ]; + opus_int32 *Anew_QA; opus_int32 DC_resp = 0; + Anew_QA = Atmp_QA[ order & 1 ]; + /* Increase Q domain of the AR coefficients */ for( k = 0; k < order; k++ ) { DC_resp += (opus_int32)A_Q12[ k ]; - Atmp_QA[ k ] = silk_LSHIFT32( (opus_int32)A_Q12[ k ], QA - 12 ); + Anew_QA[ k ] = silk_LSHIFT32( (opus_int32)A_Q12[ k ], QA - 12 ); } /* If the DC is unstable, we don't even need to do the full calculations */ if( DC_resp >= 4096 ) { return 0; } - return LPC_inverse_pred_gain_QA_c( Atmp_QA, order ); + return LPC_inverse_pred_gain_QA( Atmp_QA, order ); } + +#ifdef FIXED_POINT + +/* For input in Q24 domain */ +opus_int32 silk_LPC_inverse_pred_gain_Q24( /* O Returns inverse prediction gain in energy domain, Q30 */ + const opus_int32 *A_Q24, /* I Prediction coefficients [order] */ + const opus_int order /* I Prediction order */ +) +{ + opus_int k; + opus_int32 Atmp_QA[ 2 ][ SILK_MAX_ORDER_LPC ]; + opus_int32 *Anew_QA; + + Anew_QA = Atmp_QA[ order & 1 ]; + + /* Increase Q domain of the AR coefficients */ + for( k = 0; k < order; k++ ) { + Anew_QA[ k ] = silk_RSHIFT32( A_Q24[ k ], 24 - QA ); + } + + return LPC_inverse_pred_gain_QA( Atmp_QA, order ); +} +#endif diff --git a/thirdparty/opus/silk/LP_variable_cutoff.c b/thirdparty/opus/silk/LP_variable_cutoff.c index 79112ad354..f639e1f899 100644 --- a/thirdparty/opus/silk/LP_variable_cutoff.c +++ b/thirdparty/opus/silk/LP_variable_cutoff.c @@ -130,6 +130,6 @@ void silk_LP_variable_cutoff( /* ARMA low-pass filtering */ silk_assert( TRANSITION_NB == 3 && TRANSITION_NA == 2 ); - silk_biquad_alt_stride1( frame, B_Q28, A_Q28, psLP->In_LP_State, frame, frame_length); + silk_biquad_alt( frame, B_Q28, A_Q28, psLP->In_LP_State, frame, frame_length, 1); } } diff --git a/thirdparty/opus/silk/MacroCount.h b/thirdparty/opus/silk/MacroCount.h index 78100ffede..834817d058 100644 --- a/thirdparty/opus/silk/MacroCount.h +++ b/thirdparty/opus/silk/MacroCount.h @@ -319,6 +319,14 @@ static OPUS_INLINE opus_int32 silk_ADD_POS_SAT32(opus_int64 a, opus_int64 b){ return(tmp); } +#undef silk_ADD_POS_SAT64 +static OPUS_INLINE opus_int64 silk_ADD_POS_SAT64(opus_int64 a, opus_int64 b){ + opus_int64 tmp; + ops_count += 1; + tmp = ((((a)+(b)) & 0x8000000000000000LL) ? silk_int64_MAX : ((a)+(b))); + return(tmp); +} + #undef silk_LSHIFT8 static OPUS_INLINE opus_int8 silk_LSHIFT8(opus_int8 a, opus_int32 shift){ opus_int8 ret; @@ -691,7 +699,7 @@ return(ret); #undef silk_LIMIT_32 -static OPUS_INLINE opus_int32 silk_LIMIT_32(opus_int32 a, opus_int32 limit1, opus_int32 limit2) +static OPUS_INLINE opus_int silk_LIMIT_32(opus_int32 a, opus_int32 limit1, opus_int32 limit2) { opus_int32 ret; ops_count += 6; diff --git a/thirdparty/opus/silk/MacroDebug.h b/thirdparty/opus/silk/MacroDebug.h index 8dd4ce2ee2..35aedc5c5f 100644 --- a/thirdparty/opus/silk/MacroDebug.h +++ b/thirdparty/opus/silk/MacroDebug.h @@ -539,7 +539,8 @@ static OPUS_INLINE opus_int32 silk_DIV32_16_(opus_int32 a32, opus_int32 b32, cha no checking needed for silk_POS_SAT32 no checking needed for silk_ADD_POS_SAT8 no checking needed for silk_ADD_POS_SAT16 - no checking needed for silk_ADD_POS_SAT32 */ + no checking needed for silk_ADD_POS_SAT32 + no checking needed for silk_ADD_POS_SAT64 */ #undef silk_LSHIFT8 #define silk_LSHIFT8(a,b) silk_LSHIFT8_((a), (b), __FILE__, __LINE__) diff --git a/thirdparty/opus/silk/NLSF2A.c b/thirdparty/opus/silk/NLSF2A.c index d5b7730638..b1c559ea68 100644 --- a/thirdparty/opus/silk/NLSF2A.c +++ b/thirdparty/opus/silk/NLSF2A.c @@ -66,8 +66,7 @@ static OPUS_INLINE void silk_NLSF2A_find_poly( void silk_NLSF2A( opus_int16 *a_Q12, /* O monic whitening filter coefficients in Q12, [ d ] */ const opus_int16 *NLSF, /* I normalized line spectral frequencies in Q15, [ d ] */ - const opus_int d, /* I filter order (should be even) */ - int arch /* I Run-time architecture */ + const opus_int d /* I filter order (should be even) */ ) { /* This ordering was found to maximize quality. It improves numerical accuracy of @@ -84,14 +83,15 @@ void silk_NLSF2A( opus_int32 P[ SILK_MAX_ORDER_LPC / 2 + 1 ], Q[ SILK_MAX_ORDER_LPC / 2 + 1 ]; opus_int32 Ptmp, Qtmp, f_int, f_frac, cos_val, delta; opus_int32 a32_QA1[ SILK_MAX_ORDER_LPC ]; + opus_int32 maxabs, absval, idx=0, sc_Q16; silk_assert( LSF_COS_TAB_SZ_FIX == 128 ); - celt_assert( d==10 || d==16 ); + silk_assert( d==10||d==16 ); /* convert LSFs to 2*cos(LSF), using piecewise linear curve from table */ ordering = d == 16 ? ordering16 : ordering10; for( k = 0; k < d; k++ ) { - silk_assert( NLSF[k] >= 0 ); + silk_assert(NLSF[k] >= 0 ); /* f_int on a scale 0-127 (rounded down) */ f_int = silk_RSHIFT( NLSF[k], 15 - 7 ); @@ -126,15 +126,52 @@ void silk_NLSF2A( a32_QA1[ d-k-1 ] = Qtmp - Ptmp; /* QA+1 */ } - /* Convert int32 coefficients to Q12 int16 coefs */ - silk_LPC_fit( a_Q12, a32_QA1, 12, QA + 1, d ); + /* Limit the maximum absolute value of the prediction coefficients, so that they'll fit in int16 */ + for( i = 0; i < 10; i++ ) { + /* Find maximum absolute value and its index */ + maxabs = 0; + for( k = 0; k < d; k++ ) { + absval = silk_abs( a32_QA1[k] ); + if( absval > maxabs ) { + maxabs = absval; + idx = k; + } + } + maxabs = silk_RSHIFT_ROUND( maxabs, QA + 1 - 12 ); /* QA+1 -> Q12 */ + + if( maxabs > silk_int16_MAX ) { + /* Reduce magnitude of prediction coefficients */ + maxabs = silk_min( maxabs, 163838 ); /* ( silk_int32_MAX >> 14 ) + silk_int16_MAX = 163838 */ + sc_Q16 = SILK_FIX_CONST( 0.999, 16 ) - silk_DIV32( silk_LSHIFT( maxabs - silk_int16_MAX, 14 ), + silk_RSHIFT32( silk_MUL( maxabs, idx + 1), 2 ) ); + silk_bwexpander_32( a32_QA1, d, sc_Q16 ); + } else { + break; + } + } - for( i = 0; silk_LPC_inverse_pred_gain( a_Q12, d, arch ) == 0 && i < MAX_LPC_STABILIZE_ITERATIONS; i++ ) { - /* Prediction coefficients are (too close to) unstable; apply bandwidth expansion */ - /* on the unscaled coefficients, convert to Q12 and measure again */ - silk_bwexpander_32( a32_QA1, d, 65536 - silk_LSHIFT( 2, i ) ); + if( i == 10 ) { + /* Reached the last iteration, clip the coefficients */ for( k = 0; k < d; k++ ) { - a_Q12[ k ] = (opus_int16)silk_RSHIFT_ROUND( a32_QA1[ k ], QA + 1 - 12 ); /* QA+1 -> Q12 */ + a_Q12[ k ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( a32_QA1[ k ], QA + 1 - 12 ) ); /* QA+1 -> Q12 */ + a32_QA1[ k ] = silk_LSHIFT( (opus_int32)a_Q12[ k ], QA + 1 - 12 ); + } + } else { + for( k = 0; k < d; k++ ) { + a_Q12[ k ] = (opus_int16)silk_RSHIFT_ROUND( a32_QA1[ k ], QA + 1 - 12 ); /* QA+1 -> Q12 */ + } + } + + for( i = 0; i < MAX_LPC_STABILIZE_ITERATIONS; i++ ) { + if( silk_LPC_inverse_pred_gain( a_Q12, d ) < SILK_FIX_CONST( 1.0 / MAX_PREDICTION_POWER_GAIN, 30 ) ) { + /* Prediction coefficients are (too close to) unstable; apply bandwidth expansion */ + /* on the unscaled coefficients, convert to Q12 and measure again */ + silk_bwexpander_32( a32_QA1, d, 65536 - silk_LSHIFT( 2, i ) ); + for( k = 0; k < d; k++ ) { + a_Q12[ k ] = (opus_int16)silk_RSHIFT_ROUND( a32_QA1[ k ], QA + 1 - 12 ); /* QA+1 -> Q12 */ + } + } else { + break; } } } diff --git a/thirdparty/opus/silk/NLSF_VQ.c b/thirdparty/opus/silk/NLSF_VQ.c index b83182a79c..69b6e22e18 100644 --- a/thirdparty/opus/silk/NLSF_VQ.c +++ b/thirdparty/opus/silk/NLSF_VQ.c @@ -33,44 +33,36 @@ POSSIBILITY OF SUCH DAMAGE. /* Compute quantization errors for an LPC_order element input vector for a VQ codebook */ void silk_NLSF_VQ( - opus_int32 err_Q24[], /* O Quantization errors [K] */ + opus_int32 err_Q26[], /* O Quantization errors [K] */ const opus_int16 in_Q15[], /* I Input vectors to be quantized [LPC_order] */ const opus_uint8 pCB_Q8[], /* I Codebook vectors [K*LPC_order] */ - const opus_int16 pWght_Q9[], /* I Codebook weights [K*LPC_order] */ const opus_int K, /* I Number of codebook vectors */ const opus_int LPC_order /* I Number of LPCs */ ) { - opus_int i, m; - opus_int32 diff_Q15, diffw_Q24, sum_error_Q24, pred_Q24; - const opus_int16 *w_Q9_ptr; - const opus_uint8 *cb_Q8_ptr; + opus_int i, m; + opus_int32 diff_Q15, sum_error_Q30, sum_error_Q26; - celt_assert( ( LPC_order & 1 ) == 0 ); + silk_assert( LPC_order <= 16 ); + silk_assert( ( LPC_order & 1 ) == 0 ); /* Loop over codebook */ - cb_Q8_ptr = pCB_Q8; - w_Q9_ptr = pWght_Q9; for( i = 0; i < K; i++ ) { - sum_error_Q24 = 0; - pred_Q24 = 0; - for( m = LPC_order-2; m >= 0; m -= 2 ) { - /* Compute weighted absolute predictive quantization error for index m + 1 */ - diff_Q15 = silk_SUB_LSHIFT32( in_Q15[ m + 1 ], (opus_int32)cb_Q8_ptr[ m + 1 ], 7 ); /* range: [ -32767 : 32767 ]*/ - diffw_Q24 = silk_SMULBB( diff_Q15, w_Q9_ptr[ m + 1 ] ); - sum_error_Q24 = silk_ADD32( sum_error_Q24, silk_abs( silk_SUB_RSHIFT32( diffw_Q24, pred_Q24, 1 ) ) ); - pred_Q24 = diffw_Q24; + sum_error_Q26 = 0; + for( m = 0; m < LPC_order; m += 2 ) { + /* Compute weighted squared quantization error for index m */ + diff_Q15 = silk_SUB_LSHIFT32( in_Q15[ m ], (opus_int32)*pCB_Q8++, 7 ); /* range: [ -32767 : 32767 ]*/ + sum_error_Q30 = silk_SMULBB( diff_Q15, diff_Q15 ); - /* Compute weighted absolute predictive quantization error for index m */ - diff_Q15 = silk_SUB_LSHIFT32( in_Q15[ m ], (opus_int32)cb_Q8_ptr[ m ], 7 ); /* range: [ -32767 : 32767 ]*/ - diffw_Q24 = silk_SMULBB( diff_Q15, w_Q9_ptr[ m ] ); - sum_error_Q24 = silk_ADD32( sum_error_Q24, silk_abs( silk_SUB_RSHIFT32( diffw_Q24, pred_Q24, 1 ) ) ); - pred_Q24 = diffw_Q24; + /* Compute weighted squared quantization error for index m + 1 */ + diff_Q15 = silk_SUB_LSHIFT32( in_Q15[m + 1], (opus_int32)*pCB_Q8++, 7 ); /* range: [ -32767 : 32767 ]*/ + sum_error_Q30 = silk_SMLABB( sum_error_Q30, diff_Q15, diff_Q15 ); - silk_assert( sum_error_Q24 >= 0 ); + sum_error_Q26 = silk_ADD_RSHIFT32( sum_error_Q26, sum_error_Q30, 4 ); + + silk_assert( sum_error_Q26 >= 0 ); + silk_assert( sum_error_Q30 >= 0 ); } - err_Q24[ i ] = sum_error_Q24; - cb_Q8_ptr += LPC_order; - w_Q9_ptr += LPC_order; + err_Q26[ i ] = sum_error_Q26; } } diff --git a/thirdparty/opus/silk/NLSF_VQ_weights_laroia.c b/thirdparty/opus/silk/NLSF_VQ_weights_laroia.c index 9873bcde10..04894c59ab 100644 --- a/thirdparty/opus/silk/NLSF_VQ_weights_laroia.c +++ b/thirdparty/opus/silk/NLSF_VQ_weights_laroia.c @@ -48,8 +48,8 @@ void silk_NLSF_VQ_weights_laroia( opus_int k; opus_int32 tmp1_int, tmp2_int; - celt_assert( D > 0 ); - celt_assert( ( D & 1 ) == 0 ); + silk_assert( D > 0 ); + silk_assert( ( D & 1 ) == 0 ); /* First value */ tmp1_int = silk_max_int( pNLSF_Q15[ 0 ], 1 ); diff --git a/thirdparty/opus/silk/NLSF_decode.c b/thirdparty/opus/silk/NLSF_decode.c index eeb0ba8c92..9f715060b8 100644 --- a/thirdparty/opus/silk/NLSF_decode.c +++ b/thirdparty/opus/silk/NLSF_decode.c @@ -32,7 +32,7 @@ POSSIBILITY OF SUCH DAMAGE. #include "main.h" /* Predictive dequantizer for NLSF residuals */ -static OPUS_INLINE void silk_NLSF_residual_dequant( /* O Returns RD value in Q30 */ +static OPUS_INLINE void silk_NLSF_residual_dequant( /* O Returns RD value in Q30 */ opus_int16 x_Q10[], /* O Output [ order ] */ const opus_int8 indices[], /* I Quantization indices [ order ] */ const opus_uint8 pred_coef_Q8[], /* I Backward predictor coefs [ order ] */ @@ -70,9 +70,15 @@ void silk_NLSF_decode( opus_uint8 pred_Q8[ MAX_LPC_ORDER ]; opus_int16 ec_ix[ MAX_LPC_ORDER ]; opus_int16 res_Q10[ MAX_LPC_ORDER ]; - opus_int32 NLSF_Q15_tmp; + opus_int16 W_tmp_QW[ MAX_LPC_ORDER ]; + opus_int32 W_tmp_Q9, NLSF_Q15_tmp; const opus_uint8 *pCB_element; - const opus_int16 *pCB_Wght_Q9; + + /* Decode first stage */ + pCB_element = &psNLSF_CB->CB1_NLSF_Q8[ NLSFIndices[ 0 ] * psNLSF_CB->order ]; + for( i = 0; i < psNLSF_CB->order; i++ ) { + pNLSF_Q15[ i ] = silk_LSHIFT( (opus_int16)pCB_element[ i ], 7 ); + } /* Unpack entropy table indices and predictor for current CB1 index */ silk_NLSF_unpack( ec_ix, pred_Q8, psNLSF_CB, NLSFIndices[ 0 ] ); @@ -80,11 +86,13 @@ void silk_NLSF_decode( /* Predictive residual dequantizer */ silk_NLSF_residual_dequant( res_Q10, &NLSFIndices[ 1 ], pred_Q8, psNLSF_CB->quantStepSize_Q16, psNLSF_CB->order ); - /* Apply inverse square-rooted weights to first stage and add to output */ - pCB_element = &psNLSF_CB->CB1_NLSF_Q8[ NLSFIndices[ 0 ] * psNLSF_CB->order ]; - pCB_Wght_Q9 = &psNLSF_CB->CB1_Wght_Q9[ NLSFIndices[ 0 ] * psNLSF_CB->order ]; + /* Weights from codebook vector */ + silk_NLSF_VQ_weights_laroia( W_tmp_QW, pNLSF_Q15, psNLSF_CB->order ); + + /* Apply inverse square-rooted weights and add to output */ for( i = 0; i < psNLSF_CB->order; i++ ) { - NLSF_Q15_tmp = silk_ADD_LSHIFT32( silk_DIV32_16( silk_LSHIFT( (opus_int32)res_Q10[ i ], 14 ), pCB_Wght_Q9[ i ] ), (opus_int16)pCB_element[ i ], 7 ); + W_tmp_Q9 = silk_SQRT_APPROX( silk_LSHIFT( (opus_int32)W_tmp_QW[ i ], 18 - NLSF_W_Q ) ); + NLSF_Q15_tmp = silk_ADD32( pNLSF_Q15[ i ], silk_DIV32_16( silk_LSHIFT( (opus_int32)res_Q10[ i ], 14 ), W_tmp_Q9 ) ); pNLSF_Q15[ i ] = (opus_int16)silk_LIMIT( NLSF_Q15_tmp, 0, 32767 ); } diff --git a/thirdparty/opus/silk/NLSF_del_dec_quant.c b/thirdparty/opus/silk/NLSF_del_dec_quant.c index 44a16acd0b..de88fee060 100644 --- a/thirdparty/opus/silk/NLSF_del_dec_quant.c +++ b/thirdparty/opus/silk/NLSF_del_dec_quant.c @@ -84,7 +84,7 @@ opus_int32 silk_NLSF_del_dec_quant( /* O Returns nStates = 1; RD_Q25[ 0 ] = 0; prev_out_Q10[ 0 ] = 0; - for( i = order - 1; i >= 0; i-- ) { + for( i = order - 1; ; i-- ) { rates_Q5 = &ec_rates_Q5[ ec_ix[ i ] ]; in_Q10 = x_Q10[ i ]; for( j = 0; j < nStates; j++ ) { @@ -131,7 +131,7 @@ opus_int32 silk_NLSF_del_dec_quant( /* O Returns RD_Q25[ j + nStates ] = silk_SMLABB( silk_MLA( RD_tmp_Q25, silk_SMULBB( diff_Q10, diff_Q10 ), w_Q5[ i ] ), mu_Q20, rate1_Q5 ); } - if( nStates <= NLSF_QUANT_DEL_DEC_STATES/2 ) { + if( nStates <= ( NLSF_QUANT_DEL_DEC_STATES >> 1 ) ) { /* double number of states and copy */ for( j = 0; j < nStates; j++ ) { ind[ j + nStates ][ i ] = ind[ j ][ i ] + 1; @@ -140,7 +140,7 @@ opus_int32 silk_NLSF_del_dec_quant( /* O Returns for( j = nStates; j < NLSF_QUANT_DEL_DEC_STATES; j++ ) { ind[ j ][ i ] = ind[ j - nStates ][ i ]; } - } else { + } else if( i > 0 ) { /* sort lower and upper half of RD_Q25, pairwise */ for( j = 0; j < NLSF_QUANT_DEL_DEC_STATES; j++ ) { if( RD_Q25[ j ] > RD_Q25[ j + NLSF_QUANT_DEL_DEC_STATES ] ) { @@ -191,6 +191,8 @@ opus_int32 silk_NLSF_del_dec_quant( /* O Returns for( j = 0; j < NLSF_QUANT_DEL_DEC_STATES; j++ ) { ind[ j ][ i ] += silk_RSHIFT( ind_sort[ j ], NLSF_QUANT_DEL_DEC_STATES_LOG2 ); } + } else { /* i == 0 */ + break; } } diff --git a/thirdparty/opus/silk/NLSF_encode.c b/thirdparty/opus/silk/NLSF_encode.c index 01ac7db78c..f03c3f1c35 100644 --- a/thirdparty/opus/silk/NLSF_encode.c +++ b/thirdparty/opus/silk/NLSF_encode.c @@ -37,9 +37,9 @@ POSSIBILITY OF SUCH DAMAGE. /***********************/ opus_int32 silk_NLSF_encode( /* O Returns RD value in Q25 */ opus_int8 *NLSFIndices, /* I Codebook path vector [ LPC_ORDER + 1 ] */ - opus_int16 *pNLSF_Q15, /* I/O (Un)quantized NLSF vector [ LPC_ORDER ] */ + opus_int16 *pNLSF_Q15, /* I/O Quantized NLSF vector [ LPC_ORDER ] */ const silk_NLSF_CB_struct *psNLSF_CB, /* I Codebook object */ - const opus_int16 *pW_Q2, /* I NLSF weight vector [ LPC_ORDER ] */ + const opus_int16 *pW_QW, /* I NLSF weight vector [ LPC_ORDER ] */ const opus_int NLSF_mu_Q20, /* I Rate weight for the RD optimization */ const opus_int nSurvivors, /* I Max survivors after first stage */ const opus_int signalType /* I Signal type: 0/1/2 */ @@ -47,32 +47,34 @@ opus_int32 silk_NLSF_encode( /* O Returns { opus_int i, s, ind1, bestIndex, prob_Q8, bits_q7; opus_int32 W_tmp_Q9, ret; - VARDECL( opus_int32, err_Q24 ); + VARDECL( opus_int32, err_Q26 ); VARDECL( opus_int32, RD_Q25 ); VARDECL( opus_int, tempIndices1 ); VARDECL( opus_int8, tempIndices2 ); + opus_int16 res_Q15[ MAX_LPC_ORDER ]; opus_int16 res_Q10[ MAX_LPC_ORDER ]; opus_int16 NLSF_tmp_Q15[ MAX_LPC_ORDER ]; + opus_int16 W_tmp_QW[ MAX_LPC_ORDER ]; opus_int16 W_adj_Q5[ MAX_LPC_ORDER ]; opus_uint8 pred_Q8[ MAX_LPC_ORDER ]; opus_int16 ec_ix[ MAX_LPC_ORDER ]; const opus_uint8 *pCB_element, *iCDF_ptr; - const opus_int16 *pCB_Wght_Q9; SAVE_STACK; - celt_assert( signalType >= 0 && signalType <= 2 ); + silk_assert( nSurvivors <= NLSF_VQ_MAX_SURVIVORS ); + silk_assert( signalType >= 0 && signalType <= 2 ); silk_assert( NLSF_mu_Q20 <= 32767 && NLSF_mu_Q20 >= 0 ); /* NLSF stabilization */ silk_NLSF_stabilize( pNLSF_Q15, psNLSF_CB->deltaMin_Q15, psNLSF_CB->order ); /* First stage: VQ */ - ALLOC( err_Q24, psNLSF_CB->nVectors, opus_int32 ); - silk_NLSF_VQ( err_Q24, pNLSF_Q15, psNLSF_CB->CB1_NLSF_Q8, psNLSF_CB->CB1_Wght_Q9, psNLSF_CB->nVectors, psNLSF_CB->order ); + ALLOC( err_Q26, psNLSF_CB->nVectors, opus_int32 ); + silk_NLSF_VQ( err_Q26, pNLSF_Q15, psNLSF_CB->CB1_NLSF_Q8, psNLSF_CB->nVectors, psNLSF_CB->order ); /* Sort the quantization errors */ ALLOC( tempIndices1, nSurvivors, opus_int ); - silk_insertion_sort_increasing( err_Q24, tempIndices1, psNLSF_CB->nVectors, nSurvivors ); + silk_insertion_sort_increasing( err_Q26, tempIndices1, psNLSF_CB->nVectors, nSurvivors ); ALLOC( RD_Q25, nSurvivors, opus_int32 ); ALLOC( tempIndices2, nSurvivors * MAX_LPC_ORDER, opus_int8 ); @@ -83,12 +85,23 @@ opus_int32 silk_NLSF_encode( /* O Returns /* Residual after first stage */ pCB_element = &psNLSF_CB->CB1_NLSF_Q8[ ind1 * psNLSF_CB->order ]; - pCB_Wght_Q9 = &psNLSF_CB->CB1_Wght_Q9[ ind1 * psNLSF_CB->order ]; for( i = 0; i < psNLSF_CB->order; i++ ) { NLSF_tmp_Q15[ i ] = silk_LSHIFT16( (opus_int16)pCB_element[ i ], 7 ); - W_tmp_Q9 = pCB_Wght_Q9[ i ]; - res_Q10[ i ] = (opus_int16)silk_RSHIFT( silk_SMULBB( pNLSF_Q15[ i ] - NLSF_tmp_Q15[ i ], W_tmp_Q9 ), 14 ); - W_adj_Q5[ i ] = silk_DIV32_varQ( (opus_int32)pW_Q2[ i ], silk_SMULBB( W_tmp_Q9, W_tmp_Q9 ), 21 ); + res_Q15[ i ] = pNLSF_Q15[ i ] - NLSF_tmp_Q15[ i ]; + } + + /* Weights from codebook vector */ + silk_NLSF_VQ_weights_laroia( W_tmp_QW, NLSF_tmp_Q15, psNLSF_CB->order ); + + /* Apply square-rooted weights */ + for( i = 0; i < psNLSF_CB->order; i++ ) { + W_tmp_Q9 = silk_SQRT_APPROX( silk_LSHIFT( (opus_int32)W_tmp_QW[ i ], 18 - NLSF_W_Q ) ); + res_Q10[ i ] = (opus_int16)silk_RSHIFT( silk_SMULBB( res_Q15[ i ], W_tmp_Q9 ), 14 ); + } + + /* Modify input weights accordingly */ + for( i = 0; i < psNLSF_CB->order; i++ ) { + W_adj_Q5[ i ] = silk_DIV32_16( silk_LSHIFT( (opus_int32)pW_QW[ i ], 5 ), W_tmp_QW[ i ] ); } /* Unpack entropy table indices and predictor for current CB1 index */ diff --git a/thirdparty/opus/silk/NSQ.c b/thirdparty/opus/silk/NSQ.c index 1d64d8e257..43e3fee7e0 100644 --- a/thirdparty/opus/silk/NSQ.c +++ b/thirdparty/opus/silk/NSQ.c @@ -37,7 +37,7 @@ POSSIBILITY OF SUCH DAMAGE. static OPUS_INLINE void silk_nsq_scale_states( const silk_encoder_state *psEncC, /* I Encoder State */ silk_nsq_state *NSQ, /* I/O NSQ state */ - const opus_int16 x16[], /* I input */ + const opus_int32 x_Q3[], /* I input in Q3 */ opus_int32 x_sc_Q10[], /* O input scaled with 1/Gain */ const opus_int16 sLTP[], /* I re-whitened LTP state in Q0 */ opus_int32 sLTP_Q15[], /* O LTP state matching scaled input */ @@ -75,14 +75,14 @@ static OPUS_INLINE void silk_noise_shape_quantizer( void silk_NSQ_c ( - const silk_encoder_state *psEncC, /* I Encoder State */ + const silk_encoder_state *psEncC, /* I/O Encoder State */ silk_nsq_state *NSQ, /* I/O NSQ state */ SideInfoIndices *psIndices, /* I/O Quantization Indices */ - const opus_int16 x16[], /* I Input */ + const opus_int32 x_Q3[], /* I Prefiltered input signal */ opus_int8 pulses[], /* O Quantized pulse signal */ const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ - const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ + const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ @@ -117,7 +117,8 @@ void silk_NSQ_c LSF_interpolation_flag = 1; } - ALLOC( sLTP_Q15, psEncC->ltp_mem_length + psEncC->frame_length, opus_int32 ); + ALLOC( sLTP_Q15, + psEncC->ltp_mem_length + psEncC->frame_length, opus_int32 ); ALLOC( sLTP, psEncC->ltp_mem_length + psEncC->frame_length, opus_int16 ); ALLOC( x_sc_Q10, psEncC->subfr_length, opus_int32 ); /* Set up pointers to start of sub frame */ @@ -127,7 +128,7 @@ void silk_NSQ_c for( k = 0; k < psEncC->nb_subfr; k++ ) { A_Q12 = &PredCoef_Q12[ (( k >> 1 ) | ( 1 - LSF_interpolation_flag )) * MAX_LPC_ORDER ]; B_Q14 = <PCoef_Q14[ k * LTP_ORDER ]; - AR_shp_Q13 = &AR_Q13[ k * MAX_SHAPE_LPC_ORDER ]; + AR_shp_Q13 = &AR2_Q13[ k * MAX_SHAPE_LPC_ORDER ]; /* Noise shape parameters */ silk_assert( HarmShapeGain_Q14[ k ] >= 0 ); @@ -143,7 +144,7 @@ void silk_NSQ_c if( ( k & ( 3 - silk_LSHIFT( LSF_interpolation_flag, 1 ) ) ) == 0 ) { /* Rewhiten with new A coefs */ start_idx = psEncC->ltp_mem_length - lag - psEncC->predictLPCOrder - LTP_ORDER / 2; - celt_assert( start_idx > 0 ); + silk_assert( start_idx > 0 ); silk_LPC_analysis_filter( &sLTP[ start_idx ], &NSQ->xq[ start_idx + k * psEncC->subfr_length ], A_Q12, psEncC->ltp_mem_length - start_idx, psEncC->predictLPCOrder, psEncC->arch ); @@ -153,13 +154,13 @@ void silk_NSQ_c } } - silk_nsq_scale_states( psEncC, NSQ, x16, x_sc_Q10, sLTP, sLTP_Q15, k, LTP_scale_Q14, Gains_Q16, pitchL, psIndices->signalType ); + silk_nsq_scale_states( psEncC, NSQ, x_Q3, x_sc_Q10, sLTP, sLTP_Q15, k, LTP_scale_Q14, Gains_Q16, pitchL, psIndices->signalType ); silk_noise_shape_quantizer( NSQ, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q15, A_Q12, B_Q14, AR_shp_Q13, lag, HarmShapeFIRPacked_Q14, Tilt_Q14[ k ], LF_shp_Q14[ k ], Gains_Q16[ k ], Lambda_Q10, offset_Q10, psEncC->subfr_length, psEncC->shapingLPCOrder, psEncC->predictLPCOrder, psEncC->arch ); - x16 += psEncC->subfr_length; + x_Q3 += psEncC->subfr_length; pulses += psEncC->subfr_length; pxq += psEncC->subfr_length; } @@ -168,6 +169,7 @@ void silk_NSQ_c NSQ->lagPrev = pitchL[ psEncC->nb_subfr - 1 ]; /* Save quantized speech and noise shaping signals */ + /* DEBUG_STORE_DATA( enc.pcm, &NSQ->xq[ psEncC->ltp_mem_length ], psEncC->frame_length * sizeof( opus_int16 ) ) */ silk_memmove( NSQ->xq, &NSQ->xq[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int16 ) ); silk_memmove( NSQ->sLTP_shp_Q14, &NSQ->sLTP_shp_Q14[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int32 ) ); RESTORE_STACK; @@ -247,15 +249,15 @@ void silk_noise_shape_quantizer( } /* Noise shape feedback */ - celt_assert( ( shapingLPCOrder & 1 ) == 0 ); /* check that order is even */ - n_AR_Q12 = silk_NSQ_noise_shape_feedback_loop(&NSQ->sDiff_shp_Q14, NSQ->sAR2_Q14, AR_shp_Q13, shapingLPCOrder, arch); + silk_assert( ( shapingLPCOrder & 1 ) == 0 ); /* check that order is even */ + n_AR_Q12 = silk_NSQ_noise_shape_feedback_loop(psLPC_Q14, NSQ->sAR2_Q14, AR_shp_Q13, shapingLPCOrder, arch); n_AR_Q12 = silk_SMLAWB( n_AR_Q12, NSQ->sLF_AR_shp_Q14, Tilt_Q14 ); n_LF_Q12 = silk_SMULWB( NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - 1 ], LF_shp_Q14 ); n_LF_Q12 = silk_SMLAWT( n_LF_Q12, NSQ->sLF_AR_shp_Q14, LF_shp_Q14 ); - celt_assert( lag > 0 || signalType != TYPE_VOICED ); + silk_assert( lag > 0 || signalType != TYPE_VOICED ); /* Combine prediction and noise shaping signals */ tmp1 = silk_SUB32( silk_LSHIFT32( LPC_pred_Q10, 2 ), n_AR_Q12 ); /* Q12 */ @@ -277,27 +279,14 @@ void silk_noise_shape_quantizer( r_Q10 = silk_SUB32( x_sc_Q10[ i ], tmp1 ); /* residual error Q10 */ /* Flip sign depending on dither */ - if( NSQ->rand_seed < 0 ) { - r_Q10 = -r_Q10; + if ( NSQ->rand_seed < 0 ) { + r_Q10 = -r_Q10; } r_Q10 = silk_LIMIT_32( r_Q10, -(31 << 10), 30 << 10 ); /* Find two quantization level candidates and measure their rate-distortion */ q1_Q10 = silk_SUB32( r_Q10, offset_Q10 ); q1_Q0 = silk_RSHIFT( q1_Q10, 10 ); - if (Lambda_Q10 > 2048) { - /* For aggressive RDO, the bias becomes more than one pulse. */ - int rdo_offset = Lambda_Q10/2 - 512; - if (q1_Q10 > rdo_offset) { - q1_Q0 = silk_RSHIFT( q1_Q10 - rdo_offset, 10 ); - } else if (q1_Q10 < -rdo_offset) { - q1_Q0 = silk_RSHIFT( q1_Q10 + rdo_offset, 10 ); - } else if (q1_Q10 < 0) { - q1_Q0 = -1; - } else { - q1_Q0 = 0; - } - } if( q1_Q0 > 0 ) { q1_Q10 = silk_SUB32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJUST_Q10 ); q1_Q10 = silk_ADD32( q1_Q10, offset_Q10 ); @@ -348,8 +337,7 @@ void silk_noise_shape_quantizer( /* Update states */ psLPC_Q14++; *psLPC_Q14 = xq_Q14; - NSQ->sDiff_shp_Q14 = silk_SUB_LSHIFT32( xq_Q14, x_sc_Q10[ i ], 4 ); - sLF_AR_shp_Q14 = silk_SUB_LSHIFT32( NSQ->sDiff_shp_Q14, n_AR_Q12, 2 ); + sLF_AR_shp_Q14 = silk_SUB_LSHIFT32( xq_Q14, n_AR_Q12, 2 ); NSQ->sLF_AR_shp_Q14 = sLF_AR_shp_Q14; NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx ] = silk_SUB_LSHIFT32( sLF_AR_shp_Q14, n_LF_Q12, 2 ); @@ -368,7 +356,7 @@ void silk_noise_shape_quantizer( static OPUS_INLINE void silk_nsq_scale_states( const silk_encoder_state *psEncC, /* I Encoder State */ silk_nsq_state *NSQ, /* I/O NSQ state */ - const opus_int16 x16[], /* I input */ + const opus_int32 x_Q3[], /* I input in Q3 */ opus_int32 x_sc_Q10[], /* O input scaled with 1/Gain */ const opus_int16 sLTP[], /* I re-whitened LTP state in Q0 */ opus_int32 sLTP_Q15[], /* O LTP state matching scaled input */ @@ -380,18 +368,28 @@ static OPUS_INLINE void silk_nsq_scale_states( ) { opus_int i, lag; - opus_int32 gain_adj_Q16, inv_gain_Q31, inv_gain_Q26; + opus_int32 gain_adj_Q16, inv_gain_Q31, inv_gain_Q23; lag = pitchL[ subfr ]; inv_gain_Q31 = silk_INVERSE32_varQ( silk_max( Gains_Q16[ subfr ], 1 ), 47 ); silk_assert( inv_gain_Q31 != 0 ); + /* Calculate gain adjustment factor */ + if( Gains_Q16[ subfr ] != NSQ->prev_gain_Q16 ) { + gain_adj_Q16 = silk_DIV32_varQ( NSQ->prev_gain_Q16, Gains_Q16[ subfr ], 16 ); + } else { + gain_adj_Q16 = (opus_int32)1 << 16; + } + /* Scale input */ - inv_gain_Q26 = silk_RSHIFT_ROUND( inv_gain_Q31, 5 ); + inv_gain_Q23 = silk_RSHIFT_ROUND( inv_gain_Q31, 8 ); for( i = 0; i < psEncC->subfr_length; i++ ) { - x_sc_Q10[ i ] = silk_SMULWW( x16[ i ], inv_gain_Q26 ); + x_sc_Q10[ i ] = silk_SMULWW( x_Q3[ i ], inv_gain_Q23 ); } + /* Save inverse gain */ + NSQ->prev_gain_Q16 = Gains_Q16[ subfr ]; + /* After rewhitening the LTP state is un-scaled, so scale with inv_gain_Q16 */ if( NSQ->rewhite_flag ) { if( subfr == 0 ) { @@ -405,9 +403,7 @@ static OPUS_INLINE void silk_nsq_scale_states( } /* Adjust for changing gain */ - if( Gains_Q16[ subfr ] != NSQ->prev_gain_Q16 ) { - gain_adj_Q16 = silk_DIV32_varQ( NSQ->prev_gain_Q16, Gains_Q16[ subfr ], 16 ); - + if( gain_adj_Q16 != (opus_int32)1 << 16 ) { /* Scale long-term shaping state */ for( i = NSQ->sLTP_shp_buf_idx - psEncC->ltp_mem_length; i < NSQ->sLTP_shp_buf_idx; i++ ) { NSQ->sLTP_shp_Q14[ i ] = silk_SMULWW( gain_adj_Q16, NSQ->sLTP_shp_Q14[ i ] ); @@ -421,7 +417,6 @@ static OPUS_INLINE void silk_nsq_scale_states( } NSQ->sLF_AR_shp_Q14 = silk_SMULWW( gain_adj_Q16, NSQ->sLF_AR_shp_Q14 ); - NSQ->sDiff_shp_Q14 = silk_SMULWW( gain_adj_Q16, NSQ->sDiff_shp_Q14 ); /* Scale short-term prediction and shaping states */ for( i = 0; i < NSQ_LPC_BUF_LENGTH; i++ ) { @@ -430,8 +425,5 @@ static OPUS_INLINE void silk_nsq_scale_states( for( i = 0; i < MAX_SHAPE_LPC_ORDER; i++ ) { NSQ->sAR2_Q14[ i ] = silk_SMULWW( gain_adj_Q16, NSQ->sAR2_Q14[ i ] ); } - - /* Save inverse gain */ - NSQ->prev_gain_Q16 = Gains_Q16[ subfr ]; } } diff --git a/thirdparty/opus/silk/NSQ_del_dec.c b/thirdparty/opus/silk/NSQ_del_dec.c index 3fd9fa0d5b..ab6feeac98 100644 --- a/thirdparty/opus/silk/NSQ_del_dec.c +++ b/thirdparty/opus/silk/NSQ_del_dec.c @@ -43,7 +43,6 @@ typedef struct { opus_int32 Shape_Q14[ DECISION_DELAY ]; opus_int32 sAR2_Q14[ MAX_SHAPE_LPC_ORDER ]; opus_int32 LF_AR_Q14; - opus_int32 Diff_Q14; opus_int32 Seed; opus_int32 SeedInit; opus_int32 RD_Q10; @@ -54,7 +53,6 @@ typedef struct { opus_int32 RD_Q10; opus_int32 xq_Q14; opus_int32 LF_AR_Q14; - opus_int32 Diff_Q14; opus_int32 sLTP_shp_Q14; opus_int32 LPC_exc_Q14; } NSQ_sample_struct; @@ -68,7 +66,7 @@ static OPUS_INLINE void silk_nsq_del_dec_scale_states( const silk_encoder_state *psEncC, /* I Encoder State */ silk_nsq_state *NSQ, /* I/O NSQ state */ NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states */ - const opus_int16 x16[], /* I Input */ + const opus_int32 x_Q3[], /* I Input in Q3 */ opus_int32 x_sc_Q10[], /* O Input scaled with 1/Gain in Q10 */ const opus_int16 sLTP[], /* I Re-whitened LTP state in Q0 */ opus_int32 sLTP_Q15[], /* O LTP state matching scaled input */ @@ -109,20 +107,20 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec( opus_int predictLPCOrder, /* I Prediction filter order */ opus_int warping_Q16, /* I */ opus_int nStatesDelayedDecision, /* I Number of states in decision tree */ - opus_int *smpl_buf_idx, /* I/O Index to newest samples in buffers */ + opus_int *smpl_buf_idx, /* I Index to newest samples in buffers */ opus_int decisionDelay, /* I */ int arch /* I */ ); void silk_NSQ_del_dec_c( - const silk_encoder_state *psEncC, /* I Encoder State */ + const silk_encoder_state *psEncC, /* I/O Encoder State */ silk_nsq_state *NSQ, /* I/O NSQ state */ SideInfoIndices *psIndices, /* I/O Quantization Indices */ - const opus_int16 x16[], /* I Input */ + const opus_int32 x_Q3[], /* I Prefiltered input signal */ opus_int8 pulses[], /* O Quantized pulse signal */ const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ - const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ + const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ @@ -161,7 +159,6 @@ void silk_NSQ_del_dec_c( psDD->SeedInit = psDD->Seed; psDD->RD_Q10 = 0; psDD->LF_AR_Q14 = NSQ->sLF_AR_shp_Q14; - psDD->Diff_Q14 = NSQ->sDiff_shp_Q14; psDD->Shape_Q14[ 0 ] = NSQ->sLTP_shp_Q14[ psEncC->ltp_mem_length - 1 ]; silk_memcpy( psDD->sLPC_Q14, NSQ->sLPC_Q14, NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) ); silk_memcpy( psDD->sAR2_Q14, NSQ->sAR2_Q14, sizeof( NSQ->sAR2_Q14 ) ); @@ -189,7 +186,8 @@ void silk_NSQ_del_dec_c( LSF_interpolation_flag = 1; } - ALLOC( sLTP_Q15, psEncC->ltp_mem_length + psEncC->frame_length, opus_int32 ); + ALLOC( sLTP_Q15, + psEncC->ltp_mem_length + psEncC->frame_length, opus_int32 ); ALLOC( sLTP, psEncC->ltp_mem_length + psEncC->frame_length, opus_int16 ); ALLOC( x_sc_Q10, psEncC->subfr_length, opus_int32 ); ALLOC( delayedGain_Q10, DECISION_DELAY, opus_int32 ); @@ -201,7 +199,7 @@ void silk_NSQ_del_dec_c( for( k = 0; k < psEncC->nb_subfr; k++ ) { A_Q12 = &PredCoef_Q12[ ( ( k >> 1 ) | ( 1 - LSF_interpolation_flag ) ) * MAX_LPC_ORDER ]; B_Q14 = <PCoef_Q14[ k * LTP_ORDER ]; - AR_shp_Q13 = &AR_Q13[ k * MAX_SHAPE_LPC_ORDER ]; + AR_shp_Q13 = &AR2_Q13[ k * MAX_SHAPE_LPC_ORDER ]; /* Noise shape parameters */ silk_assert( HarmShapeGain_Q14[ k ] >= 0 ); @@ -237,8 +235,7 @@ void silk_NSQ_del_dec_c( psDD = &psDelDec[ Winner_ind ]; last_smple_idx = smpl_buf_idx + decisionDelay; for( i = 0; i < decisionDelay; i++ ) { - last_smple_idx = ( last_smple_idx - 1 ) % DECISION_DELAY; - if( last_smple_idx < 0 ) last_smple_idx += DECISION_DELAY; + last_smple_idx = ( last_smple_idx - 1 ) & DECISION_DELAY_MASK; pulses[ i - decisionDelay ] = (opus_int8)silk_RSHIFT_ROUND( psDD->Q_Q10[ last_smple_idx ], 10 ); pxq[ i - decisionDelay ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( psDD->Xq_Q14[ last_smple_idx ], Gains_Q16[ 1 ] ), 14 ) ); @@ -250,7 +247,7 @@ void silk_NSQ_del_dec_c( /* Rewhiten with new A coefs */ start_idx = psEncC->ltp_mem_length - lag - psEncC->predictLPCOrder - LTP_ORDER / 2; - celt_assert( start_idx > 0 ); + silk_assert( start_idx > 0 ); silk_LPC_analysis_filter( &sLTP[ start_idx ], &NSQ->xq[ start_idx + k * psEncC->subfr_length ], A_Q12, psEncC->ltp_mem_length - start_idx, psEncC->predictLPCOrder, psEncC->arch ); @@ -260,7 +257,7 @@ void silk_NSQ_del_dec_c( } } - silk_nsq_del_dec_scale_states( psEncC, NSQ, psDelDec, x16, x_sc_Q10, sLTP, sLTP_Q15, k, + silk_nsq_del_dec_scale_states( psEncC, NSQ, psDelDec, x_Q3, x_sc_Q10, sLTP, sLTP_Q15, k, psEncC->nStatesDelayedDecision, LTP_scale_Q14, Gains_Q16, pitchL, psIndices->signalType, decisionDelay ); silk_noise_shape_quantizer_del_dec( NSQ, psDelDec, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q15, @@ -268,7 +265,7 @@ void silk_NSQ_del_dec_c( Gains_Q16[ k ], Lambda_Q10, offset_Q10, psEncC->subfr_length, subfr++, psEncC->shapingLPCOrder, psEncC->predictLPCOrder, psEncC->warping_Q16, psEncC->nStatesDelayedDecision, &smpl_buf_idx, decisionDelay, psEncC->arch ); - x16 += psEncC->subfr_length; + x_Q3 += psEncC->subfr_length; pulses += psEncC->subfr_length; pxq += psEncC->subfr_length; } @@ -289,9 +286,7 @@ void silk_NSQ_del_dec_c( last_smple_idx = smpl_buf_idx + decisionDelay; Gain_Q10 = silk_RSHIFT32( Gains_Q16[ psEncC->nb_subfr - 1 ], 6 ); for( i = 0; i < decisionDelay; i++ ) { - last_smple_idx = ( last_smple_idx - 1 ) % DECISION_DELAY; - if( last_smple_idx < 0 ) last_smple_idx += DECISION_DELAY; - + last_smple_idx = ( last_smple_idx - 1 ) & DECISION_DELAY_MASK; pulses[ i - decisionDelay ] = (opus_int8)silk_RSHIFT_ROUND( psDD->Q_Q10[ last_smple_idx ], 10 ); pxq[ i - decisionDelay ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( psDD->Xq_Q14[ last_smple_idx ], Gain_Q10 ), 8 ) ); @@ -302,10 +297,10 @@ void silk_NSQ_del_dec_c( /* Update states */ NSQ->sLF_AR_shp_Q14 = psDD->LF_AR_Q14; - NSQ->sDiff_shp_Q14 = psDD->Diff_Q14; NSQ->lagPrev = pitchL[ psEncC->nb_subfr - 1 ]; /* Save quantized speech signal */ + /* DEBUG_STORE_DATA( enc.pcm, &NSQ->xq[psEncC->ltp_mem_length], psEncC->frame_length * sizeof( opus_int16 ) ) */ silk_memmove( NSQ->xq, &NSQ->xq[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int16 ) ); silk_memmove( NSQ->sLTP_shp_Q14, &NSQ->sLTP_shp_Q14[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int32 ) ); RESTORE_STACK; @@ -340,7 +335,7 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec( opus_int predictLPCOrder, /* I Prediction filter order */ opus_int warping_Q16, /* I */ opus_int nStatesDelayedDecision, /* I Number of states in decision tree */ - opus_int *smpl_buf_idx, /* I/O Index to newest samples in buffers */ + opus_int *smpl_buf_idx, /* I Index to newest samples in buffers */ opus_int decisionDelay, /* I */ int arch /* I */ ) @@ -361,7 +356,7 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec( NSQ_sample_struct *psSS; SAVE_STACK; - celt_assert( nStatesDelayedDecision > 0 ); + silk_assert( nStatesDelayedDecision > 0 ); ALLOC( psSampleState, nStatesDelayedDecision, NSQ_sample_pair ); shp_lag_ptr = &NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - lag + HARM_SHAPE_FIR_TAPS / 2 ]; @@ -419,9 +414,9 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec( LPC_pred_Q14 = silk_LSHIFT( LPC_pred_Q14, 4 ); /* Q10 -> Q14 */ /* Noise shape feedback */ - celt_assert( ( shapingLPCOrder & 1 ) == 0 ); /* check that order is even */ + silk_assert( ( shapingLPCOrder & 1 ) == 0 ); /* check that order is even */ /* Output of lowpass section */ - tmp2 = silk_SMLAWB( psDD->Diff_Q14, psDD->sAR2_Q14[ 0 ], warping_Q16 ); + tmp2 = silk_SMLAWB( psLPC_Q14[ 0 ], psDD->sAR2_Q14[ 0 ], warping_Q16 ); /* Output of allpass section */ tmp1 = silk_SMLAWB( psDD->sAR2_Q14[ 0 ], psDD->sAR2_Q14[ 1 ] - tmp2, warping_Q16 ); psDD->sAR2_Q14[ 0 ] = tmp2; @@ -467,19 +462,6 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec( /* Find two quantization level candidates and measure their rate-distortion */ q1_Q10 = silk_SUB32( r_Q10, offset_Q10 ); q1_Q0 = silk_RSHIFT( q1_Q10, 10 ); - if (Lambda_Q10 > 2048) { - /* For aggressive RDO, the bias becomes more than one pulse. */ - int rdo_offset = Lambda_Q10/2 - 512; - if (q1_Q10 > rdo_offset) { - q1_Q0 = silk_RSHIFT( q1_Q10 - rdo_offset, 10 ); - } else if (q1_Q10 < -rdo_offset) { - q1_Q0 = silk_RSHIFT( q1_Q10 + rdo_offset, 10 ); - } else if (q1_Q10 < 0) { - q1_Q0 = -1; - } else { - q1_Q0 = 0; - } - } if( q1_Q0 > 0 ) { q1_Q10 = silk_SUB32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJUST_Q10 ); q1_Q10 = silk_ADD32( q1_Q10, offset_Q10 ); @@ -533,8 +515,7 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec( xq_Q14 = silk_ADD32( LPC_exc_Q14, LPC_pred_Q14 ); /* Update states */ - psSS[ 0 ].Diff_Q14 = silk_SUB_LSHIFT32( xq_Q14, x_Q10[ i ], 4 ); - sLF_AR_shp_Q14 = silk_SUB32( psSS[ 0 ].Diff_Q14, n_AR_Q14 ); + sLF_AR_shp_Q14 = silk_SUB32( xq_Q14, n_AR_Q14 ); psSS[ 0 ].sLTP_shp_Q14 = silk_SUB32( sLF_AR_shp_Q14, n_LF_Q14 ); psSS[ 0 ].LF_AR_Q14 = sLF_AR_shp_Q14; psSS[ 0 ].LPC_exc_Q14 = LPC_exc_Q14; @@ -548,22 +529,21 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec( exc_Q14 = -exc_Q14; } + /* Add predictions */ LPC_exc_Q14 = silk_ADD32( exc_Q14, LTP_pred_Q14 ); xq_Q14 = silk_ADD32( LPC_exc_Q14, LPC_pred_Q14 ); /* Update states */ - psSS[ 1 ].Diff_Q14 = silk_SUB_LSHIFT32( xq_Q14, x_Q10[ i ], 4 ); - sLF_AR_shp_Q14 = silk_SUB32( psSS[ 1 ].Diff_Q14, n_AR_Q14 ); + sLF_AR_shp_Q14 = silk_SUB32( xq_Q14, n_AR_Q14 ); psSS[ 1 ].sLTP_shp_Q14 = silk_SUB32( sLF_AR_shp_Q14, n_LF_Q14 ); psSS[ 1 ].LF_AR_Q14 = sLF_AR_shp_Q14; psSS[ 1 ].LPC_exc_Q14 = LPC_exc_Q14; psSS[ 1 ].xq_Q14 = xq_Q14; } - *smpl_buf_idx = ( *smpl_buf_idx - 1 ) % DECISION_DELAY; - if( *smpl_buf_idx < 0 ) *smpl_buf_idx += DECISION_DELAY; - last_smple_idx = ( *smpl_buf_idx + decisionDelay ) % DECISION_DELAY; + *smpl_buf_idx = ( *smpl_buf_idx - 1 ) & DECISION_DELAY_MASK; /* Index to newest samples */ + last_smple_idx = ( *smpl_buf_idx + decisionDelay ) & DECISION_DELAY_MASK; /* Index to decisionDelay old samples */ /* Find winner */ RDmin_Q10 = psSampleState[ 0 ][ 0 ].RD_Q10; @@ -627,7 +607,6 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec( psDD = &psDelDec[ k ]; psSS = &psSampleState[ k ][ 0 ]; psDD->LF_AR_Q14 = psSS->LF_AR_Q14; - psDD->Diff_Q14 = psSS->Diff_Q14; psDD->sLPC_Q14[ NSQ_LPC_BUF_LENGTH + i ] = psSS->xq_Q14; psDD->Xq_Q14[ *smpl_buf_idx ] = psSS->xq_Q14; psDD->Q_Q10[ *smpl_buf_idx ] = psSS->Q_Q10; @@ -652,7 +631,7 @@ static OPUS_INLINE void silk_nsq_del_dec_scale_states( const silk_encoder_state *psEncC, /* I Encoder State */ silk_nsq_state *NSQ, /* I/O NSQ state */ NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states */ - const opus_int16 x16[], /* I Input */ + const opus_int32 x_Q3[], /* I Input in Q3 */ opus_int32 x_sc_Q10[], /* O Input scaled with 1/Gain in Q10 */ const opus_int16 sLTP[], /* I Re-whitened LTP state in Q0 */ opus_int32 sLTP_Q15[], /* O LTP state matching scaled input */ @@ -666,19 +645,29 @@ static OPUS_INLINE void silk_nsq_del_dec_scale_states( ) { opus_int i, k, lag; - opus_int32 gain_adj_Q16, inv_gain_Q31, inv_gain_Q26; + opus_int32 gain_adj_Q16, inv_gain_Q31, inv_gain_Q23; NSQ_del_dec_struct *psDD; lag = pitchL[ subfr ]; inv_gain_Q31 = silk_INVERSE32_varQ( silk_max( Gains_Q16[ subfr ], 1 ), 47 ); silk_assert( inv_gain_Q31 != 0 ); + /* Calculate gain adjustment factor */ + if( Gains_Q16[ subfr ] != NSQ->prev_gain_Q16 ) { + gain_adj_Q16 = silk_DIV32_varQ( NSQ->prev_gain_Q16, Gains_Q16[ subfr ], 16 ); + } else { + gain_adj_Q16 = (opus_int32)1 << 16; + } + /* Scale input */ - inv_gain_Q26 = silk_RSHIFT_ROUND( inv_gain_Q31, 5 ); + inv_gain_Q23 = silk_RSHIFT_ROUND( inv_gain_Q31, 8 ); for( i = 0; i < psEncC->subfr_length; i++ ) { - x_sc_Q10[ i ] = silk_SMULWW( x16[ i ], inv_gain_Q26 ); + x_sc_Q10[ i ] = silk_SMULWW( x_Q3[ i ], inv_gain_Q23 ); } + /* Save inverse gain */ + NSQ->prev_gain_Q16 = Gains_Q16[ subfr ]; + /* After rewhitening the LTP state is un-scaled, so scale with inv_gain_Q16 */ if( NSQ->rewhite_flag ) { if( subfr == 0 ) { @@ -692,9 +681,7 @@ static OPUS_INLINE void silk_nsq_del_dec_scale_states( } /* Adjust for changing gain */ - if( Gains_Q16[ subfr ] != NSQ->prev_gain_Q16 ) { - gain_adj_Q16 = silk_DIV32_varQ( NSQ->prev_gain_Q16, Gains_Q16[ subfr ], 16 ); - + if( gain_adj_Q16 != (opus_int32)1 << 16 ) { /* Scale long-term shaping state */ for( i = NSQ->sLTP_shp_buf_idx - psEncC->ltp_mem_length; i < NSQ->sLTP_shp_buf_idx; i++ ) { NSQ->sLTP_shp_Q14[ i ] = silk_SMULWW( gain_adj_Q16, NSQ->sLTP_shp_Q14[ i ] ); @@ -712,7 +699,6 @@ static OPUS_INLINE void silk_nsq_del_dec_scale_states( /* Scale scalar states */ psDD->LF_AR_Q14 = silk_SMULWW( gain_adj_Q16, psDD->LF_AR_Q14 ); - psDD->Diff_Q14 = silk_SMULWW( gain_adj_Q16, psDD->Diff_Q14 ); /* Scale short-term prediction and shaping states */ for( i = 0; i < NSQ_LPC_BUF_LENGTH; i++ ) { @@ -726,8 +712,5 @@ static OPUS_INLINE void silk_nsq_del_dec_scale_states( psDD->Shape_Q14[ i ] = silk_SMULWW( gain_adj_Q16, psDD->Shape_Q14[ i ] ); } } - - /* Save inverse gain */ - NSQ->prev_gain_Q16 = Gains_Q16[ subfr ]; } } diff --git a/thirdparty/opus/silk/PLC.c b/thirdparty/opus/silk/PLC.c index f89391651c..fb6ea887b7 100644 --- a/thirdparty/opus/silk/PLC.c +++ b/thirdparty/opus/silk/PLC.c @@ -275,7 +275,7 @@ static OPUS_INLINE void silk_PLC_conceal( /* Reduce random noise for unvoiced frames with high LPC gain */ opus_int32 invGain_Q30, down_scale_Q30; - invGain_Q30 = silk_LPC_inverse_pred_gain( psPLC->prevLPC_Q12, psDec->LPC_order, arch ); + invGain_Q30 = silk_LPC_inverse_pred_gain( psPLC->prevLPC_Q12, psDec->LPC_order ); down_scale_Q30 = silk_min_32( silk_RSHIFT( (opus_int32)1 << 30, LOG2_INV_LPC_GAIN_HIGH_THRES ), invGain_Q30 ); down_scale_Q30 = silk_max_32( silk_RSHIFT( (opus_int32)1 << 30, LOG2_INV_LPC_GAIN_LOW_THRES ), down_scale_Q30 ); @@ -291,7 +291,7 @@ static OPUS_INLINE void silk_PLC_conceal( /* Rewhiten LTP state */ idx = psDec->ltp_mem_length - lag - psDec->LPC_order - LTP_ORDER / 2; - celt_assert( idx > 0 ); + silk_assert( idx > 0 ); silk_LPC_analysis_filter( &sLTP[ idx ], &psDec->outBuf[ idx ], A_Q12, psDec->ltp_mem_length - idx, psDec->LPC_order, arch ); /* Scale LTP state */ inv_gain_Q30 = silk_INVERSE32_varQ( psPLC->prevGain_Q16[ 1 ], 46 ); @@ -328,10 +328,8 @@ static OPUS_INLINE void silk_PLC_conceal( for( j = 0; j < LTP_ORDER; j++ ) { B_Q14[ j ] = silk_RSHIFT( silk_SMULBB( harm_Gain_Q15, B_Q14[ j ] ), 15 ); } - if ( psDec->indices.signalType != TYPE_NO_VOICE_ACTIVITY ) { - /* Gradually reduce excitation gain */ - rand_scale_Q14 = silk_RSHIFT( silk_SMULBB( rand_scale_Q14, rand_Gain_Q15 ), 15 ); - } + /* Gradually reduce excitation gain */ + rand_scale_Q14 = silk_RSHIFT( silk_SMULBB( rand_scale_Q14, rand_Gain_Q15 ), 15 ); /* Slowly increase pitch lag */ psPLC->pitchL_Q8 = silk_SMLAWB( psPLC->pitchL_Q8, psPLC->pitchL_Q8, PITCH_DRIFT_FAC_Q16 ); @@ -347,7 +345,7 @@ static OPUS_INLINE void silk_PLC_conceal( /* Copy LPC state */ silk_memcpy( sLPC_Q14_ptr, psDec->sLPC_Q14_buf, MAX_LPC_ORDER * sizeof( opus_int32 ) ); - celt_assert( psDec->LPC_order >= 10 ); /* check that unrolling works */ + silk_assert( psDec->LPC_order >= 10 ); /* check that unrolling works */ for( i = 0; i < psDec->frame_length; i++ ) { /* partly unrolled */ /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */ diff --git a/thirdparty/opus/silk/SigProc_FIX.h b/thirdparty/opus/silk/SigProc_FIX.h index f9ae326326..b63299441e 100644 --- a/thirdparty/opus/silk/SigProc_FIX.h +++ b/thirdparty/opus/silk/SigProc_FIX.h @@ -35,7 +35,7 @@ extern "C" /*#define silk_MACRO_COUNT */ /* Used to enable WMOPS counting */ -#define SILK_MAX_ORDER_LPC 24 /* max order of the LPC analysis in schur() and k2a() */ +#define SILK_MAX_ORDER_LPC 16 /* max order of the LPC analysis in schur() and k2a() */ #include <string.h> /* for memset(), memcpy(), memmove() */ #include "typedef.h" @@ -47,11 +47,6 @@ extern "C" #include "x86/SigProc_FIX_sse.h" #endif -#if (defined(OPUS_ARM_ASM) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)) -#include "arm/biquad_alt_arm.h" -#include "arm/LPC_inv_pred_gain_arm.h" -#endif - /********************************************************************/ /* SIGNAL PROCESSING FUNCTIONS */ /********************************************************************/ @@ -101,22 +96,14 @@ void silk_resampler_down2_3( * slower than biquad() but uses more precise coefficients * can handle (slowly) varying coefficients */ -void silk_biquad_alt_stride1( +void silk_biquad_alt( const opus_int16 *in, /* I input signal */ const opus_int32 *B_Q28, /* I MA coefficients [3] */ const opus_int32 *A_Q28, /* I AR coefficients [2] */ opus_int32 *S, /* I/O State vector [2] */ opus_int16 *out, /* O output signal */ - const opus_int32 len /* I signal length (must be even) */ -); - -void silk_biquad_alt_stride2_c( - const opus_int16 *in, /* I input signal */ - const opus_int32 *B_Q28, /* I MA coefficients [3] */ - const opus_int32 *A_Q28, /* I AR coefficients [2] */ - opus_int32 *S, /* I/O State vector [4] */ - opus_int16 *out, /* O output signal */ - const opus_int32 len /* I signal length (must be even) */ + const opus_int32 len, /* I signal length (must be even) */ + opus_int stride /* I Operate on interleaved signal if > 1 */ ); /* Variable order MA prediction error filter. */ @@ -145,11 +132,17 @@ void silk_bwexpander_32( /* Compute inverse of LPC prediction gain, and */ /* test if LPC coefficients are stable (all poles within unit circle) */ -opus_int32 silk_LPC_inverse_pred_gain_c( /* O Returns inverse prediction gain in energy domain, Q30 */ +opus_int32 silk_LPC_inverse_pred_gain( /* O Returns inverse prediction gain in energy domain, Q30 */ const opus_int16 *A_Q12, /* I Prediction coefficients, Q12 [order] */ const opus_int order /* I Prediction order */ ); +/* For input in Q24 domain */ +opus_int32 silk_LPC_inverse_pred_gain_Q24( /* O Returns inverse prediction gain in energy domain, Q30 */ + const opus_int32 *A_Q24, /* I Prediction coefficients [order] */ + const opus_int order /* I Prediction order */ +); + /* Split signal in two decimated bands using first-order allpass filters */ void silk_ana_filt_bank_1( const opus_int16 *in, /* I Input signal [N] */ @@ -159,14 +152,6 @@ void silk_ana_filt_bank_1( const opus_int32 N /* I Number of input samples */ ); -#if !defined(OVERRIDE_silk_biquad_alt_stride2) -#define silk_biquad_alt_stride2(in, B_Q28, A_Q28, S, out, len, arch) ((void)(arch), silk_biquad_alt_stride2_c(in, B_Q28, A_Q28, S, out, len)) -#endif - -#if !defined(OVERRIDE_silk_LPC_inverse_pred_gain) -#define silk_LPC_inverse_pred_gain(A_Q12, order, arch) ((void)(arch), silk_LPC_inverse_pred_gain_c(A_Q12, order)) -#endif - /********************************************************************/ /* SCALAR FUNCTIONS */ /********************************************************************/ @@ -286,17 +271,7 @@ void silk_A2NLSF( void silk_NLSF2A( opus_int16 *a_Q12, /* O monic whitening filter coefficients in Q12, [ d ] */ const opus_int16 *NLSF, /* I normalized line spectral frequencies in Q15, [ d ] */ - const opus_int d, /* I filter order (should be even) */ - int arch /* I Run-time architecture */ -); - -/* Convert int32 coefficients to int16 coefs and make sure there's no wrap-around */ -void silk_LPC_fit( - opus_int16 *a_QOUT, /* O Output signal */ - opus_int32 *a_QIN, /* I/O Input signal */ - const opus_int QOUT, /* I Input Q domain */ - const opus_int QIN, /* I Input Q domain */ - const opus_int d /* I Filter order */ + const opus_int d /* I filter order (should be even) */ ); void silk_insertion_sort_increasing( @@ -496,7 +471,8 @@ static OPUS_INLINE opus_int32 silk_ROR32( opus_int32 a32, opus_int rot ) /* Add with saturation for positive input values */ #define silk_ADD_POS_SAT8(a, b) ((((a)+(b)) & 0x80) ? silk_int8_MAX : ((a)+(b))) #define silk_ADD_POS_SAT16(a, b) ((((a)+(b)) & 0x8000) ? silk_int16_MAX : ((a)+(b))) -#define silk_ADD_POS_SAT32(a, b) ((((opus_uint32)(a)+(opus_uint32)(b)) & 0x80000000) ? silk_int32_MAX : ((a)+(b))) +#define silk_ADD_POS_SAT32(a, b) ((((a)+(b)) & 0x80000000) ? silk_int32_MAX : ((a)+(b))) +#define silk_ADD_POS_SAT64(a, b) ((((a)+(b)) & 0x8000000000000000LL) ? silk_int64_MAX : ((a)+(b))) #define silk_LSHIFT8(a, shift) ((opus_int8)((opus_uint8)(a)<<(shift))) /* shift >= 0, shift < 8 */ #define silk_LSHIFT16(a, shift) ((opus_int16)((opus_uint16)(a)<<(shift))) /* shift >= 0, shift < 16 */ @@ -596,9 +572,7 @@ static OPUS_INLINE opus_int64 silk_max_64(opus_int64 a, opus_int64 b) /* Make sure to store the result as the seed for the next call (also in between */ /* frames), otherwise result won't be random at all. When only using some of the */ /* bits, take the most significant bits by right-shifting. */ -#define RAND_MULTIPLIER 196314165 -#define RAND_INCREMENT 907633515 -#define silk_RAND(seed) (silk_MLA_ovflw((RAND_INCREMENT), (seed), (RAND_MULTIPLIER))) +#define silk_RAND(seed) (silk_MLA_ovflw(907633515, (seed), 196314165)) /* Add some multiplication functions that can be easily mapped to ARM. */ diff --git a/thirdparty/opus/silk/VAD.c b/thirdparty/opus/silk/VAD.c index d0cda52162..0a782af2f1 100644 --- a/thirdparty/opus/silk/VAD.c +++ b/thirdparty/opus/silk/VAD.c @@ -101,9 +101,9 @@ opus_int silk_VAD_GetSA_Q8_c( /* O Return v /* Safety checks */ silk_assert( VAD_N_BANDS == 4 ); - celt_assert( MAX_FRAME_LENGTH >= psEncC->frame_length ); - celt_assert( psEncC->frame_length <= 512 ); - celt_assert( psEncC->frame_length == 8 * silk_RSHIFT( psEncC->frame_length, 3 ) ); + silk_assert( MAX_FRAME_LENGTH >= psEncC->frame_length ); + silk_assert( psEncC->frame_length <= 512 ); + silk_assert( psEncC->frame_length == 8 * silk_RSHIFT( psEncC->frame_length, 3 ) ); /***********************/ /* Filter and Decimate */ @@ -252,14 +252,15 @@ opus_int silk_VAD_GetSA_Q8_c( /* O Return v speech_nrg += ( b + 1 ) * silk_RSHIFT( Xnrg[ b ] - psSilk_VAD->NL[ b ], 4 ); } - if( psEncC->frame_length == 20 * psEncC->fs_kHz ) { - speech_nrg = silk_RSHIFT32( speech_nrg, 1 ); - } /* Power scaling */ if( speech_nrg <= 0 ) { SA_Q15 = silk_RSHIFT( SA_Q15, 1 ); - } else if( speech_nrg < 16384 ) { - speech_nrg = silk_LSHIFT32( speech_nrg, 16 ); + } else if( speech_nrg < 32768 ) { + if( psEncC->frame_length == 10 * psEncC->fs_kHz ) { + speech_nrg = silk_LSHIFT_SAT32( speech_nrg, 16 ); + } else { + speech_nrg = silk_LSHIFT_SAT32( speech_nrg, 15 ); + } /* square-root */ speech_nrg = silk_SQRT_APPROX( speech_nrg ); @@ -312,8 +313,6 @@ void silk_VAD_GetNoiseLevels( /* Initially faster smoothing */ if( psSilk_VAD->counter < 1000 ) { /* 1000 = 20 sec */ min_coef = silk_DIV32_16( silk_int16_MAX, silk_RSHIFT( psSilk_VAD->counter, 4 ) + 1 ); - /* Increment frame counter */ - psSilk_VAD->counter++; } else { min_coef = 0; } @@ -357,4 +356,7 @@ void silk_VAD_GetNoiseLevels( /* Store as part of state */ psSilk_VAD->NL[ k ] = nl; } + + /* Increment frame counter */ + psSilk_VAD->counter++; } diff --git a/thirdparty/opus/silk/VQ_WMat_EC.c b/thirdparty/opus/silk/VQ_WMat_EC.c index 0f3d545c4e..7983f1db80 100644 --- a/thirdparty/opus/silk/VQ_WMat_EC.c +++ b/thirdparty/opus/silk/VQ_WMat_EC.c @@ -34,95 +34,84 @@ POSSIBILITY OF SUCH DAMAGE. /* Entropy constrained matrix-weighted VQ, hard-coded to 5-element vectors, for a single input data vector */ void silk_VQ_WMat_EC_c( opus_int8 *ind, /* O index of best codebook vector */ - opus_int32 *res_nrg_Q15, /* O best residual energy */ - opus_int32 *rate_dist_Q8, /* O best total bitrate */ + opus_int32 *rate_dist_Q14, /* O best weighted quant error + mu * rate */ opus_int *gain_Q7, /* O sum of absolute LTP coefficients */ - const opus_int32 *XX_Q17, /* I correlation matrix */ - const opus_int32 *xX_Q17, /* I correlation vector */ + const opus_int16 *in_Q14, /* I input vector to be quantized */ + const opus_int32 *W_Q18, /* I weighting matrix */ const opus_int8 *cb_Q7, /* I codebook */ const opus_uint8 *cb_gain_Q7, /* I codebook effective gain */ const opus_uint8 *cl_Q5, /* I code length for each codebook vector */ - const opus_int subfr_len, /* I number of samples per subframe */ + const opus_int mu_Q9, /* I tradeoff betw. weighted error and rate */ const opus_int32 max_gain_Q7, /* I maximum sum of absolute LTP coefficients */ - const opus_int L /* I number of vectors in codebook */ + opus_int L /* I number of vectors in codebook */ ) { opus_int k, gain_tmp_Q7; const opus_int8 *cb_row_Q7; - opus_int32 neg_xX_Q24[ 5 ]; - opus_int32 sum1_Q15, sum2_Q24; - opus_int32 bits_res_Q8, bits_tot_Q8; - - /* Negate and convert to new Q domain */ - neg_xX_Q24[ 0 ] = -silk_LSHIFT32( xX_Q17[ 0 ], 7 ); - neg_xX_Q24[ 1 ] = -silk_LSHIFT32( xX_Q17[ 1 ], 7 ); - neg_xX_Q24[ 2 ] = -silk_LSHIFT32( xX_Q17[ 2 ], 7 ); - neg_xX_Q24[ 3 ] = -silk_LSHIFT32( xX_Q17[ 3 ], 7 ); - neg_xX_Q24[ 4 ] = -silk_LSHIFT32( xX_Q17[ 4 ], 7 ); + opus_int16 diff_Q14[ 5 ]; + opus_int32 sum1_Q14, sum2_Q16; /* Loop over codebook */ - *rate_dist_Q8 = silk_int32_MAX; - *res_nrg_Q15 = silk_int32_MAX; + *rate_dist_Q14 = silk_int32_MAX; cb_row_Q7 = cb_Q7; - /* In things go really bad, at least *ind is set to something safe. */ - *ind = 0; for( k = 0; k < L; k++ ) { - opus_int32 penalty; gain_tmp_Q7 = cb_gain_Q7[k]; + + diff_Q14[ 0 ] = in_Q14[ 0 ] - silk_LSHIFT( cb_row_Q7[ 0 ], 7 ); + diff_Q14[ 1 ] = in_Q14[ 1 ] - silk_LSHIFT( cb_row_Q7[ 1 ], 7 ); + diff_Q14[ 2 ] = in_Q14[ 2 ] - silk_LSHIFT( cb_row_Q7[ 2 ], 7 ); + diff_Q14[ 3 ] = in_Q14[ 3 ] - silk_LSHIFT( cb_row_Q7[ 3 ], 7 ); + diff_Q14[ 4 ] = in_Q14[ 4 ] - silk_LSHIFT( cb_row_Q7[ 4 ], 7 ); + /* Weighted rate */ - /* Quantization error: 1 - 2 * xX * cb + cb' * XX * cb */ - sum1_Q15 = SILK_FIX_CONST( 1.001, 15 ); + sum1_Q14 = silk_SMULBB( mu_Q9, cl_Q5[ k ] ); /* Penalty for too large gain */ - penalty = silk_LSHIFT32( silk_max( silk_SUB32( gain_tmp_Q7, max_gain_Q7 ), 0 ), 11 ); - - /* first row of XX_Q17 */ - sum2_Q24 = silk_MLA( neg_xX_Q24[ 0 ], XX_Q17[ 1 ], cb_row_Q7[ 1 ] ); - sum2_Q24 = silk_MLA( sum2_Q24, XX_Q17[ 2 ], cb_row_Q7[ 2 ] ); - sum2_Q24 = silk_MLA( sum2_Q24, XX_Q17[ 3 ], cb_row_Q7[ 3 ] ); - sum2_Q24 = silk_MLA( sum2_Q24, XX_Q17[ 4 ], cb_row_Q7[ 4 ] ); - sum2_Q24 = silk_LSHIFT32( sum2_Q24, 1 ); - sum2_Q24 = silk_MLA( sum2_Q24, XX_Q17[ 0 ], cb_row_Q7[ 0 ] ); - sum1_Q15 = silk_SMLAWB( sum1_Q15, sum2_Q24, cb_row_Q7[ 0 ] ); - - /* second row of XX_Q17 */ - sum2_Q24 = silk_MLA( neg_xX_Q24[ 1 ], XX_Q17[ 7 ], cb_row_Q7[ 2 ] ); - sum2_Q24 = silk_MLA( sum2_Q24, XX_Q17[ 8 ], cb_row_Q7[ 3 ] ); - sum2_Q24 = silk_MLA( sum2_Q24, XX_Q17[ 9 ], cb_row_Q7[ 4 ] ); - sum2_Q24 = silk_LSHIFT32( sum2_Q24, 1 ); - sum2_Q24 = silk_MLA( sum2_Q24, XX_Q17[ 6 ], cb_row_Q7[ 1 ] ); - sum1_Q15 = silk_SMLAWB( sum1_Q15, sum2_Q24, cb_row_Q7[ 1 ] ); - - /* third row of XX_Q17 */ - sum2_Q24 = silk_MLA( neg_xX_Q24[ 2 ], XX_Q17[ 13 ], cb_row_Q7[ 3 ] ); - sum2_Q24 = silk_MLA( sum2_Q24, XX_Q17[ 14 ], cb_row_Q7[ 4 ] ); - sum2_Q24 = silk_LSHIFT32( sum2_Q24, 1 ); - sum2_Q24 = silk_MLA( sum2_Q24, XX_Q17[ 12 ], cb_row_Q7[ 2 ] ); - sum1_Q15 = silk_SMLAWB( sum1_Q15, sum2_Q24, cb_row_Q7[ 2 ] ); - - /* fourth row of XX_Q17 */ - sum2_Q24 = silk_MLA( neg_xX_Q24[ 3 ], XX_Q17[ 19 ], cb_row_Q7[ 4 ] ); - sum2_Q24 = silk_LSHIFT32( sum2_Q24, 1 ); - sum2_Q24 = silk_MLA( sum2_Q24, XX_Q17[ 18 ], cb_row_Q7[ 3 ] ); - sum1_Q15 = silk_SMLAWB( sum1_Q15, sum2_Q24, cb_row_Q7[ 3 ] ); - - /* last row of XX_Q17 */ - sum2_Q24 = silk_LSHIFT32( neg_xX_Q24[ 4 ], 1 ); - sum2_Q24 = silk_MLA( sum2_Q24, XX_Q17[ 24 ], cb_row_Q7[ 4 ] ); - sum1_Q15 = silk_SMLAWB( sum1_Q15, sum2_Q24, cb_row_Q7[ 4 ] ); + sum1_Q14 = silk_ADD_LSHIFT32( sum1_Q14, silk_max( silk_SUB32( gain_tmp_Q7, max_gain_Q7 ), 0 ), 10 ); + + silk_assert( sum1_Q14 >= 0 ); + + /* first row of W_Q18 */ + sum2_Q16 = silk_SMULWB( W_Q18[ 1 ], diff_Q14[ 1 ] ); + sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 2 ], diff_Q14[ 2 ] ); + sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 3 ], diff_Q14[ 3 ] ); + sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 4 ], diff_Q14[ 4 ] ); + sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 ); + sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 0 ], diff_Q14[ 0 ] ); + sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 0 ] ); + + /* second row of W_Q18 */ + sum2_Q16 = silk_SMULWB( W_Q18[ 7 ], diff_Q14[ 2 ] ); + sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 8 ], diff_Q14[ 3 ] ); + sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 9 ], diff_Q14[ 4 ] ); + sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 ); + sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 6 ], diff_Q14[ 1 ] ); + sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 1 ] ); + + /* third row of W_Q18 */ + sum2_Q16 = silk_SMULWB( W_Q18[ 13 ], diff_Q14[ 3 ] ); + sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 14 ], diff_Q14[ 4 ] ); + sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 ); + sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 12 ], diff_Q14[ 2 ] ); + sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 2 ] ); + + /* fourth row of W_Q18 */ + sum2_Q16 = silk_SMULWB( W_Q18[ 19 ], diff_Q14[ 4 ] ); + sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 ); + sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 18 ], diff_Q14[ 3 ] ); + sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 3 ] ); + + /* last row of W_Q18 */ + sum2_Q16 = silk_SMULWB( W_Q18[ 24 ], diff_Q14[ 4 ] ); + sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 4 ] ); + + silk_assert( sum1_Q14 >= 0 ); /* find best */ - if( sum1_Q15 >= 0 ) { - /* Translate residual energy to bits using high-rate assumption (6 dB ==> 1 bit/sample) */ - bits_res_Q8 = silk_SMULBB( subfr_len, silk_lin2log( sum1_Q15 + penalty) - (15 << 7) ); - /* In the following line we reduce the codelength component by half ("-1"); seems to slghtly improve quality */ - bits_tot_Q8 = silk_ADD_LSHIFT32( bits_res_Q8, cl_Q5[ k ], 3-1 ); - if( bits_tot_Q8 <= *rate_dist_Q8 ) { - *rate_dist_Q8 = bits_tot_Q8; - *res_nrg_Q15 = sum1_Q15 + penalty; - *ind = (opus_int8)k; - *gain_Q7 = gain_tmp_Q7; - } + if( sum1_Q14 < *rate_dist_Q14 ) { + *rate_dist_Q14 = sum1_Q14; + *ind = (opus_int8)k; + *gain_Q7 = gain_tmp_Q7; } /* Go to next cbk vector */ diff --git a/thirdparty/opus/silk/arm/LPC_inv_pred_gain_arm.h b/thirdparty/opus/silk/arm/LPC_inv_pred_gain_arm.h deleted file mode 100644 index 9895b555c8..0000000000 --- a/thirdparty/opus/silk/arm/LPC_inv_pred_gain_arm.h +++ /dev/null @@ -1,57 +0,0 @@ -/*********************************************************************** -Copyright (c) 2017 Google Inc. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifndef SILK_LPC_INV_PRED_GAIN_ARM_H -# define SILK_LPC_INV_PRED_GAIN_ARM_H - -# include "celt/arm/armcpu.h" - -# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) -opus_int32 silk_LPC_inverse_pred_gain_neon( /* O Returns inverse prediction gain in energy domain, Q30 */ - const opus_int16 *A_Q12, /* I Prediction coefficients, Q12 [order] */ - const opus_int order /* I Prediction order */ -); - -# if !defined(OPUS_HAVE_RTCD) && defined(OPUS_ARM_PRESUME_NEON) -# define OVERRIDE_silk_LPC_inverse_pred_gain (1) -# define silk_LPC_inverse_pred_gain(A_Q12, order, arch) ((void)(arch), PRESUME_NEON(silk_LPC_inverse_pred_gain)(A_Q12, order)) -# endif -# endif - -# if !defined(OVERRIDE_silk_LPC_inverse_pred_gain) -/*Is run-time CPU detection enabled on this platform?*/ -# if defined(OPUS_HAVE_RTCD) && (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR)) -extern opus_int32 (*const SILK_LPC_INVERSE_PRED_GAIN_IMPL[OPUS_ARCHMASK+1])(const opus_int16 *A_Q12, const opus_int order); -# define OVERRIDE_silk_LPC_inverse_pred_gain (1) -# define silk_LPC_inverse_pred_gain(A_Q12, order, arch) ((*SILK_LPC_INVERSE_PRED_GAIN_IMPL[(arch)&OPUS_ARCHMASK])(A_Q12, order)) -# elif defined(OPUS_ARM_PRESUME_NEON_INTR) -# define OVERRIDE_silk_LPC_inverse_pred_gain (1) -# define silk_LPC_inverse_pred_gain(A_Q12, order, arch) ((void)(arch), silk_LPC_inverse_pred_gain_neon(A_Q12, order)) -# endif -# endif - -#endif /* end SILK_LPC_INV_PRED_GAIN_ARM_H */ diff --git a/thirdparty/opus/silk/arm/LPC_inv_pred_gain_neon_intr.c b/thirdparty/opus/silk/arm/LPC_inv_pred_gain_neon_intr.c deleted file mode 100644 index ab426bcd66..0000000000 --- a/thirdparty/opus/silk/arm/LPC_inv_pred_gain_neon_intr.c +++ /dev/null @@ -1,280 +0,0 @@ -/*********************************************************************** -Copyright (c) 2017 Google Inc. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include <arm_neon.h> -#include "SigProc_FIX.h" -#include "define.h" - -#define QA 24 -#define A_LIMIT SILK_FIX_CONST( 0.99975, QA ) - -#define MUL32_FRAC_Q(a32, b32, Q) ((opus_int32)(silk_RSHIFT_ROUND64(silk_SMULL(a32, b32), Q))) - -/* The difficulty is how to judge a 64-bit signed integer tmp64 is 32-bit overflowed, - * since NEON has no 64-bit min, max or comparison instructions. - * A failed idea is to compare the results of vmovn(tmp64) and vqmovn(tmp64) whether they are equal or not. - * However, this idea fails when the tmp64 is something like 0xFFFFFFF980000000. - * Here we know that mult2Q >= 1, so the highest bit (bit 63, sign bit) of tmp64 must equal to bit 62. - * tmp64 was shifted left by 1 and we got tmp64'. If high_half(tmp64') != 0 and high_half(tmp64') != -1, - * then we know that bit 31 to bit 63 of tmp64 can not all be the sign bit, and therefore tmp64 is 32-bit overflowed. - * That is, we judge if tmp64' > 0x00000000FFFFFFFF, or tmp64' <= 0xFFFFFFFF00000000. - * We use narrowing shift right 31 bits to tmp32' to save data bandwidth and instructions. - * That is, we judge if tmp32' > 0x00000000, or tmp32' <= 0xFFFFFFFF. - */ - -/* Compute inverse of LPC prediction gain, and */ -/* test if LPC coefficients are stable (all poles within unit circle) */ -static OPUS_INLINE opus_int32 LPC_inverse_pred_gain_QA_neon( /* O Returns inverse prediction gain in energy domain, Q30 */ - opus_int32 A_QA[ SILK_MAX_ORDER_LPC ], /* I Prediction coefficients */ - const opus_int order /* I Prediction order */ -) -{ - opus_int k, n, mult2Q; - opus_int32 invGain_Q30, rc_Q31, rc_mult1_Q30, rc_mult2, tmp1, tmp2; - opus_int32 max, min; - int32x4_t max_s32x4, min_s32x4; - int32x2_t max_s32x2, min_s32x2; - - max_s32x4 = vdupq_n_s32( silk_int32_MIN ); - min_s32x4 = vdupq_n_s32( silk_int32_MAX ); - invGain_Q30 = SILK_FIX_CONST( 1, 30 ); - for( k = order - 1; k > 0; k-- ) { - int32x2_t rc_Q31_s32x2, rc_mult2_s32x2; - int64x2_t mult2Q_s64x2; - - /* Check for stability */ - if( ( A_QA[ k ] > A_LIMIT ) || ( A_QA[ k ] < -A_LIMIT ) ) { - return 0; - } - - /* Set RC equal to negated AR coef */ - rc_Q31 = -silk_LSHIFT( A_QA[ k ], 31 - QA ); - - /* rc_mult1_Q30 range: [ 1 : 2^30 ] */ - rc_mult1_Q30 = silk_SUB32( SILK_FIX_CONST( 1, 30 ), silk_SMMUL( rc_Q31, rc_Q31 ) ); - silk_assert( rc_mult1_Q30 > ( 1 << 15 ) ); /* reduce A_LIMIT if fails */ - silk_assert( rc_mult1_Q30 <= ( 1 << 30 ) ); - - /* Update inverse gain */ - /* invGain_Q30 range: [ 0 : 2^30 ] */ - invGain_Q30 = silk_LSHIFT( silk_SMMUL( invGain_Q30, rc_mult1_Q30 ), 2 ); - silk_assert( invGain_Q30 >= 0 ); - silk_assert( invGain_Q30 <= ( 1 << 30 ) ); - if( invGain_Q30 < SILK_FIX_CONST( 1.0f / MAX_PREDICTION_POWER_GAIN, 30 ) ) { - return 0; - } - - /* rc_mult2 range: [ 2^30 : silk_int32_MAX ] */ - mult2Q = 32 - silk_CLZ32( silk_abs( rc_mult1_Q30 ) ); - rc_mult2 = silk_INVERSE32_varQ( rc_mult1_Q30, mult2Q + 30 ); - - /* Update AR coefficient */ - rc_Q31_s32x2 = vdup_n_s32( rc_Q31 ); - mult2Q_s64x2 = vdupq_n_s64( -mult2Q ); - rc_mult2_s32x2 = vdup_n_s32( rc_mult2 ); - - for( n = 0; n < ( ( k + 1 ) >> 1 ) - 3; n += 4 ) { - /* We always calculate extra elements of A_QA buffer when ( k % 4 ) != 0, to take the advantage of SIMD parallelization. */ - int32x4_t tmp1_s32x4, tmp2_s32x4, t0_s32x4, t1_s32x4, s0_s32x4, s1_s32x4, t_QA0_s32x4, t_QA1_s32x4; - int64x2_t t0_s64x2, t1_s64x2, t2_s64x2, t3_s64x2; - tmp1_s32x4 = vld1q_s32( A_QA + n ); - tmp2_s32x4 = vld1q_s32( A_QA + k - n - 4 ); - tmp2_s32x4 = vrev64q_s32( tmp2_s32x4 ); - tmp2_s32x4 = vcombine_s32( vget_high_s32( tmp2_s32x4 ), vget_low_s32( tmp2_s32x4 ) ); - t0_s32x4 = vqrdmulhq_lane_s32( tmp2_s32x4, rc_Q31_s32x2, 0 ); - t1_s32x4 = vqrdmulhq_lane_s32( tmp1_s32x4, rc_Q31_s32x2, 0 ); - t_QA0_s32x4 = vqsubq_s32( tmp1_s32x4, t0_s32x4 ); - t_QA1_s32x4 = vqsubq_s32( tmp2_s32x4, t1_s32x4 ); - t0_s64x2 = vmull_s32( vget_low_s32 ( t_QA0_s32x4 ), rc_mult2_s32x2 ); - t1_s64x2 = vmull_s32( vget_high_s32( t_QA0_s32x4 ), rc_mult2_s32x2 ); - t2_s64x2 = vmull_s32( vget_low_s32 ( t_QA1_s32x4 ), rc_mult2_s32x2 ); - t3_s64x2 = vmull_s32( vget_high_s32( t_QA1_s32x4 ), rc_mult2_s32x2 ); - t0_s64x2 = vrshlq_s64( t0_s64x2, mult2Q_s64x2 ); - t1_s64x2 = vrshlq_s64( t1_s64x2, mult2Q_s64x2 ); - t2_s64x2 = vrshlq_s64( t2_s64x2, mult2Q_s64x2 ); - t3_s64x2 = vrshlq_s64( t3_s64x2, mult2Q_s64x2 ); - t0_s32x4 = vcombine_s32( vmovn_s64( t0_s64x2 ), vmovn_s64( t1_s64x2 ) ); - t1_s32x4 = vcombine_s32( vmovn_s64( t2_s64x2 ), vmovn_s64( t3_s64x2 ) ); - s0_s32x4 = vcombine_s32( vshrn_n_s64( t0_s64x2, 31 ), vshrn_n_s64( t1_s64x2, 31 ) ); - s1_s32x4 = vcombine_s32( vshrn_n_s64( t2_s64x2, 31 ), vshrn_n_s64( t3_s64x2, 31 ) ); - max_s32x4 = vmaxq_s32( max_s32x4, s0_s32x4 ); - min_s32x4 = vminq_s32( min_s32x4, s0_s32x4 ); - max_s32x4 = vmaxq_s32( max_s32x4, s1_s32x4 ); - min_s32x4 = vminq_s32( min_s32x4, s1_s32x4 ); - t1_s32x4 = vrev64q_s32( t1_s32x4 ); - t1_s32x4 = vcombine_s32( vget_high_s32( t1_s32x4 ), vget_low_s32( t1_s32x4 ) ); - vst1q_s32( A_QA + n, t0_s32x4 ); - vst1q_s32( A_QA + k - n - 4, t1_s32x4 ); - } - for( ; n < (k + 1) >> 1; n++ ) { - opus_int64 tmp64; - tmp1 = A_QA[ n ]; - tmp2 = A_QA[ k - n - 1 ]; - tmp64 = silk_RSHIFT_ROUND64( silk_SMULL( silk_SUB_SAT32(tmp1, - MUL32_FRAC_Q( tmp2, rc_Q31, 31 ) ), rc_mult2 ), mult2Q); - if( tmp64 > silk_int32_MAX || tmp64 < silk_int32_MIN ) { - return 0; - } - A_QA[ n ] = ( opus_int32 )tmp64; - tmp64 = silk_RSHIFT_ROUND64( silk_SMULL( silk_SUB_SAT32(tmp2, - MUL32_FRAC_Q( tmp1, rc_Q31, 31 ) ), rc_mult2), mult2Q); - if( tmp64 > silk_int32_MAX || tmp64 < silk_int32_MIN ) { - return 0; - } - A_QA[ k - n - 1 ] = ( opus_int32 )tmp64; - } - } - - /* Check for stability */ - if( ( A_QA[ k ] > A_LIMIT ) || ( A_QA[ k ] < -A_LIMIT ) ) { - return 0; - } - - max_s32x2 = vmax_s32( vget_low_s32( max_s32x4 ), vget_high_s32( max_s32x4 ) ); - min_s32x2 = vmin_s32( vget_low_s32( min_s32x4 ), vget_high_s32( min_s32x4 ) ); - max_s32x2 = vmax_s32( max_s32x2, vreinterpret_s32_s64( vshr_n_s64( vreinterpret_s64_s32( max_s32x2 ), 32 ) ) ); - min_s32x2 = vmin_s32( min_s32x2, vreinterpret_s32_s64( vshr_n_s64( vreinterpret_s64_s32( min_s32x2 ), 32 ) ) ); - max = vget_lane_s32( max_s32x2, 0 ); - min = vget_lane_s32( min_s32x2, 0 ); - if( ( max > 0 ) || ( min < -1 ) ) { - return 0; - } - - /* Set RC equal to negated AR coef */ - rc_Q31 = -silk_LSHIFT( A_QA[ 0 ], 31 - QA ); - - /* Range: [ 1 : 2^30 ] */ - rc_mult1_Q30 = silk_SUB32( SILK_FIX_CONST( 1, 30 ), silk_SMMUL( rc_Q31, rc_Q31 ) ); - - /* Update inverse gain */ - /* Range: [ 0 : 2^30 ] */ - invGain_Q30 = silk_LSHIFT( silk_SMMUL( invGain_Q30, rc_mult1_Q30 ), 2 ); - silk_assert( invGain_Q30 >= 0 ); - silk_assert( invGain_Q30 <= ( 1 << 30 ) ); - if( invGain_Q30 < SILK_FIX_CONST( 1.0f / MAX_PREDICTION_POWER_GAIN, 30 ) ) { - return 0; - } - - return invGain_Q30; -} - -/* For input in Q12 domain */ -opus_int32 silk_LPC_inverse_pred_gain_neon( /* O Returns inverse prediction gain in energy domain, Q30 */ - const opus_int16 *A_Q12, /* I Prediction coefficients, Q12 [order] */ - const opus_int order /* I Prediction order */ -) -{ -#ifdef OPUS_CHECK_ASM - const opus_int32 invGain_Q30_c = silk_LPC_inverse_pred_gain_c( A_Q12, order ); -#endif - - opus_int32 invGain_Q30; - if( ( SILK_MAX_ORDER_LPC != 24 ) || ( order & 1 )) { - invGain_Q30 = silk_LPC_inverse_pred_gain_c( A_Q12, order ); - } - else { - opus_int32 Atmp_QA[ SILK_MAX_ORDER_LPC ]; - opus_int32 DC_resp; - int16x8_t t0_s16x8, t1_s16x8, t2_s16x8; - int32x4_t t0_s32x4; - const opus_int leftover = order & 7; - - /* Increase Q domain of the AR coefficients */ - t0_s16x8 = vld1q_s16( A_Q12 + 0 ); - t1_s16x8 = vld1q_s16( A_Q12 + 8 ); - t2_s16x8 = vld1q_s16( A_Q12 + 16 ); - t0_s32x4 = vpaddlq_s16( t0_s16x8 ); - - switch( order - leftover ) - { - case 24: - t0_s32x4 = vpadalq_s16( t0_s32x4, t2_s16x8 ); - /* FALLTHROUGH */ - - case 16: - t0_s32x4 = vpadalq_s16( t0_s32x4, t1_s16x8 ); - vst1q_s32( Atmp_QA + 16, vshll_n_s16( vget_low_s16 ( t2_s16x8 ), QA - 12 ) ); - vst1q_s32( Atmp_QA + 20, vshll_n_s16( vget_high_s16( t2_s16x8 ), QA - 12 ) ); - /* FALLTHROUGH */ - - case 8: - { - const int32x2_t t_s32x2 = vpadd_s32( vget_low_s32( t0_s32x4 ), vget_high_s32( t0_s32x4 ) ); - const int64x1_t t_s64x1 = vpaddl_s32( t_s32x2 ); - DC_resp = vget_lane_s32( vreinterpret_s32_s64( t_s64x1 ), 0 ); - vst1q_s32( Atmp_QA + 8, vshll_n_s16( vget_low_s16 ( t1_s16x8 ), QA - 12 ) ); - vst1q_s32( Atmp_QA + 12, vshll_n_s16( vget_high_s16( t1_s16x8 ), QA - 12 ) ); - } - break; - - default: - DC_resp = 0; - break; - } - A_Q12 += order - leftover; - - switch( leftover ) - { - case 6: - DC_resp += (opus_int32)A_Q12[ 5 ]; - DC_resp += (opus_int32)A_Q12[ 4 ]; - /* FALLTHROUGH */ - - case 4: - DC_resp += (opus_int32)A_Q12[ 3 ]; - DC_resp += (opus_int32)A_Q12[ 2 ]; - /* FALLTHROUGH */ - - case 2: - DC_resp += (opus_int32)A_Q12[ 1 ]; - DC_resp += (opus_int32)A_Q12[ 0 ]; - /* FALLTHROUGH */ - - default: - break; - } - - /* If the DC is unstable, we don't even need to do the full calculations */ - if( DC_resp >= 4096 ) { - invGain_Q30 = 0; - } else { - vst1q_s32( Atmp_QA + 0, vshll_n_s16( vget_low_s16 ( t0_s16x8 ), QA - 12 ) ); - vst1q_s32( Atmp_QA + 4, vshll_n_s16( vget_high_s16( t0_s16x8 ), QA - 12 ) ); - invGain_Q30 = LPC_inverse_pred_gain_QA_neon( Atmp_QA, order ); - } - } - -#ifdef OPUS_CHECK_ASM - silk_assert( invGain_Q30_c == invGain_Q30 ); -#endif - - return invGain_Q30; -} diff --git a/thirdparty/opus/silk/arm/NSQ_del_dec_arm.h b/thirdparty/opus/silk/arm/NSQ_del_dec_arm.h deleted file mode 100644 index 9e76e16927..0000000000 --- a/thirdparty/opus/silk/arm/NSQ_del_dec_arm.h +++ /dev/null @@ -1,100 +0,0 @@ -/*********************************************************************** -Copyright (c) 2017 Google Inc. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifndef SILK_NSQ_DEL_DEC_ARM_H -#define SILK_NSQ_DEL_DEC_ARM_H - -#include "celt/arm/armcpu.h" - -#if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) -void silk_NSQ_del_dec_neon( - const silk_encoder_state *psEncC, silk_nsq_state *NSQ, - SideInfoIndices *psIndices, const opus_int16 x16[], opus_int8 pulses[], - const opus_int16 PredCoef_Q12[2 * MAX_LPC_ORDER], - const opus_int16 LTPCoef_Q14[LTP_ORDER * MAX_NB_SUBFR], - const opus_int16 AR_Q13[MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER], - const opus_int HarmShapeGain_Q14[MAX_NB_SUBFR], - const opus_int Tilt_Q14[MAX_NB_SUBFR], - const opus_int32 LF_shp_Q14[MAX_NB_SUBFR], - const opus_int32 Gains_Q16[MAX_NB_SUBFR], - const opus_int pitchL[MAX_NB_SUBFR], const opus_int Lambda_Q10, - const opus_int LTP_scale_Q14); - -#if !defined(OPUS_HAVE_RTCD) -#define OVERRIDE_silk_NSQ_del_dec (1) -#define silk_NSQ_del_dec(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, \ - LTPCoef_Q14, AR_Q13, HarmShapeGain_Q14, Tilt_Q14, \ - LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, \ - LTP_scale_Q14, arch) \ - ((void)(arch), \ - PRESUME_NEON(silk_NSQ_del_dec)( \ - psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, \ - AR_Q13, HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, \ - Lambda_Q10, LTP_scale_Q14)) -#endif -#endif - -#if !defined(OVERRIDE_silk_NSQ_del_dec) -/*Is run-time CPU detection enabled on this platform?*/ -#if defined(OPUS_HAVE_RTCD) && (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && \ - !defined(OPUS_ARM_PRESUME_NEON_INTR)) -extern void (*const SILK_NSQ_DEL_DEC_IMPL[OPUS_ARCHMASK + 1])( - const silk_encoder_state *psEncC, silk_nsq_state *NSQ, - SideInfoIndices *psIndices, const opus_int16 x16[], opus_int8 pulses[], - const opus_int16 PredCoef_Q12[2 * MAX_LPC_ORDER], - const opus_int16 LTPCoef_Q14[LTP_ORDER * MAX_NB_SUBFR], - const opus_int16 AR_Q13[MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER], - const opus_int HarmShapeGain_Q14[MAX_NB_SUBFR], - const opus_int Tilt_Q14[MAX_NB_SUBFR], - const opus_int32 LF_shp_Q14[MAX_NB_SUBFR], - const opus_int32 Gains_Q16[MAX_NB_SUBFR], - const opus_int pitchL[MAX_NB_SUBFR], const opus_int Lambda_Q10, - const opus_int LTP_scale_Q14); -#define OVERRIDE_silk_NSQ_del_dec (1) -#define silk_NSQ_del_dec(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, \ - LTPCoef_Q14, AR_Q13, HarmShapeGain_Q14, Tilt_Q14, \ - LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, \ - LTP_scale_Q14, arch) \ - ((*SILK_NSQ_DEL_DEC_IMPL[(arch)&OPUS_ARCHMASK])( \ - psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, \ - AR_Q13, HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, \ - Lambda_Q10, LTP_scale_Q14)) -#elif defined(OPUS_ARM_PRESUME_NEON_INTR) -#define OVERRIDE_silk_NSQ_del_dec (1) -#define silk_NSQ_del_dec(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, \ - LTPCoef_Q14, AR_Q13, HarmShapeGain_Q14, Tilt_Q14, \ - LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, \ - LTP_scale_Q14, arch) \ - ((void)(arch), \ - silk_NSQ_del_dec_neon(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, \ - LTPCoef_Q14, AR_Q13, HarmShapeGain_Q14, Tilt_Q14, \ - LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, \ - LTP_scale_Q14)) -#endif -#endif - -#endif /* end SILK_NSQ_DEL_DEC_ARM_H */ diff --git a/thirdparty/opus/silk/arm/NSQ_del_dec_neon_intr.c b/thirdparty/opus/silk/arm/NSQ_del_dec_neon_intr.c deleted file mode 100644 index 212410f362..0000000000 --- a/thirdparty/opus/silk/arm/NSQ_del_dec_neon_intr.c +++ /dev/null @@ -1,1124 +0,0 @@ -/*********************************************************************** -Copyright (c) 2017 Google Inc. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include <arm_neon.h> -#ifdef OPUS_CHECK_ASM -# include <string.h> -#endif -#include "main.h" -#include "stack_alloc.h" - -/* NEON intrinsics optimization now can only parallelize up to 4 delay decision states. */ -/* If there are more states, C function is called, and this optimization must be expanded. */ -#define NEON_MAX_DEL_DEC_STATES 4 - -typedef struct { - opus_int32 sLPC_Q14[ MAX_SUB_FRAME_LENGTH + NSQ_LPC_BUF_LENGTH ][ NEON_MAX_DEL_DEC_STATES ]; - opus_int32 RandState[ DECISION_DELAY ][ NEON_MAX_DEL_DEC_STATES ]; - opus_int32 Q_Q10[ DECISION_DELAY ][ NEON_MAX_DEL_DEC_STATES ]; - opus_int32 Xq_Q14[ DECISION_DELAY ][ NEON_MAX_DEL_DEC_STATES ]; - opus_int32 Pred_Q15[ DECISION_DELAY ][ NEON_MAX_DEL_DEC_STATES ]; - opus_int32 Shape_Q14[ DECISION_DELAY ][ NEON_MAX_DEL_DEC_STATES ]; - opus_int32 sAR2_Q14[ MAX_SHAPE_LPC_ORDER ][ NEON_MAX_DEL_DEC_STATES ]; - opus_int32 LF_AR_Q14[ NEON_MAX_DEL_DEC_STATES ]; - opus_int32 Diff_Q14[ NEON_MAX_DEL_DEC_STATES ]; - opus_int32 Seed[ NEON_MAX_DEL_DEC_STATES ]; - opus_int32 SeedInit[ NEON_MAX_DEL_DEC_STATES ]; - opus_int32 RD_Q10[ NEON_MAX_DEL_DEC_STATES ]; -} NSQ_del_decs_struct; - -typedef struct { - opus_int32 Q_Q10[ NEON_MAX_DEL_DEC_STATES ]; - opus_int32 RD_Q10[ NEON_MAX_DEL_DEC_STATES ]; - opus_int32 xq_Q14[ NEON_MAX_DEL_DEC_STATES ]; - opus_int32 LF_AR_Q14[ NEON_MAX_DEL_DEC_STATES ]; - opus_int32 Diff_Q14[ NEON_MAX_DEL_DEC_STATES ]; - opus_int32 sLTP_shp_Q14[ NEON_MAX_DEL_DEC_STATES ]; - opus_int32 LPC_exc_Q14[ NEON_MAX_DEL_DEC_STATES ]; -} NSQ_samples_struct; - -static OPUS_INLINE void silk_nsq_del_dec_scale_states_neon( - const silk_encoder_state *psEncC, /* I Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - NSQ_del_decs_struct psDelDec[], /* I/O Delayed decision states */ - const opus_int16 x16[], /* I Input */ - opus_int32 x_sc_Q10[], /* O Input scaled with 1/Gain in Q10 */ - const opus_int16 sLTP[], /* I Re-whitened LTP state in Q0 */ - opus_int32 sLTP_Q15[], /* O LTP state matching scaled input */ - opus_int subfr, /* I Subframe number */ - const opus_int LTP_scale_Q14, /* I LTP state scaling */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lag */ - const opus_int signal_type, /* I Signal type */ - const opus_int decisionDelay /* I Decision delay */ -); - -/******************************************/ -/* Noise shape quantizer for one subframe */ -/******************************************/ -static OPUS_INLINE void silk_noise_shape_quantizer_del_dec_neon( - silk_nsq_state *NSQ, /* I/O NSQ state */ - NSQ_del_decs_struct psDelDec[], /* I/O Delayed decision states */ - opus_int signalType, /* I Signal type */ - const opus_int32 x_Q10[], /* I */ - opus_int8 pulses[], /* O */ - opus_int16 xq[], /* O */ - opus_int32 sLTP_Q15[], /* I/O LTP filter state */ - opus_int32 delayedGain_Q10[], /* I/O Gain delay buffer */ - const opus_int16 a_Q12[], /* I Short term prediction coefs */ - const opus_int16 b_Q14[], /* I Long term prediction coefs */ - const opus_int16 AR_shp_Q13[], /* I Noise shaping coefs */ - opus_int lag, /* I Pitch lag */ - opus_int32 HarmShapeFIRPacked_Q14, /* I */ - opus_int Tilt_Q14, /* I Spectral tilt */ - opus_int32 LF_shp_Q14, /* I */ - opus_int32 Gain_Q16, /* I */ - opus_int Lambda_Q10, /* I */ - opus_int offset_Q10, /* I */ - opus_int length, /* I Input length */ - opus_int subfr, /* I Subframe number */ - opus_int shapingLPCOrder, /* I Shaping LPC filter order */ - opus_int predictLPCOrder, /* I Prediction filter order */ - opus_int warping_Q16, /* I */ - opus_int nStatesDelayedDecision, /* I Number of states in decision tree */ - opus_int *smpl_buf_idx, /* I/O Index to newest samples in buffers */ - opus_int decisionDelay /* I */ -); - -static OPUS_INLINE void copy_winner_state_kernel( - const NSQ_del_decs_struct *psDelDec, - const opus_int offset, - const opus_int last_smple_idx, - const opus_int Winner_ind, - const int32x2_t gain_lo_s32x2, - const int32x2_t gain_hi_s32x2, - const int32x4_t shift_s32x4, - int32x4_t t0_s32x4, - int32x4_t t1_s32x4, - opus_int8 *const pulses, - opus_int16 *pxq, - silk_nsq_state *NSQ -) -{ - int16x8_t t_s16x8; - int32x4_t o0_s32x4, o1_s32x4; - - t0_s32x4 = vld1q_lane_s32( &psDelDec->Q_Q10[ last_smple_idx - 0 ][ Winner_ind ], t0_s32x4, 0 ); - t0_s32x4 = vld1q_lane_s32( &psDelDec->Q_Q10[ last_smple_idx - 1 ][ Winner_ind ], t0_s32x4, 1 ); - t0_s32x4 = vld1q_lane_s32( &psDelDec->Q_Q10[ last_smple_idx - 2 ][ Winner_ind ], t0_s32x4, 2 ); - t0_s32x4 = vld1q_lane_s32( &psDelDec->Q_Q10[ last_smple_idx - 3 ][ Winner_ind ], t0_s32x4, 3 ); - t1_s32x4 = vld1q_lane_s32( &psDelDec->Q_Q10[ last_smple_idx - 4 ][ Winner_ind ], t1_s32x4, 0 ); - t1_s32x4 = vld1q_lane_s32( &psDelDec->Q_Q10[ last_smple_idx - 5 ][ Winner_ind ], t1_s32x4, 1 ); - t1_s32x4 = vld1q_lane_s32( &psDelDec->Q_Q10[ last_smple_idx - 6 ][ Winner_ind ], t1_s32x4, 2 ); - t1_s32x4 = vld1q_lane_s32( &psDelDec->Q_Q10[ last_smple_idx - 7 ][ Winner_ind ], t1_s32x4, 3 ); - t_s16x8 = vcombine_s16( vrshrn_n_s32( t0_s32x4, 10 ), vrshrn_n_s32( t1_s32x4, 10 ) ); - vst1_s8( &pulses[ offset ], vmovn_s16( t_s16x8 ) ); - - t0_s32x4 = vld1q_lane_s32( &psDelDec->Xq_Q14[ last_smple_idx - 0 ][ Winner_ind ], t0_s32x4, 0 ); - t0_s32x4 = vld1q_lane_s32( &psDelDec->Xq_Q14[ last_smple_idx - 1 ][ Winner_ind ], t0_s32x4, 1 ); - t0_s32x4 = vld1q_lane_s32( &psDelDec->Xq_Q14[ last_smple_idx - 2 ][ Winner_ind ], t0_s32x4, 2 ); - t0_s32x4 = vld1q_lane_s32( &psDelDec->Xq_Q14[ last_smple_idx - 3 ][ Winner_ind ], t0_s32x4, 3 ); - t1_s32x4 = vld1q_lane_s32( &psDelDec->Xq_Q14[ last_smple_idx - 4 ][ Winner_ind ], t1_s32x4, 0 ); - t1_s32x4 = vld1q_lane_s32( &psDelDec->Xq_Q14[ last_smple_idx - 5 ][ Winner_ind ], t1_s32x4, 1 ); - t1_s32x4 = vld1q_lane_s32( &psDelDec->Xq_Q14[ last_smple_idx - 6 ][ Winner_ind ], t1_s32x4, 2 ); - t1_s32x4 = vld1q_lane_s32( &psDelDec->Xq_Q14[ last_smple_idx - 7 ][ Winner_ind ], t1_s32x4, 3 ); - o0_s32x4 = vqdmulhq_lane_s32( t0_s32x4, gain_lo_s32x2, 0 ); - o1_s32x4 = vqdmulhq_lane_s32( t1_s32x4, gain_lo_s32x2, 0 ); - o0_s32x4 = vmlaq_lane_s32( o0_s32x4, t0_s32x4, gain_hi_s32x2, 0 ); - o1_s32x4 = vmlaq_lane_s32( o1_s32x4, t1_s32x4, gain_hi_s32x2, 0 ); - o0_s32x4 = vrshlq_s32( o0_s32x4, shift_s32x4 ); - o1_s32x4 = vrshlq_s32( o1_s32x4, shift_s32x4 ); - vst1_s16( &pxq[ offset + 0 ], vqmovn_s32( o0_s32x4 ) ); - vst1_s16( &pxq[ offset + 4 ], vqmovn_s32( o1_s32x4 ) ); - - t0_s32x4 = vld1q_lane_s32( &psDelDec->Shape_Q14[ last_smple_idx - 0 ][ Winner_ind ], t0_s32x4, 0 ); - t0_s32x4 = vld1q_lane_s32( &psDelDec->Shape_Q14[ last_smple_idx - 1 ][ Winner_ind ], t0_s32x4, 1 ); - t0_s32x4 = vld1q_lane_s32( &psDelDec->Shape_Q14[ last_smple_idx - 2 ][ Winner_ind ], t0_s32x4, 2 ); - t0_s32x4 = vld1q_lane_s32( &psDelDec->Shape_Q14[ last_smple_idx - 3 ][ Winner_ind ], t0_s32x4, 3 ); - t1_s32x4 = vld1q_lane_s32( &psDelDec->Shape_Q14[ last_smple_idx - 4 ][ Winner_ind ], t1_s32x4, 0 ); - t1_s32x4 = vld1q_lane_s32( &psDelDec->Shape_Q14[ last_smple_idx - 5 ][ Winner_ind ], t1_s32x4, 1 ); - t1_s32x4 = vld1q_lane_s32( &psDelDec->Shape_Q14[ last_smple_idx - 6 ][ Winner_ind ], t1_s32x4, 2 ); - t1_s32x4 = vld1q_lane_s32( &psDelDec->Shape_Q14[ last_smple_idx - 7 ][ Winner_ind ], t1_s32x4, 3 ); - vst1q_s32( &NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx + offset + 0 ], t0_s32x4 ); - vst1q_s32( &NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx + offset + 4 ], t1_s32x4 ); -} - -static OPUS_INLINE void copy_winner_state( - const NSQ_del_decs_struct *psDelDec, - const opus_int decisionDelay, - const opus_int smpl_buf_idx, - const opus_int Winner_ind, - const opus_int32 gain, - const opus_int32 shift, - opus_int8 *const pulses, - opus_int16 *pxq, - silk_nsq_state *NSQ -) -{ - opus_int i, last_smple_idx; - const int32x2_t gain_lo_s32x2 = vdup_n_s32( silk_LSHIFT32( gain & 0x0000FFFF, 15 ) ); - const int32x2_t gain_hi_s32x2 = vdup_n_s32( gain >> 16 ); - const int32x4_t shift_s32x4 = vdupq_n_s32( -shift ); - int32x4_t t0_s32x4, t1_s32x4; - - t0_s32x4 = t1_s32x4 = vdupq_n_s32( 0 ); /* initialization */ - last_smple_idx = smpl_buf_idx + decisionDelay - 1 + DECISION_DELAY; - if( last_smple_idx >= DECISION_DELAY ) last_smple_idx -= DECISION_DELAY; - if( last_smple_idx >= DECISION_DELAY ) last_smple_idx -= DECISION_DELAY; - - for( i = 0; ( i < ( decisionDelay - 7 ) ) && ( last_smple_idx >= 7 ); i += 8, last_smple_idx -= 8 ) { - copy_winner_state_kernel( psDelDec, i - decisionDelay, last_smple_idx, Winner_ind, gain_lo_s32x2, gain_hi_s32x2, shift_s32x4, t0_s32x4, t1_s32x4, pulses, pxq, NSQ ); - } - for( ; ( i < decisionDelay ) && ( last_smple_idx >= 0 ); i++, last_smple_idx-- ) { - pulses[ i - decisionDelay ] = (opus_int8)silk_RSHIFT_ROUND( psDelDec->Q_Q10[ last_smple_idx ][ Winner_ind ], 10 ); - pxq[ i - decisionDelay ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( psDelDec->Xq_Q14[ last_smple_idx ][ Winner_ind ], gain ), shift ) ); - NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - decisionDelay + i ] = psDelDec->Shape_Q14[ last_smple_idx ][ Winner_ind ]; - } - - last_smple_idx += DECISION_DELAY; - for( ; i < ( decisionDelay - 7 ); i++, last_smple_idx-- ) { - copy_winner_state_kernel( psDelDec, i - decisionDelay, last_smple_idx, Winner_ind, gain_lo_s32x2, gain_hi_s32x2, shift_s32x4, t0_s32x4, t1_s32x4, pulses, pxq, NSQ ); - } - for( ; i < decisionDelay; i++, last_smple_idx-- ) { - pulses[ i - decisionDelay ] = (opus_int8)silk_RSHIFT_ROUND( psDelDec->Q_Q10[ last_smple_idx ][ Winner_ind ], 10 ); - pxq[ i - decisionDelay ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( psDelDec->Xq_Q14[ last_smple_idx ][ Winner_ind ], gain ), shift ) ); - NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - decisionDelay + i ] = psDelDec->Shape_Q14[ last_smple_idx ][ Winner_ind ]; - } -} - -void silk_NSQ_del_dec_neon( - const silk_encoder_state *psEncC, /* I Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - SideInfoIndices *psIndices, /* I/O Quantization Indices */ - const opus_int16 x16[], /* I Input */ - opus_int8 pulses[], /* O Quantized pulse signal */ - const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ - const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ - const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ - const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ - const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ - const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ - const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ - const opus_int LTP_scale_Q14 /* I LTP state scaling */ -) -{ -#ifdef OPUS_CHECK_ASM - silk_nsq_state NSQ_c; - SideInfoIndices psIndices_c; - opus_int8 pulses_c[ MAX_FRAME_LENGTH ]; - const opus_int8 *const pulses_a = pulses; - - ( void )pulses_a; - silk_memcpy( &NSQ_c, NSQ, sizeof( NSQ_c ) ); - silk_memcpy( &psIndices_c, psIndices, sizeof( psIndices_c ) ); - silk_memcpy( pulses_c, pulses, sizeof( pulses_c ) ); - silk_NSQ_del_dec_c( psEncC, &NSQ_c, &psIndices_c, x16, pulses_c, PredCoef_Q12, LTPCoef_Q14, AR_Q13, HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, - pitchL, Lambda_Q10, LTP_scale_Q14 ); -#endif - - /* The optimization parallelizes the different delay decision states. */ - if(( psEncC->nStatesDelayedDecision > NEON_MAX_DEL_DEC_STATES ) || ( psEncC->nStatesDelayedDecision <= 2 )) { - /* NEON intrinsics optimization now can only parallelize up to 4 delay decision states. */ - /* If there are more states, C function is called, and this optimization must be expanded. */ - /* When the number of delay decision states is less than 3, there are penalties using this */ - /* optimization, and C function is called. */ - /* When the number of delay decision states is 2, it's better to specialize another */ - /* structure NSQ_del_dec2_struct and optimize with shorter NEON registers. (Low priority) */ - silk_NSQ_del_dec_c( psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, AR_Q13, HarmShapeGain_Q14, - Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14 ); - } else { - opus_int i, k, lag, start_idx, LSF_interpolation_flag, Winner_ind, subfr; - opus_int smpl_buf_idx, decisionDelay; - const opus_int16 *A_Q12, *B_Q14, *AR_shp_Q13; - opus_int16 *pxq; - VARDECL( opus_int32, sLTP_Q15 ); - VARDECL( opus_int16, sLTP ); - opus_int32 HarmShapeFIRPacked_Q14; - opus_int offset_Q10; - opus_int32 RDmin_Q10, Gain_Q10; - VARDECL( opus_int32, x_sc_Q10 ); - VARDECL( opus_int32, delayedGain_Q10 ); - VARDECL( NSQ_del_decs_struct, psDelDec ); - int32x4_t t_s32x4; - SAVE_STACK; - - /* Set unvoiced lag to the previous one, overwrite later for voiced */ - lag = NSQ->lagPrev; - - silk_assert( NSQ->prev_gain_Q16 != 0 ); - - /* Initialize delayed decision states */ - ALLOC( psDelDec, 1, NSQ_del_decs_struct ); - /* Only RandState and RD_Q10 need to be initialized to 0. */ - silk_memset( psDelDec->RandState, 0, sizeof( psDelDec->RandState ) ); - vst1q_s32( psDelDec->RD_Q10, vdupq_n_s32( 0 ) ); - - for( k = 0; k < psEncC->nStatesDelayedDecision; k++ ) { - psDelDec->SeedInit[ k ] = psDelDec->Seed[ k ] = ( k + psIndices->Seed ) & 3; - } - vst1q_s32( psDelDec->LF_AR_Q14, vld1q_dup_s32( &NSQ->sLF_AR_shp_Q14 ) ); - vst1q_s32( psDelDec->Diff_Q14, vld1q_dup_s32( &NSQ->sDiff_shp_Q14 ) ); - vst1q_s32( psDelDec->Shape_Q14[ 0 ], vld1q_dup_s32( &NSQ->sLTP_shp_Q14[ psEncC->ltp_mem_length - 1 ] ) ); - for( i = 0; i < NSQ_LPC_BUF_LENGTH; i++ ) { - vst1q_s32( psDelDec->sLPC_Q14[ i ], vld1q_dup_s32( &NSQ->sLPC_Q14[ i ] ) ); - } - for( i = 0; i < (opus_int)( sizeof( NSQ->sAR2_Q14 ) / sizeof( NSQ->sAR2_Q14[ 0 ] ) ); i++ ) { - vst1q_s32( psDelDec->sAR2_Q14[ i ], vld1q_dup_s32( &NSQ->sAR2_Q14[ i ] ) ); - } - - offset_Q10 = silk_Quantization_Offsets_Q10[ psIndices->signalType >> 1 ][ psIndices->quantOffsetType ]; - smpl_buf_idx = 0; /* index of oldest samples */ - - decisionDelay = silk_min_int( DECISION_DELAY, psEncC->subfr_length ); - - /* For voiced frames limit the decision delay to lower than the pitch lag */ - if( psIndices->signalType == TYPE_VOICED ) { - opus_int pitch_min = pitchL[ 0 ]; - for( k = 1; k < psEncC->nb_subfr; k++ ) { - pitch_min = silk_min_int( pitch_min, pitchL[ k ] ); - } - decisionDelay = silk_min_int( decisionDelay, pitch_min - LTP_ORDER / 2 - 1 ); - } else { - if( lag > 0 ) { - decisionDelay = silk_min_int( decisionDelay, lag - LTP_ORDER / 2 - 1 ); - } - } - - if( psIndices->NLSFInterpCoef_Q2 == 4 ) { - LSF_interpolation_flag = 0; - } else { - LSF_interpolation_flag = 1; - } - - ALLOC( sLTP_Q15, psEncC->ltp_mem_length + psEncC->frame_length, opus_int32 ); - ALLOC( sLTP, psEncC->ltp_mem_length + psEncC->frame_length, opus_int16 ); - ALLOC( x_sc_Q10, psEncC->subfr_length, opus_int32 ); - ALLOC( delayedGain_Q10, DECISION_DELAY, opus_int32 ); - /* Set up pointers to start of sub frame */ - pxq = &NSQ->xq[ psEncC->ltp_mem_length ]; - NSQ->sLTP_shp_buf_idx = psEncC->ltp_mem_length; - NSQ->sLTP_buf_idx = psEncC->ltp_mem_length; - subfr = 0; - for( k = 0; k < psEncC->nb_subfr; k++ ) { - A_Q12 = &PredCoef_Q12[ ( ( k >> 1 ) | ( 1 - LSF_interpolation_flag ) ) * MAX_LPC_ORDER ]; - B_Q14 = <PCoef_Q14[ k * LTP_ORDER ]; - AR_shp_Q13 = &AR_Q13[ k * MAX_SHAPE_LPC_ORDER ]; - - /* Noise shape parameters */ - silk_assert( HarmShapeGain_Q14[ k ] >= 0 ); - HarmShapeFIRPacked_Q14 = silk_RSHIFT( HarmShapeGain_Q14[ k ], 2 ); - HarmShapeFIRPacked_Q14 |= silk_LSHIFT( (opus_int32)silk_RSHIFT( HarmShapeGain_Q14[ k ], 1 ), 16 ); - - NSQ->rewhite_flag = 0; - if( psIndices->signalType == TYPE_VOICED ) { - /* Voiced */ - lag = pitchL[ k ]; - - /* Re-whitening */ - if( ( k & ( 3 - silk_LSHIFT( LSF_interpolation_flag, 1 ) ) ) == 0 ) { - if( k == 2 ) { - /* RESET DELAYED DECISIONS */ - /* Find winner */ - int32x4_t RD_Q10_s32x4; - RDmin_Q10 = psDelDec->RD_Q10[ 0 ]; - Winner_ind = 0; - for( i = 1; i < psEncC->nStatesDelayedDecision; i++ ) { - if( psDelDec->RD_Q10[ i ] < RDmin_Q10 ) { - RDmin_Q10 = psDelDec->RD_Q10[ i ]; - Winner_ind = i; - } - } - psDelDec->RD_Q10[ Winner_ind ] -= ( silk_int32_MAX >> 4 ); - RD_Q10_s32x4 = vld1q_s32( psDelDec->RD_Q10 ); - RD_Q10_s32x4 = vaddq_s32( RD_Q10_s32x4, vdupq_n_s32( silk_int32_MAX >> 4 ) ); - vst1q_s32( psDelDec->RD_Q10, RD_Q10_s32x4 ); - - /* Copy final part of signals from winner state to output and long-term filter states */ - copy_winner_state( psDelDec, decisionDelay, smpl_buf_idx, Winner_ind, Gains_Q16[ 1 ], 14, pulses, pxq, NSQ ); - - subfr = 0; - } - - /* Rewhiten with new A coefs */ - start_idx = psEncC->ltp_mem_length - lag - psEncC->predictLPCOrder - LTP_ORDER / 2; - silk_assert( start_idx > 0 ); - - silk_LPC_analysis_filter( &sLTP[ start_idx ], &NSQ->xq[ start_idx + k * psEncC->subfr_length ], - A_Q12, psEncC->ltp_mem_length - start_idx, psEncC->predictLPCOrder, psEncC->arch ); - - NSQ->sLTP_buf_idx = psEncC->ltp_mem_length; - NSQ->rewhite_flag = 1; - } - } - - silk_nsq_del_dec_scale_states_neon( psEncC, NSQ, psDelDec, x16, x_sc_Q10, sLTP, sLTP_Q15, k, - LTP_scale_Q14, Gains_Q16, pitchL, psIndices->signalType, decisionDelay ); - - silk_noise_shape_quantizer_del_dec_neon( NSQ, psDelDec, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q15, - delayedGain_Q10, A_Q12, B_Q14, AR_shp_Q13, lag, HarmShapeFIRPacked_Q14, Tilt_Q14[ k ], LF_shp_Q14[ k ], - Gains_Q16[ k ], Lambda_Q10, offset_Q10, psEncC->subfr_length, subfr++, psEncC->shapingLPCOrder, - psEncC->predictLPCOrder, psEncC->warping_Q16, psEncC->nStatesDelayedDecision, &smpl_buf_idx, decisionDelay ); - - x16 += psEncC->subfr_length; - pulses += psEncC->subfr_length; - pxq += psEncC->subfr_length; - } - - /* Find winner */ - RDmin_Q10 = psDelDec->RD_Q10[ 0 ]; - Winner_ind = 0; - for( k = 1; k < psEncC->nStatesDelayedDecision; k++ ) { - if( psDelDec->RD_Q10[ k ] < RDmin_Q10 ) { - RDmin_Q10 = psDelDec->RD_Q10[ k ]; - Winner_ind = k; - } - } - - /* Copy final part of signals from winner state to output and long-term filter states */ - psIndices->Seed = psDelDec->SeedInit[ Winner_ind ]; - Gain_Q10 = silk_RSHIFT32( Gains_Q16[ psEncC->nb_subfr - 1 ], 6 ); - copy_winner_state( psDelDec, decisionDelay, smpl_buf_idx, Winner_ind, Gain_Q10, 8, pulses, pxq, NSQ ); - - t_s32x4 = vdupq_n_s32( 0 ); /* initialization */ - for( i = 0; i < ( NSQ_LPC_BUF_LENGTH - 3 ); i += 4 ) { - t_s32x4 = vld1q_lane_s32( &psDelDec->sLPC_Q14[ i + 0 ][ Winner_ind ], t_s32x4, 0 ); - t_s32x4 = vld1q_lane_s32( &psDelDec->sLPC_Q14[ i + 1 ][ Winner_ind ], t_s32x4, 1 ); - t_s32x4 = vld1q_lane_s32( &psDelDec->sLPC_Q14[ i + 2 ][ Winner_ind ], t_s32x4, 2 ); - t_s32x4 = vld1q_lane_s32( &psDelDec->sLPC_Q14[ i + 3 ][ Winner_ind ], t_s32x4, 3 ); - vst1q_s32( &NSQ->sLPC_Q14[ i ], t_s32x4 ); - } - - for( ; i < NSQ_LPC_BUF_LENGTH; i++ ) { - NSQ->sLPC_Q14[ i ] = psDelDec->sLPC_Q14[ i ][ Winner_ind ]; - } - - for( i = 0; i < (opus_int)( sizeof( NSQ->sAR2_Q14 ) / sizeof( NSQ->sAR2_Q14[ 0 ] ) - 3 ); i += 4 ) { - t_s32x4 = vld1q_lane_s32( &psDelDec->sAR2_Q14[ i + 0 ][ Winner_ind ], t_s32x4, 0 ); - t_s32x4 = vld1q_lane_s32( &psDelDec->sAR2_Q14[ i + 1 ][ Winner_ind ], t_s32x4, 1 ); - t_s32x4 = vld1q_lane_s32( &psDelDec->sAR2_Q14[ i + 2 ][ Winner_ind ], t_s32x4, 2 ); - t_s32x4 = vld1q_lane_s32( &psDelDec->sAR2_Q14[ i + 3 ][ Winner_ind ], t_s32x4, 3 ); - vst1q_s32( &NSQ->sAR2_Q14[ i ], t_s32x4 ); - } - - for( ; i < (opus_int)( sizeof( NSQ->sAR2_Q14 ) / sizeof( NSQ->sAR2_Q14[ 0 ] ) ); i++ ) { - NSQ->sAR2_Q14[ i ] = psDelDec->sAR2_Q14[ i ][ Winner_ind ]; - } - - /* Update states */ - NSQ->sLF_AR_shp_Q14 = psDelDec->LF_AR_Q14[ Winner_ind ]; - NSQ->sDiff_shp_Q14 = psDelDec->Diff_Q14[ Winner_ind ]; - NSQ->lagPrev = pitchL[ psEncC->nb_subfr - 1 ]; - - /* Save quantized speech signal */ - silk_memmove( NSQ->xq, &NSQ->xq[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int16 ) ); - silk_memmove( NSQ->sLTP_shp_Q14, &NSQ->sLTP_shp_Q14[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int32 ) ); - RESTORE_STACK; - } - -#ifdef OPUS_CHECK_ASM - silk_assert( !memcmp( &NSQ_c, NSQ, sizeof( NSQ_c ) ) ); - silk_assert( !memcmp( &psIndices_c, psIndices, sizeof( psIndices_c ) ) ); - silk_assert( !memcmp( pulses_c, pulses_a, sizeof( pulses_c ) ) ); -#endif -} - -/******************************************/ -/* Noise shape quantizer for one subframe */ -/******************************************/ -/* Note: Function silk_short_prediction_create_arch_coef_neon() defined in NSQ_neon.h is actually a hacking C function. */ -/* Therefore here we append "_local" to the NEON function name to avoid confusion. */ -static OPUS_INLINE void silk_short_prediction_create_arch_coef_neon_local(opus_int32 *out, const opus_int16 *in, opus_int order) -{ - int16x8_t t_s16x8; - int32x4_t t0_s32x4, t1_s32x4, t2_s32x4, t3_s32x4; - silk_assert( order == 10 || order == 16 ); - - t_s16x8 = vld1q_s16( in + 0 ); /* 7 6 5 4 3 2 1 0 */ - t_s16x8 = vrev64q_s16( t_s16x8 ); /* 4 5 6 7 0 1 2 3 */ - t2_s32x4 = vshll_n_s16( vget_high_s16( t_s16x8 ), 15 ); /* 4 5 6 7 */ - t3_s32x4 = vshll_n_s16( vget_low_s16( t_s16x8 ), 15 ); /* 0 1 2 3 */ - - if( order == 16 ) { - t_s16x8 = vld1q_s16( in + 8 ); /* F E D C B A 9 8 */ - t_s16x8 = vrev64q_s16( t_s16x8 ); /* C D E F 8 9 A B */ - t0_s32x4 = vshll_n_s16( vget_high_s16( t_s16x8 ), 15 ); /* C D E F */ - t1_s32x4 = vshll_n_s16( vget_low_s16( t_s16x8 ), 15 ); /* 8 9 A B */ - } else { - int16x4_t t_s16x4; - - t0_s32x4 = vdupq_n_s32( 0 ); /* zero zero zero zero */ - t_s16x4 = vld1_s16( in + 6 ); /* 9 8 7 6 */ - t_s16x4 = vrev64_s16( t_s16x4 ); /* 6 7 8 9 */ - t1_s32x4 = vshll_n_s16( t_s16x4, 15 ); - t1_s32x4 = vcombine_s32( vget_low_s32(t0_s32x4), vget_low_s32( t1_s32x4 ) ); /* 8 9 zero zero */ - } - vst1q_s32( out + 0, t0_s32x4 ); - vst1q_s32( out + 4, t1_s32x4 ); - vst1q_s32( out + 8, t2_s32x4 ); - vst1q_s32( out + 12, t3_s32x4 ); -} - -static OPUS_INLINE int32x4_t silk_SMLAWB_lane0_neon( - const int32x4_t out_s32x4, - const int32x4_t in_s32x4, - const int32x2_t coef_s32x2 -) -{ - return vaddq_s32( out_s32x4, vqdmulhq_lane_s32( in_s32x4, coef_s32x2, 0 ) ); -} - -static OPUS_INLINE int32x4_t silk_SMLAWB_lane1_neon( - const int32x4_t out_s32x4, - const int32x4_t in_s32x4, - const int32x2_t coef_s32x2 -) -{ - return vaddq_s32( out_s32x4, vqdmulhq_lane_s32( in_s32x4, coef_s32x2, 1 ) ); -} - -/* Note: This function has different return value than silk_noise_shape_quantizer_short_prediction_neon(). */ -/* Therefore here we append "_local" to the function name to avoid confusion. */ -static OPUS_INLINE int32x4_t silk_noise_shape_quantizer_short_prediction_neon_local(const opus_int32 *buf32, const opus_int32 *a_Q12_arch, opus_int order) -{ - const int32x4_t a_Q12_arch0_s32x4 = vld1q_s32( a_Q12_arch + 0 ); - const int32x4_t a_Q12_arch1_s32x4 = vld1q_s32( a_Q12_arch + 4 ); - const int32x4_t a_Q12_arch2_s32x4 = vld1q_s32( a_Q12_arch + 8 ); - const int32x4_t a_Q12_arch3_s32x4 = vld1q_s32( a_Q12_arch + 12 ); - int32x4_t LPC_pred_Q14_s32x4; - - silk_assert( order == 10 || order == 16 ); - /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */ - LPC_pred_Q14_s32x4 = vdupq_n_s32( silk_RSHIFT( order, 1 ) ); - LPC_pred_Q14_s32x4 = silk_SMLAWB_lane0_neon( LPC_pred_Q14_s32x4, vld1q_s32( buf32 + 0 * NEON_MAX_DEL_DEC_STATES ), vget_low_s32( a_Q12_arch0_s32x4 ) ); - LPC_pred_Q14_s32x4 = silk_SMLAWB_lane1_neon( LPC_pred_Q14_s32x4, vld1q_s32( buf32 + 1 * NEON_MAX_DEL_DEC_STATES ), vget_low_s32( a_Q12_arch0_s32x4 ) ); - LPC_pred_Q14_s32x4 = silk_SMLAWB_lane0_neon( LPC_pred_Q14_s32x4, vld1q_s32( buf32 + 2 * NEON_MAX_DEL_DEC_STATES ), vget_high_s32( a_Q12_arch0_s32x4 ) ); - LPC_pred_Q14_s32x4 = silk_SMLAWB_lane1_neon( LPC_pred_Q14_s32x4, vld1q_s32( buf32 + 3 * NEON_MAX_DEL_DEC_STATES ), vget_high_s32( a_Q12_arch0_s32x4 ) ); - LPC_pred_Q14_s32x4 = silk_SMLAWB_lane0_neon( LPC_pred_Q14_s32x4, vld1q_s32( buf32 + 4 * NEON_MAX_DEL_DEC_STATES ), vget_low_s32( a_Q12_arch1_s32x4 ) ); - LPC_pred_Q14_s32x4 = silk_SMLAWB_lane1_neon( LPC_pred_Q14_s32x4, vld1q_s32( buf32 + 5 * NEON_MAX_DEL_DEC_STATES ), vget_low_s32( a_Q12_arch1_s32x4 ) ); - LPC_pred_Q14_s32x4 = silk_SMLAWB_lane0_neon( LPC_pred_Q14_s32x4, vld1q_s32( buf32 + 6 * NEON_MAX_DEL_DEC_STATES ), vget_high_s32( a_Q12_arch1_s32x4 ) ); - LPC_pred_Q14_s32x4 = silk_SMLAWB_lane1_neon( LPC_pred_Q14_s32x4, vld1q_s32( buf32 + 7 * NEON_MAX_DEL_DEC_STATES ), vget_high_s32( a_Q12_arch1_s32x4 ) ); - LPC_pred_Q14_s32x4 = silk_SMLAWB_lane0_neon( LPC_pred_Q14_s32x4, vld1q_s32( buf32 + 8 * NEON_MAX_DEL_DEC_STATES ), vget_low_s32( a_Q12_arch2_s32x4 ) ); - LPC_pred_Q14_s32x4 = silk_SMLAWB_lane1_neon( LPC_pred_Q14_s32x4, vld1q_s32( buf32 + 9 * NEON_MAX_DEL_DEC_STATES ), vget_low_s32( a_Q12_arch2_s32x4 ) ); - LPC_pred_Q14_s32x4 = silk_SMLAWB_lane0_neon( LPC_pred_Q14_s32x4, vld1q_s32( buf32 + 10 * NEON_MAX_DEL_DEC_STATES ), vget_high_s32( a_Q12_arch2_s32x4 ) ); - LPC_pred_Q14_s32x4 = silk_SMLAWB_lane1_neon( LPC_pred_Q14_s32x4, vld1q_s32( buf32 + 11 * NEON_MAX_DEL_DEC_STATES ), vget_high_s32( a_Q12_arch2_s32x4 ) ); - LPC_pred_Q14_s32x4 = silk_SMLAWB_lane0_neon( LPC_pred_Q14_s32x4, vld1q_s32( buf32 + 12 * NEON_MAX_DEL_DEC_STATES ), vget_low_s32( a_Q12_arch3_s32x4 ) ); - LPC_pred_Q14_s32x4 = silk_SMLAWB_lane1_neon( LPC_pred_Q14_s32x4, vld1q_s32( buf32 + 13 * NEON_MAX_DEL_DEC_STATES ), vget_low_s32( a_Q12_arch3_s32x4 ) ); - LPC_pred_Q14_s32x4 = silk_SMLAWB_lane0_neon( LPC_pred_Q14_s32x4, vld1q_s32( buf32 + 14 * NEON_MAX_DEL_DEC_STATES ), vget_high_s32( a_Q12_arch3_s32x4 ) ); - LPC_pred_Q14_s32x4 = silk_SMLAWB_lane1_neon( LPC_pred_Q14_s32x4, vld1q_s32( buf32 + 15 * NEON_MAX_DEL_DEC_STATES ), vget_high_s32( a_Q12_arch3_s32x4 ) ); - - return LPC_pred_Q14_s32x4; -} - -static OPUS_INLINE void silk_noise_shape_quantizer_del_dec_neon( - silk_nsq_state *NSQ, /* I/O NSQ state */ - NSQ_del_decs_struct psDelDec[], /* I/O Delayed decision states */ - opus_int signalType, /* I Signal type */ - const opus_int32 x_Q10[], /* I */ - opus_int8 pulses[], /* O */ - opus_int16 xq[], /* O */ - opus_int32 sLTP_Q15[], /* I/O LTP filter state */ - opus_int32 delayedGain_Q10[], /* I/O Gain delay buffer */ - const opus_int16 a_Q12[], /* I Short term prediction coefs */ - const opus_int16 b_Q14[], /* I Long term prediction coefs */ - const opus_int16 AR_shp_Q13[], /* I Noise shaping coefs */ - opus_int lag, /* I Pitch lag */ - opus_int32 HarmShapeFIRPacked_Q14, /* I */ - opus_int Tilt_Q14, /* I Spectral tilt */ - opus_int32 LF_shp_Q14, /* I */ - opus_int32 Gain_Q16, /* I */ - opus_int Lambda_Q10, /* I */ - opus_int offset_Q10, /* I */ - opus_int length, /* I Input length */ - opus_int subfr, /* I Subframe number */ - opus_int shapingLPCOrder, /* I Shaping LPC filter order */ - opus_int predictLPCOrder, /* I Prediction filter order */ - opus_int warping_Q16, /* I */ - opus_int nStatesDelayedDecision, /* I Number of states in decision tree */ - opus_int *smpl_buf_idx, /* I/O Index to newest samples in buffers */ - opus_int decisionDelay /* I */ -) -{ - opus_int i, j, k, Winner_ind, RDmin_ind, RDmax_ind, last_smple_idx; - opus_int32 Winner_rand_state; - opus_int32 LTP_pred_Q14, n_LTP_Q14; - opus_int32 RDmin_Q10, RDmax_Q10; - opus_int32 Gain_Q10; - opus_int32 *pred_lag_ptr, *shp_lag_ptr; - opus_int32 a_Q12_arch[MAX_LPC_ORDER]; - const int32x2_t warping_Q16_s32x2 = vdup_n_s32( silk_LSHIFT32( warping_Q16, 16 ) >> 1 ); - const opus_int32 LF_shp_Q29 = silk_LSHIFT32( LF_shp_Q14, 16 ) >> 1; - opus_int32 AR_shp_Q28[ MAX_SHAPE_LPC_ORDER ]; - const uint32x4_t rand_multiplier_u32x4 = vdupq_n_u32( RAND_MULTIPLIER ); - const uint32x4_t rand_increment_u32x4 = vdupq_n_u32( RAND_INCREMENT ); - - VARDECL( NSQ_samples_struct, psSampleState ); - SAVE_STACK; - - silk_assert( nStatesDelayedDecision > 0 ); - silk_assert( ( shapingLPCOrder & 1 ) == 0 ); /* check that order is even */ - ALLOC( psSampleState, 2, NSQ_samples_struct ); - - shp_lag_ptr = &NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - lag + HARM_SHAPE_FIR_TAPS / 2 ]; - pred_lag_ptr = &sLTP_Q15[ NSQ->sLTP_buf_idx - lag + LTP_ORDER / 2 ]; - Gain_Q10 = silk_RSHIFT( Gain_Q16, 6 ); - - for( i = 0; i < ( MAX_SHAPE_LPC_ORDER - 7 ); i += 8 ) { - const int16x8_t t_s16x8 = vld1q_s16( AR_shp_Q13 + i ); - vst1q_s32( AR_shp_Q28 + i + 0, vshll_n_s16( vget_low_s16( t_s16x8 ), 15 ) ); - vst1q_s32( AR_shp_Q28 + i + 4, vshll_n_s16( vget_high_s16( t_s16x8 ), 15 ) ); - } - - for( ; i < MAX_SHAPE_LPC_ORDER; i++ ) { - AR_shp_Q28[i] = silk_LSHIFT32( AR_shp_Q13[i], 15 ); - } - - silk_short_prediction_create_arch_coef_neon_local( a_Q12_arch, a_Q12, predictLPCOrder ); - - for( i = 0; i < length; i++ ) { - int32x4_t Seed_s32x4, LPC_pred_Q14_s32x4; - int32x4_t sign_s32x4, tmp1_s32x4, tmp2_s32x4; - int32x4_t n_AR_Q14_s32x4, n_LF_Q14_s32x4; - int32x2_t AR_shp_Q28_s32x2; - int16x4_t r_Q10_s16x4, rr_Q10_s16x4; - - /* Perform common calculations used in all states */ - - /* Long-term prediction */ - if( signalType == TYPE_VOICED ) { - /* Unrolled loop */ - /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */ - LTP_pred_Q14 = 2; - LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ 0 ], b_Q14[ 0 ] ); - LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ -1 ], b_Q14[ 1 ] ); - LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ -2 ], b_Q14[ 2 ] ); - LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ -3 ], b_Q14[ 3 ] ); - LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ -4 ], b_Q14[ 4 ] ); - LTP_pred_Q14 = silk_LSHIFT( LTP_pred_Q14, 1 ); /* Q13 -> Q14 */ - pred_lag_ptr++; - } else { - LTP_pred_Q14 = 0; - } - - /* Long-term shaping */ - if( lag > 0 ) { - /* Symmetric, packed FIR coefficients */ - n_LTP_Q14 = silk_SMULWB( silk_ADD32( shp_lag_ptr[ 0 ], shp_lag_ptr[ -2 ] ), HarmShapeFIRPacked_Q14 ); - n_LTP_Q14 = silk_SMLAWT( n_LTP_Q14, shp_lag_ptr[ -1 ], HarmShapeFIRPacked_Q14 ); - n_LTP_Q14 = silk_SUB_LSHIFT32( LTP_pred_Q14, n_LTP_Q14, 2 ); /* Q12 -> Q14 */ - shp_lag_ptr++; - } else { - n_LTP_Q14 = 0; - } - - /* Generate dither */ - Seed_s32x4 = vld1q_s32( psDelDec->Seed ); - Seed_s32x4 = vreinterpretq_s32_u32( vmlaq_u32( rand_increment_u32x4, vreinterpretq_u32_s32( Seed_s32x4 ), rand_multiplier_u32x4 ) ); - vst1q_s32( psDelDec->Seed, Seed_s32x4 ); - - /* Short-term prediction */ - LPC_pred_Q14_s32x4 = silk_noise_shape_quantizer_short_prediction_neon_local(psDelDec->sLPC_Q14[ NSQ_LPC_BUF_LENGTH - 16 + i ], a_Q12_arch, predictLPCOrder); - LPC_pred_Q14_s32x4 = vshlq_n_s32( LPC_pred_Q14_s32x4, 4 ); /* Q10 -> Q14 */ - - /* Noise shape feedback */ - /* Output of lowpass section */ - tmp2_s32x4 = silk_SMLAWB_lane0_neon( vld1q_s32( psDelDec->Diff_Q14 ), vld1q_s32( psDelDec->sAR2_Q14[ 0 ] ), warping_Q16_s32x2 ); - /* Output of allpass section */ - tmp1_s32x4 = vsubq_s32( vld1q_s32( psDelDec->sAR2_Q14[ 1 ] ), tmp2_s32x4 ); - tmp1_s32x4 = silk_SMLAWB_lane0_neon( vld1q_s32( psDelDec->sAR2_Q14[ 0 ] ), tmp1_s32x4, warping_Q16_s32x2 ); - vst1q_s32( psDelDec->sAR2_Q14[ 0 ], tmp2_s32x4 ); - AR_shp_Q28_s32x2 = vld1_s32( AR_shp_Q28 ); - n_AR_Q14_s32x4 = vaddq_s32( vdupq_n_s32( silk_RSHIFT( shapingLPCOrder, 1 ) ), vqdmulhq_lane_s32( tmp2_s32x4, AR_shp_Q28_s32x2, 0 ) ); - - /* Loop over allpass sections */ - for( j = 2; j < shapingLPCOrder; j += 2 ) { - /* Output of allpass section */ - tmp2_s32x4 = vsubq_s32( vld1q_s32( psDelDec->sAR2_Q14[ j + 0 ] ), tmp1_s32x4 ); - tmp2_s32x4 = silk_SMLAWB_lane0_neon( vld1q_s32( psDelDec->sAR2_Q14[ j - 1 ] ), tmp2_s32x4, warping_Q16_s32x2 ); - vst1q_s32( psDelDec->sAR2_Q14[ j - 1 ], tmp1_s32x4 ); - n_AR_Q14_s32x4 = vaddq_s32( n_AR_Q14_s32x4, vqdmulhq_lane_s32( tmp1_s32x4, AR_shp_Q28_s32x2, 1 ) ); - /* Output of allpass section */ - tmp1_s32x4 = vsubq_s32( vld1q_s32( psDelDec->sAR2_Q14[ j + 1 ] ), tmp2_s32x4 ); - tmp1_s32x4 = silk_SMLAWB_lane0_neon( vld1q_s32( psDelDec->sAR2_Q14[ j + 0 ] ), tmp1_s32x4, warping_Q16_s32x2 ); - vst1q_s32( psDelDec->sAR2_Q14[ j + 0 ], tmp2_s32x4 ); - AR_shp_Q28_s32x2 = vld1_s32( &AR_shp_Q28[ j ] ); - n_AR_Q14_s32x4 = vaddq_s32( n_AR_Q14_s32x4, vqdmulhq_lane_s32( tmp2_s32x4, AR_shp_Q28_s32x2, 0 ) ); - } - vst1q_s32( psDelDec->sAR2_Q14[ shapingLPCOrder - 1 ], tmp1_s32x4 ); - n_AR_Q14_s32x4 = vaddq_s32( n_AR_Q14_s32x4, vqdmulhq_lane_s32( tmp1_s32x4, AR_shp_Q28_s32x2, 1 ) ); - n_AR_Q14_s32x4 = vshlq_n_s32( n_AR_Q14_s32x4, 1 ); /* Q11 -> Q12 */ - n_AR_Q14_s32x4 = vaddq_s32( n_AR_Q14_s32x4, vqdmulhq_n_s32( vld1q_s32( psDelDec->LF_AR_Q14 ), silk_LSHIFT32( Tilt_Q14, 16 ) >> 1 ) ); /* Q12 */ - n_AR_Q14_s32x4 = vshlq_n_s32( n_AR_Q14_s32x4, 2 ); /* Q12 -> Q14 */ - n_LF_Q14_s32x4 = vqdmulhq_n_s32( vld1q_s32( psDelDec->Shape_Q14[ *smpl_buf_idx ] ), LF_shp_Q29 ); /* Q12 */ - n_LF_Q14_s32x4 = vaddq_s32( n_LF_Q14_s32x4, vqdmulhq_n_s32( vld1q_s32( psDelDec->LF_AR_Q14 ), silk_LSHIFT32( LF_shp_Q14 >> 16 , 15 ) ) ); /* Q12 */ - n_LF_Q14_s32x4 = vshlq_n_s32( n_LF_Q14_s32x4, 2 ); /* Q12 -> Q14 */ - - /* Input minus prediction plus noise feedback */ - /* r = x[ i ] - LTP_pred - LPC_pred + n_AR + n_Tilt + n_LF + n_LTP */ - tmp1_s32x4 = vaddq_s32( n_AR_Q14_s32x4, n_LF_Q14_s32x4 ); /* Q14 */ - tmp2_s32x4 = vaddq_s32( vdupq_n_s32( n_LTP_Q14 ), LPC_pred_Q14_s32x4 ); /* Q13 */ - tmp1_s32x4 = vsubq_s32( tmp2_s32x4, tmp1_s32x4 ); /* Q13 */ - tmp1_s32x4 = vrshrq_n_s32( tmp1_s32x4, 4 ); /* Q10 */ - tmp1_s32x4 = vsubq_s32( vdupq_n_s32( x_Q10[ i ] ), tmp1_s32x4 ); /* residual error Q10 */ - - /* Flip sign depending on dither */ - sign_s32x4 = vreinterpretq_s32_u32( vcltq_s32( Seed_s32x4, vdupq_n_s32( 0 ) ) ); - tmp1_s32x4 = veorq_s32( tmp1_s32x4, sign_s32x4 ); - tmp1_s32x4 = vsubq_s32( tmp1_s32x4, sign_s32x4 ); - tmp1_s32x4 = vmaxq_s32( tmp1_s32x4, vdupq_n_s32( -( 31 << 10 ) ) ); - tmp1_s32x4 = vminq_s32( tmp1_s32x4, vdupq_n_s32( 30 << 10 ) ); - r_Q10_s16x4 = vmovn_s32( tmp1_s32x4 ); - - /* Find two quantization level candidates and measure their rate-distortion */ - { - int16x4_t q1_Q10_s16x4 = vsub_s16( r_Q10_s16x4, vdup_n_s16( offset_Q10 ) ); - int16x4_t q1_Q0_s16x4 = vshr_n_s16( q1_Q10_s16x4, 10 ); - int16x4_t q2_Q10_s16x4; - int32x4_t rd1_Q10_s32x4, rd2_Q10_s32x4; - uint32x4_t t_u32x4; - - if( Lambda_Q10 > 2048 ) { - /* For aggressive RDO, the bias becomes more than one pulse. */ - const int rdo_offset = Lambda_Q10/2 - 512; - const uint16x4_t greaterThanRdo = vcgt_s16( q1_Q10_s16x4, vdup_n_s16( rdo_offset ) ); - const uint16x4_t lessThanMinusRdo = vclt_s16( q1_Q10_s16x4, vdup_n_s16( -rdo_offset ) ); - /* If Lambda_Q10 > 32767, then q1_Q0, q1_Q10 and q2_Q10 must change to 32-bit. */ - silk_assert( Lambda_Q10 <= 32767 ); - - q1_Q0_s16x4 = vreinterpret_s16_u16( vclt_s16( q1_Q10_s16x4, vdup_n_s16( 0 ) ) ); - q1_Q0_s16x4 = vbsl_s16( greaterThanRdo, vsub_s16( q1_Q10_s16x4, vdup_n_s16( rdo_offset ) ), q1_Q0_s16x4 ); - q1_Q0_s16x4 = vbsl_s16( lessThanMinusRdo, vadd_s16( q1_Q10_s16x4, vdup_n_s16( rdo_offset ) ), q1_Q0_s16x4 ); - q1_Q0_s16x4 = vshr_n_s16( q1_Q0_s16x4, 10 ); - } - { - const uint16x4_t equal0_u16x4 = vceq_s16( q1_Q0_s16x4, vdup_n_s16( 0 ) ); - const uint16x4_t equalMinus1_u16x4 = vceq_s16( q1_Q0_s16x4, vdup_n_s16( -1 ) ); - const uint16x4_t lessThanMinus1_u16x4 = vclt_s16( q1_Q0_s16x4, vdup_n_s16( -1 ) ); - int16x4_t tmp1_s16x4, tmp2_s16x4; - - q1_Q10_s16x4 = vshl_n_s16( q1_Q0_s16x4, 10 ); - tmp1_s16x4 = vadd_s16( q1_Q10_s16x4, vdup_n_s16( offset_Q10 - QUANT_LEVEL_ADJUST_Q10 ) ); - q1_Q10_s16x4 = vadd_s16( q1_Q10_s16x4, vdup_n_s16( offset_Q10 + QUANT_LEVEL_ADJUST_Q10 ) ); - q1_Q10_s16x4 = vbsl_s16( lessThanMinus1_u16x4, q1_Q10_s16x4, tmp1_s16x4 ); - q1_Q10_s16x4 = vbsl_s16( equal0_u16x4, vdup_n_s16( offset_Q10 ), q1_Q10_s16x4 ); - q1_Q10_s16x4 = vbsl_s16( equalMinus1_u16x4, vdup_n_s16( offset_Q10 - ( 1024 - QUANT_LEVEL_ADJUST_Q10 ) ), q1_Q10_s16x4 ); - q2_Q10_s16x4 = vadd_s16( q1_Q10_s16x4, vdup_n_s16( 1024 ) ); - q2_Q10_s16x4 = vbsl_s16( equal0_u16x4, vdup_n_s16( offset_Q10 + 1024 - QUANT_LEVEL_ADJUST_Q10 ), q2_Q10_s16x4 ); - q2_Q10_s16x4 = vbsl_s16( equalMinus1_u16x4, vdup_n_s16( offset_Q10 ), q2_Q10_s16x4 ); - tmp1_s16x4 = q1_Q10_s16x4; - tmp2_s16x4 = q2_Q10_s16x4; - tmp1_s16x4 = vbsl_s16( vorr_u16( equalMinus1_u16x4, lessThanMinus1_u16x4 ), vneg_s16( tmp1_s16x4 ), tmp1_s16x4 ); - tmp2_s16x4 = vbsl_s16( lessThanMinus1_u16x4, vneg_s16( tmp2_s16x4 ), tmp2_s16x4 ); - rd1_Q10_s32x4 = vmull_s16( tmp1_s16x4, vdup_n_s16( Lambda_Q10 ) ); - rd2_Q10_s32x4 = vmull_s16( tmp2_s16x4, vdup_n_s16( Lambda_Q10 ) ); - } - - rr_Q10_s16x4 = vsub_s16( r_Q10_s16x4, q1_Q10_s16x4 ); - rd1_Q10_s32x4 = vmlal_s16( rd1_Q10_s32x4, rr_Q10_s16x4, rr_Q10_s16x4 ); - rd1_Q10_s32x4 = vshrq_n_s32( rd1_Q10_s32x4, 10 ); - - rr_Q10_s16x4 = vsub_s16( r_Q10_s16x4, q2_Q10_s16x4 ); - rd2_Q10_s32x4 = vmlal_s16( rd2_Q10_s32x4, rr_Q10_s16x4, rr_Q10_s16x4 ); - rd2_Q10_s32x4 = vshrq_n_s32( rd2_Q10_s32x4, 10 ); - - tmp2_s32x4 = vld1q_s32( psDelDec->RD_Q10 ); - tmp1_s32x4 = vaddq_s32( tmp2_s32x4, vminq_s32( rd1_Q10_s32x4, rd2_Q10_s32x4 ) ); - tmp2_s32x4 = vaddq_s32( tmp2_s32x4, vmaxq_s32( rd1_Q10_s32x4, rd2_Q10_s32x4 ) ); - vst1q_s32( psSampleState[ 0 ].RD_Q10, tmp1_s32x4 ); - vst1q_s32( psSampleState[ 1 ].RD_Q10, tmp2_s32x4 ); - t_u32x4 = vcltq_s32( rd1_Q10_s32x4, rd2_Q10_s32x4 ); - tmp1_s32x4 = vbslq_s32( t_u32x4, vmovl_s16( q1_Q10_s16x4 ), vmovl_s16( q2_Q10_s16x4 ) ); - tmp2_s32x4 = vbslq_s32( t_u32x4, vmovl_s16( q2_Q10_s16x4 ), vmovl_s16( q1_Q10_s16x4 ) ); - vst1q_s32( psSampleState[ 0 ].Q_Q10, tmp1_s32x4 ); - vst1q_s32( psSampleState[ 1 ].Q_Q10, tmp2_s32x4 ); - } - - { - /* Update states for best quantization */ - int32x4_t exc_Q14_s32x4, LPC_exc_Q14_s32x4, xq_Q14_s32x4, sLF_AR_shp_Q14_s32x4; - - /* Quantized excitation */ - exc_Q14_s32x4 = vshlq_n_s32( tmp1_s32x4, 4 ); - exc_Q14_s32x4 = veorq_s32( exc_Q14_s32x4, sign_s32x4 ); - exc_Q14_s32x4 = vsubq_s32( exc_Q14_s32x4, sign_s32x4 ); - - /* Add predictions */ - LPC_exc_Q14_s32x4 = vaddq_s32( exc_Q14_s32x4, vdupq_n_s32( LTP_pred_Q14 ) ); - xq_Q14_s32x4 = vaddq_s32( LPC_exc_Q14_s32x4, LPC_pred_Q14_s32x4 ); - - /* Update states */ - tmp1_s32x4 = vsubq_s32( xq_Q14_s32x4, vshlq_n_s32( vdupq_n_s32( x_Q10[ i ] ), 4 ) ); - vst1q_s32( psSampleState[ 0 ].Diff_Q14, tmp1_s32x4 ); - sLF_AR_shp_Q14_s32x4 = vsubq_s32( tmp1_s32x4, n_AR_Q14_s32x4 ); - vst1q_s32( psSampleState[ 0 ].sLTP_shp_Q14, vsubq_s32( sLF_AR_shp_Q14_s32x4, n_LF_Q14_s32x4 ) ); - vst1q_s32( psSampleState[ 0 ].LF_AR_Q14, sLF_AR_shp_Q14_s32x4 ); - vst1q_s32( psSampleState[ 0 ].LPC_exc_Q14, LPC_exc_Q14_s32x4 ); - vst1q_s32( psSampleState[ 0 ].xq_Q14, xq_Q14_s32x4 ); - - /* Quantized excitation */ - exc_Q14_s32x4 = vshlq_n_s32( tmp2_s32x4, 4 ); - exc_Q14_s32x4 = veorq_s32( exc_Q14_s32x4, sign_s32x4 ); - exc_Q14_s32x4 = vsubq_s32( exc_Q14_s32x4, sign_s32x4 ); - - /* Add predictions */ - LPC_exc_Q14_s32x4 = vaddq_s32( exc_Q14_s32x4, vdupq_n_s32( LTP_pred_Q14 ) ); - xq_Q14_s32x4 = vaddq_s32( LPC_exc_Q14_s32x4, LPC_pred_Q14_s32x4 ); - - /* Update states */ - tmp1_s32x4 = vsubq_s32( xq_Q14_s32x4, vshlq_n_s32( vdupq_n_s32( x_Q10[ i ] ), 4 ) ); - vst1q_s32( psSampleState[ 1 ].Diff_Q14, tmp1_s32x4 ); - sLF_AR_shp_Q14_s32x4 = vsubq_s32( tmp1_s32x4, n_AR_Q14_s32x4 ); - vst1q_s32( psSampleState[ 1 ].sLTP_shp_Q14, vsubq_s32( sLF_AR_shp_Q14_s32x4, n_LF_Q14_s32x4 ) ); - vst1q_s32( psSampleState[ 1 ].LF_AR_Q14, sLF_AR_shp_Q14_s32x4 ); - vst1q_s32( psSampleState[ 1 ].LPC_exc_Q14, LPC_exc_Q14_s32x4 ); - vst1q_s32( psSampleState[ 1 ].xq_Q14, xq_Q14_s32x4 ); - } - - *smpl_buf_idx = *smpl_buf_idx ? ( *smpl_buf_idx - 1 ) : ( DECISION_DELAY - 1); - last_smple_idx = *smpl_buf_idx + decisionDelay + DECISION_DELAY; - if( last_smple_idx >= DECISION_DELAY ) last_smple_idx -= DECISION_DELAY; - if( last_smple_idx >= DECISION_DELAY ) last_smple_idx -= DECISION_DELAY; - - /* Find winner */ - RDmin_Q10 = psSampleState[ 0 ].RD_Q10[ 0 ]; - Winner_ind = 0; - for( k = 1; k < nStatesDelayedDecision; k++ ) { - if( psSampleState[ 0 ].RD_Q10[ k ] < RDmin_Q10 ) { - RDmin_Q10 = psSampleState[ 0 ].RD_Q10[ k ]; - Winner_ind = k; - } - } - - /* Increase RD values of expired states */ - { - uint32x4_t t_u32x4; - Winner_rand_state = psDelDec->RandState[ last_smple_idx ][ Winner_ind ]; - t_u32x4 = vceqq_s32( vld1q_s32( psDelDec->RandState[ last_smple_idx ] ), vdupq_n_s32( Winner_rand_state ) ); - t_u32x4 = vmvnq_u32( t_u32x4 ); - t_u32x4 = vshrq_n_u32( t_u32x4, 5 ); - tmp1_s32x4 = vld1q_s32( psSampleState[ 0 ].RD_Q10 ); - tmp2_s32x4 = vld1q_s32( psSampleState[ 1 ].RD_Q10 ); - tmp1_s32x4 = vaddq_s32( tmp1_s32x4, vreinterpretq_s32_u32( t_u32x4 ) ); - tmp2_s32x4 = vaddq_s32( tmp2_s32x4, vreinterpretq_s32_u32( t_u32x4 ) ); - vst1q_s32( psSampleState[ 0 ].RD_Q10, tmp1_s32x4 ); - vst1q_s32( psSampleState[ 1 ].RD_Q10, tmp2_s32x4 ); - - /* Find worst in first set and best in second set */ - RDmax_Q10 = psSampleState[ 0 ].RD_Q10[ 0 ]; - RDmin_Q10 = psSampleState[ 1 ].RD_Q10[ 0 ]; - RDmax_ind = 0; - RDmin_ind = 0; - for( k = 1; k < nStatesDelayedDecision; k++ ) { - /* find worst in first set */ - if( psSampleState[ 0 ].RD_Q10[ k ] > RDmax_Q10 ) { - RDmax_Q10 = psSampleState[ 0 ].RD_Q10[ k ]; - RDmax_ind = k; - } - /* find best in second set */ - if( psSampleState[ 1 ].RD_Q10[ k ] < RDmin_Q10 ) { - RDmin_Q10 = psSampleState[ 1 ].RD_Q10[ k ]; - RDmin_ind = k; - } - } - } - - /* Replace a state if best from second set outperforms worst in first set */ - if( RDmin_Q10 < RDmax_Q10 ) { - opus_int32 (*ptr)[NEON_MAX_DEL_DEC_STATES] = psDelDec->RandState; - const int numOthers = (int)( ( sizeof( NSQ_del_decs_struct ) - sizeof( ( (NSQ_del_decs_struct *)0 )->sLPC_Q14 ) ) - / ( NEON_MAX_DEL_DEC_STATES * sizeof( opus_int32 ) ) ); - /* Only ( predictLPCOrder - 1 ) of sLPC_Q14 buffer need to be updated, though the first several */ - /* useless sLPC_Q14[] will be different comparing with C when predictLPCOrder < NSQ_LPC_BUF_LENGTH. */ - /* Here just update constant ( NSQ_LPC_BUF_LENGTH - 1 ) for simplicity. */ - for( j = i + 1; j < i + NSQ_LPC_BUF_LENGTH; j++ ) { - psDelDec->sLPC_Q14[ j ][ RDmax_ind ] = psDelDec->sLPC_Q14[ j ][ RDmin_ind ]; - } - for( j = 0; j < numOthers; j++ ) { - ptr[ j ][ RDmax_ind ] = ptr[ j ][ RDmin_ind ]; - } - - psSampleState[ 0 ].Q_Q10[ RDmax_ind ] = psSampleState[ 1 ].Q_Q10[ RDmin_ind ]; - psSampleState[ 0 ].RD_Q10[ RDmax_ind ] = psSampleState[ 1 ].RD_Q10[ RDmin_ind ]; - psSampleState[ 0 ].xq_Q14[ RDmax_ind ] = psSampleState[ 1 ].xq_Q14[ RDmin_ind ]; - psSampleState[ 0 ].LF_AR_Q14[ RDmax_ind ] = psSampleState[ 1 ].LF_AR_Q14[ RDmin_ind ]; - psSampleState[ 0 ].Diff_Q14[ RDmax_ind ] = psSampleState[ 1 ].Diff_Q14[ RDmin_ind ]; - psSampleState[ 0 ].sLTP_shp_Q14[ RDmax_ind ] = psSampleState[ 1 ].sLTP_shp_Q14[ RDmin_ind ]; - psSampleState[ 0 ].LPC_exc_Q14[ RDmax_ind ] = psSampleState[ 1 ].LPC_exc_Q14[ RDmin_ind ]; - } - - /* Write samples from winner to output and long-term filter states */ - if( subfr > 0 || i >= decisionDelay ) { - pulses[ i - decisionDelay ] = (opus_int8)silk_RSHIFT_ROUND( psDelDec->Q_Q10[ last_smple_idx ][ Winner_ind ], 10 ); - xq[ i - decisionDelay ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( - silk_SMULWW( psDelDec->Xq_Q14[ last_smple_idx ][ Winner_ind ], delayedGain_Q10[ last_smple_idx ] ), 8 ) ); - NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - decisionDelay ] = psDelDec->Shape_Q14[ last_smple_idx ][ Winner_ind ]; - sLTP_Q15[ NSQ->sLTP_buf_idx - decisionDelay ] = psDelDec->Pred_Q15[ last_smple_idx ][ Winner_ind ]; - } - NSQ->sLTP_shp_buf_idx++; - NSQ->sLTP_buf_idx++; - - /* Update states */ - vst1q_s32( psDelDec->LF_AR_Q14, vld1q_s32( psSampleState[ 0 ].LF_AR_Q14 ) ); - vst1q_s32( psDelDec->Diff_Q14, vld1q_s32( psSampleState[ 0 ].Diff_Q14 ) ); - vst1q_s32( psDelDec->sLPC_Q14[ NSQ_LPC_BUF_LENGTH + i ], vld1q_s32( psSampleState[ 0 ].xq_Q14 ) ); - vst1q_s32( psDelDec->Xq_Q14[ *smpl_buf_idx ], vld1q_s32( psSampleState[ 0 ].xq_Q14 ) ); - tmp1_s32x4 = vld1q_s32( psSampleState[ 0 ].Q_Q10 ); - vst1q_s32( psDelDec->Q_Q10[ *smpl_buf_idx ], tmp1_s32x4 ); - vst1q_s32( psDelDec->Pred_Q15[ *smpl_buf_idx ], vshlq_n_s32( vld1q_s32( psSampleState[ 0 ].LPC_exc_Q14 ), 1 ) ); - vst1q_s32( psDelDec->Shape_Q14[ *smpl_buf_idx ], vld1q_s32( psSampleState[ 0 ].sLTP_shp_Q14 ) ); - tmp1_s32x4 = vrshrq_n_s32( tmp1_s32x4, 10 ); - tmp1_s32x4 = vaddq_s32( vld1q_s32( psDelDec->Seed ), tmp1_s32x4 ); - vst1q_s32( psDelDec->Seed, tmp1_s32x4 ); - vst1q_s32( psDelDec->RandState[ *smpl_buf_idx ], tmp1_s32x4 ); - vst1q_s32( psDelDec->RD_Q10, vld1q_s32( psSampleState[ 0 ].RD_Q10 ) ); - delayedGain_Q10[ *smpl_buf_idx ] = Gain_Q10; - } - /* Update LPC states */ - silk_memcpy( psDelDec->sLPC_Q14[ 0 ], psDelDec->sLPC_Q14[ length ], NEON_MAX_DEL_DEC_STATES * NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) ); - - RESTORE_STACK; -} - -static OPUS_INLINE void silk_SMULWB_8_neon( - const opus_int16 *a, - const int32x2_t b, - opus_int32 *o -) -{ - const int16x8_t a_s16x8 = vld1q_s16( a ); - int32x4_t o0_s32x4, o1_s32x4; - - o0_s32x4 = vshll_n_s16( vget_low_s16( a_s16x8 ), 15 ); - o1_s32x4 = vshll_n_s16( vget_high_s16( a_s16x8 ), 15 ); - o0_s32x4 = vqdmulhq_lane_s32( o0_s32x4, b, 0 ); - o1_s32x4 = vqdmulhq_lane_s32( o1_s32x4, b, 0 ); - vst1q_s32( o, o0_s32x4 ); - vst1q_s32( o + 4, o1_s32x4 ); -} - -/* Only works when ( b >= -65536 ) && ( b < 65536 ). */ -static OPUS_INLINE void silk_SMULWW_small_b_4_neon( - opus_int32 *a, - const int32x2_t b_s32x2) -{ - int32x4_t o_s32x4; - - o_s32x4 = vld1q_s32( a ); - o_s32x4 = vqdmulhq_lane_s32( o_s32x4, b_s32x2, 0 ); - vst1q_s32( a, o_s32x4 ); -} - -/* Only works when ( b >= -65536 ) && ( b < 65536 ). */ -static OPUS_INLINE void silk_SMULWW_small_b_8_neon( - opus_int32 *a, - const int32x2_t b_s32x2 -) -{ - int32x4_t o0_s32x4, o1_s32x4; - - o0_s32x4 = vld1q_s32( a ); - o1_s32x4 = vld1q_s32( a + 4 ); - o0_s32x4 = vqdmulhq_lane_s32( o0_s32x4, b_s32x2, 0 ); - o1_s32x4 = vqdmulhq_lane_s32( o1_s32x4, b_s32x2, 0 ); - vst1q_s32( a, o0_s32x4 ); - vst1q_s32( a + 4, o1_s32x4 ); -} - -static OPUS_INLINE void silk_SMULWW_4_neon( - opus_int32 *a, - const int32x2_t b_s32x2) -{ - int32x4_t a_s32x4, o_s32x4; - - a_s32x4 = vld1q_s32( a ); - o_s32x4 = vqdmulhq_lane_s32( a_s32x4, b_s32x2, 0 ); - o_s32x4 = vmlaq_lane_s32( o_s32x4, a_s32x4, b_s32x2, 1 ); - vst1q_s32( a, o_s32x4 ); -} - -static OPUS_INLINE void silk_SMULWW_8_neon( - opus_int32 *a, - const int32x2_t b_s32x2 -) -{ - int32x4_t a0_s32x4, a1_s32x4, o0_s32x4, o1_s32x4; - - a0_s32x4 = vld1q_s32( a ); - a1_s32x4 = vld1q_s32( a + 4 ); - o0_s32x4 = vqdmulhq_lane_s32( a0_s32x4, b_s32x2, 0 ); - o1_s32x4 = vqdmulhq_lane_s32( a1_s32x4, b_s32x2, 0 ); - o0_s32x4 = vmlaq_lane_s32( o0_s32x4, a0_s32x4, b_s32x2, 1 ); - o1_s32x4 = vmlaq_lane_s32( o1_s32x4, a1_s32x4, b_s32x2, 1 ); - vst1q_s32( a, o0_s32x4 ); - vst1q_s32( a + 4, o1_s32x4 ); -} - -static OPUS_INLINE void silk_SMULWW_loop_neon( - const opus_int16 *a, - const opus_int32 b, - opus_int32 *o, - const opus_int loop_num -) -{ - opus_int i; - int32x2_t b_s32x2; - - b_s32x2 = vdup_n_s32( b ); - for( i = 0; i < loop_num - 7; i += 8 ) { - silk_SMULWB_8_neon( a + i, b_s32x2, o + i ); - } - for( ; i < loop_num; i++ ) { - o[ i ] = silk_SMULWW( a[ i ], b ); - } -} - -static OPUS_INLINE void silk_nsq_del_dec_scale_states_neon( - const silk_encoder_state *psEncC, /* I Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - NSQ_del_decs_struct psDelDec[], /* I/O Delayed decision states */ - const opus_int16 x16[], /* I Input */ - opus_int32 x_sc_Q10[], /* O Input scaled with 1/Gain in Q10 */ - const opus_int16 sLTP[], /* I Re-whitened LTP state in Q0 */ - opus_int32 sLTP_Q15[], /* O LTP state matching scaled input */ - opus_int subfr, /* I Subframe number */ - const opus_int LTP_scale_Q14, /* I LTP state scaling */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lag */ - const opus_int signal_type, /* I Signal type */ - const opus_int decisionDelay /* I Decision delay */ -) -{ - opus_int i, lag; - opus_int32 gain_adj_Q16, inv_gain_Q31, inv_gain_Q26; - - lag = pitchL[ subfr ]; - inv_gain_Q31 = silk_INVERSE32_varQ( silk_max( Gains_Q16[ subfr ], 1 ), 47 ); - silk_assert( inv_gain_Q31 != 0 ); - - /* Scale input */ - inv_gain_Q26 = silk_RSHIFT_ROUND( inv_gain_Q31, 5 ); - silk_SMULWW_loop_neon( x16, inv_gain_Q26, x_sc_Q10, psEncC->subfr_length ); - - /* After rewhitening the LTP state is un-scaled, so scale with inv_gain_Q16 */ - if( NSQ->rewhite_flag ) { - if( subfr == 0 ) { - /* Do LTP downscaling */ - inv_gain_Q31 = silk_LSHIFT( silk_SMULWB( inv_gain_Q31, LTP_scale_Q14 ), 2 ); - } - silk_SMULWW_loop_neon( sLTP + NSQ->sLTP_buf_idx - lag - LTP_ORDER / 2, inv_gain_Q31, sLTP_Q15 + NSQ->sLTP_buf_idx - lag - LTP_ORDER / 2, lag + LTP_ORDER / 2 ); - } - - /* Adjust for changing gain */ - if( Gains_Q16[ subfr ] != NSQ->prev_gain_Q16 ) { - int32x2_t gain_adj_Q16_s32x2; - gain_adj_Q16 = silk_DIV32_varQ( NSQ->prev_gain_Q16, Gains_Q16[ subfr ], 16 ); - - /* Scale long-term shaping state */ - if( ( gain_adj_Q16 >= -65536 ) && ( gain_adj_Q16 < 65536 ) ) { - gain_adj_Q16_s32x2 = vdup_n_s32( silk_LSHIFT32( gain_adj_Q16, 15 ) ); - for( i = NSQ->sLTP_shp_buf_idx - psEncC->ltp_mem_length; i < NSQ->sLTP_shp_buf_idx - 7; i += 8 ) { - silk_SMULWW_small_b_8_neon( NSQ->sLTP_shp_Q14 + i, gain_adj_Q16_s32x2 ); - } - for( ; i < NSQ->sLTP_shp_buf_idx; i++ ) { - NSQ->sLTP_shp_Q14[ i ] = silk_SMULWW( gain_adj_Q16, NSQ->sLTP_shp_Q14[ i ] ); - } - - /* Scale long-term prediction state */ - if( signal_type == TYPE_VOICED && NSQ->rewhite_flag == 0 ) { - for( i = NSQ->sLTP_buf_idx - lag - LTP_ORDER / 2; i < NSQ->sLTP_buf_idx - decisionDelay - 7; i += 8 ) { - silk_SMULWW_small_b_8_neon( sLTP_Q15 + i, gain_adj_Q16_s32x2 ); - } - for( ; i < NSQ->sLTP_buf_idx - decisionDelay; i++ ) { - sLTP_Q15[ i ] = silk_SMULWW( gain_adj_Q16, sLTP_Q15[ i ] ); - } - } - - /* Scale scalar states */ - silk_SMULWW_small_b_4_neon( psDelDec->LF_AR_Q14, gain_adj_Q16_s32x2 ); - silk_SMULWW_small_b_4_neon( psDelDec->Diff_Q14, gain_adj_Q16_s32x2 ); - - /* Scale short-term prediction and shaping states */ - for( i = 0; i < NSQ_LPC_BUF_LENGTH; i++ ) { - silk_SMULWW_small_b_4_neon( psDelDec->sLPC_Q14[ i ], gain_adj_Q16_s32x2 ); - } - - for( i = 0; i < MAX_SHAPE_LPC_ORDER; i++ ) { - silk_SMULWW_small_b_4_neon( psDelDec->sAR2_Q14[ i ], gain_adj_Q16_s32x2 ); - } - - for( i = 0; i < DECISION_DELAY; i++ ) { - silk_SMULWW_small_b_4_neon( psDelDec->Pred_Q15[ i ], gain_adj_Q16_s32x2 ); - silk_SMULWW_small_b_4_neon( psDelDec->Shape_Q14[ i ], gain_adj_Q16_s32x2 ); - } - } else { - gain_adj_Q16_s32x2 = vdup_n_s32( silk_LSHIFT32( gain_adj_Q16 & 0x0000FFFF, 15 ) ); - gain_adj_Q16_s32x2 = vset_lane_s32( gain_adj_Q16 >> 16, gain_adj_Q16_s32x2, 1 ); - for( i = NSQ->sLTP_shp_buf_idx - psEncC->ltp_mem_length; i < NSQ->sLTP_shp_buf_idx - 7; i += 8 ) { - silk_SMULWW_8_neon( NSQ->sLTP_shp_Q14 + i, gain_adj_Q16_s32x2 ); - } - for( ; i < NSQ->sLTP_shp_buf_idx; i++ ) { - NSQ->sLTP_shp_Q14[ i ] = silk_SMULWW( gain_adj_Q16, NSQ->sLTP_shp_Q14[ i ] ); - } - - /* Scale long-term prediction state */ - if( signal_type == TYPE_VOICED && NSQ->rewhite_flag == 0 ) { - for( i = NSQ->sLTP_buf_idx - lag - LTP_ORDER / 2; i < NSQ->sLTP_buf_idx - decisionDelay - 7; i += 8 ) { - silk_SMULWW_8_neon( sLTP_Q15 + i, gain_adj_Q16_s32x2 ); - } - for( ; i < NSQ->sLTP_buf_idx - decisionDelay; i++ ) { - sLTP_Q15[ i ] = silk_SMULWW( gain_adj_Q16, sLTP_Q15[ i ] ); - } - } - - /* Scale scalar states */ - silk_SMULWW_4_neon( psDelDec->LF_AR_Q14, gain_adj_Q16_s32x2 ); - silk_SMULWW_4_neon( psDelDec->Diff_Q14, gain_adj_Q16_s32x2 ); - - /* Scale short-term prediction and shaping states */ - for( i = 0; i < NSQ_LPC_BUF_LENGTH; i++ ) { - silk_SMULWW_4_neon( psDelDec->sLPC_Q14[ i ], gain_adj_Q16_s32x2 ); - } - - for( i = 0; i < MAX_SHAPE_LPC_ORDER; i++ ) { - silk_SMULWW_4_neon( psDelDec->sAR2_Q14[ i ], gain_adj_Q16_s32x2 ); - } - - for( i = 0; i < DECISION_DELAY; i++ ) { - silk_SMULWW_4_neon( psDelDec->Pred_Q15[ i ], gain_adj_Q16_s32x2 ); - silk_SMULWW_4_neon( psDelDec->Shape_Q14[ i ], gain_adj_Q16_s32x2 ); - } - } - - /* Save inverse gain */ - NSQ->prev_gain_Q16 = Gains_Q16[ subfr ]; - } -} diff --git a/thirdparty/opus/silk/arm/NSQ_neon.h b/thirdparty/opus/silk/arm/NSQ_neon.h index b31d9442d6..77c946af85 100644 --- a/thirdparty/opus/silk/arm/NSQ_neon.h +++ b/thirdparty/opus/silk/arm/NSQ_neon.h @@ -28,31 +28,30 @@ POSSIBILITY OF SUCH DAMAGE. #define SILK_NSQ_NEON_H #include "cpu_support.h" -#include "SigProc_FIX.h" #undef silk_short_prediction_create_arch_coef /* For vectorized calc, reverse a_Q12 coefs, convert to 32-bit, and shift for vqdmulhq_s32. */ static OPUS_INLINE void silk_short_prediction_create_arch_coef_neon(opus_int32 *out, const opus_int16 *in, opus_int order) { - out[15] = silk_LSHIFT32(in[0], 15); - out[14] = silk_LSHIFT32(in[1], 15); - out[13] = silk_LSHIFT32(in[2], 15); - out[12] = silk_LSHIFT32(in[3], 15); - out[11] = silk_LSHIFT32(in[4], 15); - out[10] = silk_LSHIFT32(in[5], 15); - out[9] = silk_LSHIFT32(in[6], 15); - out[8] = silk_LSHIFT32(in[7], 15); - out[7] = silk_LSHIFT32(in[8], 15); - out[6] = silk_LSHIFT32(in[9], 15); + out[15] = in[0] << 15; + out[14] = in[1] << 15; + out[13] = in[2] << 15; + out[12] = in[3] << 15; + out[11] = in[4] << 15; + out[10] = in[5] << 15; + out[9] = in[6] << 15; + out[8] = in[7] << 15; + out[7] = in[8] << 15; + out[6] = in[9] << 15; if (order == 16) { - out[5] = silk_LSHIFT32(in[10], 15); - out[4] = silk_LSHIFT32(in[11], 15); - out[3] = silk_LSHIFT32(in[12], 15); - out[2] = silk_LSHIFT32(in[13], 15); - out[1] = silk_LSHIFT32(in[14], 15); - out[0] = silk_LSHIFT32(in[15], 15); + out[5] = in[10] << 15; + out[4] = in[11] << 15; + out[3] = in[12] << 15; + out[2] = in[13] << 15; + out[1] = in[14] << 15; + out[0] = in[15] << 15; } else { diff --git a/thirdparty/opus/silk/arm/arm_silk_map.c b/thirdparty/opus/silk/arm/arm_silk_map.c index 0b9bfec2ca..9bd86a7b21 100644 --- a/thirdparty/opus/silk/arm/arm_silk_map.c +++ b/thirdparty/opus/silk/arm/arm_silk_map.c @@ -28,62 +28,13 @@ POSSIBILITY OF SUCH DAMAGE. # include "config.h" #endif -#include "main_FIX.h" #include "NSQ.h" -#include "SigProc_FIX.h" #if defined(OPUS_HAVE_RTCD) # if (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && \ !defined(OPUS_ARM_PRESUME_NEON_INTR)) -void (*const SILK_BIQUAD_ALT_STRIDE2_IMPL[OPUS_ARCHMASK + 1])( - const opus_int16 *in, /* I input signal */ - const opus_int32 *B_Q28, /* I MA coefficients [3] */ - const opus_int32 *A_Q28, /* I AR coefficients [2] */ - opus_int32 *S, /* I/O State vector [4] */ - opus_int16 *out, /* O output signal */ - const opus_int32 len /* I signal length (must be even) */ -) = { - silk_biquad_alt_stride2_c, /* ARMv4 */ - silk_biquad_alt_stride2_c, /* EDSP */ - silk_biquad_alt_stride2_c, /* Media */ - silk_biquad_alt_stride2_neon, /* Neon */ -}; - -opus_int32 (*const SILK_LPC_INVERSE_PRED_GAIN_IMPL[OPUS_ARCHMASK + 1])( /* O Returns inverse prediction gain in energy domain, Q30 */ - const opus_int16 *A_Q12, /* I Prediction coefficients, Q12 [order] */ - const opus_int order /* I Prediction order */ -) = { - silk_LPC_inverse_pred_gain_c, /* ARMv4 */ - silk_LPC_inverse_pred_gain_c, /* EDSP */ - silk_LPC_inverse_pred_gain_c, /* Media */ - silk_LPC_inverse_pred_gain_neon, /* Neon */ -}; - -void (*const SILK_NSQ_DEL_DEC_IMPL[OPUS_ARCHMASK + 1])( - const silk_encoder_state *psEncC, /* I Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - SideInfoIndices *psIndices, /* I/O Quantization Indices */ - const opus_int16 x16[], /* I Input */ - opus_int8 pulses[], /* O Quantized pulse signal */ - const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ - const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ - const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ - const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ - const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ - const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ - const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ - const opus_int LTP_scale_Q14 /* I LTP state scaling */ -) = { - silk_NSQ_del_dec_c, /* ARMv4 */ - silk_NSQ_del_dec_c, /* EDSP */ - silk_NSQ_del_dec_c, /* Media */ - silk_NSQ_del_dec_neon, /* Neon */ -}; - /*There is no table for silk_noise_shape_quantizer_short_prediction because the NEON version takes different parameters than the C version. Instead RTCD is done via if statements at the call sites. @@ -101,23 +52,4 @@ opus_int32 # endif -# if defined(FIXED_POINT) && \ - defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR) - -void (*const SILK_WARPED_AUTOCORRELATION_FIX_IMPL[OPUS_ARCHMASK + 1])( - opus_int32 *corr, /* O Result [order + 1] */ - opus_int *scale, /* O Scaling of the correlation vector */ - const opus_int16 *input, /* I Input data to correlate */ - const opus_int warping_Q16, /* I Warping coefficient */ - const opus_int length, /* I Length of input */ - const opus_int order /* I Correlation order (even) */ -) = { - silk_warped_autocorrelation_FIX_c, /* ARMv4 */ - silk_warped_autocorrelation_FIX_c, /* EDSP */ - silk_warped_autocorrelation_FIX_c, /* Media */ - silk_warped_autocorrelation_FIX_neon, /* Neon */ -}; - -# endif - #endif /* OPUS_HAVE_RTCD */ diff --git a/thirdparty/opus/silk/arm/biquad_alt_arm.h b/thirdparty/opus/silk/arm/biquad_alt_arm.h deleted file mode 100644 index 66ea9f43dd..0000000000 --- a/thirdparty/opus/silk/arm/biquad_alt_arm.h +++ /dev/null @@ -1,68 +0,0 @@ -/*********************************************************************** -Copyright (c) 2017 Google Inc. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifndef SILK_BIQUAD_ALT_ARM_H -# define SILK_BIQUAD_ALT_ARM_H - -# include "celt/arm/armcpu.h" - -# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) -void silk_biquad_alt_stride2_neon( - const opus_int16 *in, /* I input signal */ - const opus_int32 *B_Q28, /* I MA coefficients [3] */ - const opus_int32 *A_Q28, /* I AR coefficients [2] */ - opus_int32 *S, /* I/O State vector [4] */ - opus_int16 *out, /* O output signal */ - const opus_int32 len /* I signal length (must be even) */ -); - -# if !defined(OPUS_HAVE_RTCD) && defined(OPUS_ARM_PRESUME_NEON) -# define OVERRIDE_silk_biquad_alt_stride2 (1) -# define silk_biquad_alt_stride2(in, B_Q28, A_Q28, S, out, len, arch) ((void)(arch), PRESUME_NEON(silk_biquad_alt_stride2)(in, B_Q28, A_Q28, S, out, len)) -# endif -# endif - -# if !defined(OVERRIDE_silk_biquad_alt_stride2) -/*Is run-time CPU detection enabled on this platform?*/ -# if defined(OPUS_HAVE_RTCD) && (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR)) -extern void (*const SILK_BIQUAD_ALT_STRIDE2_IMPL[OPUS_ARCHMASK+1])( - const opus_int16 *in, /* I input signal */ - const opus_int32 *B_Q28, /* I MA coefficients [3] */ - const opus_int32 *A_Q28, /* I AR coefficients [2] */ - opus_int32 *S, /* I/O State vector [4] */ - opus_int16 *out, /* O output signal */ - const opus_int32 len /* I signal length (must be even) */ - ); -# define OVERRIDE_silk_biquad_alt_stride2 (1) -# define silk_biquad_alt_stride2(in, B_Q28, A_Q28, S, out, len, arch) ((*SILK_BIQUAD_ALT_STRIDE2_IMPL[(arch)&OPUS_ARCHMASK])(in, B_Q28, A_Q28, S, out, len)) -# elif defined(OPUS_ARM_PRESUME_NEON_INTR) -# define OVERRIDE_silk_biquad_alt_stride2 (1) -# define silk_biquad_alt_stride2(in, B_Q28, A_Q28, S, out, len, arch) ((void)(arch), silk_biquad_alt_stride2_neon(in, B_Q28, A_Q28, S, out, len)) -# endif -# endif - -#endif /* end SILK_BIQUAD_ALT_ARM_H */ diff --git a/thirdparty/opus/silk/arm/biquad_alt_neon_intr.c b/thirdparty/opus/silk/arm/biquad_alt_neon_intr.c deleted file mode 100644 index 9715733185..0000000000 --- a/thirdparty/opus/silk/arm/biquad_alt_neon_intr.c +++ /dev/null @@ -1,156 +0,0 @@ -/*********************************************************************** -Copyright (c) 2017 Google Inc. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include <arm_neon.h> -#ifdef OPUS_CHECK_ASM -# include <string.h> -# include "stack_alloc.h" -#endif -#include "SigProc_FIX.h" - -static inline void silk_biquad_alt_stride2_kernel( const int32x4_t A_L_s32x4, const int32x4_t A_U_s32x4, const int32x4_t B_Q28_s32x4, const int32x2_t t_s32x2, const int32x4_t in_s32x4, int32x4_t *S_s32x4, int32x2_t *out32_Q14_s32x2 ) -{ - int32x4_t t_s32x4, out32_Q14_s32x4; - - *out32_Q14_s32x2 = vadd_s32( vget_low_s32( *S_s32x4 ), t_s32x2 ); /* silk_SMLAWB( S{0,1}, B_Q28[ 0 ], in{0,1} ) */ - *S_s32x4 = vcombine_s32( vget_high_s32( *S_s32x4 ), vdup_n_s32( 0 ) ); /* S{0,1} = S{2,3}; S{2,3} = 0; */ - *out32_Q14_s32x2 = vshl_n_s32( *out32_Q14_s32x2, 2 ); /* out32_Q14_{0,1} = silk_LSHIFT( silk_SMLAWB( S{0,1}, B_Q28[ 0 ], in{0,1} ), 2 ); */ - out32_Q14_s32x4 = vcombine_s32( *out32_Q14_s32x2, *out32_Q14_s32x2 ); /* out32_Q14_{0,1,0,1} */ - t_s32x4 = vqdmulhq_s32( out32_Q14_s32x4, A_L_s32x4 ); /* silk_SMULWB( out32_Q14_{0,1,0,1}, A{0,0,1,1}_L_Q28 ) */ - *S_s32x4 = vrsraq_n_s32( *S_s32x4, t_s32x4, 14 ); /* S{0,1} = S{2,3} + silk_RSHIFT_ROUND(); S{2,3} = silk_RSHIFT_ROUND(); */ - t_s32x4 = vqdmulhq_s32( out32_Q14_s32x4, A_U_s32x4 ); /* silk_SMULWB( out32_Q14_{0,1,0,1}, A{0,0,1,1}_U_Q28 ) */ - *S_s32x4 = vaddq_s32( *S_s32x4, t_s32x4 ); /* S0 = silk_SMLAWB( S{0,1,2,3}, out32_Q14_{0,1,0,1}, A{0,0,1,1}_U_Q28 ); */ - t_s32x4 = vqdmulhq_s32( in_s32x4, B_Q28_s32x4 ); /* silk_SMULWB( B_Q28[ {1,1,2,2} ], in{0,1,0,1} ) */ - *S_s32x4 = vaddq_s32( *S_s32x4, t_s32x4 ); /* S0 = silk_SMLAWB( S0, B_Q28[ {1,1,2,2} ], in{0,1,0,1} ); */ -} - -void silk_biquad_alt_stride2_neon( - const opus_int16 *in, /* I input signal */ - const opus_int32 *B_Q28, /* I MA coefficients [3] */ - const opus_int32 *A_Q28, /* I AR coefficients [2] */ - opus_int32 *S, /* I/O State vector [4] */ - opus_int16 *out, /* O output signal */ - const opus_int32 len /* I signal length (must be even) */ -) -{ - /* DIRECT FORM II TRANSPOSED (uses 2 element state vector) */ - opus_int k = 0; - const int32x2_t offset_s32x2 = vdup_n_s32( (1<<14) - 1 ); - const int32x4_t offset_s32x4 = vcombine_s32( offset_s32x2, offset_s32x2 ); - int16x4_t in_s16x4 = vdup_n_s16( 0 ); - int16x4_t out_s16x4; - int32x2_t A_Q28_s32x2, A_L_s32x2, A_U_s32x2, B_Q28_s32x2, t_s32x2; - int32x4_t A_L_s32x4, A_U_s32x4, B_Q28_s32x4, S_s32x4, out32_Q14_s32x4; - int32x2x2_t t0_s32x2x2, t1_s32x2x2, t2_s32x2x2, S_s32x2x2; - -#ifdef OPUS_CHECK_ASM - opus_int32 S_c[ 4 ]; - VARDECL( opus_int16, out_c ); - SAVE_STACK; - ALLOC( out_c, 2 * len, opus_int16 ); - - silk_memcpy( &S_c, S, sizeof( S_c ) ); - silk_biquad_alt_stride2_c( in, B_Q28, A_Q28, S_c, out_c, len ); -#endif - - /* Negate A_Q28 values and split in two parts */ - A_Q28_s32x2 = vld1_s32( A_Q28 ); - A_Q28_s32x2 = vneg_s32( A_Q28_s32x2 ); - A_L_s32x2 = vshl_n_s32( A_Q28_s32x2, 18 ); /* ( -A_Q28[] & 0x00003FFF ) << 18 */ - A_L_s32x2 = vreinterpret_s32_u32( vshr_n_u32( vreinterpret_u32_s32( A_L_s32x2 ), 3 ) ); /* ( -A_Q28[] & 0x00003FFF ) << 15 */ - A_U_s32x2 = vshr_n_s32( A_Q28_s32x2, 14 ); /* silk_RSHIFT( -A_Q28[], 14 ) */ - A_U_s32x2 = vshl_n_s32( A_U_s32x2, 16 ); /* silk_RSHIFT( -A_Q28[], 14 ) << 16 (Clip two leading bits to conform to C function.) */ - A_U_s32x2 = vshr_n_s32( A_U_s32x2, 1 ); /* silk_RSHIFT( -A_Q28[], 14 ) << 15 */ - - B_Q28_s32x2 = vld1_s32( B_Q28 ); - t_s32x2 = vld1_s32( B_Q28 + 1 ); - t0_s32x2x2 = vzip_s32( A_L_s32x2, A_L_s32x2 ); - t1_s32x2x2 = vzip_s32( A_U_s32x2, A_U_s32x2 ); - t2_s32x2x2 = vzip_s32( t_s32x2, t_s32x2 ); - A_L_s32x4 = vcombine_s32( t0_s32x2x2.val[ 0 ], t0_s32x2x2.val[ 1 ] ); /* A{0,0,1,1}_L_Q28 */ - A_U_s32x4 = vcombine_s32( t1_s32x2x2.val[ 0 ], t1_s32x2x2.val[ 1 ] ); /* A{0,0,1,1}_U_Q28 */ - B_Q28_s32x4 = vcombine_s32( t2_s32x2x2.val[ 0 ], t2_s32x2x2.val[ 1 ] ); /* B_Q28[ {1,1,2,2} ] */ - S_s32x4 = vld1q_s32( S ); /* S0 = S[ 0 ]; S3 = S[ 3 ]; */ - S_s32x2x2 = vtrn_s32( vget_low_s32( S_s32x4 ), vget_high_s32( S_s32x4 ) ); /* S2 = S[ 1 ]; S1 = S[ 2 ]; */ - S_s32x4 = vcombine_s32( S_s32x2x2.val[ 0 ], S_s32x2x2.val[ 1 ] ); - - for( ; k < len - 1; k += 2 ) { - int32x4_t in_s32x4[ 2 ], t_s32x4; - int32x2_t out32_Q14_s32x2[ 2 ]; - - /* S[ 2 * i + 0 ], S[ 2 * i + 1 ], S[ 2 * i + 2 ], S[ 2 * i + 3 ]: Q12 */ - in_s16x4 = vld1_s16( &in[ 2 * k ] ); /* in{0,1,2,3} = in[ 2 * k + {0,1,2,3} ]; */ - in_s32x4[ 0 ] = vshll_n_s16( in_s16x4, 15 ); /* in{0,1,2,3} << 15 */ - t_s32x4 = vqdmulhq_lane_s32( in_s32x4[ 0 ], B_Q28_s32x2, 0 ); /* silk_SMULWB( B_Q28[ 0 ], in{0,1,2,3} ) */ - in_s32x4[ 1 ] = vcombine_s32( vget_high_s32( in_s32x4[ 0 ] ), vget_high_s32( in_s32x4[ 0 ] ) ); /* in{2,3,2,3} << 15 */ - in_s32x4[ 0 ] = vcombine_s32( vget_low_s32 ( in_s32x4[ 0 ] ), vget_low_s32 ( in_s32x4[ 0 ] ) ); /* in{0,1,0,1} << 15 */ - silk_biquad_alt_stride2_kernel( A_L_s32x4, A_U_s32x4, B_Q28_s32x4, vget_low_s32 ( t_s32x4 ), in_s32x4[ 0 ], &S_s32x4, &out32_Q14_s32x2[ 0 ] ); - silk_biquad_alt_stride2_kernel( A_L_s32x4, A_U_s32x4, B_Q28_s32x4, vget_high_s32( t_s32x4 ), in_s32x4[ 1 ], &S_s32x4, &out32_Q14_s32x2[ 1 ] ); - - /* Scale back to Q0 and saturate */ - out32_Q14_s32x4 = vcombine_s32( out32_Q14_s32x2[ 0 ], out32_Q14_s32x2[ 1 ] ); /* out32_Q14_{0,1,2,3} */ - out32_Q14_s32x4 = vaddq_s32( out32_Q14_s32x4, offset_s32x4 ); /* out32_Q14_{0,1,2,3} + (1<<14) - 1 */ - out_s16x4 = vqshrn_n_s32( out32_Q14_s32x4, 14 ); /* (opus_int16)silk_SAT16( silk_RSHIFT( out32_Q14_{0,1,2,3} + (1<<14) - 1, 14 ) ) */ - vst1_s16( &out[ 2 * k ], out_s16x4 ); /* out[ 2 * k + {0,1,2,3} ] = (opus_int16)silk_SAT16( silk_RSHIFT( out32_Q14_{0,1,2,3} + (1<<14) - 1, 14 ) ); */ - } - - /* Process leftover. */ - if( k < len ) { - int32x4_t in_s32x4; - int32x2_t out32_Q14_s32x2; - - /* S[ 2 * i + 0 ], S[ 2 * i + 1 ]: Q12 */ - in_s16x4 = vld1_lane_s16( &in[ 2 * k + 0 ], in_s16x4, 0 ); /* in{0,1} = in[ 2 * k + {0,1} ]; */ - in_s16x4 = vld1_lane_s16( &in[ 2 * k + 1 ], in_s16x4, 1 ); /* in{0,1} = in[ 2 * k + {0,1} ]; */ - in_s32x4 = vshll_n_s16( in_s16x4, 15 ); /* in{0,1} << 15 */ - t_s32x2 = vqdmulh_lane_s32( vget_low_s32( in_s32x4 ), B_Q28_s32x2, 0 ); /* silk_SMULWB( B_Q28[ 0 ], in{0,1} ) */ - in_s32x4 = vcombine_s32( vget_low_s32( in_s32x4 ), vget_low_s32( in_s32x4 ) ); /* in{0,1,0,1} << 15 */ - silk_biquad_alt_stride2_kernel( A_L_s32x4, A_U_s32x4, B_Q28_s32x4, t_s32x2, in_s32x4, &S_s32x4, &out32_Q14_s32x2 ); - - /* Scale back to Q0 and saturate */ - out32_Q14_s32x2 = vadd_s32( out32_Q14_s32x2, offset_s32x2 ); /* out32_Q14_{0,1} + (1<<14) - 1 */ - out32_Q14_s32x4 = vcombine_s32( out32_Q14_s32x2, out32_Q14_s32x2 ); /* out32_Q14_{0,1,0,1} + (1<<14) - 1 */ - out_s16x4 = vqshrn_n_s32( out32_Q14_s32x4, 14 ); /* (opus_int16)silk_SAT16( silk_RSHIFT( out32_Q14_{0,1,0,1} + (1<<14) - 1, 14 ) ) */ - vst1_lane_s16( &out[ 2 * k + 0 ], out_s16x4, 0 ); /* out[ 2 * k + 0 ] = (opus_int16)silk_SAT16( silk_RSHIFT( out32_Q14_0 + (1<<14) - 1, 14 ) ); */ - vst1_lane_s16( &out[ 2 * k + 1 ], out_s16x4, 1 ); /* out[ 2 * k + 1 ] = (opus_int16)silk_SAT16( silk_RSHIFT( out32_Q14_1 + (1<<14) - 1, 14 ) ); */ - } - - vst1q_lane_s32( &S[ 0 ], S_s32x4, 0 ); /* S[ 0 ] = S0; */ - vst1q_lane_s32( &S[ 1 ], S_s32x4, 2 ); /* S[ 1 ] = S2; */ - vst1q_lane_s32( &S[ 2 ], S_s32x4, 1 ); /* S[ 2 ] = S1; */ - vst1q_lane_s32( &S[ 3 ], S_s32x4, 3 ); /* S[ 3 ] = S3; */ - -#ifdef OPUS_CHECK_ASM - silk_assert( !memcmp( S_c, S, sizeof( S_c ) ) ); - silk_assert( !memcmp( out_c, out, 2 * len * sizeof( opus_int16 ) ) ); - RESTORE_STACK; -#endif -} diff --git a/thirdparty/opus/silk/arm/macros_armv4.h b/thirdparty/opus/silk/arm/macros_armv4.h index 877eb18dd5..3f30e97288 100644 --- a/thirdparty/opus/silk/arm/macros_armv4.h +++ b/thirdparty/opus/silk/arm/macros_armv4.h @@ -28,11 +28,6 @@ POSSIBILITY OF SUCH DAMAGE. #ifndef SILK_MACROS_ARMv4_H #define SILK_MACROS_ARMv4_H -/* This macro only avoids the undefined behaviour from a left shift of - a negative value. It should only be used in macros that can't include - SigProc_FIX.h. In other cases, use silk_LSHIFT32(). */ -#define SAFE_SHL(a,b) ((opus_int32)((opus_uint32)(a) << (b))) - /* (a32 * (opus_int32)((opus_int16)(b32))) >> 16 output have to be 32bit int */ #undef silk_SMULWB static OPUS_INLINE opus_int32 silk_SMULWB_armv4(opus_int32 a, opus_int16 b) @@ -43,7 +38,7 @@ static OPUS_INLINE opus_int32 silk_SMULWB_armv4(opus_int32 a, opus_int16 b) "#silk_SMULWB\n\t" "smull %0, %1, %2, %3\n\t" : "=&r"(rd_lo), "=&r"(rd_hi) - : "%r"(a), "r"(SAFE_SHL(b,16)) + : "%r"(a), "r"(b<<16) ); return rd_hi; } @@ -85,7 +80,7 @@ static OPUS_INLINE opus_int32 silk_SMULWW_armv4(opus_int32 a, opus_int32 b) : "=&r"(rd_lo), "=&r"(rd_hi) : "%r"(a), "r"(b) ); - return SAFE_SHL(rd_hi,16)+(rd_lo>>16); + return (rd_hi<<16)+(rd_lo>>16); } #define silk_SMULWW(a, b) (silk_SMULWW_armv4(a, b)) @@ -101,10 +96,8 @@ static OPUS_INLINE opus_int32 silk_SMLAWW_armv4(opus_int32 a, opus_int32 b, : "=&r"(rd_lo), "=&r"(rd_hi) : "%r"(b), "r"(c) ); - return a+SAFE_SHL(rd_hi,16)+(rd_lo>>16); + return a+(rd_hi<<16)+(rd_lo>>16); } #define silk_SMLAWW(a, b, c) (silk_SMLAWW_armv4(a, b, c)) -#undef SAFE_SHL - #endif /* SILK_MACROS_ARMv4_H */ diff --git a/thirdparty/opus/silk/arm/macros_armv5e.h b/thirdparty/opus/silk/arm/macros_armv5e.h index b14ec65ddb..aad4117e46 100644 --- a/thirdparty/opus/silk/arm/macros_armv5e.h +++ b/thirdparty/opus/silk/arm/macros_armv5e.h @@ -29,11 +29,6 @@ POSSIBILITY OF SUCH DAMAGE. #ifndef SILK_MACROS_ARMv5E_H #define SILK_MACROS_ARMv5E_H -/* This macro only avoids the undefined behaviour from a left shift of - a negative value. It should only be used in macros that can't include - SigProc_FIX.h. In other cases, use silk_LSHIFT32(). */ -#define SAFE_SHL(a,b) ((opus_int32)((opus_uint32)(a) << (b))) - /* (a32 * (opus_int32)((opus_int16)(b32))) >> 16 output have to be 32bit int */ #undef silk_SMULWB static OPUS_INLINE opus_int32 silk_SMULWB_armv5e(opus_int32 a, opus_int16 b) @@ -195,7 +190,7 @@ static OPUS_INLINE opus_int32 silk_CLZ16_armv5(opus_int16 in16) "#silk_CLZ16\n\t" "clz %0, %1;\n" : "=r"(res) - : "r"(SAFE_SHL(in16,16)|0x8000) + : "r"(in16<<16|0x8000) ); return res; } @@ -215,6 +210,4 @@ static OPUS_INLINE opus_int32 silk_CLZ32_armv5(opus_int32 in32) } #define silk_CLZ32(in32) (silk_CLZ32_armv5(in32)) -#undef SAFE_SHL - #endif /* SILK_MACROS_ARMv5E_H */ diff --git a/thirdparty/opus/silk/biquad_alt.c b/thirdparty/opus/silk/biquad_alt.c index 54566a43c0..d55f5ee92e 100644 --- a/thirdparty/opus/silk/biquad_alt.c +++ b/thirdparty/opus/silk/biquad_alt.c @@ -39,13 +39,14 @@ POSSIBILITY OF SUCH DAMAGE. #include "SigProc_FIX.h" /* Second order ARMA filter, alternative implementation */ -void silk_biquad_alt_stride1( +void silk_biquad_alt( const opus_int16 *in, /* I input signal */ const opus_int32 *B_Q28, /* I MA coefficients [3] */ const opus_int32 *A_Q28, /* I AR coefficients [2] */ opus_int32 *S, /* I/O State vector [2] */ opus_int16 *out, /* O output signal */ - const opus_int32 len /* I signal length (must be even) */ + const opus_int32 len, /* I signal length (must be even) */ + opus_int stride /* I Operate on interleaved signal if > 1 */ ) { /* DIRECT FORM II TRANSPOSED (uses 2 element state vector) */ @@ -60,7 +61,7 @@ void silk_biquad_alt_stride1( for( k = 0; k < len; k++ ) { /* S[ 0 ], S[ 1 ]: Q12 */ - inval = in[ k ]; + inval = in[ k * stride ]; out32_Q14 = silk_LSHIFT( silk_SMLAWB( S[ 0 ], B_Q28[ 0 ], inval ), 2 ); S[ 0 ] = S[1] + silk_RSHIFT_ROUND( silk_SMULWB( out32_Q14, A0_L_Q28 ), 14 ); @@ -72,50 +73,6 @@ void silk_biquad_alt_stride1( S[ 1 ] = silk_SMLAWB( S[ 1 ], B_Q28[ 2 ], inval ); /* Scale back to Q0 and saturate */ - out[ k ] = (opus_int16)silk_SAT16( silk_RSHIFT( out32_Q14 + (1<<14) - 1, 14 ) ); - } -} - -void silk_biquad_alt_stride2_c( - const opus_int16 *in, /* I input signal */ - const opus_int32 *B_Q28, /* I MA coefficients [3] */ - const opus_int32 *A_Q28, /* I AR coefficients [2] */ - opus_int32 *S, /* I/O State vector [4] */ - opus_int16 *out, /* O output signal */ - const opus_int32 len /* I signal length (must be even) */ -) -{ - /* DIRECT FORM II TRANSPOSED (uses 2 element state vector) */ - opus_int k; - opus_int32 A0_U_Q28, A0_L_Q28, A1_U_Q28, A1_L_Q28, out32_Q14[ 2 ]; - - /* Negate A_Q28 values and split in two parts */ - A0_L_Q28 = ( -A_Q28[ 0 ] ) & 0x00003FFF; /* lower part */ - A0_U_Q28 = silk_RSHIFT( -A_Q28[ 0 ], 14 ); /* upper part */ - A1_L_Q28 = ( -A_Q28[ 1 ] ) & 0x00003FFF; /* lower part */ - A1_U_Q28 = silk_RSHIFT( -A_Q28[ 1 ], 14 ); /* upper part */ - - for( k = 0; k < len; k++ ) { - /* S[ 0 ], S[ 1 ], S[ 2 ], S[ 3 ]: Q12 */ - out32_Q14[ 0 ] = silk_LSHIFT( silk_SMLAWB( S[ 0 ], B_Q28[ 0 ], in[ 2 * k + 0 ] ), 2 ); - out32_Q14[ 1 ] = silk_LSHIFT( silk_SMLAWB( S[ 2 ], B_Q28[ 0 ], in[ 2 * k + 1 ] ), 2 ); - - S[ 0 ] = S[ 1 ] + silk_RSHIFT_ROUND( silk_SMULWB( out32_Q14[ 0 ], A0_L_Q28 ), 14 ); - S[ 2 ] = S[ 3 ] + silk_RSHIFT_ROUND( silk_SMULWB( out32_Q14[ 1 ], A0_L_Q28 ), 14 ); - S[ 0 ] = silk_SMLAWB( S[ 0 ], out32_Q14[ 0 ], A0_U_Q28 ); - S[ 2 ] = silk_SMLAWB( S[ 2 ], out32_Q14[ 1 ], A0_U_Q28 ); - S[ 0 ] = silk_SMLAWB( S[ 0 ], B_Q28[ 1 ], in[ 2 * k + 0 ] ); - S[ 2 ] = silk_SMLAWB( S[ 2 ], B_Q28[ 1 ], in[ 2 * k + 1 ] ); - - S[ 1 ] = silk_RSHIFT_ROUND( silk_SMULWB( out32_Q14[ 0 ], A1_L_Q28 ), 14 ); - S[ 3 ] = silk_RSHIFT_ROUND( silk_SMULWB( out32_Q14[ 1 ], A1_L_Q28 ), 14 ); - S[ 1 ] = silk_SMLAWB( S[ 1 ], out32_Q14[ 0 ], A1_U_Q28 ); - S[ 3 ] = silk_SMLAWB( S[ 3 ], out32_Q14[ 1 ], A1_U_Q28 ); - S[ 1 ] = silk_SMLAWB( S[ 1 ], B_Q28[ 2 ], in[ 2 * k + 0 ] ); - S[ 3 ] = silk_SMLAWB( S[ 3 ], B_Q28[ 2 ], in[ 2 * k + 1 ] ); - - /* Scale back to Q0 and saturate */ - out[ 2 * k + 0 ] = (opus_int16)silk_SAT16( silk_RSHIFT( out32_Q14[ 0 ] + (1<<14) - 1, 14 ) ); - out[ 2 * k + 1 ] = (opus_int16)silk_SAT16( silk_RSHIFT( out32_Q14[ 1 ] + (1<<14) - 1, 14 ) ); + out[ k * stride ] = (opus_int16)silk_SAT16( silk_RSHIFT( out32_Q14 + (1<<14) - 1, 14 ) ); } } diff --git a/thirdparty/opus/silk/bwexpander.c b/thirdparty/opus/silk/bwexpander.c index afa97907ec..2eb4456695 100644 --- a/thirdparty/opus/silk/bwexpander.c +++ b/thirdparty/opus/silk/bwexpander.c @@ -45,7 +45,7 @@ void silk_bwexpander( /* Bias in silk_SMULWB can lead to unstable filters */ for( i = 0; i < d - 1; i++ ) { ar[ i ] = (opus_int16)silk_RSHIFT_ROUND( silk_MUL( chirp_Q16, ar[ i ] ), 16 ); - chirp_Q16 += silk_RSHIFT_ROUND( silk_MUL( chirp_Q16, chirp_minus_one_Q16 ), 16 ); + chirp_Q16 += silk_RSHIFT_ROUND( silk_MUL( chirp_Q16, chirp_minus_one_Q16 ), 16 ); } ar[ d - 1 ] = (opus_int16)silk_RSHIFT_ROUND( silk_MUL( chirp_Q16, ar[ d - 1 ] ), 16 ); } diff --git a/thirdparty/opus/silk/check_control_input.c b/thirdparty/opus/silk/check_control_input.c index 739fb01f1e..b5de9ce48d 100644 --- a/thirdparty/opus/silk/check_control_input.c +++ b/thirdparty/opus/silk/check_control_input.c @@ -38,7 +38,7 @@ opus_int check_control_input( silk_EncControlStruct *encControl /* I Control structure */ ) { - celt_assert( encControl != NULL ); + silk_assert( encControl != NULL ); if( ( ( encControl->API_sampleRate != 8000 ) && ( encControl->API_sampleRate != 12000 ) && @@ -59,46 +59,46 @@ opus_int check_control_input( ( encControl->minInternalSampleRate > encControl->desiredInternalSampleRate ) || ( encControl->maxInternalSampleRate < encControl->desiredInternalSampleRate ) || ( encControl->minInternalSampleRate > encControl->maxInternalSampleRate ) ) { - celt_assert( 0 ); + silk_assert( 0 ); return SILK_ENC_FS_NOT_SUPPORTED; } if( encControl->payloadSize_ms != 10 && encControl->payloadSize_ms != 20 && encControl->payloadSize_ms != 40 && encControl->payloadSize_ms != 60 ) { - celt_assert( 0 ); + silk_assert( 0 ); return SILK_ENC_PACKET_SIZE_NOT_SUPPORTED; } if( encControl->packetLossPercentage < 0 || encControl->packetLossPercentage > 100 ) { - celt_assert( 0 ); + silk_assert( 0 ); return SILK_ENC_INVALID_LOSS_RATE; } if( encControl->useDTX < 0 || encControl->useDTX > 1 ) { - celt_assert( 0 ); + silk_assert( 0 ); return SILK_ENC_INVALID_DTX_SETTING; } if( encControl->useCBR < 0 || encControl->useCBR > 1 ) { - celt_assert( 0 ); + silk_assert( 0 ); return SILK_ENC_INVALID_CBR_SETTING; } if( encControl->useInBandFEC < 0 || encControl->useInBandFEC > 1 ) { - celt_assert( 0 ); + silk_assert( 0 ); return SILK_ENC_INVALID_INBAND_FEC_SETTING; } if( encControl->nChannelsAPI < 1 || encControl->nChannelsAPI > ENCODER_NUM_CHANNELS ) { - celt_assert( 0 ); + silk_assert( 0 ); return SILK_ENC_INVALID_NUMBER_OF_CHANNELS_ERROR; } if( encControl->nChannelsInternal < 1 || encControl->nChannelsInternal > ENCODER_NUM_CHANNELS ) { - celt_assert( 0 ); + silk_assert( 0 ); return SILK_ENC_INVALID_NUMBER_OF_CHANNELS_ERROR; } if( encControl->nChannelsInternal > encControl->nChannelsAPI ) { - celt_assert( 0 ); + silk_assert( 0 ); return SILK_ENC_INVALID_NUMBER_OF_CHANNELS_ERROR; } if( encControl->complexity < 0 || encControl->complexity > 10 ) { - celt_assert( 0 ); + silk_assert( 0 ); return SILK_ENC_INVALID_COMPLEXITY_SETTING; } diff --git a/thirdparty/opus/silk/control.h b/thirdparty/opus/silk/control.h index b76ec33cd6..747e5426a0 100644 --- a/thirdparty/opus/silk/control.h +++ b/thirdparty/opus/silk/control.h @@ -77,9 +77,6 @@ typedef struct { /* I: Flag to enable in-band Forward Error Correction (FEC); 0/1 */ opus_int useInBandFEC; - /* I: Flag to actually code in-band Forward Error Correction (FEC) in the current packet; 0/1 */ - opus_int LBRR_coded; - /* I: Flag to enable discontinuous transmission (DTX); 0/1 */ opus_int useDTX; @@ -113,11 +110,6 @@ typedef struct { /* O: Tells the Opus encoder we're ready to switch */ opus_int switchReady; - /* O: SILK Signal type */ - opus_int signalType; - - /* O: SILK offset (dithering) */ - opus_int offset; } silk_EncControlStruct; /**************************************************************************/ diff --git a/thirdparty/opus/silk/control_SNR.c b/thirdparty/opus/silk/control_SNR.c index 9a6db27543..cee87eb0d8 100644 --- a/thirdparty/opus/silk/control_SNR.c +++ b/thirdparty/opus/silk/control_SNR.c @@ -32,82 +32,45 @@ POSSIBILITY OF SUCH DAMAGE. #include "main.h" #include "tuning_parameters.h" -/* These tables hold SNR values divided by 21 (so they fit in 8 bits) - for different target bitrates spaced at 400 bps interval. The first - 10 values are omitted (0-4 kb/s) because they're all zeros. - These tables were obtained by running different SNRs through the - encoder and measuring the active bitrate. */ -static const unsigned char silk_TargetRate_NB_21[117 - 10] = { - 0, 15, 39, 52, 61, 68, - 74, 79, 84, 88, 92, 95, 99,102,105,108,111,114,117,119,122,124, - 126,129,131,133,135,137,139,142,143,145,147,149,151,153,155,157, - 158,160,162,163,165,167,168,170,171,173,174,176,177,179,180,182, - 183,185,186,187,189,190,192,193,194,196,197,199,200,201,203,204, - 205,207,208,209,211,212,213,215,216,217,219,220,221,223,224,225, - 227,228,230,231,232,234,235,236,238,239,241,242,243,245,246,248, - 249,250,252,253,255 -}; - -static const unsigned char silk_TargetRate_MB_21[165 - 10] = { - 0, 0, 28, 43, 52, 59, - 65, 70, 74, 78, 81, 85, 87, 90, 93, 95, 98,100,102,105,107,109, - 111,113,115,116,118,120,122,123,125,127,128,130,131,133,134,136, - 137,138,140,141,143,144,145,147,148,149,151,152,153,154,156,157, - 158,159,160,162,163,164,165,166,167,168,169,171,172,173,174,175, - 176,177,178,179,180,181,182,183,184,185,186,187,188,188,189,190, - 191,192,193,194,195,196,197,198,199,200,201,202,203,203,204,205, - 206,207,208,209,210,211,212,213,214,214,215,216,217,218,219,220, - 221,222,223,224,224,225,226,227,228,229,230,231,232,233,234,235, - 236,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250, - 251,252,253,254,255 -}; - -static const unsigned char silk_TargetRate_WB_21[201 - 10] = { - 0, 0, 0, 8, 29, 41, - 49, 56, 62, 66, 70, 74, 77, 80, 83, 86, 88, 91, 93, 95, 97, 99, - 101,103,105,107,108,110,112,113,115,116,118,119,121,122,123,125, - 126,127,129,130,131,132,134,135,136,137,138,140,141,142,143,144, - 145,146,147,148,149,150,151,152,153,154,156,157,158,159,159,160, - 161,162,163,164,165,166,167,168,169,170,171,171,172,173,174,175, - 176,177,177,178,179,180,181,181,182,183,184,185,185,186,187,188, - 189,189,190,191,192,192,193,194,195,195,196,197,198,198,199,200, - 200,201,202,203,203,204,205,206,206,207,208,209,209,210,211,211, - 212,213,214,214,215,216,216,217,218,219,219,220,221,221,222,223, - 224,224,225,226,226,227,228,229,229,230,231,232,232,233,234,234, - 235,236,237,237,238,239,240,240,241,242,243,243,244,245,246,246, - 247,248,249,249,250,251,252,253,255 -}; - /* Control SNR of redidual quantizer */ opus_int silk_control_SNR( silk_encoder_state *psEncC, /* I/O Pointer to Silk encoder state */ opus_int32 TargetRate_bps /* I Target max bitrate (bps) */ ) { - int id; - int bound; - const unsigned char *snr_table; + opus_int k, ret = SILK_NO_ERROR; + opus_int32 frac_Q6; + const opus_int32 *rateTable; - psEncC->TargetRate_bps = TargetRate_bps; - if( psEncC->nb_subfr == 2 ) { - TargetRate_bps -= 2000 + psEncC->fs_kHz/16; - } - if( psEncC->fs_kHz == 8 ) { - bound = sizeof(silk_TargetRate_NB_21); - snr_table = silk_TargetRate_NB_21; - } else if( psEncC->fs_kHz == 12 ) { - bound = sizeof(silk_TargetRate_MB_21); - snr_table = silk_TargetRate_MB_21; - } else { - bound = sizeof(silk_TargetRate_WB_21); - snr_table = silk_TargetRate_WB_21; - } - id = (TargetRate_bps+200)/400; - id = silk_min(id - 10, bound-1); - if( id <= 0 ) { - psEncC->SNR_dB_Q7 = 0; - } else { - psEncC->SNR_dB_Q7 = snr_table[id]*21; + /* Set bitrate/coding quality */ + TargetRate_bps = silk_LIMIT( TargetRate_bps, MIN_TARGET_RATE_BPS, MAX_TARGET_RATE_BPS ); + if( TargetRate_bps != psEncC->TargetRate_bps ) { + psEncC->TargetRate_bps = TargetRate_bps; + + /* If new TargetRate_bps, translate to SNR_dB value */ + if( psEncC->fs_kHz == 8 ) { + rateTable = silk_TargetRate_table_NB; + } else if( psEncC->fs_kHz == 12 ) { + rateTable = silk_TargetRate_table_MB; + } else { + rateTable = silk_TargetRate_table_WB; + } + + /* Reduce bitrate for 10 ms modes in these calculations */ + if( psEncC->nb_subfr == 2 ) { + TargetRate_bps -= REDUCE_BITRATE_10_MS_BPS; + } + + /* Find bitrate interval in table and interpolate */ + for( k = 1; k < TARGET_RATE_TAB_SZ; k++ ) { + if( TargetRate_bps <= rateTable[ k ] ) { + frac_Q6 = silk_DIV32( silk_LSHIFT( TargetRate_bps - rateTable[ k - 1 ], 6 ), + rateTable[ k ] - rateTable[ k - 1 ] ); + psEncC->SNR_dB_Q7 = silk_LSHIFT( silk_SNR_table_Q1[ k - 1 ], 6 ) + silk_MUL( frac_Q6, silk_SNR_table_Q1[ k ] - silk_SNR_table_Q1[ k - 1 ] ); + break; + } + } } - return SILK_NO_ERROR; + + return ret; } diff --git a/thirdparty/opus/silk/control_audio_bandwidth.c b/thirdparty/opus/silk/control_audio_bandwidth.c index f6d22d8395..4f9bc5cbda 100644 --- a/thirdparty/opus/silk/control_audio_bandwidth.c +++ b/thirdparty/opus/silk/control_audio_bandwidth.c @@ -39,15 +39,9 @@ opus_int silk_control_audio_bandwidth( ) { opus_int fs_kHz; - opus_int orig_kHz; opus_int32 fs_Hz; - orig_kHz = psEncC->fs_kHz; - /* Handle a bandwidth-switching reset where we need to be aware what the last sampling rate was. */ - if( orig_kHz == 0 ) { - orig_kHz = psEncC->sLP.saved_fs_kHz; - } - fs_kHz = orig_kHz; + fs_kHz = psEncC->fs_kHz; fs_Hz = silk_SMULBB( fs_kHz, 1000 ); if( fs_Hz == 0 ) { /* Encoder has just been initialized */ @@ -67,7 +61,7 @@ opus_int silk_control_audio_bandwidth( } if( psEncC->allow_bandwidth_switch || encControl->opusCanSwitch ) { /* Check if we should switch down */ - if( silk_SMULBB( orig_kHz, 1000 ) > psEncC->desiredInternal_fs_Hz ) + if( silk_SMULBB( psEncC->fs_kHz, 1000 ) > psEncC->desiredInternal_fs_Hz ) { /* Switch down */ if( psEncC->sLP.mode == 0 ) { @@ -82,7 +76,7 @@ opus_int silk_control_audio_bandwidth( psEncC->sLP.mode = 0; /* Switch to a lower sample frequency */ - fs_kHz = orig_kHz == 16 ? 12 : 8; + fs_kHz = psEncC->fs_kHz == 16 ? 12 : 8; } else { if( psEncC->sLP.transition_frame_no <= 0 ) { encControl->switchReady = 1; @@ -96,12 +90,12 @@ opus_int silk_control_audio_bandwidth( } else /* Check if we should switch up */ - if( silk_SMULBB( orig_kHz, 1000 ) < psEncC->desiredInternal_fs_Hz ) + if( silk_SMULBB( psEncC->fs_kHz, 1000 ) < psEncC->desiredInternal_fs_Hz ) { /* Switch up */ if( encControl->opusCanSwitch ) { /* Switch to a higher sample frequency */ - fs_kHz = orig_kHz == 8 ? 12 : 16; + fs_kHz = psEncC->fs_kHz == 8 ? 12 : 16; /* New transition */ psEncC->sLP.transition_frame_no = 0; diff --git a/thirdparty/opus/silk/control_codec.c b/thirdparty/opus/silk/control_codec.c index 52aa8fded3..044eea3f2a 100644 --- a/thirdparty/opus/silk/control_codec.c +++ b/thirdparty/opus/silk/control_codec.c @@ -57,7 +57,7 @@ static opus_int silk_setup_complexity( static OPUS_INLINE opus_int silk_setup_LBRR( silk_encoder_state *psEncC, /* I/O */ - const silk_EncControlStruct *encControl /* I */ + const opus_int32 TargetRate_bps /* I */ ); @@ -65,6 +65,7 @@ static OPUS_INLINE opus_int silk_setup_LBRR( opus_int silk_control_encoder( silk_encoder_state_Fxx *psEnc, /* I/O Pointer to Silk encoder state */ silk_EncControlStruct *encControl, /* I Control structure */ + const opus_int32 TargetRate_bps, /* I Target max bitrate (bps) */ const opus_int allow_bw_switch, /* I Flag to allow switching audio bandwidth */ const opus_int channelNb, /* I Channel number */ const opus_int force_fs_kHz @@ -124,7 +125,7 @@ opus_int silk_control_encoder( /********************************************/ /* Set LBRR usage */ /********************************************/ - ret += silk_setup_LBRR( &psEnc->sCmn, encControl ); + ret += silk_setup_LBRR( &psEnc->sCmn, TargetRate_bps ); psEnc->sCmn.controlled_since_last_payload = 1; @@ -238,11 +239,12 @@ static opus_int silk_setup_fs( } /* Set internal sampling frequency */ - celt_assert( fs_kHz == 8 || fs_kHz == 12 || fs_kHz == 16 ); - celt_assert( psEnc->sCmn.nb_subfr == 2 || psEnc->sCmn.nb_subfr == 4 ); + silk_assert( fs_kHz == 8 || fs_kHz == 12 || fs_kHz == 16 ); + silk_assert( psEnc->sCmn.nb_subfr == 2 || psEnc->sCmn.nb_subfr == 4 ); if( psEnc->sCmn.fs_kHz != fs_kHz ) { /* reset part of the state */ silk_memset( &psEnc->sShape, 0, sizeof( psEnc->sShape ) ); + silk_memset( &psEnc->sPrefilt, 0, sizeof( psEnc->sPrefilt ) ); silk_memset( &psEnc->sCmn.sNSQ, 0, sizeof( psEnc->sCmn.sNSQ ) ); silk_memset( psEnc->sCmn.prev_NLSFq_Q15, 0, sizeof( psEnc->sCmn.prev_NLSFq_Q15 ) ); silk_memset( &psEnc->sCmn.sLP.In_LP_State, 0, sizeof( psEnc->sCmn.sLP.In_LP_State ) ); @@ -253,6 +255,7 @@ static opus_int silk_setup_fs( /* Initialize non-zero parameters */ psEnc->sCmn.prevLag = 100; psEnc->sCmn.first_frame_after_reset = 1; + psEnc->sPrefilt.lagPrev = 100; psEnc->sShape.LastGainIndex = 10; psEnc->sCmn.sNSQ.lagPrev = 100; psEnc->sCmn.sNSQ.prev_gain_Q16 = 65536; @@ -290,16 +293,19 @@ static opus_int silk_setup_fs( psEnc->sCmn.pitch_LPC_win_length = silk_SMULBB( FIND_PITCH_LPC_WIN_MS_2_SF, fs_kHz ); } if( psEnc->sCmn.fs_kHz == 16 ) { + psEnc->sCmn.mu_LTP_Q9 = SILK_FIX_CONST( MU_LTP_QUANT_WB, 9 ); psEnc->sCmn.pitch_lag_low_bits_iCDF = silk_uniform8_iCDF; } else if( psEnc->sCmn.fs_kHz == 12 ) { + psEnc->sCmn.mu_LTP_Q9 = SILK_FIX_CONST( MU_LTP_QUANT_MB, 9 ); psEnc->sCmn.pitch_lag_low_bits_iCDF = silk_uniform6_iCDF; } else { + psEnc->sCmn.mu_LTP_Q9 = SILK_FIX_CONST( MU_LTP_QUANT_NB, 9 ); psEnc->sCmn.pitch_lag_low_bits_iCDF = silk_uniform4_iCDF; } } /* Check that settings are valid */ - celt_assert( ( psEnc->sCmn.subfr_length * psEnc->sCmn.nb_subfr ) == psEnc->sCmn.frame_length ); + silk_assert( ( psEnc->sCmn.subfr_length * psEnc->sCmn.nb_subfr ) == psEnc->sCmn.frame_length ); return ret; } @@ -312,76 +318,61 @@ static opus_int silk_setup_complexity( opus_int ret = 0; /* Set encoding complexity */ - celt_assert( Complexity >= 0 && Complexity <= 10 ); - if( Complexity < 1 ) { + silk_assert( Complexity >= 0 && Complexity <= 10 ); + if( Complexity < 2 ) { psEncC->pitchEstimationComplexity = SILK_PE_MIN_COMPLEX; psEncC->pitchEstimationThreshold_Q16 = SILK_FIX_CONST( 0.8, 16 ); psEncC->pitchEstimationLPCOrder = 6; - psEncC->shapingLPCOrder = 12; + psEncC->shapingLPCOrder = 8; psEncC->la_shape = 3 * psEncC->fs_kHz; psEncC->nStatesDelayedDecision = 1; psEncC->useInterpolatedNLSFs = 0; - psEncC->NLSF_MSVQ_Survivors = 2; - psEncC->warping_Q16 = 0; - } else if( Complexity < 2 ) { - psEncC->pitchEstimationComplexity = SILK_PE_MID_COMPLEX; - psEncC->pitchEstimationThreshold_Q16 = SILK_FIX_CONST( 0.76, 16 ); - psEncC->pitchEstimationLPCOrder = 8; - psEncC->shapingLPCOrder = 14; - psEncC->la_shape = 5 * psEncC->fs_kHz; - psEncC->nStatesDelayedDecision = 1; - psEncC->useInterpolatedNLSFs = 0; - psEncC->NLSF_MSVQ_Survivors = 3; - psEncC->warping_Q16 = 0; - } else if( Complexity < 3 ) { - psEncC->pitchEstimationComplexity = SILK_PE_MIN_COMPLEX; - psEncC->pitchEstimationThreshold_Q16 = SILK_FIX_CONST( 0.8, 16 ); - psEncC->pitchEstimationLPCOrder = 6; - psEncC->shapingLPCOrder = 12; - psEncC->la_shape = 3 * psEncC->fs_kHz; - psEncC->nStatesDelayedDecision = 2; - psEncC->useInterpolatedNLSFs = 0; + psEncC->LTPQuantLowComplexity = 1; psEncC->NLSF_MSVQ_Survivors = 2; psEncC->warping_Q16 = 0; } else if( Complexity < 4 ) { psEncC->pitchEstimationComplexity = SILK_PE_MID_COMPLEX; psEncC->pitchEstimationThreshold_Q16 = SILK_FIX_CONST( 0.76, 16 ); psEncC->pitchEstimationLPCOrder = 8; - psEncC->shapingLPCOrder = 14; + psEncC->shapingLPCOrder = 10; psEncC->la_shape = 5 * psEncC->fs_kHz; - psEncC->nStatesDelayedDecision = 2; + psEncC->nStatesDelayedDecision = 1; psEncC->useInterpolatedNLSFs = 0; + psEncC->LTPQuantLowComplexity = 0; psEncC->NLSF_MSVQ_Survivors = 4; psEncC->warping_Q16 = 0; } else if( Complexity < 6 ) { psEncC->pitchEstimationComplexity = SILK_PE_MID_COMPLEX; psEncC->pitchEstimationThreshold_Q16 = SILK_FIX_CONST( 0.74, 16 ); psEncC->pitchEstimationLPCOrder = 10; - psEncC->shapingLPCOrder = 16; + psEncC->shapingLPCOrder = 12; psEncC->la_shape = 5 * psEncC->fs_kHz; psEncC->nStatesDelayedDecision = 2; psEncC->useInterpolatedNLSFs = 1; - psEncC->NLSF_MSVQ_Survivors = 6; + psEncC->LTPQuantLowComplexity = 0; + psEncC->NLSF_MSVQ_Survivors = 8; psEncC->warping_Q16 = psEncC->fs_kHz * SILK_FIX_CONST( WARPING_MULTIPLIER, 16 ); } else if( Complexity < 8 ) { psEncC->pitchEstimationComplexity = SILK_PE_MID_COMPLEX; psEncC->pitchEstimationThreshold_Q16 = SILK_FIX_CONST( 0.72, 16 ); psEncC->pitchEstimationLPCOrder = 12; - psEncC->shapingLPCOrder = 20; + psEncC->shapingLPCOrder = 14; psEncC->la_shape = 5 * psEncC->fs_kHz; psEncC->nStatesDelayedDecision = 3; psEncC->useInterpolatedNLSFs = 1; - psEncC->NLSF_MSVQ_Survivors = 8; + psEncC->LTPQuantLowComplexity = 0; + psEncC->NLSF_MSVQ_Survivors = 16; psEncC->warping_Q16 = psEncC->fs_kHz * SILK_FIX_CONST( WARPING_MULTIPLIER, 16 ); } else { psEncC->pitchEstimationComplexity = SILK_PE_MAX_COMPLEX; psEncC->pitchEstimationThreshold_Q16 = SILK_FIX_CONST( 0.7, 16 ); psEncC->pitchEstimationLPCOrder = 16; - psEncC->shapingLPCOrder = 24; + psEncC->shapingLPCOrder = 16; psEncC->la_shape = 5 * psEncC->fs_kHz; psEncC->nStatesDelayedDecision = MAX_DEL_DEC_STATES; psEncC->useInterpolatedNLSFs = 1; - psEncC->NLSF_MSVQ_Survivors = 16; + psEncC->LTPQuantLowComplexity = 0; + psEncC->NLSF_MSVQ_Survivors = 32; psEncC->warping_Q16 = psEncC->fs_kHz * SILK_FIX_CONST( WARPING_MULTIPLIER, 16 ); } @@ -390,32 +381,46 @@ static opus_int silk_setup_complexity( psEncC->shapeWinLength = SUB_FRAME_LENGTH_MS * psEncC->fs_kHz + 2 * psEncC->la_shape; psEncC->Complexity = Complexity; - celt_assert( psEncC->pitchEstimationLPCOrder <= MAX_FIND_PITCH_LPC_ORDER ); - celt_assert( psEncC->shapingLPCOrder <= MAX_SHAPE_LPC_ORDER ); - celt_assert( psEncC->nStatesDelayedDecision <= MAX_DEL_DEC_STATES ); - celt_assert( psEncC->warping_Q16 <= 32767 ); - celt_assert( psEncC->la_shape <= LA_SHAPE_MAX ); - celt_assert( psEncC->shapeWinLength <= SHAPE_LPC_WIN_MAX ); + silk_assert( psEncC->pitchEstimationLPCOrder <= MAX_FIND_PITCH_LPC_ORDER ); + silk_assert( psEncC->shapingLPCOrder <= MAX_SHAPE_LPC_ORDER ); + silk_assert( psEncC->nStatesDelayedDecision <= MAX_DEL_DEC_STATES ); + silk_assert( psEncC->warping_Q16 <= 32767 ); + silk_assert( psEncC->la_shape <= LA_SHAPE_MAX ); + silk_assert( psEncC->shapeWinLength <= SHAPE_LPC_WIN_MAX ); + silk_assert( psEncC->NLSF_MSVQ_Survivors <= NLSF_VQ_MAX_SURVIVORS ); return ret; } static OPUS_INLINE opus_int silk_setup_LBRR( silk_encoder_state *psEncC, /* I/O */ - const silk_EncControlStruct *encControl /* I */ + const opus_int32 TargetRate_bps /* I */ ) { opus_int LBRR_in_previous_packet, ret = SILK_NO_ERROR; + opus_int32 LBRR_rate_thres_bps; LBRR_in_previous_packet = psEncC->LBRR_enabled; - psEncC->LBRR_enabled = encControl->LBRR_coded; - if( psEncC->LBRR_enabled ) { - /* Set gain increase for coding LBRR excitation */ - if( LBRR_in_previous_packet == 0 ) { - /* Previous packet did not have LBRR, and was therefore coded at a higher bitrate */ - psEncC->LBRR_GainIncreases = 7; + psEncC->LBRR_enabled = 0; + if( psEncC->useInBandFEC && psEncC->PacketLoss_perc > 0 ) { + if( psEncC->fs_kHz == 8 ) { + LBRR_rate_thres_bps = LBRR_NB_MIN_RATE_BPS; + } else if( psEncC->fs_kHz == 12 ) { + LBRR_rate_thres_bps = LBRR_MB_MIN_RATE_BPS; } else { - psEncC->LBRR_GainIncreases = silk_max_int( 7 - silk_SMULWB( (opus_int32)psEncC->PacketLoss_perc, SILK_FIX_CONST( 0.4, 16 ) ), 2 ); + LBRR_rate_thres_bps = LBRR_WB_MIN_RATE_BPS; + } + LBRR_rate_thres_bps = silk_SMULWB( silk_MUL( LBRR_rate_thres_bps, 125 - silk_min( psEncC->PacketLoss_perc, 25 ) ), SILK_FIX_CONST( 0.01, 16 ) ); + + if( TargetRate_bps > LBRR_rate_thres_bps ) { + /* Set gain increase for coding LBRR excitation */ + if( LBRR_in_previous_packet == 0 ) { + /* Previous packet did not have LBRR, and was therefore coded at a higher bitrate */ + psEncC->LBRR_GainIncreases = 7; + } else { + psEncC->LBRR_GainIncreases = silk_max_int( 7 - silk_SMULWB( (opus_int32)psEncC->PacketLoss_perc, SILK_FIX_CONST( 0.4, 16 ) ), 2 ); + } + psEncC->LBRR_enabled = 1; } } diff --git a/thirdparty/opus/silk/debug.h b/thirdparty/opus/silk/debug.h index 6f68c1ca0f..efb6d3e99e 100644 --- a/thirdparty/opus/silk/debug.h +++ b/thirdparty/opus/silk/debug.h @@ -39,10 +39,23 @@ extern "C" unsigned long GetHighResolutionTime(void); /* O time in usec*/ -/* Set to 1 to enable DEBUG_STORE_DATA() macros for dumping - * intermediate signals from the codec. - */ -#define SILK_DEBUG 0 +/* make SILK_DEBUG dependent on compiler's _DEBUG */ +#if defined _WIN32 + #ifdef _DEBUG + #define SILK_DEBUG 1 + #else + #define SILK_DEBUG 0 + #endif + + /* overrule the above */ + #if 0 + /* #define NO_ASSERTS*/ + #undef SILK_DEBUG + #define SILK_DEBUG 1 + #endif +#else + #define SILK_DEBUG 0 +#endif /* Flag for using timers */ #define SILK_TIC_TOC 0 diff --git a/thirdparty/opus/silk/dec_API.c b/thirdparty/opus/silk/dec_API.c index 7d5ca7fb9f..b7d8ed48d8 100644 --- a/thirdparty/opus/silk/dec_API.c +++ b/thirdparty/opus/silk/dec_API.c @@ -104,7 +104,7 @@ opus_int silk_Decode( /* O Returns error co int delay_stack_alloc; SAVE_STACK; - celt_assert( decControl->nChannelsInternal == 1 || decControl->nChannelsInternal == 2 ); + silk_assert( decControl->nChannelsInternal == 1 || decControl->nChannelsInternal == 2 ); /**********************************/ /* Test if first frame in payload */ @@ -143,13 +143,13 @@ opus_int silk_Decode( /* O Returns error co channel_state[ n ].nFramesPerPacket = 3; channel_state[ n ].nb_subfr = 4; } else { - celt_assert( 0 ); + silk_assert( 0 ); RESTORE_STACK; return SILK_DEC_INVALID_FRAME_SIZE; } fs_kHz_dec = ( decControl->internalSampleRate >> 10 ) + 1; if( fs_kHz_dec != 8 && fs_kHz_dec != 12 && fs_kHz_dec != 16 ) { - celt_assert( 0 ); + silk_assert( 0 ); RESTORE_STACK; return SILK_DEC_INVALID_SAMPLING_FREQUENCY; } diff --git a/thirdparty/opus/silk/decode_core.c b/thirdparty/opus/silk/decode_core.c index 1c352a6522..e569c0e72b 100644 --- a/thirdparty/opus/silk/decode_core.c +++ b/thirdparty/opus/silk/decode_core.c @@ -141,7 +141,7 @@ void silk_decode_core( if( k == 0 || ( k == 2 && NLSF_interpolation_flag ) ) { /* Rewhiten with new A coefs */ start_idx = psDec->ltp_mem_length - lag - psDec->LPC_order - LTP_ORDER / 2; - celt_assert( start_idx > 0 ); + silk_assert( start_idx > 0 ); if( k == 2 ) { silk_memcpy( &psDec->outBuf[ psDec->ltp_mem_length ], xq, 2 * psDec->subfr_length * sizeof( opus_int16 ) ); @@ -196,7 +196,7 @@ void silk_decode_core( for( i = 0; i < psDec->subfr_length; i++ ) { /* Short-term prediction */ - celt_assert( psDec->LPC_order == 10 || psDec->LPC_order == 16 ); + silk_assert( psDec->LPC_order == 10 || psDec->LPC_order == 16 ); /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */ LPC_pred_Q10 = silk_RSHIFT( psDec->LPC_order, 1 ); LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 1 ], A_Q12_tmp[ 0 ] ); @@ -225,6 +225,8 @@ void silk_decode_core( pxq[ i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( sLPC_Q14[ MAX_LPC_ORDER + i ], Gain_Q10 ), 8 ) ); } + /* DEBUG_STORE_DATA( dec.pcm, pxq, psDec->subfr_length * sizeof( opus_int16 ) ) */ + /* Update LPC filter state */ silk_memcpy( sLPC_Q14, &sLPC_Q14[ psDec->subfr_length ], MAX_LPC_ORDER * sizeof( opus_int32 ) ); pexc_Q14 += psDec->subfr_length; diff --git a/thirdparty/opus/silk/decode_frame.c b/thirdparty/opus/silk/decode_frame.c index e73825b267..a605d95ac6 100644 --- a/thirdparty/opus/silk/decode_frame.c +++ b/thirdparty/opus/silk/decode_frame.c @@ -55,7 +55,7 @@ opus_int silk_decode_frame( psDecCtrl->LTP_scale_Q14 = 0; /* Safety checks */ - celt_assert( L > 0 && L <= MAX_FRAME_LENGTH ); + silk_assert( L > 0 && L <= MAX_FRAME_LENGTH ); if( lostFlag == FLAG_DECODE_NORMAL || ( lostFlag == FLAG_DECODE_LBRR && psDec->LBRR_flags[ psDec->nFramesDecoded ] == 1 ) ) @@ -91,20 +91,19 @@ opus_int silk_decode_frame( psDec->lossCnt = 0; psDec->prevSignalType = psDec->indices.signalType; - celt_assert( psDec->prevSignalType >= 0 && psDec->prevSignalType <= 2 ); + silk_assert( psDec->prevSignalType >= 0 && psDec->prevSignalType <= 2 ); /* A frame has been decoded without errors */ psDec->first_frame_after_reset = 0; } else { /* Handle packet loss by extrapolation */ - psDec->indices.signalType = psDec->prevSignalType; silk_PLC( psDec, psDecCtrl, pOut, 1, arch ); } /*************************/ /* Update output buffer. */ /*************************/ - celt_assert( psDec->ltp_mem_length >= psDec->frame_length ); + silk_assert( psDec->ltp_mem_length >= psDec->frame_length ); mv_len = psDec->ltp_mem_length - psDec->frame_length; silk_memmove( psDec->outBuf, &psDec->outBuf[ psDec->frame_length ], mv_len * sizeof(opus_int16) ); silk_memcpy( &psDec->outBuf[ mv_len ], pOut, psDec->frame_length * sizeof( opus_int16 ) ); diff --git a/thirdparty/opus/silk/decode_indices.c b/thirdparty/opus/silk/decode_indices.c index 0bb4a997a5..7afe5c26c1 100644 --- a/thirdparty/opus/silk/decode_indices.c +++ b/thirdparty/opus/silk/decode_indices.c @@ -79,7 +79,7 @@ void silk_decode_indices( /**********************/ psDec->indices.NLSFIndices[ 0 ] = (opus_int8)ec_dec_icdf( psRangeDec, &psDec->psNLSF_CB->CB1_iCDF[ ( psDec->indices.signalType >> 1 ) * psDec->psNLSF_CB->nVectors ], 8 ); silk_NLSF_unpack( ec_ix, pred_Q8, psDec->psNLSF_CB, psDec->indices.NLSFIndices[ 0 ] ); - celt_assert( psDec->psNLSF_CB->order == psDec->LPC_order ); + silk_assert( psDec->psNLSF_CB->order == psDec->LPC_order ); for( i = 0; i < psDec->psNLSF_CB->order; i++ ) { Ix = ec_dec_icdf( psRangeDec, &psDec->psNLSF_CB->ec_iCDF[ ec_ix[ i ] ], 8 ); if( Ix == 0 ) { diff --git a/thirdparty/opus/silk/decode_parameters.c b/thirdparty/opus/silk/decode_parameters.c index a56a409858..e345b1dcef 100644 --- a/thirdparty/opus/silk/decode_parameters.c +++ b/thirdparty/opus/silk/decode_parameters.c @@ -52,7 +52,7 @@ void silk_decode_parameters( silk_NLSF_decode( pNLSF_Q15, psDec->indices.NLSFIndices, psDec->psNLSF_CB ); /* Convert NLSF parameters to AR prediction filter coefficients */ - silk_NLSF2A( psDecCtrl->PredCoef_Q12[ 1 ], pNLSF_Q15, psDec->LPC_order, psDec->arch ); + silk_NLSF2A( psDecCtrl->PredCoef_Q12[ 1 ], pNLSF_Q15, psDec->LPC_order ); /* If just reset, e.g., because internal Fs changed, do not allow interpolation */ /* improves the case of packet loss in the first frame after a switch */ @@ -69,7 +69,7 @@ void silk_decode_parameters( } /* Convert NLSF parameters to AR prediction filter coefficients */ - silk_NLSF2A( psDecCtrl->PredCoef_Q12[ 0 ], pNLSF0_Q15, psDec->LPC_order, psDec->arch ); + silk_NLSF2A( psDecCtrl->PredCoef_Q12[ 0 ], pNLSF0_Q15, psDec->LPC_order ); } else { /* Copy LPC coefficients for first half from second half */ silk_memcpy( psDecCtrl->PredCoef_Q12[ 0 ], psDecCtrl->PredCoef_Q12[ 1 ], psDec->LPC_order * sizeof( opus_int16 ) ); diff --git a/thirdparty/opus/silk/decode_pitch.c b/thirdparty/opus/silk/decode_pitch.c index fd1b6bf551..fedbc6a525 100644 --- a/thirdparty/opus/silk/decode_pitch.c +++ b/thirdparty/opus/silk/decode_pitch.c @@ -51,7 +51,7 @@ void silk_decode_pitch( Lag_CB_ptr = &silk_CB_lags_stage2[ 0 ][ 0 ]; cbk_size = PE_NB_CBKS_STAGE2_EXT; } else { - celt_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1 ); + silk_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1 ); Lag_CB_ptr = &silk_CB_lags_stage2_10_ms[ 0 ][ 0 ]; cbk_size = PE_NB_CBKS_STAGE2_10MS; } @@ -60,7 +60,7 @@ void silk_decode_pitch( Lag_CB_ptr = &silk_CB_lags_stage3[ 0 ][ 0 ]; cbk_size = PE_NB_CBKS_STAGE3_MAX; } else { - celt_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1 ); + silk_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1 ); Lag_CB_ptr = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ]; cbk_size = PE_NB_CBKS_STAGE3_10MS; } diff --git a/thirdparty/opus/silk/decode_pulses.c b/thirdparty/opus/silk/decode_pulses.c index a56d2d3074..d6bbec9225 100644 --- a/thirdparty/opus/silk/decode_pulses.c +++ b/thirdparty/opus/silk/decode_pulses.c @@ -56,7 +56,7 @@ void silk_decode_pulses( silk_assert( 1 << LOG2_SHELL_CODEC_FRAME_LENGTH == SHELL_CODEC_FRAME_LENGTH ); iter = silk_RSHIFT( frame_length, LOG2_SHELL_CODEC_FRAME_LENGTH ); if( iter * SHELL_CODEC_FRAME_LENGTH < frame_length ) { - celt_assert( frame_length == 12 * 10 ); /* Make sure only happens for 10 ms @ 12 kHz */ + silk_assert( frame_length == 12 * 10 ); /* Make sure only happens for 10 ms @ 12 kHz */ iter++; } diff --git a/thirdparty/opus/silk/decoder_set_fs.c b/thirdparty/opus/silk/decoder_set_fs.c index d9a13d0f0c..eef0fd25e1 100644 --- a/thirdparty/opus/silk/decoder_set_fs.c +++ b/thirdparty/opus/silk/decoder_set_fs.c @@ -40,8 +40,8 @@ opus_int silk_decoder_set_fs( { opus_int frame_length, ret = 0; - celt_assert( fs_kHz == 8 || fs_kHz == 12 || fs_kHz == 16 ); - celt_assert( psDec->nb_subfr == MAX_NB_SUBFR || psDec->nb_subfr == MAX_NB_SUBFR/2 ); + silk_assert( fs_kHz == 8 || fs_kHz == 12 || fs_kHz == 16 ); + silk_assert( psDec->nb_subfr == MAX_NB_SUBFR || psDec->nb_subfr == MAX_NB_SUBFR/2 ); /* New (sub)frame length */ psDec->subfr_length = silk_SMULBB( SUB_FRAME_LENGTH_MS, fs_kHz ); @@ -86,7 +86,7 @@ opus_int silk_decoder_set_fs( psDec->pitch_lag_low_bits_iCDF = silk_uniform4_iCDF; } else { /* unsupported sampling rate */ - celt_assert( 0 ); + silk_assert( 0 ); } psDec->first_frame_after_reset = 1; psDec->lagPrev = 100; @@ -101,7 +101,7 @@ opus_int silk_decoder_set_fs( } /* Check that settings are valid */ - celt_assert( psDec->frame_length > 0 && psDec->frame_length <= MAX_FRAME_LENGTH ); + silk_assert( psDec->frame_length > 0 && psDec->frame_length <= MAX_FRAME_LENGTH ); return ret; } diff --git a/thirdparty/opus/silk/define.h b/thirdparty/opus/silk/define.h index 247cb0bf71..19c9b00e25 100644 --- a/thirdparty/opus/silk/define.h +++ b/thirdparty/opus/silk/define.h @@ -46,6 +46,7 @@ extern "C" /* Limits on bitrate */ #define MIN_TARGET_RATE_BPS 5000 #define MAX_TARGET_RATE_BPS 80000 +#define TARGET_RATE_TAB_SZ 8 /* LBRR thresholds */ #define LBRR_NB_MIN_RATE_BPS 12000 @@ -55,12 +56,6 @@ extern "C" /* DTX settings */ #define NB_SPEECH_FRAMES_BEFORE_DTX 10 /* eq 200 ms */ #define MAX_CONSECUTIVE_DTX 20 /* eq 400 ms */ -#define DTX_ACTIVITY_THRESHOLD 0.1f - -/* VAD decision */ -#define VAD_NO_DECISION -1 -#define VAD_NO_ACTIVITY 0 -#define VAD_ACTIVITY 1 /* Maximum sampling frequency */ #define MAX_FS_KHZ 16 @@ -152,7 +147,7 @@ extern "C" #define USE_HARM_SHAPING 1 /* Max LPC order of noise shaping filters */ -#define MAX_SHAPE_LPC_ORDER 24 +#define MAX_SHAPE_LPC_ORDER 16 #define HARM_SHAPE_FIR_TAPS 3 @@ -162,7 +157,8 @@ extern "C" #define LTP_BUF_LENGTH 512 #define LTP_MASK ( LTP_BUF_LENGTH - 1 ) -#define DECISION_DELAY 40 +#define DECISION_DELAY 32 +#define DECISION_DELAY_MASK ( DECISION_DELAY - 1 ) /* Number of subframes for excitation entropy coding */ #define SHELL_CODEC_FRAME_LENGTH 16 @@ -177,7 +173,11 @@ extern "C" #define MAX_MATRIX_SIZE MAX_LPC_ORDER /* Max of LPC Order and LTP order */ +#if( MAX_LPC_ORDER > DECISION_DELAY ) # define NSQ_LPC_BUF_LENGTH MAX_LPC_ORDER +#else +# define NSQ_LPC_BUF_LENGTH DECISION_DELAY +#endif /***************************/ /* Voice activity detector */ @@ -205,6 +205,7 @@ extern "C" /******************/ #define NLSF_W_Q 2 #define NLSF_VQ_MAX_VECTORS 32 +#define NLSF_VQ_MAX_SURVIVORS 32 #define NLSF_QUANT_MAX_AMPLITUDE 4 #define NLSF_QUANT_MAX_AMPLITUDE_EXT 10 #define NLSF_QUANT_LEVEL_ADJ 0.1 diff --git a/thirdparty/opus/silk/enc_API.c b/thirdparty/opus/silk/enc_API.c index 55a33f37e9..f8060286db 100644 --- a/thirdparty/opus/silk/enc_API.c +++ b/thirdparty/opus/silk/enc_API.c @@ -82,7 +82,7 @@ opus_int silk_InitEncoder( /* O Returns error co silk_memset( psEnc, 0, sizeof( silk_encoder ) ); for( n = 0; n < ENCODER_NUM_CHANNELS; n++ ) { if( ret += silk_init_encoder( &psEnc->state_Fxx[ n ], arch ) ) { - celt_assert( 0 ); + silk_assert( 0 ); } } @@ -91,7 +91,7 @@ opus_int silk_InitEncoder( /* O Returns error co /* Read control structure */ if( ret += silk_QueryEncoder( encState, encStatus ) ) { - celt_assert( 0 ); + silk_assert( 0 ); } return ret; @@ -144,8 +144,7 @@ opus_int silk_Encode( /* O Returns error co opus_int nSamplesIn, /* I Number of samples in input vector */ ec_enc *psRangeEnc, /* I/O Compressor data structure */ opus_int32 *nBytesOut, /* I/O Number of bytes in payload (input: Max bytes) */ - const opus_int prefillFlag, /* I Flag to indicate prefilling buffers no coding */ - opus_int activity /* I Decision of Opus voice activity detector */ + const opus_int prefillFlag /* I Flag to indicate prefilling buffers no coding */ ) { opus_int n, i, nBits, flags, tmp_payloadSize_ms = 0, tmp_complexity = 0, ret = 0; @@ -167,7 +166,7 @@ opus_int silk_Encode( /* O Returns error co /* Check values in encoder control structure */ if( ( ret = check_control_input( encControl ) ) != 0 ) { - celt_assert( 0 ); + silk_assert( 0 ); RESTORE_STACK; return ret; } @@ -200,26 +199,16 @@ opus_int silk_Encode( /* O Returns error co tot_blocks = ( nBlocksOf10ms > 1 ) ? nBlocksOf10ms >> 1 : 1; curr_block = 0; if( prefillFlag ) { - silk_LP_state save_LP; /* Only accept input length of 10 ms */ if( nBlocksOf10ms != 1 ) { - celt_assert( 0 ); + silk_assert( 0 ); RESTORE_STACK; return SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES; } - if ( prefillFlag == 2 ) { - save_LP = psEnc->state_Fxx[ 0 ].sCmn.sLP; - /* Save the sampling rate so the bandwidth switching code can keep handling transitions. */ - save_LP.saved_fs_kHz = psEnc->state_Fxx[ 0 ].sCmn.fs_kHz; - } /* Reset Encoder */ for( n = 0; n < encControl->nChannelsInternal; n++ ) { ret = silk_init_encoder( &psEnc->state_Fxx[ n ], psEnc->state_Fxx[ n ].sCmn.arch ); - /* Restore the variable LP state. */ - if ( prefillFlag == 2 ) { - psEnc->state_Fxx[ n ].sCmn.sLP = save_LP; - } - celt_assert( !ret ); + silk_assert( !ret ); } tmp_payloadSize_ms = encControl->payloadSize_ms; encControl->payloadSize_ms = 10; @@ -232,22 +221,23 @@ opus_int silk_Encode( /* O Returns error co } else { /* Only accept input lengths that are a multiple of 10 ms */ if( nBlocksOf10ms * encControl->API_sampleRate != 100 * nSamplesIn || nSamplesIn < 0 ) { - celt_assert( 0 ); + silk_assert( 0 ); RESTORE_STACK; return SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES; } /* Make sure no more than one packet can be produced */ if( 1000 * (opus_int32)nSamplesIn > encControl->payloadSize_ms * encControl->API_sampleRate ) { - celt_assert( 0 ); + silk_assert( 0 ); RESTORE_STACK; return SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES; } } + TargetRate_bps = silk_RSHIFT32( encControl->bitRate, encControl->nChannelsInternal - 1 ); for( n = 0; n < encControl->nChannelsInternal; n++ ) { /* Force the side channel to the same rate as the mid */ opus_int force_fs_kHz = (n==1) ? psEnc->state_Fxx[0].sCmn.fs_kHz : 0; - if( ( ret = silk_control_encoder( &psEnc->state_Fxx[ n ], encControl, psEnc->allowBandwidthSwitch, n, force_fs_kHz ) ) != 0 ) { + if( ( ret = silk_control_encoder( &psEnc->state_Fxx[ n ], encControl, TargetRate_bps, psEnc->allowBandwidthSwitch, n, force_fs_kHz ) ) != 0 ) { silk_assert( 0 ); RESTORE_STACK; return ret; @@ -259,7 +249,7 @@ opus_int silk_Encode( /* O Returns error co } psEnc->state_Fxx[ n ].sCmn.inDTX = psEnc->state_Fxx[ n ].sCmn.useDTX; } - celt_assert( encControl->nChannelsInternal == 1 || psEnc->state_Fxx[ 0 ].sCmn.fs_kHz == psEnc->state_Fxx[ 1 ].sCmn.fs_kHz ); + silk_assert( encControl->nChannelsInternal == 1 || psEnc->state_Fxx[ 0 ].sCmn.fs_kHz == psEnc->state_Fxx[ 1 ].sCmn.fs_kHz ); /* Input buffering/resampling and encoding */ nSamplesToBufferMax = @@ -317,7 +307,7 @@ opus_int silk_Encode( /* O Returns error co } psEnc->state_Fxx[ 0 ].sCmn.inputBufIx += nSamplesToBuffer; } else { - celt_assert( encControl->nChannelsAPI == 1 && encControl->nChannelsInternal == 1 ); + silk_assert( encControl->nChannelsAPI == 1 && encControl->nChannelsInternal == 1 ); silk_memcpy(buf, samplesIn, nSamplesFromInput*sizeof(opus_int16)); ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput ); @@ -333,8 +323,8 @@ opus_int silk_Encode( /* O Returns error co /* Silk encoder */ if( psEnc->state_Fxx[ 0 ].sCmn.inputBufIx >= psEnc->state_Fxx[ 0 ].sCmn.frame_length ) { /* Enough data in input buffer, so encode */ - celt_assert( psEnc->state_Fxx[ 0 ].sCmn.inputBufIx == psEnc->state_Fxx[ 0 ].sCmn.frame_length ); - celt_assert( encControl->nChannelsInternal == 1 || psEnc->state_Fxx[ 1 ].sCmn.inputBufIx == psEnc->state_Fxx[ 1 ].sCmn.frame_length ); + silk_assert( psEnc->state_Fxx[ 0 ].sCmn.inputBufIx == psEnc->state_Fxx[ 0 ].sCmn.frame_length ); + silk_assert( encControl->nChannelsInternal == 1 || psEnc->state_Fxx[ 1 ].sCmn.inputBufIx == psEnc->state_Fxx[ 1 ].sCmn.frame_length ); /* Deal with LBRR data */ if( psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded == 0 && !prefillFlag ) { @@ -426,6 +416,7 @@ opus_int silk_Encode( /* O Returns error co /* Reset side channel encoder memory for first frame with side coding */ if( psEnc->prev_decode_only_middle == 1 ) { silk_memset( &psEnc->state_Fxx[ 1 ].sShape, 0, sizeof( psEnc->state_Fxx[ 1 ].sShape ) ); + silk_memset( &psEnc->state_Fxx[ 1 ].sPrefilt, 0, sizeof( psEnc->state_Fxx[ 1 ].sPrefilt ) ); silk_memset( &psEnc->state_Fxx[ 1 ].sCmn.sNSQ, 0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.sNSQ ) ); silk_memset( psEnc->state_Fxx[ 1 ].sCmn.prev_NLSFq_Q15, 0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.prev_NLSFq_Q15 ) ); silk_memset( &psEnc->state_Fxx[ 1 ].sCmn.sLP.In_LP_State, 0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.sLP.In_LP_State ) ); @@ -436,7 +427,7 @@ opus_int silk_Encode( /* O Returns error co psEnc->state_Fxx[ 1 ].sCmn.sNSQ.prev_gain_Q16 = 65536; psEnc->state_Fxx[ 1 ].sCmn.first_frame_after_reset = 1; } - silk_encode_do_VAD_Fxx( &psEnc->state_Fxx[ 1 ], activity ); + silk_encode_do_VAD_Fxx( &psEnc->state_Fxx[ 1 ] ); } else { psEnc->state_Fxx[ 1 ].sCmn.VAD_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] = 0; } @@ -451,7 +442,7 @@ opus_int silk_Encode( /* O Returns error co silk_memcpy( psEnc->state_Fxx[ 0 ].sCmn.inputBuf, psEnc->sStereo.sMid, 2 * sizeof( opus_int16 ) ); silk_memcpy( psEnc->sStereo.sMid, &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.frame_length ], 2 * sizeof( opus_int16 ) ); } - silk_encode_do_VAD_Fxx( &psEnc->state_Fxx[ 0 ], activity ); + silk_encode_do_VAD_Fxx( &psEnc->state_Fxx[ 0 ] ); /* Encode */ for( n = 0; n < encControl->nChannelsInternal; n++ ) { @@ -566,10 +557,6 @@ opus_int silk_Encode( /* O Returns error co } } - encControl->signalType = psEnc->state_Fxx[0].sCmn.indices.signalType; - encControl->offset = silk_Quantization_Offsets_Q10 - [ psEnc->state_Fxx[0].sCmn.indices.signalType >> 1 ] - [ psEnc->state_Fxx[0].sCmn.indices.quantOffsetType ]; RESTORE_STACK; return ret; } diff --git a/thirdparty/opus/silk/encode_indices.c b/thirdparty/opus/silk/encode_indices.c index 4bcbc3347b..666c8c0b13 100644 --- a/thirdparty/opus/silk/encode_indices.c +++ b/thirdparty/opus/silk/encode_indices.c @@ -56,8 +56,8 @@ void silk_encode_indices( /* Encode signal type and quantizer offset */ /*******************************************/ typeOffset = 2 * psIndices->signalType + psIndices->quantOffsetType; - celt_assert( typeOffset >= 0 && typeOffset < 6 ); - celt_assert( encode_LBRR == 0 || typeOffset >= 2 ); + silk_assert( typeOffset >= 0 && typeOffset < 6 ); + silk_assert( encode_LBRR == 0 || typeOffset >= 2 ); if( encode_LBRR || typeOffset >= 2 ) { ec_enc_icdf( psRangeEnc, typeOffset - 2, silk_type_offset_VAD_iCDF, 8 ); } else { @@ -90,7 +90,7 @@ void silk_encode_indices( /****************/ ec_enc_icdf( psRangeEnc, psIndices->NLSFIndices[ 0 ], &psEncC->psNLSF_CB->CB1_iCDF[ ( psIndices->signalType >> 1 ) * psEncC->psNLSF_CB->nVectors ], 8 ); silk_NLSF_unpack( ec_ix, pred_Q8, psEncC->psNLSF_CB, psIndices->NLSFIndices[ 0 ] ); - celt_assert( psEncC->psNLSF_CB->order == psEncC->predictLPCOrder ); + silk_assert( psEncC->psNLSF_CB->order == psEncC->predictLPCOrder ); for( i = 0; i < psEncC->psNLSF_CB->order; i++ ) { if( psIndices->NLSFIndices[ i+1 ] >= NLSF_QUANT_MAX_AMPLITUDE ) { ec_enc_icdf( psRangeEnc, 2 * NLSF_QUANT_MAX_AMPLITUDE, &psEncC->psNLSF_CB->ec_iCDF[ ec_ix[ i ] ], 8 ); diff --git a/thirdparty/opus/silk/encode_pulses.c b/thirdparty/opus/silk/encode_pulses.c index 8a1999138b..ab00264f99 100644 --- a/thirdparty/opus/silk/encode_pulses.c +++ b/thirdparty/opus/silk/encode_pulses.c @@ -86,7 +86,7 @@ void silk_encode_pulses( silk_assert( 1 << LOG2_SHELL_CODEC_FRAME_LENGTH == SHELL_CODEC_FRAME_LENGTH ); iter = silk_RSHIFT( frame_length, LOG2_SHELL_CODEC_FRAME_LENGTH ); if( iter * SHELL_CODEC_FRAME_LENGTH < frame_length ) { - celt_assert( frame_length == 12 * 10 ); /* Make sure only happens for 10 ms @ 12 kHz */ + silk_assert( frame_length == 12 * 10 ); /* Make sure only happens for 10 ms @ 12 kHz */ iter++; silk_memset( &pulses[ frame_length ], 0, SHELL_CODEC_FRAME_LENGTH * sizeof(opus_int8)); } diff --git a/thirdparty/opus/silk/fixed/apply_sine_window_FIX.c b/thirdparty/opus/silk/fixed/apply_sine_window_FIX.c index 03e088a6de..4502b7130e 100644 --- a/thirdparty/opus/silk/fixed/apply_sine_window_FIX.c +++ b/thirdparty/opus/silk/fixed/apply_sine_window_FIX.c @@ -57,15 +57,15 @@ void silk_apply_sine_window( opus_int k, f_Q16, c_Q16; opus_int32 S0_Q16, S1_Q16; - celt_assert( win_type == 1 || win_type == 2 ); + silk_assert( win_type == 1 || win_type == 2 ); /* Length must be in a range from 16 to 120 and a multiple of 4 */ - celt_assert( length >= 16 && length <= 120 ); - celt_assert( ( length & 3 ) == 0 ); + silk_assert( length >= 16 && length <= 120 ); + silk_assert( ( length & 3 ) == 0 ); /* Frequency */ k = ( length >> 2 ) - 4; - celt_assert( k >= 0 && k <= 26 ); + silk_assert( k >= 0 && k <= 26 ); f_Q16 = (opus_int)freq_table_Q16[ k ]; /* Factor used for cosine approximation */ diff --git a/thirdparty/opus/silk/fixed/arm/warped_autocorrelation_FIX_arm.h b/thirdparty/opus/silk/fixed/arm/warped_autocorrelation_FIX_arm.h deleted file mode 100644 index 1992e43288..0000000000 --- a/thirdparty/opus/silk/fixed/arm/warped_autocorrelation_FIX_arm.h +++ /dev/null @@ -1,68 +0,0 @@ -/*********************************************************************** -Copyright (c) 2017 Google Inc. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifndef SILK_WARPED_AUTOCORRELATION_FIX_ARM_H -# define SILK_WARPED_AUTOCORRELATION_FIX_ARM_H - -# include "celt/arm/armcpu.h" - -# if defined(FIXED_POINT) - -# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) -void silk_warped_autocorrelation_FIX_neon( - opus_int32 *corr, /* O Result [order + 1] */ - opus_int *scale, /* O Scaling of the correlation vector */ - const opus_int16 *input, /* I Input data to correlate */ - const opus_int warping_Q16, /* I Warping coefficient */ - const opus_int length, /* I Length of input */ - const opus_int order /* I Correlation order (even) */ -); - -# if !defined(OPUS_HAVE_RTCD) && defined(OPUS_ARM_PRESUME_NEON) -# define OVERRIDE_silk_warped_autocorrelation_FIX (1) -# define silk_warped_autocorrelation_FIX(corr, scale, input, warping_Q16, length, order, arch) \ - ((void)(arch), PRESUME_NEON(silk_warped_autocorrelation_FIX)(corr, scale, input, warping_Q16, length, order)) -# endif -# endif - -# if !defined(OVERRIDE_silk_warped_autocorrelation_FIX) -/*Is run-time CPU detection enabled on this platform?*/ -# if defined(OPUS_HAVE_RTCD) && (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR)) -extern void (*const SILK_WARPED_AUTOCORRELATION_FIX_IMPL[OPUS_ARCHMASK+1])(opus_int32*, opus_int*, const opus_int16*, const opus_int, const opus_int, const opus_int); -# define OVERRIDE_silk_warped_autocorrelation_FIX (1) -# define silk_warped_autocorrelation_FIX(corr, scale, input, warping_Q16, length, order, arch) \ - ((*SILK_WARPED_AUTOCORRELATION_FIX_IMPL[(arch)&OPUS_ARCHMASK])(corr, scale, input, warping_Q16, length, order)) -# elif defined(OPUS_ARM_PRESUME_NEON_INTR) -# define OVERRIDE_silk_warped_autocorrelation_FIX (1) -# define silk_warped_autocorrelation_FIX(corr, scale, input, warping_Q16, length, order, arch) \ - ((void)(arch), silk_warped_autocorrelation_FIX_neon(corr, scale, input, warping_Q16, length, order)) -# endif -# endif - -# endif /* end FIXED_POINT */ - -#endif /* end SILK_WARPED_AUTOCORRELATION_FIX_ARM_H */ diff --git a/thirdparty/opus/silk/fixed/arm/warped_autocorrelation_FIX_neon_intr.c b/thirdparty/opus/silk/fixed/arm/warped_autocorrelation_FIX_neon_intr.c deleted file mode 100644 index 00a70cb51f..0000000000 --- a/thirdparty/opus/silk/fixed/arm/warped_autocorrelation_FIX_neon_intr.c +++ /dev/null @@ -1,260 +0,0 @@ -/*********************************************************************** -Copyright (c) 2017 Google Inc., Jean-Marc Valin -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include <arm_neon.h> -#ifdef OPUS_CHECK_ASM -# include <string.h> -#endif -#include "stack_alloc.h" -#include "main_FIX.h" - -static OPUS_INLINE void calc_corr( const opus_int32 *const input_QS, opus_int64 *const corr_QC, const opus_int offset, const int32x4_t state_QS_s32x4 ) -{ - int64x2_t corr_QC_s64x2[ 2 ], t_s64x2[ 2 ]; - const int32x4_t input_QS_s32x4 = vld1q_s32( input_QS + offset ); - corr_QC_s64x2[ 0 ] = vld1q_s64( corr_QC + offset + 0 ); - corr_QC_s64x2[ 1 ] = vld1q_s64( corr_QC + offset + 2 ); - t_s64x2[ 0 ] = vmull_s32( vget_low_s32( state_QS_s32x4 ), vget_low_s32( input_QS_s32x4 ) ); - t_s64x2[ 1 ] = vmull_s32( vget_high_s32( state_QS_s32x4 ), vget_high_s32( input_QS_s32x4 ) ); - corr_QC_s64x2[ 0 ] = vsraq_n_s64( corr_QC_s64x2[ 0 ], t_s64x2[ 0 ], 2 * QS - QC ); - corr_QC_s64x2[ 1 ] = vsraq_n_s64( corr_QC_s64x2[ 1 ], t_s64x2[ 1 ], 2 * QS - QC ); - vst1q_s64( corr_QC + offset + 0, corr_QC_s64x2[ 0 ] ); - vst1q_s64( corr_QC + offset + 2, corr_QC_s64x2[ 1 ] ); -} - -static OPUS_INLINE int32x4_t calc_state( const int32x4_t state_QS0_s32x4, const int32x4_t state_QS0_1_s32x4, const int32x4_t state_QS1_1_s32x4, const int32x4_t warping_Q16_s32x4 ) -{ - int32x4_t t_s32x4 = vsubq_s32( state_QS0_s32x4, state_QS0_1_s32x4 ); - t_s32x4 = vqdmulhq_s32( t_s32x4, warping_Q16_s32x4 ); - return vaddq_s32( state_QS1_1_s32x4, t_s32x4 ); -} - -void silk_warped_autocorrelation_FIX_neon( - opus_int32 *corr, /* O Result [order + 1] */ - opus_int *scale, /* O Scaling of the correlation vector */ - const opus_int16 *input, /* I Input data to correlate */ - const opus_int warping_Q16, /* I Warping coefficient */ - const opus_int length, /* I Length of input */ - const opus_int order /* I Correlation order (even) */ -) -{ - if( ( MAX_SHAPE_LPC_ORDER > 24 ) || ( order < 6 ) ) { - silk_warped_autocorrelation_FIX_c( corr, scale, input, warping_Q16, length, order ); - } else { - opus_int n, i, lsh; - opus_int64 corr_QC[ MAX_SHAPE_LPC_ORDER + 1 ] = { 0 }; /* In reverse order */ - opus_int64 corr_QC_orderT; - int64x2_t lsh_s64x2; - const opus_int orderT = ( order + 3 ) & ~3; - opus_int64 *corr_QCT; - opus_int32 *input_QS; - VARDECL( opus_int32, input_QST ); - VARDECL( opus_int32, state ); - SAVE_STACK; - - /* Order must be even */ - silk_assert( ( order & 1 ) == 0 ); - silk_assert( 2 * QS - QC >= 0 ); - - ALLOC( input_QST, length + 2 * MAX_SHAPE_LPC_ORDER, opus_int32 ); - - input_QS = input_QST; - /* input_QS has zero paddings in the beginning and end. */ - vst1q_s32( input_QS, vdupq_n_s32( 0 ) ); - input_QS += 4; - vst1q_s32( input_QS, vdupq_n_s32( 0 ) ); - input_QS += 4; - vst1q_s32( input_QS, vdupq_n_s32( 0 ) ); - input_QS += 4; - vst1q_s32( input_QS, vdupq_n_s32( 0 ) ); - input_QS += 4; - vst1q_s32( input_QS, vdupq_n_s32( 0 ) ); - input_QS += 4; - vst1q_s32( input_QS, vdupq_n_s32( 0 ) ); - input_QS += 4; - - /* Loop over samples */ - for( n = 0; n < length - 7; n += 8, input_QS += 8 ) { - const int16x8_t t0_s16x4 = vld1q_s16( input + n ); - vst1q_s32( input_QS + 0, vshll_n_s16( vget_low_s16( t0_s16x4 ), QS ) ); - vst1q_s32( input_QS + 4, vshll_n_s16( vget_high_s16( t0_s16x4 ), QS ) ); - } - for( ; n < length; n++, input_QS++ ) { - input_QS[ 0 ] = silk_LSHIFT32( (opus_int32)input[ n ], QS ); - } - vst1q_s32( input_QS, vdupq_n_s32( 0 ) ); - input_QS += 4; - vst1q_s32( input_QS, vdupq_n_s32( 0 ) ); - input_QS += 4; - vst1q_s32( input_QS, vdupq_n_s32( 0 ) ); - input_QS += 4; - vst1q_s32( input_QS, vdupq_n_s32( 0 ) ); - input_QS += 4; - vst1q_s32( input_QS, vdupq_n_s32( 0 ) ); - input_QS += 4; - vst1q_s32( input_QS, vdupq_n_s32( 0 ) ); - input_QS = input_QST + MAX_SHAPE_LPC_ORDER - orderT; - - /* The following loop runs ( length + order ) times, with ( order ) extra epilogues. */ - /* The zero paddings in input_QS guarantee corr_QC's correctness even with the extra epilogues. */ - /* The values of state_QS will be polluted by the extra epilogues, however they are temporary values. */ - - /* Keep the C code here to help understand the intrinsics optimization. */ - /* - { - opus_int32 state_QS[ 2 ][ MAX_SHAPE_LPC_ORDER + 1 ] = { 0 }; - opus_int32 *state_QST[ 3 ]; - state_QST[ 0 ] = state_QS[ 0 ]; - state_QST[ 1 ] = state_QS[ 1 ]; - for( n = 0; n < length + order; n++, input_QS++ ) { - state_QST[ 0 ][ orderT ] = input_QS[ orderT ]; - for( i = 0; i < orderT; i++ ) { - corr_QC[ i ] += silk_RSHIFT64( silk_SMULL( state_QST[ 0 ][ i ], input_QS[ i ] ), 2 * QS - QC ); - state_QST[ 1 ][ i ] = silk_SMLAWB( state_QST[ 1 ][ i + 1 ], state_QST[ 0 ][ i ] - state_QST[ 0 ][ i + 1 ], warping_Q16 ); - } - state_QST[ 2 ] = state_QST[ 0 ]; - state_QST[ 0 ] = state_QST[ 1 ]; - state_QST[ 1 ] = state_QST[ 2 ]; - } - } - */ - - { - const int32x4_t warping_Q16_s32x4 = vdupq_n_s32( warping_Q16 << 15 ); - const opus_int32 *in = input_QS + orderT; - opus_int o = orderT; - int32x4_t state_QS_s32x4[ 3 ][ 2 ]; - - ALLOC( state, length + orderT, opus_int32 ); - state_QS_s32x4[ 2 ][ 1 ] = vdupq_n_s32( 0 ); - - /* Calculate 8 taps of all inputs in each loop. */ - do { - state_QS_s32x4[ 0 ][ 0 ] = state_QS_s32x4[ 0 ][ 1 ] = - state_QS_s32x4[ 1 ][ 0 ] = state_QS_s32x4[ 1 ][ 1 ] = vdupq_n_s32( 0 ); - n = 0; - do { - calc_corr( input_QS + n, corr_QC, o - 8, state_QS_s32x4[ 0 ][ 0 ] ); - calc_corr( input_QS + n, corr_QC, o - 4, state_QS_s32x4[ 0 ][ 1 ] ); - state_QS_s32x4[ 2 ][ 1 ] = vld1q_s32( in + n ); - vst1q_lane_s32( state + n, state_QS_s32x4[ 0 ][ 0 ], 0 ); - state_QS_s32x4[ 2 ][ 0 ] = vextq_s32( state_QS_s32x4[ 0 ][ 0 ], state_QS_s32x4[ 0 ][ 1 ], 1 ); - state_QS_s32x4[ 2 ][ 1 ] = vextq_s32( state_QS_s32x4[ 0 ][ 1 ], state_QS_s32x4[ 2 ][ 1 ], 1 ); - state_QS_s32x4[ 0 ][ 0 ] = calc_state( state_QS_s32x4[ 0 ][ 0 ], state_QS_s32x4[ 2 ][ 0 ], state_QS_s32x4[ 1 ][ 0 ], warping_Q16_s32x4 ); - state_QS_s32x4[ 0 ][ 1 ] = calc_state( state_QS_s32x4[ 0 ][ 1 ], state_QS_s32x4[ 2 ][ 1 ], state_QS_s32x4[ 1 ][ 1 ], warping_Q16_s32x4 ); - state_QS_s32x4[ 1 ][ 0 ] = state_QS_s32x4[ 2 ][ 0 ]; - state_QS_s32x4[ 1 ][ 1 ] = state_QS_s32x4[ 2 ][ 1 ]; - } while( ++n < ( length + order ) ); - in = state; - o -= 8; - } while( o > 4 ); - - if( o ) { - /* Calculate the last 4 taps of all inputs. */ - opus_int32 *stateT = state; - silk_assert( o == 4 ); - state_QS_s32x4[ 0 ][ 0 ] = state_QS_s32x4[ 1 ][ 0 ] = vdupq_n_s32( 0 ); - n = length + order; - do { - calc_corr( input_QS, corr_QC, 0, state_QS_s32x4[ 0 ][ 0 ] ); - state_QS_s32x4[ 2 ][ 0 ] = vld1q_s32( stateT ); - vst1q_lane_s32( stateT, state_QS_s32x4[ 0 ][ 0 ], 0 ); - state_QS_s32x4[ 2 ][ 0 ] = vextq_s32( state_QS_s32x4[ 0 ][ 0 ], state_QS_s32x4[ 2 ][ 0 ], 1 ); - state_QS_s32x4[ 0 ][ 0 ] = calc_state( state_QS_s32x4[ 0 ][ 0 ], state_QS_s32x4[ 2 ][ 0 ], state_QS_s32x4[ 1 ][ 0 ], warping_Q16_s32x4 ); - state_QS_s32x4[ 1 ][ 0 ] = state_QS_s32x4[ 2 ][ 0 ]; - input_QS++; - stateT++; - } while( --n ); - } - } - - { - const opus_int16 *inputT = input; - int32x4_t t_s32x4; - int64x1_t t_s64x1; - int64x2_t t_s64x2 = vdupq_n_s64( 0 ); - for( n = 0; n <= length - 8; n += 8 ) { - int16x8_t input_s16x8 = vld1q_s16( inputT ); - t_s32x4 = vmull_s16( vget_low_s16( input_s16x8 ), vget_low_s16( input_s16x8 ) ); - t_s32x4 = vmlal_s16( t_s32x4, vget_high_s16( input_s16x8 ), vget_high_s16( input_s16x8 ) ); - t_s64x2 = vaddw_s32( t_s64x2, vget_low_s32( t_s32x4 ) ); - t_s64x2 = vaddw_s32( t_s64x2, vget_high_s32( t_s32x4 ) ); - inputT += 8; - } - t_s64x1 = vadd_s64( vget_low_s64( t_s64x2 ), vget_high_s64( t_s64x2 ) ); - corr_QC_orderT = vget_lane_s64( t_s64x1, 0 ); - for( ; n < length; n++ ) { - corr_QC_orderT += silk_SMULL( input[ n ], input[ n ] ); - } - corr_QC_orderT = silk_LSHIFT64( corr_QC_orderT, QC ); - corr_QC[ orderT ] = corr_QC_orderT; - } - - corr_QCT = corr_QC + orderT - order; - lsh = silk_CLZ64( corr_QC_orderT ) - 35; - lsh = silk_LIMIT( lsh, -12 - QC, 30 - QC ); - *scale = -( QC + lsh ); - silk_assert( *scale >= -30 && *scale <= 12 ); - lsh_s64x2 = vdupq_n_s64( lsh ); - for( i = 0; i <= order - 3; i += 4 ) { - int32x4_t corr_s32x4; - int64x2_t corr_QC0_s64x2, corr_QC1_s64x2; - corr_QC0_s64x2 = vld1q_s64( corr_QCT + i ); - corr_QC1_s64x2 = vld1q_s64( corr_QCT + i + 2 ); - corr_QC0_s64x2 = vshlq_s64( corr_QC0_s64x2, lsh_s64x2 ); - corr_QC1_s64x2 = vshlq_s64( corr_QC1_s64x2, lsh_s64x2 ); - corr_s32x4 = vcombine_s32( vmovn_s64( corr_QC1_s64x2 ), vmovn_s64( corr_QC0_s64x2 ) ); - corr_s32x4 = vrev64q_s32( corr_s32x4 ); - vst1q_s32( corr + order - i - 3, corr_s32x4 ); - } - if( lsh >= 0 ) { - for( ; i < order + 1; i++ ) { - corr[ order - i ] = (opus_int32)silk_CHECK_FIT32( silk_LSHIFT64( corr_QCT[ i ], lsh ) ); - } - } else { - for( ; i < order + 1; i++ ) { - corr[ order - i ] = (opus_int32)silk_CHECK_FIT32( silk_RSHIFT64( corr_QCT[ i ], -lsh ) ); - } - } - silk_assert( corr_QCT[ order ] >= 0 ); /* If breaking, decrease QC*/ - RESTORE_STACK; - } - -#ifdef OPUS_CHECK_ASM - { - opus_int32 corr_c[ MAX_SHAPE_LPC_ORDER + 1 ]; - opus_int scale_c; - silk_warped_autocorrelation_FIX_c( corr_c, &scale_c, input, warping_Q16, length, order ); - silk_assert( !memcmp( corr_c, corr, sizeof( corr_c[ 0 ] ) * ( order + 1 ) ) ); - silk_assert( scale_c == *scale ); - } -#endif -} diff --git a/thirdparty/opus/silk/fixed/burg_modified_FIX.c b/thirdparty/opus/silk/fixed/burg_modified_FIX.c index 274d4b28e1..17d0e0993c 100644 --- a/thirdparty/opus/silk/fixed/burg_modified_FIX.c +++ b/thirdparty/opus/silk/fixed/burg_modified_FIX.c @@ -37,7 +37,7 @@ POSSIBILITY OF SUCH DAMAGE. #define MAX_FRAME_SIZE 384 /* subfr_length * nb_subfr = ( 0.005 * 16000 + 16 ) * 4 = 384 */ #define QA 25 -#define N_BITS_HEAD_ROOM 3 +#define N_BITS_HEAD_ROOM 2 #define MIN_RSHIFTS -16 #define MAX_RSHIFTS (32 - QA) @@ -65,7 +65,7 @@ void silk_burg_modified_c( opus_int32 xcorr[ SILK_MAX_ORDER_LPC ]; opus_int64 C0_64; - celt_assert( subfr_length * nb_subfr <= MAX_FRAME_SIZE ); + silk_assert( subfr_length * nb_subfr <= MAX_FRAME_SIZE ); /* Compute autocorrelations, added over subframes */ C0_64 = silk_inner_prod16_aligned_64( x, x, subfr_length*nb_subfr, arch ); diff --git a/thirdparty/opus/silk/fixed/corrMatrix_FIX.c b/thirdparty/opus/silk/fixed/corrMatrix_FIX.c index 1b4a29c232..c1d437c785 100644 --- a/thirdparty/opus/silk/fixed/corrMatrix_FIX.c +++ b/thirdparty/opus/silk/fixed/corrMatrix_FIX.c @@ -58,7 +58,7 @@ void silk_corrVector_FIX( for( lag = 0; lag < order; lag++ ) { inner_prod = 0; for( i = 0; i < L; i++ ) { - inner_prod = silk_ADD_RSHIFT32( inner_prod, silk_SMULBB( ptr1[ i ], ptr2[i] ), rshifts ); + inner_prod += silk_RSHIFT32( silk_SMULBB( ptr1[ i ], ptr2[i] ), rshifts ); } Xt[ lag ] = inner_prod; /* X[:,lag]'*t */ ptr1--; /* Go to next column of X */ @@ -77,54 +77,61 @@ void silk_corrMatrix_FIX( const opus_int16 *x, /* I x vector [L + order - 1] used to form data matrix X */ const opus_int L, /* I Length of vectors */ const opus_int order, /* I Max lag for correlation */ + const opus_int head_room, /* I Desired headroom */ opus_int32 *XX, /* O Pointer to X'*X correlation matrix [ order x order ] */ - opus_int32 *nrg, /* O Energy of x vector */ - opus_int *rshifts, /* O Right shifts of correlations and energy */ + opus_int *rshifts, /* I/O Right shifts of correlations */ int arch /* I Run-time architecture */ ) { - opus_int i, j, lag; + opus_int i, j, lag, rshifts_local, head_room_rshifts; opus_int32 energy; const opus_int16 *ptr1, *ptr2; /* Calculate energy to find shift used to fit in 32 bits */ - silk_sum_sqr_shift( nrg, rshifts, x, L + order - 1 ); - energy = *nrg; + silk_sum_sqr_shift( &energy, &rshifts_local, x, L + order - 1 ); + /* Add shifts to get the desired head room */ + head_room_rshifts = silk_max( head_room - silk_CLZ32( energy ), 0 ); + + energy = silk_RSHIFT32( energy, head_room_rshifts ); + rshifts_local += head_room_rshifts; /* Calculate energy of first column (0) of X: X[:,0]'*X[:,0] */ /* Remove contribution of first order - 1 samples */ for( i = 0; i < order - 1; i++ ) { - energy -= silk_RSHIFT32( silk_SMULBB( x[ i ], x[ i ] ), *rshifts ); + energy -= silk_RSHIFT32( silk_SMULBB( x[ i ], x[ i ] ), rshifts_local ); + } + if( rshifts_local < *rshifts ) { + /* Adjust energy */ + energy = silk_RSHIFT32( energy, *rshifts - rshifts_local ); + rshifts_local = *rshifts; } /* Calculate energy of remaining columns of X: X[:,j]'*X[:,j] */ /* Fill out the diagonal of the correlation matrix */ matrix_ptr( XX, 0, 0, order ) = energy; - silk_assert( energy >= 0 ); ptr1 = &x[ order - 1 ]; /* First sample of column 0 of X */ for( j = 1; j < order; j++ ) { - energy = silk_SUB32( energy, silk_RSHIFT32( silk_SMULBB( ptr1[ L - j ], ptr1[ L - j ] ), *rshifts ) ); - energy = silk_ADD32( energy, silk_RSHIFT32( silk_SMULBB( ptr1[ -j ], ptr1[ -j ] ), *rshifts ) ); + energy = silk_SUB32( energy, silk_RSHIFT32( silk_SMULBB( ptr1[ L - j ], ptr1[ L - j ] ), rshifts_local ) ); + energy = silk_ADD32( energy, silk_RSHIFT32( silk_SMULBB( ptr1[ -j ], ptr1[ -j ] ), rshifts_local ) ); matrix_ptr( XX, j, j, order ) = energy; - silk_assert( energy >= 0 ); } ptr2 = &x[ order - 2 ]; /* First sample of column 1 of X */ /* Calculate the remaining elements of the correlation matrix */ - if( *rshifts > 0 ) { + if( rshifts_local > 0 ) { /* Right shifting used */ for( lag = 1; lag < order; lag++ ) { /* Inner product of column 0 and column lag: X[:,0]'*X[:,lag] */ energy = 0; for( i = 0; i < L; i++ ) { - energy += silk_RSHIFT32( silk_SMULBB( ptr1[ i ], ptr2[i] ), *rshifts ); + energy += silk_RSHIFT32( silk_SMULBB( ptr1[ i ], ptr2[i] ), rshifts_local ); } /* Calculate remaining off diagonal: X[:,j]'*X[:,j + lag] */ matrix_ptr( XX, lag, 0, order ) = energy; matrix_ptr( XX, 0, lag, order ) = energy; for( j = 1; j < ( order - lag ); j++ ) { - energy = silk_SUB32( energy, silk_RSHIFT32( silk_SMULBB( ptr1[ L - j ], ptr2[ L - j ] ), *rshifts ) ); - energy = silk_ADD32( energy, silk_RSHIFT32( silk_SMULBB( ptr1[ -j ], ptr2[ -j ] ), *rshifts ) ); + energy = silk_SUB32( energy, silk_RSHIFT32( silk_SMULBB( ptr1[ L - j ], ptr2[ L - j ] ), rshifts_local ) ); + energy = silk_ADD32( energy, silk_RSHIFT32( silk_SMULBB( ptr1[ -j ], ptr2[ -j ] ), rshifts_local ) ); matrix_ptr( XX, lag + j, j, order ) = energy; matrix_ptr( XX, j, lag + j, order ) = energy; } @@ -146,5 +153,6 @@ void silk_corrMatrix_FIX( ptr2--;/* Update pointer to first sample of next column (lag) in X */ } } + *rshifts = rshifts_local; } diff --git a/thirdparty/opus/silk/fixed/encode_frame_FIX.c b/thirdparty/opus/silk/fixed/encode_frame_FIX.c index a02bf87dbb..5ef44b03fc 100644 --- a/thirdparty/opus/silk/fixed/encode_frame_FIX.c +++ b/thirdparty/opus/silk/fixed/encode_frame_FIX.c @@ -29,7 +29,6 @@ POSSIBILITY OF SUCH DAMAGE. #include "config.h" #endif -#include <stdlib.h> #include "main_FIX.h" #include "stack_alloc.h" #include "tuning_parameters.h" @@ -38,33 +37,26 @@ POSSIBILITY OF SUCH DAMAGE. static OPUS_INLINE void silk_LBRR_encode_FIX( silk_encoder_state_FIX *psEnc, /* I/O Pointer to Silk FIX encoder state */ silk_encoder_control_FIX *psEncCtrl, /* I/O Pointer to Silk FIX encoder control struct */ - const opus_int16 x16[], /* I Input signal */ + const opus_int32 xfw_Q3[], /* I Input signal */ opus_int condCoding /* I The type of conditional coding used so far for this frame */ ); void silk_encode_do_VAD_FIX( - silk_encoder_state_FIX *psEnc, /* I/O Pointer to Silk FIX encoder state */ - opus_int activity /* I Decision of Opus voice activity detector */ + silk_encoder_state_FIX *psEnc /* I/O Pointer to Silk FIX encoder state */ ) { - const opus_int activity_threshold = SILK_FIX_CONST( SPEECH_ACTIVITY_DTX_THRES, 8 ); - /****************************/ /* Voice Activity Detection */ /****************************/ silk_VAD_GetSA_Q8( &psEnc->sCmn, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.arch ); - /* If Opus VAD is inactive and Silk VAD is active: lower Silk VAD to just under the threshold */ - if( activity == VAD_NO_ACTIVITY && psEnc->sCmn.speech_activity_Q8 >= activity_threshold ) { - psEnc->sCmn.speech_activity_Q8 = activity_threshold - 1; - } /**************************************************/ /* Convert speech activity into VAD and DTX flags */ /**************************************************/ - if( psEnc->sCmn.speech_activity_Q8 < activity_threshold ) { + if( psEnc->sCmn.speech_activity_Q8 < SILK_FIX_CONST( SPEECH_ACTIVITY_DTX_THRES, 8 ) ) { psEnc->sCmn.indices.signalType = TYPE_NO_VOICE_ACTIVITY; psEnc->sCmn.noSpeechCounter++; - if( psEnc->sCmn.noSpeechCounter <= NB_SPEECH_FRAMES_BEFORE_DTX ) { + if( psEnc->sCmn.noSpeechCounter < NB_SPEECH_FRAMES_BEFORE_DTX ) { psEnc->sCmn.inDTX = 0; } else if( psEnc->sCmn.noSpeechCounter > MAX_CONSECUTIVE_DTX + NB_SPEECH_FRAMES_BEFORE_DTX ) { psEnc->sCmn.noSpeechCounter = NB_SPEECH_FRAMES_BEFORE_DTX; @@ -102,9 +94,6 @@ opus_int silk_encode_frame_FIX( opus_int16 ec_prevLagIndex_copy; opus_int ec_prevSignalType_copy; opus_int8 LastGainIndex_copy2; - opus_int gain_lock[ MAX_NB_SUBFR ] = {0}; - opus_int16 best_gain_mult[ MAX_NB_SUBFR ]; - opus_int best_sum[ MAX_NB_SUBFR ]; SAVE_STACK; /* This is totally unnecessary but many compilers (including gcc) are too dumb to realise it */ @@ -129,6 +118,7 @@ opus_int silk_encode_frame_FIX( silk_memcpy( x_frame + LA_SHAPE_MS * psEnc->sCmn.fs_kHz, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.frame_length * sizeof( opus_int16 ) ); if( !psEnc->sCmn.prefillFlag ) { + VARDECL( opus_int32, xfw_Q3 ); VARDECL( opus_int16, res_pitch ); VARDECL( opus_uint8, ec_buf_copy ); opus_int16 *res_pitch_frame; @@ -142,7 +132,7 @@ opus_int silk_encode_frame_FIX( /*****************************************/ /* Find pitch lags, initial LPC analysis */ /*****************************************/ - silk_find_pitch_lags_FIX( psEnc, &sEncCtrl, res_pitch, x_frame - psEnc->sCmn.ltp_mem_length, psEnc->sCmn.arch ); + silk_find_pitch_lags_FIX( psEnc, &sEncCtrl, res_pitch, x_frame, psEnc->sCmn.arch ); /************************/ /* Noise shape analysis */ @@ -152,17 +142,23 @@ opus_int silk_encode_frame_FIX( /***************************************************/ /* Find linear prediction coefficients (LPC + LTP) */ /***************************************************/ - silk_find_pred_coefs_FIX( psEnc, &sEncCtrl, res_pitch_frame, x_frame, condCoding ); + silk_find_pred_coefs_FIX( psEnc, &sEncCtrl, res_pitch, x_frame, condCoding ); /****************************************/ /* Process gains */ /****************************************/ silk_process_gains_FIX( psEnc, &sEncCtrl, condCoding ); + /*****************************************/ + /* Prefiltering for noise shaper */ + /*****************************************/ + ALLOC( xfw_Q3, psEnc->sCmn.frame_length, opus_int32 ); + silk_prefilter_FIX( psEnc, &sEncCtrl, xfw_Q3, x_frame ); + /****************************************/ /* Low Bitrate Redundant Encoding */ /****************************************/ - silk_LBRR_encode_FIX( psEnc, &sEncCtrl, x_frame, condCoding ); + silk_LBRR_encode_FIX( psEnc, &sEncCtrl, xfw_Q3, condCoding ); /* Loop over quantizer and entropy coding to control bitrate */ maxIter = 6; @@ -198,21 +194,17 @@ opus_int silk_encode_frame_FIX( /* Noise shaping quantization */ /*****************************************/ if( psEnc->sCmn.nStatesDelayedDecision > 1 || psEnc->sCmn.warping_Q16 > 0 ) { - silk_NSQ_del_dec( &psEnc->sCmn, &psEnc->sCmn.sNSQ, &psEnc->sCmn.indices, x_frame, psEnc->sCmn.pulses, - sEncCtrl.PredCoef_Q12[ 0 ], sEncCtrl.LTPCoef_Q14, sEncCtrl.AR_Q13, sEncCtrl.HarmShapeGain_Q14, + silk_NSQ_del_dec( &psEnc->sCmn, &psEnc->sCmn.sNSQ, &psEnc->sCmn.indices, xfw_Q3, psEnc->sCmn.pulses, + sEncCtrl.PredCoef_Q12[ 0 ], sEncCtrl.LTPCoef_Q14, sEncCtrl.AR2_Q13, sEncCtrl.HarmShapeGain_Q14, sEncCtrl.Tilt_Q14, sEncCtrl.LF_shp_Q14, sEncCtrl.Gains_Q16, sEncCtrl.pitchL, sEncCtrl.Lambda_Q10, sEncCtrl.LTP_scale_Q14, psEnc->sCmn.arch ); } else { - silk_NSQ( &psEnc->sCmn, &psEnc->sCmn.sNSQ, &psEnc->sCmn.indices, x_frame, psEnc->sCmn.pulses, - sEncCtrl.PredCoef_Q12[ 0 ], sEncCtrl.LTPCoef_Q14, sEncCtrl.AR_Q13, sEncCtrl.HarmShapeGain_Q14, + silk_NSQ( &psEnc->sCmn, &psEnc->sCmn.sNSQ, &psEnc->sCmn.indices, xfw_Q3, psEnc->sCmn.pulses, + sEncCtrl.PredCoef_Q12[ 0 ], sEncCtrl.LTPCoef_Q14, sEncCtrl.AR2_Q13, sEncCtrl.HarmShapeGain_Q14, sEncCtrl.Tilt_Q14, sEncCtrl.LF_shp_Q14, sEncCtrl.Gains_Q16, sEncCtrl.pitchL, sEncCtrl.Lambda_Q10, sEncCtrl.LTP_scale_Q14, psEnc->sCmn.arch); } - if ( iter == maxIter && !found_lower ) { - silk_memcpy( &sRangeEnc_copy2, psRangeEnc, sizeof( ec_enc ) ); - } - /****************************************/ /* Encode Parameters */ /****************************************/ @@ -226,33 +218,6 @@ opus_int silk_encode_frame_FIX( nBits = ec_tell( psRangeEnc ); - /* If we still bust after the last iteration, do some damage control. */ - if ( iter == maxIter && !found_lower && nBits > maxBits ) { - silk_memcpy( psRangeEnc, &sRangeEnc_copy2, sizeof( ec_enc ) ); - - /* Keep gains the same as the last frame. */ - psEnc->sShape.LastGainIndex = sEncCtrl.lastGainIndexPrev; - for ( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) { - psEnc->sCmn.indices.GainsIndices[ i ] = 4; - } - if (condCoding != CODE_CONDITIONALLY) { - psEnc->sCmn.indices.GainsIndices[ 0 ] = sEncCtrl.lastGainIndexPrev; - } - psEnc->sCmn.ec_prevLagIndex = ec_prevLagIndex_copy; - psEnc->sCmn.ec_prevSignalType = ec_prevSignalType_copy; - /* Clear all pulses. */ - for ( i = 0; i < psEnc->sCmn.frame_length; i++ ) { - psEnc->sCmn.pulses[ i ] = 0; - } - - silk_encode_indices( &psEnc->sCmn, psRangeEnc, psEnc->sCmn.nFramesEncoded, 0, condCoding ); - - silk_encode_pulses( psRangeEnc, psEnc->sCmn.indices.signalType, psEnc->sCmn.indices.quantOffsetType, - psEnc->sCmn.pulses, psEnc->sCmn.frame_length ); - - nBits = ec_tell( psRangeEnc ); - } - if( useCBR == 0 && iter == 0 && nBits <= maxBits ) { break; } @@ -262,7 +227,7 @@ opus_int silk_encode_frame_FIX( if( found_lower && ( gainsID == gainsID_lower || nBits > maxBits ) ) { /* Restore output state from earlier iteration that did meet the bitrate budget */ silk_memcpy( psRangeEnc, &sRangeEnc_copy2, sizeof( ec_enc ) ); - celt_assert( sRangeEnc_copy2.offs <= 1275 ); + silk_assert( sRangeEnc_copy2.offs <= 1275 ); silk_memcpy( psRangeEnc->buf, ec_buf_copy, sRangeEnc_copy2.offs ); silk_memcpy( &psEnc->sCmn.sNSQ, &sNSQ_copy2, sizeof( silk_nsq_state ) ); psEnc->sShape.LastGainIndex = LastGainIndex_copy2; @@ -290,7 +255,7 @@ opus_int silk_encode_frame_FIX( gainsID_lower = gainsID; /* Copy part of the output state */ silk_memcpy( &sRangeEnc_copy2, psRangeEnc, sizeof( ec_enc ) ); - celt_assert( psRangeEnc->offs <= 1275 ); + silk_assert( psRangeEnc->offs <= 1275 ); silk_memcpy( ec_buf_copy, psRangeEnc->buf, psRangeEnc->offs ); silk_memcpy( &sNSQ_copy2, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) ); LastGainIndex_copy2 = psEnc->sShape.LastGainIndex; @@ -300,35 +265,15 @@ opus_int silk_encode_frame_FIX( break; } - if ( !found_lower && nBits > maxBits ) { - int j; - for ( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) { - int sum=0; - for ( j = i*psEnc->sCmn.subfr_length; j < (i+1)*psEnc->sCmn.subfr_length; j++ ) { - sum += abs( psEnc->sCmn.pulses[j] ); - } - if ( iter == 0 || (sum < best_sum[i] && !gain_lock[i]) ) { - best_sum[i] = sum; - best_gain_mult[i] = gainMult_Q8; - } else { - gain_lock[i] = 1; - } - } - } if( ( found_lower & found_upper ) == 0 ) { /* Adjust gain according to high-rate rate/distortion curve */ + opus_int32 gain_factor_Q16; + gain_factor_Q16 = silk_log2lin( silk_LSHIFT( nBits - maxBits, 7 ) / psEnc->sCmn.frame_length + SILK_FIX_CONST( 16, 7 ) ); + gain_factor_Q16 = silk_min_32( gain_factor_Q16, SILK_FIX_CONST( 2, 16 ) ); if( nBits > maxBits ) { - if (gainMult_Q8 < 16384) { - gainMult_Q8 *= 2; - } else { - gainMult_Q8 = 32767; - } - } else { - opus_int32 gain_factor_Q16; - gain_factor_Q16 = silk_log2lin( silk_LSHIFT( nBits - maxBits, 7 ) / psEnc->sCmn.frame_length + SILK_FIX_CONST( 16, 7 ) ); - gainMult_Q8 = silk_SMULWB( gain_factor_Q16, gainMult_Q8 ); + gain_factor_Q16 = silk_max_32( gain_factor_Q16, SILK_FIX_CONST( 1.3, 16 ) ); } - + gainMult_Q8 = silk_SMULWB( gain_factor_Q16, gainMult_Q8 ); } else { /* Adjust gain by interpolating */ gainMult_Q8 = gainMult_lower + silk_DIV32_16( silk_MUL( gainMult_upper - gainMult_lower, maxBits - nBits_lower ), nBits_upper - nBits_lower ); @@ -342,13 +287,7 @@ opus_int silk_encode_frame_FIX( } for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) { - opus_int16 tmp; - if ( gain_lock[i] ) { - tmp = best_gain_mult[i]; - } else { - tmp = gainMult_Q8; - } - sEncCtrl.Gains_Q16[ i ] = silk_LSHIFT_SAT32( silk_SMULWB( sEncCtrl.GainsUnq_Q16[ i ], tmp ), 8 ); + sEncCtrl.Gains_Q16[ i ] = silk_LSHIFT_SAT32( silk_SMULWB( sEncCtrl.GainsUnq_Q16[ i ], gainMult_Q8 ), 8 ); } /* Quantize gains */ @@ -392,7 +331,7 @@ opus_int silk_encode_frame_FIX( static OPUS_INLINE void silk_LBRR_encode_FIX( silk_encoder_state_FIX *psEnc, /* I/O Pointer to Silk FIX encoder state */ silk_encoder_control_FIX *psEncCtrl, /* I/O Pointer to Silk FIX encoder control struct */ - const opus_int16 x16[], /* I Input signal */ + const opus_int32 xfw_Q3[], /* I Input signal */ opus_int condCoding /* I The type of conditional coding used so far for this frame */ ) { @@ -431,14 +370,14 @@ static OPUS_INLINE void silk_LBRR_encode_FIX( /* Noise shaping quantization */ /*****************************************/ if( psEnc->sCmn.nStatesDelayedDecision > 1 || psEnc->sCmn.warping_Q16 > 0 ) { - silk_NSQ_del_dec( &psEnc->sCmn, &sNSQ_LBRR, psIndices_LBRR, x16, + silk_NSQ_del_dec( &psEnc->sCmn, &sNSQ_LBRR, psIndices_LBRR, xfw_Q3, psEnc->sCmn.pulses_LBRR[ psEnc->sCmn.nFramesEncoded ], psEncCtrl->PredCoef_Q12[ 0 ], psEncCtrl->LTPCoef_Q14, - psEncCtrl->AR_Q13, psEncCtrl->HarmShapeGain_Q14, psEncCtrl->Tilt_Q14, psEncCtrl->LF_shp_Q14, + psEncCtrl->AR2_Q13, psEncCtrl->HarmShapeGain_Q14, psEncCtrl->Tilt_Q14, psEncCtrl->LF_shp_Q14, psEncCtrl->Gains_Q16, psEncCtrl->pitchL, psEncCtrl->Lambda_Q10, psEncCtrl->LTP_scale_Q14, psEnc->sCmn.arch ); } else { - silk_NSQ( &psEnc->sCmn, &sNSQ_LBRR, psIndices_LBRR, x16, + silk_NSQ( &psEnc->sCmn, &sNSQ_LBRR, psIndices_LBRR, xfw_Q3, psEnc->sCmn.pulses_LBRR[ psEnc->sCmn.nFramesEncoded ], psEncCtrl->PredCoef_Q12[ 0 ], psEncCtrl->LTPCoef_Q14, - psEncCtrl->AR_Q13, psEncCtrl->HarmShapeGain_Q14, psEncCtrl->Tilt_Q14, psEncCtrl->LF_shp_Q14, + psEncCtrl->AR2_Q13, psEncCtrl->HarmShapeGain_Q14, psEncCtrl->Tilt_Q14, psEncCtrl->LF_shp_Q14, psEncCtrl->Gains_Q16, psEncCtrl->pitchL, psEncCtrl->Lambda_Q10, psEncCtrl->LTP_scale_Q14, psEnc->sCmn.arch ); } diff --git a/thirdparty/opus/silk/fixed/find_LPC_FIX.c b/thirdparty/opus/silk/fixed/find_LPC_FIX.c index c762a0f2a2..e11cdc86e6 100644 --- a/thirdparty/opus/silk/fixed/find_LPC_FIX.c +++ b/thirdparty/opus/silk/fixed/find_LPC_FIX.c @@ -92,7 +92,7 @@ void silk_find_LPC_FIX( silk_interpolate( NLSF0_Q15, psEncC->prev_NLSFq_Q15, NLSF_Q15, k, psEncC->predictLPCOrder ); /* Convert to LPC for residual energy evaluation */ - silk_NLSF2A( a_tmp_Q12, NLSF0_Q15, psEncC->predictLPCOrder, psEncC->arch ); + silk_NLSF2A( a_tmp_Q12, NLSF0_Q15, psEncC->predictLPCOrder ); /* Calculate residual energy with NLSF interpolation */ silk_LPC_analysis_filter( LPC_res, x, a_tmp_Q12, 2 * subfr_length, psEncC->predictLPCOrder, psEncC->arch ); @@ -146,6 +146,6 @@ void silk_find_LPC_FIX( silk_A2NLSF( NLSF_Q15, a_Q16, psEncC->predictLPCOrder ); } - celt_assert( psEncC->indices.NLSFInterpCoef_Q2 == 4 || ( psEncC->useInterpolatedNLSFs && !psEncC->first_frame_after_reset && psEncC->nb_subfr == MAX_NB_SUBFR ) ); + silk_assert( psEncC->indices.NLSFInterpCoef_Q2 == 4 || ( psEncC->useInterpolatedNLSFs && !psEncC->first_frame_after_reset && psEncC->nb_subfr == MAX_NB_SUBFR ) ); RESTORE_STACK; } diff --git a/thirdparty/opus/silk/fixed/find_LTP_FIX.c b/thirdparty/opus/silk/fixed/find_LTP_FIX.c index 62d4afb250..1314a28137 100644 --- a/thirdparty/opus/silk/fixed/find_LTP_FIX.c +++ b/thirdparty/opus/silk/fixed/find_LTP_FIX.c @@ -32,68 +32,214 @@ POSSIBILITY OF SUCH DAMAGE. #include "main_FIX.h" #include "tuning_parameters.h" +/* Head room for correlations */ +#define LTP_CORRS_HEAD_ROOM 2 + +void silk_fit_LTP( + opus_int32 LTP_coefs_Q16[ LTP_ORDER ], + opus_int16 LTP_coefs_Q14[ LTP_ORDER ] +); + void silk_find_LTP_FIX( - opus_int32 XXLTP_Q17[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* O Correlation matrix */ - opus_int32 xXLTP_Q17[ MAX_NB_SUBFR * LTP_ORDER ], /* O Correlation vector */ - const opus_int16 r_ptr[], /* I Residual signal after LPC */ + opus_int16 b_Q14[ MAX_NB_SUBFR * LTP_ORDER ], /* O LTP coefs */ + opus_int32 WLTP[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* O Weight for LTP quantization */ + opus_int *LTPredCodGain_Q7, /* O LTP coding gain */ + const opus_int16 r_lpc[], /* I residual signal after LPC signal + state for first 10 ms */ const opus_int lag[ MAX_NB_SUBFR ], /* I LTP lags */ - const opus_int subfr_length, /* I Subframe length */ - const opus_int nb_subfr, /* I Number of subframes */ + const opus_int32 Wght_Q15[ MAX_NB_SUBFR ], /* I weights */ + const opus_int subfr_length, /* I subframe length */ + const opus_int nb_subfr, /* I number of subframes */ + const opus_int mem_offset, /* I number of samples in LTP memory */ + opus_int corr_rshifts[ MAX_NB_SUBFR ], /* O right shifts applied to correlations */ int arch /* I Run-time architecture */ ) { - opus_int i, k, extra_shifts; - opus_int xx_shifts, xX_shifts, XX_shifts; - const opus_int16 *lag_ptr; - opus_int32 *XXLTP_Q17_ptr, *xXLTP_Q17_ptr; - opus_int32 xx, nrg, temp; - - xXLTP_Q17_ptr = xXLTP_Q17; - XXLTP_Q17_ptr = XXLTP_Q17; + opus_int i, k, lshift; + const opus_int16 *r_ptr, *lag_ptr; + opus_int16 *b_Q14_ptr; + + opus_int32 regu; + opus_int32 *WLTP_ptr; + opus_int32 b_Q16[ LTP_ORDER ], delta_b_Q14[ LTP_ORDER ], d_Q14[ MAX_NB_SUBFR ], nrg[ MAX_NB_SUBFR ], g_Q26; + opus_int32 w[ MAX_NB_SUBFR ], WLTP_max, max_abs_d_Q14, max_w_bits; + + opus_int32 temp32, denom32; + opus_int extra_shifts; + opus_int rr_shifts, maxRshifts, maxRshifts_wxtra, LZs; + opus_int32 LPC_res_nrg, LPC_LTP_res_nrg, div_Q16; + opus_int32 Rr[ LTP_ORDER ], rr[ MAX_NB_SUBFR ]; + opus_int32 wd, m_Q12; + + b_Q14_ptr = b_Q14; + WLTP_ptr = WLTP; + r_ptr = &r_lpc[ mem_offset ]; for( k = 0; k < nb_subfr; k++ ) { lag_ptr = r_ptr - ( lag[ k ] + LTP_ORDER / 2 ); - silk_sum_sqr_shift( &xx, &xx_shifts, r_ptr, subfr_length + LTP_ORDER ); /* xx in Q( -xx_shifts ) */ - silk_corrMatrix_FIX( lag_ptr, subfr_length, LTP_ORDER, XXLTP_Q17_ptr, &nrg, &XX_shifts, arch ); /* XXLTP_Q17_ptr and nrg in Q( -XX_shifts ) */ - extra_shifts = xx_shifts - XX_shifts; - if( extra_shifts > 0 ) { - /* Shift XX */ - xX_shifts = xx_shifts; - for( i = 0; i < LTP_ORDER * LTP_ORDER; i++ ) { - XXLTP_Q17_ptr[ i ] = silk_RSHIFT32( XXLTP_Q17_ptr[ i ], extra_shifts ); /* Q( -xX_shifts ) */ - } - nrg = silk_RSHIFT32( nrg, extra_shifts ); /* Q( -xX_shifts ) */ - } else if( extra_shifts < 0 ) { - /* Shift xx */ - xX_shifts = XX_shifts; - xx = silk_RSHIFT32( xx, -extra_shifts ); /* Q( -xX_shifts ) */ - } else { - xX_shifts = xx_shifts; + silk_sum_sqr_shift( &rr[ k ], &rr_shifts, r_ptr, subfr_length ); /* rr[ k ] in Q( -rr_shifts ) */ + + /* Assure headroom */ + LZs = silk_CLZ32( rr[k] ); + if( LZs < LTP_CORRS_HEAD_ROOM ) { + rr[ k ] = silk_RSHIFT_ROUND( rr[ k ], LTP_CORRS_HEAD_ROOM - LZs ); + rr_shifts += ( LTP_CORRS_HEAD_ROOM - LZs ); } - silk_corrVector_FIX( lag_ptr, r_ptr, subfr_length, LTP_ORDER, xXLTP_Q17_ptr, xX_shifts, arch ); /* xXLTP_Q17_ptr in Q( -xX_shifts ) */ + corr_rshifts[ k ] = rr_shifts; + silk_corrMatrix_FIX( lag_ptr, subfr_length, LTP_ORDER, LTP_CORRS_HEAD_ROOM, WLTP_ptr, &corr_rshifts[ k ], arch ); /* WLTP_fix_ptr in Q( -corr_rshifts[ k ] ) */ + + /* The correlation vector always has lower max abs value than rr and/or RR so head room is assured */ + silk_corrVector_FIX( lag_ptr, r_ptr, subfr_length, LTP_ORDER, Rr, corr_rshifts[ k ], arch ); /* Rr_fix_ptr in Q( -corr_rshifts[ k ] ) */ + if( corr_rshifts[ k ] > rr_shifts ) { + rr[ k ] = silk_RSHIFT( rr[ k ], corr_rshifts[ k ] - rr_shifts ); /* rr[ k ] in Q( -corr_rshifts[ k ] ) */ + } + silk_assert( rr[ k ] >= 0 ); + + regu = 1; + regu = silk_SMLAWB( regu, rr[ k ], SILK_FIX_CONST( LTP_DAMPING/3, 16 ) ); + regu = silk_SMLAWB( regu, matrix_ptr( WLTP_ptr, 0, 0, LTP_ORDER ), SILK_FIX_CONST( LTP_DAMPING/3, 16 ) ); + regu = silk_SMLAWB( regu, matrix_ptr( WLTP_ptr, LTP_ORDER-1, LTP_ORDER-1, LTP_ORDER ), SILK_FIX_CONST( LTP_DAMPING/3, 16 ) ); + silk_regularize_correlations_FIX( WLTP_ptr, &rr[k], regu, LTP_ORDER ); + + silk_solve_LDL_FIX( WLTP_ptr, LTP_ORDER, Rr, b_Q16 ); /* WLTP_fix_ptr and Rr_fix_ptr both in Q(-corr_rshifts[k]) */ + + /* Limit and store in Q14 */ + silk_fit_LTP( b_Q16, b_Q14_ptr ); + + /* Calculate residual energy */ + nrg[ k ] = silk_residual_energy16_covar_FIX( b_Q14_ptr, WLTP_ptr, Rr, rr[ k ], LTP_ORDER, 14 ); /* nrg_fix in Q( -corr_rshifts[ k ] ) */ + + /* temp = Wght[ k ] / ( nrg[ k ] * Wght[ k ] + 0.01f * subfr_length ); */ + extra_shifts = silk_min_int( corr_rshifts[ k ], LTP_CORRS_HEAD_ROOM ); + denom32 = silk_LSHIFT_SAT32( silk_SMULWB( nrg[ k ], Wght_Q15[ k ] ), 1 + extra_shifts ) + /* Q( -corr_rshifts[ k ] + extra_shifts ) */ + silk_RSHIFT( silk_SMULWB( (opus_int32)subfr_length, 655 ), corr_rshifts[ k ] - extra_shifts ); /* Q( -corr_rshifts[ k ] + extra_shifts ) */ + denom32 = silk_max( denom32, 1 ); + silk_assert( ((opus_int64)Wght_Q15[ k ] << 16 ) < silk_int32_MAX ); /* Wght always < 0.5 in Q0 */ + temp32 = silk_DIV32( silk_LSHIFT( (opus_int32)Wght_Q15[ k ], 16 ), denom32 ); /* Q( 15 + 16 + corr_rshifts[k] - extra_shifts ) */ + temp32 = silk_RSHIFT( temp32, 31 + corr_rshifts[ k ] - extra_shifts - 26 ); /* Q26 */ - /* At this point all correlations are in Q(-xX_shifts) */ - temp = silk_SMLAWB( 1, nrg, SILK_FIX_CONST( LTP_CORR_INV_MAX, 16 ) ); - temp = silk_max( temp, xx ); -TIC(div) -#if 0 + /* Limit temp such that the below scaling never wraps around */ + WLTP_max = 0; for( i = 0; i < LTP_ORDER * LTP_ORDER; i++ ) { - XXLTP_Q17_ptr[ i ] = silk_DIV32_varQ( XXLTP_Q17_ptr[ i ], temp, 17 ); + WLTP_max = silk_max( WLTP_ptr[ i ], WLTP_max ); } + lshift = silk_CLZ32( WLTP_max ) - 1 - 3; /* keep 3 bits free for vq_nearest_neighbor_fix */ + silk_assert( 26 - 18 + lshift >= 0 ); + if( 26 - 18 + lshift < 31 ) { + temp32 = silk_min_32( temp32, silk_LSHIFT( (opus_int32)1, 26 - 18 + lshift ) ); + } + + silk_scale_vector32_Q26_lshift_18( WLTP_ptr, temp32, LTP_ORDER * LTP_ORDER ); /* WLTP_ptr in Q( 18 - corr_rshifts[ k ] ) */ + + w[ k ] = matrix_ptr( WLTP_ptr, LTP_ORDER/2, LTP_ORDER/2, LTP_ORDER ); /* w in Q( 18 - corr_rshifts[ k ] ) */ + silk_assert( w[k] >= 0 ); + + r_ptr += subfr_length; + b_Q14_ptr += LTP_ORDER; + WLTP_ptr += LTP_ORDER * LTP_ORDER; + } + + maxRshifts = 0; + for( k = 0; k < nb_subfr; k++ ) { + maxRshifts = silk_max_int( corr_rshifts[ k ], maxRshifts ); + } + + /* Compute LTP coding gain */ + if( LTPredCodGain_Q7 != NULL ) { + LPC_LTP_res_nrg = 0; + LPC_res_nrg = 0; + silk_assert( LTP_CORRS_HEAD_ROOM >= 2 ); /* Check that no overflow will happen when adding */ + for( k = 0; k < nb_subfr; k++ ) { + LPC_res_nrg = silk_ADD32( LPC_res_nrg, silk_RSHIFT( silk_ADD32( silk_SMULWB( rr[ k ], Wght_Q15[ k ] ), 1 ), 1 + ( maxRshifts - corr_rshifts[ k ] ) ) ); /* Q( -maxRshifts ) */ + LPC_LTP_res_nrg = silk_ADD32( LPC_LTP_res_nrg, silk_RSHIFT( silk_ADD32( silk_SMULWB( nrg[ k ], Wght_Q15[ k ] ), 1 ), 1 + ( maxRshifts - corr_rshifts[ k ] ) ) ); /* Q( -maxRshifts ) */ + } + LPC_LTP_res_nrg = silk_max( LPC_LTP_res_nrg, 1 ); /* avoid division by zero */ + + div_Q16 = silk_DIV32_varQ( LPC_res_nrg, LPC_LTP_res_nrg, 16 ); + *LTPredCodGain_Q7 = ( opus_int )silk_SMULBB( 3, silk_lin2log( div_Q16 ) - ( 16 << 7 ) ); + + silk_assert( *LTPredCodGain_Q7 == ( opus_int )silk_SAT16( silk_MUL( 3, silk_lin2log( div_Q16 ) - ( 16 << 7 ) ) ) ); + } + + /* smoothing */ + /* d = sum( B, 1 ); */ + b_Q14_ptr = b_Q14; + for( k = 0; k < nb_subfr; k++ ) { + d_Q14[ k ] = 0; for( i = 0; i < LTP_ORDER; i++ ) { - xXLTP_Q17_ptr[ i ] = silk_DIV32_varQ( xXLTP_Q17_ptr[ i ], temp, 17 ); + d_Q14[ k ] += b_Q14_ptr[ i ]; } -#else - for( i = 0; i < LTP_ORDER * LTP_ORDER; i++ ) { - XXLTP_Q17_ptr[ i ] = (opus_int32)( silk_LSHIFT64( (opus_int64)XXLTP_Q17_ptr[ i ], 17 ) / temp ); + b_Q14_ptr += LTP_ORDER; + } + + /* m = ( w * d' ) / ( sum( w ) + 1e-3 ); */ + + /* Find maximum absolute value of d_Q14 and the bits used by w in Q0 */ + max_abs_d_Q14 = 0; + max_w_bits = 0; + for( k = 0; k < nb_subfr; k++ ) { + max_abs_d_Q14 = silk_max_32( max_abs_d_Q14, silk_abs( d_Q14[ k ] ) ); + /* w[ k ] is in Q( 18 - corr_rshifts[ k ] ) */ + /* Find bits needed in Q( 18 - maxRshifts ) */ + max_w_bits = silk_max_32( max_w_bits, 32 - silk_CLZ32( w[ k ] ) + corr_rshifts[ k ] - maxRshifts ); + } + + /* max_abs_d_Q14 = (5 << 15); worst case, i.e. LTP_ORDER * -silk_int16_MIN */ + silk_assert( max_abs_d_Q14 <= ( 5 << 15 ) ); + + /* How many bits is needed for w*d' in Q( 18 - maxRshifts ) in the worst case, of all d_Q14's being equal to max_abs_d_Q14 */ + extra_shifts = max_w_bits + 32 - silk_CLZ32( max_abs_d_Q14 ) - 14; + + /* Subtract what we got available; bits in output var plus maxRshifts */ + extra_shifts -= ( 32 - 1 - 2 + maxRshifts ); /* Keep sign bit free as well as 2 bits for accumulation */ + extra_shifts = silk_max_int( extra_shifts, 0 ); + + maxRshifts_wxtra = maxRshifts + extra_shifts; + + temp32 = silk_RSHIFT( 262, maxRshifts + extra_shifts ) + 1; /* 1e-3f in Q( 18 - (maxRshifts + extra_shifts) ) */ + wd = 0; + for( k = 0; k < nb_subfr; k++ ) { + /* w has at least 2 bits of headroom so no overflow should happen */ + temp32 = silk_ADD32( temp32, silk_RSHIFT( w[ k ], maxRshifts_wxtra - corr_rshifts[ k ] ) ); /* Q( 18 - maxRshifts_wxtra ) */ + wd = silk_ADD32( wd, silk_LSHIFT( silk_SMULWW( silk_RSHIFT( w[ k ], maxRshifts_wxtra - corr_rshifts[ k ] ), d_Q14[ k ] ), 2 ) ); /* Q( 18 - maxRshifts_wxtra ) */ + } + m_Q12 = silk_DIV32_varQ( wd, temp32, 12 ); + + b_Q14_ptr = b_Q14; + for( k = 0; k < nb_subfr; k++ ) { + /* w_fix[ k ] from Q( 18 - corr_rshifts[ k ] ) to Q( 16 ) */ + if( 2 - corr_rshifts[k] > 0 ) { + temp32 = silk_RSHIFT( w[ k ], 2 - corr_rshifts[ k ] ); + } else { + temp32 = silk_LSHIFT_SAT32( w[ k ], corr_rshifts[ k ] - 2 ); } + + g_Q26 = silk_MUL( + silk_DIV32( + SILK_FIX_CONST( LTP_SMOOTHING, 26 ), + silk_RSHIFT( SILK_FIX_CONST( LTP_SMOOTHING, 26 ), 10 ) + temp32 ), /* Q10 */ + silk_LSHIFT_SAT32( silk_SUB_SAT32( (opus_int32)m_Q12, silk_RSHIFT( d_Q14[ k ], 2 ) ), 4 ) ); /* Q16 */ + + temp32 = 0; for( i = 0; i < LTP_ORDER; i++ ) { - xXLTP_Q17_ptr[ i ] = (opus_int32)( silk_LSHIFT64( (opus_int64)xXLTP_Q17_ptr[ i ], 17 ) / temp ); + delta_b_Q14[ i ] = silk_max_16( b_Q14_ptr[ i ], 1638 ); /* 1638_Q14 = 0.1_Q0 */ + temp32 += delta_b_Q14[ i ]; /* Q14 */ } -#endif -TOC(div) - r_ptr += subfr_length; - XXLTP_Q17_ptr += LTP_ORDER * LTP_ORDER; - xXLTP_Q17_ptr += LTP_ORDER; + temp32 = silk_DIV32( g_Q26, temp32 ); /* Q14 -> Q12 */ + for( i = 0; i < LTP_ORDER; i++ ) { + b_Q14_ptr[ i ] = silk_LIMIT_32( (opus_int32)b_Q14_ptr[ i ] + silk_SMULWB( silk_LSHIFT_SAT32( temp32, 4 ), delta_b_Q14[ i ] ), -16000, 28000 ); + } + b_Q14_ptr += LTP_ORDER; + } +} + +void silk_fit_LTP( + opus_int32 LTP_coefs_Q16[ LTP_ORDER ], + opus_int16 LTP_coefs_Q14[ LTP_ORDER ] +) +{ + opus_int i; + + for( i = 0; i < LTP_ORDER; i++ ) { + LTP_coefs_Q14[ i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( LTP_coefs_Q16[ i ], 2 ) ); } } diff --git a/thirdparty/opus/silk/fixed/find_pitch_lags_FIX.c b/thirdparty/opus/silk/fixed/find_pitch_lags_FIX.c index 6c3379f2bb..b8440a8247 100644 --- a/thirdparty/opus/silk/fixed/find_pitch_lags_FIX.c +++ b/thirdparty/opus/silk/fixed/find_pitch_lags_FIX.c @@ -44,7 +44,7 @@ void silk_find_pitch_lags_FIX( { opus_int buf_len, i, scale; opus_int32 thrhld_Q13, res_nrg; - const opus_int16 *x_ptr; + const opus_int16 *x_buf, *x_buf_ptr; VARDECL( opus_int16, Wsig ); opus_int16 *Wsig_ptr; opus_int32 auto_corr[ MAX_FIND_PITCH_LPC_ORDER + 1 ]; @@ -59,7 +59,9 @@ void silk_find_pitch_lags_FIX( buf_len = psEnc->sCmn.la_pitch + psEnc->sCmn.frame_length + psEnc->sCmn.ltp_mem_length; /* Safety check */ - celt_assert( buf_len >= psEnc->sCmn.pitch_LPC_win_length ); + silk_assert( buf_len >= psEnc->sCmn.pitch_LPC_win_length ); + + x_buf = x - psEnc->sCmn.ltp_mem_length; /*************************************/ /* Estimate LPC AR coefficients */ @@ -70,19 +72,19 @@ void silk_find_pitch_lags_FIX( ALLOC( Wsig, psEnc->sCmn.pitch_LPC_win_length, opus_int16 ); /* First LA_LTP samples */ - x_ptr = x + buf_len - psEnc->sCmn.pitch_LPC_win_length; + x_buf_ptr = x_buf + buf_len - psEnc->sCmn.pitch_LPC_win_length; Wsig_ptr = Wsig; - silk_apply_sine_window( Wsig_ptr, x_ptr, 1, psEnc->sCmn.la_pitch ); + silk_apply_sine_window( Wsig_ptr, x_buf_ptr, 1, psEnc->sCmn.la_pitch ); /* Middle un - windowed samples */ Wsig_ptr += psEnc->sCmn.la_pitch; - x_ptr += psEnc->sCmn.la_pitch; - silk_memcpy( Wsig_ptr, x_ptr, ( psEnc->sCmn.pitch_LPC_win_length - silk_LSHIFT( psEnc->sCmn.la_pitch, 1 ) ) * sizeof( opus_int16 ) ); + x_buf_ptr += psEnc->sCmn.la_pitch; + silk_memcpy( Wsig_ptr, x_buf_ptr, ( psEnc->sCmn.pitch_LPC_win_length - silk_LSHIFT( psEnc->sCmn.la_pitch, 1 ) ) * sizeof( opus_int16 ) ); /* Last LA_LTP samples */ Wsig_ptr += psEnc->sCmn.pitch_LPC_win_length - silk_LSHIFT( psEnc->sCmn.la_pitch, 1 ); - x_ptr += psEnc->sCmn.pitch_LPC_win_length - silk_LSHIFT( psEnc->sCmn.la_pitch, 1 ); - silk_apply_sine_window( Wsig_ptr, x_ptr, 2, psEnc->sCmn.la_pitch ); + x_buf_ptr += psEnc->sCmn.pitch_LPC_win_length - silk_LSHIFT( psEnc->sCmn.la_pitch, 1 ); + silk_apply_sine_window( Wsig_ptr, x_buf_ptr, 2, psEnc->sCmn.la_pitch ); /* Calculate autocorrelation sequence */ silk_autocorr( auto_corr, &scale, Wsig, psEnc->sCmn.pitch_LPC_win_length, psEnc->sCmn.pitchEstimationLPCOrder + 1, arch ); @@ -110,7 +112,7 @@ void silk_find_pitch_lags_FIX( /*****************************************/ /* LPC analysis filtering */ /*****************************************/ - silk_LPC_analysis_filter( res, x, A_Q12, buf_len, psEnc->sCmn.pitchEstimationLPCOrder, psEnc->sCmn.arch ); + silk_LPC_analysis_filter( res, x_buf, A_Q12, buf_len, psEnc->sCmn.pitchEstimationLPCOrder, psEnc->sCmn.arch ); if( psEnc->sCmn.indices.signalType != TYPE_NO_VOICE_ACTIVITY && psEnc->sCmn.first_frame_after_reset == 0 ) { /* Threshold for pitch estimator */ diff --git a/thirdparty/opus/silk/fixed/find_pred_coefs_FIX.c b/thirdparty/opus/silk/fixed/find_pred_coefs_FIX.c index 606d863347..d308e9cf5f 100644 --- a/thirdparty/opus/silk/fixed/find_pred_coefs_FIX.c +++ b/thirdparty/opus/silk/fixed/find_pred_coefs_FIX.c @@ -41,12 +41,13 @@ void silk_find_pred_coefs_FIX( ) { opus_int i; - opus_int32 invGains_Q16[ MAX_NB_SUBFR ], local_gains[ MAX_NB_SUBFR ]; + opus_int32 invGains_Q16[ MAX_NB_SUBFR ], local_gains[ MAX_NB_SUBFR ], Wght_Q15[ MAX_NB_SUBFR ]; opus_int16 NLSF_Q15[ MAX_LPC_ORDER ]; const opus_int16 *x_ptr; opus_int16 *x_pre_ptr; VARDECL( opus_int16, LPC_in_pre ); - opus_int32 min_gain_Q16, minInvGain_Q30; + opus_int32 tmp, min_gain_Q16, minInvGain_Q30; + opus_int LTP_corrs_rshift[ MAX_NB_SUBFR ]; SAVE_STACK; /* weighting for weighted least squares */ @@ -60,11 +61,13 @@ void silk_find_pred_coefs_FIX( /* Invert and normalize gains, and ensure that maximum invGains_Q16 is within range of a 16 bit int */ invGains_Q16[ i ] = silk_DIV32_varQ( min_gain_Q16, psEncCtrl->Gains_Q16[ i ], 16 - 2 ); - /* Limit inverse */ - invGains_Q16[ i ] = silk_max( invGains_Q16[ i ], 100 ); + /* Ensure Wght_Q15 a minimum value 1 */ + invGains_Q16[ i ] = silk_max( invGains_Q16[ i ], 363 ); /* Square the inverted gains */ silk_assert( invGains_Q16[ i ] == silk_SAT16( invGains_Q16[ i ] ) ); + tmp = silk_SMULWB( invGains_Q16[ i ], invGains_Q16[ i ] ); + Wght_Q15[ i ] = silk_RSHIFT( tmp, 1 ); /* Invert the inverted and normalized gains */ local_gains[ i ] = silk_DIV32( ( (opus_int32)1 << 16 ), invGains_Q16[ i ] ); @@ -74,24 +77,24 @@ void silk_find_pred_coefs_FIX( psEnc->sCmn.nb_subfr * psEnc->sCmn.predictLPCOrder + psEnc->sCmn.frame_length, opus_int16 ); if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) { - VARDECL( opus_int32, xXLTP_Q17 ); - VARDECL( opus_int32, XXLTP_Q17 ); + VARDECL( opus_int32, WLTP ); /**********/ /* VOICED */ /**********/ - celt_assert( psEnc->sCmn.ltp_mem_length - psEnc->sCmn.predictLPCOrder >= psEncCtrl->pitchL[ 0 ] + LTP_ORDER / 2 ); + silk_assert( psEnc->sCmn.ltp_mem_length - psEnc->sCmn.predictLPCOrder >= psEncCtrl->pitchL[ 0 ] + LTP_ORDER / 2 ); - ALLOC( xXLTP_Q17, psEnc->sCmn.nb_subfr * LTP_ORDER, opus_int32 ); - ALLOC( XXLTP_Q17, psEnc->sCmn.nb_subfr * LTP_ORDER * LTP_ORDER, opus_int32 ); + ALLOC( WLTP, psEnc->sCmn.nb_subfr * LTP_ORDER * LTP_ORDER, opus_int32 ); /* LTP analysis */ - silk_find_LTP_FIX( XXLTP_Q17, xXLTP_Q17, res_pitch, - psEncCtrl->pitchL, psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr, psEnc->sCmn.arch ); + silk_find_LTP_FIX( psEncCtrl->LTPCoef_Q14, WLTP, &psEncCtrl->LTPredCodGain_Q7, + res_pitch, psEncCtrl->pitchL, Wght_Q15, psEnc->sCmn.subfr_length, + psEnc->sCmn.nb_subfr, psEnc->sCmn.ltp_mem_length, LTP_corrs_rshift, psEnc->sCmn.arch ); /* Quantize LTP gain parameters */ silk_quant_LTP_gains( psEncCtrl->LTPCoef_Q14, psEnc->sCmn.indices.LTPIndex, &psEnc->sCmn.indices.PERIndex, - &psEnc->sCmn.sum_log_gain_Q7, &psEncCtrl->LTPredCodGain_Q7, XXLTP_Q17, xXLTP_Q17, psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr, psEnc->sCmn.arch ); + &psEnc->sCmn.sum_log_gain_Q7, WLTP, psEnc->sCmn.mu_LTP_Q9, psEnc->sCmn.LTPQuantLowComplexity, psEnc->sCmn.nb_subfr, + psEnc->sCmn.arch); /* Control LTP scaling */ silk_LTP_scale_ctrl_FIX( psEnc, psEncCtrl, condCoding ); diff --git a/thirdparty/opus/silk/fixed/k2a_FIX.c b/thirdparty/opus/silk/fixed/k2a_FIX.c index 549f6eadaa..5fee599bcb 100644 --- a/thirdparty/opus/silk/fixed/k2a_FIX.c +++ b/thirdparty/opus/silk/fixed/k2a_FIX.c @@ -39,15 +39,14 @@ void silk_k2a( ) { opus_int k, n; - opus_int32 rc, tmp1, tmp2; + opus_int32 Atmp[ SILK_MAX_ORDER_LPC ]; for( k = 0; k < order; k++ ) { - rc = rc_Q15[ k ]; - for( n = 0; n < (k + 1) >> 1; n++ ) { - tmp1 = A_Q24[ n ]; - tmp2 = A_Q24[ k - n - 1 ]; - A_Q24[ n ] = silk_SMLAWB( tmp1, silk_LSHIFT( tmp2, 1 ), rc ); - A_Q24[ k - n - 1 ] = silk_SMLAWB( tmp2, silk_LSHIFT( tmp1, 1 ), rc ); + for( n = 0; n < k; n++ ) { + Atmp[ n ] = A_Q24[ n ]; + } + for( n = 0; n < k; n++ ) { + A_Q24[ n ] = silk_SMLAWB( A_Q24[ n ], silk_LSHIFT( Atmp[ k - n - 1 ], 1 ), rc_Q15[ k ] ); } A_Q24[ k ] = -silk_LSHIFT( (opus_int32)rc_Q15[ k ], 9 ); } diff --git a/thirdparty/opus/silk/fixed/k2a_Q16_FIX.c b/thirdparty/opus/silk/fixed/k2a_Q16_FIX.c index 1595aa6212..3b03987544 100644 --- a/thirdparty/opus/silk/fixed/k2a_Q16_FIX.c +++ b/thirdparty/opus/silk/fixed/k2a_Q16_FIX.c @@ -39,16 +39,15 @@ void silk_k2a_Q16( ) { opus_int k, n; - opus_int32 rc, tmp1, tmp2; + opus_int32 Atmp[ SILK_MAX_ORDER_LPC ]; for( k = 0; k < order; k++ ) { - rc = rc_Q16[ k ]; - for( n = 0; n < (k + 1) >> 1; n++ ) { - tmp1 = A_Q24[ n ]; - tmp2 = A_Q24[ k - n - 1 ]; - A_Q24[ n ] = silk_SMLAWW( tmp1, tmp2, rc ); - A_Q24[ k - n - 1 ] = silk_SMLAWW( tmp2, tmp1, rc ); + for( n = 0; n < k; n++ ) { + Atmp[ n ] = A_Q24[ n ]; } - A_Q24[ k ] = -silk_LSHIFT( rc, 8 ); + for( n = 0; n < k; n++ ) { + A_Q24[ n ] = silk_SMLAWW( A_Q24[ n ], Atmp[ k - n - 1 ], rc_Q16[ k ] ); + } + A_Q24[ k ] = -silk_LSHIFT( rc_Q16[ k ], 8 ); } } diff --git a/thirdparty/opus/silk/fixed/main_FIX.h b/thirdparty/opus/silk/fixed/main_FIX.h index 6d2112e511..375b5eb32e 100644 --- a/thirdparty/opus/silk/fixed/main_FIX.h +++ b/thirdparty/opus/silk/fixed/main_FIX.h @@ -36,11 +36,6 @@ POSSIBILITY OF SUCH DAMAGE. #include "debug.h" #include "entenc.h" -#if ((defined(OPUS_ARM_ASM) && defined(FIXED_POINT)) \ - || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)) -#include "fixed/arm/warped_autocorrelation_FIX_arm.h" -#endif - #ifndef FORCE_CPP_BUILD #ifdef __cplusplus extern "C" @@ -52,9 +47,6 @@ extern "C" #define silk_encode_do_VAD_Fxx silk_encode_do_VAD_FIX #define silk_encode_frame_Fxx silk_encode_frame_FIX -#define QC 10 -#define QS 13 - /*********************/ /* Encoder Functions */ /*********************/ @@ -66,8 +58,7 @@ void silk_HP_variable_cutoff( /* Encoder main function */ void silk_encode_do_VAD_FIX( - silk_encoder_state_FIX *psEnc, /* I/O Pointer to Silk FIX encoder state */ - opus_int activity /* I Decision of Opus voice activity detector */ + silk_encoder_state_FIX *psEnc /* I/O Pointer to Silk FIX encoder state */ ); /* Encoder main function */ @@ -90,11 +81,33 @@ opus_int silk_init_encoder( opus_int silk_control_encoder( silk_encoder_state_Fxx *psEnc, /* I/O Pointer to Silk encoder state */ silk_EncControlStruct *encControl, /* I Control structure */ + const opus_int32 TargetRate_bps, /* I Target max bitrate (bps) */ const opus_int allow_bw_switch, /* I Flag to allow switching audio bandwidth */ const opus_int channelNb, /* I Channel number */ const opus_int force_fs_kHz ); +/****************/ +/* Prefiltering */ +/****************/ +void silk_prefilter_FIX( + silk_encoder_state_FIX *psEnc, /* I/O Encoder state */ + const silk_encoder_control_FIX *psEncCtrl, /* I Encoder control */ + opus_int32 xw_Q10[], /* O Weighted signal */ + const opus_int16 x[] /* I Speech signal */ +); + +void silk_warped_LPC_analysis_filter_FIX_c( + opus_int32 state[], /* I/O State [order + 1] */ + opus_int32 res_Q2[], /* O Residual signal [length] */ + const opus_int16 coef_Q13[], /* I Coefficients [order] */ + const opus_int16 input[], /* I Input signal [length] */ + const opus_int16 lambda_Q16, /* I Warping factor */ + const opus_int length, /* I Length of input signal */ + const opus_int order /* I Filter order (even) */ +); + + /**************************/ /* Noise shaping analysis */ /**************************/ @@ -108,7 +121,7 @@ void silk_noise_shape_analysis_FIX( ); /* Autocorrelations for a warped frequency axis */ -void silk_warped_autocorrelation_FIX_c( +void silk_warped_autocorrelation_FIX( opus_int32 *corr, /* O Result [order + 1] */ opus_int *scale, /* O Scaling of the correlation vector */ const opus_int16 *input, /* I Input data to correlate */ @@ -117,11 +130,6 @@ void silk_warped_autocorrelation_FIX_c( const opus_int order /* I Correlation order (even) */ ); -#if !defined(OVERRIDE_silk_warped_autocorrelation_FIX) -#define silk_warped_autocorrelation_FIX(corr, scale, input, warping_Q16, length, order, arch) \ - ((void)(arch), silk_warped_autocorrelation_FIX_c(corr, scale, input, warping_Q16, length, order)) -#endif - /* Calculation of LTP state scaling */ void silk_LTP_scale_ctrl_FIX( silk_encoder_state_FIX *psEnc, /* I/O encoder state */ @@ -160,12 +168,16 @@ void silk_find_LPC_FIX( /* LTP analysis */ void silk_find_LTP_FIX( - opus_int32 XXLTP_Q17[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* O Correlation matrix */ - opus_int32 xXLTP_Q17[ MAX_NB_SUBFR * LTP_ORDER ], /* O Correlation vector */ - const opus_int16 r_lpc[], /* I Residual signal after LPC */ + opus_int16 b_Q14[ MAX_NB_SUBFR * LTP_ORDER ], /* O LTP coefs */ + opus_int32 WLTP[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* O Weight for LTP quantization */ + opus_int *LTPredCodGain_Q7, /* O LTP coding gain */ + const opus_int16 r_lpc[], /* I residual signal after LPC signal + state for first 10 ms */ const opus_int lag[ MAX_NB_SUBFR ], /* I LTP lags */ - const opus_int subfr_length, /* I Subframe length */ - const opus_int nb_subfr, /* I Number of subframes */ + const opus_int32 Wght_Q15[ MAX_NB_SUBFR ], /* I weights */ + const opus_int subfr_length, /* I subframe length */ + const opus_int nb_subfr, /* I number of subframes */ + const opus_int mem_offset, /* I number of samples in LTP memory */ + opus_int corr_rshifts[ MAX_NB_SUBFR ], /* O right shifts applied to correlations */ int arch /* I Run-time architecture */ ); @@ -219,9 +231,9 @@ void silk_corrMatrix_FIX( const opus_int16 *x, /* I x vector [L + order - 1] used to form data matrix X */ const opus_int L, /* I Length of vectors */ const opus_int order, /* I Max lag for correlation */ + const opus_int head_room, /* I Desired headroom */ opus_int32 *XX, /* O Pointer to X'*X correlation matrix [ order x order ] */ - opus_int32 *nrg, /* O Energy of x vector */ - opus_int *rshifts, /* O Right shifts of correlations */ + opus_int *rshifts, /* I/O Right shifts of correlations */ int arch /* I Run-time architecture */ ); @@ -236,6 +248,22 @@ void silk_corrVector_FIX( int arch /* I Run-time architecture */ ); +/* Add noise to matrix diagonal */ +void silk_regularize_correlations_FIX( + opus_int32 *XX, /* I/O Correlation matrices */ + opus_int32 *xx, /* I/O Correlation values */ + opus_int32 noise, /* I Noise to add */ + opus_int D /* I Dimension of XX */ +); + +/* Solves Ax = b, assuming A is symmetric */ +void silk_solve_LDL_FIX( + opus_int32 *A, /* I Pointer to symetric square matrix A */ + opus_int M, /* I Size of matrix */ + const opus_int32 *b, /* I Pointer to b vector */ + opus_int32 *x_Q16 /* O Pointer to x solution vector */ +); + #ifndef FORCE_CPP_BUILD #ifdef __cplusplus } diff --git a/thirdparty/opus/silk/fixed/mips/noise_shape_analysis_FIX_mipsr1.h b/thirdparty/opus/silk/fixed/mips/noise_shape_analysis_FIX_mipsr1.h index 3999b5bd09..c30481e437 100644 --- a/thirdparty/opus/silk/fixed/mips/noise_shape_analysis_FIX_mipsr1.h +++ b/thirdparty/opus/silk/fixed/mips/noise_shape_analysis_FIX_mipsr1.h @@ -169,7 +169,7 @@ void silk_noise_shape_analysis_FIX( if( psEnc->sCmn.warping_Q16 > 0 ) { /* Calculate warped auto correlation */ - silk_warped_autocorrelation_FIX( auto_corr, &scale, x_windowed, warping_Q16, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder, arch ); + silk_warped_autocorrelation_FIX( auto_corr, &scale, x_windowed, warping_Q16, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder ); } else { /* Calculate regular auto correlation */ silk_autocorr( auto_corr, &scale, x_windowed, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder + 1, arch ); @@ -224,8 +224,8 @@ void silk_noise_shape_analysis_FIX( silk_bwexpander_32( AR1_Q24, psEnc->sCmn.shapingLPCOrder, BWExp1_Q16 ); /* Ratio of prediction gains, in energy domain */ - pre_nrg_Q30 = silk_LPC_inverse_pred_gain_Q24( AR2_Q24, psEnc->sCmn.shapingLPCOrder, arch ); - nrg = silk_LPC_inverse_pred_gain_Q24( AR1_Q24, psEnc->sCmn.shapingLPCOrder, arch ); + pre_nrg_Q30 = silk_LPC_inverse_pred_gain_Q24( AR2_Q24, psEnc->sCmn.shapingLPCOrder ); + nrg = silk_LPC_inverse_pred_gain_Q24( AR1_Q24, psEnc->sCmn.shapingLPCOrder ); /*psEncCtrl->GainsPre[ k ] = 1.0f - 0.7f * ( 1.0f - pre_nrg / nrg ) = 0.3f + 0.7f * pre_nrg / nrg;*/ pre_nrg_Q30 = silk_LSHIFT32( silk_SMULWB( pre_nrg_Q30, SILK_FIX_CONST( 0.7, 15 ) ), 1 ); diff --git a/thirdparty/opus/silk/fixed/mips/warped_autocorrelation_FIX_mipsr1.h b/thirdparty/opus/silk/fixed/mips/warped_autocorrelation_FIX_mipsr1.h index 66eb2ed26d..e803ef0fce 100644 --- a/thirdparty/opus/silk/fixed/mips/warped_autocorrelation_FIX_mipsr1.h +++ b/thirdparty/opus/silk/fixed/mips/warped_autocorrelation_FIX_mipsr1.h @@ -41,8 +41,8 @@ POSSIBILITY OF SUCH DAMAGE. #define QS 14 /* Autocorrelations for a warped frequency axis */ -#define OVERRIDE_silk_warped_autocorrelation_FIX_c -void silk_warped_autocorrelation_FIX_c( +#define OVERRIDE_silk_warped_autocorrelation_FIX +void silk_warped_autocorrelation_FIX( opus_int32 *corr, /* O Result [order + 1] */ opus_int *scale, /* O Scaling of the correlation vector */ const opus_int16 *input, /* I Input data to correlate */ diff --git a/thirdparty/opus/silk/fixed/noise_shape_analysis_FIX.c b/thirdparty/opus/silk/fixed/noise_shape_analysis_FIX.c index 85fea0bf09..22a89f75ae 100644 --- a/thirdparty/opus/silk/fixed/noise_shape_analysis_FIX.c +++ b/thirdparty/opus/silk/fixed/noise_shape_analysis_FIX.c @@ -57,79 +57,88 @@ static OPUS_INLINE opus_int32 warped_gain( /* gain in Q16*/ /* Convert warped filter coefficients to monic pseudo-warped coefficients and limit maximum */ /* amplitude of monic warped coefficients by using bandwidth expansion on the true coefficients */ static OPUS_INLINE void limit_warped_coefs( - opus_int32 *coefs_Q24, + opus_int32 *coefs_syn_Q24, + opus_int32 *coefs_ana_Q24, opus_int lambda_Q16, opus_int32 limit_Q24, opus_int order ) { opus_int i, iter, ind = 0; - opus_int32 tmp, maxabs_Q24, chirp_Q16, gain_Q16; + opus_int32 tmp, maxabs_Q24, chirp_Q16, gain_syn_Q16, gain_ana_Q16; opus_int32 nom_Q16, den_Q24; - opus_int32 limit_Q20, maxabs_Q20; /* Convert to monic coefficients */ lambda_Q16 = -lambda_Q16; for( i = order - 1; i > 0; i-- ) { - coefs_Q24[ i - 1 ] = silk_SMLAWB( coefs_Q24[ i - 1 ], coefs_Q24[ i ], lambda_Q16 ); + coefs_syn_Q24[ i - 1 ] = silk_SMLAWB( coefs_syn_Q24[ i - 1 ], coefs_syn_Q24[ i ], lambda_Q16 ); + coefs_ana_Q24[ i - 1 ] = silk_SMLAWB( coefs_ana_Q24[ i - 1 ], coefs_ana_Q24[ i ], lambda_Q16 ); } lambda_Q16 = -lambda_Q16; - nom_Q16 = silk_SMLAWB( SILK_FIX_CONST( 1.0, 16 ), -(opus_int32)lambda_Q16, lambda_Q16 ); - den_Q24 = silk_SMLAWB( SILK_FIX_CONST( 1.0, 24 ), coefs_Q24[ 0 ], lambda_Q16 ); - gain_Q16 = silk_DIV32_varQ( nom_Q16, den_Q24, 24 ); + nom_Q16 = silk_SMLAWB( SILK_FIX_CONST( 1.0, 16 ), -(opus_int32)lambda_Q16, lambda_Q16 ); + den_Q24 = silk_SMLAWB( SILK_FIX_CONST( 1.0, 24 ), coefs_syn_Q24[ 0 ], lambda_Q16 ); + gain_syn_Q16 = silk_DIV32_varQ( nom_Q16, den_Q24, 24 ); + den_Q24 = silk_SMLAWB( SILK_FIX_CONST( 1.0, 24 ), coefs_ana_Q24[ 0 ], lambda_Q16 ); + gain_ana_Q16 = silk_DIV32_varQ( nom_Q16, den_Q24, 24 ); for( i = 0; i < order; i++ ) { - coefs_Q24[ i ] = silk_SMULWW( gain_Q16, coefs_Q24[ i ] ); + coefs_syn_Q24[ i ] = silk_SMULWW( gain_syn_Q16, coefs_syn_Q24[ i ] ); + coefs_ana_Q24[ i ] = silk_SMULWW( gain_ana_Q16, coefs_ana_Q24[ i ] ); } - limit_Q20 = silk_RSHIFT(limit_Q24, 4); + for( iter = 0; iter < 10; iter++ ) { /* Find maximum absolute value */ maxabs_Q24 = -1; for( i = 0; i < order; i++ ) { - tmp = silk_abs_int32( coefs_Q24[ i ] ); + tmp = silk_max( silk_abs_int32( coefs_syn_Q24[ i ] ), silk_abs_int32( coefs_ana_Q24[ i ] ) ); if( tmp > maxabs_Q24 ) { maxabs_Q24 = tmp; ind = i; } } - /* Use Q20 to avoid any overflow when multiplying by (ind + 1) later. */ - maxabs_Q20 = silk_RSHIFT(maxabs_Q24, 4); - if( maxabs_Q20 <= limit_Q20 ) { + if( maxabs_Q24 <= limit_Q24 ) { /* Coefficients are within range - done */ return; } /* Convert back to true warped coefficients */ for( i = 1; i < order; i++ ) { - coefs_Q24[ i - 1 ] = silk_SMLAWB( coefs_Q24[ i - 1 ], coefs_Q24[ i ], lambda_Q16 ); + coefs_syn_Q24[ i - 1 ] = silk_SMLAWB( coefs_syn_Q24[ i - 1 ], coefs_syn_Q24[ i ], lambda_Q16 ); + coefs_ana_Q24[ i - 1 ] = silk_SMLAWB( coefs_ana_Q24[ i - 1 ], coefs_ana_Q24[ i ], lambda_Q16 ); } - gain_Q16 = silk_INVERSE32_varQ( gain_Q16, 32 ); + gain_syn_Q16 = silk_INVERSE32_varQ( gain_syn_Q16, 32 ); + gain_ana_Q16 = silk_INVERSE32_varQ( gain_ana_Q16, 32 ); for( i = 0; i < order; i++ ) { - coefs_Q24[ i ] = silk_SMULWW( gain_Q16, coefs_Q24[ i ] ); + coefs_syn_Q24[ i ] = silk_SMULWW( gain_syn_Q16, coefs_syn_Q24[ i ] ); + coefs_ana_Q24[ i ] = silk_SMULWW( gain_ana_Q16, coefs_ana_Q24[ i ] ); } /* Apply bandwidth expansion */ chirp_Q16 = SILK_FIX_CONST( 0.99, 16 ) - silk_DIV32_varQ( - silk_SMULWB( maxabs_Q20 - limit_Q20, silk_SMLABB( SILK_FIX_CONST( 0.8, 10 ), SILK_FIX_CONST( 0.1, 10 ), iter ) ), - silk_MUL( maxabs_Q20, ind + 1 ), 22 ); - silk_bwexpander_32( coefs_Q24, order, chirp_Q16 ); + silk_SMULWB( maxabs_Q24 - limit_Q24, silk_SMLABB( SILK_FIX_CONST( 0.8, 10 ), SILK_FIX_CONST( 0.1, 10 ), iter ) ), + silk_MUL( maxabs_Q24, ind + 1 ), 22 ); + silk_bwexpander_32( coefs_syn_Q24, order, chirp_Q16 ); + silk_bwexpander_32( coefs_ana_Q24, order, chirp_Q16 ); /* Convert to monic warped coefficients */ lambda_Q16 = -lambda_Q16; for( i = order - 1; i > 0; i-- ) { - coefs_Q24[ i - 1 ] = silk_SMLAWB( coefs_Q24[ i - 1 ], coefs_Q24[ i ], lambda_Q16 ); + coefs_syn_Q24[ i - 1 ] = silk_SMLAWB( coefs_syn_Q24[ i - 1 ], coefs_syn_Q24[ i ], lambda_Q16 ); + coefs_ana_Q24[ i - 1 ] = silk_SMLAWB( coefs_ana_Q24[ i - 1 ], coefs_ana_Q24[ i ], lambda_Q16 ); } lambda_Q16 = -lambda_Q16; nom_Q16 = silk_SMLAWB( SILK_FIX_CONST( 1.0, 16 ), -(opus_int32)lambda_Q16, lambda_Q16 ); - den_Q24 = silk_SMLAWB( SILK_FIX_CONST( 1.0, 24 ), coefs_Q24[ 0 ], lambda_Q16 ); - gain_Q16 = silk_DIV32_varQ( nom_Q16, den_Q24, 24 ); + den_Q24 = silk_SMLAWB( SILK_FIX_CONST( 1.0, 24 ), coefs_syn_Q24[ 0 ], lambda_Q16 ); + gain_syn_Q16 = silk_DIV32_varQ( nom_Q16, den_Q24, 24 ); + den_Q24 = silk_SMLAWB( SILK_FIX_CONST( 1.0, 24 ), coefs_ana_Q24[ 0 ], lambda_Q16 ); + gain_ana_Q16 = silk_DIV32_varQ( nom_Q16, den_Q24, 24 ); for( i = 0; i < order; i++ ) { - coefs_Q24[ i ] = silk_SMULWW( gain_Q16, coefs_Q24[ i ] ); + coefs_syn_Q24[ i ] = silk_SMULWW( gain_syn_Q16, coefs_syn_Q24[ i ] ); + coefs_ana_Q24[ i ] = silk_SMULWW( gain_ana_Q16, coefs_ana_Q24[ i ] ); } } silk_assert( 0 ); } -/* Disable MIPS version until it's updated. */ -#if 0 && defined(MIPSr1_ASM) +#if defined(MIPSr1_ASM) #include "mips/noise_shape_analysis_FIX_mipsr1.h" #endif @@ -146,13 +155,14 @@ void silk_noise_shape_analysis_FIX( ) { silk_shape_state_FIX *psShapeSt = &psEnc->sShape; - opus_int k, i, nSamples, nSegs, Qnrg, b_Q14, warping_Q16, scale = 0; - opus_int32 SNR_adj_dB_Q7, HarmShapeGain_Q16, Tilt_Q16, tmp32; - opus_int32 nrg, log_energy_Q7, log_energy_prev_Q7, energy_variation_Q7; - opus_int32 BWExp_Q16, gain_mult_Q16, gain_add_Q16, strength_Q16, b_Q8; + opus_int k, i, nSamples, Qnrg, b_Q14, warping_Q16, scale = 0; + opus_int32 SNR_adj_dB_Q7, HarmBoost_Q16, HarmShapeGain_Q16, Tilt_Q16, tmp32; + opus_int32 nrg, pre_nrg_Q30, log_energy_Q7, log_energy_prev_Q7, energy_variation_Q7; + opus_int32 delta_Q16, BWExp1_Q16, BWExp2_Q16, gain_mult_Q16, gain_add_Q16, strength_Q16, b_Q8; opus_int32 auto_corr[ MAX_SHAPE_LPC_ORDER + 1 ]; opus_int32 refl_coef_Q16[ MAX_SHAPE_LPC_ORDER ]; - opus_int32 AR_Q24[ MAX_SHAPE_LPC_ORDER ]; + opus_int32 AR1_Q24[ MAX_SHAPE_LPC_ORDER ]; + opus_int32 AR2_Q24[ MAX_SHAPE_LPC_ORDER ]; VARDECL( opus_int16, x_windowed ); const opus_int16 *x_ptr, *pitch_res_ptr; SAVE_STACK; @@ -199,14 +209,14 @@ void silk_noise_shape_analysis_FIX( if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) { /* Initially set to 0; may be overruled in process_gains(..) */ psEnc->sCmn.indices.quantOffsetType = 0; + psEncCtrl->sparseness_Q8 = 0; } else { /* Sparseness measure, based on relative fluctuations of energy per 2 milliseconds */ nSamples = silk_LSHIFT( psEnc->sCmn.fs_kHz, 1 ); energy_variation_Q7 = 0; log_energy_prev_Q7 = 0; pitch_res_ptr = pitch_res; - nSegs = silk_SMULBB( SUB_FRAME_LENGTH_MS, psEnc->sCmn.nb_subfr ) / 2; - for( k = 0; k < nSegs; k++ ) { + for( k = 0; k < silk_SMULBB( SUB_FRAME_LENGTH_MS, psEnc->sCmn.nb_subfr ) / 2; k++ ) { silk_sum_sqr_shift( &nrg, &scale, pitch_res_ptr, nSamples ); nrg += silk_RSHIFT( nSamples, scale ); /* Q(-scale)*/ @@ -218,12 +228,18 @@ void silk_noise_shape_analysis_FIX( pitch_res_ptr += nSamples; } + psEncCtrl->sparseness_Q8 = silk_RSHIFT( silk_sigm_Q15( silk_SMULWB( energy_variation_Q7 - + SILK_FIX_CONST( 5.0, 7 ), SILK_FIX_CONST( 0.1, 16 ) ) ), 7 ); + /* Set quantization offset depending on sparseness measure */ - if( energy_variation_Q7 > SILK_FIX_CONST( ENERGY_VARIATION_THRESHOLD_QNT_OFFSET, 7 ) * (nSegs-1) ) { + if( psEncCtrl->sparseness_Q8 > SILK_FIX_CONST( SPARSENESS_THRESHOLD_QNT_OFFSET, 8 ) ) { psEnc->sCmn.indices.quantOffsetType = 0; } else { psEnc->sCmn.indices.quantOffsetType = 1; } + + /* Increase coding SNR for sparse signals */ + SNR_adj_dB_Q7 = silk_SMLAWB( SNR_adj_dB_Q7, SILK_FIX_CONST( SPARSE_SNR_INCR_dB, 15 ), psEncCtrl->sparseness_Q8 - SILK_FIX_CONST( 0.5, 8 ) ); } /*******************************/ @@ -231,8 +247,14 @@ void silk_noise_shape_analysis_FIX( /*******************************/ /* More BWE for signals with high prediction gain */ strength_Q16 = silk_SMULWB( psEncCtrl->predGain_Q16, SILK_FIX_CONST( FIND_PITCH_WHITE_NOISE_FRACTION, 16 ) ); - BWExp_Q16 = silk_DIV32_varQ( SILK_FIX_CONST( BANDWIDTH_EXPANSION, 16 ), + BWExp1_Q16 = BWExp2_Q16 = silk_DIV32_varQ( SILK_FIX_CONST( BANDWIDTH_EXPANSION, 16 ), silk_SMLAWW( SILK_FIX_CONST( 1.0, 16 ), strength_Q16, strength_Q16 ), 16 ); + delta_Q16 = silk_SMULWB( SILK_FIX_CONST( 1.0, 16 ) - silk_SMULBB( 3, psEncCtrl->coding_quality_Q14 ), + SILK_FIX_CONST( LOW_RATE_BANDWIDTH_EXPANSION_DELTA, 16 ) ); + BWExp1_Q16 = silk_SUB32( BWExp1_Q16, delta_Q16 ); + BWExp2_Q16 = silk_ADD32( BWExp2_Q16, delta_Q16 ); + /* BWExp1 will be applied after BWExp2, so make it relative */ + BWExp1_Q16 = silk_DIV32_16( silk_LSHIFT( BWExp1_Q16, 14 ), silk_RSHIFT( BWExp2_Q16, 2 ) ); if( psEnc->sCmn.warping_Q16 > 0 ) { /* Slightly more warping in analysis will move quantization noise up in frequency, where it's better masked */ @@ -262,7 +284,7 @@ void silk_noise_shape_analysis_FIX( if( psEnc->sCmn.warping_Q16 > 0 ) { /* Calculate warped auto correlation */ - silk_warped_autocorrelation_FIX( auto_corr, &scale, x_windowed, warping_Q16, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder, arch ); + silk_warped_autocorrelation_FIX( auto_corr, &scale, x_windowed, warping_Q16, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder ); } else { /* Calculate regular auto correlation */ silk_autocorr( auto_corr, &scale, x_windowed, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder + 1, arch ); @@ -277,7 +299,7 @@ void silk_noise_shape_analysis_FIX( silk_assert( nrg >= 0 ); /* Convert reflection coefficients to prediction coefficients */ - silk_k2a_Q16( AR_Q24, refl_coef_Q16, psEnc->sCmn.shapingLPCOrder ); + silk_k2a_Q16( AR2_Q24, refl_coef_Q16, psEnc->sCmn.shapingLPCOrder ); Qnrg = -scale; /* range: -12...30*/ silk_assert( Qnrg >= -12 ); @@ -296,34 +318,40 @@ void silk_noise_shape_analysis_FIX( if( psEnc->sCmn.warping_Q16 > 0 ) { /* Adjust gain for warping */ - gain_mult_Q16 = warped_gain( AR_Q24, warping_Q16, psEnc->sCmn.shapingLPCOrder ); - silk_assert( psEncCtrl->Gains_Q16[ k ] > 0 ); - if( psEncCtrl->Gains_Q16[ k ] < SILK_FIX_CONST( 0.25, 16 ) ) { - psEncCtrl->Gains_Q16[ k ] = silk_SMULWW( psEncCtrl->Gains_Q16[ k ], gain_mult_Q16 ); + gain_mult_Q16 = warped_gain( AR2_Q24, warping_Q16, psEnc->sCmn.shapingLPCOrder ); + silk_assert( psEncCtrl->Gains_Q16[ k ] >= 0 ); + if ( silk_SMULWW( silk_RSHIFT_ROUND( psEncCtrl->Gains_Q16[ k ], 1 ), gain_mult_Q16 ) >= ( silk_int32_MAX >> 1 ) ) { + psEncCtrl->Gains_Q16[ k ] = silk_int32_MAX; } else { - psEncCtrl->Gains_Q16[ k ] = silk_SMULWW( silk_RSHIFT_ROUND( psEncCtrl->Gains_Q16[ k ], 1 ), gain_mult_Q16 ); - if ( psEncCtrl->Gains_Q16[ k ] >= ( silk_int32_MAX >> 1 ) ) { - psEncCtrl->Gains_Q16[ k ] = silk_int32_MAX; - } else { - psEncCtrl->Gains_Q16[ k ] = silk_LSHIFT32( psEncCtrl->Gains_Q16[ k ], 1 ); - } + psEncCtrl->Gains_Q16[ k ] = silk_SMULWW( psEncCtrl->Gains_Q16[ k ], gain_mult_Q16 ); } - silk_assert( psEncCtrl->Gains_Q16[ k ] > 0 ); } - /* Bandwidth expansion */ - silk_bwexpander_32( AR_Q24, psEnc->sCmn.shapingLPCOrder, BWExp_Q16 ); + /* Bandwidth expansion for synthesis filter shaping */ + silk_bwexpander_32( AR2_Q24, psEnc->sCmn.shapingLPCOrder, BWExp2_Q16 ); - if( psEnc->sCmn.warping_Q16 > 0 ) { - /* Convert to monic warped prediction coefficients and limit absolute values */ - limit_warped_coefs( AR_Q24, warping_Q16, SILK_FIX_CONST( 3.999, 24 ), psEnc->sCmn.shapingLPCOrder ); + /* Compute noise shaping filter coefficients */ + silk_memcpy( AR1_Q24, AR2_Q24, psEnc->sCmn.shapingLPCOrder * sizeof( opus_int32 ) ); - /* Convert from Q24 to Q13 and store in int16 */ - for( i = 0; i < psEnc->sCmn.shapingLPCOrder; i++ ) { - psEncCtrl->AR_Q13[ k * MAX_SHAPE_LPC_ORDER + i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( AR_Q24[ i ], 11 ) ); - } - } else { - silk_LPC_fit( &psEncCtrl->AR_Q13[ k * MAX_SHAPE_LPC_ORDER ], AR_Q24, 13, 24, psEnc->sCmn.shapingLPCOrder ); + /* Bandwidth expansion for analysis filter shaping */ + silk_assert( BWExp1_Q16 <= SILK_FIX_CONST( 1.0, 16 ) ); + silk_bwexpander_32( AR1_Q24, psEnc->sCmn.shapingLPCOrder, BWExp1_Q16 ); + + /* Ratio of prediction gains, in energy domain */ + pre_nrg_Q30 = silk_LPC_inverse_pred_gain_Q24( AR2_Q24, psEnc->sCmn.shapingLPCOrder ); + nrg = silk_LPC_inverse_pred_gain_Q24( AR1_Q24, psEnc->sCmn.shapingLPCOrder ); + + /*psEncCtrl->GainsPre[ k ] = 1.0f - 0.7f * ( 1.0f - pre_nrg / nrg ) = 0.3f + 0.7f * pre_nrg / nrg;*/ + pre_nrg_Q30 = silk_LSHIFT32( silk_SMULWB( pre_nrg_Q30, SILK_FIX_CONST( 0.7, 15 ) ), 1 ); + psEncCtrl->GainsPre_Q14[ k ] = ( opus_int ) SILK_FIX_CONST( 0.3, 14 ) + silk_DIV32_varQ( pre_nrg_Q30, nrg, 14 ); + + /* Convert to monic warped prediction coefficients and limit absolute values */ + limit_warped_coefs( AR2_Q24, AR1_Q24, warping_Q16, SILK_FIX_CONST( 3.999, 24 ), psEnc->sCmn.shapingLPCOrder ); + + /* Convert from Q24 to Q13 and store in int16 */ + for( i = 0; i < psEnc->sCmn.shapingLPCOrder; i++ ) { + psEncCtrl->AR1_Q13[ k * MAX_SHAPE_LPC_ORDER + i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( AR1_Q24[ i ], 11 ) ); + psEncCtrl->AR2_Q13[ k * MAX_SHAPE_LPC_ORDER + i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( AR2_Q24[ i ], 11 ) ); } } @@ -340,6 +368,11 @@ void silk_noise_shape_analysis_FIX( psEncCtrl->Gains_Q16[ k ] = silk_ADD_POS_SAT32( psEncCtrl->Gains_Q16[ k ], gain_add_Q16 ); } + gain_mult_Q16 = SILK_FIX_CONST( 1.0, 16 ) + silk_RSHIFT_ROUND( silk_MLA( SILK_FIX_CONST( INPUT_TILT, 26 ), + psEncCtrl->coding_quality_Q14, SILK_FIX_CONST( HIGH_RATE_INPUT_TILT, 12 ) ), 10 ); + for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { + psEncCtrl->GainsPre_Q14[ k ] = silk_SMULWB( gain_mult_Q16, psEncCtrl->GainsPre_Q14[ k ] ); + } /************************************************/ /* Control low-frequency shaping and noise tilt */ @@ -377,6 +410,14 @@ void silk_noise_shape_analysis_FIX( /****************************/ /* HARMONIC SHAPING CONTROL */ /****************************/ + /* Control boosting of harmonic frequencies */ + HarmBoost_Q16 = silk_SMULWB( silk_SMULWB( SILK_FIX_CONST( 1.0, 17 ) - silk_LSHIFT( psEncCtrl->coding_quality_Q14, 3 ), + psEnc->LTPCorr_Q15 ), SILK_FIX_CONST( LOW_RATE_HARMONIC_BOOST, 16 ) ); + + /* More harmonic boost for noisy input signals */ + HarmBoost_Q16 = silk_SMLAWB( HarmBoost_Q16, + SILK_FIX_CONST( 1.0, 16 ) - silk_LSHIFT( psEncCtrl->input_quality_Q14, 2 ), SILK_FIX_CONST( LOW_INPUT_QUALITY_HARMONIC_BOOST, 16 ) ); + if( USE_HARM_SHAPING && psEnc->sCmn.indices.signalType == TYPE_VOICED ) { /* More harmonic noise shaping for high bitrates or noisy input */ HarmShapeGain_Q16 = silk_SMLAWB( SILK_FIX_CONST( HARMONIC_SHAPING, 16 ), @@ -394,11 +435,14 @@ void silk_noise_shape_analysis_FIX( /* Smooth over subframes */ /*************************/ for( k = 0; k < MAX_NB_SUBFR; k++ ) { + psShapeSt->HarmBoost_smth_Q16 = + silk_SMLAWB( psShapeSt->HarmBoost_smth_Q16, HarmBoost_Q16 - psShapeSt->HarmBoost_smth_Q16, SILK_FIX_CONST( SUBFR_SMTH_COEF, 16 ) ); psShapeSt->HarmShapeGain_smth_Q16 = silk_SMLAWB( psShapeSt->HarmShapeGain_smth_Q16, HarmShapeGain_Q16 - psShapeSt->HarmShapeGain_smth_Q16, SILK_FIX_CONST( SUBFR_SMTH_COEF, 16 ) ); psShapeSt->Tilt_smth_Q16 = silk_SMLAWB( psShapeSt->Tilt_smth_Q16, Tilt_Q16 - psShapeSt->Tilt_smth_Q16, SILK_FIX_CONST( SUBFR_SMTH_COEF, 16 ) ); + psEncCtrl->HarmBoost_Q14[ k ] = ( opus_int )silk_RSHIFT_ROUND( psShapeSt->HarmBoost_smth_Q16, 2 ); psEncCtrl->HarmShapeGain_Q14[ k ] = ( opus_int )silk_RSHIFT_ROUND( psShapeSt->HarmShapeGain_smth_Q16, 2 ); psEncCtrl->Tilt_Q14[ k ] = ( opus_int )silk_RSHIFT_ROUND( psShapeSt->Tilt_smth_Q16, 2 ); } diff --git a/thirdparty/opus/silk/fixed/pitch_analysis_core_FIX.c b/thirdparty/opus/silk/fixed/pitch_analysis_core_FIX.c index 14729046d2..01bb9fc0a8 100644 --- a/thirdparty/opus/silk/fixed/pitch_analysis_core_FIX.c +++ b/thirdparty/opus/silk/fixed/pitch_analysis_core_FIX.c @@ -80,7 +80,7 @@ static void silk_P_Ana_calc_energy_st3( /* FIXED POINT CORE PITCH ANALYSIS FUNCTION */ /*************************************************************/ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0 voiced, 1 unvoiced */ - const opus_int16 *frame_unscaled, /* I Signal of length PE_FRAME_LENGTH_MS*Fs_kHz */ + const opus_int16 *frame, /* I Signal of length PE_FRAME_LENGTH_MS*Fs_kHz */ opus_int *pitch_out, /* O 4 pitch lag values */ opus_int16 *lagIndex, /* O Lag Index */ opus_int8 *contourIndex, /* O Pitch contour Index */ @@ -94,17 +94,16 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0 int arch /* I Run-time architecture */ ) { - VARDECL( opus_int16, frame_8kHz_buf ); + VARDECL( opus_int16, frame_8kHz ); VARDECL( opus_int16, frame_4kHz ); - VARDECL( opus_int16, frame_scaled ); opus_int32 filt_state[ 6 ]; - const opus_int16 *frame, *frame_8kHz; + const opus_int16 *input_frame_ptr; opus_int i, k, d, j; VARDECL( opus_int16, C ); VARDECL( opus_int32, xcorr32 ); const opus_int16 *target_ptr, *basis_ptr; - opus_int32 cross_corr, normalizer, energy, energy_basis, energy_target; - opus_int d_srch[ PE_D_SRCH_LENGTH ], Cmax, length_d_srch, length_d_comp, shift; + opus_int32 cross_corr, normalizer, energy, shift, energy_basis, energy_target; + opus_int d_srch[ PE_D_SRCH_LENGTH ], Cmax, length_d_srch, length_d_comp; VARDECL( opus_int16, d_comp ); opus_int32 sum, threshold, lag_counter; opus_int CBimax, CBimax_new, CBimax_old, lag, start_lag, end_lag, lag_new; @@ -120,13 +119,12 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0 opus_int32 delta_lag_log2_sqr_Q7, lag_log2_Q7, prevLag_log2_Q7, prev_lag_bias_Q13; const opus_int8 *Lag_CB_ptr; SAVE_STACK; - /* Check for valid sampling frequency */ - celt_assert( Fs_kHz == 8 || Fs_kHz == 12 || Fs_kHz == 16 ); + silk_assert( Fs_kHz == 8 || Fs_kHz == 12 || Fs_kHz == 16 ); /* Check for valid complexity setting */ - celt_assert( complexity >= SILK_PE_MIN_COMPLEX ); - celt_assert( complexity <= SILK_PE_MAX_COMPLEX ); + silk_assert( complexity >= SILK_PE_MIN_COMPLEX ); + silk_assert( complexity <= SILK_PE_MAX_COMPLEX ); silk_assert( search_thres1_Q16 >= 0 && search_thres1_Q16 <= (1<<16) ); silk_assert( search_thres2_Q13 >= 0 && search_thres2_Q13 <= (1<<13) ); @@ -139,33 +137,17 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0 min_lag = PE_MIN_LAG_MS * Fs_kHz; max_lag = PE_MAX_LAG_MS * Fs_kHz - 1; - /* Downscale input if necessary */ - silk_sum_sqr_shift( &energy, &shift, frame_unscaled, frame_length ); - shift += 3 - silk_CLZ32( energy ); /* at least two bits headroom */ - ALLOC( frame_scaled, frame_length, opus_int16 ); - if( shift > 0 ) { - shift = silk_RSHIFT( shift + 1, 1 ); - for( i = 0; i < frame_length; i++ ) { - frame_scaled[ i ] = silk_RSHIFT( frame_unscaled[ i ], shift ); - } - frame = frame_scaled; - } else { - frame = frame_unscaled; - } - - ALLOC( frame_8kHz_buf, ( Fs_kHz == 8 ) ? 1 : frame_length_8kHz, opus_int16 ); /* Resample from input sampled at Fs_kHz to 8 kHz */ + ALLOC( frame_8kHz, frame_length_8kHz, opus_int16 ); if( Fs_kHz == 16 ) { silk_memset( filt_state, 0, 2 * sizeof( opus_int32 ) ); - silk_resampler_down2( filt_state, frame_8kHz_buf, frame, frame_length ); - frame_8kHz = frame_8kHz_buf; + silk_resampler_down2( filt_state, frame_8kHz, frame, frame_length ); } else if( Fs_kHz == 12 ) { silk_memset( filt_state, 0, 6 * sizeof( opus_int32 ) ); - silk_resampler_down2_3( filt_state, frame_8kHz_buf, frame, frame_length ); - frame_8kHz = frame_8kHz_buf; + silk_resampler_down2_3( filt_state, frame_8kHz, frame, frame_length ); } else { - celt_assert( Fs_kHz == 8 ); - frame_8kHz = frame; + silk_assert( Fs_kHz == 8 ); + silk_memcpy( frame_8kHz, frame, frame_length_8kHz * sizeof(opus_int16) ); } /* Decimate again to 4 kHz */ @@ -178,6 +160,19 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0 frame_4kHz[ i ] = silk_ADD_SAT16( frame_4kHz[ i ], frame_4kHz[ i - 1 ] ); } + /******************************************************************************* + ** Scale 4 kHz signal down to prevent correlations measures from overflowing + ** find scaling as max scaling for each 8kHz(?) subframe + *******************************************************************************/ + + /* Inner product is calculated with different lengths, so scale for the worst case */ + silk_sum_sqr_shift( &energy, &shift, frame_4kHz, frame_length_4kHz ); + if( shift > 0 ) { + shift = silk_RSHIFT( shift, 1 ); + for( i = 0; i < frame_length_4kHz; i++ ) { + frame_4kHz[ i ] = silk_RSHIFT( frame_4kHz[ i ], shift ); + } + } /****************************************************************************** * FIRST STAGE, operating in 4 khz @@ -188,14 +183,14 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0 target_ptr = &frame_4kHz[ silk_LSHIFT( SF_LENGTH_4KHZ, 2 ) ]; for( k = 0; k < nb_subfr >> 1; k++ ) { /* Check that we are within range of the array */ - celt_assert( target_ptr >= frame_4kHz ); - celt_assert( target_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz ); + silk_assert( target_ptr >= frame_4kHz ); + silk_assert( target_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz ); basis_ptr = target_ptr - MIN_LAG_4KHZ; /* Check that we are within range of the array */ - celt_assert( basis_ptr >= frame_4kHz ); - celt_assert( basis_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz ); + silk_assert( basis_ptr >= frame_4kHz ); + silk_assert( basis_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz ); celt_pitch_xcorr( target_ptr, target_ptr - MAX_LAG_4KHZ, xcorr32, SF_LENGTH_8KHZ, MAX_LAG_4KHZ - MIN_LAG_4KHZ + 1, arch ); @@ -249,7 +244,7 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0 /* Sort */ length_d_srch = silk_ADD_LSHIFT32( 4, complexity, 1 ); - celt_assert( 3 * length_d_srch <= PE_D_SRCH_LENGTH ); + silk_assert( 3 * length_d_srch <= PE_D_SRCH_LENGTH ); silk_insertion_sort_decreasing_int16( C, d_srch, CSTRIDE_4KHZ, length_d_srch ); @@ -274,7 +269,7 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0 break; } } - celt_assert( length_d_srch > 0 ); + silk_assert( length_d_srch > 0 ); ALLOC( d_comp, D_COMP_STRIDE, opus_int16 ); for( i = D_COMP_MIN; i < D_COMP_MAX; i++ ) { @@ -316,6 +311,18 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0 ** SECOND STAGE, operating at 8 kHz, on lag sections with high correlation *************************************************************************************/ + /****************************************************************************** + ** Scale signal down to avoid correlations measures from overflowing + *******************************************************************************/ + /* find scaling as max scaling for each subframe */ + silk_sum_sqr_shift( &energy, &shift, frame_8kHz, frame_length_8kHz ); + if( shift > 0 ) { + shift = silk_RSHIFT( shift, 1 ); + for( i = 0; i < frame_length_8kHz; i++ ) { + frame_8kHz[ i ] = silk_RSHIFT( frame_8kHz[ i ], shift ); + } + } + /********************************************************************************* * Find energy of each subframe projected onto its history, for a range of delays *********************************************************************************/ @@ -325,8 +332,8 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0 for( k = 0; k < nb_subfr; k++ ) { /* Check that we are within range of the array */ - celt_assert( target_ptr >= frame_8kHz ); - celt_assert( target_ptr + SF_LENGTH_8KHZ <= frame_8kHz + frame_length_8kHz ); + silk_assert( target_ptr >= frame_8kHz ); + silk_assert( target_ptr + SF_LENGTH_8KHZ <= frame_8kHz + frame_length_8kHz ); energy_target = silk_ADD32( silk_inner_prod_aligned( target_ptr, target_ptr, SF_LENGTH_8KHZ, arch ), 1 ); for( j = 0; j < length_d_comp; j++ ) { @@ -455,6 +462,24 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0 silk_assert( *LTPCorr_Q15 >= 0 ); if( Fs_kHz > 8 ) { + VARDECL( opus_int16, scratch_mem ); + /***************************************************************************/ + /* Scale input signal down to avoid correlations measures from overflowing */ + /***************************************************************************/ + /* find scaling as max scaling for each subframe */ + silk_sum_sqr_shift( &energy, &shift, frame, frame_length ); + ALLOC( scratch_mem, shift > 0 ? frame_length : ALLOC_NONE, opus_int16 ); + if( shift > 0 ) { + /* Move signal to scratch mem because the input signal should be unchanged */ + shift = silk_RSHIFT( shift, 1 ); + for( i = 0; i < frame_length; i++ ) { + scratch_mem[ i ] = silk_RSHIFT( frame[ i ], shift ); + } + input_frame_ptr = scratch_mem; + } else { + input_frame_ptr = frame; + } + /* Search in original signal */ CBimax_old = CBimax; @@ -494,14 +519,14 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0 /* Calculate the correlations and energies needed in stage 3 */ ALLOC( energies_st3, nb_subfr * nb_cbk_search, silk_pe_stage3_vals ); ALLOC( cross_corr_st3, nb_subfr * nb_cbk_search, silk_pe_stage3_vals ); - silk_P_Ana_calc_corr_st3( cross_corr_st3, frame, start_lag, sf_length, nb_subfr, complexity, arch ); - silk_P_Ana_calc_energy_st3( energies_st3, frame, start_lag, sf_length, nb_subfr, complexity, arch ); + silk_P_Ana_calc_corr_st3( cross_corr_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity, arch ); + silk_P_Ana_calc_energy_st3( energies_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity, arch ); lag_counter = 0; silk_assert( lag == silk_SAT16( lag ) ); contour_bias_Q15 = silk_DIV32_16( SILK_FIX_CONST( PE_FLATCONTOUR_BIAS, 15 ), lag ); - target_ptr = &frame[ PE_LTP_MEM_LENGTH_MS * Fs_kHz ]; + target_ptr = &input_frame_ptr[ PE_LTP_MEM_LENGTH_MS * Fs_kHz ]; energy_target = silk_ADD32( silk_inner_prod_aligned( target_ptr, target_ptr, nb_subfr * sf_length, arch ), 1 ); for( d = start_lag; d <= end_lag; d++ ) { for( j = 0; j < nb_cbk_search; j++ ) { @@ -550,7 +575,7 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0 *lagIndex = (opus_int16)( lag - MIN_LAG_8KHZ ); *contourIndex = (opus_int8)CBimax; } - celt_assert( *lagIndex >= 0 ); + silk_assert( *lagIndex >= 0 ); /* return as voiced */ RESTORE_STACK; return 0; @@ -587,8 +612,8 @@ static void silk_P_Ana_calc_corr_st3( const opus_int8 *Lag_range_ptr, *Lag_CB_ptr; SAVE_STACK; - celt_assert( complexity >= SILK_PE_MIN_COMPLEX ); - celt_assert( complexity <= SILK_PE_MAX_COMPLEX ); + silk_assert( complexity >= SILK_PE_MIN_COMPLEX ); + silk_assert( complexity <= SILK_PE_MAX_COMPLEX ); if( nb_subfr == PE_MAX_NB_SUBFR ) { Lag_range_ptr = &silk_Lag_range_stage3[ complexity ][ 0 ][ 0 ]; @@ -596,7 +621,7 @@ static void silk_P_Ana_calc_corr_st3( nb_cbk_search = silk_nb_cbk_searchs_stage3[ complexity ]; cbk_size = PE_NB_CBKS_STAGE3_MAX; } else { - celt_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1); + silk_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1); Lag_range_ptr = &silk_Lag_range_stage3_10_ms[ 0 ][ 0 ]; Lag_CB_ptr = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ]; nb_cbk_search = PE_NB_CBKS_STAGE3_10MS; @@ -612,7 +637,7 @@ static void silk_P_Ana_calc_corr_st3( /* Calculate the correlations for each subframe */ lag_low = matrix_ptr( Lag_range_ptr, k, 0, 2 ); lag_high = matrix_ptr( Lag_range_ptr, k, 1, 2 ); - celt_assert(lag_high-lag_low+1 <= SCRATCH_SIZE); + silk_assert(lag_high-lag_low+1 <= SCRATCH_SIZE); celt_pitch_xcorr( target_ptr, target_ptr - start_lag - lag_high, xcorr32, sf_length, lag_high - lag_low + 1, arch ); for( j = lag_low; j <= lag_high; j++ ) { silk_assert( lag_counter < SCRATCH_SIZE ); @@ -659,8 +684,8 @@ static void silk_P_Ana_calc_energy_st3( const opus_int8 *Lag_range_ptr, *Lag_CB_ptr; SAVE_STACK; - celt_assert( complexity >= SILK_PE_MIN_COMPLEX ); - celt_assert( complexity <= SILK_PE_MAX_COMPLEX ); + silk_assert( complexity >= SILK_PE_MIN_COMPLEX ); + silk_assert( complexity <= SILK_PE_MAX_COMPLEX ); if( nb_subfr == PE_MAX_NB_SUBFR ) { Lag_range_ptr = &silk_Lag_range_stage3[ complexity ][ 0 ][ 0 ]; @@ -668,7 +693,7 @@ static void silk_P_Ana_calc_energy_st3( nb_cbk_search = silk_nb_cbk_searchs_stage3[ complexity ]; cbk_size = PE_NB_CBKS_STAGE3_MAX; } else { - celt_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1); + silk_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1); Lag_range_ptr = &silk_Lag_range_stage3_10_ms[ 0 ][ 0 ]; Lag_CB_ptr = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ]; nb_cbk_search = PE_NB_CBKS_STAGE3_10MS; diff --git a/thirdparty/opus/silk/fixed/prefilter_FIX.c b/thirdparty/opus/silk/fixed/prefilter_FIX.c new file mode 100644 index 0000000000..6a8e35152e --- /dev/null +++ b/thirdparty/opus/silk/fixed/prefilter_FIX.c @@ -0,0 +1,221 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "main_FIX.h" +#include "stack_alloc.h" +#include "tuning_parameters.h" + +#if defined(MIPSr1_ASM) +#include "mips/prefilter_FIX_mipsr1.h" +#endif + + +#if !defined(OVERRIDE_silk_warped_LPC_analysis_filter_FIX) +#define silk_warped_LPC_analysis_filter_FIX(state, res_Q2, coef_Q13, input, lambda_Q16, length, order, arch) \ + ((void)(arch),silk_warped_LPC_analysis_filter_FIX_c(state, res_Q2, coef_Q13, input, lambda_Q16, length, order)) +#endif + +/* Prefilter for finding Quantizer input signal */ +static OPUS_INLINE void silk_prefilt_FIX( + silk_prefilter_state_FIX *P, /* I/O state */ + opus_int32 st_res_Q12[], /* I short term residual signal */ + opus_int32 xw_Q3[], /* O prefiltered signal */ + opus_int32 HarmShapeFIRPacked_Q12, /* I Harmonic shaping coeficients */ + opus_int Tilt_Q14, /* I Tilt shaping coeficient */ + opus_int32 LF_shp_Q14, /* I Low-frequancy shaping coeficients */ + opus_int lag, /* I Lag for harmonic shaping */ + opus_int length /* I Length of signals */ +); + +void silk_warped_LPC_analysis_filter_FIX_c( + opus_int32 state[], /* I/O State [order + 1] */ + opus_int32 res_Q2[], /* O Residual signal [length] */ + const opus_int16 coef_Q13[], /* I Coefficients [order] */ + const opus_int16 input[], /* I Input signal [length] */ + const opus_int16 lambda_Q16, /* I Warping factor */ + const opus_int length, /* I Length of input signal */ + const opus_int order /* I Filter order (even) */ +) +{ + opus_int n, i; + opus_int32 acc_Q11, tmp1, tmp2; + + /* Order must be even */ + silk_assert( ( order & 1 ) == 0 ); + + for( n = 0; n < length; n++ ) { + /* Output of lowpass section */ + tmp2 = silk_SMLAWB( state[ 0 ], state[ 1 ], lambda_Q16 ); + state[ 0 ] = silk_LSHIFT( input[ n ], 14 ); + /* Output of allpass section */ + tmp1 = silk_SMLAWB( state[ 1 ], state[ 2 ] - tmp2, lambda_Q16 ); + state[ 1 ] = tmp2; + acc_Q11 = silk_RSHIFT( order, 1 ); + acc_Q11 = silk_SMLAWB( acc_Q11, tmp2, coef_Q13[ 0 ] ); + /* Loop over allpass sections */ + for( i = 2; i < order; i += 2 ) { + /* Output of allpass section */ + tmp2 = silk_SMLAWB( state[ i ], state[ i + 1 ] - tmp1, lambda_Q16 ); + state[ i ] = tmp1; + acc_Q11 = silk_SMLAWB( acc_Q11, tmp1, coef_Q13[ i - 1 ] ); + /* Output of allpass section */ + tmp1 = silk_SMLAWB( state[ i + 1 ], state[ i + 2 ] - tmp2, lambda_Q16 ); + state[ i + 1 ] = tmp2; + acc_Q11 = silk_SMLAWB( acc_Q11, tmp2, coef_Q13[ i ] ); + } + state[ order ] = tmp1; + acc_Q11 = silk_SMLAWB( acc_Q11, tmp1, coef_Q13[ order - 1 ] ); + res_Q2[ n ] = silk_LSHIFT( (opus_int32)input[ n ], 2 ) - silk_RSHIFT_ROUND( acc_Q11, 9 ); + } +} + +void silk_prefilter_FIX( + silk_encoder_state_FIX *psEnc, /* I/O Encoder state */ + const silk_encoder_control_FIX *psEncCtrl, /* I Encoder control */ + opus_int32 xw_Q3[], /* O Weighted signal */ + const opus_int16 x[] /* I Speech signal */ +) +{ + silk_prefilter_state_FIX *P = &psEnc->sPrefilt; + opus_int j, k, lag; + opus_int32 tmp_32; + const opus_int16 *AR1_shp_Q13; + const opus_int16 *px; + opus_int32 *pxw_Q3; + opus_int HarmShapeGain_Q12, Tilt_Q14; + opus_int32 HarmShapeFIRPacked_Q12, LF_shp_Q14; + VARDECL( opus_int32, x_filt_Q12 ); + VARDECL( opus_int32, st_res_Q2 ); + opus_int16 B_Q10[ 2 ]; + SAVE_STACK; + + /* Set up pointers */ + px = x; + pxw_Q3 = xw_Q3; + lag = P->lagPrev; + ALLOC( x_filt_Q12, psEnc->sCmn.subfr_length, opus_int32 ); + ALLOC( st_res_Q2, psEnc->sCmn.subfr_length, opus_int32 ); + for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { + /* Update Variables that change per sub frame */ + if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) { + lag = psEncCtrl->pitchL[ k ]; + } + + /* Noise shape parameters */ + HarmShapeGain_Q12 = silk_SMULWB( (opus_int32)psEncCtrl->HarmShapeGain_Q14[ k ], 16384 - psEncCtrl->HarmBoost_Q14[ k ] ); + silk_assert( HarmShapeGain_Q12 >= 0 ); + HarmShapeFIRPacked_Q12 = silk_RSHIFT( HarmShapeGain_Q12, 2 ); + HarmShapeFIRPacked_Q12 |= silk_LSHIFT( (opus_int32)silk_RSHIFT( HarmShapeGain_Q12, 1 ), 16 ); + Tilt_Q14 = psEncCtrl->Tilt_Q14[ k ]; + LF_shp_Q14 = psEncCtrl->LF_shp_Q14[ k ]; + AR1_shp_Q13 = &psEncCtrl->AR1_Q13[ k * MAX_SHAPE_LPC_ORDER ]; + + /* Short term FIR filtering*/ + silk_warped_LPC_analysis_filter_FIX( P->sAR_shp, st_res_Q2, AR1_shp_Q13, px, + psEnc->sCmn.warping_Q16, psEnc->sCmn.subfr_length, psEnc->sCmn.shapingLPCOrder, psEnc->sCmn.arch ); + + /* Reduce (mainly) low frequencies during harmonic emphasis */ + B_Q10[ 0 ] = silk_RSHIFT_ROUND( psEncCtrl->GainsPre_Q14[ k ], 4 ); + tmp_32 = silk_SMLABB( SILK_FIX_CONST( INPUT_TILT, 26 ), psEncCtrl->HarmBoost_Q14[ k ], HarmShapeGain_Q12 ); /* Q26 */ + tmp_32 = silk_SMLABB( tmp_32, psEncCtrl->coding_quality_Q14, SILK_FIX_CONST( HIGH_RATE_INPUT_TILT, 12 ) ); /* Q26 */ + tmp_32 = silk_SMULWB( tmp_32, -psEncCtrl->GainsPre_Q14[ k ] ); /* Q24 */ + tmp_32 = silk_RSHIFT_ROUND( tmp_32, 14 ); /* Q10 */ + B_Q10[ 1 ]= silk_SAT16( tmp_32 ); + x_filt_Q12[ 0 ] = silk_MLA( silk_MUL( st_res_Q2[ 0 ], B_Q10[ 0 ] ), P->sHarmHP_Q2, B_Q10[ 1 ] ); + for( j = 1; j < psEnc->sCmn.subfr_length; j++ ) { + x_filt_Q12[ j ] = silk_MLA( silk_MUL( st_res_Q2[ j ], B_Q10[ 0 ] ), st_res_Q2[ j - 1 ], B_Q10[ 1 ] ); + } + P->sHarmHP_Q2 = st_res_Q2[ psEnc->sCmn.subfr_length - 1 ]; + + silk_prefilt_FIX( P, x_filt_Q12, pxw_Q3, HarmShapeFIRPacked_Q12, Tilt_Q14, LF_shp_Q14, lag, psEnc->sCmn.subfr_length ); + + px += psEnc->sCmn.subfr_length; + pxw_Q3 += psEnc->sCmn.subfr_length; + } + + P->lagPrev = psEncCtrl->pitchL[ psEnc->sCmn.nb_subfr - 1 ]; + RESTORE_STACK; +} + +#ifndef OVERRIDE_silk_prefilt_FIX +/* Prefilter for finding Quantizer input signal */ +static OPUS_INLINE void silk_prefilt_FIX( + silk_prefilter_state_FIX *P, /* I/O state */ + opus_int32 st_res_Q12[], /* I short term residual signal */ + opus_int32 xw_Q3[], /* O prefiltered signal */ + opus_int32 HarmShapeFIRPacked_Q12, /* I Harmonic shaping coeficients */ + opus_int Tilt_Q14, /* I Tilt shaping coeficient */ + opus_int32 LF_shp_Q14, /* I Low-frequancy shaping coeficients */ + opus_int lag, /* I Lag for harmonic shaping */ + opus_int length /* I Length of signals */ +) +{ + opus_int i, idx, LTP_shp_buf_idx; + opus_int32 n_LTP_Q12, n_Tilt_Q10, n_LF_Q10; + opus_int32 sLF_MA_shp_Q12, sLF_AR_shp_Q12; + opus_int16 *LTP_shp_buf; + + /* To speed up use temp variables instead of using the struct */ + LTP_shp_buf = P->sLTP_shp; + LTP_shp_buf_idx = P->sLTP_shp_buf_idx; + sLF_AR_shp_Q12 = P->sLF_AR_shp_Q12; + sLF_MA_shp_Q12 = P->sLF_MA_shp_Q12; + + for( i = 0; i < length; i++ ) { + if( lag > 0 ) { + /* unrolled loop */ + silk_assert( HARM_SHAPE_FIR_TAPS == 3 ); + idx = lag + LTP_shp_buf_idx; + n_LTP_Q12 = silk_SMULBB( LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2 - 1) & LTP_MASK ], HarmShapeFIRPacked_Q12 ); + n_LTP_Q12 = silk_SMLABT( n_LTP_Q12, LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2 ) & LTP_MASK ], HarmShapeFIRPacked_Q12 ); + n_LTP_Q12 = silk_SMLABB( n_LTP_Q12, LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2 + 1) & LTP_MASK ], HarmShapeFIRPacked_Q12 ); + } else { + n_LTP_Q12 = 0; + } + + n_Tilt_Q10 = silk_SMULWB( sLF_AR_shp_Q12, Tilt_Q14 ); + n_LF_Q10 = silk_SMLAWB( silk_SMULWT( sLF_AR_shp_Q12, LF_shp_Q14 ), sLF_MA_shp_Q12, LF_shp_Q14 ); + + sLF_AR_shp_Q12 = silk_SUB32( st_res_Q12[ i ], silk_LSHIFT( n_Tilt_Q10, 2 ) ); + sLF_MA_shp_Q12 = silk_SUB32( sLF_AR_shp_Q12, silk_LSHIFT( n_LF_Q10, 2 ) ); + + LTP_shp_buf_idx = ( LTP_shp_buf_idx - 1 ) & LTP_MASK; + LTP_shp_buf[ LTP_shp_buf_idx ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sLF_MA_shp_Q12, 12 ) ); + + xw_Q3[i] = silk_RSHIFT_ROUND( silk_SUB32( sLF_MA_shp_Q12, n_LTP_Q12 ), 9 ); + } + + /* Copy temp variable back to state */ + P->sLF_AR_shp_Q12 = sLF_AR_shp_Q12; + P->sLF_MA_shp_Q12 = sLF_MA_shp_Q12; + P->sLTP_shp_buf_idx = LTP_shp_buf_idx; +} +#endif /* OVERRIDE_silk_prefilt_FIX */ diff --git a/thirdparty/opus/silk/fixed/residual_energy16_FIX.c b/thirdparty/opus/silk/fixed/residual_energy16_FIX.c index 7f130f3d3d..ebffb2a66f 100644 --- a/thirdparty/opus/silk/fixed/residual_energy16_FIX.c +++ b/thirdparty/opus/silk/fixed/residual_energy16_FIX.c @@ -47,10 +47,10 @@ opus_int32 silk_residual_energy16_covar_FIX( const opus_int32 *pRow; /* Safety checks */ - celt_assert( D >= 0 ); - celt_assert( D <= 16 ); - celt_assert( cQ > 0 ); - celt_assert( cQ < 16 ); + silk_assert( D >= 0 ); + silk_assert( D <= 16 ); + silk_assert( cQ > 0 ); + silk_assert( cQ < 16 ); lshifts = 16 - cQ; Qxtra = lshifts; diff --git a/thirdparty/opus/silk/fixed/residual_energy_FIX.c b/thirdparty/opus/silk/fixed/residual_energy_FIX.c index 6c7cade9a0..41f74778e8 100644 --- a/thirdparty/opus/silk/fixed/residual_energy_FIX.c +++ b/thirdparty/opus/silk/fixed/residual_energy_FIX.c @@ -58,7 +58,7 @@ void silk_residual_energy_FIX( /* Filter input to create the LPC residual for each frame half, and measure subframe energies */ ALLOC( LPC_res, ( MAX_NB_SUBFR >> 1 ) * offset, opus_int16 ); - celt_assert( ( nb_subfr >> 1 ) * ( MAX_NB_SUBFR >> 1 ) == nb_subfr ); + silk_assert( ( nb_subfr >> 1 ) * ( MAX_NB_SUBFR >> 1 ) == nb_subfr ); for( i = 0; i < nb_subfr >> 1; i++ ) { /* Calculate half frame LPC residual signal including preceding samples */ silk_LPC_analysis_filter( LPC_res, x_ptr, a_Q12[ i ], ( MAX_NB_SUBFR >> 1 ) * offset, LPC_order, arch ); diff --git a/thirdparty/opus/silk/fixed/schur64_FIX.c b/thirdparty/opus/silk/fixed/schur64_FIX.c index 4b7e19ea59..764a10ef3e 100644 --- a/thirdparty/opus/silk/fixed/schur64_FIX.c +++ b/thirdparty/opus/silk/fixed/schur64_FIX.c @@ -43,7 +43,7 @@ opus_int32 silk_schur64( /* O returns residual ene opus_int32 C[ SILK_MAX_ORDER_LPC + 1 ][ 2 ]; opus_int32 Ctmp1_Q30, Ctmp2_Q30, rc_tmp_Q31; - celt_assert( order >= 0 && order <= SILK_MAX_ORDER_LPC ); + silk_assert( order==6||order==8||order==10||order==12||order==14||order==16 ); /* Check for invalid input */ if( c[ 0 ] <= 0 ) { @@ -51,10 +51,9 @@ opus_int32 silk_schur64( /* O returns residual ene return 0; } - k = 0; - do { + for( k = 0; k < order + 1; k++ ) { C[ k ][ 0 ] = C[ k ][ 1 ] = c[ k ]; - } while( ++k <= order ); + } for( k = 0; k < order; k++ ) { /* Check that we won't be getting an unstable rc, otherwise stop here. */ diff --git a/thirdparty/opus/silk/fixed/schur_FIX.c b/thirdparty/opus/silk/fixed/schur_FIX.c index 2840f6b1aa..c4c0ef23b4 100644 --- a/thirdparty/opus/silk/fixed/schur_FIX.c +++ b/thirdparty/opus/silk/fixed/schur_FIX.c @@ -43,29 +43,28 @@ opus_int32 silk_schur( /* O Returns residual ene opus_int32 C[ SILK_MAX_ORDER_LPC + 1 ][ 2 ]; opus_int32 Ctmp1, Ctmp2, rc_tmp_Q15; - celt_assert( order >= 0 && order <= SILK_MAX_ORDER_LPC ); + silk_assert( order==6||order==8||order==10||order==12||order==14||order==16 ); /* Get number of leading zeros */ lz = silk_CLZ32( c[ 0 ] ); /* Copy correlations and adjust level to Q30 */ - k = 0; if( lz < 2 ) { /* lz must be 1, so shift one to the right */ - do { + for( k = 0; k < order + 1; k++ ) { C[ k ][ 0 ] = C[ k ][ 1 ] = silk_RSHIFT( c[ k ], 1 ); - } while( ++k <= order ); + } } else if( lz > 2 ) { /* Shift to the left */ lz -= 2; - do { + for( k = 0; k < order + 1; k++ ) { C[ k ][ 0 ] = C[ k ][ 1 ] = silk_LSHIFT( c[ k ], lz ); - } while( ++k <= order ); + } } else { /* No need to shift */ - do { + for( k = 0; k < order + 1; k++ ) { C[ k ][ 0 ] = C[ k ][ 1 ] = c[ k ]; - } while( ++k <= order ); + } } for( k = 0; k < order; k++ ) { diff --git a/thirdparty/opus/silk/fixed/solve_LS_FIX.c b/thirdparty/opus/silk/fixed/solve_LS_FIX.c new file mode 100644 index 0000000000..51d7d49d02 --- /dev/null +++ b/thirdparty/opus/silk/fixed/solve_LS_FIX.c @@ -0,0 +1,249 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "main_FIX.h" +#include "stack_alloc.h" +#include "tuning_parameters.h" + +/*****************************/ +/* Internal function headers */ +/*****************************/ + +typedef struct { + opus_int32 Q36_part; + opus_int32 Q48_part; +} inv_D_t; + +/* Factorize square matrix A into LDL form */ +static OPUS_INLINE void silk_LDL_factorize_FIX( + opus_int32 *A, /* I/O Pointer to Symetric Square Matrix */ + opus_int M, /* I Size of Matrix */ + opus_int32 *L_Q16, /* I/O Pointer to Square Upper triangular Matrix */ + inv_D_t *inv_D /* I/O Pointer to vector holding inverted diagonal elements of D */ +); + +/* Solve Lx = b, when L is lower triangular and has ones on the diagonal */ +static OPUS_INLINE void silk_LS_SolveFirst_FIX( + const opus_int32 *L_Q16, /* I Pointer to Lower Triangular Matrix */ + opus_int M, /* I Dim of Matrix equation */ + const opus_int32 *b, /* I b Vector */ + opus_int32 *x_Q16 /* O x Vector */ +); + +/* Solve L^t*x = b, where L is lower triangular with ones on the diagonal */ +static OPUS_INLINE void silk_LS_SolveLast_FIX( + const opus_int32 *L_Q16, /* I Pointer to Lower Triangular Matrix */ + const opus_int M, /* I Dim of Matrix equation */ + const opus_int32 *b, /* I b Vector */ + opus_int32 *x_Q16 /* O x Vector */ +); + +static OPUS_INLINE void silk_LS_divide_Q16_FIX( + opus_int32 T[], /* I/O Numenator vector */ + inv_D_t *inv_D, /* I 1 / D vector */ + opus_int M /* I dimension */ +); + +/* Solves Ax = b, assuming A is symmetric */ +void silk_solve_LDL_FIX( + opus_int32 *A, /* I Pointer to symetric square matrix A */ + opus_int M, /* I Size of matrix */ + const opus_int32 *b, /* I Pointer to b vector */ + opus_int32 *x_Q16 /* O Pointer to x solution vector */ +) +{ + VARDECL( opus_int32, L_Q16 ); + opus_int32 Y[ MAX_MATRIX_SIZE ]; + inv_D_t inv_D[ MAX_MATRIX_SIZE ]; + SAVE_STACK; + + silk_assert( M <= MAX_MATRIX_SIZE ); + ALLOC( L_Q16, M * M, opus_int32 ); + + /*************************************************** + Factorize A by LDL such that A = L*D*L', + where L is lower triangular with ones on diagonal + ****************************************************/ + silk_LDL_factorize_FIX( A, M, L_Q16, inv_D ); + + /**************************************************** + * substitute D*L'*x = Y. ie: + L*D*L'*x = b => L*Y = b <=> Y = inv(L)*b + ******************************************************/ + silk_LS_SolveFirst_FIX( L_Q16, M, b, Y ); + + /**************************************************** + D*L'*x = Y <=> L'*x = inv(D)*Y, because D is + diagonal just multiply with 1/d_i + ****************************************************/ + silk_LS_divide_Q16_FIX( Y, inv_D, M ); + + /**************************************************** + x = inv(L') * inv(D) * Y + *****************************************************/ + silk_LS_SolveLast_FIX( L_Q16, M, Y, x_Q16 ); + RESTORE_STACK; +} + +static OPUS_INLINE void silk_LDL_factorize_FIX( + opus_int32 *A, /* I/O Pointer to Symetric Square Matrix */ + opus_int M, /* I Size of Matrix */ + opus_int32 *L_Q16, /* I/O Pointer to Square Upper triangular Matrix */ + inv_D_t *inv_D /* I/O Pointer to vector holding inverted diagonal elements of D */ +) +{ + opus_int i, j, k, status, loop_count; + const opus_int32 *ptr1, *ptr2; + opus_int32 diag_min_value, tmp_32, err; + opus_int32 v_Q0[ MAX_MATRIX_SIZE ], D_Q0[ MAX_MATRIX_SIZE ]; + opus_int32 one_div_diag_Q36, one_div_diag_Q40, one_div_diag_Q48; + + silk_assert( M <= MAX_MATRIX_SIZE ); + + status = 1; + diag_min_value = silk_max_32( silk_SMMUL( silk_ADD_SAT32( A[ 0 ], A[ silk_SMULBB( M, M ) - 1 ] ), SILK_FIX_CONST( FIND_LTP_COND_FAC, 31 ) ), 1 << 9 ); + for( loop_count = 0; loop_count < M && status == 1; loop_count++ ) { + status = 0; + for( j = 0; j < M; j++ ) { + ptr1 = matrix_adr( L_Q16, j, 0, M ); + tmp_32 = 0; + for( i = 0; i < j; i++ ) { + v_Q0[ i ] = silk_SMULWW( D_Q0[ i ], ptr1[ i ] ); /* Q0 */ + tmp_32 = silk_SMLAWW( tmp_32, v_Q0[ i ], ptr1[ i ] ); /* Q0 */ + } + tmp_32 = silk_SUB32( matrix_ptr( A, j, j, M ), tmp_32 ); + + if( tmp_32 < diag_min_value ) { + tmp_32 = silk_SUB32( silk_SMULBB( loop_count + 1, diag_min_value ), tmp_32 ); + /* Matrix not positive semi-definite, or ill conditioned */ + for( i = 0; i < M; i++ ) { + matrix_ptr( A, i, i, M ) = silk_ADD32( matrix_ptr( A, i, i, M ), tmp_32 ); + } + status = 1; + break; + } + D_Q0[ j ] = tmp_32; /* always < max(Correlation) */ + + /* two-step division */ + one_div_diag_Q36 = silk_INVERSE32_varQ( tmp_32, 36 ); /* Q36 */ + one_div_diag_Q40 = silk_LSHIFT( one_div_diag_Q36, 4 ); /* Q40 */ + err = silk_SUB32( (opus_int32)1 << 24, silk_SMULWW( tmp_32, one_div_diag_Q40 ) ); /* Q24 */ + one_div_diag_Q48 = silk_SMULWW( err, one_div_diag_Q40 ); /* Q48 */ + + /* Save 1/Ds */ + inv_D[ j ].Q36_part = one_div_diag_Q36; + inv_D[ j ].Q48_part = one_div_diag_Q48; + + matrix_ptr( L_Q16, j, j, M ) = 65536; /* 1.0 in Q16 */ + ptr1 = matrix_adr( A, j, 0, M ); + ptr2 = matrix_adr( L_Q16, j + 1, 0, M ); + for( i = j + 1; i < M; i++ ) { + tmp_32 = 0; + for( k = 0; k < j; k++ ) { + tmp_32 = silk_SMLAWW( tmp_32, v_Q0[ k ], ptr2[ k ] ); /* Q0 */ + } + tmp_32 = silk_SUB32( ptr1[ i ], tmp_32 ); /* always < max(Correlation) */ + + /* tmp_32 / D_Q0[j] : Divide to Q16 */ + matrix_ptr( L_Q16, i, j, M ) = silk_ADD32( silk_SMMUL( tmp_32, one_div_diag_Q48 ), + silk_RSHIFT( silk_SMULWW( tmp_32, one_div_diag_Q36 ), 4 ) ); + + /* go to next column */ + ptr2 += M; + } + } + } + + silk_assert( status == 0 ); +} + +static OPUS_INLINE void silk_LS_divide_Q16_FIX( + opus_int32 T[], /* I/O Numenator vector */ + inv_D_t *inv_D, /* I 1 / D vector */ + opus_int M /* I dimension */ +) +{ + opus_int i; + opus_int32 tmp_32; + opus_int32 one_div_diag_Q36, one_div_diag_Q48; + + for( i = 0; i < M; i++ ) { + one_div_diag_Q36 = inv_D[ i ].Q36_part; + one_div_diag_Q48 = inv_D[ i ].Q48_part; + + tmp_32 = T[ i ]; + T[ i ] = silk_ADD32( silk_SMMUL( tmp_32, one_div_diag_Q48 ), silk_RSHIFT( silk_SMULWW( tmp_32, one_div_diag_Q36 ), 4 ) ); + } +} + +/* Solve Lx = b, when L is lower triangular and has ones on the diagonal */ +static OPUS_INLINE void silk_LS_SolveFirst_FIX( + const opus_int32 *L_Q16, /* I Pointer to Lower Triangular Matrix */ + opus_int M, /* I Dim of Matrix equation */ + const opus_int32 *b, /* I b Vector */ + opus_int32 *x_Q16 /* O x Vector */ +) +{ + opus_int i, j; + const opus_int32 *ptr32; + opus_int32 tmp_32; + + for( i = 0; i < M; i++ ) { + ptr32 = matrix_adr( L_Q16, i, 0, M ); + tmp_32 = 0; + for( j = 0; j < i; j++ ) { + tmp_32 = silk_SMLAWW( tmp_32, ptr32[ j ], x_Q16[ j ] ); + } + x_Q16[ i ] = silk_SUB32( b[ i ], tmp_32 ); + } +} + +/* Solve L^t*x = b, where L is lower triangular with ones on the diagonal */ +static OPUS_INLINE void silk_LS_SolveLast_FIX( + const opus_int32 *L_Q16, /* I Pointer to Lower Triangular Matrix */ + const opus_int M, /* I Dim of Matrix equation */ + const opus_int32 *b, /* I b Vector */ + opus_int32 *x_Q16 /* O x Vector */ +) +{ + opus_int i, j; + const opus_int32 *ptr32; + opus_int32 tmp_32; + + for( i = M - 1; i >= 0; i-- ) { + ptr32 = matrix_adr( L_Q16, 0, i, M ); + tmp_32 = 0; + for( j = M - 1; j > i; j-- ) { + tmp_32 = silk_SMLAWW( tmp_32, ptr32[ silk_SMULBB( j, M ) ], x_Q16[ j ] ); + } + x_Q16[ i ] = silk_SUB32( b[ i ], tmp_32 ); + } +} diff --git a/thirdparty/opus/silk/fixed/structs_FIX.h b/thirdparty/opus/silk/fixed/structs_FIX.h index 2774a97b24..3294b25128 100644 --- a/thirdparty/opus/silk/fixed/structs_FIX.h +++ b/thirdparty/opus/silk/fixed/structs_FIX.h @@ -48,16 +48,30 @@ typedef struct { } silk_shape_state_FIX; /********************************/ +/* Prefilter state */ +/********************************/ +typedef struct { + opus_int16 sLTP_shp[ LTP_BUF_LENGTH ]; + opus_int32 sAR_shp[ MAX_SHAPE_LPC_ORDER + 1 ]; + opus_int sLTP_shp_buf_idx; + opus_int32 sLF_AR_shp_Q12; + opus_int32 sLF_MA_shp_Q12; + opus_int32 sHarmHP_Q2; + opus_int32 rand_seed; + opus_int lagPrev; +} silk_prefilter_state_FIX; + +/********************************/ /* Encoder state FIX */ /********************************/ typedef struct { silk_encoder_state sCmn; /* Common struct, shared with floating-point code */ silk_shape_state_FIX sShape; /* Shape state */ + silk_prefilter_state_FIX sPrefilt; /* Prefilter State */ /* Buffer for find pitch and noise shape analysis */ silk_DWORD_ALIGN opus_int16 x_buf[ 2 * MAX_FRAME_LENGTH + LA_SHAPE_MAX ];/* Buffer for find pitch and noise shape analysis */ opus_int LTPCorr_Q15; /* Normalized correlation from pitch lag estimator */ - opus_int32 resNrgSmth; } silk_encoder_state_FIX; /************************/ @@ -73,8 +87,11 @@ typedef struct { /* Noise shaping parameters */ /* Testing */ - silk_DWORD_ALIGN opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ]; + silk_DWORD_ALIGN opus_int16 AR1_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ]; + silk_DWORD_ALIGN opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ]; opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ]; /* Packs two int16 coefficients per int32 value */ + opus_int GainsPre_Q14[ MAX_NB_SUBFR ]; + opus_int HarmBoost_Q14[ MAX_NB_SUBFR ]; opus_int Tilt_Q14[ MAX_NB_SUBFR ]; opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ]; opus_int Lambda_Q10; @@ -82,6 +99,7 @@ typedef struct { opus_int coding_quality_Q14; /* measures */ + opus_int sparseness_Q8; opus_int32 predGain_Q16; opus_int LTPredCodGain_Q7; opus_int32 ResNrg[ MAX_NB_SUBFR ]; /* Residual energy per subframe */ diff --git a/thirdparty/opus/silk/fixed/warped_autocorrelation_FIX.c b/thirdparty/opus/silk/fixed/warped_autocorrelation_FIX.c index 5c79553bc0..6ca6c1184d 100644 --- a/thirdparty/opus/silk/fixed/warped_autocorrelation_FIX.c +++ b/thirdparty/opus/silk/fixed/warped_autocorrelation_FIX.c @@ -31,14 +31,17 @@ POSSIBILITY OF SUCH DAMAGE. #include "main_FIX.h" +#define QC 10 +#define QS 14 + #if defined(MIPSr1_ASM) #include "mips/warped_autocorrelation_FIX_mipsr1.h" #endif +#ifndef OVERRIDE_silk_warped_autocorrelation_FIX /* Autocorrelations for a warped frequency axis */ -#ifndef OVERRIDE_silk_warped_autocorrelation_FIX_c -void silk_warped_autocorrelation_FIX_c( +void silk_warped_autocorrelation_FIX( opus_int32 *corr, /* O Result [order + 1] */ opus_int *scale, /* O Scaling of the correlation vector */ const opus_int16 *input, /* I Input data to correlate */ @@ -53,7 +56,7 @@ void silk_warped_autocorrelation_FIX_c( opus_int64 corr_QC[ MAX_SHAPE_LPC_ORDER + 1 ] = { 0 }; /* Order must be even */ - celt_assert( ( order & 1 ) == 0 ); + silk_assert( ( order & 1 ) == 0 ); silk_assert( 2 * QS - QC >= 0 ); /* Loop over samples */ @@ -89,4 +92,4 @@ void silk_warped_autocorrelation_FIX_c( } silk_assert( corr_QC[ 0 ] >= 0 ); /* If breaking, decrease QC*/ } -#endif /* OVERRIDE_silk_warped_autocorrelation_FIX_c */ +#endif /* OVERRIDE_silk_warped_autocorrelation_FIX */ diff --git a/thirdparty/opus/silk/fixed/x86/burg_modified_FIX_sse4_1.c b/thirdparty/opus/silk/fixed/x86/burg_modified_FIX_sse.c index bbb1ce0fcc..3c3583c5fc 100644 --- a/thirdparty/opus/silk/fixed/x86/burg_modified_FIX_sse4_1.c +++ b/thirdparty/opus/silk/fixed/x86/burg_modified_FIX_sse.c @@ -72,7 +72,7 @@ void silk_burg_modified_sse4_1( __m128i FIRST_3210, LAST_3210, ATMP_3210, TMP1_3210, TMP2_3210, T1_3210, T2_3210, PTR_3210, SUBFR_3210, X1_3210, X2_3210; __m128i CONST1 = _mm_set1_epi32(1); - celt_assert( subfr_length * nb_subfr <= MAX_FRAME_SIZE ); + silk_assert( subfr_length * nb_subfr <= MAX_FRAME_SIZE ); /* Compute autocorrelations, added over subframes */ silk_sum_sqr_shift( &C0, &rshifts, x, nb_subfr * subfr_length ); diff --git a/thirdparty/opus/silk/fixed/x86/prefilter_FIX_sse.c b/thirdparty/opus/silk/fixed/x86/prefilter_FIX_sse.c index 555432cd96..488a603f5d 100644 --- a/thirdparty/opus/silk/fixed/x86/prefilter_FIX_sse.c +++ b/thirdparty/opus/silk/fixed/x86/prefilter_FIX_sse.c @@ -49,7 +49,7 @@ void silk_warped_LPC_analysis_filter_FIX_sse4_1( opus_int32 acc_Q11, tmp1, tmp2; /* Order must be even */ - celt_assert( ( order & 1 ) == 0 ); + silk_assert( ( order & 1 ) == 0 ); if (order == 10) { @@ -65,7 +65,7 @@ void silk_warped_LPC_analysis_filter_FIX_sse4_1( register opus_int32 state_8, state_9, state_a; register opus_int64 coef_Q13_8, coef_Q13_9; - celt_assert( length > 0 ); + silk_assert( length > 0 ); coef_Q13_3210 = OP_CVTEPI16_EPI32_M64( &coef_Q13[ 0 ] ); coef_Q13_7654 = OP_CVTEPI16_EPI32_M64( &coef_Q13[ 4 ] ); @@ -107,8 +107,8 @@ void silk_warped_LPC_analysis_filter_FIX_sse4_1( xmm_tempb = _mm_add_epi32( xmm_tempb, xmm_product2 ); xmm_tempa = _mm_add_epi32( xmm_tempa, xmm_tempb ); - sum = (opus_int32)((coef_Q13_8 * state_8) >> 16); - sum += (opus_int32)((coef_Q13_9 * state_9) >> 16); + sum = (coef_Q13_8 * state_8) >> 16; + sum += (coef_Q13_9 * state_9) >> 16; xmm_tempa = _mm_add_epi32( xmm_tempa, _mm_shuffle_epi32( xmm_tempa, _MM_SHUFFLE( 0, 0, 0, 2 ) ) ); sum += _mm_cvtsi128_si32( xmm_tempa); diff --git a/thirdparty/opus/silk/fixed/x86/vector_ops_FIX_sse4_1.c b/thirdparty/opus/silk/fixed/x86/vector_ops_FIX_sse.c index c1e90564d0..c1e90564d0 100644 --- a/thirdparty/opus/silk/fixed/x86/vector_ops_FIX_sse4_1.c +++ b/thirdparty/opus/silk/fixed/x86/vector_ops_FIX_sse.c diff --git a/thirdparty/opus/silk/float/LPC_analysis_filter_FLP.c b/thirdparty/opus/silk/float/LPC_analysis_filter_FLP.c index 0e1a1fed0f..cae89a0a18 100644 --- a/thirdparty/opus/silk/float/LPC_analysis_filter_FLP.c +++ b/thirdparty/opus/silk/float/LPC_analysis_filter_FLP.c @@ -215,7 +215,7 @@ void silk_LPC_analysis_filter_FLP( const opus_int Order /* I LPC order */ ) { - celt_assert( Order <= length ); + silk_assert( Order <= length ); switch( Order ) { case 6: @@ -239,7 +239,7 @@ void silk_LPC_analysis_filter_FLP( break; default: - celt_assert( 0 ); + silk_assert( 0 ); break; } diff --git a/thirdparty/opus/silk/float/LPC_inv_pred_gain_FLP.c b/thirdparty/opus/silk/float/LPC_inv_pred_gain_FLP.c index 2be2122d61..25178bacdd 100644 --- a/thirdparty/opus/silk/float/LPC_inv_pred_gain_FLP.c +++ b/thirdparty/opus/silk/float/LPC_inv_pred_gain_FLP.c @@ -31,7 +31,8 @@ POSSIBILITY OF SUCH DAMAGE. #include "SigProc_FIX.h" #include "SigProc_FLP.h" -#include "define.h" + +#define RC_THRESHOLD 0.9999f /* compute inverse of LPC prediction gain, and */ /* test if LPC coefficients are stable (all poles within unit circle) */ @@ -42,32 +43,34 @@ silk_float silk_LPC_inverse_pred_gain_FLP( /* O return inverse prediction ga ) { opus_int k, n; - double invGain, rc, rc_mult1, rc_mult2, tmp1, tmp2; - silk_float Atmp[ SILK_MAX_ORDER_LPC ]; + double invGain, rc, rc_mult1, rc_mult2; + silk_float Atmp[ 2 ][ SILK_MAX_ORDER_LPC ]; + silk_float *Aold, *Anew; - silk_memcpy( Atmp, A, order * sizeof(silk_float) ); + Anew = Atmp[ order & 1 ]; + silk_memcpy( Anew, A, order * sizeof(silk_float) ); invGain = 1.0; for( k = order - 1; k > 0; k-- ) { - rc = -Atmp[ k ]; - rc_mult1 = 1.0f - rc * rc; - invGain *= rc_mult1; - if( invGain * MAX_PREDICTION_POWER_GAIN < 1.0f ) { + rc = -Anew[ k ]; + if( rc > RC_THRESHOLD || rc < -RC_THRESHOLD ) { return 0.0f; } + rc_mult1 = 1.0f - rc * rc; rc_mult2 = 1.0f / rc_mult1; - for( n = 0; n < (k + 1) >> 1; n++ ) { - tmp1 = Atmp[ n ]; - tmp2 = Atmp[ k - n - 1 ]; - Atmp[ n ] = (silk_float)( ( tmp1 - tmp2 * rc ) * rc_mult2 ); - Atmp[ k - n - 1 ] = (silk_float)( ( tmp2 - tmp1 * rc ) * rc_mult2 ); + invGain *= rc_mult1; + /* swap pointers */ + Aold = Anew; + Anew = Atmp[ k & 1 ]; + for( n = 0; n < k; n++ ) { + Anew[ n ] = (silk_float)( ( Aold[ n ] - Aold[ k - n - 1 ] * rc ) * rc_mult2 ); } } - rc = -Atmp[ 0 ]; - rc_mult1 = 1.0f - rc * rc; - invGain *= rc_mult1; - if( invGain * MAX_PREDICTION_POWER_GAIN < 1.0f ) { + rc = -Anew[ 0 ]; + if( rc > RC_THRESHOLD || rc < -RC_THRESHOLD ) { return 0.0f; } + rc_mult1 = 1.0f - rc * rc; + invGain *= rc_mult1; return (silk_float)invGain; } diff --git a/thirdparty/opus/silk/float/SigProc_FLP.h b/thirdparty/opus/silk/float/SigProc_FLP.h index 953de8b09e..f0cb3733be 100644 --- a/thirdparty/opus/silk/float/SigProc_FLP.h +++ b/thirdparty/opus/silk/float/SigProc_FLP.h @@ -68,6 +68,13 @@ void silk_k2a_FLP( opus_int32 order /* I prediction order */ ); +/* Solve the normal equations using the Levinson-Durbin recursion */ +silk_float silk_levinsondurbin_FLP( /* O prediction error energy */ + silk_float A[], /* O prediction coefficients [order] */ + const silk_float corr[], /* I input auto-correlations [order + 1] */ + const opus_int order /* I prediction order */ +); + /* compute autocorrelation */ void silk_autocorrelation_FLP( silk_float *results, /* O result (length correlationCount) */ diff --git a/thirdparty/opus/silk/float/apply_sine_window_FLP.c b/thirdparty/opus/silk/float/apply_sine_window_FLP.c index e49e717991..6aae57c0ab 100644 --- a/thirdparty/opus/silk/float/apply_sine_window_FLP.c +++ b/thirdparty/opus/silk/float/apply_sine_window_FLP.c @@ -45,10 +45,10 @@ void silk_apply_sine_window_FLP( opus_int k; silk_float freq, c, S0, S1; - celt_assert( win_type == 1 || win_type == 2 ); + silk_assert( win_type == 1 || win_type == 2 ); /* Length must be multiple of 4 */ - celt_assert( ( length & 3 ) == 0 ); + silk_assert( ( length & 3 ) == 0 ); freq = PI / ( length + 1 ); diff --git a/thirdparty/opus/silk/float/burg_modified_FLP.c b/thirdparty/opus/silk/float/burg_modified_FLP.c index 756b76a35b..ea5dc25a93 100644 --- a/thirdparty/opus/silk/float/burg_modified_FLP.c +++ b/thirdparty/opus/silk/float/burg_modified_FLP.c @@ -52,7 +52,7 @@ silk_float silk_burg_modified_FLP( /* O returns residual energy double CAf[ SILK_MAX_ORDER_LPC + 1 ], CAb[ SILK_MAX_ORDER_LPC + 1 ]; double Af[ SILK_MAX_ORDER_LPC ]; - celt_assert( subfr_length * nb_subfr <= MAX_FRAME_SIZE ); + silk_assert( subfr_length * nb_subfr <= MAX_FRAME_SIZE ); /* Compute autocorrelations, added over subframes */ C0 = silk_energy_FLP( x, nb_subfr * subfr_length ); diff --git a/thirdparty/opus/silk/float/encode_frame_FLP.c b/thirdparty/opus/silk/float/encode_frame_FLP.c index b029c3f5ca..2092a4d9e2 100644 --- a/thirdparty/opus/silk/float/encode_frame_FLP.c +++ b/thirdparty/opus/silk/float/encode_frame_FLP.c @@ -29,7 +29,6 @@ POSSIBILITY OF SUCH DAMAGE. #include "config.h" #endif -#include <stdlib.h> #include "main_FLP.h" #include "tuning_parameters.h" @@ -42,28 +41,21 @@ static OPUS_INLINE void silk_LBRR_encode_FLP( ); void silk_encode_do_VAD_FLP( - silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */ - opus_int activity /* I Decision of Opus voice activity detector */ + silk_encoder_state_FLP *psEnc /* I/O Encoder state FLP */ ) { - const opus_int activity_threshold = SILK_FIX_CONST( SPEECH_ACTIVITY_DTX_THRES, 8 ); - /****************************/ /* Voice Activity Detection */ /****************************/ silk_VAD_GetSA_Q8( &psEnc->sCmn, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.arch ); - /* If Opus VAD is inactive and Silk VAD is active: lower Silk VAD to just under the threshold */ - if( activity == VAD_NO_ACTIVITY && psEnc->sCmn.speech_activity_Q8 >= activity_threshold ) { - psEnc->sCmn.speech_activity_Q8 = activity_threshold - 1; - } /**************************************************/ /* Convert speech activity into VAD and DTX flags */ /**************************************************/ - if( psEnc->sCmn.speech_activity_Q8 < activity_threshold ) { + if( psEnc->sCmn.speech_activity_Q8 < SILK_FIX_CONST( SPEECH_ACTIVITY_DTX_THRES, 8 ) ) { psEnc->sCmn.indices.signalType = TYPE_NO_VOICE_ACTIVITY; psEnc->sCmn.noSpeechCounter++; - if( psEnc->sCmn.noSpeechCounter <= NB_SPEECH_FRAMES_BEFORE_DTX ) { + if( psEnc->sCmn.noSpeechCounter < NB_SPEECH_FRAMES_BEFORE_DTX ) { psEnc->sCmn.inDTX = 0; } else if( psEnc->sCmn.noSpeechCounter > MAX_CONSECUTIVE_DTX + NB_SPEECH_FRAMES_BEFORE_DTX ) { psEnc->sCmn.noSpeechCounter = NB_SPEECH_FRAMES_BEFORE_DTX; @@ -93,6 +85,7 @@ opus_int silk_encode_frame_FLP( silk_encoder_control_FLP sEncCtrl; opus_int i, iter, maxIter, found_upper, found_lower, ret = 0; silk_float *x_frame, *res_pitch_frame; + silk_float xfw[ MAX_FRAME_LENGTH ]; silk_float res_pitch[ 2 * MAX_FRAME_LENGTH + LA_PITCH_MAX ]; ec_enc sRangeEnc_copy, sRangeEnc_copy2; silk_nsq_state sNSQ_copy, sNSQ_copy2; @@ -104,9 +97,6 @@ opus_int silk_encode_frame_FLP( opus_int8 LastGainIndex_copy2; opus_int32 pGains_Q16[ MAX_NB_SUBFR ]; opus_uint8 ec_buf_copy[ 1275 ]; - opus_int gain_lock[ MAX_NB_SUBFR ] = {0}; - opus_int16 best_gain_mult[ MAX_NB_SUBFR ]; - opus_int best_sum[ MAX_NB_SUBFR ]; /* This is totally unnecessary but many compilers (including gcc) are too dumb to realise it */ LastGainIndex_copy2 = nBits_lower = nBits_upper = gainMult_lower = gainMult_upper = 0; @@ -149,17 +139,22 @@ opus_int silk_encode_frame_FLP( /***************************************************/ /* Find linear prediction coefficients (LPC + LTP) */ /***************************************************/ - silk_find_pred_coefs_FLP( psEnc, &sEncCtrl, res_pitch_frame, x_frame, condCoding ); + silk_find_pred_coefs_FLP( psEnc, &sEncCtrl, res_pitch, x_frame, condCoding ); /****************************************/ /* Process gains */ /****************************************/ silk_process_gains_FLP( psEnc, &sEncCtrl, condCoding ); + /*****************************************/ + /* Prefiltering for noise shaper */ + /*****************************************/ + silk_prefilter_FLP( psEnc, &sEncCtrl, xfw, x_frame ); + /****************************************/ /* Low Bitrate Redundant Encoding */ /****************************************/ - silk_LBRR_encode_FLP( psEnc, &sEncCtrl, x_frame, condCoding ); + silk_LBRR_encode_FLP( psEnc, &sEncCtrl, xfw, condCoding ); /* Loop over quantizer and entroy coding to control bitrate */ maxIter = 6; @@ -193,11 +188,7 @@ opus_int silk_encode_frame_FLP( /*****************************************/ /* Noise shaping quantization */ /*****************************************/ - silk_NSQ_wrapper_FLP( psEnc, &sEncCtrl, &psEnc->sCmn.indices, &psEnc->sCmn.sNSQ, psEnc->sCmn.pulses, x_frame ); - - if ( iter == maxIter && !found_lower ) { - silk_memcpy( &sRangeEnc_copy2, psRangeEnc, sizeof( ec_enc ) ); - } + silk_NSQ_wrapper_FLP( psEnc, &sEncCtrl, &psEnc->sCmn.indices, &psEnc->sCmn.sNSQ, psEnc->sCmn.pulses, xfw ); /****************************************/ /* Encode Parameters */ @@ -212,33 +203,6 @@ opus_int silk_encode_frame_FLP( nBits = ec_tell( psRangeEnc ); - /* If we still bust after the last iteration, do some damage control. */ - if ( iter == maxIter && !found_lower && nBits > maxBits ) { - silk_memcpy( psRangeEnc, &sRangeEnc_copy2, sizeof( ec_enc ) ); - - /* Keep gains the same as the last frame. */ - psEnc->sShape.LastGainIndex = sEncCtrl.lastGainIndexPrev; - for ( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) { - psEnc->sCmn.indices.GainsIndices[ i ] = 4; - } - if (condCoding != CODE_CONDITIONALLY) { - psEnc->sCmn.indices.GainsIndices[ 0 ] = sEncCtrl.lastGainIndexPrev; - } - psEnc->sCmn.ec_prevLagIndex = ec_prevLagIndex_copy; - psEnc->sCmn.ec_prevSignalType = ec_prevSignalType_copy; - /* Clear all pulses. */ - for ( i = 0; i < psEnc->sCmn.frame_length; i++ ) { - psEnc->sCmn.pulses[ i ] = 0; - } - - silk_encode_indices( &psEnc->sCmn, psRangeEnc, psEnc->sCmn.nFramesEncoded, 0, condCoding ); - - silk_encode_pulses( psRangeEnc, psEnc->sCmn.indices.signalType, psEnc->sCmn.indices.quantOffsetType, - psEnc->sCmn.pulses, psEnc->sCmn.frame_length ); - - nBits = ec_tell( psRangeEnc ); - } - if( useCBR == 0 && iter == 0 && nBits <= maxBits ) { break; } @@ -248,7 +212,7 @@ opus_int silk_encode_frame_FLP( if( found_lower && ( gainsID == gainsID_lower || nBits > maxBits ) ) { /* Restore output state from earlier iteration that did meet the bitrate budget */ silk_memcpy( psRangeEnc, &sRangeEnc_copy2, sizeof( ec_enc ) ); - celt_assert( sRangeEnc_copy2.offs <= 1275 ); + silk_assert( sRangeEnc_copy2.offs <= 1275 ); silk_memcpy( psRangeEnc->buf, ec_buf_copy, sRangeEnc_copy2.offs ); silk_memcpy( &psEnc->sCmn.sNSQ, &sNSQ_copy2, sizeof( silk_nsq_state ) ); psEnc->sShape.LastGainIndex = LastGainIndex_copy2; @@ -259,9 +223,7 @@ opus_int silk_encode_frame_FLP( if( nBits > maxBits ) { if( found_lower == 0 && iter >= 2 ) { /* Adjust the quantizer's rate/distortion tradeoff and discard previous "upper" results */ - sEncCtrl.Lambda = silk_max_float(sEncCtrl.Lambda*1.5f, 1.5f); - /* Reducing dithering can help us hit the target. */ - psEnc->sCmn.indices.quantOffsetType = 0; + sEncCtrl.Lambda *= 1.5f; found_upper = 0; gainsID_upper = -1; } else { @@ -278,7 +240,7 @@ opus_int silk_encode_frame_FLP( gainsID_lower = gainsID; /* Copy part of the output state */ silk_memcpy( &sRangeEnc_copy2, psRangeEnc, sizeof( ec_enc ) ); - celt_assert( psRangeEnc->offs <= 1275 ); + silk_assert( psRangeEnc->offs <= 1275 ); silk_memcpy( ec_buf_copy, psRangeEnc->buf, psRangeEnc->offs ); silk_memcpy( &sNSQ_copy2, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) ); LastGainIndex_copy2 = psEnc->sShape.LastGainIndex; @@ -288,34 +250,15 @@ opus_int silk_encode_frame_FLP( break; } - if ( !found_lower && nBits > maxBits ) { - int j; - for ( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) { - int sum=0; - for ( j = i*psEnc->sCmn.subfr_length; j < (i+1)*psEnc->sCmn.subfr_length; j++ ) { - sum += abs( psEnc->sCmn.pulses[j] ); - } - if ( iter == 0 || (sum < best_sum[i] && !gain_lock[i]) ) { - best_sum[i] = sum; - best_gain_mult[i] = gainMult_Q8; - } else { - gain_lock[i] = 1; - } - } - } if( ( found_lower & found_upper ) == 0 ) { /* Adjust gain according to high-rate rate/distortion curve */ + opus_int32 gain_factor_Q16; + gain_factor_Q16 = silk_log2lin( silk_LSHIFT( nBits - maxBits, 7 ) / psEnc->sCmn.frame_length + SILK_FIX_CONST( 16, 7 ) ); + gain_factor_Q16 = silk_min_32( gain_factor_Q16, SILK_FIX_CONST( 2, 16 ) ); if( nBits > maxBits ) { - if (gainMult_Q8 < 16384) { - gainMult_Q8 *= 2; - } else { - gainMult_Q8 = 32767; - } - } else { - opus_int32 gain_factor_Q16; - gain_factor_Q16 = silk_log2lin( silk_LSHIFT( nBits - maxBits, 7 ) / psEnc->sCmn.frame_length + SILK_FIX_CONST( 16, 7 ) ); - gainMult_Q8 = silk_SMULWB( gain_factor_Q16, gainMult_Q8 ); + gain_factor_Q16 = silk_max_32( gain_factor_Q16, SILK_FIX_CONST( 1.3, 16 ) ); } + gainMult_Q8 = silk_SMULWB( gain_factor_Q16, gainMult_Q8 ); } else { /* Adjust gain by interpolating */ gainMult_Q8 = gainMult_lower + ( ( gainMult_upper - gainMult_lower ) * ( maxBits - nBits_lower ) ) / ( nBits_upper - nBits_lower ); @@ -329,13 +272,7 @@ opus_int silk_encode_frame_FLP( } for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) { - opus_int16 tmp; - if ( gain_lock[i] ) { - tmp = best_gain_mult[i]; - } else { - tmp = gainMult_Q8; - } - pGains_Q16[ i ] = silk_LSHIFT_SAT32( silk_SMULWB( sEncCtrl.GainsUnq_Q16[ i ], tmp ), 8 ); + pGains_Q16[ i ] = silk_LSHIFT_SAT32( silk_SMULWB( sEncCtrl.GainsUnq_Q16[ i ], gainMult_Q8 ), 8 ); } /* Quantize gains */ diff --git a/thirdparty/opus/silk/float/energy_FLP.c b/thirdparty/opus/silk/float/energy_FLP.c index 7bc7173c9c..24b8179f9e 100644 --- a/thirdparty/opus/silk/float/energy_FLP.c +++ b/thirdparty/opus/silk/float/energy_FLP.c @@ -37,12 +37,13 @@ double silk_energy_FLP( opus_int dataSize ) { - opus_int i; + opus_int i, dataSize4; double result; /* 4x unrolled loop */ result = 0.0; - for( i = 0; i < dataSize - 3; i += 4 ) { + dataSize4 = dataSize & 0xFFFC; + for( i = 0; i < dataSize4; i += 4 ) { result += data[ i + 0 ] * (double)data[ i + 0 ] + data[ i + 1 ] * (double)data[ i + 1 ] + data[ i + 2 ] * (double)data[ i + 2 ] + diff --git a/thirdparty/opus/silk/float/find_LPC_FLP.c b/thirdparty/opus/silk/float/find_LPC_FLP.c index fa3ffe7f8b..fcfe1c3681 100644 --- a/thirdparty/opus/silk/float/find_LPC_FLP.c +++ b/thirdparty/opus/silk/float/find_LPC_FLP.c @@ -73,7 +73,7 @@ void silk_find_LPC_FLP( silk_interpolate( NLSF0_Q15, psEncC->prev_NLSFq_Q15, NLSF_Q15, k, psEncC->predictLPCOrder ); /* Convert to LPC for residual energy evaluation */ - silk_NLSF2A_FLP( a_tmp, NLSF0_Q15, psEncC->predictLPCOrder, psEncC->arch ); + silk_NLSF2A_FLP( a_tmp, NLSF0_Q15, psEncC->predictLPCOrder ); /* Calculate residual energy with LSF interpolation */ silk_LPC_analysis_filter_FLP( LPC_res, a_tmp, x, 2 * subfr_length, psEncC->predictLPCOrder ); @@ -99,6 +99,6 @@ void silk_find_LPC_FLP( silk_A2NLSF_FLP( NLSF_Q15, a, psEncC->predictLPCOrder ); } - celt_assert( psEncC->indices.NLSFInterpCoef_Q2 == 4 || + silk_assert( psEncC->indices.NLSFInterpCoef_Q2 == 4 || ( psEncC->useInterpolatedNLSFs && !psEncC->first_frame_after_reset && psEncC->nb_subfr == MAX_NB_SUBFR ) ); } diff --git a/thirdparty/opus/silk/float/find_LTP_FLP.c b/thirdparty/opus/silk/float/find_LTP_FLP.c index f97064930e..7229996014 100644 --- a/thirdparty/opus/silk/float/find_LTP_FLP.c +++ b/thirdparty/opus/silk/float/find_LTP_FLP.c @@ -33,32 +33,100 @@ POSSIBILITY OF SUCH DAMAGE. #include "tuning_parameters.h" void silk_find_LTP_FLP( - silk_float XX[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* O Weight for LTP quantization */ - silk_float xX[ MAX_NB_SUBFR * LTP_ORDER ], /* O Weight for LTP quantization */ - const silk_float r_ptr[], /* I LPC residual */ - const opus_int lag[ MAX_NB_SUBFR ], /* I LTP lags */ + silk_float b[ MAX_NB_SUBFR * LTP_ORDER ], /* O LTP coefs */ + silk_float WLTP[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* O Weight for LTP quantization */ + silk_float *LTPredCodGain, /* O LTP coding gain */ + const silk_float r_lpc[], /* I LPC residual */ + const opus_int lag[ MAX_NB_SUBFR ], /* I LTP lags */ + const silk_float Wght[ MAX_NB_SUBFR ], /* I Weights */ const opus_int subfr_length, /* I Subframe length */ - const opus_int nb_subfr /* I number of subframes */ + const opus_int nb_subfr, /* I number of subframes */ + const opus_int mem_offset /* I Number of samples in LTP memory */ ) { - opus_int k; - silk_float *xX_ptr, *XX_ptr; - const silk_float *lag_ptr; - silk_float xx, temp; + opus_int i, k; + silk_float *b_ptr, temp, *WLTP_ptr; + silk_float LPC_res_nrg, LPC_LTP_res_nrg; + silk_float d[ MAX_NB_SUBFR ], m, g, delta_b[ LTP_ORDER ]; + silk_float w[ MAX_NB_SUBFR ], nrg[ MAX_NB_SUBFR ], regu; + silk_float Rr[ LTP_ORDER ], rr[ MAX_NB_SUBFR ]; + const silk_float *r_ptr, *lag_ptr; - xX_ptr = xX; - XX_ptr = XX; + b_ptr = b; + WLTP_ptr = WLTP; + r_ptr = &r_lpc[ mem_offset ]; for( k = 0; k < nb_subfr; k++ ) { lag_ptr = r_ptr - ( lag[ k ] + LTP_ORDER / 2 ); - silk_corrMatrix_FLP( lag_ptr, subfr_length, LTP_ORDER, XX_ptr ); - silk_corrVector_FLP( lag_ptr, r_ptr, subfr_length, LTP_ORDER, xX_ptr ); - xx = ( silk_float )silk_energy_FLP( r_ptr, subfr_length + LTP_ORDER ); - temp = 1.0f / silk_max( xx, LTP_CORR_INV_MAX * 0.5f * ( XX_ptr[ 0 ] + XX_ptr[ 24 ] ) + 1.0f ); - silk_scale_vector_FLP( XX_ptr, temp, LTP_ORDER * LTP_ORDER ); - silk_scale_vector_FLP( xX_ptr, temp, LTP_ORDER ); - r_ptr += subfr_length; - XX_ptr += LTP_ORDER * LTP_ORDER; - xX_ptr += LTP_ORDER; + silk_corrMatrix_FLP( lag_ptr, subfr_length, LTP_ORDER, WLTP_ptr ); + silk_corrVector_FLP( lag_ptr, r_ptr, subfr_length, LTP_ORDER, Rr ); + + rr[ k ] = ( silk_float )silk_energy_FLP( r_ptr, subfr_length ); + regu = 1.0f + rr[ k ] + + matrix_ptr( WLTP_ptr, 0, 0, LTP_ORDER ) + + matrix_ptr( WLTP_ptr, LTP_ORDER-1, LTP_ORDER-1, LTP_ORDER ); + regu *= LTP_DAMPING / 3; + silk_regularize_correlations_FLP( WLTP_ptr, &rr[ k ], regu, LTP_ORDER ); + silk_solve_LDL_FLP( WLTP_ptr, LTP_ORDER, Rr, b_ptr ); + + /* Calculate residual energy */ + nrg[ k ] = silk_residual_energy_covar_FLP( b_ptr, WLTP_ptr, Rr, rr[ k ], LTP_ORDER ); + + temp = Wght[ k ] / ( nrg[ k ] * Wght[ k ] + 0.01f * subfr_length ); + silk_scale_vector_FLP( WLTP_ptr, temp, LTP_ORDER * LTP_ORDER ); + w[ k ] = matrix_ptr( WLTP_ptr, LTP_ORDER / 2, LTP_ORDER / 2, LTP_ORDER ); + + r_ptr += subfr_length; + b_ptr += LTP_ORDER; + WLTP_ptr += LTP_ORDER * LTP_ORDER; + } + + /* Compute LTP coding gain */ + if( LTPredCodGain != NULL ) { + LPC_LTP_res_nrg = 1e-6f; + LPC_res_nrg = 0.0f; + for( k = 0; k < nb_subfr; k++ ) { + LPC_res_nrg += rr[ k ] * Wght[ k ]; + LPC_LTP_res_nrg += nrg[ k ] * Wght[ k ]; + } + + silk_assert( LPC_LTP_res_nrg > 0 ); + *LTPredCodGain = 3.0f * silk_log2( LPC_res_nrg / LPC_LTP_res_nrg ); + } + + /* Smoothing */ + /* d = sum( B, 1 ); */ + b_ptr = b; + for( k = 0; k < nb_subfr; k++ ) { + d[ k ] = 0; + for( i = 0; i < LTP_ORDER; i++ ) { + d[ k ] += b_ptr[ i ]; + } + b_ptr += LTP_ORDER; + } + /* m = ( w * d' ) / ( sum( w ) + 1e-3 ); */ + temp = 1e-3f; + for( k = 0; k < nb_subfr; k++ ) { + temp += w[ k ]; + } + m = 0; + for( k = 0; k < nb_subfr; k++ ) { + m += d[ k ] * w[ k ]; + } + m = m / temp; + + b_ptr = b; + for( k = 0; k < nb_subfr; k++ ) { + g = LTP_SMOOTHING / ( LTP_SMOOTHING + w[ k ] ) * ( m - d[ k ] ); + temp = 0; + for( i = 0; i < LTP_ORDER; i++ ) { + delta_b[ i ] = silk_max_float( b_ptr[ i ], 0.1f ); + temp += delta_b[ i ]; + } + temp = g / temp; + for( i = 0; i < LTP_ORDER; i++ ) { + b_ptr[ i ] = b_ptr[ i ] + delta_b[ i ] * temp; + } + b_ptr += LTP_ORDER; } } diff --git a/thirdparty/opus/silk/float/find_pitch_lags_FLP.c b/thirdparty/opus/silk/float/find_pitch_lags_FLP.c index dedbcd2836..f3b22d25ce 100644 --- a/thirdparty/opus/silk/float/find_pitch_lags_FLP.c +++ b/thirdparty/opus/silk/float/find_pitch_lags_FLP.c @@ -56,7 +56,7 @@ void silk_find_pitch_lags_FLP( buf_len = psEnc->sCmn.la_pitch + psEnc->sCmn.frame_length + psEnc->sCmn.ltp_mem_length; /* Safety check */ - celt_assert( buf_len >= psEnc->sCmn.pitch_LPC_win_length ); + silk_assert( buf_len >= psEnc->sCmn.pitch_LPC_win_length ); x_buf = x - psEnc->sCmn.ltp_mem_length; diff --git a/thirdparty/opus/silk/float/find_pred_coefs_FLP.c b/thirdparty/opus/silk/float/find_pred_coefs_FLP.c index dcf7c5202d..1af4fe5f1b 100644 --- a/thirdparty/opus/silk/float/find_pred_coefs_FLP.c +++ b/thirdparty/opus/silk/float/find_pred_coefs_FLP.c @@ -41,9 +41,8 @@ void silk_find_pred_coefs_FLP( ) { opus_int i; - silk_float XXLTP[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ]; - silk_float xXLTP[ MAX_NB_SUBFR * LTP_ORDER ]; - silk_float invGains[ MAX_NB_SUBFR ]; + silk_float WLTP[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ]; + silk_float invGains[ MAX_NB_SUBFR ], Wght[ MAX_NB_SUBFR ]; opus_int16 NLSF_Q15[ MAX_LPC_ORDER ]; const silk_float *x_ptr; silk_float *x_pre_ptr, LPC_in_pre[ MAX_NB_SUBFR * MAX_LPC_ORDER + MAX_FRAME_LENGTH ]; @@ -53,20 +52,23 @@ void silk_find_pred_coefs_FLP( for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) { silk_assert( psEncCtrl->Gains[ i ] > 0.0f ); invGains[ i ] = 1.0f / psEncCtrl->Gains[ i ]; + Wght[ i ] = invGains[ i ] * invGains[ i ]; } if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) { /**********/ /* VOICED */ /**********/ - celt_assert( psEnc->sCmn.ltp_mem_length - psEnc->sCmn.predictLPCOrder >= psEncCtrl->pitchL[ 0 ] + LTP_ORDER / 2 ); + silk_assert( psEnc->sCmn.ltp_mem_length - psEnc->sCmn.predictLPCOrder >= psEncCtrl->pitchL[ 0 ] + LTP_ORDER / 2 ); /* LTP analysis */ - silk_find_LTP_FLP( XXLTP, xXLTP, res_pitch, psEncCtrl->pitchL, psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr ); + silk_find_LTP_FLP( psEncCtrl->LTPCoef, WLTP, &psEncCtrl->LTPredCodGain, res_pitch, + psEncCtrl->pitchL, Wght, psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr, psEnc->sCmn.ltp_mem_length ); /* Quantize LTP gain parameters */ silk_quant_LTP_gains_FLP( psEncCtrl->LTPCoef, psEnc->sCmn.indices.LTPIndex, &psEnc->sCmn.indices.PERIndex, - &psEnc->sCmn.sum_log_gain_Q7, &psEncCtrl->LTPredCodGain, XXLTP, xXLTP, psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr, psEnc->sCmn.arch ); + &psEnc->sCmn.sum_log_gain_Q7, WLTP, psEnc->sCmn.mu_LTP_Q9, psEnc->sCmn.LTPQuantLowComplexity, psEnc->sCmn.nb_subfr, + psEnc->sCmn.arch ); /* Control LTP scaling */ silk_LTP_scale_ctrl_FLP( psEnc, psEncCtrl, condCoding ); diff --git a/thirdparty/opus/silk/float/inner_product_FLP.c b/thirdparty/opus/silk/float/inner_product_FLP.c index cdd39d24ce..029c012911 100644 --- a/thirdparty/opus/silk/float/inner_product_FLP.c +++ b/thirdparty/opus/silk/float/inner_product_FLP.c @@ -38,12 +38,13 @@ double silk_inner_product_FLP( opus_int dataSize ) { - opus_int i; + opus_int i, dataSize4; double result; /* 4x unrolled loop */ result = 0.0; - for( i = 0; i < dataSize - 3; i += 4 ) { + dataSize4 = dataSize & 0xFFFC; + for( i = 0; i < dataSize4; i += 4 ) { result += data1[ i + 0 ] * (double)data2[ i + 0 ] + data1[ i + 1 ] * (double)data2[ i + 1 ] + data1[ i + 2 ] * (double)data2[ i + 2 ] + diff --git a/thirdparty/opus/silk/float/k2a_FLP.c b/thirdparty/opus/silk/float/k2a_FLP.c index 1448008dbb..12af4e7669 100644 --- a/thirdparty/opus/silk/float/k2a_FLP.c +++ b/thirdparty/opus/silk/float/k2a_FLP.c @@ -39,16 +39,15 @@ void silk_k2a_FLP( ) { opus_int k, n; - silk_float rck, tmp1, tmp2; + silk_float Atmp[ SILK_MAX_ORDER_LPC ]; for( k = 0; k < order; k++ ) { - rck = rc[ k ]; - for( n = 0; n < (k + 1) >> 1; n++ ) { - tmp1 = A[ n ]; - tmp2 = A[ k - n - 1 ]; - A[ n ] = tmp1 + tmp2 * rck; - A[ k - n - 1 ] = tmp2 + tmp1 * rck; + for( n = 0; n < k; n++ ) { + Atmp[ n ] = A[ n ]; } - A[ k ] = -rck; + for( n = 0; n < k; n++ ) { + A[ n ] += Atmp[ k - n - 1 ] * rc[ k ]; + } + A[ k ] = -rc[ k ]; } } diff --git a/thirdparty/opus/silk/float/levinsondurbin_FLP.c b/thirdparty/opus/silk/float/levinsondurbin_FLP.c new file mode 100644 index 0000000000..f0ba606981 --- /dev/null +++ b/thirdparty/opus/silk/float/levinsondurbin_FLP.c @@ -0,0 +1,81 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "SigProc_FLP.h" + +/* Solve the normal equations using the Levinson-Durbin recursion */ +silk_float silk_levinsondurbin_FLP( /* O prediction error energy */ + silk_float A[], /* O prediction coefficients [order] */ + const silk_float corr[], /* I input auto-correlations [order + 1] */ + const opus_int order /* I prediction order */ +) +{ + opus_int i, mHalf, m; + silk_float min_nrg, nrg, t, km, Atmp1, Atmp2; + + min_nrg = 1e-12f * corr[ 0 ] + 1e-9f; + nrg = corr[ 0 ]; + nrg = silk_max_float(min_nrg, nrg); + A[ 0 ] = corr[ 1 ] / nrg; + nrg -= A[ 0 ] * corr[ 1 ]; + nrg = silk_max_float(min_nrg, nrg); + + for( m = 1; m < order; m++ ) + { + t = corr[ m + 1 ]; + for( i = 0; i < m; i++ ) { + t -= A[ i ] * corr[ m - i ]; + } + + /* reflection coefficient */ + km = t / nrg; + + /* residual energy */ + nrg -= km * t; + nrg = silk_max_float(min_nrg, nrg); + + mHalf = m >> 1; + for( i = 0; i < mHalf; i++ ) { + Atmp1 = A[ i ]; + Atmp2 = A[ m - i - 1 ]; + A[ m - i - 1 ] -= km * Atmp1; + A[ i ] -= km * Atmp2; + } + if( m & 1 ) { + A[ mHalf ] -= km * A[ mHalf ]; + } + A[ m ] = km; + } + + /* return the residual energy */ + return nrg; +} + diff --git a/thirdparty/opus/silk/float/main_FLP.h b/thirdparty/opus/silk/float/main_FLP.h index 5dc0ccf4a4..e5a75972e5 100644 --- a/thirdparty/opus/silk/float/main_FLP.h +++ b/thirdparty/opus/silk/float/main_FLP.h @@ -56,8 +56,7 @@ void silk_HP_variable_cutoff( /* Encoder main function */ void silk_encode_do_VAD_FLP( - silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */ - opus_int activity /* I Decision of Opus voice activity detector */ + silk_encoder_state_FLP *psEnc /* I/O Encoder state FLP */ ); /* Encoder main function */ @@ -80,11 +79,22 @@ opus_int silk_init_encoder( opus_int silk_control_encoder( silk_encoder_state_FLP *psEnc, /* I/O Pointer to Silk encoder state FLP */ silk_EncControlStruct *encControl, /* I Control structure */ + const opus_int32 TargetRate_bps, /* I Target max bitrate (bps) */ const opus_int allow_bw_switch, /* I Flag to allow switching audio bandwidth */ const opus_int channelNb, /* I Channel number */ const opus_int force_fs_kHz ); +/****************/ +/* Prefiltering */ +/****************/ +void silk_prefilter_FLP( + silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */ + const silk_encoder_control_FLP *psEncCtrl, /* I Encoder control FLP */ + silk_float xw[], /* O Weighted signal */ + const silk_float x[] /* I Speech signal */ +); + /**************************/ /* Noise shaping analysis */ /**************************/ @@ -143,12 +153,15 @@ void silk_find_LPC_FLP( /* LTP analysis */ void silk_find_LTP_FLP( - silk_float XX[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* O Weight for LTP quantization */ - silk_float xX[ MAX_NB_SUBFR * LTP_ORDER ], /* O Weight for LTP quantization */ - const silk_float r_ptr[], /* I LPC residual */ + silk_float b[ MAX_NB_SUBFR * LTP_ORDER ], /* O LTP coefs */ + silk_float WLTP[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* O Weight for LTP quantization */ + silk_float *LTPredCodGain, /* O LTP coding gain */ + const silk_float r_lpc[], /* I LPC residual */ const opus_int lag[ MAX_NB_SUBFR ], /* I LTP lags */ + const silk_float Wght[ MAX_NB_SUBFR ], /* I Weights */ const opus_int subfr_length, /* I Subframe length */ - const opus_int nb_subfr /* I number of subframes */ + const opus_int nb_subfr, /* I number of subframes */ + const opus_int mem_offset /* I Number of samples in LTP memory */ ); void silk_LTP_analysis_filter_FLP( @@ -185,15 +198,14 @@ void silk_LPC_analysis_filter_FLP( /* LTP tap quantizer */ void silk_quant_LTP_gains_FLP( - silk_float B[ MAX_NB_SUBFR * LTP_ORDER ], /* O Quantized LTP gains */ + silk_float B[ MAX_NB_SUBFR * LTP_ORDER ], /* I/O (Un-)quantized LTP gains */ opus_int8 cbk_index[ MAX_NB_SUBFR ], /* O Codebook index */ opus_int8 *periodicity_index, /* O Periodicity index */ opus_int32 *sum_log_gain_Q7, /* I/O Cumulative max prediction gain */ - silk_float *pred_gain_dB, /* O LTP prediction gain */ - const silk_float XX[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* I Correlation matrix */ - const silk_float xX[ MAX_NB_SUBFR * LTP_ORDER ], /* I Correlation vector */ - const opus_int subfr_len, /* I Number of samples per subframe */ - const opus_int nb_subfr, /* I Number of subframes */ + const silk_float W[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* I Error weights */ + const opus_int mu_Q10, /* I Mu value (R/D tradeoff) */ + const opus_int lowComplexity, /* I Flag for low complexity */ + const opus_int nb_subfr, /* I number of subframes */ int arch /* I Run-time architecture */ ); @@ -233,6 +245,22 @@ void silk_corrVector_FLP( silk_float *Xt /* O X'*t correlation vector [order] */ ); +/* Add noise to matrix diagonal */ +void silk_regularize_correlations_FLP( + silk_float *XX, /* I/O Correlation matrices */ + silk_float *xx, /* I/O Correlation values */ + const silk_float noise, /* I Noise energy to add */ + const opus_int D /* I Dimension of XX */ +); + +/* Function to solve linear equation Ax = b, where A is an MxM symmetric matrix */ +void silk_solve_LDL_FLP( + silk_float *A, /* I/O Symmetric square matrix, out: reg. */ + const opus_int M, /* I Size of matrix */ + const silk_float *b, /* I Pointer to b vector */ + silk_float *x /* O Pointer to x solution vector */ +); + /* Apply sine window to signal vector. */ /* Window types: */ /* 1 -> sine window from 0 to pi/2 */ @@ -257,8 +285,7 @@ void silk_A2NLSF_FLP( void silk_NLSF2A_FLP( silk_float *pAR, /* O LPC coefficients [ LPC_order ] */ const opus_int16 *NLSF_Q15, /* I NLSF vector [ LPC_order ] */ - const opus_int LPC_order, /* I LPC order */ - int arch /* I Run-time architecture */ + const opus_int LPC_order /* I LPC order */ ); /* Limit, stabilize, and quantize NLSFs */ diff --git a/thirdparty/opus/silk/float/noise_shape_analysis_FLP.c b/thirdparty/opus/silk/float/noise_shape_analysis_FLP.c index cb3d8a50b7..65f6ea5870 100644 --- a/thirdparty/opus/silk/float/noise_shape_analysis_FLP.c +++ b/thirdparty/opus/silk/float/noise_shape_analysis_FLP.c @@ -55,21 +55,25 @@ static OPUS_INLINE silk_float warped_gain( /* Convert warped filter coefficients to monic pseudo-warped coefficients and limit maximum */ /* amplitude of monic warped coefficients by using bandwidth expansion on the true coefficients */ static OPUS_INLINE void warped_true2monic_coefs( - silk_float *coefs, + silk_float *coefs_syn, + silk_float *coefs_ana, silk_float lambda, silk_float limit, opus_int order ) { opus_int i, iter, ind = 0; - silk_float tmp, maxabs, chirp, gain; + silk_float tmp, maxabs, chirp, gain_syn, gain_ana; /* Convert to monic coefficients */ for( i = order - 1; i > 0; i-- ) { - coefs[ i - 1 ] -= lambda * coefs[ i ]; + coefs_syn[ i - 1 ] -= lambda * coefs_syn[ i ]; + coefs_ana[ i - 1 ] -= lambda * coefs_ana[ i ]; } - gain = ( 1.0f - lambda * lambda ) / ( 1.0f + lambda * coefs[ 0 ] ); + gain_syn = ( 1.0f - lambda * lambda ) / ( 1.0f + lambda * coefs_syn[ 0 ] ); + gain_ana = ( 1.0f - lambda * lambda ) / ( 1.0f + lambda * coefs_ana[ 0 ] ); for( i = 0; i < order; i++ ) { - coefs[ i ] *= gain; + coefs_syn[ i ] *= gain_syn; + coefs_ana[ i ] *= gain_ana; } /* Limit */ @@ -77,7 +81,7 @@ static OPUS_INLINE void warped_true2monic_coefs( /* Find maximum absolute value */ maxabs = -1.0f; for( i = 0; i < order; i++ ) { - tmp = silk_abs_float( coefs[ i ] ); + tmp = silk_max( silk_abs_float( coefs_syn[ i ] ), silk_abs_float( coefs_ana[ i ] ) ); if( tmp > maxabs ) { maxabs = tmp; ind = i; @@ -90,59 +94,36 @@ static OPUS_INLINE void warped_true2monic_coefs( /* Convert back to true warped coefficients */ for( i = 1; i < order; i++ ) { - coefs[ i - 1 ] += lambda * coefs[ i ]; + coefs_syn[ i - 1 ] += lambda * coefs_syn[ i ]; + coefs_ana[ i - 1 ] += lambda * coefs_ana[ i ]; } - gain = 1.0f / gain; + gain_syn = 1.0f / gain_syn; + gain_ana = 1.0f / gain_ana; for( i = 0; i < order; i++ ) { - coefs[ i ] *= gain; + coefs_syn[ i ] *= gain_syn; + coefs_ana[ i ] *= gain_ana; } /* Apply bandwidth expansion */ chirp = 0.99f - ( 0.8f + 0.1f * iter ) * ( maxabs - limit ) / ( maxabs * ( ind + 1 ) ); - silk_bwexpander_FLP( coefs, order, chirp ); + silk_bwexpander_FLP( coefs_syn, order, chirp ); + silk_bwexpander_FLP( coefs_ana, order, chirp ); /* Convert to monic warped coefficients */ for( i = order - 1; i > 0; i-- ) { - coefs[ i - 1 ] -= lambda * coefs[ i ]; + coefs_syn[ i - 1 ] -= lambda * coefs_syn[ i ]; + coefs_ana[ i - 1 ] -= lambda * coefs_ana[ i ]; } - gain = ( 1.0f - lambda * lambda ) / ( 1.0f + lambda * coefs[ 0 ] ); + gain_syn = ( 1.0f - lambda * lambda ) / ( 1.0f + lambda * coefs_syn[ 0 ] ); + gain_ana = ( 1.0f - lambda * lambda ) / ( 1.0f + lambda * coefs_ana[ 0 ] ); for( i = 0; i < order; i++ ) { - coefs[ i ] *= gain; + coefs_syn[ i ] *= gain_syn; + coefs_ana[ i ] *= gain_ana; } } silk_assert( 0 ); } -static OPUS_INLINE void limit_coefs( - silk_float *coefs, - silk_float limit, - opus_int order -) { - opus_int i, iter, ind = 0; - silk_float tmp, maxabs, chirp; - - for( iter = 0; iter < 10; iter++ ) { - /* Find maximum absolute value */ - maxabs = -1.0f; - for( i = 0; i < order; i++ ) { - tmp = silk_abs_float( coefs[ i ] ); - if( tmp > maxabs ) { - maxabs = tmp; - ind = i; - } - } - if( maxabs <= limit ) { - /* Coefficients are within range - done */ - return; - } - - /* Apply bandwidth expansion */ - chirp = 0.99f - ( 0.8f + 0.1f * iter ) * ( maxabs - limit ) / ( maxabs * ( ind + 1 ) ); - silk_bwexpander_FLP( coefs, order, chirp ); - } - silk_assert( 0 ); -} - /* Compute noise shaping coefficients and initial gain values */ void silk_noise_shape_analysis_FLP( silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */ @@ -152,13 +133,12 @@ void silk_noise_shape_analysis_FLP( ) { silk_shape_state_FLP *psShapeSt = &psEnc->sShape; - opus_int k, nSamples, nSegs; - silk_float SNR_adj_dB, HarmShapeGain, Tilt; - silk_float nrg, log_energy, log_energy_prev, energy_variation; - silk_float BWExp, gain_mult, gain_add, strength, b, warping; + opus_int k, nSamples; + silk_float SNR_adj_dB, HarmBoost, HarmShapeGain, Tilt; + silk_float nrg, pre_nrg, log_energy, log_energy_prev, energy_variation; + silk_float delta, BWExp1, BWExp2, gain_mult, gain_add, strength, b, warping; silk_float x_windowed[ SHAPE_LPC_WIN_MAX ]; silk_float auto_corr[ MAX_SHAPE_LPC_ORDER + 1 ]; - silk_float rc[ MAX_SHAPE_LPC_ORDER + 1 ]; const silk_float *x_ptr, *pitch_res_ptr; /* Point to start of first LPC analysis block */ @@ -196,14 +176,14 @@ void silk_noise_shape_analysis_FLP( if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) { /* Initially set to 0; may be overruled in process_gains(..) */ psEnc->sCmn.indices.quantOffsetType = 0; + psEncCtrl->sparseness = 0.0f; } else { /* Sparseness measure, based on relative fluctuations of energy per 2 milliseconds */ nSamples = 2 * psEnc->sCmn.fs_kHz; energy_variation = 0.0f; log_energy_prev = 0.0f; pitch_res_ptr = pitch_res; - nSegs = silk_SMULBB( SUB_FRAME_LENGTH_MS, psEnc->sCmn.nb_subfr ) / 2; - for( k = 0; k < nSegs; k++ ) { + for( k = 0; k < silk_SMULBB( SUB_FRAME_LENGTH_MS, psEnc->sCmn.nb_subfr ) / 2; k++ ) { nrg = ( silk_float )nSamples + ( silk_float )silk_energy_FLP( pitch_res_ptr, nSamples ); log_energy = silk_log2( nrg ); if( k > 0 ) { @@ -212,13 +192,17 @@ void silk_noise_shape_analysis_FLP( log_energy_prev = log_energy; pitch_res_ptr += nSamples; } + psEncCtrl->sparseness = silk_sigmoid( 0.4f * ( energy_variation - 5.0f ) ); /* Set quantization offset depending on sparseness measure */ - if( energy_variation > ENERGY_VARIATION_THRESHOLD_QNT_OFFSET * (nSegs-1) ) { + if( psEncCtrl->sparseness > SPARSENESS_THRESHOLD_QNT_OFFSET ) { psEnc->sCmn.indices.quantOffsetType = 0; } else { psEnc->sCmn.indices.quantOffsetType = 1; } + + /* Increase coding SNR for sparse signals */ + SNR_adj_dB += SPARSE_SNR_INCR_dB * ( psEncCtrl->sparseness - 0.5f ); } /*******************************/ @@ -226,10 +210,19 @@ void silk_noise_shape_analysis_FLP( /*******************************/ /* More BWE for signals with high prediction gain */ strength = FIND_PITCH_WHITE_NOISE_FRACTION * psEncCtrl->predGain; /* between 0.0 and 1.0 */ - BWExp = BANDWIDTH_EXPANSION / ( 1.0f + strength * strength ); - - /* Slightly more warping in analysis will move quantization noise up in frequency, where it's better masked */ - warping = (silk_float)psEnc->sCmn.warping_Q16 / 65536.0f + 0.01f * psEncCtrl->coding_quality; + BWExp1 = BWExp2 = BANDWIDTH_EXPANSION / ( 1.0f + strength * strength ); + delta = LOW_RATE_BANDWIDTH_EXPANSION_DELTA * ( 1.0f - 0.75f * psEncCtrl->coding_quality ); + BWExp1 -= delta; + BWExp2 += delta; + /* BWExp1 will be applied after BWExp2, so make it relative */ + BWExp1 /= BWExp2; + + if( psEnc->sCmn.warping_Q16 > 0 ) { + /* Slightly more warping in analysis will move quantization noise up in frequency, where it's better masked */ + warping = (silk_float)psEnc->sCmn.warping_Q16 / 65536.0f + 0.01f * psEncCtrl->coding_quality; + } else { + warping = 0.0f; + } /********************************************/ /* Compute noise shaping AR coefs and gains */ @@ -259,28 +252,37 @@ void silk_noise_shape_analysis_FLP( } /* Add white noise, as a fraction of energy */ - auto_corr[ 0 ] += auto_corr[ 0 ] * SHAPE_WHITE_NOISE_FRACTION + 1.0f; + auto_corr[ 0 ] += auto_corr[ 0 ] * SHAPE_WHITE_NOISE_FRACTION; /* Convert correlations to prediction coefficients, and compute residual energy */ - nrg = silk_schur_FLP( rc, auto_corr, psEnc->sCmn.shapingLPCOrder ); - silk_k2a_FLP( &psEncCtrl->AR[ k * MAX_SHAPE_LPC_ORDER ], rc, psEnc->sCmn.shapingLPCOrder ); + nrg = silk_levinsondurbin_FLP( &psEncCtrl->AR2[ k * MAX_SHAPE_LPC_ORDER ], auto_corr, psEnc->sCmn.shapingLPCOrder ); psEncCtrl->Gains[ k ] = ( silk_float )sqrt( nrg ); if( psEnc->sCmn.warping_Q16 > 0 ) { /* Adjust gain for warping */ - psEncCtrl->Gains[ k ] *= warped_gain( &psEncCtrl->AR[ k * MAX_SHAPE_LPC_ORDER ], warping, psEnc->sCmn.shapingLPCOrder ); + psEncCtrl->Gains[ k ] *= warped_gain( &psEncCtrl->AR2[ k * MAX_SHAPE_LPC_ORDER ], warping, psEnc->sCmn.shapingLPCOrder ); } /* Bandwidth expansion for synthesis filter shaping */ - silk_bwexpander_FLP( &psEncCtrl->AR[ k * MAX_SHAPE_LPC_ORDER ], psEnc->sCmn.shapingLPCOrder, BWExp ); + silk_bwexpander_FLP( &psEncCtrl->AR2[ k * MAX_SHAPE_LPC_ORDER ], psEnc->sCmn.shapingLPCOrder, BWExp2 ); - if( psEnc->sCmn.warping_Q16 > 0 ) { - /* Convert to monic warped prediction coefficients and limit absolute values */ - warped_true2monic_coefs( &psEncCtrl->AR[ k * MAX_SHAPE_LPC_ORDER ], warping, 3.999f, psEnc->sCmn.shapingLPCOrder ); - } else { - /* Limit absolute values */ - limit_coefs( &psEncCtrl->AR[ k * MAX_SHAPE_LPC_ORDER ], 3.999f, psEnc->sCmn.shapingLPCOrder ); - } + /* Compute noise shaping filter coefficients */ + silk_memcpy( + &psEncCtrl->AR1[ k * MAX_SHAPE_LPC_ORDER ], + &psEncCtrl->AR2[ k * MAX_SHAPE_LPC_ORDER ], + psEnc->sCmn.shapingLPCOrder * sizeof( silk_float ) ); + + /* Bandwidth expansion for analysis filter shaping */ + silk_bwexpander_FLP( &psEncCtrl->AR1[ k * MAX_SHAPE_LPC_ORDER ], psEnc->sCmn.shapingLPCOrder, BWExp1 ); + + /* Ratio of prediction gains, in energy domain */ + pre_nrg = silk_LPC_inverse_pred_gain_FLP( &psEncCtrl->AR2[ k * MAX_SHAPE_LPC_ORDER ], psEnc->sCmn.shapingLPCOrder ); + nrg = silk_LPC_inverse_pred_gain_FLP( &psEncCtrl->AR1[ k * MAX_SHAPE_LPC_ORDER ], psEnc->sCmn.shapingLPCOrder ); + psEncCtrl->GainsPre[ k ] = 1.0f - 0.7f * ( 1.0f - pre_nrg / nrg ); + + /* Convert to monic warped prediction coefficients and limit absolute values */ + warped_true2monic_coefs( &psEncCtrl->AR2[ k * MAX_SHAPE_LPC_ORDER ], &psEncCtrl->AR1[ k * MAX_SHAPE_LPC_ORDER ], + warping, 3.999f, psEnc->sCmn.shapingLPCOrder ); } /*****************/ @@ -294,6 +296,11 @@ void silk_noise_shape_analysis_FLP( psEncCtrl->Gains[ k ] += gain_add; } + gain_mult = 1.0f + INPUT_TILT + psEncCtrl->coding_quality * HIGH_RATE_INPUT_TILT; + for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { + psEncCtrl->GainsPre[ k ] *= gain_mult; + } + /************************************************/ /* Control low-frequency shaping and noise tilt */ /************************************************/ @@ -324,6 +331,12 @@ void silk_noise_shape_analysis_FLP( /****************************/ /* HARMONIC SHAPING CONTROL */ /****************************/ + /* Control boosting of harmonic frequencies */ + HarmBoost = LOW_RATE_HARMONIC_BOOST * ( 1.0f - psEncCtrl->coding_quality ) * psEnc->LTPCorr; + + /* More harmonic boost for noisy input signals */ + HarmBoost += LOW_INPUT_QUALITY_HARMONIC_BOOST * ( 1.0f - psEncCtrl->input_quality ); + if( USE_HARM_SHAPING && psEnc->sCmn.indices.signalType == TYPE_VOICED ) { /* Harmonic noise shaping */ HarmShapeGain = HARMONIC_SHAPING; @@ -342,6 +355,8 @@ void silk_noise_shape_analysis_FLP( /* Smooth over subframes */ /*************************/ for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { + psShapeSt->HarmBoost_smth += SUBFR_SMTH_COEF * ( HarmBoost - psShapeSt->HarmBoost_smth ); + psEncCtrl->HarmBoost[ k ] = psShapeSt->HarmBoost_smth; psShapeSt->HarmShapeGain_smth += SUBFR_SMTH_COEF * ( HarmShapeGain - psShapeSt->HarmShapeGain_smth ); psEncCtrl->HarmShapeGain[ k ] = psShapeSt->HarmShapeGain_smth; psShapeSt->Tilt_smth += SUBFR_SMTH_COEF * ( Tilt - psShapeSt->Tilt_smth ); diff --git a/thirdparty/opus/silk/float/pitch_analysis_core_FLP.c b/thirdparty/opus/silk/float/pitch_analysis_core_FLP.c index f351bc3718..d0e637a29d 100644 --- a/thirdparty/opus/silk/float/pitch_analysis_core_FLP.c +++ b/thirdparty/opus/silk/float/pitch_analysis_core_FLP.c @@ -109,11 +109,11 @@ opus_int silk_pitch_analysis_core_FLP( /* O Voicing estimate: 0 voiced, const opus_int8 *Lag_CB_ptr; /* Check for valid sampling frequency */ - celt_assert( Fs_kHz == 8 || Fs_kHz == 12 || Fs_kHz == 16 ); + silk_assert( Fs_kHz == 8 || Fs_kHz == 12 || Fs_kHz == 16 ); /* Check for valid complexity setting */ - celt_assert( complexity >= SILK_PE_MIN_COMPLEX ); - celt_assert( complexity <= SILK_PE_MAX_COMPLEX ); + silk_assert( complexity >= SILK_PE_MIN_COMPLEX ); + silk_assert( complexity <= SILK_PE_MAX_COMPLEX ); silk_assert( search_thres1 >= 0.0f && search_thres1 <= 1.0f ); silk_assert( search_thres2 >= 0.0f && search_thres2 <= 1.0f ); @@ -148,7 +148,7 @@ opus_int silk_pitch_analysis_core_FLP( /* O Voicing estimate: 0 voiced, silk_resampler_down2_3( filt_state, frame_8_FIX, frame_12_FIX, frame_length ); silk_short2float_array( frame_8kHz, frame_8_FIX, frame_length_8kHz ); } else { - celt_assert( Fs_kHz == 8 ); + silk_assert( Fs_kHz == 8 ); silk_float2short_array( frame_8_FIX, frame, frame_length_8kHz ); } @@ -159,7 +159,7 @@ opus_int silk_pitch_analysis_core_FLP( /* O Voicing estimate: 0 voiced, /* Low-pass filter */ for( i = frame_length_4kHz - 1; i > 0; i-- ) { - frame_4kHz[ i ] = silk_ADD_SAT16( frame_4kHz[ i ], frame_4kHz[ i - 1 ] ); + frame_4kHz[ i ] += frame_4kHz[ i - 1 ]; } /****************************************************************************** @@ -169,14 +169,14 @@ opus_int silk_pitch_analysis_core_FLP( /* O Voicing estimate: 0 voiced, target_ptr = &frame_4kHz[ silk_LSHIFT( sf_length_4kHz, 2 ) ]; for( k = 0; k < nb_subfr >> 1; k++ ) { /* Check that we are within range of the array */ - celt_assert( target_ptr >= frame_4kHz ); - celt_assert( target_ptr + sf_length_8kHz <= frame_4kHz + frame_length_4kHz ); + silk_assert( target_ptr >= frame_4kHz ); + silk_assert( target_ptr + sf_length_8kHz <= frame_4kHz + frame_length_4kHz ); basis_ptr = target_ptr - min_lag_4kHz; /* Check that we are within range of the array */ - celt_assert( basis_ptr >= frame_4kHz ); - celt_assert( basis_ptr + sf_length_8kHz <= frame_4kHz + frame_length_4kHz ); + silk_assert( basis_ptr >= frame_4kHz ); + silk_assert( basis_ptr + sf_length_8kHz <= frame_4kHz + frame_length_4kHz ); celt_pitch_xcorr( target_ptr, target_ptr-max_lag_4kHz, xcorr, sf_length_8kHz, max_lag_4kHz - min_lag_4kHz + 1, arch ); @@ -215,7 +215,7 @@ opus_int silk_pitch_analysis_core_FLP( /* O Voicing estimate: 0 voiced, /* Sort */ length_d_srch = 4 + 2 * complexity; - celt_assert( 3 * length_d_srch <= PE_D_SRCH_LENGTH ); + silk_assert( 3 * length_d_srch <= PE_D_SRCH_LENGTH ); silk_insertion_sort_decreasing_FLP( &C[ 0 ][ min_lag_4kHz ], d_srch, max_lag_4kHz - min_lag_4kHz + 1, length_d_srch ); /* Escape if correlation is very low already here */ @@ -238,7 +238,7 @@ opus_int silk_pitch_analysis_core_FLP( /* O Voicing estimate: 0 voiced, break; } } - celt_assert( length_d_srch > 0 ); + silk_assert( length_d_srch > 0 ); for( i = min_lag_8kHz - 5; i < max_lag_8kHz + 5; i++ ) { d_comp[ i ] = 0; @@ -471,7 +471,7 @@ opus_int silk_pitch_analysis_core_FLP( /* O Voicing estimate: 0 voiced, *lagIndex = (opus_int16)( lag - min_lag_8kHz ); *contourIndex = (opus_int8)CBimax; } - celt_assert( *lagIndex >= 0 ); + silk_assert( *lagIndex >= 0 ); /* return as voiced */ return 0; } @@ -506,8 +506,8 @@ static void silk_P_Ana_calc_corr_st3( opus_val32 xcorr[ SCRATCH_SIZE ]; const opus_int8 *Lag_range_ptr, *Lag_CB_ptr; - celt_assert( complexity >= SILK_PE_MIN_COMPLEX ); - celt_assert( complexity <= SILK_PE_MAX_COMPLEX ); + silk_assert( complexity >= SILK_PE_MIN_COMPLEX ); + silk_assert( complexity <= SILK_PE_MAX_COMPLEX ); if( nb_subfr == PE_MAX_NB_SUBFR ) { Lag_range_ptr = &silk_Lag_range_stage3[ complexity ][ 0 ][ 0 ]; @@ -515,7 +515,7 @@ static void silk_P_Ana_calc_corr_st3( nb_cbk_search = silk_nb_cbk_searchs_stage3[ complexity ]; cbk_size = PE_NB_CBKS_STAGE3_MAX; } else { - celt_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1); + silk_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1); Lag_range_ptr = &silk_Lag_range_stage3_10_ms[ 0 ][ 0 ]; Lag_CB_ptr = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ]; nb_cbk_search = PE_NB_CBKS_STAGE3_10MS; @@ -572,8 +572,8 @@ static void silk_P_Ana_calc_energy_st3( silk_float scratch_mem[ SCRATCH_SIZE ]; const opus_int8 *Lag_range_ptr, *Lag_CB_ptr; - celt_assert( complexity >= SILK_PE_MIN_COMPLEX ); - celt_assert( complexity <= SILK_PE_MAX_COMPLEX ); + silk_assert( complexity >= SILK_PE_MIN_COMPLEX ); + silk_assert( complexity <= SILK_PE_MAX_COMPLEX ); if( nb_subfr == PE_MAX_NB_SUBFR ) { Lag_range_ptr = &silk_Lag_range_stage3[ complexity ][ 0 ][ 0 ]; @@ -581,7 +581,7 @@ static void silk_P_Ana_calc_energy_st3( nb_cbk_search = silk_nb_cbk_searchs_stage3[ complexity ]; cbk_size = PE_NB_CBKS_STAGE3_MAX; } else { - celt_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1); + silk_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1); Lag_range_ptr = &silk_Lag_range_stage3_10_ms[ 0 ][ 0 ]; Lag_CB_ptr = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ]; nb_cbk_search = PE_NB_CBKS_STAGE3_10MS; diff --git a/thirdparty/opus/silk/float/prefilter_FLP.c b/thirdparty/opus/silk/float/prefilter_FLP.c new file mode 100644 index 0000000000..8bc32fb410 --- /dev/null +++ b/thirdparty/opus/silk/float/prefilter_FLP.c @@ -0,0 +1,206 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "main_FLP.h" +#include "tuning_parameters.h" + +/* +* Prefilter for finding Quantizer input signal +*/ +static OPUS_INLINE void silk_prefilt_FLP( + silk_prefilter_state_FLP *P, /* I/O state */ + silk_float st_res[], /* I */ + silk_float xw[], /* O */ + silk_float *HarmShapeFIR, /* I */ + silk_float Tilt, /* I */ + silk_float LF_MA_shp, /* I */ + silk_float LF_AR_shp, /* I */ + opus_int lag, /* I */ + opus_int length /* I */ +); + +static void silk_warped_LPC_analysis_filter_FLP( + silk_float state[], /* I/O State [order + 1] */ + silk_float res[], /* O Residual signal [length] */ + const silk_float coef[], /* I Coefficients [order] */ + const silk_float input[], /* I Input signal [length] */ + const silk_float lambda, /* I Warping factor */ + const opus_int length, /* I Length of input signal */ + const opus_int order /* I Filter order (even) */ +) +{ + opus_int n, i; + silk_float acc, tmp1, tmp2; + + /* Order must be even */ + silk_assert( ( order & 1 ) == 0 ); + + for( n = 0; n < length; n++ ) { + /* Output of lowpass section */ + tmp2 = state[ 0 ] + lambda * state[ 1 ]; + state[ 0 ] = input[ n ]; + /* Output of allpass section */ + tmp1 = state[ 1 ] + lambda * ( state[ 2 ] - tmp2 ); + state[ 1 ] = tmp2; + acc = coef[ 0 ] * tmp2; + /* Loop over allpass sections */ + for( i = 2; i < order; i += 2 ) { + /* Output of allpass section */ + tmp2 = state[ i ] + lambda * ( state[ i + 1 ] - tmp1 ); + state[ i ] = tmp1; + acc += coef[ i - 1 ] * tmp1; + /* Output of allpass section */ + tmp1 = state[ i + 1 ] + lambda * ( state[ i + 2 ] - tmp2 ); + state[ i + 1 ] = tmp2; + acc += coef[ i ] * tmp2; + } + state[ order ] = tmp1; + acc += coef[ order - 1 ] * tmp1; + res[ n ] = input[ n ] - acc; + } +} + +/* +* silk_prefilter. Main prefilter function +*/ +void silk_prefilter_FLP( + silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */ + const silk_encoder_control_FLP *psEncCtrl, /* I Encoder control FLP */ + silk_float xw[], /* O Weighted signal */ + const silk_float x[] /* I Speech signal */ +) +{ + silk_prefilter_state_FLP *P = &psEnc->sPrefilt; + opus_int j, k, lag; + silk_float HarmShapeGain, Tilt, LF_MA_shp, LF_AR_shp; + silk_float B[ 2 ]; + const silk_float *AR1_shp; + const silk_float *px; + silk_float *pxw; + silk_float HarmShapeFIR[ 3 ]; + silk_float st_res[ MAX_SUB_FRAME_LENGTH + MAX_LPC_ORDER ]; + + /* Set up pointers */ + px = x; + pxw = xw; + lag = P->lagPrev; + for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { + /* Update Variables that change per sub frame */ + if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) { + lag = psEncCtrl->pitchL[ k ]; + } + + /* Noise shape parameters */ + HarmShapeGain = psEncCtrl->HarmShapeGain[ k ] * ( 1.0f - psEncCtrl->HarmBoost[ k ] ); + HarmShapeFIR[ 0 ] = 0.25f * HarmShapeGain; + HarmShapeFIR[ 1 ] = 32767.0f / 65536.0f * HarmShapeGain; + HarmShapeFIR[ 2 ] = 0.25f * HarmShapeGain; + Tilt = psEncCtrl->Tilt[ k ]; + LF_MA_shp = psEncCtrl->LF_MA_shp[ k ]; + LF_AR_shp = psEncCtrl->LF_AR_shp[ k ]; + AR1_shp = &psEncCtrl->AR1[ k * MAX_SHAPE_LPC_ORDER ]; + + /* Short term FIR filtering */ + silk_warped_LPC_analysis_filter_FLP( P->sAR_shp, st_res, AR1_shp, px, + (silk_float)psEnc->sCmn.warping_Q16 / 65536.0f, psEnc->sCmn.subfr_length, psEnc->sCmn.shapingLPCOrder ); + + /* Reduce (mainly) low frequencies during harmonic emphasis */ + B[ 0 ] = psEncCtrl->GainsPre[ k ]; + B[ 1 ] = -psEncCtrl->GainsPre[ k ] * + ( psEncCtrl->HarmBoost[ k ] * HarmShapeGain + INPUT_TILT + psEncCtrl->coding_quality * HIGH_RATE_INPUT_TILT ); + pxw[ 0 ] = B[ 0 ] * st_res[ 0 ] + B[ 1 ] * P->sHarmHP; + for( j = 1; j < psEnc->sCmn.subfr_length; j++ ) { + pxw[ j ] = B[ 0 ] * st_res[ j ] + B[ 1 ] * st_res[ j - 1 ]; + } + P->sHarmHP = st_res[ psEnc->sCmn.subfr_length - 1 ]; + + silk_prefilt_FLP( P, pxw, pxw, HarmShapeFIR, Tilt, LF_MA_shp, LF_AR_shp, lag, psEnc->sCmn.subfr_length ); + + px += psEnc->sCmn.subfr_length; + pxw += psEnc->sCmn.subfr_length; + } + P->lagPrev = psEncCtrl->pitchL[ psEnc->sCmn.nb_subfr - 1 ]; +} + +/* +* Prefilter for finding Quantizer input signal +*/ +static OPUS_INLINE void silk_prefilt_FLP( + silk_prefilter_state_FLP *P, /* I/O state */ + silk_float st_res[], /* I */ + silk_float xw[], /* O */ + silk_float *HarmShapeFIR, /* I */ + silk_float Tilt, /* I */ + silk_float LF_MA_shp, /* I */ + silk_float LF_AR_shp, /* I */ + opus_int lag, /* I */ + opus_int length /* I */ +) +{ + opus_int i; + opus_int idx, LTP_shp_buf_idx; + silk_float n_Tilt, n_LF, n_LTP; + silk_float sLF_AR_shp, sLF_MA_shp; + silk_float *LTP_shp_buf; + + /* To speed up use temp variables instead of using the struct */ + LTP_shp_buf = P->sLTP_shp; + LTP_shp_buf_idx = P->sLTP_shp_buf_idx; + sLF_AR_shp = P->sLF_AR_shp; + sLF_MA_shp = P->sLF_MA_shp; + + for( i = 0; i < length; i++ ) { + if( lag > 0 ) { + silk_assert( HARM_SHAPE_FIR_TAPS == 3 ); + idx = lag + LTP_shp_buf_idx; + n_LTP = LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2 - 1) & LTP_MASK ] * HarmShapeFIR[ 0 ]; + n_LTP += LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2 ) & LTP_MASK ] * HarmShapeFIR[ 1 ]; + n_LTP += LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2 + 1) & LTP_MASK ] * HarmShapeFIR[ 2 ]; + } else { + n_LTP = 0; + } + + n_Tilt = sLF_AR_shp * Tilt; + n_LF = sLF_AR_shp * LF_AR_shp + sLF_MA_shp * LF_MA_shp; + + sLF_AR_shp = st_res[ i ] - n_Tilt; + sLF_MA_shp = sLF_AR_shp - n_LF; + + LTP_shp_buf_idx = ( LTP_shp_buf_idx - 1 ) & LTP_MASK; + LTP_shp_buf[ LTP_shp_buf_idx ] = sLF_MA_shp; + + xw[ i ] = sLF_MA_shp - n_LTP; + } + /* Copy temp variable back to state */ + P->sLF_AR_shp = sLF_AR_shp; + P->sLF_MA_shp = sLF_MA_shp; + P->sLTP_shp_buf_idx = LTP_shp_buf_idx; +} diff --git a/thirdparty/opus/silk/float/residual_energy_FLP.c b/thirdparty/opus/silk/float/residual_energy_FLP.c index 1bd07b33a4..b2e03a86a4 100644 --- a/thirdparty/opus/silk/float/residual_energy_FLP.c +++ b/thirdparty/opus/silk/float/residual_energy_FLP.c @@ -47,7 +47,7 @@ silk_float silk_residual_energy_covar_FLP( /* O silk_float tmp, nrg = 0.0f, regularization; /* Safety checks */ - celt_assert( D >= 0 ); + silk_assert( D >= 0 ); regularization = REGULARIZATION_FACTOR * ( wXX[ 0 ] + wXX[ D * D - 1 ] ); for( k = 0; k < MAX_ITERATIONS_RESIDUAL_NRG; k++ ) { diff --git a/thirdparty/opus/silk/float/schur_FLP.c b/thirdparty/opus/silk/float/schur_FLP.c index 8526c748d3..ee436f8351 100644 --- a/thirdparty/opus/silk/float/schur_FLP.c +++ b/thirdparty/opus/silk/float/schur_FLP.c @@ -38,23 +38,22 @@ silk_float silk_schur_FLP( /* O returns residual energy ) { opus_int k, n; - double C[ SILK_MAX_ORDER_LPC + 1 ][ 2 ]; - double Ctmp1, Ctmp2, rc_tmp; + silk_float C[ SILK_MAX_ORDER_LPC + 1 ][ 2 ]; + silk_float Ctmp1, Ctmp2, rc_tmp; - celt_assert( order >= 0 && order <= SILK_MAX_ORDER_LPC ); + silk_assert( order==6||order==8||order==10||order==12||order==14||order==16 ); /* Copy correlations */ - k = 0; - do { + for( k = 0; k < order+1; k++ ) { C[ k ][ 0 ] = C[ k ][ 1 ] = auto_corr[ k ]; - } while( ++k <= order ); + } for( k = 0; k < order; k++ ) { /* Get reflection coefficient */ rc_tmp = -C[ k + 1 ][ 0 ] / silk_max_float( C[ 0 ][ 1 ], 1e-9f ); /* Save the output */ - refl_coef[ k ] = (silk_float)rc_tmp; + refl_coef[ k ] = rc_tmp; /* Update correlations */ for( n = 0; n < order - k; n++ ) { @@ -66,5 +65,6 @@ silk_float silk_schur_FLP( /* O returns residual energy } /* Return residual energy */ - return (silk_float)C[ 0 ][ 1 ]; + return C[ 0 ][ 1 ]; } + diff --git a/thirdparty/opus/silk/float/solve_LS_FLP.c b/thirdparty/opus/silk/float/solve_LS_FLP.c new file mode 100644 index 0000000000..7c90d665a0 --- /dev/null +++ b/thirdparty/opus/silk/float/solve_LS_FLP.c @@ -0,0 +1,207 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "main_FLP.h" +#include "tuning_parameters.h" + +/********************************************************************** + * LDL Factorisation. Finds the upper triangular matrix L and the diagonal + * Matrix D (only the diagonal elements returned in a vector)such that + * the symmetric matric A is given by A = L*D*L'. + **********************************************************************/ +static OPUS_INLINE void silk_LDL_FLP( + silk_float *A, /* I/O Pointer to Symetric Square Matrix */ + opus_int M, /* I Size of Matrix */ + silk_float *L, /* I/O Pointer to Square Upper triangular Matrix */ + silk_float *Dinv /* I/O Pointer to vector holding the inverse diagonal elements of D */ +); + +/********************************************************************** + * Function to solve linear equation Ax = b, when A is a MxM lower + * triangular matrix, with ones on the diagonal. + **********************************************************************/ +static OPUS_INLINE void silk_SolveWithLowerTriangularWdiagOnes_FLP( + const silk_float *L, /* I Pointer to Lower Triangular Matrix */ + opus_int M, /* I Dim of Matrix equation */ + const silk_float *b, /* I b Vector */ + silk_float *x /* O x Vector */ +); + +/********************************************************************** + * Function to solve linear equation (A^T)x = b, when A is a MxM lower + * triangular, with ones on the diagonal. (ie then A^T is upper triangular) + **********************************************************************/ +static OPUS_INLINE void silk_SolveWithUpperTriangularFromLowerWdiagOnes_FLP( + const silk_float *L, /* I Pointer to Lower Triangular Matrix */ + opus_int M, /* I Dim of Matrix equation */ + const silk_float *b, /* I b Vector */ + silk_float *x /* O x Vector */ +); + +/********************************************************************** + * Function to solve linear equation Ax = b, when A is a MxM + * symmetric square matrix - using LDL factorisation + **********************************************************************/ +void silk_solve_LDL_FLP( + silk_float *A, /* I/O Symmetric square matrix, out: reg. */ + const opus_int M, /* I Size of matrix */ + const silk_float *b, /* I Pointer to b vector */ + silk_float *x /* O Pointer to x solution vector */ +) +{ + opus_int i; + silk_float L[ MAX_MATRIX_SIZE ][ MAX_MATRIX_SIZE ]; + silk_float T[ MAX_MATRIX_SIZE ]; + silk_float Dinv[ MAX_MATRIX_SIZE ]; /* inverse diagonal elements of D*/ + + silk_assert( M <= MAX_MATRIX_SIZE ); + + /*************************************************** + Factorize A by LDL such that A = L*D*(L^T), + where L is lower triangular with ones on diagonal + ****************************************************/ + silk_LDL_FLP( A, M, &L[ 0 ][ 0 ], Dinv ); + + /**************************************************** + * substitute D*(L^T) = T. ie: + L*D*(L^T)*x = b => L*T = b <=> T = inv(L)*b + ******************************************************/ + silk_SolveWithLowerTriangularWdiagOnes_FLP( &L[ 0 ][ 0 ], M, b, T ); + + /**************************************************** + D*(L^T)*x = T <=> (L^T)*x = inv(D)*T, because D is + diagonal just multiply with 1/d_i + ****************************************************/ + for( i = 0; i < M; i++ ) { + T[ i ] = T[ i ] * Dinv[ i ]; + } + /**************************************************** + x = inv(L') * inv(D) * T + *****************************************************/ + silk_SolveWithUpperTriangularFromLowerWdiagOnes_FLP( &L[ 0 ][ 0 ], M, T, x ); +} + +static OPUS_INLINE void silk_SolveWithUpperTriangularFromLowerWdiagOnes_FLP( + const silk_float *L, /* I Pointer to Lower Triangular Matrix */ + opus_int M, /* I Dim of Matrix equation */ + const silk_float *b, /* I b Vector */ + silk_float *x /* O x Vector */ +) +{ + opus_int i, j; + silk_float temp; + const silk_float *ptr1; + + for( i = M - 1; i >= 0; i-- ) { + ptr1 = matrix_adr( L, 0, i, M ); + temp = 0; + for( j = M - 1; j > i ; j-- ) { + temp += ptr1[ j * M ] * x[ j ]; + } + temp = b[ i ] - temp; + x[ i ] = temp; + } +} + +static OPUS_INLINE void silk_SolveWithLowerTriangularWdiagOnes_FLP( + const silk_float *L, /* I Pointer to Lower Triangular Matrix */ + opus_int M, /* I Dim of Matrix equation */ + const silk_float *b, /* I b Vector */ + silk_float *x /* O x Vector */ +) +{ + opus_int i, j; + silk_float temp; + const silk_float *ptr1; + + for( i = 0; i < M; i++ ) { + ptr1 = matrix_adr( L, i, 0, M ); + temp = 0; + for( j = 0; j < i; j++ ) { + temp += ptr1[ j ] * x[ j ]; + } + temp = b[ i ] - temp; + x[ i ] = temp; + } +} + +static OPUS_INLINE void silk_LDL_FLP( + silk_float *A, /* I/O Pointer to Symetric Square Matrix */ + opus_int M, /* I Size of Matrix */ + silk_float *L, /* I/O Pointer to Square Upper triangular Matrix */ + silk_float *Dinv /* I/O Pointer to vector holding the inverse diagonal elements of D */ +) +{ + opus_int i, j, k, loop_count, err = 1; + silk_float *ptr1, *ptr2; + double temp, diag_min_value; + silk_float v[ MAX_MATRIX_SIZE ], D[ MAX_MATRIX_SIZE ]; /* temp arrays*/ + + silk_assert( M <= MAX_MATRIX_SIZE ); + + diag_min_value = FIND_LTP_COND_FAC * 0.5f * ( A[ 0 ] + A[ M * M - 1 ] ); + for( loop_count = 0; loop_count < M && err == 1; loop_count++ ) { + err = 0; + for( j = 0; j < M; j++ ) { + ptr1 = matrix_adr( L, j, 0, M ); + temp = matrix_ptr( A, j, j, M ); /* element in row j column j*/ + for( i = 0; i < j; i++ ) { + v[ i ] = ptr1[ i ] * D[ i ]; + temp -= ptr1[ i ] * v[ i ]; + } + if( temp < diag_min_value ) { + /* Badly conditioned matrix: add white noise and run again */ + temp = ( loop_count + 1 ) * diag_min_value - temp; + for( i = 0; i < M; i++ ) { + matrix_ptr( A, i, i, M ) += ( silk_float )temp; + } + err = 1; + break; + } + D[ j ] = ( silk_float )temp; + Dinv[ j ] = ( silk_float )( 1.0f / temp ); + matrix_ptr( L, j, j, M ) = 1.0f; + + ptr1 = matrix_adr( A, j, 0, M ); + ptr2 = matrix_adr( L, j + 1, 0, M); + for( i = j + 1; i < M; i++ ) { + temp = 0.0; + for( k = 0; k < j; k++ ) { + temp += ptr2[ k ] * v[ k ]; + } + matrix_ptr( L, i, j, M ) = ( silk_float )( ( ptr1[ i ] - temp ) * Dinv[ j ] ); + ptr2 += M; /* go to next column*/ + } + } + } + silk_assert( err == 0 ); +} + diff --git a/thirdparty/opus/silk/float/sort_FLP.c b/thirdparty/opus/silk/float/sort_FLP.c index 0e18f31950..f08d7592c5 100644 --- a/thirdparty/opus/silk/float/sort_FLP.c +++ b/thirdparty/opus/silk/float/sort_FLP.c @@ -47,9 +47,9 @@ void silk_insertion_sort_decreasing_FLP( opus_int i, j; /* Safety checks */ - celt_assert( K > 0 ); - celt_assert( L > 0 ); - celt_assert( L >= K ); + silk_assert( K > 0 ); + silk_assert( L > 0 ); + silk_assert( L >= K ); /* Write start indices in index vector */ for( i = 0; i < K; i++ ) { diff --git a/thirdparty/opus/silk/float/structs_FLP.h b/thirdparty/opus/silk/float/structs_FLP.h index 3150b386e4..14d647ced2 100644 --- a/thirdparty/opus/silk/float/structs_FLP.h +++ b/thirdparty/opus/silk/float/structs_FLP.h @@ -42,16 +42,32 @@ extern "C" /********************************/ typedef struct { opus_int8 LastGainIndex; + silk_float HarmBoost_smth; silk_float HarmShapeGain_smth; silk_float Tilt_smth; } silk_shape_state_FLP; /********************************/ +/* Prefilter state */ +/********************************/ +typedef struct { + silk_float sLTP_shp[ LTP_BUF_LENGTH ]; + silk_float sAR_shp[ MAX_SHAPE_LPC_ORDER + 1 ]; + opus_int sLTP_shp_buf_idx; + silk_float sLF_AR_shp; + silk_float sLF_MA_shp; + silk_float sHarmHP; + opus_int32 rand_seed; + opus_int lagPrev; +} silk_prefilter_state_FLP; + +/********************************/ /* Encoder state FLP */ /********************************/ typedef struct { silk_encoder_state sCmn; /* Common struct, shared with fixed-point code */ silk_shape_state_FLP sShape; /* Noise shaping state */ + silk_prefilter_state_FLP sPrefilt; /* Prefilter State */ /* Buffer for find pitch and noise shape analysis */ silk_float x_buf[ 2 * MAX_FRAME_LENGTH + LA_SHAPE_MAX ];/* Buffer for find pitch and noise shape analysis */ @@ -70,9 +86,12 @@ typedef struct { opus_int pitchL[ MAX_NB_SUBFR ]; /* Noise shaping parameters */ - silk_float AR[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ]; + silk_float AR1[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ]; + silk_float AR2[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ]; silk_float LF_MA_shp[ MAX_NB_SUBFR ]; silk_float LF_AR_shp[ MAX_NB_SUBFR ]; + silk_float GainsPre[ MAX_NB_SUBFR ]; + silk_float HarmBoost[ MAX_NB_SUBFR ]; silk_float Tilt[ MAX_NB_SUBFR ]; silk_float HarmShapeGain[ MAX_NB_SUBFR ]; silk_float Lambda; @@ -80,6 +99,7 @@ typedef struct { silk_float coding_quality; /* Measures */ + silk_float sparseness; silk_float predGain; silk_float LTPredCodGain; silk_float ResNrg[ MAX_NB_SUBFR ]; /* Residual energy per subframe */ diff --git a/thirdparty/opus/silk/float/warped_autocorrelation_FLP.c b/thirdparty/opus/silk/float/warped_autocorrelation_FLP.c index 09186e73d4..542414f48e 100644 --- a/thirdparty/opus/silk/float/warped_autocorrelation_FLP.c +++ b/thirdparty/opus/silk/float/warped_autocorrelation_FLP.c @@ -42,11 +42,11 @@ void silk_warped_autocorrelation_FLP( { opus_int n, i; double tmp1, tmp2; - double state[ MAX_SHAPE_LPC_ORDER + 1 ] = { 0 }; - double C[ MAX_SHAPE_LPC_ORDER + 1 ] = { 0 }; + double state[ MAX_SHAPE_LPC_ORDER + 1 ] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }; + double C[ MAX_SHAPE_LPC_ORDER + 1 ] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }; /* Order must be even */ - celt_assert( ( order & 1 ) == 0 ); + silk_assert( ( order & 1 ) == 0 ); /* Loop over samples */ for( n = 0; n < length; n++ ) { diff --git a/thirdparty/opus/silk/float/wrappers_FLP.c b/thirdparty/opus/silk/float/wrappers_FLP.c index ad90b874a4..6666b8efaa 100644 --- a/thirdparty/opus/silk/float/wrappers_FLP.c +++ b/thirdparty/opus/silk/float/wrappers_FLP.c @@ -54,14 +54,13 @@ void silk_A2NLSF_FLP( void silk_NLSF2A_FLP( silk_float *pAR, /* O LPC coefficients [ LPC_order ] */ const opus_int16 *NLSF_Q15, /* I NLSF vector [ LPC_order ] */ - const opus_int LPC_order, /* I LPC order */ - int arch /* I Run-time architecture */ + const opus_int LPC_order /* I LPC order */ ) { opus_int i; opus_int16 a_fix_Q12[ MAX_LPC_ORDER ]; - silk_NLSF2A( a_fix_Q12, NLSF_Q15, LPC_order, arch ); + silk_NLSF2A( a_fix_Q12, NLSF_Q15, LPC_order ); for( i = 0; i < LPC_order; i++ ) { pAR[ i ] = ( silk_float )a_fix_Q12[ i ] * ( 1.0f / 4096.0f ); @@ -103,14 +102,14 @@ void silk_NSQ_wrapper_FLP( ) { opus_int i, j; - opus_int16 x16[ MAX_FRAME_LENGTH ]; + opus_int32 x_Q3[ MAX_FRAME_LENGTH ]; opus_int32 Gains_Q16[ MAX_NB_SUBFR ]; silk_DWORD_ALIGN opus_int16 PredCoef_Q12[ 2 ][ MAX_LPC_ORDER ]; opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ]; opus_int LTP_scale_Q14; /* Noise shaping parameters */ - opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ]; + opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ]; opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ]; /* Packs two int16 coefficients per int32 value */ opus_int Lambda_Q10; opus_int Tilt_Q14[ MAX_NB_SUBFR ]; @@ -120,7 +119,7 @@ void silk_NSQ_wrapper_FLP( /* Noise shape parameters */ for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) { for( j = 0; j < psEnc->sCmn.shapingLPCOrder; j++ ) { - AR_Q13[ i * MAX_SHAPE_LPC_ORDER + j ] = silk_float2int( psEncCtrl->AR[ i * MAX_SHAPE_LPC_ORDER + j ] * 8192.0f ); + AR2_Q13[ i * MAX_SHAPE_LPC_ORDER + j ] = silk_float2int( psEncCtrl->AR2[ i * MAX_SHAPE_LPC_ORDER + j ] * 8192.0f ); } } @@ -156,16 +155,16 @@ void silk_NSQ_wrapper_FLP( /* Convert input to fix */ for( i = 0; i < psEnc->sCmn.frame_length; i++ ) { - x16[ i ] = silk_float2int( x[ i ] ); + x_Q3[ i ] = silk_float2int( 8.0f * x[ i ] ); } /* Call NSQ */ if( psEnc->sCmn.nStatesDelayedDecision > 1 || psEnc->sCmn.warping_Q16 > 0 ) { - silk_NSQ_del_dec( &psEnc->sCmn, psNSQ, psIndices, x16, pulses, PredCoef_Q12[ 0 ], LTPCoef_Q14, - AR_Q13, HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, psEncCtrl->pitchL, Lambda_Q10, LTP_scale_Q14, psEnc->sCmn.arch ); + silk_NSQ_del_dec( &psEnc->sCmn, psNSQ, psIndices, x_Q3, pulses, PredCoef_Q12[ 0 ], LTPCoef_Q14, + AR2_Q13, HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, psEncCtrl->pitchL, Lambda_Q10, LTP_scale_Q14, psEnc->sCmn.arch ); } else { - silk_NSQ( &psEnc->sCmn, psNSQ, psIndices, x16, pulses, PredCoef_Q12[ 0 ], LTPCoef_Q14, - AR_Q13, HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, psEncCtrl->pitchL, Lambda_Q10, LTP_scale_Q14, psEnc->sCmn.arch ); + silk_NSQ( &psEnc->sCmn, psNSQ, psIndices, x_Q3, pulses, PredCoef_Q12[ 0 ], LTPCoef_Q14, + AR2_Q13, HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, psEncCtrl->pitchL, Lambda_Q10, LTP_scale_Q14, psEnc->sCmn.arch ); } } @@ -173,35 +172,31 @@ void silk_NSQ_wrapper_FLP( /* Floating-point Silk LTP quantiation wrapper */ /***********************************************/ void silk_quant_LTP_gains_FLP( - silk_float B[ MAX_NB_SUBFR * LTP_ORDER ], /* O Quantized LTP gains */ + silk_float B[ MAX_NB_SUBFR * LTP_ORDER ], /* I/O (Un-)quantized LTP gains */ opus_int8 cbk_index[ MAX_NB_SUBFR ], /* O Codebook index */ opus_int8 *periodicity_index, /* O Periodicity index */ opus_int32 *sum_log_gain_Q7, /* I/O Cumulative max prediction gain */ - silk_float *pred_gain_dB, /* O LTP prediction gain */ - const silk_float XX[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* I Correlation matrix */ - const silk_float xX[ MAX_NB_SUBFR * LTP_ORDER ], /* I Correlation vector */ - const opus_int subfr_len, /* I Number of samples per subframe */ - const opus_int nb_subfr, /* I Number of subframes */ + const silk_float W[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* I Error weights */ + const opus_int mu_Q10, /* I Mu value (R/D tradeoff) */ + const opus_int lowComplexity, /* I Flag for low complexity */ + const opus_int nb_subfr, /* I number of subframes */ int arch /* I Run-time architecture */ ) { - opus_int i, pred_gain_dB_Q7; + opus_int i; opus_int16 B_Q14[ MAX_NB_SUBFR * LTP_ORDER ]; - opus_int32 XX_Q17[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ]; - opus_int32 xX_Q17[ MAX_NB_SUBFR * LTP_ORDER ]; + opus_int32 W_Q18[ MAX_NB_SUBFR*LTP_ORDER*LTP_ORDER ]; - for( i = 0; i < nb_subfr * LTP_ORDER * LTP_ORDER; i++ ) { - XX_Q17[ i ] = (opus_int32)silk_float2int( XX[ i ] * 131072.0f ); - } for( i = 0; i < nb_subfr * LTP_ORDER; i++ ) { - xX_Q17[ i ] = (opus_int32)silk_float2int( xX[ i ] * 131072.0f ); + B_Q14[ i ] = (opus_int16)silk_float2int( B[ i ] * 16384.0f ); + } + for( i = 0; i < nb_subfr * LTP_ORDER * LTP_ORDER; i++ ) { + W_Q18[ i ] = (opus_int32)silk_float2int( W[ i ] * 262144.0f ); } - silk_quant_LTP_gains( B_Q14, cbk_index, periodicity_index, sum_log_gain_Q7, &pred_gain_dB_Q7, XX_Q17, xX_Q17, subfr_len, nb_subfr, arch ); + silk_quant_LTP_gains( B_Q14, cbk_index, periodicity_index, sum_log_gain_Q7, W_Q18, mu_Q10, lowComplexity, nb_subfr, arch ); for( i = 0; i < nb_subfr * LTP_ORDER; i++ ) { B[ i ] = (silk_float)B_Q14[ i ] * ( 1.0f / 16384.0f ); } - - *pred_gain_dB = (silk_float)pred_gain_dB_Q7 * ( 1.0f / 128.0f ); } diff --git a/thirdparty/opus/silk/gain_quant.c b/thirdparty/opus/silk/gain_quant.c index ee65245aa3..64ccd0611b 100644 --- a/thirdparty/opus/silk/gain_quant.c +++ b/thirdparty/opus/silk/gain_quant.c @@ -76,7 +76,6 @@ void silk_gains_quant( /* Accumulate deltas */ if( ind[ k ] > double_step_size_threshold ) { *prev_ind += silk_LSHIFT( ind[ k ], 1 ) - double_step_size_threshold; - *prev_ind = silk_min_int( *prev_ind, N_LEVELS_QGAIN - 1 ); } else { *prev_ind += ind[ k ]; } diff --git a/thirdparty/opus/silk/init_decoder.c b/thirdparty/opus/silk/init_decoder.c index 16c03dcd1c..f887c67886 100644 --- a/thirdparty/opus/silk/init_decoder.c +++ b/thirdparty/opus/silk/init_decoder.c @@ -44,7 +44,6 @@ opus_int silk_init_decoder( /* Used to deactivate LSF interpolation */ psDec->first_frame_after_reset = 1; psDec->prev_gain_Q16 = 65536; - psDec->arch = opus_select_arch(); /* Reset CNG state */ silk_CNG_Reset( psDec ); diff --git a/thirdparty/opus/silk/interpolate.c b/thirdparty/opus/silk/interpolate.c index 833c28ef8e..1bd8ca4d53 100644 --- a/thirdparty/opus/silk/interpolate.c +++ b/thirdparty/opus/silk/interpolate.c @@ -42,8 +42,8 @@ void silk_interpolate( { opus_int i; - celt_assert( ifact_Q2 >= 0 ); - celt_assert( ifact_Q2 <= 4 ); + silk_assert( ifact_Q2 >= 0 ); + silk_assert( ifact_Q2 <= 4 ); for( i = 0; i < d; i++ ) { xi[ i ] = (opus_int16)silk_ADD_RSHIFT( x0[ i ], silk_SMULBB( x1[ i ] - x0[ i ], ifact_Q2 ), 2 ); diff --git a/thirdparty/opus/silk/lin2log.c b/thirdparty/opus/silk/lin2log.c index 0d5155aa86..d4fe515321 100644 --- a/thirdparty/opus/silk/lin2log.c +++ b/thirdparty/opus/silk/lin2log.c @@ -41,6 +41,6 @@ opus_int32 silk_lin2log( silk_CLZ_FRAC( inLin, &lz, &frac_Q7 ); /* Piece-wise parabolic approximation */ - return silk_ADD_LSHIFT32( silk_SMLAWB( frac_Q7, silk_MUL( frac_Q7, 128 - frac_Q7 ), 179 ), 31 - lz, 7 ); + return silk_LSHIFT( 31 - lz, 7 ) + silk_SMLAWB( frac_Q7, silk_MUL( frac_Q7, 128 - frac_Q7 ), 179 ); } diff --git a/thirdparty/opus/silk/macros.h b/thirdparty/opus/silk/macros.h index 3c67b6e5d9..d3ca347520 100644 --- a/thirdparty/opus/silk/macros.h +++ b/thirdparty/opus/silk/macros.h @@ -36,6 +36,14 @@ POSSIBILITY OF SUCH DAMAGE. #include "opus_defines.h" #include "arch.h" +#if OPUS_GNUC_PREREQ(3, 0) +#define opus_likely(x) (__builtin_expect(!!(x), 1)) +#define opus_unlikely(x) (__builtin_expect(!!(x), 0)) +#else +#define opus_likely(x) (!!(x)) +#define opus_unlikely(x) (!!(x)) +#endif + /* This is an OPUS_INLINE header file for general platform. */ /* (a32 * (opus_int32)((opus_int16)(b32))) >> 16 output have to be 32bit int */ diff --git a/thirdparty/opus/silk/main.h b/thirdparty/opus/silk/main.h index 1a33eed549..2f90d68f7d 100644 --- a/thirdparty/opus/silk/main.h +++ b/thirdparty/opus/silk/main.h @@ -42,10 +42,6 @@ POSSIBILITY OF SUCH DAMAGE. #include "x86/main_sse.h" #endif -#if (defined(OPUS_ARM_ASM) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)) -#include "arm/NSQ_del_dec_arm.h" -#endif - /* Convert Left/Right stereo signal to adaptive Mid/Side representation */ void silk_stereo_LR_to_MS( stereo_enc_state *state, /* I/O State */ @@ -113,22 +109,22 @@ void silk_stereo_decode_mid_only( /* Encodes signs of excitation */ void silk_encode_signs( - ec_enc *psRangeEnc, /* I/O Compressor data structure */ - const opus_int8 pulses[], /* I pulse signal */ - opus_int length, /* I length of input */ - const opus_int signalType, /* I Signal type */ - const opus_int quantOffsetType, /* I Quantization offset type */ - const opus_int sum_pulses[ MAX_NB_SHELL_BLOCKS ] /* I Sum of absolute pulses per block */ + ec_enc *psRangeEnc, /* I/O Compressor data structure */ + const opus_int8 pulses[], /* I pulse signal */ + opus_int length, /* I length of input */ + const opus_int signalType, /* I Signal type */ + const opus_int quantOffsetType, /* I Quantization offset type */ + const opus_int sum_pulses[ MAX_NB_SHELL_BLOCKS ] /* I Sum of absolute pulses per block */ ); /* Decodes signs of excitation */ void silk_decode_signs( - ec_dec *psRangeDec, /* I/O Compressor data structure */ - opus_int16 pulses[], /* I/O pulse signal */ - opus_int length, /* I length of input */ - const opus_int signalType, /* I Signal type */ - const opus_int quantOffsetType, /* I Quantization offset type */ - const opus_int sum_pulses[ MAX_NB_SHELL_BLOCKS ] /* I Sum of absolute pulses per block */ + ec_dec *psRangeDec, /* I/O Compressor data structure */ + opus_int16 pulses[], /* I/O pulse signal */ + opus_int length, /* I length of input */ + const opus_int signalType, /* I Signal type */ + const opus_int quantOffsetType, /* I Quantization offset type */ + const opus_int sum_pulses[ MAX_NB_SHELL_BLOCKS ] /* I Sum of absolute pulses per block */ ); /* Check encoder control struct */ @@ -209,37 +205,37 @@ void silk_interpolate( /* LTP tap quantizer */ void silk_quant_LTP_gains( - opus_int16 B_Q14[ MAX_NB_SUBFR * LTP_ORDER ], /* O Quantized LTP gains */ + opus_int16 B_Q14[ MAX_NB_SUBFR * LTP_ORDER ], /* I/O (un)quantized LTP gains */ opus_int8 cbk_index[ MAX_NB_SUBFR ], /* O Codebook Index */ opus_int8 *periodicity_index, /* O Periodicity Index */ opus_int32 *sum_gain_dB_Q7, /* I/O Cumulative max prediction gain */ - opus_int *pred_gain_dB_Q7, /* O LTP prediction gain */ - const opus_int32 XX_Q17[ MAX_NB_SUBFR*LTP_ORDER*LTP_ORDER ], /* I Correlation matrix in Q18 */ - const opus_int32 xX_Q17[ MAX_NB_SUBFR*LTP_ORDER ], /* I Correlation vector in Q18 */ - const opus_int subfr_len, /* I Number of samples per subframe */ - const opus_int nb_subfr, /* I Number of subframes */ + const opus_int32 W_Q18[ MAX_NB_SUBFR*LTP_ORDER*LTP_ORDER ], /* I Error Weights in Q18 */ + opus_int mu_Q9, /* I Mu value (R/D tradeoff) */ + opus_int lowComplexity, /* I Flag for low complexity */ + const opus_int nb_subfr, /* I number of subframes */ int arch /* I Run-time architecture */ ); /* Entropy constrained matrix-weighted VQ, for a single input data vector */ void silk_VQ_WMat_EC_c( opus_int8 *ind, /* O index of best codebook vector */ - opus_int32 *res_nrg_Q15, /* O best residual energy */ - opus_int32 *rate_dist_Q8, /* O best total bitrate */ + opus_int32 *rate_dist_Q14, /* O best weighted quant error + mu * rate */ opus_int *gain_Q7, /* O sum of absolute LTP coefficients */ - const opus_int32 *XX_Q17, /* I correlation matrix */ - const opus_int32 *xX_Q17, /* I correlation vector */ + const opus_int16 *in_Q14, /* I input vector to be quantized */ + const opus_int32 *W_Q18, /* I weighting matrix */ const opus_int8 *cb_Q7, /* I codebook */ const opus_uint8 *cb_gain_Q7, /* I codebook effective gain */ const opus_uint8 *cl_Q5, /* I code length for each codebook vector */ - const opus_int subfr_len, /* I number of samples per subframe */ + const opus_int mu_Q9, /* I tradeoff betw. weighted error and rate */ const opus_int32 max_gain_Q7, /* I maximum sum of absolute LTP coefficients */ - const opus_int L /* I number of vectors in codebook */ + opus_int L /* I number of vectors in codebook */ ); #if !defined(OVERRIDE_silk_VQ_WMat_EC) -#define silk_VQ_WMat_EC(ind, res_nrg_Q15, rate_dist_Q8, gain_Q7, XX_Q17, xX_Q17, cb_Q7, cb_gain_Q7, cl_Q5, subfr_len, max_gain_Q7, L, arch) \ - ((void)(arch),silk_VQ_WMat_EC_c(ind, res_nrg_Q15, rate_dist_Q8, gain_Q7, XX_Q17, xX_Q17, cb_Q7, cb_gain_Q7, cl_Q5, subfr_len, max_gain_Q7, L)) +#define silk_VQ_WMat_EC(ind, rate_dist_Q14, gain_Q7, in_Q14, W_Q18, cb_Q7, cb_gain_Q7, cl_Q5, \ + mu_Q9, max_gain_Q7, L, arch) \ + ((void)(arch),silk_VQ_WMat_EC_c(ind, rate_dist_Q14, gain_Q7, in_Q14, W_Q18, cb_Q7, cb_gain_Q7, cl_Q5, \ + mu_Q9, max_gain_Q7, L)) #endif /************************************/ @@ -247,14 +243,14 @@ void silk_VQ_WMat_EC_c( /************************************/ void silk_NSQ_c( - const silk_encoder_state *psEncC, /* I Encoder State */ + const silk_encoder_state *psEncC, /* I/O Encoder State */ silk_nsq_state *NSQ, /* I/O NSQ state */ SideInfoIndices *psIndices, /* I/O Quantization Indices */ - const opus_int16 x16[], /* I Input */ + const opus_int32 x_Q3[], /* I Prefiltered input signal */ opus_int8 pulses[], /* O Quantized pulse signal */ const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ - const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ + const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ @@ -265,22 +261,22 @@ void silk_NSQ_c( ); #if !defined(OVERRIDE_silk_NSQ) -#define silk_NSQ(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, AR_Q13, \ +#define silk_NSQ(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \ HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \ - ((void)(arch),silk_NSQ_c(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, AR_Q13, \ + ((void)(arch),silk_NSQ_c(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \ HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14)) #endif /* Noise shaping using delayed decision */ void silk_NSQ_del_dec_c( - const silk_encoder_state *psEncC, /* I Encoder State */ + const silk_encoder_state *psEncC, /* I/O Encoder State */ silk_nsq_state *NSQ, /* I/O NSQ state */ SideInfoIndices *psIndices, /* I/O Quantization Indices */ - const opus_int16 x16[], /* I Input */ + const opus_int32 x_Q3[], /* I Prefiltered input signal */ opus_int8 pulses[], /* O Quantized pulse signal */ const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ - const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ + const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ @@ -291,9 +287,9 @@ void silk_NSQ_del_dec_c( ); #if !defined(OVERRIDE_silk_NSQ_del_dec) -#define silk_NSQ_del_dec(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, AR_Q13, \ +#define silk_NSQ_del_dec(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \ HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \ - ((void)(arch),silk_NSQ_del_dec_c(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, AR_Q13, \ + ((void)(arch),silk_NSQ_del_dec_c(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \ HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14)) #endif @@ -350,7 +346,6 @@ void silk_NLSF_VQ( opus_int32 err_Q26[], /* O Quantization errors [K] */ const opus_int16 in_Q15[], /* I Input vectors to be quantized [LPC_order] */ const opus_uint8 pCB_Q8[], /* I Codebook vectors [K*LPC_order] */ - const opus_int16 pWght_Q9[], /* I Codebook weights [K*LPC_order] */ const opus_int K, /* I Number of codebook vectors */ const opus_int LPC_order /* I Number of LPCs */ ); diff --git a/thirdparty/opus/silk/mips/NSQ_del_dec_mipsr1.h b/thirdparty/opus/silk/mips/NSQ_del_dec_mipsr1.h index cd70713a8f..ad1cfe2a9b 100644 --- a/thirdparty/opus/silk/mips/NSQ_del_dec_mipsr1.h +++ b/thirdparty/opus/silk/mips/NSQ_del_dec_mipsr1.h @@ -61,7 +61,7 @@ static inline void silk_noise_shape_quantizer_del_dec( opus_int predictLPCOrder, /* I Prediction filter order */ opus_int warping_Q16, /* I */ opus_int nStatesDelayedDecision, /* I Number of states in decision tree */ - opus_int *smpl_buf_idx, /* I/O Index to newest samples in buffers */ + opus_int *smpl_buf_idx, /* I Index to newest samples in buffers */ opus_int decisionDelay, /* I */ int arch /* I */ ) @@ -323,9 +323,8 @@ static inline void silk_noise_shape_quantizer_del_dec( psSS[ 1 ].xq_Q14 = xq_Q14; } - *smpl_buf_idx = ( *smpl_buf_idx - 1 ) % DECISION_DELAY; - if( *smpl_buf_idx < 0 ) *smpl_buf_idx += DECISION_DELAY; - last_smple_idx = ( *smpl_buf_idx + decisionDelay ) % DECISION_DELAY; + *smpl_buf_idx = ( *smpl_buf_idx - 1 ) & DECISION_DELAY_MASK; /* Index to newest samples */ + last_smple_idx = ( *smpl_buf_idx + decisionDelay ) & DECISION_DELAY_MASK; /* Index to decisionDelay old samples */ /* Find winner */ RDmin_Q10 = psSampleState[ 0 ][ 0 ].RD_Q10; diff --git a/thirdparty/opus/silk/mips/sigproc_fix_mipsr1.h b/thirdparty/opus/silk/mips/sigproc_fix_mipsr1.h index 51520c0a6f..3b0a695365 100644 --- a/thirdparty/opus/silk/mips/sigproc_fix_mipsr1.h +++ b/thirdparty/opus/silk/mips/sigproc_fix_mipsr1.h @@ -28,6 +28,11 @@ POSSIBILITY OF SUCH DAMAGE. #ifndef SILK_SIGPROC_FIX_MIPSR1_H #define SILK_SIGPROC_FIX_MIPSR1_H +#ifdef __cplusplus +extern "C" +{ +#endif + #undef silk_SAT16 static inline short int silk_SAT16(int a) { diff --git a/thirdparty/opus/silk/process_NLSFs.c b/thirdparty/opus/silk/process_NLSFs.c index d130809541..0ab71f0163 100644 --- a/thirdparty/opus/silk/process_NLSFs.c +++ b/thirdparty/opus/silk/process_NLSFs.c @@ -48,7 +48,7 @@ void silk_process_NLSFs( silk_assert( psEncC->speech_activity_Q8 >= 0 ); silk_assert( psEncC->speech_activity_Q8 <= SILK_FIX_CONST( 1.0, 8 ) ); - celt_assert( psEncC->useInterpolatedNLSFs == 1 || psEncC->indices.NLSFInterpCoef_Q2 == ( 1 << 2 ) ); + silk_assert( psEncC->useInterpolatedNLSFs == 1 || psEncC->indices.NLSFInterpCoef_Q2 == ( 1 << 2 ) ); /***********************/ /* Calculate mu values */ @@ -60,7 +60,7 @@ void silk_process_NLSFs( NLSF_mu_Q20 = silk_ADD_RSHIFT( NLSF_mu_Q20, NLSF_mu_Q20, 1 ); } - celt_assert( NLSF_mu_Q20 > 0 ); + silk_assert( NLSF_mu_Q20 > 0 ); silk_assert( NLSF_mu_Q20 <= SILK_FIX_CONST( 0.005, 20 ) ); /* Calculate NLSF weights */ @@ -89,7 +89,7 @@ void silk_process_NLSFs( NLSF_mu_Q20, psEncC->NLSF_MSVQ_Survivors, psEncC->indices.signalType ); /* Convert quantized NLSFs back to LPC coefficients */ - silk_NLSF2A( PredCoef_Q12[ 1 ], pNLSF_Q15, psEncC->predictLPCOrder, psEncC->arch ); + silk_NLSF2A( PredCoef_Q12[ 1 ], pNLSF_Q15, psEncC->predictLPCOrder ); if( doInterpolate ) { /* Calculate the interpolated, quantized LSF vector for the first half */ @@ -97,11 +97,11 @@ void silk_process_NLSFs( psEncC->indices.NLSFInterpCoef_Q2, psEncC->predictLPCOrder ); /* Convert back to LPC coefficients */ - silk_NLSF2A( PredCoef_Q12[ 0 ], pNLSF0_temp_Q15, psEncC->predictLPCOrder, psEncC->arch ); + silk_NLSF2A( PredCoef_Q12[ 0 ], pNLSF0_temp_Q15, psEncC->predictLPCOrder ); } else { /* Copy LPC coefficients for first half from second half */ - celt_assert( psEncC->predictLPCOrder <= MAX_LPC_ORDER ); + silk_assert( psEncC->predictLPCOrder <= MAX_LPC_ORDER ); silk_memcpy( PredCoef_Q12[ 0 ], PredCoef_Q12[ 1 ], psEncC->predictLPCOrder * sizeof( opus_int16 ) ); } } diff --git a/thirdparty/opus/silk/quant_LTP_gains.c b/thirdparty/opus/silk/quant_LTP_gains.c index d6b8eff8d1..513a8c4468 100644 --- a/thirdparty/opus/silk/quant_LTP_gains.c +++ b/thirdparty/opus/silk/quant_LTP_gains.c @@ -33,15 +33,14 @@ POSSIBILITY OF SUCH DAMAGE. #include "tuning_parameters.h" void silk_quant_LTP_gains( - opus_int16 B_Q14[ MAX_NB_SUBFR * LTP_ORDER ], /* O Quantized LTP gains */ + opus_int16 B_Q14[ MAX_NB_SUBFR * LTP_ORDER ], /* I/O (un)quantized LTP gains */ opus_int8 cbk_index[ MAX_NB_SUBFR ], /* O Codebook Index */ opus_int8 *periodicity_index, /* O Periodicity Index */ opus_int32 *sum_log_gain_Q7, /* I/O Cumulative max prediction gain */ - opus_int *pred_gain_dB_Q7, /* O LTP prediction gain */ - const opus_int32 XX_Q17[ MAX_NB_SUBFR*LTP_ORDER*LTP_ORDER ], /* I Correlation matrix in Q18 */ - const opus_int32 xX_Q17[ MAX_NB_SUBFR*LTP_ORDER ], /* I Correlation vector in Q18 */ - const opus_int subfr_len, /* I Number of samples per subframe */ - const opus_int nb_subfr, /* I Number of subframes */ + const opus_int32 W_Q18[ MAX_NB_SUBFR*LTP_ORDER*LTP_ORDER ], /* I Error Weights in Q18 */ + opus_int mu_Q9, /* I Mu value (R/D tradeoff) */ + opus_int lowComplexity, /* I Flag for low complexity */ + const opus_int nb_subfr, /* I number of subframes */ int arch /* I Run-time architecture */ ) { @@ -50,16 +49,16 @@ void silk_quant_LTP_gains( const opus_uint8 *cl_ptr_Q5; const opus_int8 *cbk_ptr_Q7; const opus_uint8 *cbk_gain_ptr_Q7; - const opus_int32 *XX_Q17_ptr, *xX_Q17_ptr; - opus_int32 res_nrg_Q15_subfr, res_nrg_Q15, rate_dist_Q7_subfr, rate_dist_Q7, min_rate_dist_Q7; - opus_int32 sum_log_gain_tmp_Q7, best_sum_log_gain_Q7, max_gain_Q7; - opus_int gain_Q7; + const opus_int16 *b_Q14_ptr; + const opus_int32 *W_Q18_ptr; + opus_int32 rate_dist_Q14_subfr, rate_dist_Q14, min_rate_dist_Q14; + opus_int32 sum_log_gain_tmp_Q7, best_sum_log_gain_Q7, max_gain_Q7, gain_Q7; /***************************************************/ /* iterate over different codebooks with different */ /* rates/distortions, and choose best */ /***************************************************/ - min_rate_dist_Q7 = silk_int32_MAX; + min_rate_dist_Q14 = silk_int32_MAX; best_sum_log_gain_Q7 = 0; for( k = 0; k < 3; k++ ) { /* Safety margin for pitch gain control, to take into account factors @@ -71,47 +70,53 @@ void silk_quant_LTP_gains( cbk_gain_ptr_Q7 = silk_LTP_vq_gain_ptrs_Q7[ k ]; cbk_size = silk_LTP_vq_sizes[ k ]; - /* Set up pointers to first subframe */ - XX_Q17_ptr = XX_Q17; - xX_Q17_ptr = xX_Q17; + /* Set up pointer to first subframe */ + W_Q18_ptr = W_Q18; + b_Q14_ptr = B_Q14; - res_nrg_Q15 = 0; - rate_dist_Q7 = 0; + rate_dist_Q14 = 0; sum_log_gain_tmp_Q7 = *sum_log_gain_Q7; for( j = 0; j < nb_subfr; j++ ) { max_gain_Q7 = silk_log2lin( ( SILK_FIX_CONST( MAX_SUM_LOG_GAIN_DB / 6.0, 7 ) - sum_log_gain_tmp_Q7 ) + SILK_FIX_CONST( 7, 7 ) ) - gain_safety; + silk_VQ_WMat_EC( &temp_idx[ j ], /* O index of best codebook vector */ - &res_nrg_Q15_subfr, /* O residual energy */ - &rate_dist_Q7_subfr, /* O best weighted quantization error + mu * rate */ + &rate_dist_Q14_subfr, /* O best weighted quantization error + mu * rate */ &gain_Q7, /* O sum of absolute LTP coefficients */ - XX_Q17_ptr, /* I correlation matrix */ - xX_Q17_ptr, /* I correlation vector */ + b_Q14_ptr, /* I input vector to be quantized */ + W_Q18_ptr, /* I weighting matrix */ cbk_ptr_Q7, /* I codebook */ cbk_gain_ptr_Q7, /* I codebook effective gains */ cl_ptr_Q5, /* I code length for each codebook vector */ - subfr_len, /* I number of samples per subframe */ + mu_Q9, /* I tradeoff between weighted error and rate */ max_gain_Q7, /* I maximum sum of absolute LTP coefficients */ cbk_size, /* I number of vectors in codebook */ arch /* I Run-time architecture */ ); - res_nrg_Q15 = silk_ADD_POS_SAT32( res_nrg_Q15, res_nrg_Q15_subfr ); - rate_dist_Q7 = silk_ADD_POS_SAT32( rate_dist_Q7, rate_dist_Q7_subfr ); + rate_dist_Q14 = silk_ADD_POS_SAT32( rate_dist_Q14, rate_dist_Q14_subfr ); sum_log_gain_tmp_Q7 = silk_max(0, sum_log_gain_tmp_Q7 + silk_lin2log( gain_safety + gain_Q7 ) - SILK_FIX_CONST( 7, 7 )); - XX_Q17_ptr += LTP_ORDER * LTP_ORDER; - xX_Q17_ptr += LTP_ORDER; + b_Q14_ptr += LTP_ORDER; + W_Q18_ptr += LTP_ORDER * LTP_ORDER; } - if( rate_dist_Q7 <= min_rate_dist_Q7 ) { - min_rate_dist_Q7 = rate_dist_Q7; + /* Avoid never finding a codebook */ + rate_dist_Q14 = silk_min( silk_int32_MAX - 1, rate_dist_Q14 ); + + if( rate_dist_Q14 < min_rate_dist_Q14 ) { + min_rate_dist_Q14 = rate_dist_Q14; *periodicity_index = (opus_int8)k; silk_memcpy( cbk_index, temp_idx, nb_subfr * sizeof( opus_int8 ) ); best_sum_log_gain_Q7 = sum_log_gain_tmp_Q7; } + + /* Break early in low-complexity mode if rate distortion is below threshold */ + if( lowComplexity && ( rate_dist_Q14 < silk_LTP_gain_middle_avg_RD_Q14 ) ) { + break; + } } cbk_ptr_Q7 = silk_LTP_vq_ptrs_Q7[ *periodicity_index ]; @@ -120,13 +125,5 @@ void silk_quant_LTP_gains( B_Q14[ j * LTP_ORDER + k ] = silk_LSHIFT( cbk_ptr_Q7[ cbk_index[ j ] * LTP_ORDER + k ], 7 ); } } - - if( nb_subfr == 2 ) { - res_nrg_Q15 = silk_RSHIFT32( res_nrg_Q15, 1 ); - } else { - res_nrg_Q15 = silk_RSHIFT32( res_nrg_Q15, 2 ); - } - *sum_log_gain_Q7 = best_sum_log_gain_Q7; - *pred_gain_dB_Q7 = (opus_int)silk_SMULBB( -3, silk_lin2log( res_nrg_Q15 ) - ( 15 << 7 ) ); } diff --git a/thirdparty/opus/silk/resampler.c b/thirdparty/opus/silk/resampler.c index 1f11e50891..374fbb3722 100644 --- a/thirdparty/opus/silk/resampler.c +++ b/thirdparty/opus/silk/resampler.c @@ -91,14 +91,14 @@ opus_int silk_resampler_init( if( forEnc ) { if( ( Fs_Hz_in != 8000 && Fs_Hz_in != 12000 && Fs_Hz_in != 16000 && Fs_Hz_in != 24000 && Fs_Hz_in != 48000 ) || ( Fs_Hz_out != 8000 && Fs_Hz_out != 12000 && Fs_Hz_out != 16000 ) ) { - celt_assert( 0 ); + silk_assert( 0 ); return -1; } S->inputDelay = delay_matrix_enc[ rateID( Fs_Hz_in ) ][ rateID( Fs_Hz_out ) ]; } else { if( ( Fs_Hz_in != 8000 && Fs_Hz_in != 12000 && Fs_Hz_in != 16000 ) || ( Fs_Hz_out != 8000 && Fs_Hz_out != 12000 && Fs_Hz_out != 16000 && Fs_Hz_out != 24000 && Fs_Hz_out != 48000 ) ) { - celt_assert( 0 ); + silk_assert( 0 ); return -1; } S->inputDelay = delay_matrix_dec[ rateID( Fs_Hz_in ) ][ rateID( Fs_Hz_out ) ]; @@ -151,7 +151,7 @@ opus_int silk_resampler_init( S->Coefs = silk_Resampler_1_6_COEFS; } else { /* None available */ - celt_assert( 0 ); + silk_assert( 0 ); return -1; } } else { @@ -181,9 +181,9 @@ opus_int silk_resampler( opus_int nSamples; /* Need at least 1 ms of input data */ - celt_assert( inLen >= S->Fs_in_kHz ); + silk_assert( inLen >= S->Fs_in_kHz ); /* Delay can't exceed the 1 ms of buffering */ - celt_assert( S->inputDelay <= S->Fs_in_kHz ); + silk_assert( S->inputDelay <= S->Fs_in_kHz ); nSamples = S->Fs_in_kHz - S->inputDelay; diff --git a/thirdparty/opus/silk/resampler_down2.c b/thirdparty/opus/silk/resampler_down2.c index 971d7bfd4a..cec3634640 100644 --- a/thirdparty/opus/silk/resampler_down2.c +++ b/thirdparty/opus/silk/resampler_down2.c @@ -43,8 +43,8 @@ void silk_resampler_down2( opus_int32 k, len2 = silk_RSHIFT32( inLen, 1 ); opus_int32 in32, out32, Y, X; - celt_assert( silk_resampler_down2_0 > 0 ); - celt_assert( silk_resampler_down2_1 < 0 ); + silk_assert( silk_resampler_down2_0 > 0 ); + silk_assert( silk_resampler_down2_1 < 0 ); /* Internal variables and state are in Q10 format */ for( k = 0; k < len2; k++ ) { diff --git a/thirdparty/opus/silk/resampler_private_down_FIR.c b/thirdparty/opus/silk/resampler_private_down_FIR.c index 3e8735a35a..783e42b356 100644 --- a/thirdparty/opus/silk/resampler_private_down_FIR.c +++ b/thirdparty/opus/silk/resampler_private_down_FIR.c @@ -136,7 +136,7 @@ static OPUS_INLINE opus_int16 *silk_resampler_private_down_FIR_INTERPOL( } break; default: - celt_assert( 0 ); + silk_assert( 0 ); } return out; } diff --git a/thirdparty/opus/silk/sort.c b/thirdparty/opus/silk/sort.c index 4fba16f831..7187c9efb1 100644 --- a/thirdparty/opus/silk/sort.c +++ b/thirdparty/opus/silk/sort.c @@ -48,9 +48,9 @@ void silk_insertion_sort_increasing( opus_int i, j; /* Safety checks */ - celt_assert( K > 0 ); - celt_assert( L > 0 ); - celt_assert( L >= K ); + silk_assert( K > 0 ); + silk_assert( L > 0 ); + silk_assert( L >= K ); /* Write start indices in index vector */ for( i = 0; i < K; i++ ) { @@ -96,9 +96,9 @@ void silk_insertion_sort_decreasing_int16( opus_int value; /* Safety checks */ - celt_assert( K > 0 ); - celt_assert( L > 0 ); - celt_assert( L >= K ); + silk_assert( K > 0 ); + silk_assert( L > 0 ); + silk_assert( L >= K ); /* Write start indices in index vector */ for( i = 0; i < K; i++ ) { @@ -141,7 +141,7 @@ void silk_insertion_sort_increasing_all_values_int16( opus_int i, j; /* Safety checks */ - celt_assert( L > 0 ); + silk_assert( L > 0 ); /* Sort vector elements by value, increasing order */ for( i = 1; i < L; i++ ) { diff --git a/thirdparty/opus/silk/stereo_LR_to_MS.c b/thirdparty/opus/silk/stereo_LR_to_MS.c index c8226663c8..dda0298de2 100644 --- a/thirdparty/opus/silk/stereo_LR_to_MS.c +++ b/thirdparty/opus/silk/stereo_LR_to_MS.c @@ -109,7 +109,7 @@ void silk_stereo_LR_to_MS( if( total_rate_bps < 1 ) { total_rate_bps = 1; } - min_mid_rate_bps = silk_SMLABB( 2000, fs_kHz, 600 ); + min_mid_rate_bps = silk_SMLABB( 2000, fs_kHz, 900 ); silk_assert( min_mid_rate_bps < 32767 ); /* Default bitrate distribution: 8 parts for Mid and (5+3*frac) parts for Side. so: mid_rate = ( 8 / ( 13 + 3 * frac ) ) * total_ rate */ frac_3_Q16 = silk_MUL( 3, frac_Q16 ); diff --git a/thirdparty/opus/silk/stereo_encode_pred.c b/thirdparty/opus/silk/stereo_encode_pred.c index 03becb6736..e6dd195066 100644 --- a/thirdparty/opus/silk/stereo_encode_pred.c +++ b/thirdparty/opus/silk/stereo_encode_pred.c @@ -41,11 +41,11 @@ void silk_stereo_encode_pred( /* Entropy coding */ n = 5 * ix[ 0 ][ 2 ] + ix[ 1 ][ 2 ]; - celt_assert( n < 25 ); + silk_assert( n < 25 ); ec_enc_icdf( psRangeEnc, n, silk_stereo_pred_joint_iCDF, 8 ); for( n = 0; n < 2; n++ ) { - celt_assert( ix[ n ][ 0 ] < 3 ); - celt_assert( ix[ n ][ 1 ] < STEREO_QUANT_SUB_STEPS ); + silk_assert( ix[ n ][ 0 ] < 3 ); + silk_assert( ix[ n ][ 1 ] < STEREO_QUANT_SUB_STEPS ); ec_enc_icdf( psRangeEnc, ix[ n ][ 0 ], silk_uniform3_iCDF, 8 ); ec_enc_icdf( psRangeEnc, ix[ n ][ 1 ], silk_uniform5_iCDF, 8 ); } diff --git a/thirdparty/opus/silk/structs.h b/thirdparty/opus/silk/structs.h index 3380c757b2..827829dc6f 100644 --- a/thirdparty/opus/silk/structs.h +++ b/thirdparty/opus/silk/structs.h @@ -48,7 +48,6 @@ typedef struct { opus_int32 sLPC_Q14[ MAX_SUB_FRAME_LENGTH + NSQ_LPC_BUF_LENGTH ]; opus_int32 sAR2_Q14[ MAX_SHAPE_LPC_ORDER ]; opus_int32 sLF_AR_shp_Q14; - opus_int32 sDiff_shp_Q14; opus_int lagPrev; opus_int sLTP_buf_idx; opus_int sLTP_shp_buf_idx; @@ -78,7 +77,6 @@ typedef struct { opus_int32 In_LP_State[ 2 ]; /* Low pass filter state */ opus_int32 transition_frame_no; /* Counter which is mapped to a cut-off frequency */ opus_int mode; /* Operating mode, <0: switch down, >0: switch up; 0: do nothing */ - opus_int32 saved_fs_kHz; /* If non-zero, holds the last sampling rate before a bandwidth switching reset. */ } silk_LP_state; /* Structure containing NLSF codebook */ @@ -88,7 +86,6 @@ typedef struct { const opus_int16 quantStepSize_Q16; const opus_int16 invQuantStepSize_Q6; const opus_uint8 *CB1_NLSF_Q8; - const opus_int16 *CB1_Wght_Q9; const opus_uint8 *CB1_iCDF; const opus_uint8 *pred_Q8; const opus_uint8 *ec_sel; @@ -172,6 +169,8 @@ typedef struct { opus_int pitchEstimationComplexity; /* Complexity level for pitch estimator */ opus_int pitchEstimationLPCOrder; /* Whitening filter order for pitch estimator */ opus_int32 pitchEstimationThreshold_Q16; /* Threshold for pitch estimator */ + opus_int LTPQuantLowComplexity; /* Flag for low complexity LTP quantization */ + opus_int mu_LTP_Q9; /* Rate-distortion tradeoff in LTP quantization */ opus_int32 sum_log_gain_Q7; /* Cumulative max prediction gain */ opus_int NLSF_MSVQ_Survivors; /* Number of survivors in NLSF MSVQ */ opus_int first_frame_after_reset; /* Flag for deactivating NLSF interpolation, pitch prediction */ @@ -302,7 +301,6 @@ typedef struct { /* Stuff used for PLC */ opus_int lossCnt; opus_int prevSignalType; - int arch; silk_PLC_struct sPLC; diff --git a/thirdparty/opus/silk/sum_sqr_shift.c b/thirdparty/opus/silk/sum_sqr_shift.c index 4fd0c3d7d5..129df191d8 100644 --- a/thirdparty/opus/silk/sum_sqr_shift.c +++ b/thirdparty/opus/silk/sum_sqr_shift.c @@ -41,40 +41,43 @@ void silk_sum_sqr_shift( ) { opus_int i, shft; - opus_uint32 nrg_tmp; - opus_int32 nrg; + opus_int32 nrg_tmp, nrg; - /* Do a first run with the maximum shift we could have. */ - shft = 31-silk_CLZ32(len); - /* Let's be conservative with rounding and start with nrg=len. */ - nrg = len; - for( i = 0; i < len - 1; i += 2 ) { - nrg_tmp = silk_SMULBB( x[ i ], x[ i ] ); - nrg_tmp = silk_SMLABB_ovflw( nrg_tmp, x[ i + 1 ], x[ i + 1 ] ); - nrg = (opus_int32)silk_ADD_RSHIFT_uint( nrg, nrg_tmp, shft ); + nrg = 0; + shft = 0; + len--; + for( i = 0; i < len; i += 2 ) { + nrg = silk_SMLABB_ovflw( nrg, x[ i ], x[ i ] ); + nrg = silk_SMLABB_ovflw( nrg, x[ i + 1 ], x[ i + 1 ] ); + if( nrg < 0 ) { + /* Scale down */ + nrg = (opus_int32)silk_RSHIFT_uint( (opus_uint32)nrg, 2 ); + shft = 2; + i+=2; + break; + } } - if( i < len ) { - /* One sample left to process */ - nrg_tmp = silk_SMULBB( x[ i ], x[ i ] ); - nrg = (opus_int32)silk_ADD_RSHIFT_uint( nrg, nrg_tmp, shft ); - } - silk_assert( nrg >= 0 ); - /* Make sure the result will fit in a 32-bit signed integer with two bits - of headroom. */ - shft = silk_max_32(0, shft+3 - silk_CLZ32(nrg)); - nrg = 0; - for( i = 0 ; i < len - 1; i += 2 ) { + for( ; i < len; i += 2 ) { nrg_tmp = silk_SMULBB( x[ i ], x[ i ] ); nrg_tmp = silk_SMLABB_ovflw( nrg_tmp, x[ i + 1 ], x[ i + 1 ] ); - nrg = (opus_int32)silk_ADD_RSHIFT_uint( nrg, nrg_tmp, shft ); + nrg = (opus_int32)silk_ADD_RSHIFT_uint( nrg, (opus_uint32)nrg_tmp, shft ); + if( nrg < 0 ) { + /* Scale down */ + nrg = (opus_int32)silk_RSHIFT_uint( (opus_uint32)nrg, 2 ); + shft += 2; + } } - if( i < len ) { + if( i == len ) { /* One sample left to process */ nrg_tmp = silk_SMULBB( x[ i ], x[ i ] ); nrg = (opus_int32)silk_ADD_RSHIFT_uint( nrg, nrg_tmp, shft ); } - silk_assert( nrg >= 0 ); + /* Make sure to have at least one extra leading zero (two leading zeros in total) */ + if( nrg & 0xC0000000 ) { + nrg = silk_RSHIFT_uint( (opus_uint32)nrg, 2 ); + shft += 2; + } /* Output arguments */ *shift = shft; diff --git a/thirdparty/opus/silk/tables.h b/thirdparty/opus/silk/tables.h index 95230c451a..7fea6fda39 100644 --- a/thirdparty/opus/silk/tables.h +++ b/thirdparty/opus/silk/tables.h @@ -76,8 +76,10 @@ extern const opus_uint8 silk_NLSF_EXT_iCDF[ 7 ]; extern const opus_uint8 silk_LTP_per_index_iCDF[ 3 ]; /* 3 */ extern const opus_uint8 * const silk_LTP_gain_iCDF_ptrs[ NB_LTP_CBKS ]; /* 3 */ extern const opus_uint8 * const silk_LTP_gain_BITS_Q5_ptrs[ NB_LTP_CBKS ]; /* 3 */ +extern const opus_int16 silk_LTP_gain_middle_avg_RD_Q14; extern const opus_int8 * const silk_LTP_vq_ptrs_Q7[ NB_LTP_CBKS ]; /* 168 */ extern const opus_uint8 * const silk_LTP_vq_gain_ptrs_Q7[NB_LTP_CBKS]; + extern const opus_int8 silk_LTP_vq_sizes[ NB_LTP_CBKS ]; /* 3 */ extern const opus_uint8 silk_LTPscale_iCDF[ 3 ]; /* 4 */ @@ -97,6 +99,12 @@ extern const opus_uint8 silk_NLSF_interpolation_factor_iCDF[ 5 ]; extern const silk_NLSF_CB_struct silk_NLSF_CB_WB; /* 1040 */ extern const silk_NLSF_CB_struct silk_NLSF_CB_NB_MB; /* 728 */ +/* Piece-wise linear mapping from bitrate in kbps to coding quality in dB SNR */ +extern const opus_int32 silk_TargetRate_table_NB[ TARGET_RATE_TAB_SZ ]; /* 32 */ +extern const opus_int32 silk_TargetRate_table_MB[ TARGET_RATE_TAB_SZ ]; /* 32 */ +extern const opus_int32 silk_TargetRate_table_WB[ TARGET_RATE_TAB_SZ ]; /* 32 */ +extern const opus_int16 silk_SNR_table_Q1[ TARGET_RATE_TAB_SZ ]; /* 32 */ + /* Quantization offsets */ extern const opus_int16 silk_Quantization_Offsets_Q10[ 2 ][ 2 ]; /* 8 */ diff --git a/thirdparty/opus/silk/tables_LTP.c b/thirdparty/opus/silk/tables_LTP.c index 5e12c8643e..0e6a0254d5 100644 --- a/thirdparty/opus/silk/tables_LTP.c +++ b/thirdparty/opus/silk/tables_LTP.c @@ -51,6 +51,8 @@ static const opus_uint8 silk_LTP_gain_iCDF_2[32] = { 24, 20, 16, 12, 9, 5, 2, 0 }; +const opus_int16 silk_LTP_gain_middle_avg_RD_Q14 = 12304; + static const opus_uint8 silk_LTP_gain_BITS_Q5_0[8] = { 15, 131, 138, 138, 155, 155, 173, 173 }; diff --git a/thirdparty/opus/silk/tables_NLSF_CB_NB_MB.c b/thirdparty/opus/silk/tables_NLSF_CB_NB_MB.c index 195d5b95bd..8c59d207aa 100644 --- a/thirdparty/opus/silk/tables_NLSF_CB_NB_MB.c +++ b/thirdparty/opus/silk/tables_NLSF_CB_NB_MB.c @@ -74,41 +74,6 @@ static const opus_uint8 silk_NLSF_CB1_NB_MB_Q8[ 320 ] = { 64, 84, 104, 118, 156, 177, 201, 230 }; -static const opus_int16 silk_NLSF_CB1_Wght_Q9[ 320 ] = { - 2897, 2314, 2314, 2314, 2287, 2287, 2314, 2300, 2327, 2287, - 2888, 2580, 2394, 2367, 2314, 2274, 2274, 2274, 2274, 2194, - 2487, 2340, 2340, 2314, 2314, 2314, 2340, 2340, 2367, 2354, - 3216, 2766, 2340, 2340, 2314, 2274, 2221, 2207, 2261, 2194, - 2460, 2474, 2367, 2394, 2394, 2394, 2394, 2367, 2407, 2314, - 3479, 3056, 2127, 2207, 2274, 2274, 2274, 2287, 2314, 2261, - 3282, 3141, 2580, 2394, 2247, 2221, 2207, 2194, 2194, 2114, - 4096, 3845, 2221, 2620, 2620, 2407, 2314, 2394, 2367, 2074, - 3178, 3244, 2367, 2221, 2553, 2434, 2340, 2314, 2167, 2221, - 3338, 3488, 2726, 2194, 2261, 2460, 2354, 2367, 2207, 2101, - 2354, 2420, 2327, 2367, 2394, 2420, 2420, 2420, 2460, 2367, - 3779, 3629, 2434, 2527, 2367, 2274, 2274, 2300, 2207, 2048, - 3254, 3225, 2713, 2846, 2447, 2327, 2300, 2300, 2274, 2127, - 3263, 3300, 2753, 2806, 2447, 2261, 2261, 2247, 2127, 2101, - 2873, 2981, 2633, 2367, 2407, 2354, 2194, 2247, 2247, 2114, - 3225, 3197, 2633, 2580, 2274, 2181, 2247, 2221, 2221, 2141, - 3178, 3310, 2740, 2407, 2274, 2274, 2274, 2287, 2194, 2114, - 3141, 3272, 2460, 2061, 2287, 2500, 2367, 2487, 2434, 2181, - 3507, 3282, 2314, 2700, 2647, 2474, 2367, 2394, 2340, 2127, - 3423, 3535, 3038, 3056, 2300, 1950, 2221, 2274, 2274, 2274, - 3404, 3366, 2087, 2687, 2873, 2354, 2420, 2274, 2474, 2540, - 3760, 3488, 1950, 2660, 2897, 2527, 2394, 2367, 2460, 2261, - 3028, 3272, 2740, 2888, 2740, 2154, 2127, 2287, 2234, 2247, - 3695, 3657, 2025, 1969, 2660, 2700, 2580, 2500, 2327, 2367, - 3207, 3413, 2354, 2074, 2888, 2888, 2340, 2487, 2247, 2167, - 3338, 3366, 2846, 2780, 2327, 2154, 2274, 2287, 2114, 2061, - 2327, 2300, 2181, 2167, 2181, 2367, 2633, 2700, 2700, 2553, - 2407, 2434, 2221, 2261, 2221, 2221, 2340, 2420, 2607, 2700, - 3038, 3244, 2806, 2888, 2474, 2074, 2300, 2314, 2354, 2380, - 2221, 2154, 2127, 2287, 2500, 2793, 2793, 2620, 2580, 2367, - 3676, 3713, 2234, 1838, 2181, 2753, 2726, 2673, 2513, 2207, - 2793, 3160, 2726, 2553, 2846, 2513, 2181, 2394, 2221, 2181 -}; - static const opus_uint8 silk_NLSF_CB1_iCDF_NB_MB[ 64 ] = { 212, 178, 148, 129, 108, 96, 85, 82, 79, 77, 61, 59, 57, 56, 51, 49, @@ -185,7 +150,6 @@ const silk_NLSF_CB_struct silk_NLSF_CB_NB_MB = SILK_FIX_CONST( 0.18, 16 ), SILK_FIX_CONST( 1.0 / 0.18, 6 ), silk_NLSF_CB1_NB_MB_Q8, - silk_NLSF_CB1_Wght_Q9, silk_NLSF_CB1_iCDF_NB_MB, silk_NLSF_PRED_NB_MB_Q8, silk_NLSF_CB2_SELECT_NB_MB, diff --git a/thirdparty/opus/silk/tables_NLSF_CB_WB.c b/thirdparty/opus/silk/tables_NLSF_CB_WB.c index 5cc9f57bff..50af87eb2e 100644 --- a/thirdparty/opus/silk/tables_NLSF_CB_WB.c +++ b/thirdparty/opus/silk/tables_NLSF_CB_WB.c @@ -98,41 +98,6 @@ static const opus_uint8 silk_NLSF_CB1_WB_Q8[ 512 ] = { 110, 119, 129, 141, 175, 198, 218, 237 }; -static const opus_int16 silk_NLSF_CB1_WB_Wght_Q9[ 512 ] = { - 3657, 2925, 2925, 2925, 2925, 2925, 2925, 2925, 2925, 2925, 2925, 2925, 2963, 2963, 2925, 2846, - 3216, 3085, 2972, 3056, 3056, 3010, 3010, 3010, 2963, 2963, 3010, 2972, 2888, 2846, 2846, 2726, - 3920, 4014, 2981, 3207, 3207, 2934, 3056, 2846, 3122, 3244, 2925, 2846, 2620, 2553, 2780, 2925, - 3516, 3197, 3010, 3103, 3019, 2888, 2925, 2925, 2925, 2925, 2888, 2888, 2888, 2888, 2888, 2753, - 5054, 5054, 2934, 3573, 3385, 3056, 3085, 2793, 3160, 3160, 2972, 2846, 2513, 2540, 2753, 2888, - 4428, 4149, 2700, 2753, 2972, 3010, 2925, 2846, 2981, 3019, 2925, 2925, 2925, 2925, 2888, 2726, - 3620, 3019, 2972, 3056, 3056, 2873, 2806, 3056, 3216, 3047, 2981, 3291, 3291, 2981, 3310, 2991, - 5227, 5014, 2540, 3338, 3526, 3385, 3197, 3094, 3376, 2981, 2700, 2647, 2687, 2793, 2846, 2673, - 5081, 5174, 4615, 4428, 2460, 2897, 3047, 3207, 3169, 2687, 2740, 2888, 2846, 2793, 2846, 2700, - 3122, 2888, 2963, 2925, 2925, 2925, 2925, 2963, 2963, 2963, 2963, 2925, 2925, 2963, 2963, 2963, - 4202, 3207, 2981, 3103, 3010, 2888, 2888, 2925, 2972, 2873, 2916, 3019, 2972, 3010, 3197, 2873, - 3760, 3760, 3244, 3103, 2981, 2888, 2925, 2888, 2972, 2934, 2793, 2793, 2846, 2888, 2888, 2660, - 3854, 4014, 3207, 3122, 3244, 2934, 3047, 2963, 2963, 3085, 2846, 2793, 2793, 2793, 2793, 2580, - 3845, 4080, 3357, 3516, 3094, 2740, 3010, 2934, 3122, 3085, 2846, 2846, 2647, 2647, 2846, 2806, - 5147, 4894, 3225, 3845, 3441, 3169, 2897, 3413, 3451, 2700, 2580, 2673, 2740, 2846, 2806, 2753, - 4109, 3789, 3291, 3160, 2925, 2888, 2888, 2925, 2793, 2740, 2793, 2740, 2793, 2846, 2888, 2806, - 5081, 5054, 3047, 3545, 3244, 3056, 3085, 2944, 3103, 2897, 2740, 2740, 2740, 2846, 2793, 2620, - 4309, 4309, 2860, 2527, 3207, 3376, 3376, 3075, 3075, 3376, 3056, 2846, 2647, 2580, 2726, 2753, - 3056, 2916, 2806, 2888, 2740, 2687, 2897, 3103, 3150, 3150, 3216, 3169, 3056, 3010, 2963, 2846, - 4375, 3882, 2925, 2888, 2846, 2888, 2846, 2846, 2888, 2888, 2888, 2846, 2888, 2925, 2888, 2846, - 2981, 2916, 2916, 2981, 2981, 3056, 3122, 3216, 3150, 3056, 3010, 2972, 2972, 2972, 2925, 2740, - 4229, 4149, 3310, 3347, 2925, 2963, 2888, 2981, 2981, 2846, 2793, 2740, 2846, 2846, 2846, 2793, - 4080, 4014, 3103, 3010, 2925, 2925, 2925, 2888, 2925, 2925, 2846, 2846, 2846, 2793, 2888, 2780, - 4615, 4575, 3169, 3441, 3207, 2981, 2897, 3038, 3122, 2740, 2687, 2687, 2687, 2740, 2793, 2700, - 4149, 4269, 3789, 3657, 2726, 2780, 2888, 2888, 3010, 2972, 2925, 2846, 2687, 2687, 2793, 2888, - 4215, 3554, 2753, 2846, 2846, 2888, 2888, 2888, 2925, 2925, 2888, 2925, 2925, 2925, 2963, 2888, - 5174, 4921, 2261, 3432, 3789, 3479, 3347, 2846, 3310, 3479, 3150, 2897, 2460, 2487, 2753, 2925, - 3451, 3685, 3122, 3197, 3357, 3047, 3207, 3207, 2981, 3216, 3085, 2925, 2925, 2687, 2540, 2434, - 2981, 3010, 2793, 2793, 2740, 2793, 2846, 2972, 3056, 3103, 3150, 3150, 3150, 3103, 3010, 3010, - 2944, 2873, 2687, 2726, 2780, 3010, 3432, 3545, 3357, 3244, 3056, 3010, 2963, 2925, 2888, 2846, - 3019, 2944, 2897, 3010, 3010, 2972, 3019, 3103, 3056, 3056, 3010, 2888, 2846, 2925, 2925, 2888, - 3920, 3967, 3010, 3197, 3357, 3216, 3291, 3291, 3479, 3704, 3441, 2726, 2181, 2460, 2580, 2607 -}; - static const opus_uint8 silk_NLSF_CB1_iCDF_WB[ 64 ] = { 225, 204, 201, 184, 183, 175, 158, 154, 153, 135, 119, 115, 113, 110, 109, 99, @@ -223,7 +188,6 @@ const silk_NLSF_CB_struct silk_NLSF_CB_WB = SILK_FIX_CONST( 0.15, 16 ), SILK_FIX_CONST( 1.0 / 0.15, 6 ), silk_NLSF_CB1_WB_Q8, - silk_NLSF_CB1_WB_Wght_Q9, silk_NLSF_CB1_iCDF_WB, silk_NLSF_PRED_WB_Q8, silk_NLSF_CB2_SELECT_WB, diff --git a/thirdparty/opus/silk/tables_other.c b/thirdparty/opus/silk/tables_other.c index e34d90777b..398686bf26 100644 --- a/thirdparty/opus/silk/tables_other.c +++ b/thirdparty/opus/silk/tables_other.c @@ -38,6 +38,20 @@ extern "C" { #endif +/* Piece-wise linear mapping from bitrate in kbps to coding quality in dB SNR */ +const opus_int32 silk_TargetRate_table_NB[ TARGET_RATE_TAB_SZ ] = { + 0, 8000, 9400, 11500, 13500, 17500, 25000, MAX_TARGET_RATE_BPS +}; +const opus_int32 silk_TargetRate_table_MB[ TARGET_RATE_TAB_SZ ] = { + 0, 9000, 12000, 14500, 18500, 24500, 35500, MAX_TARGET_RATE_BPS +}; +const opus_int32 silk_TargetRate_table_WB[ TARGET_RATE_TAB_SZ ] = { + 0, 10500, 14000, 17000, 21500, 28500, 42000, MAX_TARGET_RATE_BPS +}; +const opus_int16 silk_SNR_table_Q1[ TARGET_RATE_TAB_SZ ] = { + 18, 29, 38, 40, 46, 52, 62, 84 +}; + /* Tables for stereo predictor coding */ const opus_int16 silk_stereo_pred_quant_Q13[ STEREO_QUANT_TAB_SIZE ] = { -13732, -10050, -8266, -7526, -6500, -5000, -2950, -820, diff --git a/thirdparty/opus/silk/tuning_parameters.h b/thirdparty/opus/silk/tuning_parameters.h index d70275fd8f..5b8f404235 100644 --- a/thirdparty/opus/silk/tuning_parameters.h +++ b/thirdparty/opus/silk/tuning_parameters.h @@ -53,12 +53,19 @@ extern "C" /* LPC analysis regularization */ #define FIND_LPC_COND_FAC 1e-5f +/* LTP analysis defines */ +#define FIND_LTP_COND_FAC 1e-5f +#define LTP_DAMPING 0.05f +#define LTP_SMOOTHING 0.1f + +/* LTP quantization settings */ +#define MU_LTP_QUANT_NB 0.03f +#define MU_LTP_QUANT_MB 0.025f +#define MU_LTP_QUANT_WB 0.02f + /* Max cumulative LTP gain */ #define MAX_SUM_LOG_GAIN_DB 250.0f -/* LTP analysis defines */ -#define LTP_CORR_INV_MAX 0.03f - /***********************/ /* High pass filtering */ /***********************/ @@ -96,16 +103,25 @@ extern "C" #define SPARSE_SNR_INCR_dB 2.0f /* threshold for sparseness measure above which to use lower quantization offset during unvoiced */ -#define ENERGY_VARIATION_THRESHOLD_QNT_OFFSET 0.6f +#define SPARSENESS_THRESHOLD_QNT_OFFSET 0.75f /* warping control */ #define WARPING_MULTIPLIER 0.015f /* fraction added to first autocorrelation value */ -#define SHAPE_WHITE_NOISE_FRACTION 3e-5f +#define SHAPE_WHITE_NOISE_FRACTION 5e-5f /* noise shaping filter chirp factor */ -#define BANDWIDTH_EXPANSION 0.94f +#define BANDWIDTH_EXPANSION 0.95f + +/* difference between chirp factors for analysis and synthesis noise shaping filters at low bitrates */ +#define LOW_RATE_BANDWIDTH_EXPANSION_DELTA 0.01f + +/* extra harmonic boosting (signal shaping) at low bitrates */ +#define LOW_RATE_HARMONIC_BOOST 0.1f + +/* extra harmonic boosting (signal shaping) for noisy input signals */ +#define LOW_INPUT_QUALITY_HARMONIC_BOOST 0.1f /* harmonic noise shaping */ #define HARMONIC_SHAPING 0.3f diff --git a/thirdparty/opus/silk/x86/NSQ_del_dec_sse4_1.c b/thirdparty/opus/silk/x86/NSQ_del_dec_sse.c index 2c75ede2dd..21d4a8bc1e 100644 --- a/thirdparty/opus/silk/x86/NSQ_del_dec_sse4_1.c +++ b/thirdparty/opus/silk/x86/NSQ_del_dec_sse.c @@ -107,12 +107,12 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec_sse4_1( opus_int predictLPCOrder, /* I Prediction filter order */ opus_int warping_Q16, /* I */ opus_int nStatesDelayedDecision, /* I Number of states in decision tree */ - opus_int *smpl_buf_idx, /* I/O Index to newest samples in buffers */ + opus_int *smpl_buf_idx, /* I Index to newest samples in buffers */ opus_int decisionDelay /* I */ ); void silk_NSQ_del_dec_sse4_1( - const silk_encoder_state *psEncC, /* I Encoder State */ + const silk_encoder_state *psEncC, /* I/O Encoder State */ silk_nsq_state *NSQ, /* I/O NSQ state */ SideInfoIndices *psIndices, /* I/O Quantization Indices */ const opus_int32 x_Q3[], /* I Prefiltered input signal */ @@ -234,8 +234,7 @@ void silk_NSQ_del_dec_sse4_1( psDD = &psDelDec[ Winner_ind ]; last_smple_idx = smpl_buf_idx + decisionDelay; for( i = 0; i < decisionDelay; i++ ) { - last_smple_idx = ( last_smple_idx - 1 ) % DECISION_DELAY; - if( last_smple_idx < 0 ) last_smple_idx += DECISION_DELAY; + last_smple_idx = ( last_smple_idx - 1 ) & DECISION_DELAY_MASK; pulses[ i - decisionDelay ] = (opus_int8)silk_RSHIFT_ROUND( psDD->Q_Q10[ last_smple_idx ], 10 ); pxq[ i - decisionDelay ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( psDD->Xq_Q14[ last_smple_idx ], Gains_Q16[ 1 ] ), 14 ) ); @@ -247,7 +246,7 @@ void silk_NSQ_del_dec_sse4_1( /* Rewhiten with new A coefs */ start_idx = psEncC->ltp_mem_length - lag - psEncC->predictLPCOrder - LTP_ORDER / 2; - celt_assert( start_idx > 0 ); + silk_assert( start_idx > 0 ); silk_LPC_analysis_filter( &sLTP[ start_idx ], &NSQ->xq[ start_idx + k * psEncC->subfr_length ], A_Q12, psEncC->ltp_mem_length - start_idx, psEncC->predictLPCOrder, psEncC->arch ); @@ -286,8 +285,7 @@ void silk_NSQ_del_dec_sse4_1( last_smple_idx = smpl_buf_idx + decisionDelay; Gain_Q10 = silk_RSHIFT32( Gains_Q16[ psEncC->nb_subfr - 1 ], 6 ); for( i = 0; i < decisionDelay; i++ ) { - last_smple_idx = ( last_smple_idx - 1 ) % DECISION_DELAY; - if( last_smple_idx < 0 ) last_smple_idx += DECISION_DELAY; + last_smple_idx = ( last_smple_idx - 1 ) & DECISION_DELAY_MASK; pulses[ i - decisionDelay ] = (opus_int8)silk_RSHIFT_ROUND( psDD->Q_Q10[ last_smple_idx ], 10 ); pxq[ i - decisionDelay ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( psDD->Xq_Q14[ last_smple_idx ], Gain_Q10 ), 8 ) ); @@ -301,6 +299,7 @@ void silk_NSQ_del_dec_sse4_1( NSQ->lagPrev = pitchL[ psEncC->nb_subfr - 1 ]; /* Save quantized speech signal */ + /* DEBUG_STORE_DATA( enc.pcm, &NSQ->xq[psEncC->ltp_mem_length], psEncC->frame_length * sizeof( opus_int16 ) ) */ silk_memmove( NSQ->xq, &NSQ->xq[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int16 ) ); silk_memmove( NSQ->sLTP_shp_Q14, &NSQ->sLTP_shp_Q14[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int32 ) ); RESTORE_STACK; @@ -334,7 +333,7 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec_sse4_1( opus_int predictLPCOrder, /* I Prediction filter order */ opus_int warping_Q16, /* I */ opus_int nStatesDelayedDecision, /* I Number of states in decision tree */ - opus_int *smpl_buf_idx, /* I/O Index to newest samples in buffers */ + opus_int *smpl_buf_idx, /* I Index to newest samples in buffers */ opus_int decisionDelay /* I */ ) { @@ -353,7 +352,7 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec_sse4_1( __m128i b_Q12_0123, b_sr_Q12_0123; SAVE_STACK; - celt_assert( nStatesDelayedDecision > 0 ); + silk_assert( nStatesDelayedDecision > 0 ); ALLOC( psSampleState, nStatesDelayedDecision, NSQ_sample_pair ); shp_lag_ptr = &NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - lag + HARM_SHAPE_FIR_TAPS / 2 ]; @@ -639,9 +638,8 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec_sse4_1( psSS[ 1 ].xq_Q14 = xq_Q14; } } - *smpl_buf_idx = ( *smpl_buf_idx - 1 ) % DECISION_DELAY; - if( *smpl_buf_idx < 0 ) *smpl_buf_idx += DECISION_DELAY; - last_smple_idx = ( *smpl_buf_idx + decisionDelay ) % DECISION_DELAY; + *smpl_buf_idx = ( *smpl_buf_idx - 1 ) & DECISION_DELAY_MASK; /* Index to newest samples */ + last_smple_idx = ( *smpl_buf_idx + decisionDelay ) & DECISION_DELAY_MASK; /* Index to decisionDelay old samples */ /* Find winner */ RDmin_Q10 = psSampleState[ 0 ][ 0 ].RD_Q10; diff --git a/thirdparty/opus/silk/x86/NSQ_sse4_1.c b/thirdparty/opus/silk/x86/NSQ_sse.c index b0315e35fc..bb3c5f1955 100644 --- a/thirdparty/opus/silk/x86/NSQ_sse4_1.c +++ b/thirdparty/opus/silk/x86/NSQ_sse.c @@ -71,7 +71,7 @@ static OPUS_INLINE void silk_noise_shape_quantizer_10_16_sse4_1( ); void silk_NSQ_sse4_1( - const silk_encoder_state *psEncC, /* I Encoder State */ + const silk_encoder_state *psEncC, /* I/O Encoder State */ silk_nsq_state *NSQ, /* I/O NSQ state */ SideInfoIndices *psIndices, /* I/O Quantization Indices */ const opus_int32 x_Q3[], /* I Prefiltered input signal */ @@ -199,7 +199,7 @@ void silk_NSQ_sse4_1( if( ( k & ( 3 - silk_LSHIFT( LSF_interpolation_flag, 1 ) ) ) == 0 ) { /* Rewhiten with new A coefs */ start_idx = psEncC->ltp_mem_length - lag - psEncC->predictLPCOrder - LTP_ORDER / 2; - celt_assert( start_idx > 0 ); + silk_assert( start_idx > 0 ); silk_LPC_analysis_filter( &sLTP[ start_idx ], &NSQ->xq[ start_idx + k * psEncC->subfr_length ], A_Q12, psEncC->ltp_mem_length - start_idx, psEncC->predictLPCOrder, psEncC->arch ); @@ -233,6 +233,7 @@ void silk_NSQ_sse4_1( NSQ->lagPrev = pitchL[ psEncC->nb_subfr - 1 ]; /* Save quantized speech and noise shaping signals */ + /* DEBUG_STORE_DATA( enc.pcm, &NSQ->xq[ psEncC->ltp_mem_length ], psEncC->frame_length * sizeof( opus_int16 ) ) */ silk_memmove( NSQ->xq, &NSQ->xq[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int16 ) ); silk_memmove( NSQ->sLTP_shp_Q14, &NSQ->sLTP_shp_Q14[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int32 ) ); RESTORE_STACK; diff --git a/thirdparty/opus/silk/x86/VAD_sse4_1.c b/thirdparty/opus/silk/x86/VAD_sse.c index d02ddf4ad0..4e90f4410d 100644 --- a/thirdparty/opus/silk/x86/VAD_sse4_1.c +++ b/thirdparty/opus/silk/x86/VAD_sse.c @@ -65,9 +65,9 @@ opus_int silk_VAD_GetSA_Q8_sse4_1( /* O Return value, 0 if s /* Safety checks */ silk_assert( VAD_N_BANDS == 4 ); - celt_assert( MAX_FRAME_LENGTH >= psEncC->frame_length ); - celt_assert( psEncC->frame_length <= 512 ); - celt_assert( psEncC->frame_length == 8 * silk_RSHIFT( psEncC->frame_length, 3 ) ); + silk_assert( MAX_FRAME_LENGTH >= psEncC->frame_length ); + silk_assert( psEncC->frame_length <= 512 ); + silk_assert( psEncC->frame_length == 8 * silk_RSHIFT( psEncC->frame_length, 3 ) ); /***********************/ /* Filter and Decimate */ diff --git a/thirdparty/opus/silk/x86/VQ_WMat_EC_sse4_1.c b/thirdparty/opus/silk/x86/VQ_WMat_EC_sse.c index 74d6c6d0ec..74d6c6d0ec 100644 --- a/thirdparty/opus/silk/x86/VQ_WMat_EC_sse4_1.c +++ b/thirdparty/opus/silk/x86/VQ_WMat_EC_sse.c diff --git a/thirdparty/opus/silk/x86/main_sse.h b/thirdparty/opus/silk/x86/main_sse.h index 2f15d44869..d8d61310ed 100644 --- a/thirdparty/opus/silk/x86/main_sse.h +++ b/thirdparty/opus/silk/x86/main_sse.h @@ -34,7 +34,6 @@ # if defined(OPUS_X86_MAY_HAVE_SSE4_1) -#if 0 /* FIXME: SSE disabled until silk_VQ_WMat_EC_sse4_1() gets updated. */ # define OVERRIDE_silk_VQ_WMat_EC void silk_VQ_WMat_EC_sse4_1( @@ -80,13 +79,11 @@ extern void (*const SILK_VQ_WMAT_EC_IMPL[OPUS_ARCHMASK + 1])( mu_Q9, max_gain_Q7, L)) #endif -#endif -#if 0 /* FIXME: SSE disabled until the NSQ code gets updated. */ # define OVERRIDE_silk_NSQ void silk_NSQ_sse4_1( - const silk_encoder_state *psEncC, /* I Encoder State */ + const silk_encoder_state *psEncC, /* I/O Encoder State */ silk_nsq_state *NSQ, /* I/O NSQ state */ SideInfoIndices *psIndices, /* I/O Quantization Indices */ const opus_int32 x_Q3[], /* I Prefiltered input signal */ @@ -113,7 +110,7 @@ void silk_NSQ_sse4_1( #else extern void (*const SILK_NSQ_IMPL[OPUS_ARCHMASK + 1])( - const silk_encoder_state *psEncC, /* I Encoder State */ + const silk_encoder_state *psEncC, /* I/O Encoder State */ silk_nsq_state *NSQ, /* I/O NSQ state */ SideInfoIndices *psIndices, /* I/O Quantization Indices */ const opus_int32 x_Q3[], /* I Prefiltered input signal */ @@ -140,7 +137,7 @@ extern void (*const SILK_NSQ_IMPL[OPUS_ARCHMASK + 1])( # define OVERRIDE_silk_NSQ_del_dec void silk_NSQ_del_dec_sse4_1( - const silk_encoder_state *psEncC, /* I Encoder State */ + const silk_encoder_state *psEncC, /* I/O Encoder State */ silk_nsq_state *NSQ, /* I/O NSQ state */ SideInfoIndices *psIndices, /* I/O Quantization Indices */ const opus_int32 x_Q3[], /* I Prefiltered input signal */ @@ -167,7 +164,7 @@ void silk_NSQ_del_dec_sse4_1( #else extern void (*const SILK_NSQ_DEL_DEC_IMPL[OPUS_ARCHMASK + 1])( - const silk_encoder_state *psEncC, /* I Encoder State */ + const silk_encoder_state *psEncC, /* I/O Encoder State */ silk_nsq_state *NSQ, /* I/O NSQ state */ SideInfoIndices *psIndices, /* I/O Quantization Indices */ const opus_int32 x_Q3[], /* I Prefiltered input signal */ @@ -190,7 +187,6 @@ extern void (*const SILK_NSQ_DEL_DEC_IMPL[OPUS_ARCHMASK + 1])( HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14)) #endif -#endif void silk_noise_shape_quantizer( silk_nsq_state *NSQ, /* I/O NSQ state */ @@ -242,6 +238,39 @@ extern opus_int (*const SILK_VAD_GETSA_Q8_IMPL[OPUS_ARCHMASK + 1])( silk_encoder_state *psEnC, const opus_int16 pIn[]); +# define OVERRIDE_silk_warped_LPC_analysis_filter_FIX + +#endif + +void silk_warped_LPC_analysis_filter_FIX_sse4_1( + opus_int32 state[], /* I/O State [order + 1] */ + opus_int32 res_Q2[], /* O Residual signal [length] */ + const opus_int16 coef_Q13[], /* I Coefficients [order] */ + const opus_int16 input[], /* I Input signal [length] */ + const opus_int16 lambda_Q16, /* I Warping factor */ + const opus_int length, /* I Length of input signal */ + const opus_int order /* I Filter order (even) */ +); + +#if defined(OPUS_X86_PRESUME_SSE4_1) +#define silk_warped_LPC_analysis_filter_FIX(state, res_Q2, coef_Q13, input, lambda_Q16, length, order, arch) \ + ((void)(arch),silk_warped_LPC_analysis_filter_FIX_c(state, res_Q2, coef_Q13, input, lambda_Q16, length, order)) + +#else + +extern void (*const SILK_WARPED_LPC_ANALYSIS_FILTER_FIX_IMPL[OPUS_ARCHMASK + 1])( + opus_int32 state[], /* I/O State [order + 1] */ + opus_int32 res_Q2[], /* O Residual signal [length] */ + const opus_int16 coef_Q13[], /* I Coefficients [order] */ + const opus_int16 input[], /* I Input signal [length] */ + const opus_int16 lambda_Q16, /* I Warping factor */ + const opus_int length, /* I Length of input signal */ + const opus_int order /* I Filter order (even) */ +); + +# define silk_warped_LPC_analysis_filter_FIX(state, res_Q2, coef_Q13, input, lambda_Q16, length, order, arch) \ + ((*SILK_WARPED_LPC_ANALYSIS_FILTER_FIX_IMPL[(arch) & OPUS_ARCHMASK])(state, res_Q2, coef_Q13, input, lambda_Q16, length, order)) + #endif # endif diff --git a/thirdparty/opus/silk/x86/x86_silk_map.c b/thirdparty/opus/silk/x86/x86_silk_map.c index 32dcc3cab7..818841f2c1 100644 --- a/thirdparty/opus/silk/x86/x86_silk_map.c +++ b/thirdparty/opus/silk/x86/x86_silk_map.c @@ -66,9 +66,8 @@ opus_int (*const SILK_VAD_GETSA_Q8_IMPL[ OPUS_ARCHMASK + 1 ] )( MAY_HAVE_SSE4_1( silk_VAD_GetSA_Q8 ) /* avx */ }; -#if 0 /* FIXME: SSE disabled until the NSQ code gets updated. */ void (*const SILK_NSQ_IMPL[ OPUS_ARCHMASK + 1 ] )( - const silk_encoder_state *psEncC, /* I Encoder State */ + const silk_encoder_state *psEncC, /* I/O Encoder State */ silk_nsq_state *NSQ, /* I/O NSQ state */ SideInfoIndices *psIndices, /* I/O Quantization Indices */ const opus_int32 x_Q3[], /* I Prefiltered input signal */ @@ -90,9 +89,7 @@ void (*const SILK_NSQ_IMPL[ OPUS_ARCHMASK + 1 ] )( MAY_HAVE_SSE4_1( silk_NSQ ), /* sse4.1 */ MAY_HAVE_SSE4_1( silk_NSQ ) /* avx */ }; -#endif -#if 0 /* FIXME: SSE disabled until silk_VQ_WMat_EC_sse4_1() gets updated. */ void (*const SILK_VQ_WMAT_EC_IMPL[ OPUS_ARCHMASK + 1 ] )( opus_int8 *ind, /* O index of best codebook vector */ opus_int32 *rate_dist_Q14, /* O best weighted quant error + mu * rate */ @@ -112,11 +109,9 @@ void (*const SILK_VQ_WMAT_EC_IMPL[ OPUS_ARCHMASK + 1 ] )( MAY_HAVE_SSE4_1( silk_VQ_WMat_EC ), /* sse4.1 */ MAY_HAVE_SSE4_1( silk_VQ_WMat_EC ) /* avx */ }; -#endif -#if 0 /* FIXME: SSE disabled until the NSQ code gets updated. */ void (*const SILK_NSQ_DEL_DEC_IMPL[ OPUS_ARCHMASK + 1 ] )( - const silk_encoder_state *psEncC, /* I Encoder State */ + const silk_encoder_state *psEncC, /* I/O Encoder State */ silk_nsq_state *NSQ, /* I/O NSQ state */ SideInfoIndices *psIndices, /* I/O Quantization Indices */ const opus_int32 x_Q3[], /* I Prefiltered input signal */ @@ -138,10 +133,25 @@ void (*const SILK_NSQ_DEL_DEC_IMPL[ OPUS_ARCHMASK + 1 ] )( MAY_HAVE_SSE4_1( silk_NSQ_del_dec ), /* sse4.1 */ MAY_HAVE_SSE4_1( silk_NSQ_del_dec ) /* avx */ }; -#endif #if defined(FIXED_POINT) +void (*const SILK_WARPED_LPC_ANALYSIS_FILTER_FIX_IMPL[ OPUS_ARCHMASK + 1 ] )( + opus_int32 state[], /* I/O State [order + 1] */ + opus_int32 res_Q2[], /* O Residual signal [length] */ + const opus_int16 coef_Q13[], /* I Coefficients [order] */ + const opus_int16 input[], /* I Input signal [length] */ + const opus_int16 lambda_Q16, /* I Warping factor */ + const opus_int length, /* I Length of input signal */ + const opus_int order /* I Filter order (even) */ +) = { + silk_warped_LPC_analysis_filter_FIX_c, /* non-sse */ + silk_warped_LPC_analysis_filter_FIX_c, + silk_warped_LPC_analysis_filter_FIX_c, + MAY_HAVE_SSE4_1( silk_warped_LPC_analysis_filter_FIX ), /* sse4.1 */ + MAY_HAVE_SSE4_1( silk_warped_LPC_analysis_filter_FIX ) /* avx */ +}; + void (*const SILK_BURG_MODIFIED_IMPL[ OPUS_ARCHMASK + 1 ] )( opus_int32 *res_nrg, /* O Residual energy */ opus_int *res_nrg_Q, /* O Residual energy Q value */ diff --git a/thirdparty/opus/stream.c b/thirdparty/opus/stream.c index 6a85197a66..0238a6b31b 100644 --- a/thirdparty/opus/stream.c +++ b/thirdparty/opus/stream.c @@ -235,7 +235,8 @@ void *op_fopen(OpusFileCallbacks *_cb,const char *_path,const char *_mode){ fp=fopen(_path,_mode); #else fp=NULL; - { + if(_path==NULL||_mode==NULL)errno=EINVAL; + else{ wchar_t *wpath; wchar_t *wmode; wpath=op_utf8_to_utf16(_path); @@ -265,7 +266,8 @@ void *op_freopen(OpusFileCallbacks *_cb,const char *_path,const char *_mode, fp=freopen(_path,_mode,(FILE *)_stream); #else fp=NULL; - { + if(_path==NULL||_mode==NULL)errno=EINVAL; + else{ wchar_t *wpath; wchar_t *wmode; wpath=op_utf8_to_utf16(_path); |