diff options
author | Rémi Verschelde <rverschelde@gmail.com> | 2019-11-18 09:56:18 +0100 |
---|---|---|
committer | Rémi Verschelde <rverschelde@gmail.com> | 2019-11-18 09:56:48 +0100 |
commit | 46ae64cd60166ead412bacc1bf03e9c8f8965e2c (patch) | |
tree | 9e592667ffa91e55491a66733e5e3d7de0b666c9 /thirdparty/opus/silk/fixed | |
parent | 974646309bfe09c48c8a72bf751b0ea6ad8b5bc5 (diff) |
Revert "Update opus to 1.3.1 and opusfile to 0.11"
This reverts commit e00426c512a7905f5f925d382c443bab7a0ca693.
The way we handle platform-specific intrinsics is not good, so the
current state will not compile on armv8. This commit also requires
SSE4.1 support, which is likely not a good idea for portable binaries.
We'll have to redo this with more caution after 3.2 is released, or
we might simply drop opus as we're only using it as dependency for
theora right now.
Fixes #33606.
Diffstat (limited to 'thirdparty/opus/silk/fixed')
28 files changed, 1043 insertions, 689 deletions
diff --git a/thirdparty/opus/silk/fixed/apply_sine_window_FIX.c b/thirdparty/opus/silk/fixed/apply_sine_window_FIX.c index 03e088a6de..4502b7130e 100644 --- a/thirdparty/opus/silk/fixed/apply_sine_window_FIX.c +++ b/thirdparty/opus/silk/fixed/apply_sine_window_FIX.c @@ -57,15 +57,15 @@ void silk_apply_sine_window( opus_int k, f_Q16, c_Q16; opus_int32 S0_Q16, S1_Q16; - celt_assert( win_type == 1 || win_type == 2 ); + silk_assert( win_type == 1 || win_type == 2 ); /* Length must be in a range from 16 to 120 and a multiple of 4 */ - celt_assert( length >= 16 && length <= 120 ); - celt_assert( ( length & 3 ) == 0 ); + silk_assert( length >= 16 && length <= 120 ); + silk_assert( ( length & 3 ) == 0 ); /* Frequency */ k = ( length >> 2 ) - 4; - celt_assert( k >= 0 && k <= 26 ); + silk_assert( k >= 0 && k <= 26 ); f_Q16 = (opus_int)freq_table_Q16[ k ]; /* Factor used for cosine approximation */ diff --git a/thirdparty/opus/silk/fixed/arm/warped_autocorrelation_FIX_arm.h b/thirdparty/opus/silk/fixed/arm/warped_autocorrelation_FIX_arm.h deleted file mode 100644 index 1992e43288..0000000000 --- a/thirdparty/opus/silk/fixed/arm/warped_autocorrelation_FIX_arm.h +++ /dev/null @@ -1,68 +0,0 @@ -/*********************************************************************** -Copyright (c) 2017 Google Inc. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifndef SILK_WARPED_AUTOCORRELATION_FIX_ARM_H -# define SILK_WARPED_AUTOCORRELATION_FIX_ARM_H - -# include "celt/arm/armcpu.h" - -# if defined(FIXED_POINT) - -# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) -void silk_warped_autocorrelation_FIX_neon( - opus_int32 *corr, /* O Result [order + 1] */ - opus_int *scale, /* O Scaling of the correlation vector */ - const opus_int16 *input, /* I Input data to correlate */ - const opus_int warping_Q16, /* I Warping coefficient */ - const opus_int length, /* I Length of input */ - const opus_int order /* I Correlation order (even) */ -); - -# if !defined(OPUS_HAVE_RTCD) && defined(OPUS_ARM_PRESUME_NEON) -# define OVERRIDE_silk_warped_autocorrelation_FIX (1) -# define silk_warped_autocorrelation_FIX(corr, scale, input, warping_Q16, length, order, arch) \ - ((void)(arch), PRESUME_NEON(silk_warped_autocorrelation_FIX)(corr, scale, input, warping_Q16, length, order)) -# endif -# endif - -# if !defined(OVERRIDE_silk_warped_autocorrelation_FIX) -/*Is run-time CPU detection enabled on this platform?*/ -# if defined(OPUS_HAVE_RTCD) && (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR)) -extern void (*const SILK_WARPED_AUTOCORRELATION_FIX_IMPL[OPUS_ARCHMASK+1])(opus_int32*, opus_int*, const opus_int16*, const opus_int, const opus_int, const opus_int); -# define OVERRIDE_silk_warped_autocorrelation_FIX (1) -# define silk_warped_autocorrelation_FIX(corr, scale, input, warping_Q16, length, order, arch) \ - ((*SILK_WARPED_AUTOCORRELATION_FIX_IMPL[(arch)&OPUS_ARCHMASK])(corr, scale, input, warping_Q16, length, order)) -# elif defined(OPUS_ARM_PRESUME_NEON_INTR) -# define OVERRIDE_silk_warped_autocorrelation_FIX (1) -# define silk_warped_autocorrelation_FIX(corr, scale, input, warping_Q16, length, order, arch) \ - ((void)(arch), silk_warped_autocorrelation_FIX_neon(corr, scale, input, warping_Q16, length, order)) -# endif -# endif - -# endif /* end FIXED_POINT */ - -#endif /* end SILK_WARPED_AUTOCORRELATION_FIX_ARM_H */ diff --git a/thirdparty/opus/silk/fixed/arm/warped_autocorrelation_FIX_neon_intr.c b/thirdparty/opus/silk/fixed/arm/warped_autocorrelation_FIX_neon_intr.c deleted file mode 100644 index 00a70cb51f..0000000000 --- a/thirdparty/opus/silk/fixed/arm/warped_autocorrelation_FIX_neon_intr.c +++ /dev/null @@ -1,260 +0,0 @@ -/*********************************************************************** -Copyright (c) 2017 Google Inc., Jean-Marc Valin -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include <arm_neon.h> -#ifdef OPUS_CHECK_ASM -# include <string.h> -#endif -#include "stack_alloc.h" -#include "main_FIX.h" - -static OPUS_INLINE void calc_corr( const opus_int32 *const input_QS, opus_int64 *const corr_QC, const opus_int offset, const int32x4_t state_QS_s32x4 ) -{ - int64x2_t corr_QC_s64x2[ 2 ], t_s64x2[ 2 ]; - const int32x4_t input_QS_s32x4 = vld1q_s32( input_QS + offset ); - corr_QC_s64x2[ 0 ] = vld1q_s64( corr_QC + offset + 0 ); - corr_QC_s64x2[ 1 ] = vld1q_s64( corr_QC + offset + 2 ); - t_s64x2[ 0 ] = vmull_s32( vget_low_s32( state_QS_s32x4 ), vget_low_s32( input_QS_s32x4 ) ); - t_s64x2[ 1 ] = vmull_s32( vget_high_s32( state_QS_s32x4 ), vget_high_s32( input_QS_s32x4 ) ); - corr_QC_s64x2[ 0 ] = vsraq_n_s64( corr_QC_s64x2[ 0 ], t_s64x2[ 0 ], 2 * QS - QC ); - corr_QC_s64x2[ 1 ] = vsraq_n_s64( corr_QC_s64x2[ 1 ], t_s64x2[ 1 ], 2 * QS - QC ); - vst1q_s64( corr_QC + offset + 0, corr_QC_s64x2[ 0 ] ); - vst1q_s64( corr_QC + offset + 2, corr_QC_s64x2[ 1 ] ); -} - -static OPUS_INLINE int32x4_t calc_state( const int32x4_t state_QS0_s32x4, const int32x4_t state_QS0_1_s32x4, const int32x4_t state_QS1_1_s32x4, const int32x4_t warping_Q16_s32x4 ) -{ - int32x4_t t_s32x4 = vsubq_s32( state_QS0_s32x4, state_QS0_1_s32x4 ); - t_s32x4 = vqdmulhq_s32( t_s32x4, warping_Q16_s32x4 ); - return vaddq_s32( state_QS1_1_s32x4, t_s32x4 ); -} - -void silk_warped_autocorrelation_FIX_neon( - opus_int32 *corr, /* O Result [order + 1] */ - opus_int *scale, /* O Scaling of the correlation vector */ - const opus_int16 *input, /* I Input data to correlate */ - const opus_int warping_Q16, /* I Warping coefficient */ - const opus_int length, /* I Length of input */ - const opus_int order /* I Correlation order (even) */ -) -{ - if( ( MAX_SHAPE_LPC_ORDER > 24 ) || ( order < 6 ) ) { - silk_warped_autocorrelation_FIX_c( corr, scale, input, warping_Q16, length, order ); - } else { - opus_int n, i, lsh; - opus_int64 corr_QC[ MAX_SHAPE_LPC_ORDER + 1 ] = { 0 }; /* In reverse order */ - opus_int64 corr_QC_orderT; - int64x2_t lsh_s64x2; - const opus_int orderT = ( order + 3 ) & ~3; - opus_int64 *corr_QCT; - opus_int32 *input_QS; - VARDECL( opus_int32, input_QST ); - VARDECL( opus_int32, state ); - SAVE_STACK; - - /* Order must be even */ - silk_assert( ( order & 1 ) == 0 ); - silk_assert( 2 * QS - QC >= 0 ); - - ALLOC( input_QST, length + 2 * MAX_SHAPE_LPC_ORDER, opus_int32 ); - - input_QS = input_QST; - /* input_QS has zero paddings in the beginning and end. */ - vst1q_s32( input_QS, vdupq_n_s32( 0 ) ); - input_QS += 4; - vst1q_s32( input_QS, vdupq_n_s32( 0 ) ); - input_QS += 4; - vst1q_s32( input_QS, vdupq_n_s32( 0 ) ); - input_QS += 4; - vst1q_s32( input_QS, vdupq_n_s32( 0 ) ); - input_QS += 4; - vst1q_s32( input_QS, vdupq_n_s32( 0 ) ); - input_QS += 4; - vst1q_s32( input_QS, vdupq_n_s32( 0 ) ); - input_QS += 4; - - /* Loop over samples */ - for( n = 0; n < length - 7; n += 8, input_QS += 8 ) { - const int16x8_t t0_s16x4 = vld1q_s16( input + n ); - vst1q_s32( input_QS + 0, vshll_n_s16( vget_low_s16( t0_s16x4 ), QS ) ); - vst1q_s32( input_QS + 4, vshll_n_s16( vget_high_s16( t0_s16x4 ), QS ) ); - } - for( ; n < length; n++, input_QS++ ) { - input_QS[ 0 ] = silk_LSHIFT32( (opus_int32)input[ n ], QS ); - } - vst1q_s32( input_QS, vdupq_n_s32( 0 ) ); - input_QS += 4; - vst1q_s32( input_QS, vdupq_n_s32( 0 ) ); - input_QS += 4; - vst1q_s32( input_QS, vdupq_n_s32( 0 ) ); - input_QS += 4; - vst1q_s32( input_QS, vdupq_n_s32( 0 ) ); - input_QS += 4; - vst1q_s32( input_QS, vdupq_n_s32( 0 ) ); - input_QS += 4; - vst1q_s32( input_QS, vdupq_n_s32( 0 ) ); - input_QS = input_QST + MAX_SHAPE_LPC_ORDER - orderT; - - /* The following loop runs ( length + order ) times, with ( order ) extra epilogues. */ - /* The zero paddings in input_QS guarantee corr_QC's correctness even with the extra epilogues. */ - /* The values of state_QS will be polluted by the extra epilogues, however they are temporary values. */ - - /* Keep the C code here to help understand the intrinsics optimization. */ - /* - { - opus_int32 state_QS[ 2 ][ MAX_SHAPE_LPC_ORDER + 1 ] = { 0 }; - opus_int32 *state_QST[ 3 ]; - state_QST[ 0 ] = state_QS[ 0 ]; - state_QST[ 1 ] = state_QS[ 1 ]; - for( n = 0; n < length + order; n++, input_QS++ ) { - state_QST[ 0 ][ orderT ] = input_QS[ orderT ]; - for( i = 0; i < orderT; i++ ) { - corr_QC[ i ] += silk_RSHIFT64( silk_SMULL( state_QST[ 0 ][ i ], input_QS[ i ] ), 2 * QS - QC ); - state_QST[ 1 ][ i ] = silk_SMLAWB( state_QST[ 1 ][ i + 1 ], state_QST[ 0 ][ i ] - state_QST[ 0 ][ i + 1 ], warping_Q16 ); - } - state_QST[ 2 ] = state_QST[ 0 ]; - state_QST[ 0 ] = state_QST[ 1 ]; - state_QST[ 1 ] = state_QST[ 2 ]; - } - } - */ - - { - const int32x4_t warping_Q16_s32x4 = vdupq_n_s32( warping_Q16 << 15 ); - const opus_int32 *in = input_QS + orderT; - opus_int o = orderT; - int32x4_t state_QS_s32x4[ 3 ][ 2 ]; - - ALLOC( state, length + orderT, opus_int32 ); - state_QS_s32x4[ 2 ][ 1 ] = vdupq_n_s32( 0 ); - - /* Calculate 8 taps of all inputs in each loop. */ - do { - state_QS_s32x4[ 0 ][ 0 ] = state_QS_s32x4[ 0 ][ 1 ] = - state_QS_s32x4[ 1 ][ 0 ] = state_QS_s32x4[ 1 ][ 1 ] = vdupq_n_s32( 0 ); - n = 0; - do { - calc_corr( input_QS + n, corr_QC, o - 8, state_QS_s32x4[ 0 ][ 0 ] ); - calc_corr( input_QS + n, corr_QC, o - 4, state_QS_s32x4[ 0 ][ 1 ] ); - state_QS_s32x4[ 2 ][ 1 ] = vld1q_s32( in + n ); - vst1q_lane_s32( state + n, state_QS_s32x4[ 0 ][ 0 ], 0 ); - state_QS_s32x4[ 2 ][ 0 ] = vextq_s32( state_QS_s32x4[ 0 ][ 0 ], state_QS_s32x4[ 0 ][ 1 ], 1 ); - state_QS_s32x4[ 2 ][ 1 ] = vextq_s32( state_QS_s32x4[ 0 ][ 1 ], state_QS_s32x4[ 2 ][ 1 ], 1 ); - state_QS_s32x4[ 0 ][ 0 ] = calc_state( state_QS_s32x4[ 0 ][ 0 ], state_QS_s32x4[ 2 ][ 0 ], state_QS_s32x4[ 1 ][ 0 ], warping_Q16_s32x4 ); - state_QS_s32x4[ 0 ][ 1 ] = calc_state( state_QS_s32x4[ 0 ][ 1 ], state_QS_s32x4[ 2 ][ 1 ], state_QS_s32x4[ 1 ][ 1 ], warping_Q16_s32x4 ); - state_QS_s32x4[ 1 ][ 0 ] = state_QS_s32x4[ 2 ][ 0 ]; - state_QS_s32x4[ 1 ][ 1 ] = state_QS_s32x4[ 2 ][ 1 ]; - } while( ++n < ( length + order ) ); - in = state; - o -= 8; - } while( o > 4 ); - - if( o ) { - /* Calculate the last 4 taps of all inputs. */ - opus_int32 *stateT = state; - silk_assert( o == 4 ); - state_QS_s32x4[ 0 ][ 0 ] = state_QS_s32x4[ 1 ][ 0 ] = vdupq_n_s32( 0 ); - n = length + order; - do { - calc_corr( input_QS, corr_QC, 0, state_QS_s32x4[ 0 ][ 0 ] ); - state_QS_s32x4[ 2 ][ 0 ] = vld1q_s32( stateT ); - vst1q_lane_s32( stateT, state_QS_s32x4[ 0 ][ 0 ], 0 ); - state_QS_s32x4[ 2 ][ 0 ] = vextq_s32( state_QS_s32x4[ 0 ][ 0 ], state_QS_s32x4[ 2 ][ 0 ], 1 ); - state_QS_s32x4[ 0 ][ 0 ] = calc_state( state_QS_s32x4[ 0 ][ 0 ], state_QS_s32x4[ 2 ][ 0 ], state_QS_s32x4[ 1 ][ 0 ], warping_Q16_s32x4 ); - state_QS_s32x4[ 1 ][ 0 ] = state_QS_s32x4[ 2 ][ 0 ]; - input_QS++; - stateT++; - } while( --n ); - } - } - - { - const opus_int16 *inputT = input; - int32x4_t t_s32x4; - int64x1_t t_s64x1; - int64x2_t t_s64x2 = vdupq_n_s64( 0 ); - for( n = 0; n <= length - 8; n += 8 ) { - int16x8_t input_s16x8 = vld1q_s16( inputT ); - t_s32x4 = vmull_s16( vget_low_s16( input_s16x8 ), vget_low_s16( input_s16x8 ) ); - t_s32x4 = vmlal_s16( t_s32x4, vget_high_s16( input_s16x8 ), vget_high_s16( input_s16x8 ) ); - t_s64x2 = vaddw_s32( t_s64x2, vget_low_s32( t_s32x4 ) ); - t_s64x2 = vaddw_s32( t_s64x2, vget_high_s32( t_s32x4 ) ); - inputT += 8; - } - t_s64x1 = vadd_s64( vget_low_s64( t_s64x2 ), vget_high_s64( t_s64x2 ) ); - corr_QC_orderT = vget_lane_s64( t_s64x1, 0 ); - for( ; n < length; n++ ) { - corr_QC_orderT += silk_SMULL( input[ n ], input[ n ] ); - } - corr_QC_orderT = silk_LSHIFT64( corr_QC_orderT, QC ); - corr_QC[ orderT ] = corr_QC_orderT; - } - - corr_QCT = corr_QC + orderT - order; - lsh = silk_CLZ64( corr_QC_orderT ) - 35; - lsh = silk_LIMIT( lsh, -12 - QC, 30 - QC ); - *scale = -( QC + lsh ); - silk_assert( *scale >= -30 && *scale <= 12 ); - lsh_s64x2 = vdupq_n_s64( lsh ); - for( i = 0; i <= order - 3; i += 4 ) { - int32x4_t corr_s32x4; - int64x2_t corr_QC0_s64x2, corr_QC1_s64x2; - corr_QC0_s64x2 = vld1q_s64( corr_QCT + i ); - corr_QC1_s64x2 = vld1q_s64( corr_QCT + i + 2 ); - corr_QC0_s64x2 = vshlq_s64( corr_QC0_s64x2, lsh_s64x2 ); - corr_QC1_s64x2 = vshlq_s64( corr_QC1_s64x2, lsh_s64x2 ); - corr_s32x4 = vcombine_s32( vmovn_s64( corr_QC1_s64x2 ), vmovn_s64( corr_QC0_s64x2 ) ); - corr_s32x4 = vrev64q_s32( corr_s32x4 ); - vst1q_s32( corr + order - i - 3, corr_s32x4 ); - } - if( lsh >= 0 ) { - for( ; i < order + 1; i++ ) { - corr[ order - i ] = (opus_int32)silk_CHECK_FIT32( silk_LSHIFT64( corr_QCT[ i ], lsh ) ); - } - } else { - for( ; i < order + 1; i++ ) { - corr[ order - i ] = (opus_int32)silk_CHECK_FIT32( silk_RSHIFT64( corr_QCT[ i ], -lsh ) ); - } - } - silk_assert( corr_QCT[ order ] >= 0 ); /* If breaking, decrease QC*/ - RESTORE_STACK; - } - -#ifdef OPUS_CHECK_ASM - { - opus_int32 corr_c[ MAX_SHAPE_LPC_ORDER + 1 ]; - opus_int scale_c; - silk_warped_autocorrelation_FIX_c( corr_c, &scale_c, input, warping_Q16, length, order ); - silk_assert( !memcmp( corr_c, corr, sizeof( corr_c[ 0 ] ) * ( order + 1 ) ) ); - silk_assert( scale_c == *scale ); - } -#endif -} diff --git a/thirdparty/opus/silk/fixed/burg_modified_FIX.c b/thirdparty/opus/silk/fixed/burg_modified_FIX.c index 274d4b28e1..17d0e0993c 100644 --- a/thirdparty/opus/silk/fixed/burg_modified_FIX.c +++ b/thirdparty/opus/silk/fixed/burg_modified_FIX.c @@ -37,7 +37,7 @@ POSSIBILITY OF SUCH DAMAGE. #define MAX_FRAME_SIZE 384 /* subfr_length * nb_subfr = ( 0.005 * 16000 + 16 ) * 4 = 384 */ #define QA 25 -#define N_BITS_HEAD_ROOM 3 +#define N_BITS_HEAD_ROOM 2 #define MIN_RSHIFTS -16 #define MAX_RSHIFTS (32 - QA) @@ -65,7 +65,7 @@ void silk_burg_modified_c( opus_int32 xcorr[ SILK_MAX_ORDER_LPC ]; opus_int64 C0_64; - celt_assert( subfr_length * nb_subfr <= MAX_FRAME_SIZE ); + silk_assert( subfr_length * nb_subfr <= MAX_FRAME_SIZE ); /* Compute autocorrelations, added over subframes */ C0_64 = silk_inner_prod16_aligned_64( x, x, subfr_length*nb_subfr, arch ); diff --git a/thirdparty/opus/silk/fixed/corrMatrix_FIX.c b/thirdparty/opus/silk/fixed/corrMatrix_FIX.c index 1b4a29c232..c1d437c785 100644 --- a/thirdparty/opus/silk/fixed/corrMatrix_FIX.c +++ b/thirdparty/opus/silk/fixed/corrMatrix_FIX.c @@ -58,7 +58,7 @@ void silk_corrVector_FIX( for( lag = 0; lag < order; lag++ ) { inner_prod = 0; for( i = 0; i < L; i++ ) { - inner_prod = silk_ADD_RSHIFT32( inner_prod, silk_SMULBB( ptr1[ i ], ptr2[i] ), rshifts ); + inner_prod += silk_RSHIFT32( silk_SMULBB( ptr1[ i ], ptr2[i] ), rshifts ); } Xt[ lag ] = inner_prod; /* X[:,lag]'*t */ ptr1--; /* Go to next column of X */ @@ -77,54 +77,61 @@ void silk_corrMatrix_FIX( const opus_int16 *x, /* I x vector [L + order - 1] used to form data matrix X */ const opus_int L, /* I Length of vectors */ const opus_int order, /* I Max lag for correlation */ + const opus_int head_room, /* I Desired headroom */ opus_int32 *XX, /* O Pointer to X'*X correlation matrix [ order x order ] */ - opus_int32 *nrg, /* O Energy of x vector */ - opus_int *rshifts, /* O Right shifts of correlations and energy */ + opus_int *rshifts, /* I/O Right shifts of correlations */ int arch /* I Run-time architecture */ ) { - opus_int i, j, lag; + opus_int i, j, lag, rshifts_local, head_room_rshifts; opus_int32 energy; const opus_int16 *ptr1, *ptr2; /* Calculate energy to find shift used to fit in 32 bits */ - silk_sum_sqr_shift( nrg, rshifts, x, L + order - 1 ); - energy = *nrg; + silk_sum_sqr_shift( &energy, &rshifts_local, x, L + order - 1 ); + /* Add shifts to get the desired head room */ + head_room_rshifts = silk_max( head_room - silk_CLZ32( energy ), 0 ); + + energy = silk_RSHIFT32( energy, head_room_rshifts ); + rshifts_local += head_room_rshifts; /* Calculate energy of first column (0) of X: X[:,0]'*X[:,0] */ /* Remove contribution of first order - 1 samples */ for( i = 0; i < order - 1; i++ ) { - energy -= silk_RSHIFT32( silk_SMULBB( x[ i ], x[ i ] ), *rshifts ); + energy -= silk_RSHIFT32( silk_SMULBB( x[ i ], x[ i ] ), rshifts_local ); + } + if( rshifts_local < *rshifts ) { + /* Adjust energy */ + energy = silk_RSHIFT32( energy, *rshifts - rshifts_local ); + rshifts_local = *rshifts; } /* Calculate energy of remaining columns of X: X[:,j]'*X[:,j] */ /* Fill out the diagonal of the correlation matrix */ matrix_ptr( XX, 0, 0, order ) = energy; - silk_assert( energy >= 0 ); ptr1 = &x[ order - 1 ]; /* First sample of column 0 of X */ for( j = 1; j < order; j++ ) { - energy = silk_SUB32( energy, silk_RSHIFT32( silk_SMULBB( ptr1[ L - j ], ptr1[ L - j ] ), *rshifts ) ); - energy = silk_ADD32( energy, silk_RSHIFT32( silk_SMULBB( ptr1[ -j ], ptr1[ -j ] ), *rshifts ) ); + energy = silk_SUB32( energy, silk_RSHIFT32( silk_SMULBB( ptr1[ L - j ], ptr1[ L - j ] ), rshifts_local ) ); + energy = silk_ADD32( energy, silk_RSHIFT32( silk_SMULBB( ptr1[ -j ], ptr1[ -j ] ), rshifts_local ) ); matrix_ptr( XX, j, j, order ) = energy; - silk_assert( energy >= 0 ); } ptr2 = &x[ order - 2 ]; /* First sample of column 1 of X */ /* Calculate the remaining elements of the correlation matrix */ - if( *rshifts > 0 ) { + if( rshifts_local > 0 ) { /* Right shifting used */ for( lag = 1; lag < order; lag++ ) { /* Inner product of column 0 and column lag: X[:,0]'*X[:,lag] */ energy = 0; for( i = 0; i < L; i++ ) { - energy += silk_RSHIFT32( silk_SMULBB( ptr1[ i ], ptr2[i] ), *rshifts ); + energy += silk_RSHIFT32( silk_SMULBB( ptr1[ i ], ptr2[i] ), rshifts_local ); } /* Calculate remaining off diagonal: X[:,j]'*X[:,j + lag] */ matrix_ptr( XX, lag, 0, order ) = energy; matrix_ptr( XX, 0, lag, order ) = energy; for( j = 1; j < ( order - lag ); j++ ) { - energy = silk_SUB32( energy, silk_RSHIFT32( silk_SMULBB( ptr1[ L - j ], ptr2[ L - j ] ), *rshifts ) ); - energy = silk_ADD32( energy, silk_RSHIFT32( silk_SMULBB( ptr1[ -j ], ptr2[ -j ] ), *rshifts ) ); + energy = silk_SUB32( energy, silk_RSHIFT32( silk_SMULBB( ptr1[ L - j ], ptr2[ L - j ] ), rshifts_local ) ); + energy = silk_ADD32( energy, silk_RSHIFT32( silk_SMULBB( ptr1[ -j ], ptr2[ -j ] ), rshifts_local ) ); matrix_ptr( XX, lag + j, j, order ) = energy; matrix_ptr( XX, j, lag + j, order ) = energy; } @@ -146,5 +153,6 @@ void silk_corrMatrix_FIX( ptr2--;/* Update pointer to first sample of next column (lag) in X */ } } + *rshifts = rshifts_local; } diff --git a/thirdparty/opus/silk/fixed/encode_frame_FIX.c b/thirdparty/opus/silk/fixed/encode_frame_FIX.c index a02bf87dbb..5ef44b03fc 100644 --- a/thirdparty/opus/silk/fixed/encode_frame_FIX.c +++ b/thirdparty/opus/silk/fixed/encode_frame_FIX.c @@ -29,7 +29,6 @@ POSSIBILITY OF SUCH DAMAGE. #include "config.h" #endif -#include <stdlib.h> #include "main_FIX.h" #include "stack_alloc.h" #include "tuning_parameters.h" @@ -38,33 +37,26 @@ POSSIBILITY OF SUCH DAMAGE. static OPUS_INLINE void silk_LBRR_encode_FIX( silk_encoder_state_FIX *psEnc, /* I/O Pointer to Silk FIX encoder state */ silk_encoder_control_FIX *psEncCtrl, /* I/O Pointer to Silk FIX encoder control struct */ - const opus_int16 x16[], /* I Input signal */ + const opus_int32 xfw_Q3[], /* I Input signal */ opus_int condCoding /* I The type of conditional coding used so far for this frame */ ); void silk_encode_do_VAD_FIX( - silk_encoder_state_FIX *psEnc, /* I/O Pointer to Silk FIX encoder state */ - opus_int activity /* I Decision of Opus voice activity detector */ + silk_encoder_state_FIX *psEnc /* I/O Pointer to Silk FIX encoder state */ ) { - const opus_int activity_threshold = SILK_FIX_CONST( SPEECH_ACTIVITY_DTX_THRES, 8 ); - /****************************/ /* Voice Activity Detection */ /****************************/ silk_VAD_GetSA_Q8( &psEnc->sCmn, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.arch ); - /* If Opus VAD is inactive and Silk VAD is active: lower Silk VAD to just under the threshold */ - if( activity == VAD_NO_ACTIVITY && psEnc->sCmn.speech_activity_Q8 >= activity_threshold ) { - psEnc->sCmn.speech_activity_Q8 = activity_threshold - 1; - } /**************************************************/ /* Convert speech activity into VAD and DTX flags */ /**************************************************/ - if( psEnc->sCmn.speech_activity_Q8 < activity_threshold ) { + if( psEnc->sCmn.speech_activity_Q8 < SILK_FIX_CONST( SPEECH_ACTIVITY_DTX_THRES, 8 ) ) { psEnc->sCmn.indices.signalType = TYPE_NO_VOICE_ACTIVITY; psEnc->sCmn.noSpeechCounter++; - if( psEnc->sCmn.noSpeechCounter <= NB_SPEECH_FRAMES_BEFORE_DTX ) { + if( psEnc->sCmn.noSpeechCounter < NB_SPEECH_FRAMES_BEFORE_DTX ) { psEnc->sCmn.inDTX = 0; } else if( psEnc->sCmn.noSpeechCounter > MAX_CONSECUTIVE_DTX + NB_SPEECH_FRAMES_BEFORE_DTX ) { psEnc->sCmn.noSpeechCounter = NB_SPEECH_FRAMES_BEFORE_DTX; @@ -102,9 +94,6 @@ opus_int silk_encode_frame_FIX( opus_int16 ec_prevLagIndex_copy; opus_int ec_prevSignalType_copy; opus_int8 LastGainIndex_copy2; - opus_int gain_lock[ MAX_NB_SUBFR ] = {0}; - opus_int16 best_gain_mult[ MAX_NB_SUBFR ]; - opus_int best_sum[ MAX_NB_SUBFR ]; SAVE_STACK; /* This is totally unnecessary but many compilers (including gcc) are too dumb to realise it */ @@ -129,6 +118,7 @@ opus_int silk_encode_frame_FIX( silk_memcpy( x_frame + LA_SHAPE_MS * psEnc->sCmn.fs_kHz, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.frame_length * sizeof( opus_int16 ) ); if( !psEnc->sCmn.prefillFlag ) { + VARDECL( opus_int32, xfw_Q3 ); VARDECL( opus_int16, res_pitch ); VARDECL( opus_uint8, ec_buf_copy ); opus_int16 *res_pitch_frame; @@ -142,7 +132,7 @@ opus_int silk_encode_frame_FIX( /*****************************************/ /* Find pitch lags, initial LPC analysis */ /*****************************************/ - silk_find_pitch_lags_FIX( psEnc, &sEncCtrl, res_pitch, x_frame - psEnc->sCmn.ltp_mem_length, psEnc->sCmn.arch ); + silk_find_pitch_lags_FIX( psEnc, &sEncCtrl, res_pitch, x_frame, psEnc->sCmn.arch ); /************************/ /* Noise shape analysis */ @@ -152,17 +142,23 @@ opus_int silk_encode_frame_FIX( /***************************************************/ /* Find linear prediction coefficients (LPC + LTP) */ /***************************************************/ - silk_find_pred_coefs_FIX( psEnc, &sEncCtrl, res_pitch_frame, x_frame, condCoding ); + silk_find_pred_coefs_FIX( psEnc, &sEncCtrl, res_pitch, x_frame, condCoding ); /****************************************/ /* Process gains */ /****************************************/ silk_process_gains_FIX( psEnc, &sEncCtrl, condCoding ); + /*****************************************/ + /* Prefiltering for noise shaper */ + /*****************************************/ + ALLOC( xfw_Q3, psEnc->sCmn.frame_length, opus_int32 ); + silk_prefilter_FIX( psEnc, &sEncCtrl, xfw_Q3, x_frame ); + /****************************************/ /* Low Bitrate Redundant Encoding */ /****************************************/ - silk_LBRR_encode_FIX( psEnc, &sEncCtrl, x_frame, condCoding ); + silk_LBRR_encode_FIX( psEnc, &sEncCtrl, xfw_Q3, condCoding ); /* Loop over quantizer and entropy coding to control bitrate */ maxIter = 6; @@ -198,21 +194,17 @@ opus_int silk_encode_frame_FIX( /* Noise shaping quantization */ /*****************************************/ if( psEnc->sCmn.nStatesDelayedDecision > 1 || psEnc->sCmn.warping_Q16 > 0 ) { - silk_NSQ_del_dec( &psEnc->sCmn, &psEnc->sCmn.sNSQ, &psEnc->sCmn.indices, x_frame, psEnc->sCmn.pulses, - sEncCtrl.PredCoef_Q12[ 0 ], sEncCtrl.LTPCoef_Q14, sEncCtrl.AR_Q13, sEncCtrl.HarmShapeGain_Q14, + silk_NSQ_del_dec( &psEnc->sCmn, &psEnc->sCmn.sNSQ, &psEnc->sCmn.indices, xfw_Q3, psEnc->sCmn.pulses, + sEncCtrl.PredCoef_Q12[ 0 ], sEncCtrl.LTPCoef_Q14, sEncCtrl.AR2_Q13, sEncCtrl.HarmShapeGain_Q14, sEncCtrl.Tilt_Q14, sEncCtrl.LF_shp_Q14, sEncCtrl.Gains_Q16, sEncCtrl.pitchL, sEncCtrl.Lambda_Q10, sEncCtrl.LTP_scale_Q14, psEnc->sCmn.arch ); } else { - silk_NSQ( &psEnc->sCmn, &psEnc->sCmn.sNSQ, &psEnc->sCmn.indices, x_frame, psEnc->sCmn.pulses, - sEncCtrl.PredCoef_Q12[ 0 ], sEncCtrl.LTPCoef_Q14, sEncCtrl.AR_Q13, sEncCtrl.HarmShapeGain_Q14, + silk_NSQ( &psEnc->sCmn, &psEnc->sCmn.sNSQ, &psEnc->sCmn.indices, xfw_Q3, psEnc->sCmn.pulses, + sEncCtrl.PredCoef_Q12[ 0 ], sEncCtrl.LTPCoef_Q14, sEncCtrl.AR2_Q13, sEncCtrl.HarmShapeGain_Q14, sEncCtrl.Tilt_Q14, sEncCtrl.LF_shp_Q14, sEncCtrl.Gains_Q16, sEncCtrl.pitchL, sEncCtrl.Lambda_Q10, sEncCtrl.LTP_scale_Q14, psEnc->sCmn.arch); } - if ( iter == maxIter && !found_lower ) { - silk_memcpy( &sRangeEnc_copy2, psRangeEnc, sizeof( ec_enc ) ); - } - /****************************************/ /* Encode Parameters */ /****************************************/ @@ -226,33 +218,6 @@ opus_int silk_encode_frame_FIX( nBits = ec_tell( psRangeEnc ); - /* If we still bust after the last iteration, do some damage control. */ - if ( iter == maxIter && !found_lower && nBits > maxBits ) { - silk_memcpy( psRangeEnc, &sRangeEnc_copy2, sizeof( ec_enc ) ); - - /* Keep gains the same as the last frame. */ - psEnc->sShape.LastGainIndex = sEncCtrl.lastGainIndexPrev; - for ( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) { - psEnc->sCmn.indices.GainsIndices[ i ] = 4; - } - if (condCoding != CODE_CONDITIONALLY) { - psEnc->sCmn.indices.GainsIndices[ 0 ] = sEncCtrl.lastGainIndexPrev; - } - psEnc->sCmn.ec_prevLagIndex = ec_prevLagIndex_copy; - psEnc->sCmn.ec_prevSignalType = ec_prevSignalType_copy; - /* Clear all pulses. */ - for ( i = 0; i < psEnc->sCmn.frame_length; i++ ) { - psEnc->sCmn.pulses[ i ] = 0; - } - - silk_encode_indices( &psEnc->sCmn, psRangeEnc, psEnc->sCmn.nFramesEncoded, 0, condCoding ); - - silk_encode_pulses( psRangeEnc, psEnc->sCmn.indices.signalType, psEnc->sCmn.indices.quantOffsetType, - psEnc->sCmn.pulses, psEnc->sCmn.frame_length ); - - nBits = ec_tell( psRangeEnc ); - } - if( useCBR == 0 && iter == 0 && nBits <= maxBits ) { break; } @@ -262,7 +227,7 @@ opus_int silk_encode_frame_FIX( if( found_lower && ( gainsID == gainsID_lower || nBits > maxBits ) ) { /* Restore output state from earlier iteration that did meet the bitrate budget */ silk_memcpy( psRangeEnc, &sRangeEnc_copy2, sizeof( ec_enc ) ); - celt_assert( sRangeEnc_copy2.offs <= 1275 ); + silk_assert( sRangeEnc_copy2.offs <= 1275 ); silk_memcpy( psRangeEnc->buf, ec_buf_copy, sRangeEnc_copy2.offs ); silk_memcpy( &psEnc->sCmn.sNSQ, &sNSQ_copy2, sizeof( silk_nsq_state ) ); psEnc->sShape.LastGainIndex = LastGainIndex_copy2; @@ -290,7 +255,7 @@ opus_int silk_encode_frame_FIX( gainsID_lower = gainsID; /* Copy part of the output state */ silk_memcpy( &sRangeEnc_copy2, psRangeEnc, sizeof( ec_enc ) ); - celt_assert( psRangeEnc->offs <= 1275 ); + silk_assert( psRangeEnc->offs <= 1275 ); silk_memcpy( ec_buf_copy, psRangeEnc->buf, psRangeEnc->offs ); silk_memcpy( &sNSQ_copy2, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) ); LastGainIndex_copy2 = psEnc->sShape.LastGainIndex; @@ -300,35 +265,15 @@ opus_int silk_encode_frame_FIX( break; } - if ( !found_lower && nBits > maxBits ) { - int j; - for ( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) { - int sum=0; - for ( j = i*psEnc->sCmn.subfr_length; j < (i+1)*psEnc->sCmn.subfr_length; j++ ) { - sum += abs( psEnc->sCmn.pulses[j] ); - } - if ( iter == 0 || (sum < best_sum[i] && !gain_lock[i]) ) { - best_sum[i] = sum; - best_gain_mult[i] = gainMult_Q8; - } else { - gain_lock[i] = 1; - } - } - } if( ( found_lower & found_upper ) == 0 ) { /* Adjust gain according to high-rate rate/distortion curve */ + opus_int32 gain_factor_Q16; + gain_factor_Q16 = silk_log2lin( silk_LSHIFT( nBits - maxBits, 7 ) / psEnc->sCmn.frame_length + SILK_FIX_CONST( 16, 7 ) ); + gain_factor_Q16 = silk_min_32( gain_factor_Q16, SILK_FIX_CONST( 2, 16 ) ); if( nBits > maxBits ) { - if (gainMult_Q8 < 16384) { - gainMult_Q8 *= 2; - } else { - gainMult_Q8 = 32767; - } - } else { - opus_int32 gain_factor_Q16; - gain_factor_Q16 = silk_log2lin( silk_LSHIFT( nBits - maxBits, 7 ) / psEnc->sCmn.frame_length + SILK_FIX_CONST( 16, 7 ) ); - gainMult_Q8 = silk_SMULWB( gain_factor_Q16, gainMult_Q8 ); + gain_factor_Q16 = silk_max_32( gain_factor_Q16, SILK_FIX_CONST( 1.3, 16 ) ); } - + gainMult_Q8 = silk_SMULWB( gain_factor_Q16, gainMult_Q8 ); } else { /* Adjust gain by interpolating */ gainMult_Q8 = gainMult_lower + silk_DIV32_16( silk_MUL( gainMult_upper - gainMult_lower, maxBits - nBits_lower ), nBits_upper - nBits_lower ); @@ -342,13 +287,7 @@ opus_int silk_encode_frame_FIX( } for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) { - opus_int16 tmp; - if ( gain_lock[i] ) { - tmp = best_gain_mult[i]; - } else { - tmp = gainMult_Q8; - } - sEncCtrl.Gains_Q16[ i ] = silk_LSHIFT_SAT32( silk_SMULWB( sEncCtrl.GainsUnq_Q16[ i ], tmp ), 8 ); + sEncCtrl.Gains_Q16[ i ] = silk_LSHIFT_SAT32( silk_SMULWB( sEncCtrl.GainsUnq_Q16[ i ], gainMult_Q8 ), 8 ); } /* Quantize gains */ @@ -392,7 +331,7 @@ opus_int silk_encode_frame_FIX( static OPUS_INLINE void silk_LBRR_encode_FIX( silk_encoder_state_FIX *psEnc, /* I/O Pointer to Silk FIX encoder state */ silk_encoder_control_FIX *psEncCtrl, /* I/O Pointer to Silk FIX encoder control struct */ - const opus_int16 x16[], /* I Input signal */ + const opus_int32 xfw_Q3[], /* I Input signal */ opus_int condCoding /* I The type of conditional coding used so far for this frame */ ) { @@ -431,14 +370,14 @@ static OPUS_INLINE void silk_LBRR_encode_FIX( /* Noise shaping quantization */ /*****************************************/ if( psEnc->sCmn.nStatesDelayedDecision > 1 || psEnc->sCmn.warping_Q16 > 0 ) { - silk_NSQ_del_dec( &psEnc->sCmn, &sNSQ_LBRR, psIndices_LBRR, x16, + silk_NSQ_del_dec( &psEnc->sCmn, &sNSQ_LBRR, psIndices_LBRR, xfw_Q3, psEnc->sCmn.pulses_LBRR[ psEnc->sCmn.nFramesEncoded ], psEncCtrl->PredCoef_Q12[ 0 ], psEncCtrl->LTPCoef_Q14, - psEncCtrl->AR_Q13, psEncCtrl->HarmShapeGain_Q14, psEncCtrl->Tilt_Q14, psEncCtrl->LF_shp_Q14, + psEncCtrl->AR2_Q13, psEncCtrl->HarmShapeGain_Q14, psEncCtrl->Tilt_Q14, psEncCtrl->LF_shp_Q14, psEncCtrl->Gains_Q16, psEncCtrl->pitchL, psEncCtrl->Lambda_Q10, psEncCtrl->LTP_scale_Q14, psEnc->sCmn.arch ); } else { - silk_NSQ( &psEnc->sCmn, &sNSQ_LBRR, psIndices_LBRR, x16, + silk_NSQ( &psEnc->sCmn, &sNSQ_LBRR, psIndices_LBRR, xfw_Q3, psEnc->sCmn.pulses_LBRR[ psEnc->sCmn.nFramesEncoded ], psEncCtrl->PredCoef_Q12[ 0 ], psEncCtrl->LTPCoef_Q14, - psEncCtrl->AR_Q13, psEncCtrl->HarmShapeGain_Q14, psEncCtrl->Tilt_Q14, psEncCtrl->LF_shp_Q14, + psEncCtrl->AR2_Q13, psEncCtrl->HarmShapeGain_Q14, psEncCtrl->Tilt_Q14, psEncCtrl->LF_shp_Q14, psEncCtrl->Gains_Q16, psEncCtrl->pitchL, psEncCtrl->Lambda_Q10, psEncCtrl->LTP_scale_Q14, psEnc->sCmn.arch ); } diff --git a/thirdparty/opus/silk/fixed/find_LPC_FIX.c b/thirdparty/opus/silk/fixed/find_LPC_FIX.c index c762a0f2a2..e11cdc86e6 100644 --- a/thirdparty/opus/silk/fixed/find_LPC_FIX.c +++ b/thirdparty/opus/silk/fixed/find_LPC_FIX.c @@ -92,7 +92,7 @@ void silk_find_LPC_FIX( silk_interpolate( NLSF0_Q15, psEncC->prev_NLSFq_Q15, NLSF_Q15, k, psEncC->predictLPCOrder ); /* Convert to LPC for residual energy evaluation */ - silk_NLSF2A( a_tmp_Q12, NLSF0_Q15, psEncC->predictLPCOrder, psEncC->arch ); + silk_NLSF2A( a_tmp_Q12, NLSF0_Q15, psEncC->predictLPCOrder ); /* Calculate residual energy with NLSF interpolation */ silk_LPC_analysis_filter( LPC_res, x, a_tmp_Q12, 2 * subfr_length, psEncC->predictLPCOrder, psEncC->arch ); @@ -146,6 +146,6 @@ void silk_find_LPC_FIX( silk_A2NLSF( NLSF_Q15, a_Q16, psEncC->predictLPCOrder ); } - celt_assert( psEncC->indices.NLSFInterpCoef_Q2 == 4 || ( psEncC->useInterpolatedNLSFs && !psEncC->first_frame_after_reset && psEncC->nb_subfr == MAX_NB_SUBFR ) ); + silk_assert( psEncC->indices.NLSFInterpCoef_Q2 == 4 || ( psEncC->useInterpolatedNLSFs && !psEncC->first_frame_after_reset && psEncC->nb_subfr == MAX_NB_SUBFR ) ); RESTORE_STACK; } diff --git a/thirdparty/opus/silk/fixed/find_LTP_FIX.c b/thirdparty/opus/silk/fixed/find_LTP_FIX.c index 62d4afb250..1314a28137 100644 --- a/thirdparty/opus/silk/fixed/find_LTP_FIX.c +++ b/thirdparty/opus/silk/fixed/find_LTP_FIX.c @@ -32,68 +32,214 @@ POSSIBILITY OF SUCH DAMAGE. #include "main_FIX.h" #include "tuning_parameters.h" +/* Head room for correlations */ +#define LTP_CORRS_HEAD_ROOM 2 + +void silk_fit_LTP( + opus_int32 LTP_coefs_Q16[ LTP_ORDER ], + opus_int16 LTP_coefs_Q14[ LTP_ORDER ] +); + void silk_find_LTP_FIX( - opus_int32 XXLTP_Q17[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* O Correlation matrix */ - opus_int32 xXLTP_Q17[ MAX_NB_SUBFR * LTP_ORDER ], /* O Correlation vector */ - const opus_int16 r_ptr[], /* I Residual signal after LPC */ + opus_int16 b_Q14[ MAX_NB_SUBFR * LTP_ORDER ], /* O LTP coefs */ + opus_int32 WLTP[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* O Weight for LTP quantization */ + opus_int *LTPredCodGain_Q7, /* O LTP coding gain */ + const opus_int16 r_lpc[], /* I residual signal after LPC signal + state for first 10 ms */ const opus_int lag[ MAX_NB_SUBFR ], /* I LTP lags */ - const opus_int subfr_length, /* I Subframe length */ - const opus_int nb_subfr, /* I Number of subframes */ + const opus_int32 Wght_Q15[ MAX_NB_SUBFR ], /* I weights */ + const opus_int subfr_length, /* I subframe length */ + const opus_int nb_subfr, /* I number of subframes */ + const opus_int mem_offset, /* I number of samples in LTP memory */ + opus_int corr_rshifts[ MAX_NB_SUBFR ], /* O right shifts applied to correlations */ int arch /* I Run-time architecture */ ) { - opus_int i, k, extra_shifts; - opus_int xx_shifts, xX_shifts, XX_shifts; - const opus_int16 *lag_ptr; - opus_int32 *XXLTP_Q17_ptr, *xXLTP_Q17_ptr; - opus_int32 xx, nrg, temp; - - xXLTP_Q17_ptr = xXLTP_Q17; - XXLTP_Q17_ptr = XXLTP_Q17; + opus_int i, k, lshift; + const opus_int16 *r_ptr, *lag_ptr; + opus_int16 *b_Q14_ptr; + + opus_int32 regu; + opus_int32 *WLTP_ptr; + opus_int32 b_Q16[ LTP_ORDER ], delta_b_Q14[ LTP_ORDER ], d_Q14[ MAX_NB_SUBFR ], nrg[ MAX_NB_SUBFR ], g_Q26; + opus_int32 w[ MAX_NB_SUBFR ], WLTP_max, max_abs_d_Q14, max_w_bits; + + opus_int32 temp32, denom32; + opus_int extra_shifts; + opus_int rr_shifts, maxRshifts, maxRshifts_wxtra, LZs; + opus_int32 LPC_res_nrg, LPC_LTP_res_nrg, div_Q16; + opus_int32 Rr[ LTP_ORDER ], rr[ MAX_NB_SUBFR ]; + opus_int32 wd, m_Q12; + + b_Q14_ptr = b_Q14; + WLTP_ptr = WLTP; + r_ptr = &r_lpc[ mem_offset ]; for( k = 0; k < nb_subfr; k++ ) { lag_ptr = r_ptr - ( lag[ k ] + LTP_ORDER / 2 ); - silk_sum_sqr_shift( &xx, &xx_shifts, r_ptr, subfr_length + LTP_ORDER ); /* xx in Q( -xx_shifts ) */ - silk_corrMatrix_FIX( lag_ptr, subfr_length, LTP_ORDER, XXLTP_Q17_ptr, &nrg, &XX_shifts, arch ); /* XXLTP_Q17_ptr and nrg in Q( -XX_shifts ) */ - extra_shifts = xx_shifts - XX_shifts; - if( extra_shifts > 0 ) { - /* Shift XX */ - xX_shifts = xx_shifts; - for( i = 0; i < LTP_ORDER * LTP_ORDER; i++ ) { - XXLTP_Q17_ptr[ i ] = silk_RSHIFT32( XXLTP_Q17_ptr[ i ], extra_shifts ); /* Q( -xX_shifts ) */ - } - nrg = silk_RSHIFT32( nrg, extra_shifts ); /* Q( -xX_shifts ) */ - } else if( extra_shifts < 0 ) { - /* Shift xx */ - xX_shifts = XX_shifts; - xx = silk_RSHIFT32( xx, -extra_shifts ); /* Q( -xX_shifts ) */ - } else { - xX_shifts = xx_shifts; + silk_sum_sqr_shift( &rr[ k ], &rr_shifts, r_ptr, subfr_length ); /* rr[ k ] in Q( -rr_shifts ) */ + + /* Assure headroom */ + LZs = silk_CLZ32( rr[k] ); + if( LZs < LTP_CORRS_HEAD_ROOM ) { + rr[ k ] = silk_RSHIFT_ROUND( rr[ k ], LTP_CORRS_HEAD_ROOM - LZs ); + rr_shifts += ( LTP_CORRS_HEAD_ROOM - LZs ); } - silk_corrVector_FIX( lag_ptr, r_ptr, subfr_length, LTP_ORDER, xXLTP_Q17_ptr, xX_shifts, arch ); /* xXLTP_Q17_ptr in Q( -xX_shifts ) */ + corr_rshifts[ k ] = rr_shifts; + silk_corrMatrix_FIX( lag_ptr, subfr_length, LTP_ORDER, LTP_CORRS_HEAD_ROOM, WLTP_ptr, &corr_rshifts[ k ], arch ); /* WLTP_fix_ptr in Q( -corr_rshifts[ k ] ) */ + + /* The correlation vector always has lower max abs value than rr and/or RR so head room is assured */ + silk_corrVector_FIX( lag_ptr, r_ptr, subfr_length, LTP_ORDER, Rr, corr_rshifts[ k ], arch ); /* Rr_fix_ptr in Q( -corr_rshifts[ k ] ) */ + if( corr_rshifts[ k ] > rr_shifts ) { + rr[ k ] = silk_RSHIFT( rr[ k ], corr_rshifts[ k ] - rr_shifts ); /* rr[ k ] in Q( -corr_rshifts[ k ] ) */ + } + silk_assert( rr[ k ] >= 0 ); + + regu = 1; + regu = silk_SMLAWB( regu, rr[ k ], SILK_FIX_CONST( LTP_DAMPING/3, 16 ) ); + regu = silk_SMLAWB( regu, matrix_ptr( WLTP_ptr, 0, 0, LTP_ORDER ), SILK_FIX_CONST( LTP_DAMPING/3, 16 ) ); + regu = silk_SMLAWB( regu, matrix_ptr( WLTP_ptr, LTP_ORDER-1, LTP_ORDER-1, LTP_ORDER ), SILK_FIX_CONST( LTP_DAMPING/3, 16 ) ); + silk_regularize_correlations_FIX( WLTP_ptr, &rr[k], regu, LTP_ORDER ); + + silk_solve_LDL_FIX( WLTP_ptr, LTP_ORDER, Rr, b_Q16 ); /* WLTP_fix_ptr and Rr_fix_ptr both in Q(-corr_rshifts[k]) */ + + /* Limit and store in Q14 */ + silk_fit_LTP( b_Q16, b_Q14_ptr ); + + /* Calculate residual energy */ + nrg[ k ] = silk_residual_energy16_covar_FIX( b_Q14_ptr, WLTP_ptr, Rr, rr[ k ], LTP_ORDER, 14 ); /* nrg_fix in Q( -corr_rshifts[ k ] ) */ + + /* temp = Wght[ k ] / ( nrg[ k ] * Wght[ k ] + 0.01f * subfr_length ); */ + extra_shifts = silk_min_int( corr_rshifts[ k ], LTP_CORRS_HEAD_ROOM ); + denom32 = silk_LSHIFT_SAT32( silk_SMULWB( nrg[ k ], Wght_Q15[ k ] ), 1 + extra_shifts ) + /* Q( -corr_rshifts[ k ] + extra_shifts ) */ + silk_RSHIFT( silk_SMULWB( (opus_int32)subfr_length, 655 ), corr_rshifts[ k ] - extra_shifts ); /* Q( -corr_rshifts[ k ] + extra_shifts ) */ + denom32 = silk_max( denom32, 1 ); + silk_assert( ((opus_int64)Wght_Q15[ k ] << 16 ) < silk_int32_MAX ); /* Wght always < 0.5 in Q0 */ + temp32 = silk_DIV32( silk_LSHIFT( (opus_int32)Wght_Q15[ k ], 16 ), denom32 ); /* Q( 15 + 16 + corr_rshifts[k] - extra_shifts ) */ + temp32 = silk_RSHIFT( temp32, 31 + corr_rshifts[ k ] - extra_shifts - 26 ); /* Q26 */ - /* At this point all correlations are in Q(-xX_shifts) */ - temp = silk_SMLAWB( 1, nrg, SILK_FIX_CONST( LTP_CORR_INV_MAX, 16 ) ); - temp = silk_max( temp, xx ); -TIC(div) -#if 0 + /* Limit temp such that the below scaling never wraps around */ + WLTP_max = 0; for( i = 0; i < LTP_ORDER * LTP_ORDER; i++ ) { - XXLTP_Q17_ptr[ i ] = silk_DIV32_varQ( XXLTP_Q17_ptr[ i ], temp, 17 ); + WLTP_max = silk_max( WLTP_ptr[ i ], WLTP_max ); } + lshift = silk_CLZ32( WLTP_max ) - 1 - 3; /* keep 3 bits free for vq_nearest_neighbor_fix */ + silk_assert( 26 - 18 + lshift >= 0 ); + if( 26 - 18 + lshift < 31 ) { + temp32 = silk_min_32( temp32, silk_LSHIFT( (opus_int32)1, 26 - 18 + lshift ) ); + } + + silk_scale_vector32_Q26_lshift_18( WLTP_ptr, temp32, LTP_ORDER * LTP_ORDER ); /* WLTP_ptr in Q( 18 - corr_rshifts[ k ] ) */ + + w[ k ] = matrix_ptr( WLTP_ptr, LTP_ORDER/2, LTP_ORDER/2, LTP_ORDER ); /* w in Q( 18 - corr_rshifts[ k ] ) */ + silk_assert( w[k] >= 0 ); + + r_ptr += subfr_length; + b_Q14_ptr += LTP_ORDER; + WLTP_ptr += LTP_ORDER * LTP_ORDER; + } + + maxRshifts = 0; + for( k = 0; k < nb_subfr; k++ ) { + maxRshifts = silk_max_int( corr_rshifts[ k ], maxRshifts ); + } + + /* Compute LTP coding gain */ + if( LTPredCodGain_Q7 != NULL ) { + LPC_LTP_res_nrg = 0; + LPC_res_nrg = 0; + silk_assert( LTP_CORRS_HEAD_ROOM >= 2 ); /* Check that no overflow will happen when adding */ + for( k = 0; k < nb_subfr; k++ ) { + LPC_res_nrg = silk_ADD32( LPC_res_nrg, silk_RSHIFT( silk_ADD32( silk_SMULWB( rr[ k ], Wght_Q15[ k ] ), 1 ), 1 + ( maxRshifts - corr_rshifts[ k ] ) ) ); /* Q( -maxRshifts ) */ + LPC_LTP_res_nrg = silk_ADD32( LPC_LTP_res_nrg, silk_RSHIFT( silk_ADD32( silk_SMULWB( nrg[ k ], Wght_Q15[ k ] ), 1 ), 1 + ( maxRshifts - corr_rshifts[ k ] ) ) ); /* Q( -maxRshifts ) */ + } + LPC_LTP_res_nrg = silk_max( LPC_LTP_res_nrg, 1 ); /* avoid division by zero */ + + div_Q16 = silk_DIV32_varQ( LPC_res_nrg, LPC_LTP_res_nrg, 16 ); + *LTPredCodGain_Q7 = ( opus_int )silk_SMULBB( 3, silk_lin2log( div_Q16 ) - ( 16 << 7 ) ); + + silk_assert( *LTPredCodGain_Q7 == ( opus_int )silk_SAT16( silk_MUL( 3, silk_lin2log( div_Q16 ) - ( 16 << 7 ) ) ) ); + } + + /* smoothing */ + /* d = sum( B, 1 ); */ + b_Q14_ptr = b_Q14; + for( k = 0; k < nb_subfr; k++ ) { + d_Q14[ k ] = 0; for( i = 0; i < LTP_ORDER; i++ ) { - xXLTP_Q17_ptr[ i ] = silk_DIV32_varQ( xXLTP_Q17_ptr[ i ], temp, 17 ); + d_Q14[ k ] += b_Q14_ptr[ i ]; } -#else - for( i = 0; i < LTP_ORDER * LTP_ORDER; i++ ) { - XXLTP_Q17_ptr[ i ] = (opus_int32)( silk_LSHIFT64( (opus_int64)XXLTP_Q17_ptr[ i ], 17 ) / temp ); + b_Q14_ptr += LTP_ORDER; + } + + /* m = ( w * d' ) / ( sum( w ) + 1e-3 ); */ + + /* Find maximum absolute value of d_Q14 and the bits used by w in Q0 */ + max_abs_d_Q14 = 0; + max_w_bits = 0; + for( k = 0; k < nb_subfr; k++ ) { + max_abs_d_Q14 = silk_max_32( max_abs_d_Q14, silk_abs( d_Q14[ k ] ) ); + /* w[ k ] is in Q( 18 - corr_rshifts[ k ] ) */ + /* Find bits needed in Q( 18 - maxRshifts ) */ + max_w_bits = silk_max_32( max_w_bits, 32 - silk_CLZ32( w[ k ] ) + corr_rshifts[ k ] - maxRshifts ); + } + + /* max_abs_d_Q14 = (5 << 15); worst case, i.e. LTP_ORDER * -silk_int16_MIN */ + silk_assert( max_abs_d_Q14 <= ( 5 << 15 ) ); + + /* How many bits is needed for w*d' in Q( 18 - maxRshifts ) in the worst case, of all d_Q14's being equal to max_abs_d_Q14 */ + extra_shifts = max_w_bits + 32 - silk_CLZ32( max_abs_d_Q14 ) - 14; + + /* Subtract what we got available; bits in output var plus maxRshifts */ + extra_shifts -= ( 32 - 1 - 2 + maxRshifts ); /* Keep sign bit free as well as 2 bits for accumulation */ + extra_shifts = silk_max_int( extra_shifts, 0 ); + + maxRshifts_wxtra = maxRshifts + extra_shifts; + + temp32 = silk_RSHIFT( 262, maxRshifts + extra_shifts ) + 1; /* 1e-3f in Q( 18 - (maxRshifts + extra_shifts) ) */ + wd = 0; + for( k = 0; k < nb_subfr; k++ ) { + /* w has at least 2 bits of headroom so no overflow should happen */ + temp32 = silk_ADD32( temp32, silk_RSHIFT( w[ k ], maxRshifts_wxtra - corr_rshifts[ k ] ) ); /* Q( 18 - maxRshifts_wxtra ) */ + wd = silk_ADD32( wd, silk_LSHIFT( silk_SMULWW( silk_RSHIFT( w[ k ], maxRshifts_wxtra - corr_rshifts[ k ] ), d_Q14[ k ] ), 2 ) ); /* Q( 18 - maxRshifts_wxtra ) */ + } + m_Q12 = silk_DIV32_varQ( wd, temp32, 12 ); + + b_Q14_ptr = b_Q14; + for( k = 0; k < nb_subfr; k++ ) { + /* w_fix[ k ] from Q( 18 - corr_rshifts[ k ] ) to Q( 16 ) */ + if( 2 - corr_rshifts[k] > 0 ) { + temp32 = silk_RSHIFT( w[ k ], 2 - corr_rshifts[ k ] ); + } else { + temp32 = silk_LSHIFT_SAT32( w[ k ], corr_rshifts[ k ] - 2 ); } + + g_Q26 = silk_MUL( + silk_DIV32( + SILK_FIX_CONST( LTP_SMOOTHING, 26 ), + silk_RSHIFT( SILK_FIX_CONST( LTP_SMOOTHING, 26 ), 10 ) + temp32 ), /* Q10 */ + silk_LSHIFT_SAT32( silk_SUB_SAT32( (opus_int32)m_Q12, silk_RSHIFT( d_Q14[ k ], 2 ) ), 4 ) ); /* Q16 */ + + temp32 = 0; for( i = 0; i < LTP_ORDER; i++ ) { - xXLTP_Q17_ptr[ i ] = (opus_int32)( silk_LSHIFT64( (opus_int64)xXLTP_Q17_ptr[ i ], 17 ) / temp ); + delta_b_Q14[ i ] = silk_max_16( b_Q14_ptr[ i ], 1638 ); /* 1638_Q14 = 0.1_Q0 */ + temp32 += delta_b_Q14[ i ]; /* Q14 */ } -#endif -TOC(div) - r_ptr += subfr_length; - XXLTP_Q17_ptr += LTP_ORDER * LTP_ORDER; - xXLTP_Q17_ptr += LTP_ORDER; + temp32 = silk_DIV32( g_Q26, temp32 ); /* Q14 -> Q12 */ + for( i = 0; i < LTP_ORDER; i++ ) { + b_Q14_ptr[ i ] = silk_LIMIT_32( (opus_int32)b_Q14_ptr[ i ] + silk_SMULWB( silk_LSHIFT_SAT32( temp32, 4 ), delta_b_Q14[ i ] ), -16000, 28000 ); + } + b_Q14_ptr += LTP_ORDER; + } +} + +void silk_fit_LTP( + opus_int32 LTP_coefs_Q16[ LTP_ORDER ], + opus_int16 LTP_coefs_Q14[ LTP_ORDER ] +) +{ + opus_int i; + + for( i = 0; i < LTP_ORDER; i++ ) { + LTP_coefs_Q14[ i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( LTP_coefs_Q16[ i ], 2 ) ); } } diff --git a/thirdparty/opus/silk/fixed/find_pitch_lags_FIX.c b/thirdparty/opus/silk/fixed/find_pitch_lags_FIX.c index 6c3379f2bb..b8440a8247 100644 --- a/thirdparty/opus/silk/fixed/find_pitch_lags_FIX.c +++ b/thirdparty/opus/silk/fixed/find_pitch_lags_FIX.c @@ -44,7 +44,7 @@ void silk_find_pitch_lags_FIX( { opus_int buf_len, i, scale; opus_int32 thrhld_Q13, res_nrg; - const opus_int16 *x_ptr; + const opus_int16 *x_buf, *x_buf_ptr; VARDECL( opus_int16, Wsig ); opus_int16 *Wsig_ptr; opus_int32 auto_corr[ MAX_FIND_PITCH_LPC_ORDER + 1 ]; @@ -59,7 +59,9 @@ void silk_find_pitch_lags_FIX( buf_len = psEnc->sCmn.la_pitch + psEnc->sCmn.frame_length + psEnc->sCmn.ltp_mem_length; /* Safety check */ - celt_assert( buf_len >= psEnc->sCmn.pitch_LPC_win_length ); + silk_assert( buf_len >= psEnc->sCmn.pitch_LPC_win_length ); + + x_buf = x - psEnc->sCmn.ltp_mem_length; /*************************************/ /* Estimate LPC AR coefficients */ @@ -70,19 +72,19 @@ void silk_find_pitch_lags_FIX( ALLOC( Wsig, psEnc->sCmn.pitch_LPC_win_length, opus_int16 ); /* First LA_LTP samples */ - x_ptr = x + buf_len - psEnc->sCmn.pitch_LPC_win_length; + x_buf_ptr = x_buf + buf_len - psEnc->sCmn.pitch_LPC_win_length; Wsig_ptr = Wsig; - silk_apply_sine_window( Wsig_ptr, x_ptr, 1, psEnc->sCmn.la_pitch ); + silk_apply_sine_window( Wsig_ptr, x_buf_ptr, 1, psEnc->sCmn.la_pitch ); /* Middle un - windowed samples */ Wsig_ptr += psEnc->sCmn.la_pitch; - x_ptr += psEnc->sCmn.la_pitch; - silk_memcpy( Wsig_ptr, x_ptr, ( psEnc->sCmn.pitch_LPC_win_length - silk_LSHIFT( psEnc->sCmn.la_pitch, 1 ) ) * sizeof( opus_int16 ) ); + x_buf_ptr += psEnc->sCmn.la_pitch; + silk_memcpy( Wsig_ptr, x_buf_ptr, ( psEnc->sCmn.pitch_LPC_win_length - silk_LSHIFT( psEnc->sCmn.la_pitch, 1 ) ) * sizeof( opus_int16 ) ); /* Last LA_LTP samples */ Wsig_ptr += psEnc->sCmn.pitch_LPC_win_length - silk_LSHIFT( psEnc->sCmn.la_pitch, 1 ); - x_ptr += psEnc->sCmn.pitch_LPC_win_length - silk_LSHIFT( psEnc->sCmn.la_pitch, 1 ); - silk_apply_sine_window( Wsig_ptr, x_ptr, 2, psEnc->sCmn.la_pitch ); + x_buf_ptr += psEnc->sCmn.pitch_LPC_win_length - silk_LSHIFT( psEnc->sCmn.la_pitch, 1 ); + silk_apply_sine_window( Wsig_ptr, x_buf_ptr, 2, psEnc->sCmn.la_pitch ); /* Calculate autocorrelation sequence */ silk_autocorr( auto_corr, &scale, Wsig, psEnc->sCmn.pitch_LPC_win_length, psEnc->sCmn.pitchEstimationLPCOrder + 1, arch ); @@ -110,7 +112,7 @@ void silk_find_pitch_lags_FIX( /*****************************************/ /* LPC analysis filtering */ /*****************************************/ - silk_LPC_analysis_filter( res, x, A_Q12, buf_len, psEnc->sCmn.pitchEstimationLPCOrder, psEnc->sCmn.arch ); + silk_LPC_analysis_filter( res, x_buf, A_Q12, buf_len, psEnc->sCmn.pitchEstimationLPCOrder, psEnc->sCmn.arch ); if( psEnc->sCmn.indices.signalType != TYPE_NO_VOICE_ACTIVITY && psEnc->sCmn.first_frame_after_reset == 0 ) { /* Threshold for pitch estimator */ diff --git a/thirdparty/opus/silk/fixed/find_pred_coefs_FIX.c b/thirdparty/opus/silk/fixed/find_pred_coefs_FIX.c index 606d863347..d308e9cf5f 100644 --- a/thirdparty/opus/silk/fixed/find_pred_coefs_FIX.c +++ b/thirdparty/opus/silk/fixed/find_pred_coefs_FIX.c @@ -41,12 +41,13 @@ void silk_find_pred_coefs_FIX( ) { opus_int i; - opus_int32 invGains_Q16[ MAX_NB_SUBFR ], local_gains[ MAX_NB_SUBFR ]; + opus_int32 invGains_Q16[ MAX_NB_SUBFR ], local_gains[ MAX_NB_SUBFR ], Wght_Q15[ MAX_NB_SUBFR ]; opus_int16 NLSF_Q15[ MAX_LPC_ORDER ]; const opus_int16 *x_ptr; opus_int16 *x_pre_ptr; VARDECL( opus_int16, LPC_in_pre ); - opus_int32 min_gain_Q16, minInvGain_Q30; + opus_int32 tmp, min_gain_Q16, minInvGain_Q30; + opus_int LTP_corrs_rshift[ MAX_NB_SUBFR ]; SAVE_STACK; /* weighting for weighted least squares */ @@ -60,11 +61,13 @@ void silk_find_pred_coefs_FIX( /* Invert and normalize gains, and ensure that maximum invGains_Q16 is within range of a 16 bit int */ invGains_Q16[ i ] = silk_DIV32_varQ( min_gain_Q16, psEncCtrl->Gains_Q16[ i ], 16 - 2 ); - /* Limit inverse */ - invGains_Q16[ i ] = silk_max( invGains_Q16[ i ], 100 ); + /* Ensure Wght_Q15 a minimum value 1 */ + invGains_Q16[ i ] = silk_max( invGains_Q16[ i ], 363 ); /* Square the inverted gains */ silk_assert( invGains_Q16[ i ] == silk_SAT16( invGains_Q16[ i ] ) ); + tmp = silk_SMULWB( invGains_Q16[ i ], invGains_Q16[ i ] ); + Wght_Q15[ i ] = silk_RSHIFT( tmp, 1 ); /* Invert the inverted and normalized gains */ local_gains[ i ] = silk_DIV32( ( (opus_int32)1 << 16 ), invGains_Q16[ i ] ); @@ -74,24 +77,24 @@ void silk_find_pred_coefs_FIX( psEnc->sCmn.nb_subfr * psEnc->sCmn.predictLPCOrder + psEnc->sCmn.frame_length, opus_int16 ); if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) { - VARDECL( opus_int32, xXLTP_Q17 ); - VARDECL( opus_int32, XXLTP_Q17 ); + VARDECL( opus_int32, WLTP ); /**********/ /* VOICED */ /**********/ - celt_assert( psEnc->sCmn.ltp_mem_length - psEnc->sCmn.predictLPCOrder >= psEncCtrl->pitchL[ 0 ] + LTP_ORDER / 2 ); + silk_assert( psEnc->sCmn.ltp_mem_length - psEnc->sCmn.predictLPCOrder >= psEncCtrl->pitchL[ 0 ] + LTP_ORDER / 2 ); - ALLOC( xXLTP_Q17, psEnc->sCmn.nb_subfr * LTP_ORDER, opus_int32 ); - ALLOC( XXLTP_Q17, psEnc->sCmn.nb_subfr * LTP_ORDER * LTP_ORDER, opus_int32 ); + ALLOC( WLTP, psEnc->sCmn.nb_subfr * LTP_ORDER * LTP_ORDER, opus_int32 ); /* LTP analysis */ - silk_find_LTP_FIX( XXLTP_Q17, xXLTP_Q17, res_pitch, - psEncCtrl->pitchL, psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr, psEnc->sCmn.arch ); + silk_find_LTP_FIX( psEncCtrl->LTPCoef_Q14, WLTP, &psEncCtrl->LTPredCodGain_Q7, + res_pitch, psEncCtrl->pitchL, Wght_Q15, psEnc->sCmn.subfr_length, + psEnc->sCmn.nb_subfr, psEnc->sCmn.ltp_mem_length, LTP_corrs_rshift, psEnc->sCmn.arch ); /* Quantize LTP gain parameters */ silk_quant_LTP_gains( psEncCtrl->LTPCoef_Q14, psEnc->sCmn.indices.LTPIndex, &psEnc->sCmn.indices.PERIndex, - &psEnc->sCmn.sum_log_gain_Q7, &psEncCtrl->LTPredCodGain_Q7, XXLTP_Q17, xXLTP_Q17, psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr, psEnc->sCmn.arch ); + &psEnc->sCmn.sum_log_gain_Q7, WLTP, psEnc->sCmn.mu_LTP_Q9, psEnc->sCmn.LTPQuantLowComplexity, psEnc->sCmn.nb_subfr, + psEnc->sCmn.arch); /* Control LTP scaling */ silk_LTP_scale_ctrl_FIX( psEnc, psEncCtrl, condCoding ); diff --git a/thirdparty/opus/silk/fixed/k2a_FIX.c b/thirdparty/opus/silk/fixed/k2a_FIX.c index 549f6eadaa..5fee599bcb 100644 --- a/thirdparty/opus/silk/fixed/k2a_FIX.c +++ b/thirdparty/opus/silk/fixed/k2a_FIX.c @@ -39,15 +39,14 @@ void silk_k2a( ) { opus_int k, n; - opus_int32 rc, tmp1, tmp2; + opus_int32 Atmp[ SILK_MAX_ORDER_LPC ]; for( k = 0; k < order; k++ ) { - rc = rc_Q15[ k ]; - for( n = 0; n < (k + 1) >> 1; n++ ) { - tmp1 = A_Q24[ n ]; - tmp2 = A_Q24[ k - n - 1 ]; - A_Q24[ n ] = silk_SMLAWB( tmp1, silk_LSHIFT( tmp2, 1 ), rc ); - A_Q24[ k - n - 1 ] = silk_SMLAWB( tmp2, silk_LSHIFT( tmp1, 1 ), rc ); + for( n = 0; n < k; n++ ) { + Atmp[ n ] = A_Q24[ n ]; + } + for( n = 0; n < k; n++ ) { + A_Q24[ n ] = silk_SMLAWB( A_Q24[ n ], silk_LSHIFT( Atmp[ k - n - 1 ], 1 ), rc_Q15[ k ] ); } A_Q24[ k ] = -silk_LSHIFT( (opus_int32)rc_Q15[ k ], 9 ); } diff --git a/thirdparty/opus/silk/fixed/k2a_Q16_FIX.c b/thirdparty/opus/silk/fixed/k2a_Q16_FIX.c index 1595aa6212..3b03987544 100644 --- a/thirdparty/opus/silk/fixed/k2a_Q16_FIX.c +++ b/thirdparty/opus/silk/fixed/k2a_Q16_FIX.c @@ -39,16 +39,15 @@ void silk_k2a_Q16( ) { opus_int k, n; - opus_int32 rc, tmp1, tmp2; + opus_int32 Atmp[ SILK_MAX_ORDER_LPC ]; for( k = 0; k < order; k++ ) { - rc = rc_Q16[ k ]; - for( n = 0; n < (k + 1) >> 1; n++ ) { - tmp1 = A_Q24[ n ]; - tmp2 = A_Q24[ k - n - 1 ]; - A_Q24[ n ] = silk_SMLAWW( tmp1, tmp2, rc ); - A_Q24[ k - n - 1 ] = silk_SMLAWW( tmp2, tmp1, rc ); + for( n = 0; n < k; n++ ) { + Atmp[ n ] = A_Q24[ n ]; } - A_Q24[ k ] = -silk_LSHIFT( rc, 8 ); + for( n = 0; n < k; n++ ) { + A_Q24[ n ] = silk_SMLAWW( A_Q24[ n ], Atmp[ k - n - 1 ], rc_Q16[ k ] ); + } + A_Q24[ k ] = -silk_LSHIFT( rc_Q16[ k ], 8 ); } } diff --git a/thirdparty/opus/silk/fixed/main_FIX.h b/thirdparty/opus/silk/fixed/main_FIX.h index 6d2112e511..375b5eb32e 100644 --- a/thirdparty/opus/silk/fixed/main_FIX.h +++ b/thirdparty/opus/silk/fixed/main_FIX.h @@ -36,11 +36,6 @@ POSSIBILITY OF SUCH DAMAGE. #include "debug.h" #include "entenc.h" -#if ((defined(OPUS_ARM_ASM) && defined(FIXED_POINT)) \ - || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)) -#include "fixed/arm/warped_autocorrelation_FIX_arm.h" -#endif - #ifndef FORCE_CPP_BUILD #ifdef __cplusplus extern "C" @@ -52,9 +47,6 @@ extern "C" #define silk_encode_do_VAD_Fxx silk_encode_do_VAD_FIX #define silk_encode_frame_Fxx silk_encode_frame_FIX -#define QC 10 -#define QS 13 - /*********************/ /* Encoder Functions */ /*********************/ @@ -66,8 +58,7 @@ void silk_HP_variable_cutoff( /* Encoder main function */ void silk_encode_do_VAD_FIX( - silk_encoder_state_FIX *psEnc, /* I/O Pointer to Silk FIX encoder state */ - opus_int activity /* I Decision of Opus voice activity detector */ + silk_encoder_state_FIX *psEnc /* I/O Pointer to Silk FIX encoder state */ ); /* Encoder main function */ @@ -90,11 +81,33 @@ opus_int silk_init_encoder( opus_int silk_control_encoder( silk_encoder_state_Fxx *psEnc, /* I/O Pointer to Silk encoder state */ silk_EncControlStruct *encControl, /* I Control structure */ + const opus_int32 TargetRate_bps, /* I Target max bitrate (bps) */ const opus_int allow_bw_switch, /* I Flag to allow switching audio bandwidth */ const opus_int channelNb, /* I Channel number */ const opus_int force_fs_kHz ); +/****************/ +/* Prefiltering */ +/****************/ +void silk_prefilter_FIX( + silk_encoder_state_FIX *psEnc, /* I/O Encoder state */ + const silk_encoder_control_FIX *psEncCtrl, /* I Encoder control */ + opus_int32 xw_Q10[], /* O Weighted signal */ + const opus_int16 x[] /* I Speech signal */ +); + +void silk_warped_LPC_analysis_filter_FIX_c( + opus_int32 state[], /* I/O State [order + 1] */ + opus_int32 res_Q2[], /* O Residual signal [length] */ + const opus_int16 coef_Q13[], /* I Coefficients [order] */ + const opus_int16 input[], /* I Input signal [length] */ + const opus_int16 lambda_Q16, /* I Warping factor */ + const opus_int length, /* I Length of input signal */ + const opus_int order /* I Filter order (even) */ +); + + /**************************/ /* Noise shaping analysis */ /**************************/ @@ -108,7 +121,7 @@ void silk_noise_shape_analysis_FIX( ); /* Autocorrelations for a warped frequency axis */ -void silk_warped_autocorrelation_FIX_c( +void silk_warped_autocorrelation_FIX( opus_int32 *corr, /* O Result [order + 1] */ opus_int *scale, /* O Scaling of the correlation vector */ const opus_int16 *input, /* I Input data to correlate */ @@ -117,11 +130,6 @@ void silk_warped_autocorrelation_FIX_c( const opus_int order /* I Correlation order (even) */ ); -#if !defined(OVERRIDE_silk_warped_autocorrelation_FIX) -#define silk_warped_autocorrelation_FIX(corr, scale, input, warping_Q16, length, order, arch) \ - ((void)(arch), silk_warped_autocorrelation_FIX_c(corr, scale, input, warping_Q16, length, order)) -#endif - /* Calculation of LTP state scaling */ void silk_LTP_scale_ctrl_FIX( silk_encoder_state_FIX *psEnc, /* I/O encoder state */ @@ -160,12 +168,16 @@ void silk_find_LPC_FIX( /* LTP analysis */ void silk_find_LTP_FIX( - opus_int32 XXLTP_Q17[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* O Correlation matrix */ - opus_int32 xXLTP_Q17[ MAX_NB_SUBFR * LTP_ORDER ], /* O Correlation vector */ - const opus_int16 r_lpc[], /* I Residual signal after LPC */ + opus_int16 b_Q14[ MAX_NB_SUBFR * LTP_ORDER ], /* O LTP coefs */ + opus_int32 WLTP[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* O Weight for LTP quantization */ + opus_int *LTPredCodGain_Q7, /* O LTP coding gain */ + const opus_int16 r_lpc[], /* I residual signal after LPC signal + state for first 10 ms */ const opus_int lag[ MAX_NB_SUBFR ], /* I LTP lags */ - const opus_int subfr_length, /* I Subframe length */ - const opus_int nb_subfr, /* I Number of subframes */ + const opus_int32 Wght_Q15[ MAX_NB_SUBFR ], /* I weights */ + const opus_int subfr_length, /* I subframe length */ + const opus_int nb_subfr, /* I number of subframes */ + const opus_int mem_offset, /* I number of samples in LTP memory */ + opus_int corr_rshifts[ MAX_NB_SUBFR ], /* O right shifts applied to correlations */ int arch /* I Run-time architecture */ ); @@ -219,9 +231,9 @@ void silk_corrMatrix_FIX( const opus_int16 *x, /* I x vector [L + order - 1] used to form data matrix X */ const opus_int L, /* I Length of vectors */ const opus_int order, /* I Max lag for correlation */ + const opus_int head_room, /* I Desired headroom */ opus_int32 *XX, /* O Pointer to X'*X correlation matrix [ order x order ] */ - opus_int32 *nrg, /* O Energy of x vector */ - opus_int *rshifts, /* O Right shifts of correlations */ + opus_int *rshifts, /* I/O Right shifts of correlations */ int arch /* I Run-time architecture */ ); @@ -236,6 +248,22 @@ void silk_corrVector_FIX( int arch /* I Run-time architecture */ ); +/* Add noise to matrix diagonal */ +void silk_regularize_correlations_FIX( + opus_int32 *XX, /* I/O Correlation matrices */ + opus_int32 *xx, /* I/O Correlation values */ + opus_int32 noise, /* I Noise to add */ + opus_int D /* I Dimension of XX */ +); + +/* Solves Ax = b, assuming A is symmetric */ +void silk_solve_LDL_FIX( + opus_int32 *A, /* I Pointer to symetric square matrix A */ + opus_int M, /* I Size of matrix */ + const opus_int32 *b, /* I Pointer to b vector */ + opus_int32 *x_Q16 /* O Pointer to x solution vector */ +); + #ifndef FORCE_CPP_BUILD #ifdef __cplusplus } diff --git a/thirdparty/opus/silk/fixed/mips/noise_shape_analysis_FIX_mipsr1.h b/thirdparty/opus/silk/fixed/mips/noise_shape_analysis_FIX_mipsr1.h index 3999b5bd09..c30481e437 100644 --- a/thirdparty/opus/silk/fixed/mips/noise_shape_analysis_FIX_mipsr1.h +++ b/thirdparty/opus/silk/fixed/mips/noise_shape_analysis_FIX_mipsr1.h @@ -169,7 +169,7 @@ void silk_noise_shape_analysis_FIX( if( psEnc->sCmn.warping_Q16 > 0 ) { /* Calculate warped auto correlation */ - silk_warped_autocorrelation_FIX( auto_corr, &scale, x_windowed, warping_Q16, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder, arch ); + silk_warped_autocorrelation_FIX( auto_corr, &scale, x_windowed, warping_Q16, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder ); } else { /* Calculate regular auto correlation */ silk_autocorr( auto_corr, &scale, x_windowed, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder + 1, arch ); @@ -224,8 +224,8 @@ void silk_noise_shape_analysis_FIX( silk_bwexpander_32( AR1_Q24, psEnc->sCmn.shapingLPCOrder, BWExp1_Q16 ); /* Ratio of prediction gains, in energy domain */ - pre_nrg_Q30 = silk_LPC_inverse_pred_gain_Q24( AR2_Q24, psEnc->sCmn.shapingLPCOrder, arch ); - nrg = silk_LPC_inverse_pred_gain_Q24( AR1_Q24, psEnc->sCmn.shapingLPCOrder, arch ); + pre_nrg_Q30 = silk_LPC_inverse_pred_gain_Q24( AR2_Q24, psEnc->sCmn.shapingLPCOrder ); + nrg = silk_LPC_inverse_pred_gain_Q24( AR1_Q24, psEnc->sCmn.shapingLPCOrder ); /*psEncCtrl->GainsPre[ k ] = 1.0f - 0.7f * ( 1.0f - pre_nrg / nrg ) = 0.3f + 0.7f * pre_nrg / nrg;*/ pre_nrg_Q30 = silk_LSHIFT32( silk_SMULWB( pre_nrg_Q30, SILK_FIX_CONST( 0.7, 15 ) ), 1 ); diff --git a/thirdparty/opus/silk/fixed/mips/warped_autocorrelation_FIX_mipsr1.h b/thirdparty/opus/silk/fixed/mips/warped_autocorrelation_FIX_mipsr1.h index 66eb2ed26d..e803ef0fce 100644 --- a/thirdparty/opus/silk/fixed/mips/warped_autocorrelation_FIX_mipsr1.h +++ b/thirdparty/opus/silk/fixed/mips/warped_autocorrelation_FIX_mipsr1.h @@ -41,8 +41,8 @@ POSSIBILITY OF SUCH DAMAGE. #define QS 14 /* Autocorrelations for a warped frequency axis */ -#define OVERRIDE_silk_warped_autocorrelation_FIX_c -void silk_warped_autocorrelation_FIX_c( +#define OVERRIDE_silk_warped_autocorrelation_FIX +void silk_warped_autocorrelation_FIX( opus_int32 *corr, /* O Result [order + 1] */ opus_int *scale, /* O Scaling of the correlation vector */ const opus_int16 *input, /* I Input data to correlate */ diff --git a/thirdparty/opus/silk/fixed/noise_shape_analysis_FIX.c b/thirdparty/opus/silk/fixed/noise_shape_analysis_FIX.c index 85fea0bf09..22a89f75ae 100644 --- a/thirdparty/opus/silk/fixed/noise_shape_analysis_FIX.c +++ b/thirdparty/opus/silk/fixed/noise_shape_analysis_FIX.c @@ -57,79 +57,88 @@ static OPUS_INLINE opus_int32 warped_gain( /* gain in Q16*/ /* Convert warped filter coefficients to monic pseudo-warped coefficients and limit maximum */ /* amplitude of monic warped coefficients by using bandwidth expansion on the true coefficients */ static OPUS_INLINE void limit_warped_coefs( - opus_int32 *coefs_Q24, + opus_int32 *coefs_syn_Q24, + opus_int32 *coefs_ana_Q24, opus_int lambda_Q16, opus_int32 limit_Q24, opus_int order ) { opus_int i, iter, ind = 0; - opus_int32 tmp, maxabs_Q24, chirp_Q16, gain_Q16; + opus_int32 tmp, maxabs_Q24, chirp_Q16, gain_syn_Q16, gain_ana_Q16; opus_int32 nom_Q16, den_Q24; - opus_int32 limit_Q20, maxabs_Q20; /* Convert to monic coefficients */ lambda_Q16 = -lambda_Q16; for( i = order - 1; i > 0; i-- ) { - coefs_Q24[ i - 1 ] = silk_SMLAWB( coefs_Q24[ i - 1 ], coefs_Q24[ i ], lambda_Q16 ); + coefs_syn_Q24[ i - 1 ] = silk_SMLAWB( coefs_syn_Q24[ i - 1 ], coefs_syn_Q24[ i ], lambda_Q16 ); + coefs_ana_Q24[ i - 1 ] = silk_SMLAWB( coefs_ana_Q24[ i - 1 ], coefs_ana_Q24[ i ], lambda_Q16 ); } lambda_Q16 = -lambda_Q16; - nom_Q16 = silk_SMLAWB( SILK_FIX_CONST( 1.0, 16 ), -(opus_int32)lambda_Q16, lambda_Q16 ); - den_Q24 = silk_SMLAWB( SILK_FIX_CONST( 1.0, 24 ), coefs_Q24[ 0 ], lambda_Q16 ); - gain_Q16 = silk_DIV32_varQ( nom_Q16, den_Q24, 24 ); + nom_Q16 = silk_SMLAWB( SILK_FIX_CONST( 1.0, 16 ), -(opus_int32)lambda_Q16, lambda_Q16 ); + den_Q24 = silk_SMLAWB( SILK_FIX_CONST( 1.0, 24 ), coefs_syn_Q24[ 0 ], lambda_Q16 ); + gain_syn_Q16 = silk_DIV32_varQ( nom_Q16, den_Q24, 24 ); + den_Q24 = silk_SMLAWB( SILK_FIX_CONST( 1.0, 24 ), coefs_ana_Q24[ 0 ], lambda_Q16 ); + gain_ana_Q16 = silk_DIV32_varQ( nom_Q16, den_Q24, 24 ); for( i = 0; i < order; i++ ) { - coefs_Q24[ i ] = silk_SMULWW( gain_Q16, coefs_Q24[ i ] ); + coefs_syn_Q24[ i ] = silk_SMULWW( gain_syn_Q16, coefs_syn_Q24[ i ] ); + coefs_ana_Q24[ i ] = silk_SMULWW( gain_ana_Q16, coefs_ana_Q24[ i ] ); } - limit_Q20 = silk_RSHIFT(limit_Q24, 4); + for( iter = 0; iter < 10; iter++ ) { /* Find maximum absolute value */ maxabs_Q24 = -1; for( i = 0; i < order; i++ ) { - tmp = silk_abs_int32( coefs_Q24[ i ] ); + tmp = silk_max( silk_abs_int32( coefs_syn_Q24[ i ] ), silk_abs_int32( coefs_ana_Q24[ i ] ) ); if( tmp > maxabs_Q24 ) { maxabs_Q24 = tmp; ind = i; } } - /* Use Q20 to avoid any overflow when multiplying by (ind + 1) later. */ - maxabs_Q20 = silk_RSHIFT(maxabs_Q24, 4); - if( maxabs_Q20 <= limit_Q20 ) { + if( maxabs_Q24 <= limit_Q24 ) { /* Coefficients are within range - done */ return; } /* Convert back to true warped coefficients */ for( i = 1; i < order; i++ ) { - coefs_Q24[ i - 1 ] = silk_SMLAWB( coefs_Q24[ i - 1 ], coefs_Q24[ i ], lambda_Q16 ); + coefs_syn_Q24[ i - 1 ] = silk_SMLAWB( coefs_syn_Q24[ i - 1 ], coefs_syn_Q24[ i ], lambda_Q16 ); + coefs_ana_Q24[ i - 1 ] = silk_SMLAWB( coefs_ana_Q24[ i - 1 ], coefs_ana_Q24[ i ], lambda_Q16 ); } - gain_Q16 = silk_INVERSE32_varQ( gain_Q16, 32 ); + gain_syn_Q16 = silk_INVERSE32_varQ( gain_syn_Q16, 32 ); + gain_ana_Q16 = silk_INVERSE32_varQ( gain_ana_Q16, 32 ); for( i = 0; i < order; i++ ) { - coefs_Q24[ i ] = silk_SMULWW( gain_Q16, coefs_Q24[ i ] ); + coefs_syn_Q24[ i ] = silk_SMULWW( gain_syn_Q16, coefs_syn_Q24[ i ] ); + coefs_ana_Q24[ i ] = silk_SMULWW( gain_ana_Q16, coefs_ana_Q24[ i ] ); } /* Apply bandwidth expansion */ chirp_Q16 = SILK_FIX_CONST( 0.99, 16 ) - silk_DIV32_varQ( - silk_SMULWB( maxabs_Q20 - limit_Q20, silk_SMLABB( SILK_FIX_CONST( 0.8, 10 ), SILK_FIX_CONST( 0.1, 10 ), iter ) ), - silk_MUL( maxabs_Q20, ind + 1 ), 22 ); - silk_bwexpander_32( coefs_Q24, order, chirp_Q16 ); + silk_SMULWB( maxabs_Q24 - limit_Q24, silk_SMLABB( SILK_FIX_CONST( 0.8, 10 ), SILK_FIX_CONST( 0.1, 10 ), iter ) ), + silk_MUL( maxabs_Q24, ind + 1 ), 22 ); + silk_bwexpander_32( coefs_syn_Q24, order, chirp_Q16 ); + silk_bwexpander_32( coefs_ana_Q24, order, chirp_Q16 ); /* Convert to monic warped coefficients */ lambda_Q16 = -lambda_Q16; for( i = order - 1; i > 0; i-- ) { - coefs_Q24[ i - 1 ] = silk_SMLAWB( coefs_Q24[ i - 1 ], coefs_Q24[ i ], lambda_Q16 ); + coefs_syn_Q24[ i - 1 ] = silk_SMLAWB( coefs_syn_Q24[ i - 1 ], coefs_syn_Q24[ i ], lambda_Q16 ); + coefs_ana_Q24[ i - 1 ] = silk_SMLAWB( coefs_ana_Q24[ i - 1 ], coefs_ana_Q24[ i ], lambda_Q16 ); } lambda_Q16 = -lambda_Q16; nom_Q16 = silk_SMLAWB( SILK_FIX_CONST( 1.0, 16 ), -(opus_int32)lambda_Q16, lambda_Q16 ); - den_Q24 = silk_SMLAWB( SILK_FIX_CONST( 1.0, 24 ), coefs_Q24[ 0 ], lambda_Q16 ); - gain_Q16 = silk_DIV32_varQ( nom_Q16, den_Q24, 24 ); + den_Q24 = silk_SMLAWB( SILK_FIX_CONST( 1.0, 24 ), coefs_syn_Q24[ 0 ], lambda_Q16 ); + gain_syn_Q16 = silk_DIV32_varQ( nom_Q16, den_Q24, 24 ); + den_Q24 = silk_SMLAWB( SILK_FIX_CONST( 1.0, 24 ), coefs_ana_Q24[ 0 ], lambda_Q16 ); + gain_ana_Q16 = silk_DIV32_varQ( nom_Q16, den_Q24, 24 ); for( i = 0; i < order; i++ ) { - coefs_Q24[ i ] = silk_SMULWW( gain_Q16, coefs_Q24[ i ] ); + coefs_syn_Q24[ i ] = silk_SMULWW( gain_syn_Q16, coefs_syn_Q24[ i ] ); + coefs_ana_Q24[ i ] = silk_SMULWW( gain_ana_Q16, coefs_ana_Q24[ i ] ); } } silk_assert( 0 ); } -/* Disable MIPS version until it's updated. */ -#if 0 && defined(MIPSr1_ASM) +#if defined(MIPSr1_ASM) #include "mips/noise_shape_analysis_FIX_mipsr1.h" #endif @@ -146,13 +155,14 @@ void silk_noise_shape_analysis_FIX( ) { silk_shape_state_FIX *psShapeSt = &psEnc->sShape; - opus_int k, i, nSamples, nSegs, Qnrg, b_Q14, warping_Q16, scale = 0; - opus_int32 SNR_adj_dB_Q7, HarmShapeGain_Q16, Tilt_Q16, tmp32; - opus_int32 nrg, log_energy_Q7, log_energy_prev_Q7, energy_variation_Q7; - opus_int32 BWExp_Q16, gain_mult_Q16, gain_add_Q16, strength_Q16, b_Q8; + opus_int k, i, nSamples, Qnrg, b_Q14, warping_Q16, scale = 0; + opus_int32 SNR_adj_dB_Q7, HarmBoost_Q16, HarmShapeGain_Q16, Tilt_Q16, tmp32; + opus_int32 nrg, pre_nrg_Q30, log_energy_Q7, log_energy_prev_Q7, energy_variation_Q7; + opus_int32 delta_Q16, BWExp1_Q16, BWExp2_Q16, gain_mult_Q16, gain_add_Q16, strength_Q16, b_Q8; opus_int32 auto_corr[ MAX_SHAPE_LPC_ORDER + 1 ]; opus_int32 refl_coef_Q16[ MAX_SHAPE_LPC_ORDER ]; - opus_int32 AR_Q24[ MAX_SHAPE_LPC_ORDER ]; + opus_int32 AR1_Q24[ MAX_SHAPE_LPC_ORDER ]; + opus_int32 AR2_Q24[ MAX_SHAPE_LPC_ORDER ]; VARDECL( opus_int16, x_windowed ); const opus_int16 *x_ptr, *pitch_res_ptr; SAVE_STACK; @@ -199,14 +209,14 @@ void silk_noise_shape_analysis_FIX( if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) { /* Initially set to 0; may be overruled in process_gains(..) */ psEnc->sCmn.indices.quantOffsetType = 0; + psEncCtrl->sparseness_Q8 = 0; } else { /* Sparseness measure, based on relative fluctuations of energy per 2 milliseconds */ nSamples = silk_LSHIFT( psEnc->sCmn.fs_kHz, 1 ); energy_variation_Q7 = 0; log_energy_prev_Q7 = 0; pitch_res_ptr = pitch_res; - nSegs = silk_SMULBB( SUB_FRAME_LENGTH_MS, psEnc->sCmn.nb_subfr ) / 2; - for( k = 0; k < nSegs; k++ ) { + for( k = 0; k < silk_SMULBB( SUB_FRAME_LENGTH_MS, psEnc->sCmn.nb_subfr ) / 2; k++ ) { silk_sum_sqr_shift( &nrg, &scale, pitch_res_ptr, nSamples ); nrg += silk_RSHIFT( nSamples, scale ); /* Q(-scale)*/ @@ -218,12 +228,18 @@ void silk_noise_shape_analysis_FIX( pitch_res_ptr += nSamples; } + psEncCtrl->sparseness_Q8 = silk_RSHIFT( silk_sigm_Q15( silk_SMULWB( energy_variation_Q7 - + SILK_FIX_CONST( 5.0, 7 ), SILK_FIX_CONST( 0.1, 16 ) ) ), 7 ); + /* Set quantization offset depending on sparseness measure */ - if( energy_variation_Q7 > SILK_FIX_CONST( ENERGY_VARIATION_THRESHOLD_QNT_OFFSET, 7 ) * (nSegs-1) ) { + if( psEncCtrl->sparseness_Q8 > SILK_FIX_CONST( SPARSENESS_THRESHOLD_QNT_OFFSET, 8 ) ) { psEnc->sCmn.indices.quantOffsetType = 0; } else { psEnc->sCmn.indices.quantOffsetType = 1; } + + /* Increase coding SNR for sparse signals */ + SNR_adj_dB_Q7 = silk_SMLAWB( SNR_adj_dB_Q7, SILK_FIX_CONST( SPARSE_SNR_INCR_dB, 15 ), psEncCtrl->sparseness_Q8 - SILK_FIX_CONST( 0.5, 8 ) ); } /*******************************/ @@ -231,8 +247,14 @@ void silk_noise_shape_analysis_FIX( /*******************************/ /* More BWE for signals with high prediction gain */ strength_Q16 = silk_SMULWB( psEncCtrl->predGain_Q16, SILK_FIX_CONST( FIND_PITCH_WHITE_NOISE_FRACTION, 16 ) ); - BWExp_Q16 = silk_DIV32_varQ( SILK_FIX_CONST( BANDWIDTH_EXPANSION, 16 ), + BWExp1_Q16 = BWExp2_Q16 = silk_DIV32_varQ( SILK_FIX_CONST( BANDWIDTH_EXPANSION, 16 ), silk_SMLAWW( SILK_FIX_CONST( 1.0, 16 ), strength_Q16, strength_Q16 ), 16 ); + delta_Q16 = silk_SMULWB( SILK_FIX_CONST( 1.0, 16 ) - silk_SMULBB( 3, psEncCtrl->coding_quality_Q14 ), + SILK_FIX_CONST( LOW_RATE_BANDWIDTH_EXPANSION_DELTA, 16 ) ); + BWExp1_Q16 = silk_SUB32( BWExp1_Q16, delta_Q16 ); + BWExp2_Q16 = silk_ADD32( BWExp2_Q16, delta_Q16 ); + /* BWExp1 will be applied after BWExp2, so make it relative */ + BWExp1_Q16 = silk_DIV32_16( silk_LSHIFT( BWExp1_Q16, 14 ), silk_RSHIFT( BWExp2_Q16, 2 ) ); if( psEnc->sCmn.warping_Q16 > 0 ) { /* Slightly more warping in analysis will move quantization noise up in frequency, where it's better masked */ @@ -262,7 +284,7 @@ void silk_noise_shape_analysis_FIX( if( psEnc->sCmn.warping_Q16 > 0 ) { /* Calculate warped auto correlation */ - silk_warped_autocorrelation_FIX( auto_corr, &scale, x_windowed, warping_Q16, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder, arch ); + silk_warped_autocorrelation_FIX( auto_corr, &scale, x_windowed, warping_Q16, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder ); } else { /* Calculate regular auto correlation */ silk_autocorr( auto_corr, &scale, x_windowed, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder + 1, arch ); @@ -277,7 +299,7 @@ void silk_noise_shape_analysis_FIX( silk_assert( nrg >= 0 ); /* Convert reflection coefficients to prediction coefficients */ - silk_k2a_Q16( AR_Q24, refl_coef_Q16, psEnc->sCmn.shapingLPCOrder ); + silk_k2a_Q16( AR2_Q24, refl_coef_Q16, psEnc->sCmn.shapingLPCOrder ); Qnrg = -scale; /* range: -12...30*/ silk_assert( Qnrg >= -12 ); @@ -296,34 +318,40 @@ void silk_noise_shape_analysis_FIX( if( psEnc->sCmn.warping_Q16 > 0 ) { /* Adjust gain for warping */ - gain_mult_Q16 = warped_gain( AR_Q24, warping_Q16, psEnc->sCmn.shapingLPCOrder ); - silk_assert( psEncCtrl->Gains_Q16[ k ] > 0 ); - if( psEncCtrl->Gains_Q16[ k ] < SILK_FIX_CONST( 0.25, 16 ) ) { - psEncCtrl->Gains_Q16[ k ] = silk_SMULWW( psEncCtrl->Gains_Q16[ k ], gain_mult_Q16 ); + gain_mult_Q16 = warped_gain( AR2_Q24, warping_Q16, psEnc->sCmn.shapingLPCOrder ); + silk_assert( psEncCtrl->Gains_Q16[ k ] >= 0 ); + if ( silk_SMULWW( silk_RSHIFT_ROUND( psEncCtrl->Gains_Q16[ k ], 1 ), gain_mult_Q16 ) >= ( silk_int32_MAX >> 1 ) ) { + psEncCtrl->Gains_Q16[ k ] = silk_int32_MAX; } else { - psEncCtrl->Gains_Q16[ k ] = silk_SMULWW( silk_RSHIFT_ROUND( psEncCtrl->Gains_Q16[ k ], 1 ), gain_mult_Q16 ); - if ( psEncCtrl->Gains_Q16[ k ] >= ( silk_int32_MAX >> 1 ) ) { - psEncCtrl->Gains_Q16[ k ] = silk_int32_MAX; - } else { - psEncCtrl->Gains_Q16[ k ] = silk_LSHIFT32( psEncCtrl->Gains_Q16[ k ], 1 ); - } + psEncCtrl->Gains_Q16[ k ] = silk_SMULWW( psEncCtrl->Gains_Q16[ k ], gain_mult_Q16 ); } - silk_assert( psEncCtrl->Gains_Q16[ k ] > 0 ); } - /* Bandwidth expansion */ - silk_bwexpander_32( AR_Q24, psEnc->sCmn.shapingLPCOrder, BWExp_Q16 ); + /* Bandwidth expansion for synthesis filter shaping */ + silk_bwexpander_32( AR2_Q24, psEnc->sCmn.shapingLPCOrder, BWExp2_Q16 ); - if( psEnc->sCmn.warping_Q16 > 0 ) { - /* Convert to monic warped prediction coefficients and limit absolute values */ - limit_warped_coefs( AR_Q24, warping_Q16, SILK_FIX_CONST( 3.999, 24 ), psEnc->sCmn.shapingLPCOrder ); + /* Compute noise shaping filter coefficients */ + silk_memcpy( AR1_Q24, AR2_Q24, psEnc->sCmn.shapingLPCOrder * sizeof( opus_int32 ) ); - /* Convert from Q24 to Q13 and store in int16 */ - for( i = 0; i < psEnc->sCmn.shapingLPCOrder; i++ ) { - psEncCtrl->AR_Q13[ k * MAX_SHAPE_LPC_ORDER + i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( AR_Q24[ i ], 11 ) ); - } - } else { - silk_LPC_fit( &psEncCtrl->AR_Q13[ k * MAX_SHAPE_LPC_ORDER ], AR_Q24, 13, 24, psEnc->sCmn.shapingLPCOrder ); + /* Bandwidth expansion for analysis filter shaping */ + silk_assert( BWExp1_Q16 <= SILK_FIX_CONST( 1.0, 16 ) ); + silk_bwexpander_32( AR1_Q24, psEnc->sCmn.shapingLPCOrder, BWExp1_Q16 ); + + /* Ratio of prediction gains, in energy domain */ + pre_nrg_Q30 = silk_LPC_inverse_pred_gain_Q24( AR2_Q24, psEnc->sCmn.shapingLPCOrder ); + nrg = silk_LPC_inverse_pred_gain_Q24( AR1_Q24, psEnc->sCmn.shapingLPCOrder ); + + /*psEncCtrl->GainsPre[ k ] = 1.0f - 0.7f * ( 1.0f - pre_nrg / nrg ) = 0.3f + 0.7f * pre_nrg / nrg;*/ + pre_nrg_Q30 = silk_LSHIFT32( silk_SMULWB( pre_nrg_Q30, SILK_FIX_CONST( 0.7, 15 ) ), 1 ); + psEncCtrl->GainsPre_Q14[ k ] = ( opus_int ) SILK_FIX_CONST( 0.3, 14 ) + silk_DIV32_varQ( pre_nrg_Q30, nrg, 14 ); + + /* Convert to monic warped prediction coefficients and limit absolute values */ + limit_warped_coefs( AR2_Q24, AR1_Q24, warping_Q16, SILK_FIX_CONST( 3.999, 24 ), psEnc->sCmn.shapingLPCOrder ); + + /* Convert from Q24 to Q13 and store in int16 */ + for( i = 0; i < psEnc->sCmn.shapingLPCOrder; i++ ) { + psEncCtrl->AR1_Q13[ k * MAX_SHAPE_LPC_ORDER + i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( AR1_Q24[ i ], 11 ) ); + psEncCtrl->AR2_Q13[ k * MAX_SHAPE_LPC_ORDER + i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( AR2_Q24[ i ], 11 ) ); } } @@ -340,6 +368,11 @@ void silk_noise_shape_analysis_FIX( psEncCtrl->Gains_Q16[ k ] = silk_ADD_POS_SAT32( psEncCtrl->Gains_Q16[ k ], gain_add_Q16 ); } + gain_mult_Q16 = SILK_FIX_CONST( 1.0, 16 ) + silk_RSHIFT_ROUND( silk_MLA( SILK_FIX_CONST( INPUT_TILT, 26 ), + psEncCtrl->coding_quality_Q14, SILK_FIX_CONST( HIGH_RATE_INPUT_TILT, 12 ) ), 10 ); + for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { + psEncCtrl->GainsPre_Q14[ k ] = silk_SMULWB( gain_mult_Q16, psEncCtrl->GainsPre_Q14[ k ] ); + } /************************************************/ /* Control low-frequency shaping and noise tilt */ @@ -377,6 +410,14 @@ void silk_noise_shape_analysis_FIX( /****************************/ /* HARMONIC SHAPING CONTROL */ /****************************/ + /* Control boosting of harmonic frequencies */ + HarmBoost_Q16 = silk_SMULWB( silk_SMULWB( SILK_FIX_CONST( 1.0, 17 ) - silk_LSHIFT( psEncCtrl->coding_quality_Q14, 3 ), + psEnc->LTPCorr_Q15 ), SILK_FIX_CONST( LOW_RATE_HARMONIC_BOOST, 16 ) ); + + /* More harmonic boost for noisy input signals */ + HarmBoost_Q16 = silk_SMLAWB( HarmBoost_Q16, + SILK_FIX_CONST( 1.0, 16 ) - silk_LSHIFT( psEncCtrl->input_quality_Q14, 2 ), SILK_FIX_CONST( LOW_INPUT_QUALITY_HARMONIC_BOOST, 16 ) ); + if( USE_HARM_SHAPING && psEnc->sCmn.indices.signalType == TYPE_VOICED ) { /* More harmonic noise shaping for high bitrates or noisy input */ HarmShapeGain_Q16 = silk_SMLAWB( SILK_FIX_CONST( HARMONIC_SHAPING, 16 ), @@ -394,11 +435,14 @@ void silk_noise_shape_analysis_FIX( /* Smooth over subframes */ /*************************/ for( k = 0; k < MAX_NB_SUBFR; k++ ) { + psShapeSt->HarmBoost_smth_Q16 = + silk_SMLAWB( psShapeSt->HarmBoost_smth_Q16, HarmBoost_Q16 - psShapeSt->HarmBoost_smth_Q16, SILK_FIX_CONST( SUBFR_SMTH_COEF, 16 ) ); psShapeSt->HarmShapeGain_smth_Q16 = silk_SMLAWB( psShapeSt->HarmShapeGain_smth_Q16, HarmShapeGain_Q16 - psShapeSt->HarmShapeGain_smth_Q16, SILK_FIX_CONST( SUBFR_SMTH_COEF, 16 ) ); psShapeSt->Tilt_smth_Q16 = silk_SMLAWB( psShapeSt->Tilt_smth_Q16, Tilt_Q16 - psShapeSt->Tilt_smth_Q16, SILK_FIX_CONST( SUBFR_SMTH_COEF, 16 ) ); + psEncCtrl->HarmBoost_Q14[ k ] = ( opus_int )silk_RSHIFT_ROUND( psShapeSt->HarmBoost_smth_Q16, 2 ); psEncCtrl->HarmShapeGain_Q14[ k ] = ( opus_int )silk_RSHIFT_ROUND( psShapeSt->HarmShapeGain_smth_Q16, 2 ); psEncCtrl->Tilt_Q14[ k ] = ( opus_int )silk_RSHIFT_ROUND( psShapeSt->Tilt_smth_Q16, 2 ); } diff --git a/thirdparty/opus/silk/fixed/pitch_analysis_core_FIX.c b/thirdparty/opus/silk/fixed/pitch_analysis_core_FIX.c index 14729046d2..01bb9fc0a8 100644 --- a/thirdparty/opus/silk/fixed/pitch_analysis_core_FIX.c +++ b/thirdparty/opus/silk/fixed/pitch_analysis_core_FIX.c @@ -80,7 +80,7 @@ static void silk_P_Ana_calc_energy_st3( /* FIXED POINT CORE PITCH ANALYSIS FUNCTION */ /*************************************************************/ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0 voiced, 1 unvoiced */ - const opus_int16 *frame_unscaled, /* I Signal of length PE_FRAME_LENGTH_MS*Fs_kHz */ + const opus_int16 *frame, /* I Signal of length PE_FRAME_LENGTH_MS*Fs_kHz */ opus_int *pitch_out, /* O 4 pitch lag values */ opus_int16 *lagIndex, /* O Lag Index */ opus_int8 *contourIndex, /* O Pitch contour Index */ @@ -94,17 +94,16 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0 int arch /* I Run-time architecture */ ) { - VARDECL( opus_int16, frame_8kHz_buf ); + VARDECL( opus_int16, frame_8kHz ); VARDECL( opus_int16, frame_4kHz ); - VARDECL( opus_int16, frame_scaled ); opus_int32 filt_state[ 6 ]; - const opus_int16 *frame, *frame_8kHz; + const opus_int16 *input_frame_ptr; opus_int i, k, d, j; VARDECL( opus_int16, C ); VARDECL( opus_int32, xcorr32 ); const opus_int16 *target_ptr, *basis_ptr; - opus_int32 cross_corr, normalizer, energy, energy_basis, energy_target; - opus_int d_srch[ PE_D_SRCH_LENGTH ], Cmax, length_d_srch, length_d_comp, shift; + opus_int32 cross_corr, normalizer, energy, shift, energy_basis, energy_target; + opus_int d_srch[ PE_D_SRCH_LENGTH ], Cmax, length_d_srch, length_d_comp; VARDECL( opus_int16, d_comp ); opus_int32 sum, threshold, lag_counter; opus_int CBimax, CBimax_new, CBimax_old, lag, start_lag, end_lag, lag_new; @@ -120,13 +119,12 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0 opus_int32 delta_lag_log2_sqr_Q7, lag_log2_Q7, prevLag_log2_Q7, prev_lag_bias_Q13; const opus_int8 *Lag_CB_ptr; SAVE_STACK; - /* Check for valid sampling frequency */ - celt_assert( Fs_kHz == 8 || Fs_kHz == 12 || Fs_kHz == 16 ); + silk_assert( Fs_kHz == 8 || Fs_kHz == 12 || Fs_kHz == 16 ); /* Check for valid complexity setting */ - celt_assert( complexity >= SILK_PE_MIN_COMPLEX ); - celt_assert( complexity <= SILK_PE_MAX_COMPLEX ); + silk_assert( complexity >= SILK_PE_MIN_COMPLEX ); + silk_assert( complexity <= SILK_PE_MAX_COMPLEX ); silk_assert( search_thres1_Q16 >= 0 && search_thres1_Q16 <= (1<<16) ); silk_assert( search_thres2_Q13 >= 0 && search_thres2_Q13 <= (1<<13) ); @@ -139,33 +137,17 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0 min_lag = PE_MIN_LAG_MS * Fs_kHz; max_lag = PE_MAX_LAG_MS * Fs_kHz - 1; - /* Downscale input if necessary */ - silk_sum_sqr_shift( &energy, &shift, frame_unscaled, frame_length ); - shift += 3 - silk_CLZ32( energy ); /* at least two bits headroom */ - ALLOC( frame_scaled, frame_length, opus_int16 ); - if( shift > 0 ) { - shift = silk_RSHIFT( shift + 1, 1 ); - for( i = 0; i < frame_length; i++ ) { - frame_scaled[ i ] = silk_RSHIFT( frame_unscaled[ i ], shift ); - } - frame = frame_scaled; - } else { - frame = frame_unscaled; - } - - ALLOC( frame_8kHz_buf, ( Fs_kHz == 8 ) ? 1 : frame_length_8kHz, opus_int16 ); /* Resample from input sampled at Fs_kHz to 8 kHz */ + ALLOC( frame_8kHz, frame_length_8kHz, opus_int16 ); if( Fs_kHz == 16 ) { silk_memset( filt_state, 0, 2 * sizeof( opus_int32 ) ); - silk_resampler_down2( filt_state, frame_8kHz_buf, frame, frame_length ); - frame_8kHz = frame_8kHz_buf; + silk_resampler_down2( filt_state, frame_8kHz, frame, frame_length ); } else if( Fs_kHz == 12 ) { silk_memset( filt_state, 0, 6 * sizeof( opus_int32 ) ); - silk_resampler_down2_3( filt_state, frame_8kHz_buf, frame, frame_length ); - frame_8kHz = frame_8kHz_buf; + silk_resampler_down2_3( filt_state, frame_8kHz, frame, frame_length ); } else { - celt_assert( Fs_kHz == 8 ); - frame_8kHz = frame; + silk_assert( Fs_kHz == 8 ); + silk_memcpy( frame_8kHz, frame, frame_length_8kHz * sizeof(opus_int16) ); } /* Decimate again to 4 kHz */ @@ -178,6 +160,19 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0 frame_4kHz[ i ] = silk_ADD_SAT16( frame_4kHz[ i ], frame_4kHz[ i - 1 ] ); } + /******************************************************************************* + ** Scale 4 kHz signal down to prevent correlations measures from overflowing + ** find scaling as max scaling for each 8kHz(?) subframe + *******************************************************************************/ + + /* Inner product is calculated with different lengths, so scale for the worst case */ + silk_sum_sqr_shift( &energy, &shift, frame_4kHz, frame_length_4kHz ); + if( shift > 0 ) { + shift = silk_RSHIFT( shift, 1 ); + for( i = 0; i < frame_length_4kHz; i++ ) { + frame_4kHz[ i ] = silk_RSHIFT( frame_4kHz[ i ], shift ); + } + } /****************************************************************************** * FIRST STAGE, operating in 4 khz @@ -188,14 +183,14 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0 target_ptr = &frame_4kHz[ silk_LSHIFT( SF_LENGTH_4KHZ, 2 ) ]; for( k = 0; k < nb_subfr >> 1; k++ ) { /* Check that we are within range of the array */ - celt_assert( target_ptr >= frame_4kHz ); - celt_assert( target_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz ); + silk_assert( target_ptr >= frame_4kHz ); + silk_assert( target_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz ); basis_ptr = target_ptr - MIN_LAG_4KHZ; /* Check that we are within range of the array */ - celt_assert( basis_ptr >= frame_4kHz ); - celt_assert( basis_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz ); + silk_assert( basis_ptr >= frame_4kHz ); + silk_assert( basis_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz ); celt_pitch_xcorr( target_ptr, target_ptr - MAX_LAG_4KHZ, xcorr32, SF_LENGTH_8KHZ, MAX_LAG_4KHZ - MIN_LAG_4KHZ + 1, arch ); @@ -249,7 +244,7 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0 /* Sort */ length_d_srch = silk_ADD_LSHIFT32( 4, complexity, 1 ); - celt_assert( 3 * length_d_srch <= PE_D_SRCH_LENGTH ); + silk_assert( 3 * length_d_srch <= PE_D_SRCH_LENGTH ); silk_insertion_sort_decreasing_int16( C, d_srch, CSTRIDE_4KHZ, length_d_srch ); @@ -274,7 +269,7 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0 break; } } - celt_assert( length_d_srch > 0 ); + silk_assert( length_d_srch > 0 ); ALLOC( d_comp, D_COMP_STRIDE, opus_int16 ); for( i = D_COMP_MIN; i < D_COMP_MAX; i++ ) { @@ -316,6 +311,18 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0 ** SECOND STAGE, operating at 8 kHz, on lag sections with high correlation *************************************************************************************/ + /****************************************************************************** + ** Scale signal down to avoid correlations measures from overflowing + *******************************************************************************/ + /* find scaling as max scaling for each subframe */ + silk_sum_sqr_shift( &energy, &shift, frame_8kHz, frame_length_8kHz ); + if( shift > 0 ) { + shift = silk_RSHIFT( shift, 1 ); + for( i = 0; i < frame_length_8kHz; i++ ) { + frame_8kHz[ i ] = silk_RSHIFT( frame_8kHz[ i ], shift ); + } + } + /********************************************************************************* * Find energy of each subframe projected onto its history, for a range of delays *********************************************************************************/ @@ -325,8 +332,8 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0 for( k = 0; k < nb_subfr; k++ ) { /* Check that we are within range of the array */ - celt_assert( target_ptr >= frame_8kHz ); - celt_assert( target_ptr + SF_LENGTH_8KHZ <= frame_8kHz + frame_length_8kHz ); + silk_assert( target_ptr >= frame_8kHz ); + silk_assert( target_ptr + SF_LENGTH_8KHZ <= frame_8kHz + frame_length_8kHz ); energy_target = silk_ADD32( silk_inner_prod_aligned( target_ptr, target_ptr, SF_LENGTH_8KHZ, arch ), 1 ); for( j = 0; j < length_d_comp; j++ ) { @@ -455,6 +462,24 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0 silk_assert( *LTPCorr_Q15 >= 0 ); if( Fs_kHz > 8 ) { + VARDECL( opus_int16, scratch_mem ); + /***************************************************************************/ + /* Scale input signal down to avoid correlations measures from overflowing */ + /***************************************************************************/ + /* find scaling as max scaling for each subframe */ + silk_sum_sqr_shift( &energy, &shift, frame, frame_length ); + ALLOC( scratch_mem, shift > 0 ? frame_length : ALLOC_NONE, opus_int16 ); + if( shift > 0 ) { + /* Move signal to scratch mem because the input signal should be unchanged */ + shift = silk_RSHIFT( shift, 1 ); + for( i = 0; i < frame_length; i++ ) { + scratch_mem[ i ] = silk_RSHIFT( frame[ i ], shift ); + } + input_frame_ptr = scratch_mem; + } else { + input_frame_ptr = frame; + } + /* Search in original signal */ CBimax_old = CBimax; @@ -494,14 +519,14 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0 /* Calculate the correlations and energies needed in stage 3 */ ALLOC( energies_st3, nb_subfr * nb_cbk_search, silk_pe_stage3_vals ); ALLOC( cross_corr_st3, nb_subfr * nb_cbk_search, silk_pe_stage3_vals ); - silk_P_Ana_calc_corr_st3( cross_corr_st3, frame, start_lag, sf_length, nb_subfr, complexity, arch ); - silk_P_Ana_calc_energy_st3( energies_st3, frame, start_lag, sf_length, nb_subfr, complexity, arch ); + silk_P_Ana_calc_corr_st3( cross_corr_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity, arch ); + silk_P_Ana_calc_energy_st3( energies_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity, arch ); lag_counter = 0; silk_assert( lag == silk_SAT16( lag ) ); contour_bias_Q15 = silk_DIV32_16( SILK_FIX_CONST( PE_FLATCONTOUR_BIAS, 15 ), lag ); - target_ptr = &frame[ PE_LTP_MEM_LENGTH_MS * Fs_kHz ]; + target_ptr = &input_frame_ptr[ PE_LTP_MEM_LENGTH_MS * Fs_kHz ]; energy_target = silk_ADD32( silk_inner_prod_aligned( target_ptr, target_ptr, nb_subfr * sf_length, arch ), 1 ); for( d = start_lag; d <= end_lag; d++ ) { for( j = 0; j < nb_cbk_search; j++ ) { @@ -550,7 +575,7 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0 *lagIndex = (opus_int16)( lag - MIN_LAG_8KHZ ); *contourIndex = (opus_int8)CBimax; } - celt_assert( *lagIndex >= 0 ); + silk_assert( *lagIndex >= 0 ); /* return as voiced */ RESTORE_STACK; return 0; @@ -587,8 +612,8 @@ static void silk_P_Ana_calc_corr_st3( const opus_int8 *Lag_range_ptr, *Lag_CB_ptr; SAVE_STACK; - celt_assert( complexity >= SILK_PE_MIN_COMPLEX ); - celt_assert( complexity <= SILK_PE_MAX_COMPLEX ); + silk_assert( complexity >= SILK_PE_MIN_COMPLEX ); + silk_assert( complexity <= SILK_PE_MAX_COMPLEX ); if( nb_subfr == PE_MAX_NB_SUBFR ) { Lag_range_ptr = &silk_Lag_range_stage3[ complexity ][ 0 ][ 0 ]; @@ -596,7 +621,7 @@ static void silk_P_Ana_calc_corr_st3( nb_cbk_search = silk_nb_cbk_searchs_stage3[ complexity ]; cbk_size = PE_NB_CBKS_STAGE3_MAX; } else { - celt_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1); + silk_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1); Lag_range_ptr = &silk_Lag_range_stage3_10_ms[ 0 ][ 0 ]; Lag_CB_ptr = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ]; nb_cbk_search = PE_NB_CBKS_STAGE3_10MS; @@ -612,7 +637,7 @@ static void silk_P_Ana_calc_corr_st3( /* Calculate the correlations for each subframe */ lag_low = matrix_ptr( Lag_range_ptr, k, 0, 2 ); lag_high = matrix_ptr( Lag_range_ptr, k, 1, 2 ); - celt_assert(lag_high-lag_low+1 <= SCRATCH_SIZE); + silk_assert(lag_high-lag_low+1 <= SCRATCH_SIZE); celt_pitch_xcorr( target_ptr, target_ptr - start_lag - lag_high, xcorr32, sf_length, lag_high - lag_low + 1, arch ); for( j = lag_low; j <= lag_high; j++ ) { silk_assert( lag_counter < SCRATCH_SIZE ); @@ -659,8 +684,8 @@ static void silk_P_Ana_calc_energy_st3( const opus_int8 *Lag_range_ptr, *Lag_CB_ptr; SAVE_STACK; - celt_assert( complexity >= SILK_PE_MIN_COMPLEX ); - celt_assert( complexity <= SILK_PE_MAX_COMPLEX ); + silk_assert( complexity >= SILK_PE_MIN_COMPLEX ); + silk_assert( complexity <= SILK_PE_MAX_COMPLEX ); if( nb_subfr == PE_MAX_NB_SUBFR ) { Lag_range_ptr = &silk_Lag_range_stage3[ complexity ][ 0 ][ 0 ]; @@ -668,7 +693,7 @@ static void silk_P_Ana_calc_energy_st3( nb_cbk_search = silk_nb_cbk_searchs_stage3[ complexity ]; cbk_size = PE_NB_CBKS_STAGE3_MAX; } else { - celt_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1); + silk_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1); Lag_range_ptr = &silk_Lag_range_stage3_10_ms[ 0 ][ 0 ]; Lag_CB_ptr = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ]; nb_cbk_search = PE_NB_CBKS_STAGE3_10MS; diff --git a/thirdparty/opus/silk/fixed/prefilter_FIX.c b/thirdparty/opus/silk/fixed/prefilter_FIX.c new file mode 100644 index 0000000000..6a8e35152e --- /dev/null +++ b/thirdparty/opus/silk/fixed/prefilter_FIX.c @@ -0,0 +1,221 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "main_FIX.h" +#include "stack_alloc.h" +#include "tuning_parameters.h" + +#if defined(MIPSr1_ASM) +#include "mips/prefilter_FIX_mipsr1.h" +#endif + + +#if !defined(OVERRIDE_silk_warped_LPC_analysis_filter_FIX) +#define silk_warped_LPC_analysis_filter_FIX(state, res_Q2, coef_Q13, input, lambda_Q16, length, order, arch) \ + ((void)(arch),silk_warped_LPC_analysis_filter_FIX_c(state, res_Q2, coef_Q13, input, lambda_Q16, length, order)) +#endif + +/* Prefilter for finding Quantizer input signal */ +static OPUS_INLINE void silk_prefilt_FIX( + silk_prefilter_state_FIX *P, /* I/O state */ + opus_int32 st_res_Q12[], /* I short term residual signal */ + opus_int32 xw_Q3[], /* O prefiltered signal */ + opus_int32 HarmShapeFIRPacked_Q12, /* I Harmonic shaping coeficients */ + opus_int Tilt_Q14, /* I Tilt shaping coeficient */ + opus_int32 LF_shp_Q14, /* I Low-frequancy shaping coeficients */ + opus_int lag, /* I Lag for harmonic shaping */ + opus_int length /* I Length of signals */ +); + +void silk_warped_LPC_analysis_filter_FIX_c( + opus_int32 state[], /* I/O State [order + 1] */ + opus_int32 res_Q2[], /* O Residual signal [length] */ + const opus_int16 coef_Q13[], /* I Coefficients [order] */ + const opus_int16 input[], /* I Input signal [length] */ + const opus_int16 lambda_Q16, /* I Warping factor */ + const opus_int length, /* I Length of input signal */ + const opus_int order /* I Filter order (even) */ +) +{ + opus_int n, i; + opus_int32 acc_Q11, tmp1, tmp2; + + /* Order must be even */ + silk_assert( ( order & 1 ) == 0 ); + + for( n = 0; n < length; n++ ) { + /* Output of lowpass section */ + tmp2 = silk_SMLAWB( state[ 0 ], state[ 1 ], lambda_Q16 ); + state[ 0 ] = silk_LSHIFT( input[ n ], 14 ); + /* Output of allpass section */ + tmp1 = silk_SMLAWB( state[ 1 ], state[ 2 ] - tmp2, lambda_Q16 ); + state[ 1 ] = tmp2; + acc_Q11 = silk_RSHIFT( order, 1 ); + acc_Q11 = silk_SMLAWB( acc_Q11, tmp2, coef_Q13[ 0 ] ); + /* Loop over allpass sections */ + for( i = 2; i < order; i += 2 ) { + /* Output of allpass section */ + tmp2 = silk_SMLAWB( state[ i ], state[ i + 1 ] - tmp1, lambda_Q16 ); + state[ i ] = tmp1; + acc_Q11 = silk_SMLAWB( acc_Q11, tmp1, coef_Q13[ i - 1 ] ); + /* Output of allpass section */ + tmp1 = silk_SMLAWB( state[ i + 1 ], state[ i + 2 ] - tmp2, lambda_Q16 ); + state[ i + 1 ] = tmp2; + acc_Q11 = silk_SMLAWB( acc_Q11, tmp2, coef_Q13[ i ] ); + } + state[ order ] = tmp1; + acc_Q11 = silk_SMLAWB( acc_Q11, tmp1, coef_Q13[ order - 1 ] ); + res_Q2[ n ] = silk_LSHIFT( (opus_int32)input[ n ], 2 ) - silk_RSHIFT_ROUND( acc_Q11, 9 ); + } +} + +void silk_prefilter_FIX( + silk_encoder_state_FIX *psEnc, /* I/O Encoder state */ + const silk_encoder_control_FIX *psEncCtrl, /* I Encoder control */ + opus_int32 xw_Q3[], /* O Weighted signal */ + const opus_int16 x[] /* I Speech signal */ +) +{ + silk_prefilter_state_FIX *P = &psEnc->sPrefilt; + opus_int j, k, lag; + opus_int32 tmp_32; + const opus_int16 *AR1_shp_Q13; + const opus_int16 *px; + opus_int32 *pxw_Q3; + opus_int HarmShapeGain_Q12, Tilt_Q14; + opus_int32 HarmShapeFIRPacked_Q12, LF_shp_Q14; + VARDECL( opus_int32, x_filt_Q12 ); + VARDECL( opus_int32, st_res_Q2 ); + opus_int16 B_Q10[ 2 ]; + SAVE_STACK; + + /* Set up pointers */ + px = x; + pxw_Q3 = xw_Q3; + lag = P->lagPrev; + ALLOC( x_filt_Q12, psEnc->sCmn.subfr_length, opus_int32 ); + ALLOC( st_res_Q2, psEnc->sCmn.subfr_length, opus_int32 ); + for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { + /* Update Variables that change per sub frame */ + if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) { + lag = psEncCtrl->pitchL[ k ]; + } + + /* Noise shape parameters */ + HarmShapeGain_Q12 = silk_SMULWB( (opus_int32)psEncCtrl->HarmShapeGain_Q14[ k ], 16384 - psEncCtrl->HarmBoost_Q14[ k ] ); + silk_assert( HarmShapeGain_Q12 >= 0 ); + HarmShapeFIRPacked_Q12 = silk_RSHIFT( HarmShapeGain_Q12, 2 ); + HarmShapeFIRPacked_Q12 |= silk_LSHIFT( (opus_int32)silk_RSHIFT( HarmShapeGain_Q12, 1 ), 16 ); + Tilt_Q14 = psEncCtrl->Tilt_Q14[ k ]; + LF_shp_Q14 = psEncCtrl->LF_shp_Q14[ k ]; + AR1_shp_Q13 = &psEncCtrl->AR1_Q13[ k * MAX_SHAPE_LPC_ORDER ]; + + /* Short term FIR filtering*/ + silk_warped_LPC_analysis_filter_FIX( P->sAR_shp, st_res_Q2, AR1_shp_Q13, px, + psEnc->sCmn.warping_Q16, psEnc->sCmn.subfr_length, psEnc->sCmn.shapingLPCOrder, psEnc->sCmn.arch ); + + /* Reduce (mainly) low frequencies during harmonic emphasis */ + B_Q10[ 0 ] = silk_RSHIFT_ROUND( psEncCtrl->GainsPre_Q14[ k ], 4 ); + tmp_32 = silk_SMLABB( SILK_FIX_CONST( INPUT_TILT, 26 ), psEncCtrl->HarmBoost_Q14[ k ], HarmShapeGain_Q12 ); /* Q26 */ + tmp_32 = silk_SMLABB( tmp_32, psEncCtrl->coding_quality_Q14, SILK_FIX_CONST( HIGH_RATE_INPUT_TILT, 12 ) ); /* Q26 */ + tmp_32 = silk_SMULWB( tmp_32, -psEncCtrl->GainsPre_Q14[ k ] ); /* Q24 */ + tmp_32 = silk_RSHIFT_ROUND( tmp_32, 14 ); /* Q10 */ + B_Q10[ 1 ]= silk_SAT16( tmp_32 ); + x_filt_Q12[ 0 ] = silk_MLA( silk_MUL( st_res_Q2[ 0 ], B_Q10[ 0 ] ), P->sHarmHP_Q2, B_Q10[ 1 ] ); + for( j = 1; j < psEnc->sCmn.subfr_length; j++ ) { + x_filt_Q12[ j ] = silk_MLA( silk_MUL( st_res_Q2[ j ], B_Q10[ 0 ] ), st_res_Q2[ j - 1 ], B_Q10[ 1 ] ); + } + P->sHarmHP_Q2 = st_res_Q2[ psEnc->sCmn.subfr_length - 1 ]; + + silk_prefilt_FIX( P, x_filt_Q12, pxw_Q3, HarmShapeFIRPacked_Q12, Tilt_Q14, LF_shp_Q14, lag, psEnc->sCmn.subfr_length ); + + px += psEnc->sCmn.subfr_length; + pxw_Q3 += psEnc->sCmn.subfr_length; + } + + P->lagPrev = psEncCtrl->pitchL[ psEnc->sCmn.nb_subfr - 1 ]; + RESTORE_STACK; +} + +#ifndef OVERRIDE_silk_prefilt_FIX +/* Prefilter for finding Quantizer input signal */ +static OPUS_INLINE void silk_prefilt_FIX( + silk_prefilter_state_FIX *P, /* I/O state */ + opus_int32 st_res_Q12[], /* I short term residual signal */ + opus_int32 xw_Q3[], /* O prefiltered signal */ + opus_int32 HarmShapeFIRPacked_Q12, /* I Harmonic shaping coeficients */ + opus_int Tilt_Q14, /* I Tilt shaping coeficient */ + opus_int32 LF_shp_Q14, /* I Low-frequancy shaping coeficients */ + opus_int lag, /* I Lag for harmonic shaping */ + opus_int length /* I Length of signals */ +) +{ + opus_int i, idx, LTP_shp_buf_idx; + opus_int32 n_LTP_Q12, n_Tilt_Q10, n_LF_Q10; + opus_int32 sLF_MA_shp_Q12, sLF_AR_shp_Q12; + opus_int16 *LTP_shp_buf; + + /* To speed up use temp variables instead of using the struct */ + LTP_shp_buf = P->sLTP_shp; + LTP_shp_buf_idx = P->sLTP_shp_buf_idx; + sLF_AR_shp_Q12 = P->sLF_AR_shp_Q12; + sLF_MA_shp_Q12 = P->sLF_MA_shp_Q12; + + for( i = 0; i < length; i++ ) { + if( lag > 0 ) { + /* unrolled loop */ + silk_assert( HARM_SHAPE_FIR_TAPS == 3 ); + idx = lag + LTP_shp_buf_idx; + n_LTP_Q12 = silk_SMULBB( LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2 - 1) & LTP_MASK ], HarmShapeFIRPacked_Q12 ); + n_LTP_Q12 = silk_SMLABT( n_LTP_Q12, LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2 ) & LTP_MASK ], HarmShapeFIRPacked_Q12 ); + n_LTP_Q12 = silk_SMLABB( n_LTP_Q12, LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2 + 1) & LTP_MASK ], HarmShapeFIRPacked_Q12 ); + } else { + n_LTP_Q12 = 0; + } + + n_Tilt_Q10 = silk_SMULWB( sLF_AR_shp_Q12, Tilt_Q14 ); + n_LF_Q10 = silk_SMLAWB( silk_SMULWT( sLF_AR_shp_Q12, LF_shp_Q14 ), sLF_MA_shp_Q12, LF_shp_Q14 ); + + sLF_AR_shp_Q12 = silk_SUB32( st_res_Q12[ i ], silk_LSHIFT( n_Tilt_Q10, 2 ) ); + sLF_MA_shp_Q12 = silk_SUB32( sLF_AR_shp_Q12, silk_LSHIFT( n_LF_Q10, 2 ) ); + + LTP_shp_buf_idx = ( LTP_shp_buf_idx - 1 ) & LTP_MASK; + LTP_shp_buf[ LTP_shp_buf_idx ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sLF_MA_shp_Q12, 12 ) ); + + xw_Q3[i] = silk_RSHIFT_ROUND( silk_SUB32( sLF_MA_shp_Q12, n_LTP_Q12 ), 9 ); + } + + /* Copy temp variable back to state */ + P->sLF_AR_shp_Q12 = sLF_AR_shp_Q12; + P->sLF_MA_shp_Q12 = sLF_MA_shp_Q12; + P->sLTP_shp_buf_idx = LTP_shp_buf_idx; +} +#endif /* OVERRIDE_silk_prefilt_FIX */ diff --git a/thirdparty/opus/silk/fixed/residual_energy16_FIX.c b/thirdparty/opus/silk/fixed/residual_energy16_FIX.c index 7f130f3d3d..ebffb2a66f 100644 --- a/thirdparty/opus/silk/fixed/residual_energy16_FIX.c +++ b/thirdparty/opus/silk/fixed/residual_energy16_FIX.c @@ -47,10 +47,10 @@ opus_int32 silk_residual_energy16_covar_FIX( const opus_int32 *pRow; /* Safety checks */ - celt_assert( D >= 0 ); - celt_assert( D <= 16 ); - celt_assert( cQ > 0 ); - celt_assert( cQ < 16 ); + silk_assert( D >= 0 ); + silk_assert( D <= 16 ); + silk_assert( cQ > 0 ); + silk_assert( cQ < 16 ); lshifts = 16 - cQ; Qxtra = lshifts; diff --git a/thirdparty/opus/silk/fixed/residual_energy_FIX.c b/thirdparty/opus/silk/fixed/residual_energy_FIX.c index 6c7cade9a0..41f74778e8 100644 --- a/thirdparty/opus/silk/fixed/residual_energy_FIX.c +++ b/thirdparty/opus/silk/fixed/residual_energy_FIX.c @@ -58,7 +58,7 @@ void silk_residual_energy_FIX( /* Filter input to create the LPC residual for each frame half, and measure subframe energies */ ALLOC( LPC_res, ( MAX_NB_SUBFR >> 1 ) * offset, opus_int16 ); - celt_assert( ( nb_subfr >> 1 ) * ( MAX_NB_SUBFR >> 1 ) == nb_subfr ); + silk_assert( ( nb_subfr >> 1 ) * ( MAX_NB_SUBFR >> 1 ) == nb_subfr ); for( i = 0; i < nb_subfr >> 1; i++ ) { /* Calculate half frame LPC residual signal including preceding samples */ silk_LPC_analysis_filter( LPC_res, x_ptr, a_Q12[ i ], ( MAX_NB_SUBFR >> 1 ) * offset, LPC_order, arch ); diff --git a/thirdparty/opus/silk/fixed/schur64_FIX.c b/thirdparty/opus/silk/fixed/schur64_FIX.c index 4b7e19ea59..764a10ef3e 100644 --- a/thirdparty/opus/silk/fixed/schur64_FIX.c +++ b/thirdparty/opus/silk/fixed/schur64_FIX.c @@ -43,7 +43,7 @@ opus_int32 silk_schur64( /* O returns residual ene opus_int32 C[ SILK_MAX_ORDER_LPC + 1 ][ 2 ]; opus_int32 Ctmp1_Q30, Ctmp2_Q30, rc_tmp_Q31; - celt_assert( order >= 0 && order <= SILK_MAX_ORDER_LPC ); + silk_assert( order==6||order==8||order==10||order==12||order==14||order==16 ); /* Check for invalid input */ if( c[ 0 ] <= 0 ) { @@ -51,10 +51,9 @@ opus_int32 silk_schur64( /* O returns residual ene return 0; } - k = 0; - do { + for( k = 0; k < order + 1; k++ ) { C[ k ][ 0 ] = C[ k ][ 1 ] = c[ k ]; - } while( ++k <= order ); + } for( k = 0; k < order; k++ ) { /* Check that we won't be getting an unstable rc, otherwise stop here. */ diff --git a/thirdparty/opus/silk/fixed/schur_FIX.c b/thirdparty/opus/silk/fixed/schur_FIX.c index 2840f6b1aa..c4c0ef23b4 100644 --- a/thirdparty/opus/silk/fixed/schur_FIX.c +++ b/thirdparty/opus/silk/fixed/schur_FIX.c @@ -43,29 +43,28 @@ opus_int32 silk_schur( /* O Returns residual ene opus_int32 C[ SILK_MAX_ORDER_LPC + 1 ][ 2 ]; opus_int32 Ctmp1, Ctmp2, rc_tmp_Q15; - celt_assert( order >= 0 && order <= SILK_MAX_ORDER_LPC ); + silk_assert( order==6||order==8||order==10||order==12||order==14||order==16 ); /* Get number of leading zeros */ lz = silk_CLZ32( c[ 0 ] ); /* Copy correlations and adjust level to Q30 */ - k = 0; if( lz < 2 ) { /* lz must be 1, so shift one to the right */ - do { + for( k = 0; k < order + 1; k++ ) { C[ k ][ 0 ] = C[ k ][ 1 ] = silk_RSHIFT( c[ k ], 1 ); - } while( ++k <= order ); + } } else if( lz > 2 ) { /* Shift to the left */ lz -= 2; - do { + for( k = 0; k < order + 1; k++ ) { C[ k ][ 0 ] = C[ k ][ 1 ] = silk_LSHIFT( c[ k ], lz ); - } while( ++k <= order ); + } } else { /* No need to shift */ - do { + for( k = 0; k < order + 1; k++ ) { C[ k ][ 0 ] = C[ k ][ 1 ] = c[ k ]; - } while( ++k <= order ); + } } for( k = 0; k < order; k++ ) { diff --git a/thirdparty/opus/silk/fixed/solve_LS_FIX.c b/thirdparty/opus/silk/fixed/solve_LS_FIX.c new file mode 100644 index 0000000000..51d7d49d02 --- /dev/null +++ b/thirdparty/opus/silk/fixed/solve_LS_FIX.c @@ -0,0 +1,249 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "main_FIX.h" +#include "stack_alloc.h" +#include "tuning_parameters.h" + +/*****************************/ +/* Internal function headers */ +/*****************************/ + +typedef struct { + opus_int32 Q36_part; + opus_int32 Q48_part; +} inv_D_t; + +/* Factorize square matrix A into LDL form */ +static OPUS_INLINE void silk_LDL_factorize_FIX( + opus_int32 *A, /* I/O Pointer to Symetric Square Matrix */ + opus_int M, /* I Size of Matrix */ + opus_int32 *L_Q16, /* I/O Pointer to Square Upper triangular Matrix */ + inv_D_t *inv_D /* I/O Pointer to vector holding inverted diagonal elements of D */ +); + +/* Solve Lx = b, when L is lower triangular and has ones on the diagonal */ +static OPUS_INLINE void silk_LS_SolveFirst_FIX( + const opus_int32 *L_Q16, /* I Pointer to Lower Triangular Matrix */ + opus_int M, /* I Dim of Matrix equation */ + const opus_int32 *b, /* I b Vector */ + opus_int32 *x_Q16 /* O x Vector */ +); + +/* Solve L^t*x = b, where L is lower triangular with ones on the diagonal */ +static OPUS_INLINE void silk_LS_SolveLast_FIX( + const opus_int32 *L_Q16, /* I Pointer to Lower Triangular Matrix */ + const opus_int M, /* I Dim of Matrix equation */ + const opus_int32 *b, /* I b Vector */ + opus_int32 *x_Q16 /* O x Vector */ +); + +static OPUS_INLINE void silk_LS_divide_Q16_FIX( + opus_int32 T[], /* I/O Numenator vector */ + inv_D_t *inv_D, /* I 1 / D vector */ + opus_int M /* I dimension */ +); + +/* Solves Ax = b, assuming A is symmetric */ +void silk_solve_LDL_FIX( + opus_int32 *A, /* I Pointer to symetric square matrix A */ + opus_int M, /* I Size of matrix */ + const opus_int32 *b, /* I Pointer to b vector */ + opus_int32 *x_Q16 /* O Pointer to x solution vector */ +) +{ + VARDECL( opus_int32, L_Q16 ); + opus_int32 Y[ MAX_MATRIX_SIZE ]; + inv_D_t inv_D[ MAX_MATRIX_SIZE ]; + SAVE_STACK; + + silk_assert( M <= MAX_MATRIX_SIZE ); + ALLOC( L_Q16, M * M, opus_int32 ); + + /*************************************************** + Factorize A by LDL such that A = L*D*L', + where L is lower triangular with ones on diagonal + ****************************************************/ + silk_LDL_factorize_FIX( A, M, L_Q16, inv_D ); + + /**************************************************** + * substitute D*L'*x = Y. ie: + L*D*L'*x = b => L*Y = b <=> Y = inv(L)*b + ******************************************************/ + silk_LS_SolveFirst_FIX( L_Q16, M, b, Y ); + + /**************************************************** + D*L'*x = Y <=> L'*x = inv(D)*Y, because D is + diagonal just multiply with 1/d_i + ****************************************************/ + silk_LS_divide_Q16_FIX( Y, inv_D, M ); + + /**************************************************** + x = inv(L') * inv(D) * Y + *****************************************************/ + silk_LS_SolveLast_FIX( L_Q16, M, Y, x_Q16 ); + RESTORE_STACK; +} + +static OPUS_INLINE void silk_LDL_factorize_FIX( + opus_int32 *A, /* I/O Pointer to Symetric Square Matrix */ + opus_int M, /* I Size of Matrix */ + opus_int32 *L_Q16, /* I/O Pointer to Square Upper triangular Matrix */ + inv_D_t *inv_D /* I/O Pointer to vector holding inverted diagonal elements of D */ +) +{ + opus_int i, j, k, status, loop_count; + const opus_int32 *ptr1, *ptr2; + opus_int32 diag_min_value, tmp_32, err; + opus_int32 v_Q0[ MAX_MATRIX_SIZE ], D_Q0[ MAX_MATRIX_SIZE ]; + opus_int32 one_div_diag_Q36, one_div_diag_Q40, one_div_diag_Q48; + + silk_assert( M <= MAX_MATRIX_SIZE ); + + status = 1; + diag_min_value = silk_max_32( silk_SMMUL( silk_ADD_SAT32( A[ 0 ], A[ silk_SMULBB( M, M ) - 1 ] ), SILK_FIX_CONST( FIND_LTP_COND_FAC, 31 ) ), 1 << 9 ); + for( loop_count = 0; loop_count < M && status == 1; loop_count++ ) { + status = 0; + for( j = 0; j < M; j++ ) { + ptr1 = matrix_adr( L_Q16, j, 0, M ); + tmp_32 = 0; + for( i = 0; i < j; i++ ) { + v_Q0[ i ] = silk_SMULWW( D_Q0[ i ], ptr1[ i ] ); /* Q0 */ + tmp_32 = silk_SMLAWW( tmp_32, v_Q0[ i ], ptr1[ i ] ); /* Q0 */ + } + tmp_32 = silk_SUB32( matrix_ptr( A, j, j, M ), tmp_32 ); + + if( tmp_32 < diag_min_value ) { + tmp_32 = silk_SUB32( silk_SMULBB( loop_count + 1, diag_min_value ), tmp_32 ); + /* Matrix not positive semi-definite, or ill conditioned */ + for( i = 0; i < M; i++ ) { + matrix_ptr( A, i, i, M ) = silk_ADD32( matrix_ptr( A, i, i, M ), tmp_32 ); + } + status = 1; + break; + } + D_Q0[ j ] = tmp_32; /* always < max(Correlation) */ + + /* two-step division */ + one_div_diag_Q36 = silk_INVERSE32_varQ( tmp_32, 36 ); /* Q36 */ + one_div_diag_Q40 = silk_LSHIFT( one_div_diag_Q36, 4 ); /* Q40 */ + err = silk_SUB32( (opus_int32)1 << 24, silk_SMULWW( tmp_32, one_div_diag_Q40 ) ); /* Q24 */ + one_div_diag_Q48 = silk_SMULWW( err, one_div_diag_Q40 ); /* Q48 */ + + /* Save 1/Ds */ + inv_D[ j ].Q36_part = one_div_diag_Q36; + inv_D[ j ].Q48_part = one_div_diag_Q48; + + matrix_ptr( L_Q16, j, j, M ) = 65536; /* 1.0 in Q16 */ + ptr1 = matrix_adr( A, j, 0, M ); + ptr2 = matrix_adr( L_Q16, j + 1, 0, M ); + for( i = j + 1; i < M; i++ ) { + tmp_32 = 0; + for( k = 0; k < j; k++ ) { + tmp_32 = silk_SMLAWW( tmp_32, v_Q0[ k ], ptr2[ k ] ); /* Q0 */ + } + tmp_32 = silk_SUB32( ptr1[ i ], tmp_32 ); /* always < max(Correlation) */ + + /* tmp_32 / D_Q0[j] : Divide to Q16 */ + matrix_ptr( L_Q16, i, j, M ) = silk_ADD32( silk_SMMUL( tmp_32, one_div_diag_Q48 ), + silk_RSHIFT( silk_SMULWW( tmp_32, one_div_diag_Q36 ), 4 ) ); + + /* go to next column */ + ptr2 += M; + } + } + } + + silk_assert( status == 0 ); +} + +static OPUS_INLINE void silk_LS_divide_Q16_FIX( + opus_int32 T[], /* I/O Numenator vector */ + inv_D_t *inv_D, /* I 1 / D vector */ + opus_int M /* I dimension */ +) +{ + opus_int i; + opus_int32 tmp_32; + opus_int32 one_div_diag_Q36, one_div_diag_Q48; + + for( i = 0; i < M; i++ ) { + one_div_diag_Q36 = inv_D[ i ].Q36_part; + one_div_diag_Q48 = inv_D[ i ].Q48_part; + + tmp_32 = T[ i ]; + T[ i ] = silk_ADD32( silk_SMMUL( tmp_32, one_div_diag_Q48 ), silk_RSHIFT( silk_SMULWW( tmp_32, one_div_diag_Q36 ), 4 ) ); + } +} + +/* Solve Lx = b, when L is lower triangular and has ones on the diagonal */ +static OPUS_INLINE void silk_LS_SolveFirst_FIX( + const opus_int32 *L_Q16, /* I Pointer to Lower Triangular Matrix */ + opus_int M, /* I Dim of Matrix equation */ + const opus_int32 *b, /* I b Vector */ + opus_int32 *x_Q16 /* O x Vector */ +) +{ + opus_int i, j; + const opus_int32 *ptr32; + opus_int32 tmp_32; + + for( i = 0; i < M; i++ ) { + ptr32 = matrix_adr( L_Q16, i, 0, M ); + tmp_32 = 0; + for( j = 0; j < i; j++ ) { + tmp_32 = silk_SMLAWW( tmp_32, ptr32[ j ], x_Q16[ j ] ); + } + x_Q16[ i ] = silk_SUB32( b[ i ], tmp_32 ); + } +} + +/* Solve L^t*x = b, where L is lower triangular with ones on the diagonal */ +static OPUS_INLINE void silk_LS_SolveLast_FIX( + const opus_int32 *L_Q16, /* I Pointer to Lower Triangular Matrix */ + const opus_int M, /* I Dim of Matrix equation */ + const opus_int32 *b, /* I b Vector */ + opus_int32 *x_Q16 /* O x Vector */ +) +{ + opus_int i, j; + const opus_int32 *ptr32; + opus_int32 tmp_32; + + for( i = M - 1; i >= 0; i-- ) { + ptr32 = matrix_adr( L_Q16, 0, i, M ); + tmp_32 = 0; + for( j = M - 1; j > i; j-- ) { + tmp_32 = silk_SMLAWW( tmp_32, ptr32[ silk_SMULBB( j, M ) ], x_Q16[ j ] ); + } + x_Q16[ i ] = silk_SUB32( b[ i ], tmp_32 ); + } +} diff --git a/thirdparty/opus/silk/fixed/structs_FIX.h b/thirdparty/opus/silk/fixed/structs_FIX.h index 2774a97b24..3294b25128 100644 --- a/thirdparty/opus/silk/fixed/structs_FIX.h +++ b/thirdparty/opus/silk/fixed/structs_FIX.h @@ -48,16 +48,30 @@ typedef struct { } silk_shape_state_FIX; /********************************/ +/* Prefilter state */ +/********************************/ +typedef struct { + opus_int16 sLTP_shp[ LTP_BUF_LENGTH ]; + opus_int32 sAR_shp[ MAX_SHAPE_LPC_ORDER + 1 ]; + opus_int sLTP_shp_buf_idx; + opus_int32 sLF_AR_shp_Q12; + opus_int32 sLF_MA_shp_Q12; + opus_int32 sHarmHP_Q2; + opus_int32 rand_seed; + opus_int lagPrev; +} silk_prefilter_state_FIX; + +/********************************/ /* Encoder state FIX */ /********************************/ typedef struct { silk_encoder_state sCmn; /* Common struct, shared with floating-point code */ silk_shape_state_FIX sShape; /* Shape state */ + silk_prefilter_state_FIX sPrefilt; /* Prefilter State */ /* Buffer for find pitch and noise shape analysis */ silk_DWORD_ALIGN opus_int16 x_buf[ 2 * MAX_FRAME_LENGTH + LA_SHAPE_MAX ];/* Buffer for find pitch and noise shape analysis */ opus_int LTPCorr_Q15; /* Normalized correlation from pitch lag estimator */ - opus_int32 resNrgSmth; } silk_encoder_state_FIX; /************************/ @@ -73,8 +87,11 @@ typedef struct { /* Noise shaping parameters */ /* Testing */ - silk_DWORD_ALIGN opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ]; + silk_DWORD_ALIGN opus_int16 AR1_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ]; + silk_DWORD_ALIGN opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ]; opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ]; /* Packs two int16 coefficients per int32 value */ + opus_int GainsPre_Q14[ MAX_NB_SUBFR ]; + opus_int HarmBoost_Q14[ MAX_NB_SUBFR ]; opus_int Tilt_Q14[ MAX_NB_SUBFR ]; opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ]; opus_int Lambda_Q10; @@ -82,6 +99,7 @@ typedef struct { opus_int coding_quality_Q14; /* measures */ + opus_int sparseness_Q8; opus_int32 predGain_Q16; opus_int LTPredCodGain_Q7; opus_int32 ResNrg[ MAX_NB_SUBFR ]; /* Residual energy per subframe */ diff --git a/thirdparty/opus/silk/fixed/warped_autocorrelation_FIX.c b/thirdparty/opus/silk/fixed/warped_autocorrelation_FIX.c index 5c79553bc0..6ca6c1184d 100644 --- a/thirdparty/opus/silk/fixed/warped_autocorrelation_FIX.c +++ b/thirdparty/opus/silk/fixed/warped_autocorrelation_FIX.c @@ -31,14 +31,17 @@ POSSIBILITY OF SUCH DAMAGE. #include "main_FIX.h" +#define QC 10 +#define QS 14 + #if defined(MIPSr1_ASM) #include "mips/warped_autocorrelation_FIX_mipsr1.h" #endif +#ifndef OVERRIDE_silk_warped_autocorrelation_FIX /* Autocorrelations for a warped frequency axis */ -#ifndef OVERRIDE_silk_warped_autocorrelation_FIX_c -void silk_warped_autocorrelation_FIX_c( +void silk_warped_autocorrelation_FIX( opus_int32 *corr, /* O Result [order + 1] */ opus_int *scale, /* O Scaling of the correlation vector */ const opus_int16 *input, /* I Input data to correlate */ @@ -53,7 +56,7 @@ void silk_warped_autocorrelation_FIX_c( opus_int64 corr_QC[ MAX_SHAPE_LPC_ORDER + 1 ] = { 0 }; /* Order must be even */ - celt_assert( ( order & 1 ) == 0 ); + silk_assert( ( order & 1 ) == 0 ); silk_assert( 2 * QS - QC >= 0 ); /* Loop over samples */ @@ -89,4 +92,4 @@ void silk_warped_autocorrelation_FIX_c( } silk_assert( corr_QC[ 0 ] >= 0 ); /* If breaking, decrease QC*/ } -#endif /* OVERRIDE_silk_warped_autocorrelation_FIX_c */ +#endif /* OVERRIDE_silk_warped_autocorrelation_FIX */ diff --git a/thirdparty/opus/silk/fixed/x86/burg_modified_FIX_sse4_1.c b/thirdparty/opus/silk/fixed/x86/burg_modified_FIX_sse.c index bbb1ce0fcc..3c3583c5fc 100644 --- a/thirdparty/opus/silk/fixed/x86/burg_modified_FIX_sse4_1.c +++ b/thirdparty/opus/silk/fixed/x86/burg_modified_FIX_sse.c @@ -72,7 +72,7 @@ void silk_burg_modified_sse4_1( __m128i FIRST_3210, LAST_3210, ATMP_3210, TMP1_3210, TMP2_3210, T1_3210, T2_3210, PTR_3210, SUBFR_3210, X1_3210, X2_3210; __m128i CONST1 = _mm_set1_epi32(1); - celt_assert( subfr_length * nb_subfr <= MAX_FRAME_SIZE ); + silk_assert( subfr_length * nb_subfr <= MAX_FRAME_SIZE ); /* Compute autocorrelations, added over subframes */ silk_sum_sqr_shift( &C0, &rshifts, x, nb_subfr * subfr_length ); diff --git a/thirdparty/opus/silk/fixed/x86/prefilter_FIX_sse.c b/thirdparty/opus/silk/fixed/x86/prefilter_FIX_sse.c index 555432cd96..488a603f5d 100644 --- a/thirdparty/opus/silk/fixed/x86/prefilter_FIX_sse.c +++ b/thirdparty/opus/silk/fixed/x86/prefilter_FIX_sse.c @@ -49,7 +49,7 @@ void silk_warped_LPC_analysis_filter_FIX_sse4_1( opus_int32 acc_Q11, tmp1, tmp2; /* Order must be even */ - celt_assert( ( order & 1 ) == 0 ); + silk_assert( ( order & 1 ) == 0 ); if (order == 10) { @@ -65,7 +65,7 @@ void silk_warped_LPC_analysis_filter_FIX_sse4_1( register opus_int32 state_8, state_9, state_a; register opus_int64 coef_Q13_8, coef_Q13_9; - celt_assert( length > 0 ); + silk_assert( length > 0 ); coef_Q13_3210 = OP_CVTEPI16_EPI32_M64( &coef_Q13[ 0 ] ); coef_Q13_7654 = OP_CVTEPI16_EPI32_M64( &coef_Q13[ 4 ] ); @@ -107,8 +107,8 @@ void silk_warped_LPC_analysis_filter_FIX_sse4_1( xmm_tempb = _mm_add_epi32( xmm_tempb, xmm_product2 ); xmm_tempa = _mm_add_epi32( xmm_tempa, xmm_tempb ); - sum = (opus_int32)((coef_Q13_8 * state_8) >> 16); - sum += (opus_int32)((coef_Q13_9 * state_9) >> 16); + sum = (coef_Q13_8 * state_8) >> 16; + sum += (coef_Q13_9 * state_9) >> 16; xmm_tempa = _mm_add_epi32( xmm_tempa, _mm_shuffle_epi32( xmm_tempa, _MM_SHUFFLE( 0, 0, 0, 2 ) ) ); sum += _mm_cvtsi128_si32( xmm_tempa); diff --git a/thirdparty/opus/silk/fixed/x86/vector_ops_FIX_sse4_1.c b/thirdparty/opus/silk/fixed/x86/vector_ops_FIX_sse.c index c1e90564d0..c1e90564d0 100644 --- a/thirdparty/opus/silk/fixed/x86/vector_ops_FIX_sse4_1.c +++ b/thirdparty/opus/silk/fixed/x86/vector_ops_FIX_sse.c |