diff options
Diffstat (limited to 'thirdparty/opus/silk/x86')
-rw-r--r-- | thirdparty/opus/silk/x86/NSQ_del_dec_sse.c (renamed from thirdparty/opus/silk/x86/NSQ_del_dec_sse4_1.c) | 22 | ||||
-rw-r--r-- | thirdparty/opus/silk/x86/NSQ_sse.c (renamed from thirdparty/opus/silk/x86/NSQ_sse4_1.c) | 5 | ||||
-rw-r--r-- | thirdparty/opus/silk/x86/VAD_sse.c (renamed from thirdparty/opus/silk/x86/VAD_sse4_1.c) | 6 | ||||
-rw-r--r-- | thirdparty/opus/silk/x86/VQ_WMat_EC_sse.c (renamed from thirdparty/opus/silk/x86/VQ_WMat_EC_sse4_1.c) | 0 | ||||
-rw-r--r-- | thirdparty/opus/silk/x86/main_sse.h | 45 | ||||
-rw-r--r-- | thirdparty/opus/silk/x86/x86_silk_map.c | 26 |
6 files changed, 71 insertions, 33 deletions
diff --git a/thirdparty/opus/silk/x86/NSQ_del_dec_sse4_1.c b/thirdparty/opus/silk/x86/NSQ_del_dec_sse.c index 2c75ede2dd..21d4a8bc1e 100644 --- a/thirdparty/opus/silk/x86/NSQ_del_dec_sse4_1.c +++ b/thirdparty/opus/silk/x86/NSQ_del_dec_sse.c @@ -107,12 +107,12 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec_sse4_1( opus_int predictLPCOrder, /* I Prediction filter order */ opus_int warping_Q16, /* I */ opus_int nStatesDelayedDecision, /* I Number of states in decision tree */ - opus_int *smpl_buf_idx, /* I/O Index to newest samples in buffers */ + opus_int *smpl_buf_idx, /* I Index to newest samples in buffers */ opus_int decisionDelay /* I */ ); void silk_NSQ_del_dec_sse4_1( - const silk_encoder_state *psEncC, /* I Encoder State */ + const silk_encoder_state *psEncC, /* I/O Encoder State */ silk_nsq_state *NSQ, /* I/O NSQ state */ SideInfoIndices *psIndices, /* I/O Quantization Indices */ const opus_int32 x_Q3[], /* I Prefiltered input signal */ @@ -234,8 +234,7 @@ void silk_NSQ_del_dec_sse4_1( psDD = &psDelDec[ Winner_ind ]; last_smple_idx = smpl_buf_idx + decisionDelay; for( i = 0; i < decisionDelay; i++ ) { - last_smple_idx = ( last_smple_idx - 1 ) % DECISION_DELAY; - if( last_smple_idx < 0 ) last_smple_idx += DECISION_DELAY; + last_smple_idx = ( last_smple_idx - 1 ) & DECISION_DELAY_MASK; pulses[ i - decisionDelay ] = (opus_int8)silk_RSHIFT_ROUND( psDD->Q_Q10[ last_smple_idx ], 10 ); pxq[ i - decisionDelay ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( psDD->Xq_Q14[ last_smple_idx ], Gains_Q16[ 1 ] ), 14 ) ); @@ -247,7 +246,7 @@ void silk_NSQ_del_dec_sse4_1( /* Rewhiten with new A coefs */ start_idx = psEncC->ltp_mem_length - lag - psEncC->predictLPCOrder - LTP_ORDER / 2; - celt_assert( start_idx > 0 ); + silk_assert( start_idx > 0 ); silk_LPC_analysis_filter( &sLTP[ start_idx ], &NSQ->xq[ start_idx + k * psEncC->subfr_length ], A_Q12, psEncC->ltp_mem_length - start_idx, psEncC->predictLPCOrder, psEncC->arch ); @@ -286,8 +285,7 @@ void silk_NSQ_del_dec_sse4_1( last_smple_idx = smpl_buf_idx + decisionDelay; Gain_Q10 = silk_RSHIFT32( Gains_Q16[ psEncC->nb_subfr - 1 ], 6 ); for( i = 0; i < decisionDelay; i++ ) { - last_smple_idx = ( last_smple_idx - 1 ) % DECISION_DELAY; - if( last_smple_idx < 0 ) last_smple_idx += DECISION_DELAY; + last_smple_idx = ( last_smple_idx - 1 ) & DECISION_DELAY_MASK; pulses[ i - decisionDelay ] = (opus_int8)silk_RSHIFT_ROUND( psDD->Q_Q10[ last_smple_idx ], 10 ); pxq[ i - decisionDelay ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( psDD->Xq_Q14[ last_smple_idx ], Gain_Q10 ), 8 ) ); @@ -301,6 +299,7 @@ void silk_NSQ_del_dec_sse4_1( NSQ->lagPrev = pitchL[ psEncC->nb_subfr - 1 ]; /* Save quantized speech signal */ + /* DEBUG_STORE_DATA( enc.pcm, &NSQ->xq[psEncC->ltp_mem_length], psEncC->frame_length * sizeof( opus_int16 ) ) */ silk_memmove( NSQ->xq, &NSQ->xq[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int16 ) ); silk_memmove( NSQ->sLTP_shp_Q14, &NSQ->sLTP_shp_Q14[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int32 ) ); RESTORE_STACK; @@ -334,7 +333,7 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec_sse4_1( opus_int predictLPCOrder, /* I Prediction filter order */ opus_int warping_Q16, /* I */ opus_int nStatesDelayedDecision, /* I Number of states in decision tree */ - opus_int *smpl_buf_idx, /* I/O Index to newest samples in buffers */ + opus_int *smpl_buf_idx, /* I Index to newest samples in buffers */ opus_int decisionDelay /* I */ ) { @@ -353,7 +352,7 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec_sse4_1( __m128i b_Q12_0123, b_sr_Q12_0123; SAVE_STACK; - celt_assert( nStatesDelayedDecision > 0 ); + silk_assert( nStatesDelayedDecision > 0 ); ALLOC( psSampleState, nStatesDelayedDecision, NSQ_sample_pair ); shp_lag_ptr = &NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - lag + HARM_SHAPE_FIR_TAPS / 2 ]; @@ -639,9 +638,8 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec_sse4_1( psSS[ 1 ].xq_Q14 = xq_Q14; } } - *smpl_buf_idx = ( *smpl_buf_idx - 1 ) % DECISION_DELAY; - if( *smpl_buf_idx < 0 ) *smpl_buf_idx += DECISION_DELAY; - last_smple_idx = ( *smpl_buf_idx + decisionDelay ) % DECISION_DELAY; + *smpl_buf_idx = ( *smpl_buf_idx - 1 ) & DECISION_DELAY_MASK; /* Index to newest samples */ + last_smple_idx = ( *smpl_buf_idx + decisionDelay ) & DECISION_DELAY_MASK; /* Index to decisionDelay old samples */ /* Find winner */ RDmin_Q10 = psSampleState[ 0 ][ 0 ].RD_Q10; diff --git a/thirdparty/opus/silk/x86/NSQ_sse4_1.c b/thirdparty/opus/silk/x86/NSQ_sse.c index b0315e35fc..bb3c5f1955 100644 --- a/thirdparty/opus/silk/x86/NSQ_sse4_1.c +++ b/thirdparty/opus/silk/x86/NSQ_sse.c @@ -71,7 +71,7 @@ static OPUS_INLINE void silk_noise_shape_quantizer_10_16_sse4_1( ); void silk_NSQ_sse4_1( - const silk_encoder_state *psEncC, /* I Encoder State */ + const silk_encoder_state *psEncC, /* I/O Encoder State */ silk_nsq_state *NSQ, /* I/O NSQ state */ SideInfoIndices *psIndices, /* I/O Quantization Indices */ const opus_int32 x_Q3[], /* I Prefiltered input signal */ @@ -199,7 +199,7 @@ void silk_NSQ_sse4_1( if( ( k & ( 3 - silk_LSHIFT( LSF_interpolation_flag, 1 ) ) ) == 0 ) { /* Rewhiten with new A coefs */ start_idx = psEncC->ltp_mem_length - lag - psEncC->predictLPCOrder - LTP_ORDER / 2; - celt_assert( start_idx > 0 ); + silk_assert( start_idx > 0 ); silk_LPC_analysis_filter( &sLTP[ start_idx ], &NSQ->xq[ start_idx + k * psEncC->subfr_length ], A_Q12, psEncC->ltp_mem_length - start_idx, psEncC->predictLPCOrder, psEncC->arch ); @@ -233,6 +233,7 @@ void silk_NSQ_sse4_1( NSQ->lagPrev = pitchL[ psEncC->nb_subfr - 1 ]; /* Save quantized speech and noise shaping signals */ + /* DEBUG_STORE_DATA( enc.pcm, &NSQ->xq[ psEncC->ltp_mem_length ], psEncC->frame_length * sizeof( opus_int16 ) ) */ silk_memmove( NSQ->xq, &NSQ->xq[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int16 ) ); silk_memmove( NSQ->sLTP_shp_Q14, &NSQ->sLTP_shp_Q14[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int32 ) ); RESTORE_STACK; diff --git a/thirdparty/opus/silk/x86/VAD_sse4_1.c b/thirdparty/opus/silk/x86/VAD_sse.c index d02ddf4ad0..4e90f4410d 100644 --- a/thirdparty/opus/silk/x86/VAD_sse4_1.c +++ b/thirdparty/opus/silk/x86/VAD_sse.c @@ -65,9 +65,9 @@ opus_int silk_VAD_GetSA_Q8_sse4_1( /* O Return value, 0 if s /* Safety checks */ silk_assert( VAD_N_BANDS == 4 ); - celt_assert( MAX_FRAME_LENGTH >= psEncC->frame_length ); - celt_assert( psEncC->frame_length <= 512 ); - celt_assert( psEncC->frame_length == 8 * silk_RSHIFT( psEncC->frame_length, 3 ) ); + silk_assert( MAX_FRAME_LENGTH >= psEncC->frame_length ); + silk_assert( psEncC->frame_length <= 512 ); + silk_assert( psEncC->frame_length == 8 * silk_RSHIFT( psEncC->frame_length, 3 ) ); /***********************/ /* Filter and Decimate */ diff --git a/thirdparty/opus/silk/x86/VQ_WMat_EC_sse4_1.c b/thirdparty/opus/silk/x86/VQ_WMat_EC_sse.c index 74d6c6d0ec..74d6c6d0ec 100644 --- a/thirdparty/opus/silk/x86/VQ_WMat_EC_sse4_1.c +++ b/thirdparty/opus/silk/x86/VQ_WMat_EC_sse.c diff --git a/thirdparty/opus/silk/x86/main_sse.h b/thirdparty/opus/silk/x86/main_sse.h index 2f15d44869..d8d61310ed 100644 --- a/thirdparty/opus/silk/x86/main_sse.h +++ b/thirdparty/opus/silk/x86/main_sse.h @@ -34,7 +34,6 @@ # if defined(OPUS_X86_MAY_HAVE_SSE4_1) -#if 0 /* FIXME: SSE disabled until silk_VQ_WMat_EC_sse4_1() gets updated. */ # define OVERRIDE_silk_VQ_WMat_EC void silk_VQ_WMat_EC_sse4_1( @@ -80,13 +79,11 @@ extern void (*const SILK_VQ_WMAT_EC_IMPL[OPUS_ARCHMASK + 1])( mu_Q9, max_gain_Q7, L)) #endif -#endif -#if 0 /* FIXME: SSE disabled until the NSQ code gets updated. */ # define OVERRIDE_silk_NSQ void silk_NSQ_sse4_1( - const silk_encoder_state *psEncC, /* I Encoder State */ + const silk_encoder_state *psEncC, /* I/O Encoder State */ silk_nsq_state *NSQ, /* I/O NSQ state */ SideInfoIndices *psIndices, /* I/O Quantization Indices */ const opus_int32 x_Q3[], /* I Prefiltered input signal */ @@ -113,7 +110,7 @@ void silk_NSQ_sse4_1( #else extern void (*const SILK_NSQ_IMPL[OPUS_ARCHMASK + 1])( - const silk_encoder_state *psEncC, /* I Encoder State */ + const silk_encoder_state *psEncC, /* I/O Encoder State */ silk_nsq_state *NSQ, /* I/O NSQ state */ SideInfoIndices *psIndices, /* I/O Quantization Indices */ const opus_int32 x_Q3[], /* I Prefiltered input signal */ @@ -140,7 +137,7 @@ extern void (*const SILK_NSQ_IMPL[OPUS_ARCHMASK + 1])( # define OVERRIDE_silk_NSQ_del_dec void silk_NSQ_del_dec_sse4_1( - const silk_encoder_state *psEncC, /* I Encoder State */ + const silk_encoder_state *psEncC, /* I/O Encoder State */ silk_nsq_state *NSQ, /* I/O NSQ state */ SideInfoIndices *psIndices, /* I/O Quantization Indices */ const opus_int32 x_Q3[], /* I Prefiltered input signal */ @@ -167,7 +164,7 @@ void silk_NSQ_del_dec_sse4_1( #else extern void (*const SILK_NSQ_DEL_DEC_IMPL[OPUS_ARCHMASK + 1])( - const silk_encoder_state *psEncC, /* I Encoder State */ + const silk_encoder_state *psEncC, /* I/O Encoder State */ silk_nsq_state *NSQ, /* I/O NSQ state */ SideInfoIndices *psIndices, /* I/O Quantization Indices */ const opus_int32 x_Q3[], /* I Prefiltered input signal */ @@ -190,7 +187,6 @@ extern void (*const SILK_NSQ_DEL_DEC_IMPL[OPUS_ARCHMASK + 1])( HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14)) #endif -#endif void silk_noise_shape_quantizer( silk_nsq_state *NSQ, /* I/O NSQ state */ @@ -242,6 +238,39 @@ extern opus_int (*const SILK_VAD_GETSA_Q8_IMPL[OPUS_ARCHMASK + 1])( silk_encoder_state *psEnC, const opus_int16 pIn[]); +# define OVERRIDE_silk_warped_LPC_analysis_filter_FIX + +#endif + +void silk_warped_LPC_analysis_filter_FIX_sse4_1( + opus_int32 state[], /* I/O State [order + 1] */ + opus_int32 res_Q2[], /* O Residual signal [length] */ + const opus_int16 coef_Q13[], /* I Coefficients [order] */ + const opus_int16 input[], /* I Input signal [length] */ + const opus_int16 lambda_Q16, /* I Warping factor */ + const opus_int length, /* I Length of input signal */ + const opus_int order /* I Filter order (even) */ +); + +#if defined(OPUS_X86_PRESUME_SSE4_1) +#define silk_warped_LPC_analysis_filter_FIX(state, res_Q2, coef_Q13, input, lambda_Q16, length, order, arch) \ + ((void)(arch),silk_warped_LPC_analysis_filter_FIX_c(state, res_Q2, coef_Q13, input, lambda_Q16, length, order)) + +#else + +extern void (*const SILK_WARPED_LPC_ANALYSIS_FILTER_FIX_IMPL[OPUS_ARCHMASK + 1])( + opus_int32 state[], /* I/O State [order + 1] */ + opus_int32 res_Q2[], /* O Residual signal [length] */ + const opus_int16 coef_Q13[], /* I Coefficients [order] */ + const opus_int16 input[], /* I Input signal [length] */ + const opus_int16 lambda_Q16, /* I Warping factor */ + const opus_int length, /* I Length of input signal */ + const opus_int order /* I Filter order (even) */ +); + +# define silk_warped_LPC_analysis_filter_FIX(state, res_Q2, coef_Q13, input, lambda_Q16, length, order, arch) \ + ((*SILK_WARPED_LPC_ANALYSIS_FILTER_FIX_IMPL[(arch) & OPUS_ARCHMASK])(state, res_Q2, coef_Q13, input, lambda_Q16, length, order)) + #endif # endif diff --git a/thirdparty/opus/silk/x86/x86_silk_map.c b/thirdparty/opus/silk/x86/x86_silk_map.c index 32dcc3cab7..818841f2c1 100644 --- a/thirdparty/opus/silk/x86/x86_silk_map.c +++ b/thirdparty/opus/silk/x86/x86_silk_map.c @@ -66,9 +66,8 @@ opus_int (*const SILK_VAD_GETSA_Q8_IMPL[ OPUS_ARCHMASK + 1 ] )( MAY_HAVE_SSE4_1( silk_VAD_GetSA_Q8 ) /* avx */ }; -#if 0 /* FIXME: SSE disabled until the NSQ code gets updated. */ void (*const SILK_NSQ_IMPL[ OPUS_ARCHMASK + 1 ] )( - const silk_encoder_state *psEncC, /* I Encoder State */ + const silk_encoder_state *psEncC, /* I/O Encoder State */ silk_nsq_state *NSQ, /* I/O NSQ state */ SideInfoIndices *psIndices, /* I/O Quantization Indices */ const opus_int32 x_Q3[], /* I Prefiltered input signal */ @@ -90,9 +89,7 @@ void (*const SILK_NSQ_IMPL[ OPUS_ARCHMASK + 1 ] )( MAY_HAVE_SSE4_1( silk_NSQ ), /* sse4.1 */ MAY_HAVE_SSE4_1( silk_NSQ ) /* avx */ }; -#endif -#if 0 /* FIXME: SSE disabled until silk_VQ_WMat_EC_sse4_1() gets updated. */ void (*const SILK_VQ_WMAT_EC_IMPL[ OPUS_ARCHMASK + 1 ] )( opus_int8 *ind, /* O index of best codebook vector */ opus_int32 *rate_dist_Q14, /* O best weighted quant error + mu * rate */ @@ -112,11 +109,9 @@ void (*const SILK_VQ_WMAT_EC_IMPL[ OPUS_ARCHMASK + 1 ] )( MAY_HAVE_SSE4_1( silk_VQ_WMat_EC ), /* sse4.1 */ MAY_HAVE_SSE4_1( silk_VQ_WMat_EC ) /* avx */ }; -#endif -#if 0 /* FIXME: SSE disabled until the NSQ code gets updated. */ void (*const SILK_NSQ_DEL_DEC_IMPL[ OPUS_ARCHMASK + 1 ] )( - const silk_encoder_state *psEncC, /* I Encoder State */ + const silk_encoder_state *psEncC, /* I/O Encoder State */ silk_nsq_state *NSQ, /* I/O NSQ state */ SideInfoIndices *psIndices, /* I/O Quantization Indices */ const opus_int32 x_Q3[], /* I Prefiltered input signal */ @@ -138,10 +133,25 @@ void (*const SILK_NSQ_DEL_DEC_IMPL[ OPUS_ARCHMASK + 1 ] )( MAY_HAVE_SSE4_1( silk_NSQ_del_dec ), /* sse4.1 */ MAY_HAVE_SSE4_1( silk_NSQ_del_dec ) /* avx */ }; -#endif #if defined(FIXED_POINT) +void (*const SILK_WARPED_LPC_ANALYSIS_FILTER_FIX_IMPL[ OPUS_ARCHMASK + 1 ] )( + opus_int32 state[], /* I/O State [order + 1] */ + opus_int32 res_Q2[], /* O Residual signal [length] */ + const opus_int16 coef_Q13[], /* I Coefficients [order] */ + const opus_int16 input[], /* I Input signal [length] */ + const opus_int16 lambda_Q16, /* I Warping factor */ + const opus_int length, /* I Length of input signal */ + const opus_int order /* I Filter order (even) */ +) = { + silk_warped_LPC_analysis_filter_FIX_c, /* non-sse */ + silk_warped_LPC_analysis_filter_FIX_c, + silk_warped_LPC_analysis_filter_FIX_c, + MAY_HAVE_SSE4_1( silk_warped_LPC_analysis_filter_FIX ), /* sse4.1 */ + MAY_HAVE_SSE4_1( silk_warped_LPC_analysis_filter_FIX ) /* avx */ +}; + void (*const SILK_BURG_MODIFIED_IMPL[ OPUS_ARCHMASK + 1 ] )( opus_int32 *res_nrg, /* O Residual energy */ opus_int *res_nrg_Q, /* O Residual energy Q value */ |