summaryrefslogtreecommitdiff
path: root/drivers/opus/silk/fixed
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/opus/silk/fixed')
-rw-r--r--drivers/opus/silk/fixed/LTP_analysis_filter_FIX.c22
-rw-r--r--drivers/opus/silk/fixed/LTP_scale_ctrl_FIX.c3
-rw-r--r--drivers/opus/silk/fixed/apply_sine_window_FIX.c3
-rw-r--r--drivers/opus/silk/fixed/autocorr_FIX.c3
-rw-r--r--drivers/opus/silk/fixed/burg_modified_FIX.c39
-rw-r--r--drivers/opus/silk/fixed/corrMatrix_FIX.c13
-rw-r--r--drivers/opus/silk/fixed/encode_frame_FIX.c17
-rw-r--r--drivers/opus/silk/fixed/find_LPC_FIX.c5
-rw-r--r--drivers/opus/silk/fixed/find_LTP_FIX.c10
-rw-r--r--drivers/opus/silk/fixed/find_pitch_lags_FIX.c5
-rw-r--r--drivers/opus/silk/fixed/find_pred_coefs_FIX.c18
-rw-r--r--drivers/opus/silk/fixed/k2a_FIX.c3
-rw-r--r--drivers/opus/silk/fixed/k2a_Q16_FIX.c3
-rw-r--r--drivers/opus/silk/fixed/main_FIX.h25
-rw-r--r--drivers/opus/silk/fixed/mips/noise_shape_analysis_FIX_mipsr1.h336
-rw-r--r--drivers/opus/silk/fixed/mips/prefilter_FIX_mipsr1.h181
-rw-r--r--drivers/opus/silk/fixed/mips/warped_autocorrelation_FIX_mipsr1.h162
-rw-r--r--drivers/opus/silk/fixed/noise_shape_analysis_FIX.c9
-rw-r--r--drivers/opus/silk/fixed/pitch_analysis_core_FIX.c25
-rw-r--r--drivers/opus/silk/fixed/prefilter_FIX.c19
-rw-r--r--drivers/opus/silk/fixed/process_gains_FIX.c3
-rw-r--r--drivers/opus/silk/fixed/regularize_correlations_FIX.c3
-rw-r--r--drivers/opus/silk/fixed/residual_energy16_FIX.c3
-rw-r--r--drivers/opus/silk/fixed/residual_energy_FIX.c8
-rw-r--r--drivers/opus/silk/fixed/schur64_FIX.c3
-rw-r--r--drivers/opus/silk/fixed/schur_FIX.c3
-rw-r--r--drivers/opus/silk/fixed/solve_LS_FIX.c3
-rw-r--r--drivers/opus/silk/fixed/structs_FIX.h3
-rw-r--r--drivers/opus/silk/fixed/vector_ops_FIX.c13
-rw-r--r--drivers/opus/silk/fixed/warped_autocorrelation_FIX.c10
-rw-r--r--drivers/opus/silk/fixed/x86/burg_modified_FIX_sse.c372
-rw-r--r--drivers/opus/silk/fixed/x86/prefilter_FIX_sse.c157
-rw-r--r--drivers/opus/silk/fixed/x86/vector_ops_FIX_sse.c85
33 files changed, 1421 insertions, 146 deletions
diff --git a/drivers/opus/silk/fixed/LTP_analysis_filter_FIX.c b/drivers/opus/silk/fixed/LTP_analysis_filter_FIX.c
index 19d5defc7a..041e85f0b9 100644
--- a/drivers/opus/silk/fixed/LTP_analysis_filter_FIX.c
+++ b/drivers/opus/silk/fixed/LTP_analysis_filter_FIX.c
@@ -24,10 +24,7 @@ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
-
-#ifdef OPUS_ENABLED
#include "opus/opus_config.h"
-#endif
#include "opus/silk/fixed/main_FIX.h"
@@ -45,7 +42,7 @@ void silk_LTP_analysis_filter_FIX(
const opus_int16 *x_ptr, *x_lag_ptr;
opus_int16 Btmp_Q14[ LTP_ORDER ];
opus_int16 *LTP_res_ptr;
- opus_int k, i, j;
+ opus_int k, i;
opus_int32 LTP_est;
x_ptr = x;
@@ -53,9 +50,12 @@ void silk_LTP_analysis_filter_FIX(
for( k = 0; k < nb_subfr; k++ ) {
x_lag_ptr = x_ptr - pitchL[ k ];
- for( i = 0; i < LTP_ORDER; i++ ) {
- Btmp_Q14[ i ] = LTPCoef_Q14[ k * LTP_ORDER + i ];
- }
+
+ Btmp_Q14[ 0 ] = LTPCoef_Q14[ k * LTP_ORDER ];
+ Btmp_Q14[ 1 ] = LTPCoef_Q14[ k * LTP_ORDER + 1 ];
+ Btmp_Q14[ 2 ] = LTPCoef_Q14[ k * LTP_ORDER + 2 ];
+ Btmp_Q14[ 3 ] = LTPCoef_Q14[ k * LTP_ORDER + 3 ];
+ Btmp_Q14[ 4 ] = LTPCoef_Q14[ k * LTP_ORDER + 4 ];
/* LTP analysis FIR filter */
for( i = 0; i < subfr_length + pre_length; i++ ) {
@@ -63,9 +63,11 @@ void silk_LTP_analysis_filter_FIX(
/* Long-term prediction */
LTP_est = silk_SMULBB( x_lag_ptr[ LTP_ORDER / 2 ], Btmp_Q14[ 0 ] );
- for( j = 1; j < LTP_ORDER; j++ ) {
- LTP_est = silk_SMLABB_ovflw( LTP_est, x_lag_ptr[ LTP_ORDER / 2 - j ], Btmp_Q14[ j ] );
- }
+ LTP_est = silk_SMLABB_ovflw( LTP_est, x_lag_ptr[ 1 ], Btmp_Q14[ 1 ] );
+ LTP_est = silk_SMLABB_ovflw( LTP_est, x_lag_ptr[ 0 ], Btmp_Q14[ 2 ] );
+ LTP_est = silk_SMLABB_ovflw( LTP_est, x_lag_ptr[ -1 ], Btmp_Q14[ 3 ] );
+ LTP_est = silk_SMLABB_ovflw( LTP_est, x_lag_ptr[ -2 ], Btmp_Q14[ 4 ] );
+
LTP_est = silk_RSHIFT_ROUND( LTP_est, 14 ); /* round and -> Q0*/
/* Subtract long-term prediction */
diff --git a/drivers/opus/silk/fixed/LTP_scale_ctrl_FIX.c b/drivers/opus/silk/fixed/LTP_scale_ctrl_FIX.c
index 0887fd03f7..dac1fbc08a 100644
--- a/drivers/opus/silk/fixed/LTP_scale_ctrl_FIX.c
+++ b/drivers/opus/silk/fixed/LTP_scale_ctrl_FIX.c
@@ -24,10 +24,7 @@ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
-
-#ifdef OPUS_ENABLED
#include "opus/opus_config.h"
-#endif
#include "opus/silk/fixed/main_FIX.h"
diff --git a/drivers/opus/silk/fixed/apply_sine_window_FIX.c b/drivers/opus/silk/fixed/apply_sine_window_FIX.c
index 5b4d8ebfdc..9d63e7bce2 100644
--- a/drivers/opus/silk/fixed/apply_sine_window_FIX.c
+++ b/drivers/opus/silk/fixed/apply_sine_window_FIX.c
@@ -24,10 +24,7 @@ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
-
-#ifdef OPUS_ENABLED
#include "opus/opus_config.h"
-#endif
#include "opus/silk/SigProc_FIX.h"
diff --git a/drivers/opus/silk/fixed/autocorr_FIX.c b/drivers/opus/silk/fixed/autocorr_FIX.c
index 88a849e12c..db39a0335b 100644
--- a/drivers/opus/silk/fixed/autocorr_FIX.c
+++ b/drivers/opus/silk/fixed/autocorr_FIX.c
@@ -24,10 +24,7 @@ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
-
-#ifdef OPUS_ENABLED
#include "opus/opus_config.h"
-#endif
#include "opus/silk/SigProc_FIX.h"
#include "opus/celt/celt_lpc.h"
diff --git a/drivers/opus/silk/fixed/burg_modified_FIX.c b/drivers/opus/silk/fixed/burg_modified_FIX.c
index 5ef3ad2c38..48d64eccc5 100644
--- a/drivers/opus/silk/fixed/burg_modified_FIX.c
+++ b/drivers/opus/silk/fixed/burg_modified_FIX.c
@@ -24,10 +24,7 @@ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
-
-#ifdef OPUS_ENABLED
#include "opus/opus_config.h"
-#endif
#include "opus/silk/SigProc_FIX.h"
#include "opus/silk/define.h"
@@ -42,7 +39,7 @@ POSSIBILITY OF SUCH DAMAGE.
#define MAX_RSHIFTS (32 - QA)
/* Compute reflection coefficients from input signal */
-void silk_burg_modified(
+void silk_burg_modified_c(
opus_int32 *res_nrg, /* O Residual energy */
opus_int *res_nrg_Q, /* O Residual energy Q value */
opus_int32 A_Q16[], /* O Prediction coefficients (length order) */
@@ -54,7 +51,7 @@ void silk_burg_modified(
int arch /* I Run-time architecture */
)
{
- opus_int k, n, s, lz, rshifts, rshifts_extra, reached_max_gain;
+ opus_int k, n, s, lz, rshifts, reached_max_gain;
opus_int32 C0, num, nrg, rc_Q31, invGain_Q30, Atmp_QA, Atmp1, tmp1, tmp2, x1, x2;
const opus_int16 *x_ptr;
opus_int32 C_first_row[ SILK_MAX_ORDER_LPC ];
@@ -63,27 +60,23 @@ void silk_burg_modified(
opus_int32 CAf[ SILK_MAX_ORDER_LPC + 1 ];
opus_int32 CAb[ SILK_MAX_ORDER_LPC + 1 ];
opus_int32 xcorr[ SILK_MAX_ORDER_LPC ];
+ opus_int64 C0_64;
silk_assert( subfr_length * nb_subfr <= MAX_FRAME_SIZE );
/* Compute autocorrelations, added over subframes */
- silk_sum_sqr_shift( &C0, &rshifts, x, nb_subfr * subfr_length );
- if( rshifts > MAX_RSHIFTS ) {
- C0 = silk_LSHIFT32( C0, rshifts - MAX_RSHIFTS );
- silk_assert( C0 > 0 );
- rshifts = MAX_RSHIFTS;
+ C0_64 = silk_inner_prod16_aligned_64( x, x, subfr_length*nb_subfr, arch );
+ lz = silk_CLZ64(C0_64);
+ rshifts = 32 + 1 + N_BITS_HEAD_ROOM - lz;
+ if (rshifts > MAX_RSHIFTS) rshifts = MAX_RSHIFTS;
+ if (rshifts < MIN_RSHIFTS) rshifts = MIN_RSHIFTS;
+
+ if (rshifts > 0) {
+ C0 = (opus_int32)silk_RSHIFT64(C0_64, rshifts );
} else {
- lz = silk_CLZ32( C0 ) - 1;
- rshifts_extra = N_BITS_HEAD_ROOM - lz;
- if( rshifts_extra > 0 ) {
- rshifts_extra = silk_min( rshifts_extra, MAX_RSHIFTS - rshifts );
- C0 = silk_RSHIFT32( C0, rshifts_extra );
- } else {
- rshifts_extra = silk_max( rshifts_extra, MIN_RSHIFTS - rshifts );
- C0 = silk_LSHIFT32( C0, -rshifts_extra );
- }
- rshifts += rshifts_extra;
+ C0 = silk_LSHIFT32((opus_int32)C0_64, -rshifts );
}
+
CAb[ 0 ] = CAf[ 0 ] = C0 + silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ) + 1; /* Q(-rshifts) */
silk_memset( C_first_row, 0, SILK_MAX_ORDER_LPC * sizeof( opus_int32 ) );
if( rshifts > 0 ) {
@@ -91,7 +84,7 @@ void silk_burg_modified(
x_ptr = x + s * subfr_length;
for( n = 1; n < D + 1; n++ ) {
C_first_row[ n - 1 ] += (opus_int32)silk_RSHIFT64(
- silk_inner_prod16_aligned_64( x_ptr, x_ptr + n, subfr_length - n ), rshifts );
+ silk_inner_prod16_aligned_64( x_ptr, x_ptr + n, subfr_length - n, arch ), rshifts );
}
}
} else {
@@ -252,12 +245,12 @@ void silk_burg_modified(
if( rshifts > 0 ) {
for( s = 0; s < nb_subfr; s++ ) {
x_ptr = x + s * subfr_length;
- C0 -= (opus_int32)silk_RSHIFT64( silk_inner_prod16_aligned_64( x_ptr, x_ptr, D ), rshifts );
+ C0 -= (opus_int32)silk_RSHIFT64( silk_inner_prod16_aligned_64( x_ptr, x_ptr, D, arch ), rshifts );
}
} else {
for( s = 0; s < nb_subfr; s++ ) {
x_ptr = x + s * subfr_length;
- C0 -= silk_LSHIFT32( silk_inner_prod_aligned( x_ptr, x_ptr, D ), -rshifts );
+ C0 -= silk_LSHIFT32( silk_inner_prod_aligned( x_ptr, x_ptr, D, arch), -rshifts);
}
}
/* Approximate residual energy */
diff --git a/drivers/opus/silk/fixed/corrMatrix_FIX.c b/drivers/opus/silk/fixed/corrMatrix_FIX.c
index 9f50153dcc..ac5331fe57 100644
--- a/drivers/opus/silk/fixed/corrMatrix_FIX.c
+++ b/drivers/opus/silk/fixed/corrMatrix_FIX.c
@@ -24,10 +24,7 @@ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
-
-#ifdef OPUS_ENABLED
#include "opus/opus_config.h"
-#endif
/**********************************************************************
* Correlation Matrix Computations for LS estimate.
@@ -42,7 +39,8 @@ void silk_corrVector_FIX(
const opus_int L, /* I Length of vectors */
const opus_int order, /* I Max lag for correlation */
opus_int32 *Xt, /* O Pointer to X'*t correlation vector [order] */
- const opus_int rshifts /* I Right shifts of correlations */
+ const opus_int rshifts, /* I Right shifts of correlations */
+ int arch /* I Run-time architecture */
)
{
opus_int lag, i;
@@ -65,7 +63,7 @@ void silk_corrVector_FIX(
} else {
silk_assert( rshifts == 0 );
for( lag = 0; lag < order; lag++ ) {
- Xt[ lag ] = silk_inner_prod_aligned( ptr1, ptr2, L ); /* X[:,lag]'*t */
+ Xt[ lag ] = silk_inner_prod_aligned( ptr1, ptr2, L, arch ); /* X[:,lag]'*t */
ptr1--; /* Go to next column of X */
}
}
@@ -78,7 +76,8 @@ void silk_corrMatrix_FIX(
const opus_int order, /* I Max lag for correlation */
const opus_int head_room, /* I Desired headroom */
opus_int32 *XX, /* O Pointer to X'*X correlation matrix [ order x order ] */
- opus_int *rshifts /* I/O Right shifts of correlations */
+ opus_int *rshifts, /* I/O Right shifts of correlations */
+ int arch /* I Run-time architecture */
)
{
opus_int i, j, lag, rshifts_local, head_room_rshifts;
@@ -138,7 +137,7 @@ void silk_corrMatrix_FIX(
} else {
for( lag = 1; lag < order; lag++ ) {
/* Inner product of column 0 and column lag: X[:,0]'*X[:,lag] */
- energy = silk_inner_prod_aligned( ptr1, ptr2, L );
+ energy = silk_inner_prod_aligned( ptr1, ptr2, L, arch );
matrix_ptr( XX, lag, 0, order ) = energy;
matrix_ptr( XX, 0, lag, order ) = energy;
/* Calculate remaining off diagonal: X[:,j]'*X[:,j + lag] */
diff --git a/drivers/opus/silk/fixed/encode_frame_FIX.c b/drivers/opus/silk/fixed/encode_frame_FIX.c
index 27944729f8..e4ead0afef 100644
--- a/drivers/opus/silk/fixed/encode_frame_FIX.c
+++ b/drivers/opus/silk/fixed/encode_frame_FIX.c
@@ -24,10 +24,7 @@ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
-
-#ifdef OPUS_ENABLED
#include "opus/opus_config.h"
-#endif
#include "opus/silk/fixed/main_FIX.h"
#include "opus/celt/stack_alloc.h"
@@ -48,7 +45,7 @@ void silk_encode_do_VAD_FIX(
/****************************/
/* Voice Activity Detection */
/****************************/
- silk_VAD_GetSA_Q8( &psEnc->sCmn, psEnc->sCmn.inputBuf + 1 );
+ silk_VAD_GetSA_Q8( &psEnc->sCmn, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.arch );
/**************************************************/
/* Convert speech activity into VAD and DTX flags */
@@ -196,11 +193,13 @@ opus_int silk_encode_frame_FIX(
if( psEnc->sCmn.nStatesDelayedDecision > 1 || psEnc->sCmn.warping_Q16 > 0 ) {
silk_NSQ_del_dec( &psEnc->sCmn, &psEnc->sCmn.sNSQ, &psEnc->sCmn.indices, xfw_Q3, psEnc->sCmn.pulses,
sEncCtrl.PredCoef_Q12[ 0 ], sEncCtrl.LTPCoef_Q14, sEncCtrl.AR2_Q13, sEncCtrl.HarmShapeGain_Q14,
- sEncCtrl.Tilt_Q14, sEncCtrl.LF_shp_Q14, sEncCtrl.Gains_Q16, sEncCtrl.pitchL, sEncCtrl.Lambda_Q10, sEncCtrl.LTP_scale_Q14 );
+ sEncCtrl.Tilt_Q14, sEncCtrl.LF_shp_Q14, sEncCtrl.Gains_Q16, sEncCtrl.pitchL, sEncCtrl.Lambda_Q10, sEncCtrl.LTP_scale_Q14,
+ psEnc->sCmn.arch );
} else {
silk_NSQ( &psEnc->sCmn, &psEnc->sCmn.sNSQ, &psEnc->sCmn.indices, xfw_Q3, psEnc->sCmn.pulses,
sEncCtrl.PredCoef_Q12[ 0 ], sEncCtrl.LTPCoef_Q14, sEncCtrl.AR2_Q13, sEncCtrl.HarmShapeGain_Q14,
- sEncCtrl.Tilt_Q14, sEncCtrl.LF_shp_Q14, sEncCtrl.Gains_Q16, sEncCtrl.pitchL, sEncCtrl.Lambda_Q10, sEncCtrl.LTP_scale_Q14 );
+ sEncCtrl.Tilt_Q14, sEncCtrl.LF_shp_Q14, sEncCtrl.Gains_Q16, sEncCtrl.pitchL, sEncCtrl.Lambda_Q10, sEncCtrl.LTP_scale_Q14,
+ psEnc->sCmn.arch);
}
/****************************************/
@@ -287,7 +286,7 @@ opus_int silk_encode_frame_FIX(
for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) {
sEncCtrl.Gains_Q16[ i ] = silk_LSHIFT_SAT32( silk_SMULWB( sEncCtrl.GainsUnq_Q16[ i ], gainMult_Q8 ), 8 );
}
-
+
/* Quantize gains */
psEnc->sShape.LastGainIndex = sEncCtrl.lastGainIndexPrev;
silk_gains_quant( psEnc->sCmn.indices.GainsIndices, sEncCtrl.Gains_Q16,
@@ -371,12 +370,12 @@ static OPUS_INLINE void silk_LBRR_encode_FIX(
silk_NSQ_del_dec( &psEnc->sCmn, &sNSQ_LBRR, psIndices_LBRR, xfw_Q3,
psEnc->sCmn.pulses_LBRR[ psEnc->sCmn.nFramesEncoded ], psEncCtrl->PredCoef_Q12[ 0 ], psEncCtrl->LTPCoef_Q14,
psEncCtrl->AR2_Q13, psEncCtrl->HarmShapeGain_Q14, psEncCtrl->Tilt_Q14, psEncCtrl->LF_shp_Q14,
- psEncCtrl->Gains_Q16, psEncCtrl->pitchL, psEncCtrl->Lambda_Q10, psEncCtrl->LTP_scale_Q14 );
+ psEncCtrl->Gains_Q16, psEncCtrl->pitchL, psEncCtrl->Lambda_Q10, psEncCtrl->LTP_scale_Q14, psEnc->sCmn.arch );
} else {
silk_NSQ( &psEnc->sCmn, &sNSQ_LBRR, psIndices_LBRR, xfw_Q3,
psEnc->sCmn.pulses_LBRR[ psEnc->sCmn.nFramesEncoded ], psEncCtrl->PredCoef_Q12[ 0 ], psEncCtrl->LTPCoef_Q14,
psEncCtrl->AR2_Q13, psEncCtrl->HarmShapeGain_Q14, psEncCtrl->Tilt_Q14, psEncCtrl->LF_shp_Q14,
- psEncCtrl->Gains_Q16, psEncCtrl->pitchL, psEncCtrl->Lambda_Q10, psEncCtrl->LTP_scale_Q14 );
+ psEncCtrl->Gains_Q16, psEncCtrl->pitchL, psEncCtrl->Lambda_Q10, psEncCtrl->LTP_scale_Q14, psEnc->sCmn.arch );
}
/* Restore original gains */
diff --git a/drivers/opus/silk/fixed/find_LPC_FIX.c b/drivers/opus/silk/fixed/find_LPC_FIX.c
index df76e17ca2..5b56c41702 100644
--- a/drivers/opus/silk/fixed/find_LPC_FIX.c
+++ b/drivers/opus/silk/fixed/find_LPC_FIX.c
@@ -24,10 +24,7 @@ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
-
-#ifdef OPUS_ENABLED
#include "opus/opus_config.h"
-#endif
#include "opus/silk/fixed/main_FIX.h"
#include "opus/celt/stack_alloc.h"
@@ -95,7 +92,7 @@ void silk_find_LPC_FIX(
silk_NLSF2A( a_tmp_Q12, NLSF0_Q15, psEncC->predictLPCOrder );
/* Calculate residual energy with NLSF interpolation */
- silk_LPC_analysis_filter( LPC_res, x, a_tmp_Q12, 2 * subfr_length, psEncC->predictLPCOrder );
+ silk_LPC_analysis_filter( LPC_res, x, a_tmp_Q12, 2 * subfr_length, psEncC->predictLPCOrder, psEncC->arch );
silk_sum_sqr_shift( &res_nrg0, &rshift0, LPC_res + psEncC->predictLPCOrder, subfr_length - psEncC->predictLPCOrder );
silk_sum_sqr_shift( &res_nrg1, &rshift1, LPC_res + psEncC->predictLPCOrder + subfr_length, subfr_length - psEncC->predictLPCOrder );
diff --git a/drivers/opus/silk/fixed/find_LTP_FIX.c b/drivers/opus/silk/fixed/find_LTP_FIX.c
index d556371ffa..a5a253516d 100644
--- a/drivers/opus/silk/fixed/find_LTP_FIX.c
+++ b/drivers/opus/silk/fixed/find_LTP_FIX.c
@@ -24,10 +24,7 @@ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
-
-#ifdef OPUS_ENABLED
#include "opus/opus_config.h"
-#endif
#include "opus/silk/fixed/main_FIX.h"
#include "opus/silk/tuning_parameters.h"
@@ -50,7 +47,8 @@ void silk_find_LTP_FIX(
const opus_int subfr_length, /* I subframe length */
const opus_int nb_subfr, /* I number of subframes */
const opus_int mem_offset, /* I number of samples in LTP memory */
- opus_int corr_rshifts[ MAX_NB_SUBFR ] /* O right shifts applied to correlations */
+ opus_int corr_rshifts[ MAX_NB_SUBFR ], /* O right shifts applied to correlations */
+ int arch /* I Run-time architecture */
)
{
opus_int i, k, lshift;
@@ -84,10 +82,10 @@ void silk_find_LTP_FIX(
rr_shifts += ( LTP_CORRS_HEAD_ROOM - LZs );
}
corr_rshifts[ k ] = rr_shifts;
- silk_corrMatrix_FIX( lag_ptr, subfr_length, LTP_ORDER, LTP_CORRS_HEAD_ROOM, WLTP_ptr, &corr_rshifts[ k ] ); /* WLTP_fix_ptr in Q( -corr_rshifts[ k ] ) */
+ silk_corrMatrix_FIX( lag_ptr, subfr_length, LTP_ORDER, LTP_CORRS_HEAD_ROOM, WLTP_ptr, &corr_rshifts[ k ], arch ); /* WLTP_fix_ptr in Q( -corr_rshifts[ k ] ) */
/* The correlation vector always has lower max abs value than rr and/or RR so head room is assured */
- silk_corrVector_FIX( lag_ptr, r_ptr, subfr_length, LTP_ORDER, Rr, corr_rshifts[ k ] ); /* Rr_fix_ptr in Q( -corr_rshifts[ k ] ) */
+ silk_corrVector_FIX( lag_ptr, r_ptr, subfr_length, LTP_ORDER, Rr, corr_rshifts[ k ], arch ); /* Rr_fix_ptr in Q( -corr_rshifts[ k ] ) */
if( corr_rshifts[ k ] > rr_shifts ) {
rr[ k ] = silk_RSHIFT( rr[ k ], corr_rshifts[ k ] - rr_shifts ); /* rr[ k ] in Q( -corr_rshifts[ k ] ) */
}
diff --git a/drivers/opus/silk/fixed/find_pitch_lags_FIX.c b/drivers/opus/silk/fixed/find_pitch_lags_FIX.c
index 0d00b3edf6..bfd2c1b2d2 100644
--- a/drivers/opus/silk/fixed/find_pitch_lags_FIX.c
+++ b/drivers/opus/silk/fixed/find_pitch_lags_FIX.c
@@ -24,10 +24,7 @@ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
-
-#ifdef OPUS_ENABLED
#include "opus/opus_config.h"
-#endif
#include "opus/silk/fixed/main_FIX.h"
#include "opus/celt/stack_alloc.h"
@@ -112,7 +109,7 @@ void silk_find_pitch_lags_FIX(
/*****************************************/
/* LPC analysis filtering */
/*****************************************/
- silk_LPC_analysis_filter( res, x_buf, A_Q12, buf_len, psEnc->sCmn.pitchEstimationLPCOrder );
+ silk_LPC_analysis_filter( res, x_buf, A_Q12, buf_len, psEnc->sCmn.pitchEstimationLPCOrder, psEnc->sCmn.arch );
if( psEnc->sCmn.indices.signalType != TYPE_NO_VOICE_ACTIVITY && psEnc->sCmn.first_frame_after_reset == 0 ) {
/* Threshold for pitch estimator */
diff --git a/drivers/opus/silk/fixed/find_pred_coefs_FIX.c b/drivers/opus/silk/fixed/find_pred_coefs_FIX.c
index 93b506b523..d0280963bd 100644
--- a/drivers/opus/silk/fixed/find_pred_coefs_FIX.c
+++ b/drivers/opus/silk/fixed/find_pred_coefs_FIX.c
@@ -24,10 +24,7 @@ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
-
-#ifdef OPUS_ENABLED
#include "opus/opus_config.h"
-#endif
#include "opus/silk/fixed/main_FIX.h"
#include "opus/celt/stack_alloc.h"
@@ -89,11 +86,12 @@ void silk_find_pred_coefs_FIX(
/* LTP analysis */
silk_find_LTP_FIX( psEncCtrl->LTPCoef_Q14, WLTP, &psEncCtrl->LTPredCodGain_Q7,
res_pitch, psEncCtrl->pitchL, Wght_Q15, psEnc->sCmn.subfr_length,
- psEnc->sCmn.nb_subfr, psEnc->sCmn.ltp_mem_length, LTP_corrs_rshift );
+ psEnc->sCmn.nb_subfr, psEnc->sCmn.ltp_mem_length, LTP_corrs_rshift, psEnc->sCmn.arch );
/* Quantize LTP gain parameters */
silk_quant_LTP_gains( psEncCtrl->LTPCoef_Q14, psEnc->sCmn.indices.LTPIndex, &psEnc->sCmn.indices.PERIndex,
- &psEnc->sCmn.sum_log_gain_Q7, WLTP, psEnc->sCmn.mu_LTP_Q9, psEnc->sCmn.LTPQuantLowComplexity, psEnc->sCmn.nb_subfr);
+ &psEnc->sCmn.sum_log_gain_Q7, WLTP, psEnc->sCmn.mu_LTP_Q9, psEnc->sCmn.LTPQuantLowComplexity, psEnc->sCmn.nb_subfr,
+ psEnc->sCmn.arch);
/* Control LTP scaling */
silk_LTP_scale_ctrl_FIX( psEnc, psEncCtrl, condCoding );
@@ -118,16 +116,16 @@ void silk_find_pred_coefs_FIX(
silk_memset( psEncCtrl->LTPCoef_Q14, 0, psEnc->sCmn.nb_subfr * LTP_ORDER * sizeof( opus_int16 ) );
psEncCtrl->LTPredCodGain_Q7 = 0;
- psEnc->sCmn.sum_log_gain_Q7 = 0;
+ psEnc->sCmn.sum_log_gain_Q7 = 0;
}
/* Limit on total predictive coding gain */
if( psEnc->sCmn.first_frame_after_reset ) {
minInvGain_Q30 = SILK_FIX_CONST( 1.0f / MAX_PREDICTION_POWER_GAIN_AFTER_RESET, 30 );
- } else {
+ } else {
minInvGain_Q30 = silk_log2lin( silk_SMLAWB( 16 << 7, (opus_int32)psEncCtrl->LTPredCodGain_Q7, SILK_FIX_CONST( 1.0 / 3, 16 ) ) ); /* Q16 */
- minInvGain_Q30 = silk_DIV32_varQ( minInvGain_Q30,
- silk_SMULWW( SILK_FIX_CONST( MAX_PREDICTION_POWER_GAIN, 0 ),
+ minInvGain_Q30 = silk_DIV32_varQ( minInvGain_Q30,
+ silk_SMULWW( SILK_FIX_CONST( MAX_PREDICTION_POWER_GAIN, 0 ),
silk_SMLAWB( SILK_FIX_CONST( 0.25, 18 ), SILK_FIX_CONST( 0.75, 18 ), psEncCtrl->coding_quality_Q14 ) ), 14 );
}
@@ -139,7 +137,7 @@ void silk_find_pred_coefs_FIX(
/* Calculate residual energy using quantized LPC coefficients */
silk_residual_energy_FIX( psEncCtrl->ResNrg, psEncCtrl->ResNrgQ, LPC_in_pre, psEncCtrl->PredCoef_Q12, local_gains,
- psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr, psEnc->sCmn.predictLPCOrder );
+ psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr, psEnc->sCmn.predictLPCOrder, psEnc->sCmn.arch );
/* Copy to prediction struct for use in next frame for interpolation */
silk_memcpy( psEnc->sCmn.prev_NLSFq_Q15, NLSF_Q15, sizeof( psEnc->sCmn.prev_NLSFq_Q15 ) );
diff --git a/drivers/opus/silk/fixed/k2a_FIX.c b/drivers/opus/silk/fixed/k2a_FIX.c
index 7d5808f190..64771f370c 100644
--- a/drivers/opus/silk/fixed/k2a_FIX.c
+++ b/drivers/opus/silk/fixed/k2a_FIX.c
@@ -24,10 +24,7 @@ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
-
-#ifdef OPUS_ENABLED
#include "opus/opus_config.h"
-#endif
#include "opus/silk/SigProc_FIX.h"
diff --git a/drivers/opus/silk/fixed/k2a_Q16_FIX.c b/drivers/opus/silk/fixed/k2a_Q16_FIX.c
index 8df61dc3f6..49d90a862e 100644
--- a/drivers/opus/silk/fixed/k2a_Q16_FIX.c
+++ b/drivers/opus/silk/fixed/k2a_Q16_FIX.c
@@ -24,10 +24,7 @@ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
-
-#ifdef OPUS_ENABLED
#include "opus/opus_config.h"
-#endif
#include "opus/silk/SigProc_FIX.h"
diff --git a/drivers/opus/silk/fixed/main_FIX.h b/drivers/opus/silk/fixed/main_FIX.h
index 71a560ef0e..f824c47653 100644
--- a/drivers/opus/silk/fixed/main_FIX.h
+++ b/drivers/opus/silk/fixed/main_FIX.h
@@ -31,7 +31,7 @@ POSSIBILITY OF SUCH DAMAGE.
#include "opus/silk/SigProc_FIX.h"
#include "opus/silk/fixed/structs_FIX.h"
#include "opus/silk/control.h"
-#include "opus/silk/silk_main.h"
+#include "opus/silk/main.h"
#include "opus/silk/PLC.h"
#include "opus/silk/debug.h"
#include "opus/celt/entenc.h"
@@ -97,6 +97,17 @@ void silk_prefilter_FIX(
const opus_int16 x[] /* I Speech signal */
);
+void silk_warped_LPC_analysis_filter_FIX_c(
+ opus_int32 state[], /* I/O State [order + 1] */
+ opus_int32 res_Q2[], /* O Residual signal [length] */
+ const opus_int16 coef_Q13[], /* I Coefficients [order] */
+ const opus_int16 input[], /* I Input signal [length] */
+ const opus_int16 lambda_Q16, /* I Warping factor */
+ const opus_int length, /* I Length of input signal */
+ const opus_int order /* I Filter order (even) */
+);
+
+
/**************************/
/* Noise shaping analysis */
/**************************/
@@ -166,7 +177,8 @@ void silk_find_LTP_FIX(
const opus_int subfr_length, /* I subframe length */
const opus_int nb_subfr, /* I number of subframes */
const opus_int mem_offset, /* I number of samples in LTP memory */
- opus_int corr_rshifts[ MAX_NB_SUBFR ] /* O right shifts applied to correlations */
+ opus_int corr_rshifts[ MAX_NB_SUBFR ], /* O right shifts applied to correlations */
+ int arch /* I Run-time architecture */
);
void silk_LTP_analysis_filter_FIX(
@@ -190,7 +202,8 @@ void silk_residual_energy_FIX(
const opus_int32 gains[ MAX_NB_SUBFR ], /* I Quantization gains */
const opus_int subfr_length, /* I Subframe length */
const opus_int nb_subfr, /* I Number of subframes */
- const opus_int LPC_order /* I LPC order */
+ const opus_int LPC_order, /* I LPC order */
+ int arch /* I Run-time architecture */
);
/* Residual energy: nrg = wxx - 2 * wXx * c + c' * wXX * c */
@@ -220,7 +233,8 @@ void silk_corrMatrix_FIX(
const opus_int order, /* I Max lag for correlation */
const opus_int head_room, /* I Desired headroom */
opus_int32 *XX, /* O Pointer to X'*X correlation matrix [ order x order ] */
- opus_int *rshifts /* I/O Right shifts of correlations */
+ opus_int *rshifts, /* I/O Right shifts of correlations */
+ int arch /* I Run-time architecture */
);
/* Calculates correlation vector X'*t */
@@ -230,7 +244,8 @@ void silk_corrVector_FIX(
const opus_int L, /* I Length of vectors */
const opus_int order, /* I Max lag for correlation */
opus_int32 *Xt, /* O Pointer to X'*t correlation vector [order] */
- const opus_int rshifts /* I Right shifts of correlations */
+ const opus_int rshifts, /* I Right shifts of correlations */
+ int arch /* I Run-time architecture */
);
/* Add noise to matrix diagonal */
diff --git a/drivers/opus/silk/fixed/mips/noise_shape_analysis_FIX_mipsr1.h b/drivers/opus/silk/fixed/mips/noise_shape_analysis_FIX_mipsr1.h
new file mode 100644
index 0000000000..c30481e437
--- /dev/null
+++ b/drivers/opus/silk/fixed/mips/noise_shape_analysis_FIX_mipsr1.h
@@ -0,0 +1,336 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+
+/**************************************************************/
+/* Compute noise shaping coefficients and initial gain values */
+/**************************************************************/
+#define OVERRIDE_silk_noise_shape_analysis_FIX
+
+void silk_noise_shape_analysis_FIX(
+ silk_encoder_state_FIX *psEnc, /* I/O Encoder state FIX */
+ silk_encoder_control_FIX *psEncCtrl, /* I/O Encoder control FIX */
+ const opus_int16 *pitch_res, /* I LPC residual from pitch analysis */
+ const opus_int16 *x, /* I Input signal [ frame_length + la_shape ] */
+ int arch /* I Run-time architecture */
+)
+{
+ silk_shape_state_FIX *psShapeSt = &psEnc->sShape;
+ opus_int k, i, nSamples, Qnrg, b_Q14, warping_Q16, scale = 0;
+ opus_int32 SNR_adj_dB_Q7, HarmBoost_Q16, HarmShapeGain_Q16, Tilt_Q16, tmp32;
+ opus_int32 nrg, pre_nrg_Q30, log_energy_Q7, log_energy_prev_Q7, energy_variation_Q7;
+ opus_int32 delta_Q16, BWExp1_Q16, BWExp2_Q16, gain_mult_Q16, gain_add_Q16, strength_Q16, b_Q8;
+ opus_int32 auto_corr[ MAX_SHAPE_LPC_ORDER + 1 ];
+ opus_int32 refl_coef_Q16[ MAX_SHAPE_LPC_ORDER ];
+ opus_int32 AR1_Q24[ MAX_SHAPE_LPC_ORDER ];
+ opus_int32 AR2_Q24[ MAX_SHAPE_LPC_ORDER ];
+ VARDECL( opus_int16, x_windowed );
+ const opus_int16 *x_ptr, *pitch_res_ptr;
+ SAVE_STACK;
+
+ /* Point to start of first LPC analysis block */
+ x_ptr = x - psEnc->sCmn.la_shape;
+
+ /****************/
+ /* GAIN CONTROL */
+ /****************/
+ SNR_adj_dB_Q7 = psEnc->sCmn.SNR_dB_Q7;
+
+ /* Input quality is the average of the quality in the lowest two VAD bands */
+ psEncCtrl->input_quality_Q14 = ( opus_int )silk_RSHIFT( (opus_int32)psEnc->sCmn.input_quality_bands_Q15[ 0 ]
+ + psEnc->sCmn.input_quality_bands_Q15[ 1 ], 2 );
+
+ /* Coding quality level, between 0.0_Q0 and 1.0_Q0, but in Q14 */
+ psEncCtrl->coding_quality_Q14 = silk_RSHIFT( silk_sigm_Q15( silk_RSHIFT_ROUND( SNR_adj_dB_Q7 -
+ SILK_FIX_CONST( 20.0, 7 ), 4 ) ), 1 );
+
+ /* Reduce coding SNR during low speech activity */
+ if( psEnc->sCmn.useCBR == 0 ) {
+ b_Q8 = SILK_FIX_CONST( 1.0, 8 ) - psEnc->sCmn.speech_activity_Q8;
+ b_Q8 = silk_SMULWB( silk_LSHIFT( b_Q8, 8 ), b_Q8 );
+ SNR_adj_dB_Q7 = silk_SMLAWB( SNR_adj_dB_Q7,
+ silk_SMULBB( SILK_FIX_CONST( -BG_SNR_DECR_dB, 7 ) >> ( 4 + 1 ), b_Q8 ), /* Q11*/
+ silk_SMULWB( SILK_FIX_CONST( 1.0, 14 ) + psEncCtrl->input_quality_Q14, psEncCtrl->coding_quality_Q14 ) ); /* Q12*/
+ }
+
+ if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
+ /* Reduce gains for periodic signals */
+ SNR_adj_dB_Q7 = silk_SMLAWB( SNR_adj_dB_Q7, SILK_FIX_CONST( HARM_SNR_INCR_dB, 8 ), psEnc->LTPCorr_Q15 );
+ } else {
+ /* For unvoiced signals and low-quality input, adjust the quality slower than SNR_dB setting */
+ SNR_adj_dB_Q7 = silk_SMLAWB( SNR_adj_dB_Q7,
+ silk_SMLAWB( SILK_FIX_CONST( 6.0, 9 ), -SILK_FIX_CONST( 0.4, 18 ), psEnc->sCmn.SNR_dB_Q7 ),
+ SILK_FIX_CONST( 1.0, 14 ) - psEncCtrl->input_quality_Q14 );
+ }
+
+ /*************************/
+ /* SPARSENESS PROCESSING */
+ /*************************/
+ /* Set quantizer offset */
+ if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
+ /* Initially set to 0; may be overruled in process_gains(..) */
+ psEnc->sCmn.indices.quantOffsetType = 0;
+ psEncCtrl->sparseness_Q8 = 0;
+ } else {
+ /* Sparseness measure, based on relative fluctuations of energy per 2 milliseconds */
+ nSamples = silk_LSHIFT( psEnc->sCmn.fs_kHz, 1 );
+ energy_variation_Q7 = 0;
+ log_energy_prev_Q7 = 0;
+ pitch_res_ptr = pitch_res;
+ for( k = 0; k < silk_SMULBB( SUB_FRAME_LENGTH_MS, psEnc->sCmn.nb_subfr ) / 2; k++ ) {
+ silk_sum_sqr_shift( &nrg, &scale, pitch_res_ptr, nSamples );
+ nrg += silk_RSHIFT( nSamples, scale ); /* Q(-scale)*/
+
+ log_energy_Q7 = silk_lin2log( nrg );
+ if( k > 0 ) {
+ energy_variation_Q7 += silk_abs( log_energy_Q7 - log_energy_prev_Q7 );
+ }
+ log_energy_prev_Q7 = log_energy_Q7;
+ pitch_res_ptr += nSamples;
+ }
+
+ psEncCtrl->sparseness_Q8 = silk_RSHIFT( silk_sigm_Q15( silk_SMULWB( energy_variation_Q7 -
+ SILK_FIX_CONST( 5.0, 7 ), SILK_FIX_CONST( 0.1, 16 ) ) ), 7 );
+
+ /* Set quantization offset depending on sparseness measure */
+ if( psEncCtrl->sparseness_Q8 > SILK_FIX_CONST( SPARSENESS_THRESHOLD_QNT_OFFSET, 8 ) ) {
+ psEnc->sCmn.indices.quantOffsetType = 0;
+ } else {
+ psEnc->sCmn.indices.quantOffsetType = 1;
+ }
+
+ /* Increase coding SNR for sparse signals */
+ SNR_adj_dB_Q7 = silk_SMLAWB( SNR_adj_dB_Q7, SILK_FIX_CONST( SPARSE_SNR_INCR_dB, 15 ), psEncCtrl->sparseness_Q8 - SILK_FIX_CONST( 0.5, 8 ) );
+ }
+
+ /*******************************/
+ /* Control bandwidth expansion */
+ /*******************************/
+ /* More BWE for signals with high prediction gain */
+ strength_Q16 = silk_SMULWB( psEncCtrl->predGain_Q16, SILK_FIX_CONST( FIND_PITCH_WHITE_NOISE_FRACTION, 16 ) );
+ BWExp1_Q16 = BWExp2_Q16 = silk_DIV32_varQ( SILK_FIX_CONST( BANDWIDTH_EXPANSION, 16 ),
+ silk_SMLAWW( SILK_FIX_CONST( 1.0, 16 ), strength_Q16, strength_Q16 ), 16 );
+ delta_Q16 = silk_SMULWB( SILK_FIX_CONST( 1.0, 16 ) - silk_SMULBB( 3, psEncCtrl->coding_quality_Q14 ),
+ SILK_FIX_CONST( LOW_RATE_BANDWIDTH_EXPANSION_DELTA, 16 ) );
+ BWExp1_Q16 = silk_SUB32( BWExp1_Q16, delta_Q16 );
+ BWExp2_Q16 = silk_ADD32( BWExp2_Q16, delta_Q16 );
+ /* BWExp1 will be applied after BWExp2, so make it relative */
+ BWExp1_Q16 = silk_DIV32_16( silk_LSHIFT( BWExp1_Q16, 14 ), silk_RSHIFT( BWExp2_Q16, 2 ) );
+
+ if( psEnc->sCmn.warping_Q16 > 0 ) {
+ /* Slightly more warping in analysis will move quantization noise up in frequency, where it's better masked */
+ warping_Q16 = silk_SMLAWB( psEnc->sCmn.warping_Q16, (opus_int32)psEncCtrl->coding_quality_Q14, SILK_FIX_CONST( 0.01, 18 ) );
+ } else {
+ warping_Q16 = 0;
+ }
+
+ /********************************************/
+ /* Compute noise shaping AR coefs and gains */
+ /********************************************/
+ ALLOC( x_windowed, psEnc->sCmn.shapeWinLength, opus_int16 );
+ for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
+ /* Apply window: sine slope followed by flat part followed by cosine slope */
+ opus_int shift, slope_part, flat_part;
+ flat_part = psEnc->sCmn.fs_kHz * 3;
+ slope_part = silk_RSHIFT( psEnc->sCmn.shapeWinLength - flat_part, 1 );
+
+ silk_apply_sine_window( x_windowed, x_ptr, 1, slope_part );
+ shift = slope_part;
+ silk_memcpy( x_windowed + shift, x_ptr + shift, flat_part * sizeof(opus_int16) );
+ shift += flat_part;
+ silk_apply_sine_window( x_windowed + shift, x_ptr + shift, 2, slope_part );
+
+ /* Update pointer: next LPC analysis block */
+ x_ptr += psEnc->sCmn.subfr_length;
+
+ if( psEnc->sCmn.warping_Q16 > 0 ) {
+ /* Calculate warped auto correlation */
+ silk_warped_autocorrelation_FIX( auto_corr, &scale, x_windowed, warping_Q16, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder );
+ } else {
+ /* Calculate regular auto correlation */
+ silk_autocorr( auto_corr, &scale, x_windowed, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder + 1, arch );
+ }
+
+ /* Add white noise, as a fraction of energy */
+ auto_corr[0] = silk_ADD32( auto_corr[0], silk_max_32( silk_SMULWB( silk_RSHIFT( auto_corr[ 0 ], 4 ),
+ SILK_FIX_CONST( SHAPE_WHITE_NOISE_FRACTION, 20 ) ), 1 ) );
+
+ /* Calculate the reflection coefficients using schur */
+ nrg = silk_schur64( refl_coef_Q16, auto_corr, psEnc->sCmn.shapingLPCOrder );
+ silk_assert( nrg >= 0 );
+
+ /* Convert reflection coefficients to prediction coefficients */
+ silk_k2a_Q16( AR2_Q24, refl_coef_Q16, psEnc->sCmn.shapingLPCOrder );
+
+ Qnrg = -scale; /* range: -12...30*/
+ silk_assert( Qnrg >= -12 );
+ silk_assert( Qnrg <= 30 );
+
+ /* Make sure that Qnrg is an even number */
+ if( Qnrg & 1 ) {
+ Qnrg -= 1;
+ nrg >>= 1;
+ }
+
+ tmp32 = silk_SQRT_APPROX( nrg );
+ Qnrg >>= 1; /* range: -6...15*/
+
+ psEncCtrl->Gains_Q16[ k ] = (silk_LSHIFT32( silk_LIMIT( (tmp32), silk_RSHIFT32( silk_int32_MIN, (16 - Qnrg) ), \
+ silk_RSHIFT32( silk_int32_MAX, (16 - Qnrg) ) ), (16 - Qnrg) ));
+
+ if( psEnc->sCmn.warping_Q16 > 0 ) {
+ /* Adjust gain for warping */
+ gain_mult_Q16 = warped_gain( AR2_Q24, warping_Q16, psEnc->sCmn.shapingLPCOrder );
+ silk_assert( psEncCtrl->Gains_Q16[ k ] >= 0 );
+ if ( silk_SMULWW( silk_RSHIFT_ROUND( psEncCtrl->Gains_Q16[ k ], 1 ), gain_mult_Q16 ) >= ( silk_int32_MAX >> 1 ) ) {
+ psEncCtrl->Gains_Q16[ k ] = silk_int32_MAX;
+ } else {
+ psEncCtrl->Gains_Q16[ k ] = silk_SMULWW( psEncCtrl->Gains_Q16[ k ], gain_mult_Q16 );
+ }
+ }
+
+ /* Bandwidth expansion for synthesis filter shaping */
+ silk_bwexpander_32( AR2_Q24, psEnc->sCmn.shapingLPCOrder, BWExp2_Q16 );
+
+ /* Compute noise shaping filter coefficients */
+ silk_memcpy( AR1_Q24, AR2_Q24, psEnc->sCmn.shapingLPCOrder * sizeof( opus_int32 ) );
+
+ /* Bandwidth expansion for analysis filter shaping */
+ silk_assert( BWExp1_Q16 <= SILK_FIX_CONST( 1.0, 16 ) );
+ silk_bwexpander_32( AR1_Q24, psEnc->sCmn.shapingLPCOrder, BWExp1_Q16 );
+
+ /* Ratio of prediction gains, in energy domain */
+ pre_nrg_Q30 = silk_LPC_inverse_pred_gain_Q24( AR2_Q24, psEnc->sCmn.shapingLPCOrder );
+ nrg = silk_LPC_inverse_pred_gain_Q24( AR1_Q24, psEnc->sCmn.shapingLPCOrder );
+
+ /*psEncCtrl->GainsPre[ k ] = 1.0f - 0.7f * ( 1.0f - pre_nrg / nrg ) = 0.3f + 0.7f * pre_nrg / nrg;*/
+ pre_nrg_Q30 = silk_LSHIFT32( silk_SMULWB( pre_nrg_Q30, SILK_FIX_CONST( 0.7, 15 ) ), 1 );
+ psEncCtrl->GainsPre_Q14[ k ] = ( opus_int ) SILK_FIX_CONST( 0.3, 14 ) + silk_DIV32_varQ( pre_nrg_Q30, nrg, 14 );
+
+ /* Convert to monic warped prediction coefficients and limit absolute values */
+ limit_warped_coefs( AR2_Q24, AR1_Q24, warping_Q16, SILK_FIX_CONST( 3.999, 24 ), psEnc->sCmn.shapingLPCOrder );
+
+ /* Convert from Q24 to Q13 and store in int16 */
+ for( i = 0; i < psEnc->sCmn.shapingLPCOrder; i++ ) {
+ psEncCtrl->AR1_Q13[ k * MAX_SHAPE_LPC_ORDER + i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( AR1_Q24[ i ], 11 ) );
+ psEncCtrl->AR2_Q13[ k * MAX_SHAPE_LPC_ORDER + i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( AR2_Q24[ i ], 11 ) );
+ }
+ }
+
+ /*****************/
+ /* Gain tweaking */
+ /*****************/
+ /* Increase gains during low speech activity and put lower limit on gains */
+ gain_mult_Q16 = silk_log2lin( -silk_SMLAWB( -SILK_FIX_CONST( 16.0, 7 ), SNR_adj_dB_Q7, SILK_FIX_CONST( 0.16, 16 ) ) );
+ gain_add_Q16 = silk_log2lin( silk_SMLAWB( SILK_FIX_CONST( 16.0, 7 ), SILK_FIX_CONST( MIN_QGAIN_DB, 7 ), SILK_FIX_CONST( 0.16, 16 ) ) );
+ silk_assert( gain_mult_Q16 > 0 );
+ for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
+ psEncCtrl->Gains_Q16[ k ] = silk_SMULWW( psEncCtrl->Gains_Q16[ k ], gain_mult_Q16 );
+ silk_assert( psEncCtrl->Gains_Q16[ k ] >= 0 );
+ psEncCtrl->Gains_Q16[ k ] = silk_ADD_POS_SAT32( psEncCtrl->Gains_Q16[ k ], gain_add_Q16 );
+ }
+
+ gain_mult_Q16 = SILK_FIX_CONST( 1.0, 16 ) + silk_RSHIFT_ROUND( silk_MLA( SILK_FIX_CONST( INPUT_TILT, 26 ),
+ psEncCtrl->coding_quality_Q14, SILK_FIX_CONST( HIGH_RATE_INPUT_TILT, 12 ) ), 10 );
+ for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
+ psEncCtrl->GainsPre_Q14[ k ] = silk_SMULWB( gain_mult_Q16, psEncCtrl->GainsPre_Q14[ k ] );
+ }
+
+ /************************************************/
+ /* Control low-frequency shaping and noise tilt */
+ /************************************************/
+ /* Less low frequency shaping for noisy inputs */
+ strength_Q16 = silk_MUL( SILK_FIX_CONST( LOW_FREQ_SHAPING, 4 ), silk_SMLAWB( SILK_FIX_CONST( 1.0, 12 ),
+ SILK_FIX_CONST( LOW_QUALITY_LOW_FREQ_SHAPING_DECR, 13 ), psEnc->sCmn.input_quality_bands_Q15[ 0 ] - SILK_FIX_CONST( 1.0, 15 ) ) );
+ strength_Q16 = silk_RSHIFT( silk_MUL( strength_Q16, psEnc->sCmn.speech_activity_Q8 ), 8 );
+ if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
+ /* Reduce low frequencies quantization noise for periodic signals, depending on pitch lag */
+ /*f = 400; freqz([1, -0.98 + 2e-4 * f], [1, -0.97 + 7e-4 * f], 2^12, Fs); axis([0, 1000, -10, 1])*/
+ opus_int fs_kHz_inv = silk_DIV32_16( SILK_FIX_CONST( 0.2, 14 ), psEnc->sCmn.fs_kHz );
+ for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
+ b_Q14 = fs_kHz_inv + silk_DIV32_16( SILK_FIX_CONST( 3.0, 14 ), psEncCtrl->pitchL[ k ] );
+ /* Pack two coefficients in one int32 */
+ psEncCtrl->LF_shp_Q14[ k ] = silk_LSHIFT( SILK_FIX_CONST( 1.0, 14 ) - b_Q14 - silk_SMULWB( strength_Q16, b_Q14 ), 16 );
+ psEncCtrl->LF_shp_Q14[ k ] |= (opus_uint16)( b_Q14 - SILK_FIX_CONST( 1.0, 14 ) );
+ }
+ silk_assert( SILK_FIX_CONST( HARM_HP_NOISE_COEF, 24 ) < SILK_FIX_CONST( 0.5, 24 ) ); /* Guarantees that second argument to SMULWB() is within range of an opus_int16*/
+ Tilt_Q16 = - SILK_FIX_CONST( HP_NOISE_COEF, 16 ) -
+ silk_SMULWB( SILK_FIX_CONST( 1.0, 16 ) - SILK_FIX_CONST( HP_NOISE_COEF, 16 ),
+ silk_SMULWB( SILK_FIX_CONST( HARM_HP_NOISE_COEF, 24 ), psEnc->sCmn.speech_activity_Q8 ) );
+ } else {
+ b_Q14 = silk_DIV32_16( 21299, psEnc->sCmn.fs_kHz ); /* 1.3_Q0 = 21299_Q14*/
+ /* Pack two coefficients in one int32 */
+ psEncCtrl->LF_shp_Q14[ 0 ] = silk_LSHIFT( SILK_FIX_CONST( 1.0, 14 ) - b_Q14 -
+ silk_SMULWB( strength_Q16, silk_SMULWB( SILK_FIX_CONST( 0.6, 16 ), b_Q14 ) ), 16 );
+ psEncCtrl->LF_shp_Q14[ 0 ] |= (opus_uint16)( b_Q14 - SILK_FIX_CONST( 1.0, 14 ) );
+ for( k = 1; k < psEnc->sCmn.nb_subfr; k++ ) {
+ psEncCtrl->LF_shp_Q14[ k ] = psEncCtrl->LF_shp_Q14[ 0 ];
+ }
+ Tilt_Q16 = -SILK_FIX_CONST( HP_NOISE_COEF, 16 );
+ }
+
+ /****************************/
+ /* HARMONIC SHAPING CONTROL */
+ /****************************/
+ /* Control boosting of harmonic frequencies */
+ HarmBoost_Q16 = silk_SMULWB( silk_SMULWB( SILK_FIX_CONST( 1.0, 17 ) - silk_LSHIFT( psEncCtrl->coding_quality_Q14, 3 ),
+ psEnc->LTPCorr_Q15 ), SILK_FIX_CONST( LOW_RATE_HARMONIC_BOOST, 16 ) );
+
+ /* More harmonic boost for noisy input signals */
+ HarmBoost_Q16 = silk_SMLAWB( HarmBoost_Q16,
+ SILK_FIX_CONST( 1.0, 16 ) - silk_LSHIFT( psEncCtrl->input_quality_Q14, 2 ), SILK_FIX_CONST( LOW_INPUT_QUALITY_HARMONIC_BOOST, 16 ) );
+
+ if( USE_HARM_SHAPING && psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
+ /* More harmonic noise shaping for high bitrates or noisy input */
+ HarmShapeGain_Q16 = silk_SMLAWB( SILK_FIX_CONST( HARMONIC_SHAPING, 16 ),
+ SILK_FIX_CONST( 1.0, 16 ) - silk_SMULWB( SILK_FIX_CONST( 1.0, 18 ) - silk_LSHIFT( psEncCtrl->coding_quality_Q14, 4 ),
+ psEncCtrl->input_quality_Q14 ), SILK_FIX_CONST( HIGH_RATE_OR_LOW_QUALITY_HARMONIC_SHAPING, 16 ) );
+
+ /* Less harmonic noise shaping for less periodic signals */
+ HarmShapeGain_Q16 = silk_SMULWB( silk_LSHIFT( HarmShapeGain_Q16, 1 ),
+ silk_SQRT_APPROX( silk_LSHIFT( psEnc->LTPCorr_Q15, 15 ) ) );
+ } else {
+ HarmShapeGain_Q16 = 0;
+ }
+
+ /*************************/
+ /* Smooth over subframes */
+ /*************************/
+ for( k = 0; k < MAX_NB_SUBFR; k++ ) {
+ psShapeSt->HarmBoost_smth_Q16 =
+ silk_SMLAWB( psShapeSt->HarmBoost_smth_Q16, HarmBoost_Q16 - psShapeSt->HarmBoost_smth_Q16, SILK_FIX_CONST( SUBFR_SMTH_COEF, 16 ) );
+ psShapeSt->HarmShapeGain_smth_Q16 =
+ silk_SMLAWB( psShapeSt->HarmShapeGain_smth_Q16, HarmShapeGain_Q16 - psShapeSt->HarmShapeGain_smth_Q16, SILK_FIX_CONST( SUBFR_SMTH_COEF, 16 ) );
+ psShapeSt->Tilt_smth_Q16 =
+ silk_SMLAWB( psShapeSt->Tilt_smth_Q16, Tilt_Q16 - psShapeSt->Tilt_smth_Q16, SILK_FIX_CONST( SUBFR_SMTH_COEF, 16 ) );
+
+ psEncCtrl->HarmBoost_Q14[ k ] = ( opus_int )silk_RSHIFT_ROUND( psShapeSt->HarmBoost_smth_Q16, 2 );
+ psEncCtrl->HarmShapeGain_Q14[ k ] = ( opus_int )silk_RSHIFT_ROUND( psShapeSt->HarmShapeGain_smth_Q16, 2 );
+ psEncCtrl->Tilt_Q14[ k ] = ( opus_int )silk_RSHIFT_ROUND( psShapeSt->Tilt_smth_Q16, 2 );
+ }
+ RESTORE_STACK;
+}
diff --git a/drivers/opus/silk/fixed/mips/prefilter_FIX_mipsr1.h b/drivers/opus/silk/fixed/mips/prefilter_FIX_mipsr1.h
new file mode 100644
index 0000000000..c7f04279a0
--- /dev/null
+++ b/drivers/opus/silk/fixed/mips/prefilter_FIX_mipsr1.h
@@ -0,0 +1,181 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+#ifndef __PREFILTER_FIX_MIPSR1_H__
+#define __PREFILTER_FIX_MIPSR1_H__
+#include "opus/opus_config.h"
+
+#include "opus/silk/fixed/main_FIX.h"
+#include "opus/celt/stack_alloc.h"
+#include "opus/silk/tuning_parameters.h"
+
+#define OVERRIDE_silk_warped_LPC_analysis_filter_FIX
+void silk_warped_LPC_analysis_filter_FIX(
+ opus_int32 state[], /* I/O State [order + 1] */
+ opus_int32 res_Q2[], /* O Residual signal [length] */
+ const opus_int16 coef_Q13[], /* I Coefficients [order] */
+ const opus_int16 input[], /* I Input signal [length] */
+ const opus_int16 lambda_Q16, /* I Warping factor */
+ const opus_int length, /* I Length of input signal */
+ const opus_int order, /* I Filter order (even) */
+ int arch
+)
+{
+ opus_int n, i;
+ opus_int32 acc_Q11, acc_Q22, tmp1, tmp2, tmp3, tmp4;
+ opus_int32 state_cur, state_next;
+
+ (void)arch;
+
+ /* Order must be even */
+ /* Length must be even */
+
+ silk_assert( ( order & 1 ) == 0 );
+ silk_assert( ( length & 1 ) == 0 );
+
+ for( n = 0; n < length; n+=2 ) {
+ /* Output of lowpass section */
+ tmp2 = silk_SMLAWB( state[ 0 ], state[ 1 ], lambda_Q16 );
+ state_cur = silk_LSHIFT( input[ n ], 14 );
+ /* Output of allpass section */
+ tmp1 = silk_SMLAWB( state[ 1 ], state[ 2 ] - tmp2, lambda_Q16 );
+ state_next = tmp2;
+ acc_Q11 = silk_RSHIFT( order, 1 );
+ acc_Q11 = silk_SMLAWB( acc_Q11, tmp2, coef_Q13[ 0 ] );
+
+
+ /* Output of lowpass section */
+ tmp4 = silk_SMLAWB( state_cur, state_next, lambda_Q16 );
+ state[ 0 ] = silk_LSHIFT( input[ n+1 ], 14 );
+ /* Output of allpass section */
+ tmp3 = silk_SMLAWB( state_next, tmp1 - tmp4, lambda_Q16 );
+ state[ 1 ] = tmp4;
+ acc_Q22 = silk_RSHIFT( order, 1 );
+ acc_Q22 = silk_SMLAWB( acc_Q22, tmp4, coef_Q13[ 0 ] );
+
+ /* Loop over allpass sections */
+ for( i = 2; i < order; i += 2 ) {
+ /* Output of allpass section */
+ tmp2 = silk_SMLAWB( state[ i ], state[ i + 1 ] - tmp1, lambda_Q16 );
+ state_cur = tmp1;
+ acc_Q11 = silk_SMLAWB( acc_Q11, tmp1, coef_Q13[ i - 1 ] );
+ /* Output of allpass section */
+ tmp1 = silk_SMLAWB( state[ i + 1 ], state[ i + 2 ] - tmp2, lambda_Q16 );
+ state_next = tmp2;
+ acc_Q11 = silk_SMLAWB( acc_Q11, tmp2, coef_Q13[ i ] );
+
+
+ /* Output of allpass section */
+ tmp4 = silk_SMLAWB( state_cur, state_next - tmp3, lambda_Q16 );
+ state[ i ] = tmp3;
+ acc_Q22 = silk_SMLAWB( acc_Q22, tmp3, coef_Q13[ i - 1 ] );
+ /* Output of allpass section */
+ tmp3 = silk_SMLAWB( state_next, tmp1 - tmp4, lambda_Q16 );
+ state[ i + 1 ] = tmp4;
+ acc_Q22 = silk_SMLAWB( acc_Q22, tmp4, coef_Q13[ i ] );
+ }
+ acc_Q11 = silk_SMLAWB( acc_Q11, tmp1, coef_Q13[ order - 1 ] );
+ res_Q2[ n ] = silk_LSHIFT( (opus_int32)input[ n ], 2 ) - silk_RSHIFT_ROUND( acc_Q11, 9 );
+
+ state[ order ] = tmp3;
+ acc_Q22 = silk_SMLAWB( acc_Q22, tmp3, coef_Q13[ order - 1 ] );
+ res_Q2[ n+1 ] = silk_LSHIFT( (opus_int32)input[ n+1 ], 2 ) - silk_RSHIFT_ROUND( acc_Q22, 9 );
+ }
+}
+
+
+
+/* Prefilter for finding Quantizer input signal */
+#define OVERRIDE_silk_prefilt_FIX
+static inline void silk_prefilt_FIX(
+ silk_prefilter_state_FIX *P, /* I/O state */
+ opus_int32 st_res_Q12[], /* I short term residual signal */
+ opus_int32 xw_Q3[], /* O prefiltered signal */
+ opus_int32 HarmShapeFIRPacked_Q12, /* I Harmonic shaping coeficients */
+ opus_int Tilt_Q14, /* I Tilt shaping coeficient */
+ opus_int32 LF_shp_Q14, /* I Low-frequancy shaping coeficients */
+ opus_int lag, /* I Lag for harmonic shaping */
+ opus_int length /* I Length of signals */
+)
+{
+ opus_int i, idx, LTP_shp_buf_idx;
+ opus_int32 n_LTP_Q12, n_Tilt_Q10, n_LF_Q10;
+ opus_int32 sLF_MA_shp_Q12, sLF_AR_shp_Q12;
+ opus_int16 *LTP_shp_buf;
+
+ /* To speed up use temp variables instead of using the struct */
+ LTP_shp_buf = P->sLTP_shp;
+ LTP_shp_buf_idx = P->sLTP_shp_buf_idx;
+ sLF_AR_shp_Q12 = P->sLF_AR_shp_Q12;
+ sLF_MA_shp_Q12 = P->sLF_MA_shp_Q12;
+
+ if( lag > 0 ) {
+ for( i = 0; i < length; i++ ) {
+ /* unrolled loop */
+ silk_assert( HARM_SHAPE_FIR_TAPS == 3 );
+ idx = lag + LTP_shp_buf_idx;
+ n_LTP_Q12 = silk_SMULBB( LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2 - 1) & LTP_MASK ], HarmShapeFIRPacked_Q12 );
+ n_LTP_Q12 = silk_SMLABT( n_LTP_Q12, LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2 ) & LTP_MASK ], HarmShapeFIRPacked_Q12 );
+ n_LTP_Q12 = silk_SMLABB( n_LTP_Q12, LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2 + 1) & LTP_MASK ], HarmShapeFIRPacked_Q12 );
+
+ n_Tilt_Q10 = silk_SMULWB( sLF_AR_shp_Q12, Tilt_Q14 );
+ n_LF_Q10 = silk_SMLAWB( silk_SMULWT( sLF_AR_shp_Q12, LF_shp_Q14 ), sLF_MA_shp_Q12, LF_shp_Q14 );
+
+ sLF_AR_shp_Q12 = silk_SUB32( st_res_Q12[ i ], silk_LSHIFT( n_Tilt_Q10, 2 ) );
+ sLF_MA_shp_Q12 = silk_SUB32( sLF_AR_shp_Q12, silk_LSHIFT( n_LF_Q10, 2 ) );
+
+ LTP_shp_buf_idx = ( LTP_shp_buf_idx - 1 ) & LTP_MASK;
+ LTP_shp_buf[ LTP_shp_buf_idx ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sLF_MA_shp_Q12, 12 ) );
+
+ xw_Q3[i] = silk_RSHIFT_ROUND( silk_SUB32( sLF_MA_shp_Q12, n_LTP_Q12 ), 9 );
+ }
+ }
+ else
+ {
+ for( i = 0; i < length; i++ ) {
+
+ n_LTP_Q12 = 0;
+
+ n_Tilt_Q10 = silk_SMULWB( sLF_AR_shp_Q12, Tilt_Q14 );
+ n_LF_Q10 = silk_SMLAWB( silk_SMULWT( sLF_AR_shp_Q12, LF_shp_Q14 ), sLF_MA_shp_Q12, LF_shp_Q14 );
+
+ sLF_AR_shp_Q12 = silk_SUB32( st_res_Q12[ i ], silk_LSHIFT( n_Tilt_Q10, 2 ) );
+ sLF_MA_shp_Q12 = silk_SUB32( sLF_AR_shp_Q12, silk_LSHIFT( n_LF_Q10, 2 ) );
+
+ LTP_shp_buf_idx = ( LTP_shp_buf_idx - 1 ) & LTP_MASK;
+ LTP_shp_buf[ LTP_shp_buf_idx ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sLF_MA_shp_Q12, 12 ) );
+
+ xw_Q3[i] = silk_RSHIFT_ROUND( sLF_MA_shp_Q12, 9 );
+ }
+ }
+
+ /* Copy temp variable back to state */
+ P->sLF_AR_shp_Q12 = sLF_AR_shp_Q12;
+ P->sLF_MA_shp_Q12 = sLF_MA_shp_Q12;
+ P->sLTP_shp_buf_idx = LTP_shp_buf_idx;
+}
+
+#endif /* __PREFILTER_FIX_MIPSR1_H__ */
diff --git a/drivers/opus/silk/fixed/mips/warped_autocorrelation_FIX_mipsr1.h b/drivers/opus/silk/fixed/mips/warped_autocorrelation_FIX_mipsr1.h
new file mode 100644
index 0000000000..eba7711db7
--- /dev/null
+++ b/drivers/opus/silk/fixed/mips/warped_autocorrelation_FIX_mipsr1.h
@@ -0,0 +1,162 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef __WARPED_AUTOCORRELATION_FIX_MIPSR1_H__
+#define __WARPED_AUTOCORRELATION_FIX_MIPSR1_H__
+#include "opus/opus_config.h"
+
+#include "opus/silk/fixed/main_FIX.h"
+
+#undef QC
+#define QC 10
+
+#undef QS
+#define QS 14
+
+/* Autocorrelations for a warped frequency axis */
+#define OVERRIDE_silk_warped_autocorrelation_FIX
+void silk_warped_autocorrelation_FIX(
+ opus_int32 *corr, /* O Result [order + 1] */
+ opus_int *scale, /* O Scaling of the correlation vector */
+ const opus_int16 *input, /* I Input data to correlate */
+ const opus_int warping_Q16, /* I Warping coefficient */
+ const opus_int length, /* I Length of input */
+ const opus_int order /* I Correlation order (even) */
+)
+{
+ opus_int n, i, lsh;
+ opus_int32 tmp1_QS=0, tmp2_QS=0, tmp3_QS=0, tmp4_QS=0, tmp5_QS=0, tmp6_QS=0, tmp7_QS=0, tmp8_QS=0, start_1=0, start_2=0, start_3=0;
+ opus_int32 state_QS[ MAX_SHAPE_LPC_ORDER + 1 ] = { 0 };
+ opus_int64 corr_QC[ MAX_SHAPE_LPC_ORDER + 1 ] = { 0 };
+ opus_int64 temp64;
+
+ opus_int32 val;
+ val = 2 * QS - QC;
+
+ /* Order must be even */
+ silk_assert( ( order & 1 ) == 0 );
+ silk_assert( 2 * QS - QC >= 0 );
+
+ /* Loop over samples */
+ for( n = 0; n < length; n=n+4 ) {
+
+ tmp1_QS = silk_LSHIFT32( (opus_int32)input[ n ], QS );
+ start_1 = tmp1_QS;
+ tmp3_QS = silk_LSHIFT32( (opus_int32)input[ n+1], QS );
+ start_2 = tmp3_QS;
+ tmp5_QS = silk_LSHIFT32( (opus_int32)input[ n+2], QS );
+ start_3 = tmp5_QS;
+ tmp7_QS = silk_LSHIFT32( (opus_int32)input[ n+3], QS );
+
+ /* Loop over allpass sections */
+ for( i = 0; i < order; i += 2 ) {
+ /* Output of allpass section */
+ tmp2_QS = silk_SMLAWB( state_QS[ i ], state_QS[ i + 1 ] - tmp1_QS, warping_Q16 );
+ corr_QC[ i ] = __builtin_mips_madd( corr_QC[ i ], tmp1_QS, start_1);
+
+ tmp4_QS = silk_SMLAWB( tmp1_QS, tmp2_QS - tmp3_QS, warping_Q16 );
+ corr_QC[ i ] = __builtin_mips_madd( corr_QC[ i ], tmp3_QS, start_2);
+
+ tmp6_QS = silk_SMLAWB( tmp3_QS, tmp4_QS - tmp5_QS, warping_Q16 );
+ corr_QC[ i ] = __builtin_mips_madd( corr_QC[ i ], tmp5_QS, start_3);
+
+ tmp8_QS = silk_SMLAWB( tmp5_QS, tmp6_QS - tmp7_QS, warping_Q16 );
+ state_QS[ i ] = tmp7_QS;
+ corr_QC[ i ] = __builtin_mips_madd( corr_QC[ i ], tmp7_QS, state_QS[0]);
+
+ /* Output of allpass section */
+ tmp1_QS = silk_SMLAWB( state_QS[ i + 1 ], state_QS[ i + 2 ] - tmp2_QS, warping_Q16 );
+ corr_QC[ i+1 ] = __builtin_mips_madd( corr_QC[ i+1 ], tmp2_QS, start_1);
+
+ tmp3_QS = silk_SMLAWB( tmp2_QS, tmp1_QS - tmp4_QS, warping_Q16 );
+ corr_QC[ i+1 ] = __builtin_mips_madd( corr_QC[ i+1 ], tmp4_QS, start_2);
+
+ tmp5_QS = silk_SMLAWB( tmp4_QS, tmp3_QS - tmp6_QS, warping_Q16 );
+ corr_QC[ i+1 ] = __builtin_mips_madd( corr_QC[ i+1 ], tmp6_QS, start_3);
+
+ tmp7_QS = silk_SMLAWB( tmp6_QS, tmp5_QS - tmp8_QS, warping_Q16 );
+ state_QS[ i + 1 ] = tmp8_QS;
+ corr_QC[ i+1 ] = __builtin_mips_madd( corr_QC[ i+1 ], tmp8_QS, state_QS[ 0 ]);
+
+ }
+ state_QS[ order ] = tmp7_QS;
+
+ corr_QC[ order ] = __builtin_mips_madd( corr_QC[ order ], tmp1_QS, start_1);
+ corr_QC[ order ] = __builtin_mips_madd( corr_QC[ order ], tmp3_QS, start_2);
+ corr_QC[ order ] = __builtin_mips_madd( corr_QC[ order ], tmp5_QS, start_3);
+ corr_QC[ order ] = __builtin_mips_madd( corr_QC[ order ], tmp7_QS, state_QS[ 0 ]);
+ }
+
+ for(;n< length; n++ ) {
+
+ tmp1_QS = silk_LSHIFT32( (opus_int32)input[ n ], QS );
+
+ /* Loop over allpass sections */
+ for( i = 0; i < order; i += 2 ) {
+
+ /* Output of allpass section */
+ tmp2_QS = silk_SMLAWB( state_QS[ i ], state_QS[ i + 1 ] - tmp1_QS, warping_Q16 );
+ state_QS[ i ] = tmp1_QS;
+ corr_QC[ i ] = __builtin_mips_madd( corr_QC[ i ], tmp1_QS, state_QS[ 0 ]);
+
+ /* Output of allpass section */
+ tmp1_QS = silk_SMLAWB( state_QS[ i + 1 ], state_QS[ i + 2 ] - tmp2_QS, warping_Q16 );
+ state_QS[ i + 1 ] = tmp2_QS;
+ corr_QC[ i+1 ] = __builtin_mips_madd( corr_QC[ i+1 ], tmp2_QS, state_QS[ 0 ]);
+ }
+ state_QS[ order ] = tmp1_QS;
+ corr_QC[ order ] = __builtin_mips_madd( corr_QC[ order ], tmp1_QS, state_QS[ 0 ]);
+ }
+
+ temp64 = corr_QC[ 0 ];
+ temp64 = __builtin_mips_shilo(temp64, val);
+
+ lsh = silk_CLZ64( temp64 ) - 35;
+ lsh = silk_LIMIT( lsh, -12 - QC, 30 - QC );
+ *scale = -( QC + lsh );
+ silk_assert( *scale >= -30 && *scale <= 12 );
+ if( lsh >= 0 ) {
+ for( i = 0; i < order + 1; i++ ) {
+ temp64 = corr_QC[ i ];
+ //temp64 = __builtin_mips_shilo(temp64, val);
+ temp64 = (val >= 0) ? (temp64 >> val) : (temp64 << -val);
+ corr[ i ] = (opus_int32)silk_CHECK_FIT32( __builtin_mips_shilo( temp64, -lsh ) );
+ }
+ } else {
+ for( i = 0; i < order + 1; i++ ) {
+ temp64 = corr_QC[ i ];
+ //temp64 = __builtin_mips_shilo(temp64, val);
+ temp64 = (val >= 0) ? (temp64 >> val) : (temp64 << -val);
+ corr[ i ] = (opus_int32)silk_CHECK_FIT32( __builtin_mips_shilo( temp64, -lsh ) );
+ }
+ }
+
+ corr_QC[ 0 ] = __builtin_mips_shilo(corr_QC[ 0 ], val);
+
+ silk_assert( corr_QC[ 0 ] >= 0 ); /* If breaking, decrease QC*/
+}
+#endif /* __WARPED_AUTOCORRELATION_FIX_MIPSR1_H__ */
diff --git a/drivers/opus/silk/fixed/noise_shape_analysis_FIX.c b/drivers/opus/silk/fixed/noise_shape_analysis_FIX.c
index 862640d2c8..6f1dc3ddd5 100644
--- a/drivers/opus/silk/fixed/noise_shape_analysis_FIX.c
+++ b/drivers/opus/silk/fixed/noise_shape_analysis_FIX.c
@@ -24,10 +24,7 @@ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
-
-#ifdef OPUS_ENABLED
#include "opus/opus_config.h"
-#endif
#include "opus/silk/fixed/main_FIX.h"
#include "opus/celt/stack_alloc.h"
@@ -138,9 +135,14 @@ static OPUS_INLINE void limit_warped_coefs(
silk_assert( 0 );
}
+#if defined(MIPSr1_ASM)
+#include "opus/silk/fixed/mips/noise_shape_analysis_FIX_mipsr1.h"
+#endif
+
/**************************************************************/
/* Compute noise shaping coefficients and initial gain values */
/**************************************************************/
+#ifndef OVERRIDE_silk_noise_shape_analysis_FIX
void silk_noise_shape_analysis_FIX(
silk_encoder_state_FIX *psEnc, /* I/O Encoder state FIX */
silk_encoder_control_FIX *psEncCtrl, /* I/O Encoder control FIX */
@@ -443,3 +445,4 @@ void silk_noise_shape_analysis_FIX(
}
RESTORE_STACK;
}
+#endif /* OVERRIDE_silk_noise_shape_analysis_FIX */
diff --git a/drivers/opus/silk/fixed/pitch_analysis_core_FIX.c b/drivers/opus/silk/fixed/pitch_analysis_core_FIX.c
index 2acd51db6a..cf421c9612 100644
--- a/drivers/opus/silk/fixed/pitch_analysis_core_FIX.c
+++ b/drivers/opus/silk/fixed/pitch_analysis_core_FIX.c
@@ -24,10 +24,7 @@ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
-
-#ifdef OPUS_ENABLED
#include "opus/opus_config.h"
-#endif
/***********************************************************
* Pitch analyser function
@@ -72,7 +69,8 @@ static void silk_P_Ana_calc_energy_st3(
opus_int start_lag, /* I lag offset to search around */
opus_int sf_length, /* I length of one 5 ms subframe */
opus_int nb_subfr, /* I number of subframes */
- opus_int complexity /* I Complexity setting */
+ opus_int complexity, /* I Complexity setting */
+ int arch /* I Run-time architecture */
);
/*************************************************************/
@@ -195,8 +193,8 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0
/* Calculate first vector products before loop */
cross_corr = xcorr32[ MAX_LAG_4KHZ - MIN_LAG_4KHZ ];
- normalizer = silk_inner_prod_aligned( target_ptr, target_ptr, SF_LENGTH_8KHZ );
- normalizer = silk_ADD32( normalizer, silk_inner_prod_aligned( basis_ptr, basis_ptr, SF_LENGTH_8KHZ ) );
+ normalizer = silk_inner_prod_aligned( target_ptr, target_ptr, SF_LENGTH_8KHZ, arch );
+ normalizer = silk_ADD32( normalizer, silk_inner_prod_aligned( basis_ptr, basis_ptr, SF_LENGTH_8KHZ, arch ) );
normalizer = silk_ADD32( normalizer, silk_SMULBB( SF_LENGTH_8KHZ, 4000 ) );
matrix_ptr( C, k, 0, CSTRIDE_4KHZ ) =
@@ -334,7 +332,7 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0
silk_assert( target_ptr >= frame_8kHz );
silk_assert( target_ptr + SF_LENGTH_8KHZ <= frame_8kHz + frame_length_8kHz );
- energy_target = silk_ADD32( silk_inner_prod_aligned( target_ptr, target_ptr, SF_LENGTH_8KHZ ), 1 );
+ energy_target = silk_ADD32( silk_inner_prod_aligned( target_ptr, target_ptr, SF_LENGTH_8KHZ, arch ), 1 );
for( j = 0; j < length_d_comp; j++ ) {
d = d_comp[ j ];
basis_ptr = target_ptr - d;
@@ -343,9 +341,9 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0
silk_assert( basis_ptr >= frame_8kHz );
silk_assert( basis_ptr + SF_LENGTH_8KHZ <= frame_8kHz + frame_length_8kHz );
- cross_corr = silk_inner_prod_aligned( target_ptr, basis_ptr, SF_LENGTH_8KHZ );
+ cross_corr = silk_inner_prod_aligned( target_ptr, basis_ptr, SF_LENGTH_8KHZ, arch );
if( cross_corr > 0 ) {
- energy_basis = silk_inner_prod_aligned( basis_ptr, basis_ptr, SF_LENGTH_8KHZ );
+ energy_basis = silk_inner_prod_aligned( basis_ptr, basis_ptr, SF_LENGTH_8KHZ, arch );
matrix_ptr( C, k, d - ( MIN_LAG_8KHZ - 2 ), CSTRIDE_8KHZ ) =
(opus_int16)silk_DIV32_varQ( cross_corr,
silk_ADD32( energy_target,
@@ -519,14 +517,14 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0
ALLOC( energies_st3, nb_subfr * nb_cbk_search, silk_pe_stage3_vals );
ALLOC( cross_corr_st3, nb_subfr * nb_cbk_search, silk_pe_stage3_vals );
silk_P_Ana_calc_corr_st3( cross_corr_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity, arch );
- silk_P_Ana_calc_energy_st3( energies_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity );
+ silk_P_Ana_calc_energy_st3( energies_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity, arch );
lag_counter = 0;
silk_assert( lag == silk_SAT16( lag ) );
contour_bias_Q15 = silk_DIV32_16( SILK_FIX_CONST( PE_FLATCONTOUR_BIAS, 15 ), lag );
target_ptr = &input_frame_ptr[ PE_LTP_MEM_LENGTH_MS * Fs_kHz ];
- energy_target = silk_ADD32( silk_inner_prod_aligned( target_ptr, target_ptr, nb_subfr * sf_length ), 1 );
+ energy_target = silk_ADD32( silk_inner_prod_aligned( target_ptr, target_ptr, nb_subfr * sf_length, arch ), 1 );
for( d = start_lag; d <= end_lag; d++ ) {
for( j = 0; j < nb_cbk_search; j++ ) {
cross_corr = 0;
@@ -671,7 +669,8 @@ static void silk_P_Ana_calc_energy_st3(
opus_int start_lag, /* I lag offset to search around */
opus_int sf_length, /* I length of one 5 ms subframe */
opus_int nb_subfr, /* I number of subframes */
- opus_int complexity /* I Complexity setting */
+ opus_int complexity, /* I Complexity setting */
+ int arch /* I Run-time architecture */
)
{
const opus_int16 *target_ptr, *basis_ptr;
@@ -705,7 +704,7 @@ static void silk_P_Ana_calc_energy_st3(
/* Calculate the energy for first lag */
basis_ptr = target_ptr - ( start_lag + matrix_ptr( Lag_range_ptr, k, 0, 2 ) );
- energy = silk_inner_prod_aligned( basis_ptr, basis_ptr, sf_length );
+ energy = silk_inner_prod_aligned( basis_ptr, basis_ptr, sf_length, arch );
silk_assert( energy >= 0 );
scratch_mem[ lag_counter ] = energy;
lag_counter++;
diff --git a/drivers/opus/silk/fixed/prefilter_FIX.c b/drivers/opus/silk/fixed/prefilter_FIX.c
index 195df3da75..344be2badd 100644
--- a/drivers/opus/silk/fixed/prefilter_FIX.c
+++ b/drivers/opus/silk/fixed/prefilter_FIX.c
@@ -24,15 +24,22 @@ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
-
-#ifdef OPUS_ENABLED
#include "opus/opus_config.h"
-#endif
#include "opus/silk/fixed/main_FIX.h"
#include "opus/celt/stack_alloc.h"
#include "opus/silk/tuning_parameters.h"
+#if defined(MIPSr1_ASM)
+#include "opus/silk/fixed/mips/prefilter_FIX_mipsr1.h"
+#endif
+
+
+#if !defined(OVERRIDE_silk_warped_LPC_analysis_filter_FIX)
+#define silk_warped_LPC_analysis_filter_FIX(state, res_Q2, coef_Q13, input, lambda_Q16, length, order, arch) \
+ ((void)(arch),silk_warped_LPC_analysis_filter_FIX_c(state, res_Q2, coef_Q13, input, lambda_Q16, length, order))
+#endif
+
/* Prefilter for finding Quantizer input signal */
static OPUS_INLINE void silk_prefilt_FIX(
silk_prefilter_state_FIX *P, /* I/O state */
@@ -45,7 +52,7 @@ static OPUS_INLINE void silk_prefilt_FIX(
opus_int length /* I Length of signals */
);
-void silk_warped_LPC_analysis_filter_FIX(
+void silk_warped_LPC_analysis_filter_FIX_c(
opus_int32 state[], /* I/O State [order + 1] */
opus_int32 res_Q2[], /* O Residual signal [length] */
const opus_int16 coef_Q13[], /* I Coefficients [order] */
@@ -130,7 +137,7 @@ void silk_prefilter_FIX(
/* Short term FIR filtering*/
silk_warped_LPC_analysis_filter_FIX( P->sAR_shp, st_res_Q2, AR1_shp_Q13, px,
- psEnc->sCmn.warping_Q16, psEnc->sCmn.subfr_length, psEnc->sCmn.shapingLPCOrder );
+ psEnc->sCmn.warping_Q16, psEnc->sCmn.subfr_length, psEnc->sCmn.shapingLPCOrder, psEnc->sCmn.arch );
/* Reduce (mainly) low frequencies during harmonic emphasis */
B_Q10[ 0 ] = silk_RSHIFT_ROUND( psEncCtrl->GainsPre_Q14[ k ], 4 );
@@ -155,6 +162,7 @@ void silk_prefilter_FIX(
RESTORE_STACK;
}
+#ifndef OVERRIDE_silk_prefilt_FIX
/* Prefilter for finding Quantizer input signal */
static OPUS_INLINE void silk_prefilt_FIX(
silk_prefilter_state_FIX *P, /* I/O state */
@@ -207,3 +215,4 @@ static OPUS_INLINE void silk_prefilt_FIX(
P->sLF_MA_shp_Q12 = sLF_MA_shp_Q12;
P->sLTP_shp_buf_idx = LTP_shp_buf_idx;
}
+#endif /* OVERRIDE_silk_prefilt_FIX */
diff --git a/drivers/opus/silk/fixed/process_gains_FIX.c b/drivers/opus/silk/fixed/process_gains_FIX.c
index 2c501a2010..c0d591210b 100644
--- a/drivers/opus/silk/fixed/process_gains_FIX.c
+++ b/drivers/opus/silk/fixed/process_gains_FIX.c
@@ -24,10 +24,7 @@ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
-
-#ifdef OPUS_ENABLED
#include "opus/opus_config.h"
-#endif
#include "opus/silk/fixed/main_FIX.h"
#include "opus/silk/tuning_parameters.h"
diff --git a/drivers/opus/silk/fixed/regularize_correlations_FIX.c b/drivers/opus/silk/fixed/regularize_correlations_FIX.c
index af34da68fa..c38a5589b6 100644
--- a/drivers/opus/silk/fixed/regularize_correlations_FIX.c
+++ b/drivers/opus/silk/fixed/regularize_correlations_FIX.c
@@ -24,10 +24,7 @@ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
-
-#ifdef OPUS_ENABLED
#include "opus/opus_config.h"
-#endif
#include "opus/silk/fixed/main_FIX.h"
diff --git a/drivers/opus/silk/fixed/residual_energy16_FIX.c b/drivers/opus/silk/fixed/residual_energy16_FIX.c
index 9b6e103a52..39bc96fd90 100644
--- a/drivers/opus/silk/fixed/residual_energy16_FIX.c
+++ b/drivers/opus/silk/fixed/residual_energy16_FIX.c
@@ -24,10 +24,7 @@ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
-
-#ifdef OPUS_ENABLED
#include "opus/opus_config.h"
-#endif
#include "opus/silk/fixed/main_FIX.h"
diff --git a/drivers/opus/silk/fixed/residual_energy_FIX.c b/drivers/opus/silk/fixed/residual_energy_FIX.c
index 468bfbab48..0e51f4baa1 100644
--- a/drivers/opus/silk/fixed/residual_energy_FIX.c
+++ b/drivers/opus/silk/fixed/residual_energy_FIX.c
@@ -24,10 +24,7 @@ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
-
-#ifdef OPUS_ENABLED
#include "opus/opus_config.h"
-#endif
#include "opus/silk/fixed/main_FIX.h"
#include "opus/celt/stack_alloc.h"
@@ -42,7 +39,8 @@ void silk_residual_energy_FIX(
const opus_int32 gains[ MAX_NB_SUBFR ], /* I Quantization gains */
const opus_int subfr_length, /* I Subframe length */
const opus_int nb_subfr, /* I Number of subframes */
- const opus_int LPC_order /* I LPC order */
+ const opus_int LPC_order, /* I LPC order */
+ int arch /* I Run-time architecture */
)
{
opus_int offset, i, j, rshift, lz1, lz2;
@@ -60,7 +58,7 @@ void silk_residual_energy_FIX(
silk_assert( ( nb_subfr >> 1 ) * ( MAX_NB_SUBFR >> 1 ) == nb_subfr );
for( i = 0; i < nb_subfr >> 1; i++ ) {
/* Calculate half frame LPC residual signal including preceding samples */
- silk_LPC_analysis_filter( LPC_res, x_ptr, a_Q12[ i ], ( MAX_NB_SUBFR >> 1 ) * offset, LPC_order );
+ silk_LPC_analysis_filter( LPC_res, x_ptr, a_Q12[ i ], ( MAX_NB_SUBFR >> 1 ) * offset, LPC_order, arch );
/* Point to first subframe of the just calculated LPC residual signal */
LPC_res_ptr = LPC_res + LPC_order;
diff --git a/drivers/opus/silk/fixed/schur64_FIX.c b/drivers/opus/silk/fixed/schur64_FIX.c
index fe1278e062..69ef76e028 100644
--- a/drivers/opus/silk/fixed/schur64_FIX.c
+++ b/drivers/opus/silk/fixed/schur64_FIX.c
@@ -24,10 +24,7 @@ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
-
-#ifdef OPUS_ENABLED
#include "opus/opus_config.h"
-#endif
#include "opus/silk/SigProc_FIX.h"
diff --git a/drivers/opus/silk/fixed/schur_FIX.c b/drivers/opus/silk/fixed/schur_FIX.c
index b0a36e5ad2..9effa59daf 100644
--- a/drivers/opus/silk/fixed/schur_FIX.c
+++ b/drivers/opus/silk/fixed/schur_FIX.c
@@ -24,10 +24,7 @@ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
-
-#ifdef OPUS_ENABLED
#include "opus/opus_config.h"
-#endif
#include "opus/silk/SigProc_FIX.h"
diff --git a/drivers/opus/silk/fixed/solve_LS_FIX.c b/drivers/opus/silk/fixed/solve_LS_FIX.c
index 6e8afaaa88..4ff97497be 100644
--- a/drivers/opus/silk/fixed/solve_LS_FIX.c
+++ b/drivers/opus/silk/fixed/solve_LS_FIX.c
@@ -24,10 +24,7 @@ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
-
-#ifdef OPUS_ENABLED
#include "opus/opus_config.h"
-#endif
#include "opus/silk/fixed/main_FIX.h"
#include "opus/celt/stack_alloc.h"
diff --git a/drivers/opus/silk/fixed/structs_FIX.h b/drivers/opus/silk/fixed/structs_FIX.h
index 21eab05f0f..36ddff0a9e 100644
--- a/drivers/opus/silk/fixed/structs_FIX.h
+++ b/drivers/opus/silk/fixed/structs_FIX.h
@@ -29,7 +29,7 @@ POSSIBILITY OF SUCH DAMAGE.
#define SILK_STRUCTS_FIX_H
#include "opus/silk/typedef.h"
-#include "opus/silk/silk_main.h"
+#include "opus/silk/main.h"
#include "opus/silk/structs.h"
#ifdef __cplusplus
@@ -116,6 +116,7 @@ typedef struct {
typedef struct {
silk_encoder_state_FIX state_Fxx[ ENCODER_NUM_CHANNELS ];
stereo_enc_state sStereo;
+ opus_int32 nBitsUsedLBRR;
opus_int32 nBitsExceeded;
opus_int nChannelsAPI;
opus_int nChannelsInternal;
diff --git a/drivers/opus/silk/fixed/vector_ops_FIX.c b/drivers/opus/silk/fixed/vector_ops_FIX.c
index c2725194ec..1e8fdec0cb 100644
--- a/drivers/opus/silk/fixed/vector_ops_FIX.c
+++ b/drivers/opus/silk/fixed/vector_ops_FIX.c
@@ -24,12 +24,10 @@ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
-
-#ifdef OPUS_ENABLED
#include "opus/opus_config.h"
-#endif
#include "opus/silk/SigProc_FIX.h"
+#include "opus/celt/pitch.h"
/* Copy and multiply a vector by a constant */
void silk_scale_copy_vector16(
@@ -70,18 +68,23 @@ void silk_scale_vector32_Q26_lshift_18(
opus_int32 silk_inner_prod_aligned(
const opus_int16 *const inVec1, /* I input vector 1 */
const opus_int16 *const inVec2, /* I input vector 2 */
- const opus_int len /* I vector lengths */
+ const opus_int len, /* I vector lengths */
+ int arch /* I Run-time architecture */
)
{
+#ifdef OPUS_FIXED_POINT
+ return celt_inner_prod(inVec1, inVec2, len, arch);
+#else
opus_int i;
opus_int32 sum = 0;
for( i = 0; i < len; i++ ) {
sum = silk_SMLABB( sum, inVec1[ i ], inVec2[ i ] );
}
return sum;
+#endif
}
-opus_int64 silk_inner_prod16_aligned_64(
+opus_int64 silk_inner_prod16_aligned_64_c(
const opus_int16 *inVec1, /* I input vector 1 */
const opus_int16 *inVec2, /* I input vector 2 */
const opus_int len /* I vector lengths */
diff --git a/drivers/opus/silk/fixed/warped_autocorrelation_FIX.c b/drivers/opus/silk/fixed/warped_autocorrelation_FIX.c
index 4e5a7fee58..3b8d52d5c6 100644
--- a/drivers/opus/silk/fixed/warped_autocorrelation_FIX.c
+++ b/drivers/opus/silk/fixed/warped_autocorrelation_FIX.c
@@ -24,16 +24,19 @@ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
-
-#ifdef OPUS_ENABLED
#include "opus/opus_config.h"
-#endif
#include "opus/silk/fixed/main_FIX.h"
#define QC 10
#define QS 14
+#if defined(MIPSr1_ASM)
+#include "opus/silk/fixed/mips/warped_autocorrelation_FIX_mipsr1.h"
+#endif
+
+
+#ifndef OVERRIDE_silk_warped_autocorrelation_FIX
/* Autocorrelations for a warped frequency axis */
void silk_warped_autocorrelation_FIX(
opus_int32 *corr, /* O Result [order + 1] */
@@ -86,3 +89,4 @@ void silk_warped_autocorrelation_FIX(
}
silk_assert( corr_QC[ 0 ] >= 0 ); /* If breaking, decrease QC*/
}
+#endif /* OVERRIDE_silk_warped_autocorrelation_FIX */
diff --git a/drivers/opus/silk/fixed/x86/burg_modified_FIX_sse.c b/drivers/opus/silk/fixed/x86/burg_modified_FIX_sse.c
new file mode 100644
index 0000000000..f7d46487df
--- /dev/null
+++ b/drivers/opus/silk/fixed/x86/burg_modified_FIX_sse.c
@@ -0,0 +1,372 @@
+/* Copyright (c) 2014, Cisco Systems, INC
+ Written by XiangMingZhu WeiZhou MinPeng YanWang
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+#include "opus/opus_config.h"
+
+#include <xmmintrin.h>
+#include <emmintrin.h>
+#include <smmintrin.h>
+
+#include "opus/silk/SigProc_FIX.h"
+#include "opus/silk/define.h"
+#include "opus/silk/tuning_parameters.h"
+#include "opus/celt/pitch.h"
+#include "opus/celt/x86/x86cpu.h"
+
+#define MAX_FRAME_SIZE 384 /* subfr_length * nb_subfr = ( 0.005 * 16000 + 16 ) * 4 = 384 */
+
+#define QA 25
+#define N_BITS_HEAD_ROOM 2
+#define MIN_RSHIFTS -16
+#define MAX_RSHIFTS (32 - QA)
+
+/* Compute reflection coefficients from input signal */
+void silk_burg_modified_sse4_1(
+ opus_int32 *res_nrg, /* O Residual energy */
+ opus_int *res_nrg_Q, /* O Residual energy Q value */
+ opus_int32 A_Q16[], /* O Prediction coefficients (length order) */
+ const opus_int16 x[], /* I Input signal, length: nb_subfr * ( D + subfr_length ) */
+ const opus_int32 minInvGain_Q30, /* I Inverse of max prediction gain */
+ const opus_int subfr_length, /* I Input signal subframe length (incl. D preceding samples) */
+ const opus_int nb_subfr, /* I Number of subframes stacked in x */
+ const opus_int D, /* I Order */
+ int arch /* I Run-time architecture */
+)
+{
+ opus_int k, n, s, lz, rshifts, rshifts_extra, reached_max_gain;
+ opus_int32 C0, num, nrg, rc_Q31, invGain_Q30, Atmp_QA, Atmp1, tmp1, tmp2, x1, x2;
+ const opus_int16 *x_ptr;
+ opus_int32 C_first_row[ SILK_MAX_ORDER_LPC ];
+ opus_int32 C_last_row[ SILK_MAX_ORDER_LPC ];
+ opus_int32 Af_QA[ SILK_MAX_ORDER_LPC ];
+ opus_int32 CAf[ SILK_MAX_ORDER_LPC + 1 ];
+ opus_int32 CAb[ SILK_MAX_ORDER_LPC + 1 ];
+ opus_int32 xcorr[ SILK_MAX_ORDER_LPC ];
+
+ __m128i FIRST_3210, LAST_3210, ATMP_3210, TMP1_3210, TMP2_3210, T1_3210, T2_3210, PTR_3210, SUBFR_3210, X1_3210, X2_3210;
+ __m128i CONST1 = _mm_set1_epi32(1);
+
+ silk_assert( subfr_length * nb_subfr <= MAX_FRAME_SIZE );
+
+ /* Compute autocorrelations, added over subframes */
+ silk_sum_sqr_shift( &C0, &rshifts, x, nb_subfr * subfr_length );
+ if( rshifts > MAX_RSHIFTS ) {
+ C0 = silk_LSHIFT32( C0, rshifts - MAX_RSHIFTS );
+ silk_assert( C0 > 0 );
+ rshifts = MAX_RSHIFTS;
+ } else {
+ lz = silk_CLZ32( C0 ) - 1;
+ rshifts_extra = N_BITS_HEAD_ROOM - lz;
+ if( rshifts_extra > 0 ) {
+ rshifts_extra = silk_min( rshifts_extra, MAX_RSHIFTS - rshifts );
+ C0 = silk_RSHIFT32( C0, rshifts_extra );
+ } else {
+ rshifts_extra = silk_max( rshifts_extra, MIN_RSHIFTS - rshifts );
+ C0 = silk_LSHIFT32( C0, -rshifts_extra );
+ }
+ rshifts += rshifts_extra;
+ }
+ CAb[ 0 ] = CAf[ 0 ] = C0 + silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ) + 1; /* Q(-rshifts) */
+ silk_memset( C_first_row, 0, SILK_MAX_ORDER_LPC * sizeof( opus_int32 ) );
+ if( rshifts > 0 ) {
+ for( s = 0; s < nb_subfr; s++ ) {
+ x_ptr = x + s * subfr_length;
+ for( n = 1; n < D + 1; n++ ) {
+ C_first_row[ n - 1 ] += (opus_int32)silk_RSHIFT64(
+ silk_inner_prod16_aligned_64( x_ptr, x_ptr + n, subfr_length - n, arch ), rshifts );
+ }
+ }
+ } else {
+ for( s = 0; s < nb_subfr; s++ ) {
+ int i;
+ opus_int32 d;
+ x_ptr = x + s * subfr_length;
+ celt_pitch_xcorr(x_ptr, x_ptr + 1, xcorr, subfr_length - D, D, arch );
+ for( n = 1; n < D + 1; n++ ) {
+ for ( i = n + subfr_length - D, d = 0; i < subfr_length; i++ )
+ d = MAC16_16( d, x_ptr[ i ], x_ptr[ i - n ] );
+ xcorr[ n - 1 ] += d;
+ }
+ for( n = 1; n < D + 1; n++ ) {
+ C_first_row[ n - 1 ] += silk_LSHIFT32( xcorr[ n - 1 ], -rshifts );
+ }
+ }
+ }
+ silk_memcpy( C_last_row, C_first_row, SILK_MAX_ORDER_LPC * sizeof( opus_int32 ) );
+
+ /* Initialize */
+ CAb[ 0 ] = CAf[ 0 ] = C0 + silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ) + 1; /* Q(-rshifts) */
+
+ invGain_Q30 = (opus_int32)1 << 30;
+ reached_max_gain = 0;
+ for( n = 0; n < D; n++ ) {
+ /* Update first row of correlation matrix (without first element) */
+ /* Update last row of correlation matrix (without last element, stored in reversed order) */
+ /* Update C * Af */
+ /* Update C * flipud(Af) (stored in reversed order) */
+ if( rshifts > -2 ) {
+ for( s = 0; s < nb_subfr; s++ ) {
+ x_ptr = x + s * subfr_length;
+ x1 = -silk_LSHIFT32( (opus_int32)x_ptr[ n ], 16 - rshifts ); /* Q(16-rshifts) */
+ x2 = -silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], 16 - rshifts ); /* Q(16-rshifts) */
+ tmp1 = silk_LSHIFT32( (opus_int32)x_ptr[ n ], QA - 16 ); /* Q(QA-16) */
+ tmp2 = silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], QA - 16 ); /* Q(QA-16) */
+ for( k = 0; k < n; k++ ) {
+ C_first_row[ k ] = silk_SMLAWB( C_first_row[ k ], x1, x_ptr[ n - k - 1 ] ); /* Q( -rshifts ) */
+ C_last_row[ k ] = silk_SMLAWB( C_last_row[ k ], x2, x_ptr[ subfr_length - n + k ] ); /* Q( -rshifts ) */
+ Atmp_QA = Af_QA[ k ];
+ tmp1 = silk_SMLAWB( tmp1, Atmp_QA, x_ptr[ n - k - 1 ] ); /* Q(QA-16) */
+ tmp2 = silk_SMLAWB( tmp2, Atmp_QA, x_ptr[ subfr_length - n + k ] ); /* Q(QA-16) */
+ }
+ tmp1 = silk_LSHIFT32( -tmp1, 32 - QA - rshifts ); /* Q(16-rshifts) */
+ tmp2 = silk_LSHIFT32( -tmp2, 32 - QA - rshifts ); /* Q(16-rshifts) */
+ for( k = 0; k <= n; k++ ) {
+ CAf[ k ] = silk_SMLAWB( CAf[ k ], tmp1, x_ptr[ n - k ] ); /* Q( -rshift ) */
+ CAb[ k ] = silk_SMLAWB( CAb[ k ], tmp2, x_ptr[ subfr_length - n + k - 1 ] ); /* Q( -rshift ) */
+ }
+ }
+ } else {
+ for( s = 0; s < nb_subfr; s++ ) {
+ x_ptr = x + s * subfr_length;
+ x1 = -silk_LSHIFT32( (opus_int32)x_ptr[ n ], -rshifts ); /* Q( -rshifts ) */
+ x2 = -silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], -rshifts ); /* Q( -rshifts ) */
+ tmp1 = silk_LSHIFT32( (opus_int32)x_ptr[ n ], 17 ); /* Q17 */
+ tmp2 = silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], 17 ); /* Q17 */
+
+ X1_3210 = _mm_set1_epi32( x1 );
+ X2_3210 = _mm_set1_epi32( x2 );
+ TMP1_3210 = _mm_setzero_si128();
+ TMP2_3210 = _mm_setzero_si128();
+ for( k = 0; k < n - 3; k += 4 ) {
+ PTR_3210 = OP_CVTEPI16_EPI32_M64( &x_ptr[ n - k - 1 - 3 ] );
+ SUBFR_3210 = OP_CVTEPI16_EPI32_M64( &x_ptr[ subfr_length - n + k ] );
+ FIRST_3210 = _mm_loadu_si128( (__m128i *)&C_first_row[ k ] );
+ PTR_3210 = _mm_shuffle_epi32( PTR_3210, _MM_SHUFFLE( 0, 1, 2, 3 ) );
+ LAST_3210 = _mm_loadu_si128( (__m128i *)&C_last_row[ k ] );
+ ATMP_3210 = _mm_loadu_si128( (__m128i *)&Af_QA[ k ] );
+
+ T1_3210 = _mm_mullo_epi32( PTR_3210, X1_3210 );
+ T2_3210 = _mm_mullo_epi32( SUBFR_3210, X2_3210 );
+
+ ATMP_3210 = _mm_srai_epi32( ATMP_3210, 7 );
+ ATMP_3210 = _mm_add_epi32( ATMP_3210, CONST1 );
+ ATMP_3210 = _mm_srai_epi32( ATMP_3210, 1 );
+
+ FIRST_3210 = _mm_add_epi32( FIRST_3210, T1_3210 );
+ LAST_3210 = _mm_add_epi32( LAST_3210, T2_3210 );
+
+ PTR_3210 = _mm_mullo_epi32( ATMP_3210, PTR_3210 );
+ SUBFR_3210 = _mm_mullo_epi32( ATMP_3210, SUBFR_3210 );
+
+ _mm_storeu_si128( (__m128i *)&C_first_row[ k ], FIRST_3210 );
+ _mm_storeu_si128( (__m128i *)&C_last_row[ k ], LAST_3210 );
+
+ TMP1_3210 = _mm_add_epi32( TMP1_3210, PTR_3210 );
+ TMP2_3210 = _mm_add_epi32( TMP2_3210, SUBFR_3210 );
+ }
+
+ TMP1_3210 = _mm_add_epi32( TMP1_3210, _mm_unpackhi_epi64(TMP1_3210, TMP1_3210 ) );
+ TMP2_3210 = _mm_add_epi32( TMP2_3210, _mm_unpackhi_epi64(TMP2_3210, TMP2_3210 ) );
+ TMP1_3210 = _mm_add_epi32( TMP1_3210, _mm_shufflelo_epi16(TMP1_3210, 0x0E ) );
+ TMP2_3210 = _mm_add_epi32( TMP2_3210, _mm_shufflelo_epi16(TMP2_3210, 0x0E ) );
+
+ tmp1 += _mm_cvtsi128_si32( TMP1_3210 );
+ tmp2 += _mm_cvtsi128_si32( TMP2_3210 );
+
+ for( ; k < n; k++ ) {
+ C_first_row[ k ] = silk_MLA( C_first_row[ k ], x1, x_ptr[ n - k - 1 ] ); /* Q( -rshifts ) */
+ C_last_row[ k ] = silk_MLA( C_last_row[ k ], x2, x_ptr[ subfr_length - n + k ] ); /* Q( -rshifts ) */
+ Atmp1 = silk_RSHIFT_ROUND( Af_QA[ k ], QA - 17 ); /* Q17 */
+ tmp1 = silk_MLA( tmp1, x_ptr[ n - k - 1 ], Atmp1 ); /* Q17 */
+ tmp2 = silk_MLA( tmp2, x_ptr[ subfr_length - n + k ], Atmp1 ); /* Q17 */
+ }
+
+ tmp1 = -tmp1; /* Q17 */
+ tmp2 = -tmp2; /* Q17 */
+
+ {
+ __m128i xmm_tmp1, xmm_tmp2;
+ __m128i xmm_x_ptr_n_k_x2x0, xmm_x_ptr_n_k_x3x1;
+ __m128i xmm_x_ptr_sub_x2x0, xmm_x_ptr_sub_x3x1;
+
+ xmm_tmp1 = _mm_set1_epi32( tmp1 );
+ xmm_tmp2 = _mm_set1_epi32( tmp2 );
+
+ for( k = 0; k <= n - 3; k += 4 ) {
+ xmm_x_ptr_n_k_x2x0 = OP_CVTEPI16_EPI32_M64( &x_ptr[ n - k - 3 ] );
+ xmm_x_ptr_sub_x2x0 = OP_CVTEPI16_EPI32_M64( &x_ptr[ subfr_length - n + k - 1 ] );
+
+ xmm_x_ptr_n_k_x2x0 = _mm_shuffle_epi32( xmm_x_ptr_n_k_x2x0, _MM_SHUFFLE( 0, 1, 2, 3 ) );
+
+ xmm_x_ptr_n_k_x2x0 = _mm_slli_epi32( xmm_x_ptr_n_k_x2x0, -rshifts - 1 );
+ xmm_x_ptr_sub_x2x0 = _mm_slli_epi32( xmm_x_ptr_sub_x2x0, -rshifts - 1 );
+
+ /* equal shift right 4 bytes, xmm_x_ptr_n_k_x3x1 = _mm_srli_si128(xmm_x_ptr_n_k_x2x0, 4)*/
+ xmm_x_ptr_n_k_x3x1 = _mm_shuffle_epi32( xmm_x_ptr_n_k_x2x0, _MM_SHUFFLE( 0, 3, 2, 1 ) );
+ xmm_x_ptr_sub_x3x1 = _mm_shuffle_epi32( xmm_x_ptr_sub_x2x0, _MM_SHUFFLE( 0, 3, 2, 1 ) );
+
+ xmm_x_ptr_n_k_x2x0 = _mm_mul_epi32( xmm_x_ptr_n_k_x2x0, xmm_tmp1 );
+ xmm_x_ptr_n_k_x3x1 = _mm_mul_epi32( xmm_x_ptr_n_k_x3x1, xmm_tmp1 );
+ xmm_x_ptr_sub_x2x0 = _mm_mul_epi32( xmm_x_ptr_sub_x2x0, xmm_tmp2 );
+ xmm_x_ptr_sub_x3x1 = _mm_mul_epi32( xmm_x_ptr_sub_x3x1, xmm_tmp2 );
+
+ xmm_x_ptr_n_k_x2x0 = _mm_srli_epi64( xmm_x_ptr_n_k_x2x0, 16 );
+ xmm_x_ptr_n_k_x3x1 = _mm_slli_epi64( xmm_x_ptr_n_k_x3x1, 16 );
+ xmm_x_ptr_sub_x2x0 = _mm_srli_epi64( xmm_x_ptr_sub_x2x0, 16 );
+ xmm_x_ptr_sub_x3x1 = _mm_slli_epi64( xmm_x_ptr_sub_x3x1, 16 );
+
+ xmm_x_ptr_n_k_x2x0 = _mm_blend_epi16( xmm_x_ptr_n_k_x2x0, xmm_x_ptr_n_k_x3x1, 0xCC );
+ xmm_x_ptr_sub_x2x0 = _mm_blend_epi16( xmm_x_ptr_sub_x2x0, xmm_x_ptr_sub_x3x1, 0xCC );
+
+ X1_3210 = _mm_loadu_si128( (__m128i *)&CAf[ k ] );
+ PTR_3210 = _mm_loadu_si128( (__m128i *)&CAb[ k ] );
+
+ X1_3210 = _mm_add_epi32( X1_3210, xmm_x_ptr_n_k_x2x0 );
+ PTR_3210 = _mm_add_epi32( PTR_3210, xmm_x_ptr_sub_x2x0 );
+
+ _mm_storeu_si128( (__m128i *)&CAf[ k ], X1_3210 );
+ _mm_storeu_si128( (__m128i *)&CAb[ k ], PTR_3210 );
+ }
+
+ for( ; k <= n; k++ ) {
+ CAf[ k ] = silk_SMLAWW( CAf[ k ], tmp1,
+ silk_LSHIFT32( (opus_int32)x_ptr[ n - k ], -rshifts - 1 ) ); /* Q( -rshift ) */
+ CAb[ k ] = silk_SMLAWW( CAb[ k ], tmp2,
+ silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n + k - 1 ], -rshifts - 1 ) ); /* Q( -rshift ) */
+ }
+ }
+ }
+ }
+
+ /* Calculate nominator and denominator for the next order reflection (parcor) coefficient */
+ tmp1 = C_first_row[ n ]; /* Q( -rshifts ) */
+ tmp2 = C_last_row[ n ]; /* Q( -rshifts ) */
+ num = 0; /* Q( -rshifts ) */
+ nrg = silk_ADD32( CAb[ 0 ], CAf[ 0 ] ); /* Q( 1-rshifts ) */
+ for( k = 0; k < n; k++ ) {
+ Atmp_QA = Af_QA[ k ];
+ lz = silk_CLZ32( silk_abs( Atmp_QA ) ) - 1;
+ lz = silk_min( 32 - QA, lz );
+ Atmp1 = silk_LSHIFT32( Atmp_QA, lz ); /* Q( QA + lz ) */
+
+ tmp1 = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( C_last_row[ n - k - 1 ], Atmp1 ), 32 - QA - lz ); /* Q( -rshifts ) */
+ tmp2 = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( C_first_row[ n - k - 1 ], Atmp1 ), 32 - QA - lz ); /* Q( -rshifts ) */
+ num = silk_ADD_LSHIFT32( num, silk_SMMUL( CAb[ n - k ], Atmp1 ), 32 - QA - lz ); /* Q( -rshifts ) */
+ nrg = silk_ADD_LSHIFT32( nrg, silk_SMMUL( silk_ADD32( CAb[ k + 1 ], CAf[ k + 1 ] ),
+ Atmp1 ), 32 - QA - lz ); /* Q( 1-rshifts ) */
+ }
+ CAf[ n + 1 ] = tmp1; /* Q( -rshifts ) */
+ CAb[ n + 1 ] = tmp2; /* Q( -rshifts ) */
+ num = silk_ADD32( num, tmp2 ); /* Q( -rshifts ) */
+ num = silk_LSHIFT32( -num, 1 ); /* Q( 1-rshifts ) */
+
+ /* Calculate the next order reflection (parcor) coefficient */
+ if( silk_abs( num ) < nrg ) {
+ rc_Q31 = silk_DIV32_varQ( num, nrg, 31 );
+ } else {
+ rc_Q31 = ( num > 0 ) ? silk_int32_MAX : silk_int32_MIN;
+ }
+
+ /* Update inverse prediction gain */
+ tmp1 = ( (opus_int32)1 << 30 ) - silk_SMMUL( rc_Q31, rc_Q31 );
+ tmp1 = silk_LSHIFT( silk_SMMUL( invGain_Q30, tmp1 ), 2 );
+ if( tmp1 <= minInvGain_Q30 ) {
+ /* Max prediction gain exceeded; set reflection coefficient such that max prediction gain is exactly hit */
+ tmp2 = ( (opus_int32)1 << 30 ) - silk_DIV32_varQ( minInvGain_Q30, invGain_Q30, 30 ); /* Q30 */
+ rc_Q31 = silk_SQRT_APPROX( tmp2 ); /* Q15 */
+ /* Newton-Raphson iteration */
+ rc_Q31 = silk_RSHIFT32( rc_Q31 + silk_DIV32( tmp2, rc_Q31 ), 1 ); /* Q15 */
+ rc_Q31 = silk_LSHIFT32( rc_Q31, 16 ); /* Q31 */
+ if( num < 0 ) {
+ /* Ensure adjusted reflection coefficients has the original sign */
+ rc_Q31 = -rc_Q31;
+ }
+ invGain_Q30 = minInvGain_Q30;
+ reached_max_gain = 1;
+ } else {
+ invGain_Q30 = tmp1;
+ }
+
+ /* Update the AR coefficients */
+ for( k = 0; k < (n + 1) >> 1; k++ ) {
+ tmp1 = Af_QA[ k ]; /* QA */
+ tmp2 = Af_QA[ n - k - 1 ]; /* QA */
+ Af_QA[ k ] = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( tmp2, rc_Q31 ), 1 ); /* QA */
+ Af_QA[ n - k - 1 ] = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( tmp1, rc_Q31 ), 1 ); /* QA */
+ }
+ Af_QA[ n ] = silk_RSHIFT32( rc_Q31, 31 - QA ); /* QA */
+
+ if( reached_max_gain ) {
+ /* Reached max prediction gain; set remaining coefficients to zero and exit loop */
+ for( k = n + 1; k < D; k++ ) {
+ Af_QA[ k ] = 0;
+ }
+ break;
+ }
+
+ /* Update C * Af and C * Ab */
+ for( k = 0; k <= n + 1; k++ ) {
+ tmp1 = CAf[ k ]; /* Q( -rshifts ) */
+ tmp2 = CAb[ n - k + 1 ]; /* Q( -rshifts ) */
+ CAf[ k ] = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( tmp2, rc_Q31 ), 1 ); /* Q( -rshifts ) */
+ CAb[ n - k + 1 ] = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( tmp1, rc_Q31 ), 1 ); /* Q( -rshifts ) */
+ }
+ }
+
+ if( reached_max_gain ) {
+ for( k = 0; k < D; k++ ) {
+ /* Scale coefficients */
+ A_Q16[ k ] = -silk_RSHIFT_ROUND( Af_QA[ k ], QA - 16 );
+ }
+ /* Subtract energy of preceding samples from C0 */
+ if( rshifts > 0 ) {
+ for( s = 0; s < nb_subfr; s++ ) {
+ x_ptr = x + s * subfr_length;
+ C0 -= (opus_int32)silk_RSHIFT64( silk_inner_prod16_aligned_64( x_ptr, x_ptr, D, arch ), rshifts );
+ }
+ } else {
+ for( s = 0; s < nb_subfr; s++ ) {
+ x_ptr = x + s * subfr_length;
+ C0 -= silk_LSHIFT32( silk_inner_prod_aligned( x_ptr, x_ptr, D, arch ), -rshifts );
+ }
+ }
+ /* Approximate residual energy */
+ *res_nrg = silk_LSHIFT( silk_SMMUL( invGain_Q30, C0 ), 2 );
+ *res_nrg_Q = -rshifts;
+ } else {
+ /* Return residual energy */
+ nrg = CAf[ 0 ]; /* Q( -rshifts ) */
+ tmp1 = (opus_int32)1 << 16; /* Q16 */
+ for( k = 0; k < D; k++ ) {
+ Atmp1 = silk_RSHIFT_ROUND( Af_QA[ k ], QA - 16 ); /* Q16 */
+ nrg = silk_SMLAWW( nrg, CAf[ k + 1 ], Atmp1 ); /* Q( -rshifts ) */
+ tmp1 = silk_SMLAWW( tmp1, Atmp1, Atmp1 ); /* Q16 */
+ A_Q16[ k ] = -Atmp1;
+ }
+ *res_nrg = silk_SMLAWW( nrg, silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ), -tmp1 );/* Q( -rshifts ) */
+ *res_nrg_Q = -rshifts;
+ }
+}
diff --git a/drivers/opus/silk/fixed/x86/prefilter_FIX_sse.c b/drivers/opus/silk/fixed/x86/prefilter_FIX_sse.c
new file mode 100644
index 0000000000..febb105645
--- /dev/null
+++ b/drivers/opus/silk/fixed/x86/prefilter_FIX_sse.c
@@ -0,0 +1,157 @@
+/* Copyright (c) 2014, Cisco Systems, INC
+ Written by XiangMingZhu WeiZhou MinPeng YanWang
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+#include "opus/opus_config.h"
+
+#include <xmmintrin.h>
+#include <emmintrin.h>
+#include <smmintrin.h>
+#include "opus/silk/main.h"
+#include "opus/celt/x86/x86cpu.h"
+
+void silk_warped_LPC_analysis_filter_FIX_sse4_1(
+ opus_int32 state[], /* I/O State [order + 1] */
+ opus_int32 res_Q2[], /* O Residual signal [length] */
+ const opus_int16 coef_Q13[], /* I Coefficients [order] */
+ const opus_int16 input[], /* I Input signal [length] */
+ const opus_int16 lambda_Q16, /* I Warping factor */
+ const opus_int length, /* I Length of input signal */
+ const opus_int order /* I Filter order (even) */
+)
+{
+ opus_int n, i;
+ opus_int32 acc_Q11, tmp1, tmp2;
+
+ /* Order must be even */
+ silk_assert( ( order & 1 ) == 0 );
+
+ if (order == 10)
+ {
+ if (0 == lambda_Q16)
+ {
+ __m128i coef_Q13_3210, coef_Q13_7654;
+ __m128i coef_Q13_0123, coef_Q13_4567;
+ __m128i state_0123, state_4567;
+ __m128i xmm_product1, xmm_product2;
+ __m128i xmm_tempa, xmm_tempb;
+
+ register opus_int32 sum;
+ register opus_int32 state_8, state_9, state_a;
+ register opus_int64 coef_Q13_8, coef_Q13_9;
+
+ silk_assert( length > 0 );
+
+ coef_Q13_3210 = OP_CVTEPI16_EPI32_M64( &coef_Q13[ 0 ] );
+ coef_Q13_7654 = OP_CVTEPI16_EPI32_M64( &coef_Q13[ 4 ] );
+
+ coef_Q13_0123 = _mm_shuffle_epi32( coef_Q13_3210, _MM_SHUFFLE( 0, 1, 2, 3 ) );
+ coef_Q13_4567 = _mm_shuffle_epi32( coef_Q13_7654, _MM_SHUFFLE( 0, 1, 2, 3 ) );
+
+ coef_Q13_8 = (opus_int64) coef_Q13[ 8 ];
+ coef_Q13_9 = (opus_int64) coef_Q13[ 9 ];
+
+ state_0123 = _mm_loadu_si128( (__m128i *)(&state[ 0 ] ) );
+ state_4567 = _mm_loadu_si128( (__m128i *)(&state[ 4 ] ) );
+
+ state_0123 = _mm_shuffle_epi32( state_0123, _MM_SHUFFLE( 0, 1, 2, 3 ) );
+ state_4567 = _mm_shuffle_epi32( state_4567, _MM_SHUFFLE( 0, 1, 2, 3 ) );
+
+ state_8 = state[ 8 ];
+ state_9 = state[ 9 ];
+ state_a = 0;
+
+ for( n = 0; n < length; n++ )
+ {
+ xmm_product1 = _mm_mul_epi32( coef_Q13_0123, state_0123 ); /* 64-bit multiply, only 2 pairs */
+ xmm_product2 = _mm_mul_epi32( coef_Q13_4567, state_4567 );
+
+ xmm_tempa = _mm_shuffle_epi32( state_0123, _MM_SHUFFLE( 0, 1, 2, 3 ) );
+ xmm_tempb = _mm_shuffle_epi32( state_4567, _MM_SHUFFLE( 0, 1, 2, 3 ) );
+
+ xmm_product1 = _mm_srli_epi64( xmm_product1, 16 ); /* >> 16, zero extending works */
+ xmm_product2 = _mm_srli_epi64( xmm_product2, 16 );
+
+ xmm_tempa = _mm_mul_epi32( coef_Q13_3210, xmm_tempa );
+ xmm_tempb = _mm_mul_epi32( coef_Q13_7654, xmm_tempb );
+
+ xmm_tempa = _mm_srli_epi64( xmm_tempa, 16 );
+ xmm_tempb = _mm_srli_epi64( xmm_tempb, 16 );
+
+ xmm_tempa = _mm_add_epi32( xmm_tempa, xmm_product1 );
+ xmm_tempb = _mm_add_epi32( xmm_tempb, xmm_product2 );
+ xmm_tempa = _mm_add_epi32( xmm_tempa, xmm_tempb );
+
+ sum = (coef_Q13_8 * state_8) >> 16;
+ sum += (coef_Q13_9 * state_9) >> 16;
+
+ xmm_tempa = _mm_add_epi32( xmm_tempa, _mm_shuffle_epi32( xmm_tempa, _MM_SHUFFLE( 0, 0, 0, 2 ) ) );
+ sum += _mm_cvtsi128_si32( xmm_tempa);
+ res_Q2[ n ] = silk_LSHIFT( (opus_int32)input[ n ], 2 ) - silk_RSHIFT_ROUND( ( 5 + sum ), 9);
+
+ /* move right */
+ state_a = state_9;
+ state_9 = state_8;
+ state_8 = _mm_cvtsi128_si32( state_4567 );
+ state_4567 = _mm_alignr_epi8( state_0123, state_4567, 4 );
+
+ state_0123 = _mm_alignr_epi8( _mm_cvtsi32_si128( silk_LSHIFT( input[ n ], 14 ) ), state_0123, 4 );
+ }
+
+ _mm_storeu_si128( (__m128i *)( &state[ 0 ] ), _mm_shuffle_epi32( state_0123, _MM_SHUFFLE( 0, 1, 2, 3 ) ) );
+ _mm_storeu_si128( (__m128i *)( &state[ 4 ] ), _mm_shuffle_epi32( state_4567, _MM_SHUFFLE( 0, 1, 2, 3 ) ) );
+ state[ 8 ] = state_8;
+ state[ 9 ] = state_9;
+ state[ 10 ] = state_a;
+
+ return;
+ }
+ }
+
+ for( n = 0; n < length; n++ ) {
+ /* Output of lowpass section */
+ tmp2 = silk_SMLAWB( state[ 0 ], state[ 1 ], lambda_Q16 );
+ state[ 0 ] = silk_LSHIFT( input[ n ], 14 );
+ /* Output of allpass section */
+ tmp1 = silk_SMLAWB( state[ 1 ], state[ 2 ] - tmp2, lambda_Q16 );
+ state[ 1 ] = tmp2;
+ acc_Q11 = silk_RSHIFT( order, 1 );
+ acc_Q11 = silk_SMLAWB( acc_Q11, tmp2, coef_Q13[ 0 ] );
+ /* Loop over allpass sections */
+ for( i = 2; i < order; i += 2 ) {
+ /* Output of allpass section */
+ tmp2 = silk_SMLAWB( state[ i ], state[ i + 1 ] - tmp1, lambda_Q16 );
+ state[ i ] = tmp1;
+ acc_Q11 = silk_SMLAWB( acc_Q11, tmp1, coef_Q13[ i - 1 ] );
+ /* Output of allpass section */
+ tmp1 = silk_SMLAWB( state[ i + 1 ], state[ i + 2 ] - tmp2, lambda_Q16 );
+ state[ i + 1 ] = tmp2;
+ acc_Q11 = silk_SMLAWB( acc_Q11, tmp2, coef_Q13[ i ] );
+ }
+ state[ order ] = tmp1;
+ acc_Q11 = silk_SMLAWB( acc_Q11, tmp1, coef_Q13[ order - 1 ] );
+ res_Q2[ n ] = silk_LSHIFT( (opus_int32)input[ n ], 2 ) - silk_RSHIFT_ROUND( acc_Q11, 9 );
+ }
+}
diff --git a/drivers/opus/silk/fixed/x86/vector_ops_FIX_sse.c b/drivers/opus/silk/fixed/x86/vector_ops_FIX_sse.c
new file mode 100644
index 0000000000..920d58cebc
--- /dev/null
+++ b/drivers/opus/silk/fixed/x86/vector_ops_FIX_sse.c
@@ -0,0 +1,85 @@
+/* Copyright (c) 2014, Cisco Systems, INC
+ Written by XiangMingZhu WeiZhou MinPeng YanWang
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+#include "opus/opus_config.h"
+
+#include <xmmintrin.h>
+#include <emmintrin.h>
+#include <smmintrin.h>
+#include "opus/silk/main.h"
+
+#include "opus/silk/SigProc_FIX.h"
+#include "opus/celt/pitch.h"
+
+opus_int64 silk_inner_prod16_aligned_64_sse4_1(
+ const opus_int16 *inVec1, /* I input vector 1 */
+ const opus_int16 *inVec2, /* I input vector 2 */
+ const opus_int len /* I vector lengths */
+)
+{
+ opus_int i, dataSize8;
+ opus_int64 sum;
+
+ __m128i xmm_tempa;
+ __m128i inVec1_76543210, acc1;
+ __m128i inVec2_76543210, acc2;
+
+ sum = 0;
+ dataSize8 = len & ~7;
+
+ acc1 = _mm_setzero_si128();
+ acc2 = _mm_setzero_si128();
+
+ for( i = 0; i < dataSize8; i += 8 ) {
+ inVec1_76543210 = _mm_loadu_si128( (__m128i *)(&inVec1[i + 0] ) );
+ inVec2_76543210 = _mm_loadu_si128( (__m128i *)(&inVec2[i + 0] ) );
+
+ /* only when all 4 operands are -32768 (0x8000), this results in wrap around */
+ inVec1_76543210 = _mm_madd_epi16( inVec1_76543210, inVec2_76543210 );
+
+ xmm_tempa = _mm_cvtepi32_epi64( inVec1_76543210 );
+ /* equal shift right 8 bytes */
+ inVec1_76543210 = _mm_shuffle_epi32( inVec1_76543210, _MM_SHUFFLE( 0, 0, 3, 2 ) );
+ inVec1_76543210 = _mm_cvtepi32_epi64( inVec1_76543210 );
+
+ acc1 = _mm_add_epi64( acc1, xmm_tempa );
+ acc2 = _mm_add_epi64( acc2, inVec1_76543210 );
+ }
+
+ acc1 = _mm_add_epi64( acc1, acc2 );
+
+ /* equal shift right 8 bytes */
+ acc2 = _mm_shuffle_epi32( acc1, _MM_SHUFFLE( 0, 0, 3, 2 ) );
+ acc1 = _mm_add_epi64( acc1, acc2 );
+
+ _mm_storel_epi64( (__m128i *)&sum, acc1 );
+
+ for( ; i < len; i++ ) {
+ sum = silk_SMLABB( sum, inVec1[ i ], inVec2[ i ] );
+ }
+
+ return sum;
+}