diff options
Diffstat (limited to 'thirdparty/opus/celt')
-rw-r--r-- | thirdparty/opus/celt/arch.h | 13 | ||||
-rw-r--r-- | thirdparty/opus/celt/arm/arm_celt_map.c | 24 | ||||
-rw-r--r-- | thirdparty/opus/celt/arm/armcpu.c | 49 | ||||
-rw-r--r-- | thirdparty/opus/celt/arm/armcpu.h | 6 | ||||
-rw-r--r-- | thirdparty/opus/celt/arm/celt_neon_intr.c | 61 | ||||
-rw-r--r-- | thirdparty/opus/celt/arm/pitch_arm.h | 68 | ||||
-rw-r--r-- | thirdparty/opus/celt/bands.c | 4 | ||||
-rw-r--r-- | thirdparty/opus/celt/celt.h | 2 | ||||
-rw-r--r-- | thirdparty/opus/celt/celt_decoder.c | 10 | ||||
-rw-r--r-- | thirdparty/opus/celt/celt_encoder.c | 13 | ||||
-rw-r--r-- | thirdparty/opus/celt/celt_lpc.c | 3 | ||||
-rw-r--r-- | thirdparty/opus/celt/cwrs.c | 2 | ||||
-rw-r--r-- | thirdparty/opus/celt/fixed_generic.h | 16 | ||||
-rw-r--r-- | thirdparty/opus/celt/kiss_fft.c | 2 | ||||
-rw-r--r-- | thirdparty/opus/celt/mathops.c | 2 | ||||
-rw-r--r-- | thirdparty/opus/celt/pitch.c | 67 | ||||
-rw-r--r-- | thirdparty/opus/celt/pitch.h | 20 | ||||
-rw-r--r-- | thirdparty/opus/celt/rate.c | 2 | ||||
-rw-r--r-- | thirdparty/opus/celt/vq.c | 2 | ||||
-rw-r--r-- | thirdparty/opus/celt/x86/pitch_sse.h | 34 | ||||
-rw-r--r-- | thirdparty/opus/celt/x86/x86_celt_map.c | 2 | ||||
-rw-r--r-- | thirdparty/opus/celt/x86/x86cpu.c | 2 |
22 files changed, 293 insertions, 111 deletions
diff --git a/thirdparty/opus/celt/arch.h b/thirdparty/opus/celt/arch.h index 9f74ddd267..8ceab5fe10 100644 --- a/thirdparty/opus/celt/arch.h +++ b/thirdparty/opus/celt/arch.h @@ -78,6 +78,15 @@ static OPUS_INLINE void _celt_fatal(const char *str, const char *file, int line) #define UADD32(a,b) ((a)+(b)) #define USUB32(a,b) ((a)-(b)) +/* Set this if opus_int64 is a native type of the CPU. */ +/* Assume that all LP64 architectures have fast 64-bit types; also x86_64 + (which can be ILP32 for x32) and Win64 (which is LLP64). */ +#if defined(__x86_64__) || defined(__LP64__) || defined(_WIN64) +#define OPUS_FAST_INT64 1 +#else +#define OPUS_FAST_INT64 0 +#endif + #define PRINT_MIPS(file) #ifdef FIXED_POINT @@ -118,7 +127,9 @@ static OPUS_INLINE opus_int16 SAT16(opus_int32 x) { #include "fixed_generic.h" -#ifdef OPUS_ARM_INLINE_EDSP +#ifdef OPUS_ARM_PRESUME_AARCH64_NEON_INTR +#include "arm/fixed_arm64.h" +#elif OPUS_ARM_INLINE_EDSP #include "arm/fixed_armv5e.h" #elif defined (OPUS_ARM_INLINE_ASM) #include "arm/fixed_armv4.h" diff --git a/thirdparty/opus/celt/arm/arm_celt_map.c b/thirdparty/opus/celt/arm/arm_celt_map.c index ee6c244786..4d4d069a86 100644 --- a/thirdparty/opus/celt/arm/arm_celt_map.c +++ b/thirdparty/opus/celt/arm/arm_celt_map.c @@ -36,6 +36,9 @@ #if defined(OPUS_HAVE_RTCD) # if defined(FIXED_POINT) +# if ((defined(OPUS_ARM_MAY_HAVE_NEON) && !defined(OPUS_ARM_PRESUME_NEON)) || \ + (defined(OPUS_ARM_MAY_HAVE_MEDIA) && !defined(OPUS_ARM_PRESUME_MEDIA)) || \ + (defined(OPUS_ARM_MAY_HAVE_EDSP) && !defined(OPUS_ARM_PRESUME_EDSP))) opus_val32 (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *, const opus_val16 *, opus_val32 *, int , int) = { celt_pitch_xcorr_c, /* ARMv4 */ @@ -43,8 +46,10 @@ opus_val32 (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *, MAY_HAVE_MEDIA(celt_pitch_xcorr), /* Media */ MAY_HAVE_NEON(celt_pitch_xcorr) /* NEON */ }; + +# endif # else /* !FIXED_POINT */ -# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) +# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR) void (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *, const opus_val16 *, opus_val32 *, int, int) = { celt_pitch_xcorr_c, /* ARMv4 */ @@ -55,6 +60,23 @@ void (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *, # endif # endif /* FIXED_POINT */ +#if defined(FIXED_POINT) && defined(OPUS_HAVE_RTCD) && \ + defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR) + +void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])( + const opus_val16 *x, + const opus_val16 *y, + opus_val32 sum[4], + int len +) = { + xcorr_kernel_c, /* ARMv4 */ + xcorr_kernel_c, /* EDSP */ + xcorr_kernel_c, /* Media */ + xcorr_kernel_neon_fixed, /* Neon */ +}; + +#endif + # if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) # if defined(HAVE_ARM_NE10) # if defined(CUSTOM_MODES) diff --git a/thirdparty/opus/celt/arm/armcpu.c b/thirdparty/opus/celt/arm/armcpu.c index 5e5d10c344..694a63b78e 100644 --- a/thirdparty/opus/celt/arm/armcpu.c +++ b/thirdparty/opus/celt/arm/armcpu.c @@ -37,11 +37,12 @@ #include "cpu_support.h" #include "os_support.h" #include "opus_types.h" +#include "arch.h" -#define OPUS_CPU_ARM_V4 (1) -#define OPUS_CPU_ARM_EDSP (1<<1) -#define OPUS_CPU_ARM_MEDIA (1<<2) -#define OPUS_CPU_ARM_NEON (1<<3) +#define OPUS_CPU_ARM_V4_FLAG (1<<OPUS_ARCH_ARM_V4) +#define OPUS_CPU_ARM_EDSP_FLAG (1<<OPUS_ARCH_ARM_EDSP) +#define OPUS_CPU_ARM_MEDIA_FLAG (1<<OPUS_ARCH_ARM_MEDIA) +#define OPUS_CPU_ARM_NEON_FLAG (1<<OPUS_ARCH_ARM_NEON) #if defined(_MSC_VER) /*For GetExceptionCode() and EXCEPTION_ILLEGAL_INSTRUCTION.*/ @@ -55,20 +56,22 @@ static OPUS_INLINE opus_uint32 opus_cpu_capabilities(void){ /* MSVC has no OPUS_INLINE __asm support for ARM, but it does let you __emit * instructions via their assembled hex code. * All of these instructions should be essentially nops. */ -# if defined(OPUS_ARM_MAY_HAVE_EDSP) +# if defined(OPUS_ARM_MAY_HAVE_EDSP) || defined(OPUS_ARM_MAY_HAVE_MEDIA) \ + || defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR) __try{ /*PLD [r13]*/ __emit(0xF5DDF000); - flags|=OPUS_CPU_ARM_EDSP; + flags|=OPUS_CPU_ARM_EDSP_FLAG; } __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){ /*Ignore exception.*/ } -# if defined(OPUS_ARM_MAY_HAVE_MEDIA) +# if defined(OPUS_ARM_MAY_HAVE_MEDIA) \ + || defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR) __try{ /*SHADD8 r3,r3,r3*/ __emit(0xE6333F93); - flags|=OPUS_CPU_ARM_MEDIA; + flags|=OPUS_CPU_ARM_MEDIA_FLAG; } __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){ /*Ignore exception.*/ @@ -77,7 +80,7 @@ static OPUS_INLINE opus_uint32 opus_cpu_capabilities(void){ __try{ /*VORR q0,q0,q0*/ __emit(0xF2200150); - flags|=OPUS_CPU_ARM_NEON; + flags|=OPUS_CPU_ARM_NEON_FLAG; } __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){ /*Ignore exception.*/ @@ -107,26 +110,26 @@ opus_uint32 opus_cpu_capabilities(void) while(fgets(buf, 512, cpuinfo) != NULL) { -# if defined(OPUS_ARM_MAY_HAVE_EDSP) || defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR) +# if defined(OPUS_ARM_MAY_HAVE_EDSP) || defined(OPUS_ARM_MAY_HAVE_MEDIA) \ + || defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR) /* Search for edsp and neon flag */ if(memcmp(buf, "Features", 8) == 0) { char *p; -# if defined(OPUS_ARM_MAY_HAVE_EDSP) p = strstr(buf, " edsp"); if(p != NULL && (p[5] == ' ' || p[5] == '\n')) - flags |= OPUS_CPU_ARM_EDSP; -# endif + flags |= OPUS_CPU_ARM_EDSP_FLAG; # if defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR) p = strstr(buf, " neon"); if(p != NULL && (p[5] == ' ' || p[5] == '\n')) - flags |= OPUS_CPU_ARM_NEON; + flags |= OPUS_CPU_ARM_NEON_FLAG; # endif } # endif -# if defined(OPUS_ARM_MAY_HAVE_MEDIA) +# if defined(OPUS_ARM_MAY_HAVE_MEDIA) \ + || defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR) /* Search for media capabilities (>= ARMv6) */ if(memcmp(buf, "CPU architecture:", 17) == 0) { @@ -134,7 +137,7 @@ opus_uint32 opus_cpu_capabilities(void) version = atoi(buf+17); if(version >= 6) - flags |= OPUS_CPU_ARM_MEDIA; + flags |= OPUS_CPU_ARM_MEDIA_FLAG; } # endif } @@ -156,18 +159,26 @@ int opus_select_arch(void) opus_uint32 flags = opus_cpu_capabilities(); int arch = 0; - if(!(flags & OPUS_CPU_ARM_EDSP)) + if(!(flags & OPUS_CPU_ARM_EDSP_FLAG)) { + /* Asserts ensure arch values are sequential */ + celt_assert(arch == OPUS_ARCH_ARM_V4); return arch; + } arch++; - if(!(flags & OPUS_CPU_ARM_MEDIA)) + if(!(flags & OPUS_CPU_ARM_MEDIA_FLAG)) { + celt_assert(arch == OPUS_ARCH_ARM_EDSP); return arch; + } arch++; - if(!(flags & OPUS_CPU_ARM_NEON)) + if(!(flags & OPUS_CPU_ARM_NEON_FLAG)) { + celt_assert(arch == OPUS_ARCH_ARM_MEDIA); return arch; + } arch++; + celt_assert(arch == OPUS_ARCH_ARM_NEON); return arch; } diff --git a/thirdparty/opus/celt/arm/armcpu.h b/thirdparty/opus/celt/arm/armcpu.h index ac5744606e..820262ff5f 100644 --- a/thirdparty/opus/celt/arm/armcpu.h +++ b/thirdparty/opus/celt/arm/armcpu.h @@ -66,6 +66,12 @@ # if defined(OPUS_HAVE_RTCD) int opus_select_arch(void); + +#define OPUS_ARCH_ARM_V4 (0) +#define OPUS_ARCH_ARM_EDSP (1) +#define OPUS_ARCH_ARM_MEDIA (2) +#define OPUS_ARCH_ARM_NEON (3) + # endif #endif diff --git a/thirdparty/opus/celt/arm/celt_neon_intr.c b/thirdparty/opus/celt/arm/celt_neon_intr.c index 47dce15ba5..47bbe3dc22 100644 --- a/thirdparty/opus/celt/arm/celt_neon_intr.c +++ b/thirdparty/opus/celt/arm/celt_neon_intr.c @@ -37,7 +37,66 @@ #include <arm_neon.h> #include "../pitch.h" -#if !defined(FIXED_POINT) +#if defined(FIXED_POINT) +void xcorr_kernel_neon_fixed(const opus_val16 * x, const opus_val16 * y, opus_val32 sum[4], int len) +{ + int j; + int32x4_t a = vld1q_s32(sum); + /* Load y[0...3] */ + /* This requires len>0 to always be valid (which we assert in the C code). */ + int16x4_t y0 = vld1_s16(y); + y += 4; + + for (j = 0; j + 8 <= len; j += 8) + { + /* Load x[0...7] */ + int16x8_t xx = vld1q_s16(x); + int16x4_t x0 = vget_low_s16(xx); + int16x4_t x4 = vget_high_s16(xx); + /* Load y[4...11] */ + int16x8_t yy = vld1q_s16(y); + int16x4_t y4 = vget_low_s16(yy); + int16x4_t y8 = vget_high_s16(yy); + int32x4_t a0 = vmlal_lane_s16(a, y0, x0, 0); + int32x4_t a1 = vmlal_lane_s16(a0, y4, x4, 0); + + int16x4_t y1 = vext_s16(y0, y4, 1); + int16x4_t y5 = vext_s16(y4, y8, 1); + int32x4_t a2 = vmlal_lane_s16(a1, y1, x0, 1); + int32x4_t a3 = vmlal_lane_s16(a2, y5, x4, 1); + + int16x4_t y2 = vext_s16(y0, y4, 2); + int16x4_t y6 = vext_s16(y4, y8, 2); + int32x4_t a4 = vmlal_lane_s16(a3, y2, x0, 2); + int32x4_t a5 = vmlal_lane_s16(a4, y6, x4, 2); + + int16x4_t y3 = vext_s16(y0, y4, 3); + int16x4_t y7 = vext_s16(y4, y8, 3); + int32x4_t a6 = vmlal_lane_s16(a5, y3, x0, 3); + int32x4_t a7 = vmlal_lane_s16(a6, y7, x4, 3); + + y0 = y8; + a = a7; + x += 8; + y += 8; + } + + for (; j < len; j++) + { + int16x4_t x0 = vld1_dup_s16(x); /* load next x */ + int32x4_t a0 = vmlal_s16(a, y0, x0); + + int16x4_t y4 = vld1_dup_s16(y); /* load next y */ + y0 = vext_s16(y0, y4, 1); + a = a0; + x++; + y++; + } + + vst1q_s32(sum, a); +} + +#else /* * Function: xcorr_kernel_neon_float * --------------------------------- diff --git a/thirdparty/opus/celt/arm/pitch_arm.h b/thirdparty/opus/celt/arm/pitch_arm.h index 8626ed75b9..14331169ee 100644 --- a/thirdparty/opus/celt/arm/pitch_arm.h +++ b/thirdparty/opus/celt/arm/pitch_arm.h @@ -46,10 +46,53 @@ opus_val32 celt_pitch_xcorr_edsp(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch); # endif -# if !defined(OPUS_HAVE_RTCD) +# if defined(OPUS_HAVE_RTCD) && \ + ((defined(OPUS_ARM_MAY_HAVE_NEON) && !defined(OPUS_ARM_PRESUME_NEON)) || \ + (defined(OPUS_ARM_MAY_HAVE_MEDIA) && !defined(OPUS_ARM_PRESUME_MEDIA)) || \ + (defined(OPUS_ARM_MAY_HAVE_EDSP) && !defined(OPUS_ARM_PRESUME_EDSP))) +extern opus_val32 +(*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *, + const opus_val16 *, opus_val32 *, int, int); +# define OVERRIDE_PITCH_XCORR (1) +# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \ + ((*CELT_PITCH_XCORR_IMPL[(arch)&OPUS_ARCHMASK])(_x, _y, \ + xcorr, len, max_pitch)) + +# elif defined(OPUS_ARM_PRESUME_EDSP) || \ + defined(OPUS_ARM_PRESUME_MEDIA) || \ + defined(OPUS_ARM_PRESUME_NEON) # define OVERRIDE_PITCH_XCORR (1) # define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \ ((void)(arch),PRESUME_NEON(celt_pitch_xcorr)(_x, _y, xcorr, len, max_pitch)) + +# endif + +# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) +void xcorr_kernel_neon_fixed( + const opus_val16 *x, + const opus_val16 *y, + opus_val32 sum[4], + int len); +# endif + +# if defined(OPUS_HAVE_RTCD) && \ + (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR)) + +extern void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])( + const opus_val16 *x, + const opus_val16 *y, + opus_val32 sum[4], + int len); + +# define OVERRIDE_XCORR_KERNEL (1) +# define xcorr_kernel(x, y, sum, len, arch) \ + ((*XCORR_KERNEL_IMPL[(arch) & OPUS_ARCHMASK])(x, y, sum, len)) + +# elif defined(OPUS_ARM_PRESUME_NEON_INTR) +# define OVERRIDE_XCORR_KERNEL (1) +# define xcorr_kernel(x, y, sum, len, arch) \ + ((void)arch, xcorr_kernel_neon_fixed(x, y, sum, len)) + # endif #else /* Start !FIXED_POINT */ @@ -57,12 +100,27 @@ opus_val32 celt_pitch_xcorr_edsp(const opus_val16 *_x, const opus_val16 *_y, #if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) void celt_pitch_xcorr_float_neon(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch); -#if !defined(OPUS_HAVE_RTCD) || defined(OPUS_ARM_PRESUME_NEON_INTR) -#define OVERRIDE_PITCH_XCORR (1) +#endif + +# if defined(OPUS_HAVE_RTCD) && \ + (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR)) +extern void +(*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *, + const opus_val16 *, opus_val32 *, int, int); + +# define OVERRIDE_PITCH_XCORR (1) +# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \ + ((*CELT_PITCH_XCORR_IMPL[(arch)&OPUS_ARCHMASK])(_x, _y, \ + xcorr, len, max_pitch)) + +# elif defined(OPUS_ARM_PRESUME_NEON_INTR) + +# define OVERRIDE_PITCH_XCORR (1) # define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \ ((void)(arch),celt_pitch_xcorr_float_neon(_x, _y, xcorr, len, max_pitch)) -#endif -#endif + +# endif #endif /* end !FIXED_POINT */ + #endif diff --git a/thirdparty/opus/celt/bands.c b/thirdparty/opus/celt/bands.c index 25f229e267..87eaa6c031 100644 --- a/thirdparty/opus/celt/bands.c +++ b/thirdparty/opus/celt/bands.c @@ -414,7 +414,7 @@ static void stereo_merge(celt_norm * OPUS_RESTRICT X, celt_norm * OPUS_RESTRICT /* Compensating for the mid normalization */ xp = MULT16_32_Q15(mid, xp); /* mid and side are in Q15, not Q14 like X and Y */ - mid2 = SHR32(mid, 1); + mid2 = SHR16(mid, 1); El = MULT16_16(mid2, mid2) + side - 2*xp; Er = MULT16_16(mid2, mid2) + side + 2*xp; if (Er < QCONST32(6e-4f, 28) || El < QCONST32(6e-4f, 28)) @@ -714,7 +714,7 @@ static void compute_theta(struct band_ctx *ctx, struct split_ctx *sctx, if (qn!=1) { if (encode) - itheta = (itheta*qn+8192)>>14; + itheta = (itheta*(opus_int32)qn+8192)>>14; /* Entropy coding of the angle. We use a uniform pdf for the time split, a step for stereo, and a triangular one for the rest. */ diff --git a/thirdparty/opus/celt/celt.h b/thirdparty/opus/celt/celt.h index a423b95046..d1f7eb690d 100644 --- a/thirdparty/opus/celt/celt.h +++ b/thirdparty/opus/celt/celt.h @@ -209,7 +209,7 @@ void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N, #endif #ifndef OVERRIDE_COMB_FILTER_CONST -# define comb_filter_const(y, x, T, N, g10, g11, g12, arch) \ +# define comb_filter_const(y, x, T, N, g10, g11, g12, arch) \ ((void)(arch),comb_filter_const_c(y, x, T, N, g10, g11, g12)) #endif diff --git a/thirdparty/opus/celt/celt_decoder.c b/thirdparty/opus/celt/celt_decoder.c index b688f2a4e3..b978bb34d1 100644 --- a/thirdparty/opus/celt/celt_decoder.c +++ b/thirdparty/opus/celt/celt_decoder.c @@ -82,6 +82,7 @@ struct OpusCustomDecoder { int error; int last_pitch_index; int loss_count; + int skip_plc; int postfilter_period; int postfilter_period_old; opus_val16 postfilter_gain; @@ -164,8 +165,6 @@ OPUS_CUSTOM_NOSTATIC int opus_custom_decoder_init(CELTDecoder *st, const CELTMod st->signalling = 1; st->arch = opus_select_arch(); - st->loss_count = 0; - opus_custom_decoder_ctl(st, OPUS_RESET_STATE); return OPUS_OK; @@ -447,7 +446,7 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM) loss_count = st->loss_count; start = st->start; - noise_based = loss_count >= 5 || start != 0; + noise_based = loss_count >= 5 || start != 0 || st->skip_plc; if (noise_based) { /* Noise-based PLC/CNG */ @@ -832,6 +831,10 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat return frame_size/st->downsample; } + /* Check if there are at least two packets received consecutively before + * turning on the pitch-based PLC */ + st->skip_plc = st->loss_count != 0; + if (dec == NULL) { ec_dec_init(&_dec,(unsigned char*)data,len); @@ -1198,6 +1201,7 @@ int opus_custom_decoder_ctl(CELTDecoder * OPUS_RESTRICT st, int request, ...) ((char*)&st->DECODER_RESET_START - (char*)st)); for (i=0;i<2*st->mode->nbEBands;i++) oldLogE[i]=oldLogE2[i]=-QCONST16(28.f,DB_SHIFT); + st->skip_plc = 1; } break; case OPUS_GET_PITCH_REQUEST: diff --git a/thirdparty/opus/celt/celt_encoder.c b/thirdparty/opus/celt/celt_encoder.c index 41fbfd49c8..3ee7a4d3f7 100644 --- a/thirdparty/opus/celt/celt_encoder.c +++ b/thirdparty/opus/celt/celt_encoder.c @@ -1175,10 +1175,10 @@ static int run_prefilter(CELTEncoder *st, celt_sig *in, celt_sig *prefilter_mem, if (N>COMBFILTER_MAXPERIOD) { - OPUS_MOVE(prefilter_mem+c*COMBFILTER_MAXPERIOD, pre[c]+N, COMBFILTER_MAXPERIOD); + OPUS_COPY(prefilter_mem+c*COMBFILTER_MAXPERIOD, pre[c]+N, COMBFILTER_MAXPERIOD); } else { OPUS_MOVE(prefilter_mem+c*COMBFILTER_MAXPERIOD, prefilter_mem+c*COMBFILTER_MAXPERIOD+N, COMBFILTER_MAXPERIOD-N); - OPUS_MOVE(prefilter_mem+c*COMBFILTER_MAXPERIOD+COMBFILTER_MAXPERIOD-N, pre[c]+COMBFILTER_MAXPERIOD, N); + OPUS_COPY(prefilter_mem+c*COMBFILTER_MAXPERIOD+COMBFILTER_MAXPERIOD-N, pre[c]+COMBFILTER_MAXPERIOD, N); } } while (++c<CC); @@ -1281,12 +1281,15 @@ static int compute_vbr(const CELTMode *mode, AnalysisInfo *analysis, opus_int32 if ((!has_surround_mask||lfe) && (constrained_vbr || bitrate<64000)) { - opus_val16 rate_factor; + opus_val16 rate_factor = Q15ONE; + if (bitrate < 64000) + { #ifdef FIXED_POINT - rate_factor = MAX16(0,(bitrate-32000)); + rate_factor = MAX16(0,(bitrate-32000)); #else - rate_factor = MAX16(0,(1.f/32768)*(bitrate-32000)); + rate_factor = MAX16(0,(1.f/32768)*(bitrate-32000)); #endif + } if (constrained_vbr) rate_factor = MIN16(rate_factor, QCONST16(0.67f, 15)); target = base_target + (opus_int32)MULT16_32_Q15(rate_factor, target-base_target); diff --git a/thirdparty/opus/celt/celt_lpc.c b/thirdparty/opus/celt/celt_lpc.c index f02145af0d..b410a21c5f 100644 --- a/thirdparty/opus/celt/celt_lpc.c +++ b/thirdparty/opus/celt/celt_lpc.c @@ -49,8 +49,7 @@ int p float *lpc = _lpc; #endif - for (i = 0; i < p; i++) - lpc[i] = 0; + OPUS_CLEAR(lpc, p); if (ac[0] != 0) { for (i = 0; i < p; i++) { diff --git a/thirdparty/opus/celt/cwrs.c b/thirdparty/opus/celt/cwrs.c index 2fa9f89cd6..9722f0ac86 100644 --- a/thirdparty/opus/celt/cwrs.c +++ b/thirdparty/opus/celt/cwrs.c @@ -74,7 +74,7 @@ int log2_frac(opus_uint32 val, int frac) /*Although derived separately, the pulse vector coding scheme is equivalent to a Pyramid Vector Quantizer \cite{Fis86}. Some additional notes about an early version appear at - http://people.xiph.org/~tterribe/notes/cwrs.html, but the codebook ordering + https://people.xiph.org/~tterribe/notes/cwrs.html, but the codebook ordering and the definitions of some terms have evolved since that was written. The conversion from a pulse vector to an integer index (encoding) and back diff --git a/thirdparty/opus/celt/fixed_generic.h b/thirdparty/opus/celt/fixed_generic.h index ac67d37ce8..1cfd6d6989 100644 --- a/thirdparty/opus/celt/fixed_generic.h +++ b/thirdparty/opus/celt/fixed_generic.h @@ -37,16 +37,32 @@ #define MULT16_16SU(a,b) ((opus_val32)(opus_val16)(a)*(opus_val32)(opus_uint16)(b)) /** 16x32 multiplication, followed by a 16-bit shift right. Results fits in 32 bits */ +#if OPUS_FAST_INT64 +#define MULT16_32_Q16(a,b) ((opus_val32)SHR((opus_int64)((opus_val16)(a))*(b),16)) +#else #define MULT16_32_Q16(a,b) ADD32(MULT16_16((a),SHR((b),16)), SHR(MULT16_16SU((a),((b)&0x0000ffff)),16)) +#endif /** 16x32 multiplication, followed by a 16-bit shift right (round-to-nearest). Results fits in 32 bits */ +#if OPUS_FAST_INT64 +#define MULT16_32_P16(a,b) ((opus_val32)PSHR((opus_int64)((opus_val16)(a))*(b),16)) +#else #define MULT16_32_P16(a,b) ADD32(MULT16_16((a),SHR((b),16)), PSHR(MULT16_16SU((a),((b)&0x0000ffff)),16)) +#endif /** 16x32 multiplication, followed by a 15-bit shift right. Results fits in 32 bits */ +#if OPUS_FAST_INT64 +#define MULT16_32_Q15(a,b) ((opus_val32)SHR((opus_int64)((opus_val16)(a))*(b),15)) +#else #define MULT16_32_Q15(a,b) ADD32(SHL(MULT16_16((a),SHR((b),16)),1), SHR(MULT16_16SU((a),((b)&0x0000ffff)),15)) +#endif /** 32x32 multiplication, followed by a 31-bit shift right. Results fits in 32 bits */ +#if OPUS_FAST_INT64 +#define MULT32_32_Q31(a,b) ((opus_val32)SHR((opus_int64)(a)*(opus_int64)(b),31)) +#else #define MULT32_32_Q31(a,b) ADD32(ADD32(SHL(MULT16_16(SHR((a),16),SHR((b),16)),1), SHR(MULT16_16SU(SHR((a),16),((b)&0x0000ffff)),15)), SHR(MULT16_16SU(SHR((b),16),((a)&0x0000ffff)),15)) +#endif /** Compile-time conversion of float constant to 16-bit value */ #define QCONST16(x,bits) ((opus_val16)(.5+(x)*(((opus_val32)1)<<(bits)))) diff --git a/thirdparty/opus/celt/kiss_fft.c b/thirdparty/opus/celt/kiss_fft.c index 4ed37d2bb7..1f8fd05321 100644 --- a/thirdparty/opus/celt/kiss_fft.c +++ b/thirdparty/opus/celt/kiss_fft.c @@ -191,7 +191,7 @@ static void kf_bfly3( kiss_fft_cpx * Fout_beg = Fout; #ifdef FIXED_POINT - epi3.r = -16384; + /*epi3.r = -16384;*/ /* Unused */ epi3.i = -28378; #else epi3 = st->twiddles[fstride*m]; diff --git a/thirdparty/opus/celt/mathops.c b/thirdparty/opus/celt/mathops.c index 3f8c5dcc0e..21a01f52e4 100644 --- a/thirdparty/opus/celt/mathops.c +++ b/thirdparty/opus/celt/mathops.c @@ -164,7 +164,7 @@ opus_val16 celt_cos_norm(opus_val32 x) { return _celt_cos_pi_2(EXTRACT16(x)); } else { - return NEG32(_celt_cos_pi_2(EXTRACT16(65536-x))); + return NEG16(_celt_cos_pi_2(EXTRACT16(65536-x))); } } else { if (x&0x0000ffff) diff --git a/thirdparty/opus/celt/pitch.c b/thirdparty/opus/celt/pitch.c index 1d89cb0342..bf46e7d562 100644 --- a/thirdparty/opus/celt/pitch.c +++ b/thirdparty/opus/celt/pitch.c @@ -412,6 +412,41 @@ void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTR RESTORE_STACK; } +#ifdef FIXED_POINT +static opus_val16 compute_pitch_gain(opus_val32 xy, opus_val32 xx, opus_val32 yy) +{ + opus_val32 x2y2; + int sx, sy, shift; + opus_val32 g; + opus_val16 den; + if (xy == 0 || xx == 0 || yy == 0) + return 0; + sx = celt_ilog2(xx)-14; + sy = celt_ilog2(yy)-14; + shift = sx + sy; + x2y2 = MULT16_16_Q14(VSHR32(xx, sx), VSHR32(yy, sy)); + if (shift & 1) { + if (x2y2 < 32768) + { + x2y2 <<= 1; + shift--; + } else { + x2y2 >>= 1; + shift++; + } + } + den = celt_rsqrt_norm(x2y2); + g = MULT16_32_Q15(den, xy); + g = VSHR32(g, (shift>>1)-1); + return EXTRACT16(MIN32(g, Q15ONE)); +} +#else +static opus_val16 compute_pitch_gain(opus_val32 xy, opus_val32 xx, opus_val32 yy) +{ + return xy/celt_sqrt(1+xx*yy); +} +#endif + static const int second_check[16] = {0, 0, 3, 2, 3, 2, 5, 2, 3, 2, 3, 2, 5, 2, 3, 2}; opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod, int N, int *T0_, int prev_period, opus_val16 prev_gain, int arch) @@ -450,18 +485,7 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod, yy = yy_lookup[T0]; best_xy = xy; best_yy = yy; -#ifdef FIXED_POINT - { - opus_val32 x2y2; - int sh, t; - x2y2 = 1+HALF32(MULT32_32_Q31(xx,yy)); - sh = celt_ilog2(x2y2)>>1; - t = VSHR32(x2y2, 2*(sh-7)); - g = g0 = VSHR32(MULT16_32_Q15(celt_rsqrt_norm(t), xy),sh+1); - } -#else - g = g0 = xy/celt_sqrt(1+xx*yy); -#endif + g = g0 = compute_pitch_gain(xy, xx, yy); /* Look for any pitch at T/k */ for (k=2;k<=15;k++) { @@ -484,24 +508,13 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod, T1b = celt_udiv(2*second_check[k]*T0+k, 2*k); } dual_inner_prod(x, &x[-T1], &x[-T1b], N, &xy, &xy2, arch); - xy += xy2; - yy = yy_lookup[T1] + yy_lookup[T1b]; -#ifdef FIXED_POINT - { - opus_val32 x2y2; - int sh, t; - x2y2 = 1+MULT32_32_Q31(xx,yy); - sh = celt_ilog2(x2y2)>>1; - t = VSHR32(x2y2, 2*(sh-7)); - g1 = VSHR32(MULT16_32_Q15(celt_rsqrt_norm(t), xy),sh+1); - } -#else - g1 = xy/celt_sqrt(1+2.f*xx*1.f*yy); -#endif + xy = HALF32(xy + xy2); + yy = HALF32(yy_lookup[T1] + yy_lookup[T1b]); + g1 = compute_pitch_gain(xy, xx, yy); if (abs(T1-prev_period)<=1) cont = prev_gain; else if (abs(T1-prev_period)<=2 && 5*k*k < T0) - cont = HALF32(prev_gain); + cont = HALF16(prev_gain); else cont = 0; thresh = MAX16(QCONST16(.3f,15), MULT16_16_Q15(QCONST16(.7f,15),g0)-cont); diff --git a/thirdparty/opus/celt/pitch.h b/thirdparty/opus/celt/pitch.h index 65a77a6ecc..d3503532a0 100644 --- a/thirdparty/opus/celt/pitch.h +++ b/thirdparty/opus/celt/pitch.h @@ -187,25 +187,6 @@ celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch); #if !defined(OVERRIDE_PITCH_XCORR) -/*Is run-time CPU detection enabled on this platform?*/ -# if defined(OPUS_HAVE_RTCD) && (defined(OPUS_ARM_ASM) \ - || (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) \ - && !defined(OPUS_ARM_PRESUME_NEON_INTR))) -extern -# if defined(FIXED_POINT) -opus_val32 -# else -void -# endif -(*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *, - const opus_val16 *, opus_val32 *, int, int); - -# define OVERRIDE_PITCH_XCORR -# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \ - ((*CELT_PITCH_XCORR_IMPL[(arch)&OPUS_ARCHMASK])(_x, _y, \ - xcorr, len, max_pitch)) -# else - #ifdef FIXED_POINT opus_val32 #else @@ -214,7 +195,6 @@ void celt_pitch_xcorr(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch, int arch); -# endif #endif #endif diff --git a/thirdparty/opus/celt/rate.c b/thirdparty/opus/celt/rate.c index b28d8feccd..7dfa5be8a6 100644 --- a/thirdparty/opus/celt/rate.c +++ b/thirdparty/opus/celt/rate.c @@ -296,7 +296,7 @@ static OPUS_INLINE int interp_bits2pulses(const CELTMode *m, int start, int end, done = 0; for (j=end;j-->start;) { - int tmp = bits1[j] + (lo*bits2[j]>>ALLOC_STEPS); + int tmp = bits1[j] + ((opus_int32)lo*bits2[j]>>ALLOC_STEPS); if (tmp < thresh[j] && !done) { if (tmp >= alloc_floor) diff --git a/thirdparty/opus/celt/vq.c b/thirdparty/opus/celt/vq.c index f358396065..d29f38fd8e 100644 --- a/thirdparty/opus/celt/vq.c +++ b/thirdparty/opus/celt/vq.c @@ -271,7 +271,7 @@ unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc best_id = 0; /* The squared magnitude term gets added anyway, so we might as well add it outside the loop */ - yy = ADD32(yy, 1); + yy = ADD16(yy, 1); j=0; do { opus_val16 Rxy, Ryy; diff --git a/thirdparty/opus/celt/x86/pitch_sse.h b/thirdparty/opus/celt/x86/pitch_sse.h index d4cbeb8b9c..e5f87ab51a 100644 --- a/thirdparty/opus/celt/x86/pitch_sse.h +++ b/thirdparty/opus/celt/x86/pitch_sse.h @@ -102,21 +102,21 @@ opus_val32 celt_inner_prod_sse( #if defined(OPUS_X86_PRESUME_SSE4_1) && defined(FIXED_POINT) #define OVERRIDE_CELT_INNER_PROD #define celt_inner_prod(x, y, N, arch) \ - ((void)arch, celt_inner_prod_sse4_1(x, y, N)) + ((void)arch, celt_inner_prod_sse4_1(x, y, N)) #elif defined(OPUS_X86_PRESUME_SSE2) && defined(FIXED_POINT) && !defined(OPUS_X86_MAY_HAVE_SSE4_1) #define OVERRIDE_CELT_INNER_PROD #define celt_inner_prod(x, y, N, arch) \ - ((void)arch, celt_inner_prod_sse2(x, y, N)) + ((void)arch, celt_inner_prod_sse2(x, y, N)) #elif defined(OPUS_X86_PRESUME_SSE) && !defined(FIXED_POINT) #define OVERRIDE_CELT_INNER_PROD #define celt_inner_prod(x, y, N, arch) \ - ((void)arch, celt_inner_prod_sse(x, y, N)) + ((void)arch, celt_inner_prod_sse(x, y, N)) #elif ((defined(OPUS_X86_MAY_HAVE_SSE4_1) || defined(OPUS_X86_MAY_HAVE_SSE2)) && defined(FIXED_POINT)) || \ - (defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT)) + (defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT)) extern opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])( const opus_val16 *x, @@ -138,19 +138,19 @@ extern opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])( #undef comb_filter_const void dual_inner_prod_sse(const opus_val16 *x, - const opus_val16 *y01, - const opus_val16 *y02, - int N, - opus_val32 *xy1, - opus_val32 *xy2); + const opus_val16 *y01, + const opus_val16 *y02, + int N, + opus_val32 *xy1, + opus_val32 *xy2); void comb_filter_const_sse(opus_val32 *y, - opus_val32 *x, - int T, - int N, - opus_val16 g10, - opus_val16 g11, - opus_val16 g12); + opus_val32 *x, + int T, + int N, + opus_val16 g10, + opus_val16 g11, + opus_val16 g12); #if defined(OPUS_X86_PRESUME_SSE) @@ -169,7 +169,7 @@ extern void (*const DUAL_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])( opus_val32 *xy1, opus_val32 *xy2); -#define dual_inner_prod(x, y01, y02, N, xy1, xy2, arch) \ +#define dual_inner_prod(x, y01, y02, N, xy1, xy2, arch) \ ((*DUAL_INNER_PROD_IMPL[(arch) & OPUS_ARCHMASK])(x, y01, y02, N, xy1, xy2)) extern void (*const COMB_FILTER_CONST_IMPL[OPUS_ARCHMASK + 1])( @@ -181,7 +181,7 @@ extern void (*const COMB_FILTER_CONST_IMPL[OPUS_ARCHMASK + 1])( opus_val16 g11, opus_val16 g12); -#define comb_filter_const(y, x, T, N, g10, g11, g12, arch) \ +#define comb_filter_const(y, x, T, N, g10, g11, g12, arch) \ ((*COMB_FILTER_CONST_IMPL[(arch) & OPUS_ARCHMASK])(y, x, T, N, g10, g11, g12)) #define NON_STATIC_COMB_FILTER_CONST_C diff --git a/thirdparty/opus/celt/x86/x86_celt_map.c b/thirdparty/opus/celt/x86/x86_celt_map.c index 8e5e449275..47ba41b9ee 100644 --- a/thirdparty/opus/celt/x86/x86_celt_map.c +++ b/thirdparty/opus/celt/x86/x86_celt_map.c @@ -72,7 +72,7 @@ void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])( #endif #if (defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) || \ - (!defined(OPUS_X86_MAY_HAVE_SSE_4_1) && defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)) + (!defined(OPUS_X86_MAY_HAVE_SSE_4_1) && defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)) opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])( const opus_val16 *x, diff --git a/thirdparty/opus/celt/x86/x86cpu.c b/thirdparty/opus/celt/x86/x86cpu.c index 555a576b8a..080eb25e41 100644 --- a/thirdparty/opus/celt/x86/x86cpu.c +++ b/thirdparty/opus/celt/x86/x86cpu.c @@ -46,7 +46,7 @@ #include <intrin.h> static _inline void cpuid(unsigned int CPUInfo[4], unsigned int InfoType) { - __cpuid((int*)CPUInfo, InfoType); + __cpuid((int*)CPUInfo, InfoType); } #else |