14 files changed, 136 insertions, 419 deletions
diff --git a/thirdparty/opus/celt/arm/arm2gnu.pl b/thirdparty/opus/celt/arm/arm2gnu.pl
index a2895f7445..6c922ac819 100755
--- a/thirdparty/opus/celt/arm/arm2gnu.pl
+++ b/thirdparty/opus/celt/arm/arm2gnu.pl
@@ -164,11 +164,11 @@ while (<>) {
         $prefix = "";
         if ($proc)
         {
-            $prefix = $prefix.sprintf("\t.type\t%s, %%function", $proc) unless ($apple);
+            $prefix = $prefix.sprintf("\t.type\t%s, %%function; ",$proc) unless ($apple);
             # Make sure we $prefix isn't empty here (for the $apple case).
             # We handle mangling the label here, make sure it doesn't match
             # the label handling below (if $prefix would be empty).
-            $prefix = $prefix."; ";
+            $prefix = "; " if ($prefix eq ""); # keep the .type directive built above; only fill in when empty
             push(@proc_stack, $proc);
             s/^[A-Za-z_\.]\w+/$symprefix$&:/;
         }
diff --git a/thirdparty/opus/celt/arm/arm_celt_map.c b/thirdparty/opus/celt/arm/arm_celt_map.c
index ca988b66f5..4d4d069a86 100644
--- a/thirdparty/opus/celt/arm/arm_celt_map.c
+++ b/thirdparty/opus/celt/arm/arm_celt_map.c
@@ -35,29 +35,12 @@
 
 #if defined(OPUS_HAVE_RTCD)
 
-# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR)
-opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *x, const opus_val16 *y, int N) = {
-  celt_inner_prod_c,   /* ARMv4 */
-  celt_inner_prod_c,   /* EDSP */
-  celt_inner_prod_c,   /* Media */
-  celt_inner_prod_neon /* NEON */
-};
-
-void (*const DUAL_INNER_PROD_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
-      int N, opus_val32 *xy1, opus_val32 *xy2) = {
-  dual_inner_prod_c,   /* ARMv4 */
-  dual_inner_prod_c,   /* EDSP */
-  dual_inner_prod_c,   /* Media */
-  dual_inner_prod_neon /* NEON */
-};
-# endif
-
 # if defined(FIXED_POINT)
 #  if ((defined(OPUS_ARM_MAY_HAVE_NEON) && !defined(OPUS_ARM_PRESUME_NEON)) || \
     (defined(OPUS_ARM_MAY_HAVE_MEDIA) && !defined(OPUS_ARM_PRESUME_MEDIA)) || \
     (defined(OPUS_ARM_MAY_HAVE_EDSP) && !defined(OPUS_ARM_PRESUME_EDSP)))
 opus_val32 (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
-    const opus_val16 *, opus_val32 *, int, int, int) = {
+    const opus_val16 *, opus_val32 *, int , int) = {
   celt_pitch_xcorr_c,               /* ARMv4 */
   MAY_HAVE_EDSP(celt_pitch_xcorr),  /* EDSP */
   MAY_HAVE_MEDIA(celt_pitch_xcorr), /* Media */
@@ -68,7 +51,7 @@ opus_val32 (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
 # else /* !FIXED_POINT */
 #  if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR)
 void (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
-    const opus_val16 *, opus_val32 *, int, int, int) = {
+    const opus_val16 *, opus_val32 *, int, int) = {
   celt_pitch_xcorr_c,              /* ARMv4 */
   celt_pitch_xcorr_c,              /* EDSP */
   celt_pitch_xcorr_c,              /* Media */
diff --git a/thirdparty/opus/celt/arm/armopts.s b/thirdparty/opus/celt/arm/armopts.s
deleted file mode 100644
index fb9196072a..0000000000
--- a/thirdparty/opus/celt/arm/armopts.s
+++ /dev/null
@@ -1,37 +0,0 @@
-/* Copyright (C) 2013 Mozilla Corporation */
-/*
-   Redistribution and use in source and binary forms, with or without
-   modification, are permitted provided that the following conditions
-   are met:
-
-   - Redistributions of source code must retain the above copyright
-   notice, this list of conditions and the following disclaimer.
-
-   - Redistributions in binary form must reproduce the above copyright
-   notice, this list of conditions and the following disclaimer in the
-   documentation and/or other materials provided with the distribution.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
-   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-; Set the following to 1 if we have EDSP instructions
-;  (LDRD/STRD, etc., ARMv5E and later).
-OPUS_ARM_MAY_HAVE_EDSP  * 
-
-; Set the following to 1 if we have ARMv6 media instructions.
-OPUS_ARM_MAY_HAVE_MEDIA * 
-
-; Set the following to 1 if we have NEON (some ARMv7)
-OPUS_ARM_MAY_HAVE_NEON  * 
-
-END
diff --git a/thirdparty/opus/celt/arm/celt_fft_ne10.c b/thirdparty/opus/celt/arm/celt_ne10_fft.c
index ea5fd7808b..42d96a7117 100644
--- a/thirdparty/opus/celt/arm/celt_fft_ne10.c
+++ b/thirdparty/opus/celt/arm/celt_ne10_fft.c
@@ -1,7 +1,7 @@
 /* Copyright (c) 2015 Xiph.Org Foundation
    Written by Viswanath Puttagunta */
 /**
-   @file celt_fft_ne10.c
+   @file celt_ne10_fft.c
    @brief ARM Neon optimizations for fft using NE10 library
  */
 
@@ -36,6 +36,7 @@
 #endif
 #endif
 
+#include <NE10_init.h>
 #include <NE10_dsp.h>
 #include "os_support.h"
 #include "kiss_fft.h"
diff --git a/thirdparty/opus/celt/arm/celt_mdct_ne10.c b/thirdparty/opus/celt/arm/celt_ne10_mdct.c
index 3531d02d10..293c3efd7a 100644
--- a/thirdparty/opus/celt/arm/celt_mdct_ne10.c
+++ b/thirdparty/opus/celt/arm/celt_ne10_mdct.c
@@ -1,7 +1,7 @@
 /* Copyright (c) 2015 Xiph.Org Foundation
    Written by Viswanath Puttagunta */
 /**
-   @file celt_mdct_ne10.c
+   @file celt_ne10_mdct.c
    @brief ARM Neon optimizations for mdct using NE10 library
  */
 
diff --git a/thirdparty/opus/celt/arm/celt_neon_intr.c b/thirdparty/opus/celt/arm/celt_neon_intr.c
index effda769d0..47bbe3dc22 100644
--- a/thirdparty/opus/celt/arm/celt_neon_intr.c
+++ b/thirdparty/opus/celt/arm/celt_neon_intr.c
@@ -191,21 +191,121 @@ static void xcorr_kernel_neon_float(const float32_t *x, const float32_t *y,
    vst1q_f32(sum, SUMM);
 }
 
+/*
+ * Function: xcorr_kernel_neon_float_process1
+ * Computes a single correlation value, sum(x[j]*y[j], j=0...len-1), and
+ * stores it in *sum. len is consumed in steps of 16, 8, 4, 2 and then 1.
+ */
+static void xcorr_kernel_neon_float_process1(const float32_t *x,
+      const float32_t *y, float32_t *sum, int len) {
+   float32x4_t XX[4];
+   float32x4_t YY[4];
+   float32x2_t XX_2;
+   float32x2_t YY_2;
+   float32x4_t SUMM;
+   float32x2_t SUMM_2[2];
+   const float32_t *xi = x;
+   const float32_t *yi = y;
+
+   SUMM = vdupq_n_f32(0);
+
+   /* Main loop: 16 values per iteration, accumulated lane-wise in SUMM */
+   while (len >= 16) {
+      XX[0] = vld1q_f32(xi);
+      xi += 4;
+      XX[1] = vld1q_f32(xi);
+      xi += 4;
+      XX[2] = vld1q_f32(xi);
+      xi += 4;
+      XX[3] = vld1q_f32(xi);
+      xi += 4;
+
+      YY[0] = vld1q_f32(yi);
+      yi += 4;
+      YY[1] = vld1q_f32(yi);
+      yi += 4;
+      YY[2] = vld1q_f32(yi);
+      yi += 4;
+      YY[3] = vld1q_f32(yi);
+      yi += 4;
+
+      SUMM = vmlaq_f32(SUMM, YY[0], XX[0]);
+      SUMM = vmlaq_f32(SUMM, YY[1], XX[1]);
+      SUMM = vmlaq_f32(SUMM, YY[2], XX[2]);
+      SUMM = vmlaq_f32(SUMM, YY[3], XX[3]);
+      len -= 16;
+   }
+
+   /* Fewer than 16 values remain, so at most one block of 8 is left */
+   if (len >= 8) {
+      XX[0] = vld1q_f32(xi);
+      xi += 4;
+      XX[1] = vld1q_f32(xi);
+      xi += 4;
+
+      YY[0] = vld1q_f32(yi);
+      yi += 4;
+      YY[1] = vld1q_f32(yi);
+      yi += 4;
+
+      SUMM = vmlaq_f32(SUMM, YY[0], XX[0]);
+      SUMM = vmlaq_f32(SUMM, YY[1], XX[1]);
+      len -= 8;
+   }
+
+   /* Fewer than 8 values remain, so at most one block of 4 is left */
+   if (len >= 4) {
+      XX[0] = vld1q_f32(xi);
+      xi += 4;
+      YY[0] = vld1q_f32(yi);
+      yi += 4;
+      SUMM = vmlaq_f32(SUMM, YY[0], XX[0]);
+      len -= 4;
+   }
+
+   /* Start the horizontal reduction from the low half of SUMM */
+   SUMM_2[0] = vget_low_f32(SUMM);
+   if (len >= 2) {
+      /* Fold a remaining pair of values straight into the low half */
+      XX_2 = vld1_f32(xi);
+      xi += 2;
+      YY_2 = vld1_f32(yi);
+      yi += 2;
+      SUMM_2[0] = vmla_f32(SUMM_2[0], YY_2, XX_2);
+      len -= 2;
+   }
+   SUMM_2[1] = vget_high_f32(SUMM);
+   SUMM_2[0] = vadd_f32(SUMM_2[0], SUMM_2[1]);
+   SUMM_2[0] = vpadd_f32(SUMM_2[0], SUMM_2[0]);
+   /* Both lanes of SUMM_2[0] now hold the sum of everything consumed so far */
+
+   if (len > 0) {
+      /* One value left: duplicated into both lanes; only lane 0 is stored */
+      XX_2 = vld1_dup_f32(xi);
+      YY_2 = vld1_dup_f32(yi);
+      SUMM_2[0] = vmla_f32(SUMM_2[0], XX_2, YY_2);
+   }
+
+   vst1_lane_f32(sum, SUMM_2[0], 0);
+}
+
 void celt_pitch_xcorr_float_neon(const opus_val16 *_x, const opus_val16 *_y,
-                        opus_val32 *xcorr, int len, int max_pitch, int arch) {
+                        opus_val32 *xcorr, int len, int max_pitch) {
    int i;
-   (void)arch;
    celt_assert(max_pitch > 0);
-   celt_sig_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0);
+   celt_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0);
 
    for (i = 0; i < (max_pitch-3); i += 4) {
       xcorr_kernel_neon_float((const float32_t *)_x, (const float32_t *)_y+i,
             (float32_t *)xcorr+i, len);
    }
 
-   /* In case max_pitch isn't a multiple of 4, do non-unrolled version. */
+   /* max_pitch need not be a multiple of 4: the remaining (up to 3) lags
+    * are computed one correlation value per iteration.
+    */
    for (; i < max_pitch; i++) {
-      xcorr[i] = celt_inner_prod_neon(_x, _y+i, len);
+      xcorr_kernel_neon_float_process1((const float32_t *)_x,
+            (const float32_t *)_y+i, (float32_t *)xcorr+i, len);
    }
 }
 #endif
diff --git a/thirdparty/opus/celt/arm/celt_pitch_xcorr_arm-gnu.S b/thirdparty/opus/celt/arm/celt_pitch_xcorr_arm-gnu.S
index 10668e54a5..5b2ee55a10 100644
--- a/thirdparty/opus/celt/arm/celt_pitch_xcorr_arm-gnu.S
+++ b/thirdparty/opus/celt/arm/celt_pitch_xcorr_arm-gnu.S
@@ -44,7 +44,7 @@
  .if OPUS_ARM_MAY_HAVE_NEON
 
 @ Compute sum[k]=sum(x[j]*y[j+k],j=0...len-1), k=0...3
-	.type	xcorr_kernel_neon, %function; xcorr_kernel_neon: @ PROC
+; xcorr_kernel_neon: @ PROC
 xcorr_kernel_neon_start:
   @ input:
   @   r3     = int         len
@@ -156,8 +156,8 @@ xcorr_kernel_neon_process1:
 	.size xcorr_kernel_neon, .-xcorr_kernel_neon  @ ENDP
 
 @ opus_val32 celt_pitch_xcorr_neon(opus_val16 *_x, opus_val16 *_y,
-@  opus_val32 *xcorr, int len, int max_pitch, int arch)
-	.type	celt_pitch_xcorr_neon, %function; celt_pitch_xcorr_neon: @ PROC
+@  opus_val32 *xcorr, int len, int max_pitch)
+; celt_pitch_xcorr_neon: @ PROC
   @ input:
   @   r0  = opus_val16 *_x
   @   r1  = opus_val16 *_y
@@ -171,8 +171,6 @@ xcorr_kernel_neon_process1:
   @   r6  = int         max_pitch
   @   r12 = int         j
   @   q15 = int         maxcorr[4] (q15 is not used by xcorr_kernel_neon())
-  @ ignored:
-  @         int         arch
   STMFD        sp!, {r4-r6, lr}
   LDR          r6, [sp, #16]
   VMOV.S32     q15, #1
@@ -262,7 +260,7 @@ celt_pitch_xcorr_neon_done:
 
 @ This will get used on ARMv7 devices without NEON, so it has been optimized
 @ to take advantage of dual-issuing where possible.
-	.type	xcorr_kernel_edsp, %function; xcorr_kernel_edsp: @ PROC
+; xcorr_kernel_edsp: @ PROC
 xcorr_kernel_edsp_start:
   @ input:
   @   r3      = int         len
@@ -346,7 +344,7 @@ xcorr_kernel_edsp_done:
   LDMFD        sp!, {r2,r4,r5,pc}
 	.size xcorr_kernel_edsp, .-xcorr_kernel_edsp  @ ENDP
 
-	.type	celt_pitch_xcorr_edsp, %function; celt_pitch_xcorr_edsp: @ PROC
+; celt_pitch_xcorr_edsp: @ PROC
   @ input:
   @   r0  = opus_val16 *_x (must be 32-bit aligned)
   @   r1  = opus_val16 *_y (only needs to be 16-bit aligned)
@@ -363,8 +361,6 @@ xcorr_kernel_edsp_done:
   @   r9  = opus_val32  sum3
   @   r1  = int         max_pitch
   @   r12 = int         j
-  @ ignored:
-  @         int         arch
   STMFD        sp!, {r4-r11, lr}
   MOV          r5, r1
   LDR          r1, [sp, #36]
diff --git a/thirdparty/opus/celt/arm/celt_pitch_xcorr_arm.s b/thirdparty/opus/celt/arm/celt_pitch_xcorr_arm.s
index 6e873afc37..f96e0a88bb 100644
--- a/thirdparty/opus/celt/arm/celt_pitch_xcorr_arm.s
+++ b/thirdparty/opus/celt/arm/celt_pitch_xcorr_arm.s
@@ -153,7 +153,7 @@ xcorr_kernel_neon_process1
   ENDP
 
 ; opus_val32 celt_pitch_xcorr_neon(opus_val16 *_x, opus_val16 *_y,
-;  opus_val32 *xcorr, int len, int max_pitch, int arch)
+;  opus_val32 *xcorr, int len, int max_pitch)
 celt_pitch_xcorr_neon PROC
   ; input:
   ;   r0  = opus_val16 *_x
@@ -168,8 +168,6 @@ celt_pitch_xcorr_neon PROC
   ;   r6  = int         max_pitch
   ;   r12 = int         j
   ;   q15 = int         maxcorr[4] (q15 is not used by xcorr_kernel_neon())
-  ; ignored:
-  ;         int         arch
   STMFD        sp!, {r4-r6, lr}
   LDR          r6, [sp, #16]
   VMOV.S32     q15, #1
@@ -360,8 +358,6 @@ celt_pitch_xcorr_edsp PROC
   ;   r9  = opus_val32  sum3
   ;   r1  = int         max_pitch
   ;   r12 = int         j
-  ; ignored:
-  ;         int         arch
   STMFD        sp!, {r4-r11, lr}
   MOV          r5, r1
   LDR          r1, [sp, #36]
diff --git a/thirdparty/opus/celt/arm/fft_arm.h b/thirdparty/opus/celt/arm/fft_arm.h
index 0b78175f3a..0cb55d8e22 100644
--- a/thirdparty/opus/celt/arm/fft_arm.h
+++ b/thirdparty/opus/celt/arm/fft_arm.h
@@ -34,6 +34,7 @@
 #if !defined(FFT_ARM_H)
 #define FFT_ARM_H
 
+#include "config.h"
 #include "kiss_fft.h"
 
 #if defined(HAVE_ARM_NE10)
diff --git a/thirdparty/opus/celt/arm/fixed_armv4.h b/thirdparty/opus/celt/arm/fixed_armv4.h
index d84888a772..efb3b1896a 100644
--- a/thirdparty/opus/celt/arm/fixed_armv4.h
+++ b/thirdparty/opus/celt/arm/fixed_armv4.h
@@ -37,7 +37,7 @@ static OPUS_INLINE opus_val32 MULT16_32_Q16_armv4(opus_val16 a, opus_val32 b)
       "#MULT16_32_Q16\n\t"
       "smull %0, %1, %2, %3\n\t"
       : "=&r"(rd_lo), "=&r"(rd_hi)
-      : "%r"(b),"r"(SHL32(a,16))
+      : "%r"(b),"r"(a<<16)
   );
   return rd_hi;
 }
@@ -54,10 +54,10 @@ static OPUS_INLINE opus_val32 MULT16_32_Q15_armv4(opus_val16 a, opus_val32 b)
       "#MULT16_32_Q15\n\t"
       "smull %0, %1, %2, %3\n\t"
       : "=&r"(rd_lo), "=&r"(rd_hi)
-      : "%r"(b), "r"(SHL32(a,16))
+      : "%r"(b), "r"(a<<16)
   );
   /*We intentionally don't OR in the high bit of rd_lo for speed.*/
-  return SHL32(rd_hi,1);
+  return rd_hi<<1;
 }
 #define MULT16_32_Q15(a, b) (MULT16_32_Q15_armv4(a, b))
 
diff --git a/thirdparty/opus/celt/arm/fixed_armv5e.h b/thirdparty/opus/celt/arm/fixed_armv5e.h
index 6bf73cbace..36a6321101 100644
--- a/thirdparty/opus/celt/arm/fixed_armv5e.h
+++ b/thirdparty/opus/celt/arm/fixed_armv5e.h
@@ -59,7 +59,7 @@ static OPUS_INLINE opus_val32 MULT16_32_Q15_armv5e(opus_val16 a, opus_val32 b)
       : "=r"(res)
       : "r"(b), "r"(a)
   );
-  return SHL32(res,1);
+  return res<<1;
 }
 #define MULT16_32_Q15(a, b) (MULT16_32_Q15_armv5e(a, b))
 
@@ -76,7 +76,7 @@ static OPUS_INLINE opus_val32 MAC16_32_Q15_armv5e(opus_val32 c, opus_val16 a,
       "#MAC16_32_Q15\n\t"
       "smlawb %0, %1, %2, %3;\n"
       : "=r"(res)
-      : "r"(SHL32(b,1)), "r"(a), "r"(c)
+      : "r"(b<<1), "r"(a), "r"(c)
   );
   return res;
 }
diff --git a/thirdparty/opus/celt/arm/mdct_arm.h b/thirdparty/opus/celt/arm/mdct_arm.h
index 14200bac4b..49cbb44576 100644
--- a/thirdparty/opus/celt/arm/mdct_arm.h
+++ b/thirdparty/opus/celt/arm/mdct_arm.h
@@ -33,6 +33,7 @@
 #if !defined(MDCT_ARM_H)
 #define MDCT_ARM_H
 
+#include "config.h"
 #include "mdct.h"
 
 #if defined(HAVE_ARM_NE10)
diff --git a/thirdparty/opus/celt/arm/pitch_arm.h b/thirdparty/opus/celt/arm/pitch_arm.h
index bed8b04eac..14331169ee 100644
--- a/thirdparty/opus/celt/arm/pitch_arm.h
+++ b/thirdparty/opus/celt/arm/pitch_arm.h
@@ -30,47 +30,11 @@
 
 # include "armcpu.h"
 
-# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
-opus_val32 celt_inner_prod_neon(const opus_val16 *x, const opus_val16 *y, int N);
-void dual_inner_prod_neon(const opus_val16 *x, const opus_val16 *y01,
-        const opus_val16 *y02, int N, opus_val32 *xy1, opus_val32 *xy2);
-
-#  if !defined(OPUS_HAVE_RTCD) && defined(OPUS_ARM_PRESUME_NEON)
-#   define OVERRIDE_CELT_INNER_PROD (1)
-#   define OVERRIDE_DUAL_INNER_PROD (1)
-#   define celt_inner_prod(x, y, N, arch) ((void)(arch), PRESUME_NEON(celt_inner_prod)(x, y, N))
-#   define dual_inner_prod(x, y01, y02, N, xy1, xy2, arch) ((void)(arch), PRESUME_NEON(dual_inner_prod)(x, y01, y02, N, xy1, xy2))
-#  endif
-# endif
-
-# if !defined(OVERRIDE_CELT_INNER_PROD)
-#  if defined(OPUS_HAVE_RTCD) && (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR))
-extern opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *x, const opus_val16 *y, int N);
-#   define OVERRIDE_CELT_INNER_PROD (1)
-#   define celt_inner_prod(x, y, N, arch) ((*CELT_INNER_PROD_IMPL[(arch)&OPUS_ARCHMASK])(x, y, N))
-#  elif defined(OPUS_ARM_PRESUME_NEON_INTR)
-#   define OVERRIDE_CELT_INNER_PROD (1)
-#   define celt_inner_prod(x, y, N, arch) ((void)(arch), celt_inner_prod_neon(x, y, N))
-#  endif
-# endif
-
-# if !defined(OVERRIDE_DUAL_INNER_PROD)
-#  if defined(OPUS_HAVE_RTCD) && (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR))
-extern void (*const DUAL_INNER_PROD_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *x,
-        const opus_val16 *y01, const opus_val16 *y02, int N, opus_val32 *xy1, opus_val32 *xy2);
-#   define OVERRIDE_DUAL_INNER_PROD (1)
-#   define dual_inner_prod(x, y01, y02, N, xy1, xy2, arch) ((*DUAL_INNER_PROD_IMPL[(arch)&OPUS_ARCHMASK])(x, y01, y02, N, xy1, xy2))
-#  elif defined(OPUS_ARM_PRESUME_NEON_INTR)
-#   define OVERRIDE_DUAL_INNER_PROD (1)
-#   define dual_inner_prod(x, y01, y02, N, xy1, xy2, arch) ((void)(arch), dual_inner_prod_neon(x, y01, y02, N, xy1, xy2))
-#  endif
-# endif
-
 # if defined(FIXED_POINT)
 
 #  if defined(OPUS_ARM_MAY_HAVE_NEON)
 opus_val32 celt_pitch_xcorr_neon(const opus_val16 *_x, const opus_val16 *_y,
-    opus_val32 *xcorr, int len, int max_pitch, int arch);
+    opus_val32 *xcorr, int len, int max_pitch);
 #  endif
 
 #  if defined(OPUS_ARM_MAY_HAVE_MEDIA)
@@ -79,7 +43,7 @@ opus_val32 celt_pitch_xcorr_neon(const opus_val16 *_x, const opus_val16 *_y,
 
 #  if defined(OPUS_ARM_MAY_HAVE_EDSP)
 opus_val32 celt_pitch_xcorr_edsp(const opus_val16 *_x, const opus_val16 *_y,
-    opus_val32 *xcorr, int len, int max_pitch, int arch);
+    opus_val32 *xcorr, int len, int max_pitch);
 #  endif
 
 #  if defined(OPUS_HAVE_RTCD) && \
@@ -88,17 +52,18 @@ opus_val32 celt_pitch_xcorr_edsp(const opus_val16 *_x, const opus_val16 *_y,
      (defined(OPUS_ARM_MAY_HAVE_EDSP) && !defined(OPUS_ARM_PRESUME_EDSP)))
 extern opus_val32
 (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
-      const opus_val16 *, opus_val32 *, int, int, int);
+      const opus_val16 *, opus_val32 *, int, int);
 #   define OVERRIDE_PITCH_XCORR (1)
 #   define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
   ((*CELT_PITCH_XCORR_IMPL[(arch)&OPUS_ARCHMASK])(_x, _y, \
-        xcorr, len, max_pitch, arch))
+        xcorr, len, max_pitch))
 
 #  elif defined(OPUS_ARM_PRESUME_EDSP) || \
     defined(OPUS_ARM_PRESUME_MEDIA) || \
     defined(OPUS_ARM_PRESUME_NEON)
 #   define OVERRIDE_PITCH_XCORR (1)
-#   define celt_pitch_xcorr (PRESUME_NEON(celt_pitch_xcorr))
+#   define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
+  ((void)(arch),PRESUME_NEON(celt_pitch_xcorr)(_x, _y, xcorr, len, max_pitch))
 
 #  endif
 
@@ -134,24 +99,25 @@ extern void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])(
 /* Float case */
 #if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
 void celt_pitch_xcorr_float_neon(const opus_val16 *_x, const opus_val16 *_y,
-                                 opus_val32 *xcorr, int len, int max_pitch, int arch);
+                                 opus_val32 *xcorr, int len, int max_pitch);
 #endif
 
 #  if defined(OPUS_HAVE_RTCD) && \
     (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR))
 extern void
 (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
-      const opus_val16 *, opus_val32 *, int, int, int);
+      const opus_val16 *, opus_val32 *, int, int);
 
 #  define OVERRIDE_PITCH_XCORR (1)
 #  define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
   ((*CELT_PITCH_XCORR_IMPL[(arch)&OPUS_ARCHMASK])(_x, _y, \
-        xcorr, len, max_pitch, arch))
+        xcorr, len, max_pitch))
 
 #  elif defined(OPUS_ARM_PRESUME_NEON_INTR)
 
 #   define OVERRIDE_PITCH_XCORR (1)
-#   define celt_pitch_xcorr celt_pitch_xcorr_float_neon
+#   define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
+   ((void)(arch),celt_pitch_xcorr_float_neon(_x, _y, xcorr, len, max_pitch))
 
 #  endif
 
diff --git a/thirdparty/opus/celt/arm/pitch_neon_intr.c b/thirdparty/opus/celt/arm/pitch_neon_intr.c
deleted file mode 100644
index 1ac38c433a..0000000000
--- a/thirdparty/opus/celt/arm/pitch_neon_intr.c
+++ /dev/null
@@ -1,290 +0,0 @@
-/***********************************************************************
-Copyright (c) 2017 Google Inc.
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions
-are met:
-- Redistributions of source code must retain the above copyright notice,
-this list of conditions and the following disclaimer.
-- Redistributions in binary form must reproduce the above copyright
-notice, this list of conditions and the following disclaimer in the
-documentation and/or other materials provided with the distribution.
-- Neither the name of Internet Society, IETF or IETF Trust, nor the
-names of specific contributors, may be used to endorse or promote
-products derived from this software without specific prior written
-permission.
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-***********************************************************************/
-
-#ifdef HAVE_CONFIG_H
-#include "config.h"
-#endif
-
-#include <arm_neon.h>
-#include "pitch.h"
-
-#ifdef FIXED_POINT
-
-opus_val32 celt_inner_prod_neon(const opus_val16 *x, const opus_val16 *y, int N)
-{
-    int i;
-    opus_val32 xy;
-    int16x8_t x_s16x8, y_s16x8;
-    int32x4_t xy_s32x4 = vdupq_n_s32(0);
-    int64x2_t xy_s64x2;
-    int64x1_t xy_s64x1;
-
-    for (i = 0; i < N - 7; i += 8) {
-        x_s16x8  = vld1q_s16(&x[i]);
-        y_s16x8  = vld1q_s16(&y[i]);
-        xy_s32x4 = vmlal_s16(xy_s32x4, vget_low_s16 (x_s16x8), vget_low_s16 (y_s16x8));
-        xy_s32x4 = vmlal_s16(xy_s32x4, vget_high_s16(x_s16x8), vget_high_s16(y_s16x8));
-    }
-
-    if (N - i >= 4) {
-        const int16x4_t x_s16x4 = vld1_s16(&x[i]);
-        const int16x4_t y_s16x4 = vld1_s16(&y[i]);
-        xy_s32x4 = vmlal_s16(xy_s32x4, x_s16x4, y_s16x4);
-        i += 4;
-    }
-
-    xy_s64x2 = vpaddlq_s32(xy_s32x4);
-    xy_s64x1 = vadd_s64(vget_low_s64(xy_s64x2), vget_high_s64(xy_s64x2));
-    xy       = vget_lane_s32(vreinterpret_s32_s64(xy_s64x1), 0);
-
-    for (; i < N; i++) {
-        xy = MAC16_16(xy, x[i], y[i]);
-    }
-
-#ifdef OPUS_CHECK_ASM
-    celt_assert(celt_inner_prod_c(x, y, N) == xy);
-#endif
-
-    return xy;
-}
-
-void dual_inner_prod_neon(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
-        int N, opus_val32 *xy1, opus_val32 *xy2)
-{
-    int i;
-    opus_val32 xy01, xy02;
-    int16x8_t x_s16x8, y01_s16x8, y02_s16x8;
-    int32x4_t xy01_s32x4 = vdupq_n_s32(0);
-    int32x4_t xy02_s32x4 = vdupq_n_s32(0);
-    int64x2_t xy01_s64x2, xy02_s64x2;
-    int64x1_t xy01_s64x1, xy02_s64x1;
-
-    for (i = 0; i < N - 7; i += 8) {
-        x_s16x8    = vld1q_s16(&x[i]);
-        y01_s16x8  = vld1q_s16(&y01[i]);
-        y02_s16x8  = vld1q_s16(&y02[i]);
-        xy01_s32x4 = vmlal_s16(xy01_s32x4, vget_low_s16 (x_s16x8), vget_low_s16 (y01_s16x8));
-        xy02_s32x4 = vmlal_s16(xy02_s32x4, vget_low_s16 (x_s16x8), vget_low_s16 (y02_s16x8));
-        xy01_s32x4 = vmlal_s16(xy01_s32x4, vget_high_s16(x_s16x8), vget_high_s16(y01_s16x8));
-        xy02_s32x4 = vmlal_s16(xy02_s32x4, vget_high_s16(x_s16x8), vget_high_s16(y02_s16x8));
-    }
-
-    if (N - i >= 4) {
-        const int16x4_t x_s16x4   = vld1_s16(&x[i]);
-        const int16x4_t y01_s16x4 = vld1_s16(&y01[i]);
-        const int16x4_t y02_s16x4 = vld1_s16(&y02[i]);
-        xy01_s32x4 = vmlal_s16(xy01_s32x4, x_s16x4, y01_s16x4);
-        xy02_s32x4 = vmlal_s16(xy02_s32x4, x_s16x4, y02_s16x4);
-        i += 4;
-    }
-
-    xy01_s64x2 = vpaddlq_s32(xy01_s32x4);
-    xy02_s64x2 = vpaddlq_s32(xy02_s32x4);
-    xy01_s64x1 = vadd_s64(vget_low_s64(xy01_s64x2), vget_high_s64(xy01_s64x2));
-    xy02_s64x1 = vadd_s64(vget_low_s64(xy02_s64x2), vget_high_s64(xy02_s64x2));
-    xy01       = vget_lane_s32(vreinterpret_s32_s64(xy01_s64x1), 0);
-    xy02       = vget_lane_s32(vreinterpret_s32_s64(xy02_s64x1), 0);
-
-    for (; i < N; i++) {
-        xy01 = MAC16_16(xy01, x[i], y01[i]);
-        xy02 = MAC16_16(xy02, x[i], y02[i]);
-    }
-    *xy1 = xy01;
-    *xy2 = xy02;
-
-#ifdef OPUS_CHECK_ASM
-    {
-        opus_val32 xy1_c, xy2_c;
-        dual_inner_prod_c(x, y01, y02, N, &xy1_c, &xy2_c);
-        celt_assert(xy1_c == *xy1);
-        celt_assert(xy2_c == *xy2);
-    }
-#endif
-}
-
-#else /* !FIXED_POINT */
-
-/* ========================================================================== */
-
-#ifdef OPUS_CHECK_ASM
-
-/* This part of code simulates floating-point NEON operations. */
-
-/* celt_inner_prod_neon_float_c_simulation() simulates the floating-point   */
-/* operations of celt_inner_prod_neon(), and both functions should have bit */
-/* exact output.                                                            */
-static opus_val32 celt_inner_prod_neon_float_c_simulation(const opus_val16 *x, const opus_val16 *y, int N)
-{
-   int i;
-   opus_val32 xy, xy0 = 0, xy1 = 0, xy2 = 0, xy3 = 0;
-   for (i = 0; i < N - 3; i += 4) {
-      xy0 = MAC16_16(xy0, x[i + 0], y[i + 0]);
-      xy1 = MAC16_16(xy1, x[i + 1], y[i + 1]);
-      xy2 = MAC16_16(xy2, x[i + 2], y[i + 2]);
-      xy3 = MAC16_16(xy3, x[i + 3], y[i + 3]);
-   }
-   xy0 += xy2;
-   xy1 += xy3;
-   xy = xy0 + xy1;
-   for (; i < N; i++) {
-      xy = MAC16_16(xy, x[i], y[i]);
-   }
-   return xy;
-}
-
-/* dual_inner_prod_neon_float_c_simulation() simulates the floating-point   */
-/* operations of dual_inner_prod_neon(), and both functions should have bit */
-/* exact output.                                                            */
-static void dual_inner_prod_neon_float_c_simulation(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
-      int N, opus_val32 *xy1, opus_val32 *xy2)
-{
-   int i;
-   opus_val32 xy01, xy02, xy01_0 = 0, xy01_1 = 0, xy01_2 = 0, xy01_3 = 0, xy02_0 = 0, xy02_1 = 0, xy02_2 = 0, xy02_3 = 0;
-   for (i = 0; i < N - 3; i += 4) {
-      xy01_0 = MAC16_16(xy01_0, x[i + 0], y01[i + 0]);
-      xy01_1 = MAC16_16(xy01_1, x[i + 1], y01[i + 1]);
-      xy01_2 = MAC16_16(xy01_2, x[i + 2], y01[i + 2]);
-      xy01_3 = MAC16_16(xy01_3, x[i + 3], y01[i + 3]);
-      xy02_0 = MAC16_16(xy02_0, x[i + 0], y02[i + 0]);
-      xy02_1 = MAC16_16(xy02_1, x[i + 1], y02[i + 1]);
-      xy02_2 = MAC16_16(xy02_2, x[i + 2], y02[i + 2]);
-      xy02_3 = MAC16_16(xy02_3, x[i + 3], y02[i + 3]);
-   }
-   xy01_0 += xy01_2;
-   xy02_0 += xy02_2;
-   xy01_1 += xy01_3;
-   xy02_1 += xy02_3;
-   xy01 = xy01_0 + xy01_1;
-   xy02 = xy02_0 + xy02_1;
-   for (; i < N; i++) {
-      xy01 = MAC16_16(xy01, x[i], y01[i]);
-      xy02 = MAC16_16(xy02, x[i], y02[i]);
-   }
-   *xy1 = xy01;
-   *xy2 = xy02;
-}
-
-#endif /* OPUS_CHECK_ASM */
-
-/* ========================================================================== */
-
-opus_val32 celt_inner_prod_neon(const opus_val16 *x, const opus_val16 *y, int N)
-{
-    int i;
-    opus_val32 xy;
-    float32x4_t xy_f32x4 = vdupq_n_f32(0);
-    float32x2_t xy_f32x2;
-
-    for (i = 0; i < N - 7; i += 8) {
-        float32x4_t x_f32x4, y_f32x4;
-        x_f32x4  = vld1q_f32(&x[i]);
-        y_f32x4  = vld1q_f32(&y[i]);
-        xy_f32x4 = vmlaq_f32(xy_f32x4, x_f32x4, y_f32x4);
-        x_f32x4  = vld1q_f32(&x[i + 4]);
-        y_f32x4  = vld1q_f32(&y[i + 4]);
-        xy_f32x4 = vmlaq_f32(xy_f32x4, x_f32x4, y_f32x4);
-    }
-
-    if (N - i >= 4) {
-        const float32x4_t x_f32x4 = vld1q_f32(&x[i]);
-        const float32x4_t y_f32x4 = vld1q_f32(&y[i]);
-        xy_f32x4 = vmlaq_f32(xy_f32x4, x_f32x4, y_f32x4);
-        i += 4;
-    }
-
-    xy_f32x2 = vadd_f32(vget_low_f32(xy_f32x4), vget_high_f32(xy_f32x4));
-    xy_f32x2 = vpadd_f32(xy_f32x2, xy_f32x2);
-    xy       = vget_lane_f32(xy_f32x2, 0);
-
-    for (; i < N; i++) {
-        xy = MAC16_16(xy, x[i], y[i]);
-    }
-
-#ifdef OPUS_CHECK_ASM
-    celt_assert(ABS32(celt_inner_prod_neon_float_c_simulation(x, y, N) - xy) <= VERY_SMALL);
-#endif
-
-    return xy;
-}
-
-void dual_inner_prod_neon(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
-        int N, opus_val32 *xy1, opus_val32 *xy2)
-{
-    int i;
-    opus_val32 xy01, xy02;
-    float32x4_t xy01_f32x4 = vdupq_n_f32(0);
-    float32x4_t xy02_f32x4 = vdupq_n_f32(0);
-    float32x2_t xy01_f32x2, xy02_f32x2;
-
-    for (i = 0; i < N - 7; i += 8) {
-        float32x4_t x_f32x4, y01_f32x4, y02_f32x4;
-        x_f32x4    = vld1q_f32(&x[i]);
-        y01_f32x4  = vld1q_f32(&y01[i]);
-        y02_f32x4  = vld1q_f32(&y02[i]);
-        xy01_f32x4 = vmlaq_f32(xy01_f32x4, x_f32x4, y01_f32x4);
-        xy02_f32x4 = vmlaq_f32(xy02_f32x4, x_f32x4, y02_f32x4);
-        x_f32x4    = vld1q_f32(&x[i + 4]);
-        y01_f32x4  = vld1q_f32(&y01[i + 4]);
-        y02_f32x4  = vld1q_f32(&y02[i + 4]);
-        xy01_f32x4 = vmlaq_f32(xy01_f32x4, x_f32x4, y01_f32x4);
-        xy02_f32x4 = vmlaq_f32(xy02_f32x4, x_f32x4, y02_f32x4);
-    }
-
-    if (N - i >= 4) {
-        const float32x4_t x_f32x4   = vld1q_f32(&x[i]);
-        const float32x4_t y01_f32x4 = vld1q_f32(&y01[i]);
-        const float32x4_t y02_f32x4 = vld1q_f32(&y02[i]);
-        xy01_f32x4 = vmlaq_f32(xy01_f32x4, x_f32x4, y01_f32x4);
-        xy02_f32x4 = vmlaq_f32(xy02_f32x4, x_f32x4, y02_f32x4);
-        i += 4;
-    }
-
-    xy01_f32x2 = vadd_f32(vget_low_f32(xy01_f32x4), vget_high_f32(xy01_f32x4));
-    xy02_f32x2 = vadd_f32(vget_low_f32(xy02_f32x4), vget_high_f32(xy02_f32x4));
-    xy01_f32x2 = vpadd_f32(xy01_f32x2, xy01_f32x2);
-    xy02_f32x2 = vpadd_f32(xy02_f32x2, xy02_f32x2);
-    xy01       = vget_lane_f32(xy01_f32x2, 0);
-    xy02       = vget_lane_f32(xy02_f32x2, 0);
-
-    for (; i < N; i++) {
-        xy01 = MAC16_16(xy01, x[i], y01[i]);
-        xy02 = MAC16_16(xy02, x[i], y02[i]);
-    }
-    *xy1 = xy01;
-    *xy2 = xy02;
-
-#ifdef OPUS_CHECK_ASM
-    {
-        opus_val32 xy1_c, xy2_c;
-        dual_inner_prod_neon_float_c_simulation(x, y01, y02, N, &xy1_c, &xy2_c);
-        celt_assert(ABS32(xy1_c - *xy1) <= VERY_SMALL);
-        celt_assert(ABS32(xy2_c - *xy2) <= VERY_SMALL);
-    }
-#endif
-}
-
-#endif /* FIXED_POINT */