summaryrefslogtreecommitdiff
path: root/thirdparty/opus/analysis.h
diff options
context:
space:
mode:
Diffstat (limited to 'thirdparty/opus/analysis.h')
-rw-r--r--thirdparty/opus/analysis.h38
1 files changed, 19 insertions, 19 deletions
diff --git a/thirdparty/opus/analysis.h b/thirdparty/opus/analysis.h
index 9eae56a525..0b66555f21 100644
--- a/thirdparty/opus/analysis.h
+++ b/thirdparty/opus/analysis.h
@@ -30,16 +30,24 @@
#include "celt.h"
#include "opus_private.h"
+#include "mlp.h"
#define NB_FRAMES 8
#define NB_TBANDS 18
-#define NB_TOT_BANDS 21
-#define ANALYSIS_BUF_SIZE 720 /* 15 ms at 48 kHz */
+#define ANALYSIS_BUF_SIZE 720 /* 30 ms at 24 kHz */
-#define DETECT_SIZE 200
+/* At that point we can stop counting frames because it no longer matters. */
+#define ANALYSIS_COUNT_MAX 10000
+
+#define DETECT_SIZE 100
+
+/* Uncomment this to print the MLP features on stdout. */
+/*#define MLP_TRAINING*/
typedef struct {
int arch;
+ int application;
+ opus_int32 Fs;
#define TONALITY_ANALYSIS_RESET_START angle
float angle[240];
float d_angle[240];
@@ -48,35 +56,27 @@ typedef struct {
int mem_fill; /* number of usable samples in the buffer */
float prev_band_tonality[NB_TBANDS];
float prev_tonality;
+ int prev_bandwidth;
float E[NB_FRAMES][NB_TBANDS];
+ float logE[NB_FRAMES][NB_TBANDS];
float lowE[NB_TBANDS];
float highE[NB_TBANDS];
- float meanE[NB_TOT_BANDS];
+ float meanE[NB_TBANDS+1];
float mem[32];
float cmean[8];
float std[9];
- float music_prob;
float Etracker;
float lowECount;
int E_count;
- int last_music;
- int last_transition;
int count;
- float subframe_mem[3];
int analysis_offset;
- /** Probability of having speech for time i to DETECT_SIZE-1 (and music before).
- pspeech[0] is the probability that all frames in the window are speech. */
- float pspeech[DETECT_SIZE];
- /** Probability of having music for time i to DETECT_SIZE-1 (and speech before).
- pmusic[0] is the probability that all frames in the window are music. */
- float pmusic[DETECT_SIZE];
- float speech_confidence;
- float music_confidence;
- int speech_confidence_count;
- int music_confidence_count;
int write_pos;
int read_pos;
int read_subframe;
+ float hp_ener_accum;
+ int initialized;
+ float rnn_state[MAX_NEURONS];
+ opus_val32 downmix_state[3];
AnalysisInfo info[DETECT_SIZE];
} TonalityAnalysisState;
@@ -86,7 +86,7 @@ typedef struct {
* not be repeated every analysis step. No allocated memory is retained
* by the state struct, so no cleanup call is required.
*/
-void tonality_analysis_init(TonalityAnalysisState *analysis);
+void tonality_analysis_init(TonalityAnalysisState *analysis, opus_int32 Fs);
/** Reset a TonalityAnalysisState stuct.
*