summaryrefslogtreecommitdiff
path: root/thirdparty/opus/analysis.h
diff options
context:
space:
mode:
Diffstat (limited to 'thirdparty/opus/analysis.h')
-rw-r--r-- thirdparty/opus/analysis.h 38
1 files changed, 19 insertions, 19 deletions
diff --git a/thirdparty/opus/analysis.h b/thirdparty/opus/analysis.h
index 0b66555f21..9eae56a525 100644
--- a/thirdparty/opus/analysis.h
+++ b/thirdparty/opus/analysis.h
@@ -30,24 +30,16 @@
#include "celt.h"
#include "opus_private.h"
-#include "mlp.h"
#define NB_FRAMES 8
#define NB_TBANDS 18
-#define ANALYSIS_BUF_SIZE 720 /* 30 ms at 24 kHz */
+#define NB_TOT_BANDS 21
+#define ANALYSIS_BUF_SIZE 720 /* 15 ms at 48 kHz */
-/* At that point we can stop counting frames because it no longer matters. */
-#define ANALYSIS_COUNT_MAX 10000
-
-#define DETECT_SIZE 100
-
-/* Uncomment this to print the MLP features on stdout. */
-/*#define MLP_TRAINING*/
+#define DETECT_SIZE 200
typedef struct {
int arch;
- int application;
- opus_int32 Fs;
#define TONALITY_ANALYSIS_RESET_START angle
float angle[240];
float d_angle[240];
@@ -56,27 +48,35 @@ typedef struct {
int mem_fill; /* number of usable samples in the buffer */
float prev_band_tonality[NB_TBANDS];
float prev_tonality;
- int prev_bandwidth;
float E[NB_FRAMES][NB_TBANDS];
- float logE[NB_FRAMES][NB_TBANDS];
float lowE[NB_TBANDS];
float highE[NB_TBANDS];
- float meanE[NB_TBANDS+1];
+ float meanE[NB_TOT_BANDS];
float mem[32];
float cmean[8];
float std[9];
+ float music_prob;
float Etracker;
float lowECount;
int E_count;
+ int last_music;
+ int last_transition;
int count;
+ float subframe_mem[3];
int analysis_offset;
+ /** Probability of having speech for time i to DETECT_SIZE-1 (and music before).
+ pspeech[0] is the probability that all frames in the window are speech. */
+ float pspeech[DETECT_SIZE];
+ /** Probability of having music for time i to DETECT_SIZE-1 (and speech before).
+ pmusic[0] is the probability that all frames in the window are music. */
+ float pmusic[DETECT_SIZE];
+ float speech_confidence;
+ float music_confidence;
+ int speech_confidence_count;
+ int music_confidence_count;
int write_pos;
int read_pos;
int read_subframe;
- float hp_ener_accum;
- int initialized;
- float rnn_state[MAX_NEURONS];
- opus_val32 downmix_state[3];
AnalysisInfo info[DETECT_SIZE];
} TonalityAnalysisState;
@@ -86,7 +86,7 @@ typedef struct {
* not be repeated every analysis step. No allocated memory is retained
* by the state struct, so no cleanup call is required.
*/
-void tonality_analysis_init(TonalityAnalysisState *analysis, opus_int32 Fs);
+void tonality_analysis_init(TonalityAnalysisState *analysis);
/** Reset a TonalityAnalysisState struct.
*