| OLD | NEW |
| 1 /* Copyright (c) 2011 Xiph.Org Foundation | 1 /* Copyright (c) 2011 Xiph.Org Foundation |
| 2 Written by Jean-Marc Valin */ | 2 Written by Jean-Marc Valin */ |
| 3 /* | 3 /* |
| 4 Redistribution and use in source and binary forms, with or without | 4 Redistribution and use in source and binary forms, with or without |
| 5 modification, are permitted provided that the following conditions | 5 modification, are permitted provided that the following conditions |
| 6 are met: | 6 are met: |
| 7 | 7 |
| 8 - Redistributions of source code must retain the above copyright | 8 - Redistributions of source code must retain the above copyright |
| 9 notice, this list of conditions and the following disclaimer. | 9 notice, this list of conditions and the following disclaimer. |
| 10 | 10 |
| (...skipping 15 matching lines...) Expand all Loading... |
| 26 */ | 26 */ |
| 27 | 27 |
| 28 #ifndef ANALYSIS_H | 28 #ifndef ANALYSIS_H |
| 29 #define ANALYSIS_H | 29 #define ANALYSIS_H |
| 30 | 30 |
| 31 #include "celt.h" | 31 #include "celt.h" |
| 32 #include "opus_private.h" | 32 #include "opus_private.h" |
| 33 | 33 |
| 34 #define NB_FRAMES 8 | 34 #define NB_FRAMES 8 |
| 35 #define NB_TBANDS 18 | 35 #define NB_TBANDS 18 |
| 36 #define NB_TOT_BANDS 21 | 36 #define ANALYSIS_BUF_SIZE 720 /* 30 ms at 24 kHz */ |
| 37 #define ANALYSIS_BUF_SIZE 720 /* 15 ms at 48 kHz */ | |
| 38 | 37 |
| 39 #define DETECT_SIZE 200 | 38 /* At that point we can stop counting frames because it no longer matters. */ |
| 39 #define ANALYSIS_COUNT_MAX 10000 |
| 40 |
| 41 #define DETECT_SIZE 100 |
| 42 |
| 43 /* Uncomment this to print the MLP features on stdout. */ |
| 44 /*#define MLP_TRAINING*/ |
| 40 | 45 |
| 41 typedef struct { | 46 typedef struct { |
| 42 int arch; | 47 int arch; |
| 48 int application; |
| 49 opus_int32 Fs; |
| 43 #define TONALITY_ANALYSIS_RESET_START angle | 50 #define TONALITY_ANALYSIS_RESET_START angle |
| 44 float angle[240]; | 51 float angle[240]; |
| 45 float d_angle[240]; | 52 float d_angle[240]; |
| 46 float d2_angle[240]; | 53 float d2_angle[240]; |
| 47 opus_val32 inmem[ANALYSIS_BUF_SIZE]; | 54 opus_val32 inmem[ANALYSIS_BUF_SIZE]; |
| 48 int mem_fill; /* number of usable samples in the buffer */ | 55 int mem_fill; /* number of usable samples in the buffer */ |
| 49 float prev_band_tonality[NB_TBANDS]; | 56 float prev_band_tonality[NB_TBANDS]; |
| 50 float prev_tonality; | 57 float prev_tonality; |
| 58 int prev_bandwidth; |
| 51 float E[NB_FRAMES][NB_TBANDS]; | 59 float E[NB_FRAMES][NB_TBANDS]; |
| 60 float logE[NB_FRAMES][NB_TBANDS]; |
| 52 float lowE[NB_TBANDS]; | 61 float lowE[NB_TBANDS]; |
| 53 float highE[NB_TBANDS]; | 62 float highE[NB_TBANDS]; |
| 54 float meanE[NB_TOT_BANDS]; | 63 float meanE[NB_TBANDS+1]; |
| 55 float mem[32]; | 64 float mem[32]; |
| 56 float cmean[8]; | 65 float cmean[8]; |
| 57 float std[9]; | 66 float std[9]; |
| 58 float music_prob; | 67 float music_prob; |
| 68 float vad_prob; |
| 59 float Etracker; | 69 float Etracker; |
| 60 float lowECount; | 70 float lowECount; |
| 61 int E_count; | 71 int E_count; |
| 62 int last_music; | 72 int last_music; |
| 63 int last_transition; | |
| 64 int count; | 73 int count; |
| 65 float subframe_mem[3]; | |
| 66 int analysis_offset; | 74 int analysis_offset; |
| 67 /** Probability of having speech for time i to DETECT_SIZE-1 (and music before). | 75 /** Probability of having speech for time i to DETECT_SIZE-1 (and music before). |
| 68 pspeech[0] is the probability that all frames in the window are speech. */ | 76 pspeech[0] is the probability that all frames in the window are speech. */ |
| 69 float pspeech[DETECT_SIZE]; | 77 float pspeech[DETECT_SIZE]; |
| 70 /** Probability of having music for time i to DETECT_SIZE-1 (and speech before). | 78 /** Probability of having music for time i to DETECT_SIZE-1 (and speech before). |
| 71 pmusic[0] is the probability that all frames in the window are music. */ | 79 pmusic[0] is the probability that all frames in the window are music. */ |
| 72 float pmusic[DETECT_SIZE]; | 80 float pmusic[DETECT_SIZE]; |
| 73 float speech_confidence; | 81 float speech_confidence; |
| 74 float music_confidence; | 82 float music_confidence; |
| 75 int speech_confidence_count; | 83 int speech_confidence_count; |
| 76 int music_confidence_count; | 84 int music_confidence_count; |
| 77 int write_pos; | 85 int write_pos; |
| 78 int read_pos; | 86 int read_pos; |
| 79 int read_subframe; | 87 int read_subframe; |
| 88 float hp_ener_accum; |
| 89 opus_val32 downmix_state[3]; |
| 80 AnalysisInfo info[DETECT_SIZE]; | 90 AnalysisInfo info[DETECT_SIZE]; |
| 81 } TonalityAnalysisState; | 91 } TonalityAnalysisState; |
| 82 | 92 |
| 83 /** Initialize a TonalityAnalysisState struct. | 93 /** Initialize a TonalityAnalysisState struct. |
| 84 * | 94 * |
| 85 * This performs some possibly slow initialization steps which should | 95 * This performs some possibly slow initialization steps which should |
| 86 * not be repeated every analysis step. No allocated memory is retained | 96 * not be repeated every analysis step. No allocated memory is retained |
| 87 * by the state struct, so no cleanup call is required. | 97 * by the state struct, so no cleanup call is required. |
| 88 */ | 98 */ |
| 89 void tonality_analysis_init(TonalityAnalysisState *analysis); | 99 void tonality_analysis_init(TonalityAnalysisState *analysis, opus_int32 Fs); |
| 90 | 100 |
| 91 /** Reset a TonalityAnalysisState struct. | 101 /** Reset a TonalityAnalysisState struct. |
| 92 * | 102 * |
| 93 * Call this when there's a discontinuity in the data. | 103 * Call this when there's a discontinuity in the data. |
| 94 */ | 104 */ |
| 95 void tonality_analysis_reset(TonalityAnalysisState *analysis); | 105 void tonality_analysis_reset(TonalityAnalysisState *analysis); |
| 96 | 106 |
| 97 void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int len); | 107 void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int len); |
| 98 | 108 |
| 99 void run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *analysis_pcm, | 109 void run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *analysis_pcm, |
| 100 int analysis_frame_size, int frame_size, int c1, int c2, int C, opus_int32 Fs, | 110 int analysis_frame_size, int frame_size, int c1, int c2, int C, opus_int32 Fs, |
| 101 int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info); | 111 int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info); |
| 102 | 112 |
| 103 #endif | 113 #endif |
| OLD | NEW |