src/analysis.c - Issue 28553003: Updating Opus to a pre-release of 1.1

Side by Side Diff: src/analysis.c

Issue 28553003: Updating Opus to a pre-release of 1.1 (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/opus

Patch Set: Removing failing file Created 7 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
(Empty)
	1 /* Copyright (c) 2011 Xiph.Org Foundation

	2 Written by Jean-Marc Valin */

	3 /*

	4 Redistribution and use in source and binary forms, with or without

	5 modification, are permitted provided that the following conditions

	6 are met:

	7

	8 - Redistributions of source code must retain the above copyright

	9 notice, this list of conditions and the following disclaimer.

	10

	11 - Redistributions in binary form must reproduce the above copyright

	12 notice, this list of conditions and the following disclaimer in the

	13 documentation and/or other materials provided with the distribution.

	14

	15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

	16 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

	17 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR

	18 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR

	19 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,

	20 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,

	21 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR

	22 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF

	23 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING

	24 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS

	25 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

	26 */

	27

	28 #ifdef HAVE_CONFIG_H

	29 #include "config.h"

	30 #endif

	31

	32 #include "kiss_fft.h"

	33 #include "celt.h"

	34 #include "modes.h"

	35 #include "arch.h"

	36 #include "quant_bands.h"

	37 #include <stdio.h>

	38 #include "analysis.h"

	39 #include "mlp.h"

	40 #include "stack_alloc.h"

	41

	42 extern const MLP net;

	43

	44 #ifndef M_PI

	45 #define M_PI 3.141592653

	46 #endif

	47

	48 static const float dct_table[128] = {

	49 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.2500 00f, 0.250000f,

	50 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.2500 00f, 0.250000f,

	51 0.351851f, 0.338330f, 0.311806f, 0.273300f, 0.224292f, 0.166664f, 0.1026 31f, 0.034654f,

	52 -0.034654f,-0.102631f,-0.166664f,-0.224292f,-0.273300f,-0.311806f,-0.3383 30f,-0.351851f,

	53 0.346760f, 0.293969f, 0.196424f, 0.068975f,-0.068975f,-0.196424f,-0.2939 69f,-0.346760f,

	54 -0.346760f,-0.293969f,-0.196424f,-0.068975f, 0.068975f, 0.196424f, 0.2939 69f, 0.346760f,

	55 0.338330f, 0.224292f, 0.034654f,-0.166664f,-0.311806f,-0.351851f,-0.2733 00f,-0.102631f,

	56 0.102631f, 0.273300f, 0.351851f, 0.311806f, 0.166664f,-0.034654f,-0.2242 92f,-0.338330f,

	57 0.326641f, 0.135299f,-0.135299f,-0.326641f,-0.326641f,-0.135299f, 0.1352 99f, 0.326641f,

	58 0.326641f, 0.135299f,-0.135299f,-0.326641f,-0.326641f,-0.135299f, 0.1352 99f, 0.326641f,

	59 0.311806f, 0.034654f,-0.273300f,-0.338330f,-0.102631f, 0.224292f, 0.3518 51f, 0.166664f,

	60 -0.166664f,-0.351851f,-0.224292f, 0.102631f, 0.338330f, 0.273300f,-0.0346 54f,-0.311806f,

	61 0.293969f,-0.068975f,-0.346760f,-0.196424f, 0.196424f, 0.346760f, 0.0689 75f,-0.293969f,

	62 -0.293969f, 0.068975f, 0.346760f, 0.196424f,-0.196424f,-0.346760f,-0.0689 75f, 0.293969f,

	63 0.273300f,-0.166664f,-0.338330f, 0.034654f, 0.351851f, 0.102631f,-0.3118 06f,-0.224292f,

	64 0.224292f, 0.311806f,-0.102631f,-0.351851f,-0.034654f, 0.338330f, 0.1666 64f,-0.273300f,

	65 };

	66

	67 static const float analysis_window[240] = {

	68 0.000043f, 0.000171f, 0.000385f, 0.000685f, 0.001071f, 0.001541f, 0.002098 f, 0.002739f,

	69 0.003466f, 0.004278f, 0.005174f, 0.006156f, 0.007222f, 0.008373f, 0.009607 f, 0.010926f,

	70 0.012329f, 0.013815f, 0.015385f, 0.017037f, 0.018772f, 0.020590f, 0.022490 f, 0.024472f,

	71 0.026535f, 0.028679f, 0.030904f, 0.033210f, 0.035595f, 0.038060f, 0.040604 f, 0.043227f,

	72 0.045928f, 0.048707f, 0.051564f, 0.054497f, 0.057506f, 0.060591f, 0.063752 f, 0.066987f,

	73 0.070297f, 0.073680f, 0.077136f, 0.080665f, 0.084265f, 0.087937f, 0.091679 f, 0.095492f,

	74 0.099373f, 0.103323f, 0.107342f, 0.111427f, 0.115579f, 0.119797f, 0.124080 f, 0.128428f,

	75 0.132839f, 0.137313f, 0.141849f, 0.146447f, 0.151105f, 0.155823f, 0.160600 f, 0.165435f,

	76 0.170327f, 0.175276f, 0.180280f, 0.185340f, 0.190453f, 0.195619f, 0.200838 f, 0.206107f,

	77 0.211427f, 0.216797f, 0.222215f, 0.227680f, 0.233193f, 0.238751f, 0.244353 f, 0.250000f,

	78 0.255689f, 0.261421f, 0.267193f, 0.273005f, 0.278856f, 0.284744f, 0.290670 f, 0.296632f,

	79 0.302628f, 0.308658f, 0.314721f, 0.320816f, 0.326941f, 0.333097f, 0.339280 f, 0.345492f,

	80 0.351729f, 0.357992f, 0.364280f, 0.370590f, 0.376923f, 0.383277f, 0.389651 f, 0.396044f,

	81 0.402455f, 0.408882f, 0.415325f, 0.421783f, 0.428254f, 0.434737f, 0.441231 f, 0.447736f,

	82 0.454249f, 0.460770f, 0.467298f, 0.473832f, 0.480370f, 0.486912f, 0.493455 f, 0.500000f,

	83 0.506545f, 0.513088f, 0.519630f, 0.526168f, 0.532702f, 0.539230f, 0.545751 f, 0.552264f,

	84 0.558769f, 0.565263f, 0.571746f, 0.578217f, 0.584675f, 0.591118f, 0.597545 f, 0.603956f,

	85 0.610349f, 0.616723f, 0.623077f, 0.629410f, 0.635720f, 0.642008f, 0.648271 f, 0.654508f,

	86 0.660720f, 0.666903f, 0.673059f, 0.679184f, 0.685279f, 0.691342f, 0.697372 f, 0.703368f,

	87 0.709330f, 0.715256f, 0.721144f, 0.726995f, 0.732807f, 0.738579f, 0.744311 f, 0.750000f,

	88 0.755647f, 0.761249f, 0.766807f, 0.772320f, 0.777785f, 0.783203f, 0.788573 f, 0.793893f,

	89 0.799162f, 0.804381f, 0.809547f, 0.814660f, 0.819720f, 0.824724f, 0.829673 f, 0.834565f,

	90 0.839400f, 0.844177f, 0.848895f, 0.853553f, 0.858151f, 0.862687f, 0.867161 f, 0.871572f,

	91 0.875920f, 0.880203f, 0.884421f, 0.888573f, 0.892658f, 0.896677f, 0.900627 f, 0.904508f,

	92 0.908321f, 0.912063f, 0.915735f, 0.919335f, 0.922864f, 0.926320f, 0.929703 f, 0.933013f,

	93 0.936248f, 0.939409f, 0.942494f, 0.945503f, 0.948436f, 0.951293f, 0.954072 f, 0.956773f,

	94 0.959396f, 0.961940f, 0.964405f, 0.966790f, 0.969096f, 0.971321f, 0.973465 f, 0.975528f,

	95 0.977510f, 0.979410f, 0.981228f, 0.982963f, 0.984615f, 0.986185f, 0.987671 f, 0.989074f,

	96 0.990393f, 0.991627f, 0.992778f, 0.993844f, 0.994826f, 0.995722f, 0.996534 f, 0.997261f,

	97 0.997902f, 0.998459f, 0.998929f, 0.999315f, 0.999615f, 0.999829f, 0.999957 f, 1.000000f,

	98 };

	99

	100 static const int tbands[NB_TBANDS+1] = {

	101 2, 4, 6, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, 68, 80, 96, 12 0

	102 };

	103

	104 static const int extra_bands[NB_TOT_BANDS+1] = {

	105 1, 2, 4, 6, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, 68, 80, 96, 120, 160, 200

	106 };

	107

	108 /*static const float tweight[NB_TBANDS+1] = {

	109 .3, .4, .5, .6, .7, .8, .9, 1., 1., 1., 1., 1., 1., 1., .8, .7, .6, .5

	110 };*/

	111

	112 #define NB_TONAL_SKIP_BANDS 9

	113

	114 #define cA 0.43157974f

	115 #define cB 0.67848403f

	116 #define cC 0.08595542f

	117 #define cE ((float)M_PI/2)

	118 static inline float fast_atan2f(float y, float x) {

	119 float x2, y2;

	120 /* Should avoid underflow on the values we'll get */

	121 if (ABS16(x)+ABS16(y)<1e-9f)

	122 {

	123 x*=1e12f;

	124 y*=1e12f;

	125 }

	126 x2 = x*x;

	127 y2 = y*y;

	128 if(x2<y2){

	129 float den = (y2 + cBx2) (y2 + cC*x2);

	130 if (den!=0)

	131 return -xy(y2 + cA*x2) / den + (y<0 ? -cE : cE);

	132 else

	133 return (y<0 ? -cE : cE);

	134 }else{

	135 float den = (x2 + cBy2) (x2 + cC*y2);

	136 if (den!=0)

	137 return xy(x2 + cAy2) / den + (y<0 ? -cE : cE) - (xy<0 ? -cE : cE);

	138 else

	139 return (y<0 ? -cE : cE) - (x*y<0 ? -cE : cE);

	140 }

	141 }

	142

	143 void tonality_get_info(TonalityAnalysisState tonal, AnalysisInfo info_out, int len)

	144 {

	145 int pos;

	146 int curr_lookahead;

	147 float psum;

	148 int i;

	149

	150 pos = tonal->read_pos;

	151 curr_lookahead = tonal->write_pos-tonal->read_pos;

	152 if (curr_lookahead<0)

	153 curr_lookahead += DETECT_SIZE;

	154

	155 if (len > 480 && pos != tonal->write_pos)

	156 {

	157 pos++;

	158 if (pos==DETECT_SIZE)

	159 pos=0;

	160 }

	161 if (pos == tonal->write_pos)

	162 pos--;

	163 if (pos<0)

	164 pos = DETECT_SIZE-1;

	165 OPUS_COPY(info_out, &tonal->info[pos], 1);

	166 tonal->read_subframe += len/120;

	167 while (tonal->read_subframe>=4)

	168 {

	169 tonal->read_subframe -= 4;

	170 tonal->read_pos++;

	171 }

	172 if (tonal->read_pos>=DETECT_SIZE)

	173 tonal->read_pos-=DETECT_SIZE;

	174

	175 /* Compensate for the delay in the features themselves.

	176 FIXME: Need a better estimate the 10 I just made up */

	177 curr_lookahead = IMAX(curr_lookahead-10, 0);

	178

	179 psum=0;

	180 /* Summing the probability of transition patterns that involve music at

	181 time (DETECT_SIZE-curr_lookahead-1) */

	182 for (i=0;i<DETECT_SIZE-curr_lookahead;i++)

	183 psum += tonal->pmusic[i];

	184 for (;i<DETECT_SIZE;i++)

	185 psum += tonal->pspeech[i];

	186 psum = psumtonal->music_confidence + (1-psum)tonal->speech_confidence;

	187 /printf("%f %f %f\n", psum, info_out->music_prob, info_out->tonality);/

	188

	189 info_out->music_prob = psum;

	190 }

	191

	192 void tonality_analysis(TonalityAnalysisState tonal, AnalysisInfo info_out, con st CELTMode celt_mode, const void x, int len, int offset, int c1, int c2, int C, int lsb_depth, downmix_func downmix)

	193 {

	194 int i, b;

	195 const kiss_fft_state *kfft;

	196 VARDECL(kiss_fft_cpx, in);

	197 VARDECL(kiss_fft_cpx, out);

	198 int N = 480, N2=240;

	199 float * OPUS_RESTRICT A = tonal->angle;

	200 float * OPUS_RESTRICT dA = tonal->d_angle;

	201 float * OPUS_RESTRICT d2A = tonal->d2_angle;

	202 VARDECL(float, tonality);

	203 VARDECL(float, noisiness);

	204 float band_tonality[NB_TBANDS];

	205 float logE[NB_TBANDS];

	206 float BFCC[8];

	207 float features[25];

	208 float frame_tonality;

	209 float max_frame_tonality;

	210 /float tw_sum=0;/

	211 float frame_noisiness;

	212 const float pi4 = (float)(M_PIM_PIM_PI*M_PI);

	213 float slope=0;

	214 float frame_stationarity;

	215 float relativeE;

	216 float frame_probs[2];

	217 float alpha, alphaE, alphaE2;

	218 float frame_loudness;

	219 float bandwidth_mask;

	220 int bandwidth=0;

	221 float maxE = 0;

	222 float noise_floor;

	223 int remaining;

	224 AnalysisInfo *info;

	225 SAVE_STACK;

	226

	227 tonal->last_transition++;

	228 alpha = 1.f/IMIN(20, 1+tonal->count);

	229 alphaE = 1.f/IMIN(50, 1+tonal->count);

	230 alphaE2 = 1.f/IMIN(1000, 1+tonal->count);

	231

	232 if (tonal->count<4)

	233 tonal->music_prob = .5;

	234 kfft = celt_mode->mdct.kfft[0];

	235 if (tonal->count==0)

	236 tonal->mem_fill = 240;

	237 downmix(x, &tonal->inmem[tonal->mem_fill], IMIN(len, ANALYSIS_BUF_SIZE-tonal ->mem_fill), offset, c1, c2, C);

	238 if (tonal->mem_fill+len < ANALYSIS_BUF_SIZE)

	239 {

	240 tonal->mem_fill += len;

	241 /* Don't have enough to update the analysis */

	242 RESTORE_STACK;

	243 return;

	244 }

	245 info = &tonal->info[tonal->write_pos++];

	246 if (tonal->write_pos>=DETECT_SIZE)

	247 tonal->write_pos-=DETECT_SIZE;

	248

	249 ALLOC(in, 480, kiss_fft_cpx);

	250 ALLOC(out, 480, kiss_fft_cpx);

	251 ALLOC(tonality, 240, float);

	252 ALLOC(noisiness, 240, float);

	253 for (i=0;i<N2;i++)

	254 {

	255 float w = analysis_window[i];

	256 in[i].r = w*tonal->inmem[i];

	257 in[i].i = w*tonal->inmem[N2+i];

	258 in[N-i-1].r = w*tonal->inmem[N-i-1];

	259 in[N-i-1].i = w*tonal->inmem[N+N2-i-1];

	260 }

	261 OPUS_MOVE(tonal->inmem, tonal->inmem+ANALYSIS_BUF_SIZE-240, 240);

	262 remaining = len - (ANALYSIS_BUF_SIZE-tonal->mem_fill);

	263 downmix(x, &tonal->inmem[240], remaining, offset+ANALYSIS_BUF_SIZE-tonal->me m_fill, c1, c2, C);

	264 tonal->mem_fill = 240 + remaining;

	265 opus_fft(kfft, in, out);

	266

	267 for (i=1;i<N2;i++)

	268 {

	269 float X1r, X2r, X1i, X2i;

	270 float angle, d_angle, d2_angle;

	271 float angle2, d_angle2, d2_angle2;

	272 float mod1, mod2, avg_mod;

	273 X1r = out[i].r+out[N-i].r;

	274 X1i = out[i].i-out[N-i].i;

	275 X2r = out[i].i+out[N-i].i;

	276 X2i = out[N-i].r-out[i].r;

	277

	278 angle = (float)(.5f/M_PI)*fast_atan2f(X1i, X1r);

	279 d_angle = angle - A[i];

	280 d2_angle = d_angle - dA[i];

	281

	282 angle2 = (float)(.5f/M_PI)*fast_atan2f(X2i, X2r);

	283 d_angle2 = angle2 - angle;

	284 d2_angle2 = d_angle2 - d_angle;

	285

	286 mod1 = d2_angle - (float)floor(.5+d2_angle);

	287 noisiness[i] = ABS16(mod1);

	288 mod1 *= mod1;

	289 mod1 *= mod1;

	290

	291 mod2 = d2_angle2 - (float)floor(.5+d2_angle2);

	292 noisiness[i] += ABS16(mod2);

	293 mod2 *= mod2;

	294 mod2 *= mod2;

	295

	296 avg_mod = .25f(d2A[i]+2.fmod1+mod2);

	297 tonality[i] = 1.f/(1.f+40.f16.fpi4*avg_mod)-.015f;

	298

	299 A[i] = angle2;

	300 dA[i] = d_angle2;

	301 d2A[i] = mod2;

	302 }

	303

	304 frame_tonality = 0;

	305 max_frame_tonality = 0;

	306 /tw_sum = 0;/

	307 info->activity = 0;

	308 frame_noisiness = 0;

	309 frame_stationarity = 0;

	310 if (!tonal->count)

	311 {

	312 for (b=0;b<NB_TBANDS;b++)

	313 {

	314 tonal->lowE[b] = 1e10;

	315 tonal->highE[b] = -1e10;

	316 }

	317 }

	318 relativeE = 0;

	319 frame_loudness = 0;

	320 bandwidth_mask = 0;

	321 for (b=0;b<NB_TBANDS;b++)

	322 {

	323 float E=0, tE=0, nE=0;

	324 float L1, L2;

	325 float stationarity;

	326 for (i=tbands[b];i<tbands[b+1];i++)

	327 {

	328 float binE = out[i].r(float)out[i].r + out[N-i].r(float)out[N-i].r

	329 + out[i].i(float)out[i].i + out[N-i].i(float)out[N-i].i;

	330 #ifdef FIXED_POINT

	331 /* FIXME: It's probably best to change the BFCC filter initial state i nstead */

	332 binE *= 5.55e-17f;

	333 #endif

	334 E += binE;

	335 tE += binE*tonality[i];

	336 nE += binE2.f(.5f-noisiness[i]);

	337 }

	338 tonal->E[tonal->E_count][b] = E;

	339 frame_noisiness += nE/(1e-15f+E);

	340

	341 frame_loudness += sqrt(E+1e-10f);

	342 logE[b] = (float)log(E+1e-10f);

	343 tonal->lowE[b] = MIN32(logE[b], tonal->lowE[b]+.01f);

	344 tonal->highE[b] = MAX32(logE[b], tonal->highE[b]-.1f);

	345 if (tonal->highE[b] < tonal->lowE[b]+1.f)

	346 {

	347 tonal->highE[b]+=.5f;

	348 tonal->lowE[b]-=.5f;

	349 }

	350 relativeE += (logE[b]-tonal->lowE[b])/(1e-15+tonal->highE[b]-tonal->lowE[ b]);

	351

	352 L1=L2=0;

	353 for (i=0;i<NB_FRAMES;i++)

	354 {

	355 L1 += sqrt(tonal->E[i][b]);

	356 L2 += tonal->E[i][b];

	357 }

	358

	359 stationarity = MIN16(0.99f,L1/sqrt(1e-15+NB_FRAMES*L2));

	360 stationarity *= stationarity;

	361 stationarity *= stationarity;

	362 frame_stationarity += stationarity;

	363 /band_tonality[b] = tE/(1e-15+E)/;

	364 band_tonality[b] = MAX16(tE/(1e-15+E), stationarity*tonal->prev_band_tona lity[b]);

	365 #if 0

	366 if (b>=NB_TONAL_SKIP_BANDS)

	367 {

	368 frame_tonality += tweight[b]*band_tonality[b];

	369 tw_sum += tweight[b];

	370 }

	371 #else

	372 frame_tonality += band_tonality[b];

	373 if (b>=NB_TBANDS-NB_TONAL_SKIP_BANDS)

	374 frame_tonality -= band_tonality[b-NB_TBANDS+NB_TONAL_SKIP_BANDS];

	375 #endif

	376 max_frame_tonality = MAX16(max_frame_tonality, (1.f+.03f(b-NB_TBANDS))f rame_tonality);

	377 slope += band_tonality[b]*(b-8);

	378 /printf("%f %f ", band_tonality[b], stationarity);/

	379 tonal->prev_band_tonality[b] = band_tonality[b];

	380 }

	381

	382 bandwidth_mask = 0;

	383 bandwidth = 0;

	384 maxE = 0;

	385 noise_floor = 5.7e-4f/(1<<(IMAX(0,lsb_depth-8)));

	386 #ifdef FIXED_POINT

	387 noise_floor *= 1<<(15+SIG_SHIFT);

	388 #endif

	389 noise_floor *= noise_floor;

	390 for (b=0;b<NB_TOT_BANDS;b++)

	391 {

	392 float E=0;

	393 int band_start, band_end;

	394 /* Keep a margin of 300 Hz for aliasing */

	395 band_start = extra_bands[b];

	396 band_end = extra_bands[b+1];

	397 for (i=band_start;i<band_end;i++)

	398 {

	399 float binE = out[i].r(float)out[i].r + out[N-i].r(float)out[N-i].r

	400 + out[i].i(float)out[i].i + out[N-i].i(float)out[N-i].i;

	401 E += binE;

	402 }

	403 maxE = MAX32(maxE, E);

	404 tonal->meanE[b] = MAX32((1-alphaE2)*tonal->meanE[b], E);

	405 E = MAX32(E, tonal->meanE[b]);

	406 /* Use a simple follower with 13 dB/Bark slope for spreading function */

	407 bandwidth_mask = MAX32(.05f*bandwidth_mask, E);

	408 /* Consider the band "active" only if all these conditions are met:

	409 1) less than 10 dB below the simple follower

	410 2) less than 90 dB below the peak band (maximal masking possible consi dering

	411 both the ATH and the loudness-dependent slope of the spreading func tion)

	412 3) above the PCM quantization noise floor

	413 */

	414 if (E>.1bandwidth_mask && E1e9f > maxE && E > noise_floor*(band_end-ban d_start))

	415 bandwidth = b;

	416 }

	417 if (tonal->count<=2)

	418 bandwidth = 20;

	419 frame_loudness = 20*(float)log10(frame_loudness);

	420 tonal->Etracker = MAX32(tonal->Etracker-.03f, frame_loudness);

	421 tonal->lowECount *= (1-alphaE);

	422 if (frame_loudness < tonal->Etracker-30)

	423 tonal->lowECount += alphaE;

	424

	425 for (i=0;i<8;i++)

	426 {

	427 float sum=0;

	428 for (b=0;b<16;b++)

	429 sum += dct_table[i16+b]logE[b];

	430 BFCC[i] = sum;

	431 }

	432

	433 frame_stationarity /= NB_TBANDS;

	434 relativeE /= NB_TBANDS;

	435 if (tonal->count<10)

	436 relativeE = .5;

	437 frame_noisiness /= NB_TBANDS;

	438 #if 1

	439 info->activity = frame_noisiness + (1-frame_noisiness)*relativeE;

	440 #else

	441 info->activity = .5*(1+frame_noisiness-frame_stationarity);

	442 #endif

	443 frame_tonality = (max_frame_tonality/(NB_TBANDS-NB_TONAL_SKIP_BANDS));

	444 frame_tonality = MAX16(frame_tonality, tonal->prev_tonality*.8f);

	445 tonal->prev_tonality = frame_tonality;

	446

	447 slope /= 8*8;

	448 info->tonality_slope = slope;

	449

	450 tonal->E_count = (tonal->E_count+1)%NB_FRAMES;

	451 tonal->count++;

	452 info->tonality = frame_tonality;

	453

	454 for (i=0;i<4;i++)

	455 features[i] = -0.12299f(BFCC[i]+tonal->mem[i+24]) + 0.49195f(tonal->mem [i]+tonal->mem[i+16]) + 0.69693ftonal->mem[i+8] - 1.4349ftonal->cmean[i];

	456

	457 for (i=0;i<4;i++)

	458 tonal->cmean[i] = (1-alpha)tonal->cmean[i] + alphaBFCC[i];

	459

	460 for (i=0;i<4;i++)

	461 features[4+i] = 0.63246f(BFCC[i]-tonal->mem[i+24]) + 0.31623f(tonal->m em[i]-tonal->mem[i+16]);

	462 for (i=0;i<3;i++)

	463 features[8+i] = 0.53452f(BFCC[i]+tonal->mem[i+24]) - 0.26726f(tonal->m em[i]+tonal->mem[i+16]) -0.53452f*tonal->mem[i+8];

	464

	465 if (tonal->count > 5)

	466 {

	467 for (i=0;i<9;i++)

	468 tonal->std[i] = (1-alpha)tonal->std[i] + alphafeatures[i]*features[i ];

	469 }

	470

	471 for (i=0;i<8;i++)

	472 {

	473 tonal->mem[i+24] = tonal->mem[i+16];

	474 tonal->mem[i+16] = tonal->mem[i+8];

	475 tonal->mem[i+8] = tonal->mem[i];

	476 tonal->mem[i] = BFCC[i];

	477 }

	478 for (i=0;i<9;i++)

	479 features[11+i] = sqrt(tonal->std[i]);

	480 features[20] = info->tonality;

	481 features[21] = info->activity;

	482 features[22] = frame_stationarity;

	483 features[23] = info->tonality_slope;

	484 features[24] = tonal->lowECount;

	485

	486 #ifndef DISABLE_FLOAT_API

	487 mlp_process(&net, features, frame_probs);

	488 frame_probs[0] = .5f*(frame_probs[0]+1);

	489 /* Curve fitting between the MLP probability and the actual probability */

	490 frame_probs[0] = .01f + 1.21fframe_probs[0]frame_probs[0] - .23f*(float)po w(frame_probs[0], 10);

	491 /* Probability of active audio (as opposed to silence) */

	492 frame_probs[1] = .5f*frame_probs[1]+.5f;

	493 /* Consider that silence has a 50-50 probability. */

	494 frame_probs[0] = frame_probs[1]frame_probs[0] + (1-frame_probs[1]).5f;

	495

	496 /printf("%f %f ", frame_probs[0], frame_probs[1]);/

	497 {

	498 /* Probability of state transition */

	499 float tau;

	500 /* Represents independence of the MLP probabilities, where

	501 beta=1 means fully independent. */

	502 float beta;

	503 /* Denormalized probability of speech (p0) and music (p1) after update */

	504 float p0, p1;

	505 /* Probabilities for "all speech" and "all music" */

	506 float s0, m0;

	507 /* Probability sum for renormalisation */

	508 float psum;

	509 /* Instantaneous probability of speech and music, with beta pre-applied. */

	510 float speech0;

	511 float music0;

	512

	513 /* One transition every 3 minutes of active audio */

	514 tau = .00005f*frame_probs[1];

	515 beta = .05f;

	516 if (1) {

	517 /* Adapt beta based on how "unexpected" the new prob is */

	518 float p, q;

	519 p = MAX16(.05f,MIN16(.95f,frame_probs[0]));

	520 q = MAX16(.05f,MIN16(.95f,tonal->music_prob));

	521 beta = .01f+.05fABS16(p-q)/(p(1-q)+q*(1-p));

	522 }

	523 /* p0 and p1 are the probabilities of speech and music at this frame

	524 using only information from previous frame and applying the

	525 state transition model */

	526 p0 = (1-tonal->music_prob)(1-tau) + tonal->music_prob tau;

	527 p1 = tonal->music_prob (1-tau) + (1-tonal->music_prob)tau;

	528 /* We apply the current probability with exponent beta to work around

	529 the fact that the probability estimates aren't independent. */

	530 p0 *= (float)pow(1-frame_probs[0], beta);

	531 p1 *= (float)pow(frame_probs[0], beta);

	532 /* Normalise the probabilities to get the Marokv probability of music. */

	533 tonal->music_prob = p1/(p0+p1);

	534 info->music_prob = tonal->music_prob;

	535

	536 /* This chunk of code deals with delayed decision. */

	537 psum=1e-20f;

	538 /* Instantaneous probability of speech and music, with beta pre-applied. */

	539 speech0 = (float)pow(1-frame_probs[0], beta);

	540 music0 = (float)pow(frame_probs[0], beta);

	541 if (tonal->count==1)

	542 {

	543 tonal->pspeech[0]=.5;

	544 tonal->pmusic [0]=.5;

	545 }

	546 /* Updated probability of having only speech (s0) or only music (m0),

	547 before considering the new observation. */

	548 s0 = tonal->pspeech[0] + tonal->pspeech[1];

	549 m0 = tonal->pmusic [0] + tonal->pmusic [1];

	550 /* Updates s0 and m0 with instantaneous probability. */

	551 tonal->pspeech[0] = s0(1-tau)speech0;

	552 tonal->pmusic [0] = m0(1-tau)music0;

	553 /* Propagate the transition probabilities */

	554 for (i=1;i<DETECT_SIZE-1;i++)

	555 {

	556 tonal->pspeech[i] = tonal->pspeech[i+1]*speech0;

	557 tonal->pmusic [i] = tonal->pmusic [i+1]*music0;

	558 }

	559 /* Probability that the latest frame is speech, when all the previous one s were music. */

	560 tonal->pspeech[DETECT_SIZE-1] = m0tauspeech0;

	561 /* Probability that the latest frame is music, when all the previous ones were speech. */

	562 tonal->pmusic [DETECT_SIZE-1] = s0taumusic0;

	563

	564 /* Renormalise probabilities to 1 */

	565 for (i=0;i<DETECT_SIZE;i++)

	566 psum += tonal->pspeech[i] + tonal->pmusic[i];

	567 psum = 1.f/psum;

	568 for (i=0;i<DETECT_SIZE;i++)

	569 {

	570 tonal->pspeech[i] *= psum;

	571 tonal->pmusic [i] *= psum;

	572 }

	573 psum = tonal->pmusic[0];

	574 for (i=1;i<DETECT_SIZE;i++)

	575 psum += tonal->pspeech[i];

	576

	577 /* Estimate our confidence in the speech/music decisions */

	578 if (frame_probs[1]>.75)

	579 {

	580 if (tonal->music_prob>.9)

	581 {

	582 float adapt;

	583 adapt = 1.f/(++tonal->music_confidence_count);

	584 tonal->music_confidence_count = IMIN(tonal->music_confidence_count, 500);

	585 tonal->music_confidence += adapt*MAX16(-.2f,frame_probs[0]-tonal->m usic_confidence);

	586 }

	587 if (tonal->music_prob<.1)

	588 {

	589 float adapt;

	590 adapt = 1.f/(++tonal->speech_confidence_count);

	591 tonal->speech_confidence_count = IMIN(tonal->speech_confidence_coun t, 500);

	592 tonal->speech_confidence += adapt*MIN16(.2f,frame_probs[0]-tonal->s peech_confidence);

	593 }

	594 } else {

	595 if (tonal->music_confidence_count==0)

	596 tonal->music_confidence = .9f;

	597 if (tonal->speech_confidence_count==0)

	598 tonal->speech_confidence = .1f;

	599 }

	600 psum = MAX16(tonal->speech_confidence, MIN16(tonal->music_confidence, psu m));

	601 }

	602 if (tonal->last_music != (tonal->music_prob>.5f))

	603 tonal->last_transition=0;

	604 tonal->last_music = tonal->music_prob>.5f;

	605 #else

	606 info->music_prob = 0;

	607 #endif

	608 /*for (i=0;i<25;i++)

	609 printf("%f ", features[i]);

	610 printf("\n");*/

	611

	612 info->bandwidth = bandwidth;

	613 /printf("%d %d\n", info->bandwidth, info->opus_bandwidth);/

	614 info->noisiness = frame_noisiness;

	615 info->valid = 1;

	616 if (info_out!=NULL)

	617 OPUS_COPY(info_out, info, 1);

	618 RESTORE_STACK;

	619 }

	620

	621 void run_analysis(TonalityAnalysisState analysis, const CELTMode celt_mode, co nst void *analysis_pcm,

	622 int analysis_frame_size, int frame_size, int c1, int c2, int C, opus_int32 Fs,

	623 int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_inf o)

	624 {

	625 int offset;

	626 int pcm_len;

	627

	628 if (analysis_pcm != NULL)

	629 {

	630 /* Avoid overflow/wrap-around of the analysis buffer */

	631 analysis_frame_size = IMIN((DETECT_SIZE-5)*Fs/100, analysis_frame_size);

	632

	633 pcm_len = analysis_frame_size - analysis->analysis_offset;

	634 offset = analysis->analysis_offset;

	635 do {

	636 tonality_analysis(analysis, NULL, celt_mode, analysis_pcm, IMIN(480, pc m_len), offset, c1, c2, C, lsb_depth, downmix);

	637 offset += 480;

	638 pcm_len -= 480;

	639 } while (pcm_len>0);

	640 analysis->analysis_offset = analysis_frame_size;

	641

	642 analysis->analysis_offset -= frame_size;

	643 }

	644

	645 analysis_info->valid = 0;

	646 tonality_get_info(analysis, analysis_info, frame_size);

	647 }

OLD	NEW

« no previous file with comments | « src/analysis.h ('k') | src/mlp.h » ('j') | no next file with comments »