OLD | NEW |
1 /* Copyright (c) 2011 Xiph.Org Foundation | 1 /* Copyright (c) 2011 Xiph.Org Foundation |
2 Written by Jean-Marc Valin */ | 2 Written by Jean-Marc Valin */ |
3 /* | 3 /* |
4 Redistribution and use in source and binary forms, with or without | 4 Redistribution and use in source and binary forms, with or without |
5 modification, are permitted provided that the following conditions | 5 modification, are permitted provided that the following conditions |
6 are met: | 6 are met: |
7 | 7 |
8 - Redistributions of source code must retain the above copyright | 8 - Redistributions of source code must retain the above copyright |
9 notice, this list of conditions and the following disclaimer. | 9 notice, this list of conditions and the following disclaimer. |
10 | 10 |
(...skipping 171 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
182 for (i=0;i<DETECT_SIZE-curr_lookahead;i++) | 182 for (i=0;i<DETECT_SIZE-curr_lookahead;i++) |
183 psum += tonal->pmusic[i]; | 183 psum += tonal->pmusic[i]; |
184 for (;i<DETECT_SIZE;i++) | 184 for (;i<DETECT_SIZE;i++) |
185 psum += tonal->pspeech[i]; | 185 psum += tonal->pspeech[i]; |
186 psum = psum*tonal->music_confidence + (1-psum)*tonal->speech_confidence; | 186 psum = psum*tonal->music_confidence + (1-psum)*tonal->speech_confidence; |
187 /*printf("%f %f %f\n", psum, info_out->music_prob, info_out->tonality);*/ | 187 /*printf("%f %f %f\n", psum, info_out->music_prob, info_out->tonality);*/ |
188 | 188 |
189 info_out->music_prob = psum; | 189 info_out->music_prob = psum; |
190 } | 190 } |
191 | 191 |
192 void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con
st CELTMode *celt_mode, const void *x, int len, int offset, int c1, int c2, int
C, int lsb_depth, downmix_func downmix) | 192 static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt
_mode, const void *x, int len, int offset, int c1, int c2, int C, int lsb_depth,
downmix_func downmix) |
193 { | 193 { |
194 int i, b; | 194 int i, b; |
195 const kiss_fft_state *kfft; | 195 const kiss_fft_state *kfft; |
196 VARDECL(kiss_fft_cpx, in); | 196 VARDECL(kiss_fft_cpx, in); |
197 VARDECL(kiss_fft_cpx, out); | 197 VARDECL(kiss_fft_cpx, out); |
198 int N = 480, N2=240; | 198 int N = 480, N2=240; |
199 float * OPUS_RESTRICT A = tonal->angle; | 199 float * OPUS_RESTRICT A = tonal->angle; |
200 float * OPUS_RESTRICT dA = tonal->d_angle; | 200 float * OPUS_RESTRICT dA = tonal->d_angle; |
201 float * OPUS_RESTRICT d2A = tonal->d2_angle; | 201 float * OPUS_RESTRICT d2A = tonal->d2_angle; |
202 VARDECL(float, tonality); | 202 VARDECL(float, tonality); |
(...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
256 in[i].r = (kiss_fft_scalar)(w*tonal->inmem[i]); | 256 in[i].r = (kiss_fft_scalar)(w*tonal->inmem[i]); |
257 in[i].i = (kiss_fft_scalar)(w*tonal->inmem[N2+i]); | 257 in[i].i = (kiss_fft_scalar)(w*tonal->inmem[N2+i]); |
258 in[N-i-1].r = (kiss_fft_scalar)(w*tonal->inmem[N-i-1]); | 258 in[N-i-1].r = (kiss_fft_scalar)(w*tonal->inmem[N-i-1]); |
259 in[N-i-1].i = (kiss_fft_scalar)(w*tonal->inmem[N+N2-i-1]); | 259 in[N-i-1].i = (kiss_fft_scalar)(w*tonal->inmem[N+N2-i-1]); |
260 } | 260 } |
261 OPUS_MOVE(tonal->inmem, tonal->inmem+ANALYSIS_BUF_SIZE-240, 240); | 261 OPUS_MOVE(tonal->inmem, tonal->inmem+ANALYSIS_BUF_SIZE-240, 240); |
262 remaining = len - (ANALYSIS_BUF_SIZE-tonal->mem_fill); | 262 remaining = len - (ANALYSIS_BUF_SIZE-tonal->mem_fill); |
263 downmix(x, &tonal->inmem[240], remaining, offset+ANALYSIS_BUF_SIZE-tonal->me
m_fill, c1, c2, C); | 263 downmix(x, &tonal->inmem[240], remaining, offset+ANALYSIS_BUF_SIZE-tonal->me
m_fill, c1, c2, C); |
264 tonal->mem_fill = 240 + remaining; | 264 tonal->mem_fill = 240 + remaining; |
265 opus_fft(kfft, in, out); | 265 opus_fft(kfft, in, out); |
| 266 #ifndef FIXED_POINT |
| 267 /* If there's any NaN on the input, the entire output will be NaN, so we onl
y need to check one value. */ |
| 268 if (celt_isnan(out[0].r)) |
| 269 { |
| 270 info->valid = 0; |
| 271 RESTORE_STACK; |
| 272 return; |
| 273 } |
| 274 #endif |
266 | 275 |
267 for (i=1;i<N2;i++) | 276 for (i=1;i<N2;i++) |
268 { | 277 { |
269 float X1r, X2r, X1i, X2i; | 278 float X1r, X2r, X1i, X2i; |
270 float angle, d_angle, d2_angle; | 279 float angle, d_angle, d2_angle; |
271 float angle2, d_angle2, d2_angle2; | 280 float angle2, d_angle2, d2_angle2; |
272 float mod1, mod2, avg_mod; | 281 float mod1, mod2, avg_mod; |
273 X1r = (float)out[i].r+out[N-i].r; | 282 X1r = (float)out[i].r+out[N-i].r; |
274 X1i = (float)out[i].i-out[N-i].i; | 283 X1i = (float)out[i].i-out[N-i].i; |
275 X2r = (float)out[i].i+out[N-i].i; | 284 X2r = (float)out[i].i+out[N-i].i; |
(...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
327 float binE = out[i].r*(float)out[i].r + out[N-i].r*(float)out[N-i].r | 336 float binE = out[i].r*(float)out[i].r + out[N-i].r*(float)out[N-i].r |
328 + out[i].i*(float)out[i].i + out[N-i].i*(float)out[N-i].i; | 337 + out[i].i*(float)out[i].i + out[N-i].i*(float)out[N-i].i; |
329 #ifdef FIXED_POINT | 338 #ifdef FIXED_POINT |
330 /* FIXME: It's probably best to change the BFCC filter initial state i
nstead */ | 339 /* FIXME: It's probably best to change the BFCC filter initial state i
nstead */ |
331 binE *= 5.55e-17f; | 340 binE *= 5.55e-17f; |
332 #endif | 341 #endif |
333 E += binE; | 342 E += binE; |
334 tE += binE*tonality[i]; | 343 tE += binE*tonality[i]; |
335 nE += binE*2.f*(.5f-noisiness[i]); | 344 nE += binE*2.f*(.5f-noisiness[i]); |
336 } | 345 } |
| 346 #ifndef FIXED_POINT |
| 347 /* Check for extreme band energies that could cause NaNs later. */ |
| 348 if (!(E<1e9f) || celt_isnan(E)) |
| 349 { |
| 350 info->valid = 0; |
| 351 RESTORE_STACK; |
| 352 return; |
| 353 } |
| 354 #endif |
| 355 |
337 tonal->E[tonal->E_count][b] = E; | 356 tonal->E[tonal->E_count][b] = E; |
338 frame_noisiness += nE/(1e-15f+E); | 357 frame_noisiness += nE/(1e-15f+E); |
339 | 358 |
340 frame_loudness += (float)sqrt(E+1e-10f); | 359 frame_loudness += (float)sqrt(E+1e-10f); |
341 logE[b] = (float)log(E+1e-10f); | 360 logE[b] = (float)log(E+1e-10f); |
342 tonal->lowE[b] = MIN32(logE[b], tonal->lowE[b]+.01f); | 361 tonal->lowE[b] = MIN32(logE[b], tonal->lowE[b]+.01f); |
343 tonal->highE[b] = MAX32(logE[b], tonal->highE[b]-.1f); | 362 tonal->highE[b] = MAX32(logE[b], tonal->highE[b]-.1f); |
344 if (tonal->highE[b] < tonal->lowE[b]+1.f) | 363 if (tonal->highE[b] < tonal->lowE[b]+1.f) |
345 { | 364 { |
346 tonal->highE[b]+=.5f; | 365 tonal->highE[b]+=.5f; |
(...skipping 257 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
604 info->music_prob = 0; | 623 info->music_prob = 0; |
605 #endif | 624 #endif |
606 /*for (i=0;i<25;i++) | 625 /*for (i=0;i<25;i++) |
607 printf("%f ", features[i]); | 626 printf("%f ", features[i]); |
608 printf("\n");*/ | 627 printf("\n");*/ |
609 | 628 |
610 info->bandwidth = bandwidth; | 629 info->bandwidth = bandwidth; |
611 /*printf("%d %d\n", info->bandwidth, info->opus_bandwidth);*/ | 630 /*printf("%d %d\n", info->bandwidth, info->opus_bandwidth);*/ |
612 info->noisiness = frame_noisiness; | 631 info->noisiness = frame_noisiness; |
613 info->valid = 1; | 632 info->valid = 1; |
614 if (info_out!=NULL) | |
615 OPUS_COPY(info_out, info, 1); | |
616 RESTORE_STACK; | 633 RESTORE_STACK; |
617 } | 634 } |
618 | 635 |
619 void run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, co
nst void *analysis_pcm, | 636 void run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, co
nst void *analysis_pcm, |
620 int analysis_frame_size, int frame_size, int c1, int c2, int C,
opus_int32 Fs, | 637 int analysis_frame_size, int frame_size, int c1, int c2, int C,
opus_int32 Fs, |
621 int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_inf
o) | 638 int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_inf
o) |
622 { | 639 { |
623 int offset; | 640 int offset; |
624 int pcm_len; | 641 int pcm_len; |
625 | 642 |
626 if (analysis_pcm != NULL) | 643 if (analysis_pcm != NULL) |
627 { | 644 { |
628 /* Avoid overflow/wrap-around of the analysis buffer */ | 645 /* Avoid overflow/wrap-around of the analysis buffer */ |
629 analysis_frame_size = IMIN((DETECT_SIZE-5)*Fs/100, analysis_frame_size); | 646 analysis_frame_size = IMIN((DETECT_SIZE-5)*Fs/100, analysis_frame_size); |
630 | 647 |
631 pcm_len = analysis_frame_size - analysis->analysis_offset; | 648 pcm_len = analysis_frame_size - analysis->analysis_offset; |
632 offset = analysis->analysis_offset; | 649 offset = analysis->analysis_offset; |
633 do { | 650 do { |
634 tonality_analysis(analysis, NULL, celt_mode, analysis_pcm, IMIN(480, pc
m_len), offset, c1, c2, C, lsb_depth, downmix); | 651 tonality_analysis(analysis, celt_mode, analysis_pcm, IMIN(480, pcm_len)
, offset, c1, c2, C, lsb_depth, downmix); |
635 offset += 480; | 652 offset += 480; |
636 pcm_len -= 480; | 653 pcm_len -= 480; |
637 } while (pcm_len>0); | 654 } while (pcm_len>0); |
638 analysis->analysis_offset = analysis_frame_size; | 655 analysis->analysis_offset = analysis_frame_size; |
639 | 656 |
640 analysis->analysis_offset -= frame_size; | 657 analysis->analysis_offset -= frame_size; |
641 } | 658 } |
642 | 659 |
643 analysis_info->valid = 0; | 660 analysis_info->valid = 0; |
644 tonality_get_info(analysis, analysis_info, frame_size); | 661 tonality_get_info(analysis, analysis_info, frame_size); |
645 } | 662 } |
OLD | NEW |