| OLD | NEW |
| 1 /* Copyright (c) 2011 Xiph.Org Foundation | 1 /* Copyright (c) 2011 Xiph.Org Foundation |
| 2 Written by Jean-Marc Valin */ | 2 Written by Jean-Marc Valin */ |
| 3 /* | 3 /* |
| 4 Redistribution and use in source and binary forms, with or without | 4 Redistribution and use in source and binary forms, with or without |
| 5 modification, are permitted provided that the following conditions | 5 modification, are permitted provided that the following conditions |
| 6 are met: | 6 are met: |
| 7 | 7 |
| 8 - Redistributions of source code must retain the above copyright | 8 - Redistributions of source code must retain the above copyright |
| 9 notice, this list of conditions and the following disclaimer. | 9 notice, this list of conditions and the following disclaimer. |
| 10 | 10 |
| (...skipping 171 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 182 for (i=0;i<DETECT_SIZE-curr_lookahead;i++) | 182 for (i=0;i<DETECT_SIZE-curr_lookahead;i++) |
| 183 psum += tonal->pmusic[i]; | 183 psum += tonal->pmusic[i]; |
| 184 for (;i<DETECT_SIZE;i++) | 184 for (;i<DETECT_SIZE;i++) |
| 185 psum += tonal->pspeech[i]; | 185 psum += tonal->pspeech[i]; |
| 186 psum = psum*tonal->music_confidence + (1-psum)*tonal->speech_confidence; | 186 psum = psum*tonal->music_confidence + (1-psum)*tonal->speech_confidence; |
| 187 /*printf("%f %f %f\n", psum, info_out->music_prob, info_out->tonality);*/ | 187 /*printf("%f %f %f\n", psum, info_out->music_prob, info_out->tonality);*/ |
| 188 | 188 |
| 189 info_out->music_prob = psum; | 189 info_out->music_prob = psum; |
| 190 } | 190 } |
| 191 | 191 |
| 192 void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, const CELTMode *celt_mode, const void *x, int len, int offset, int c1, int c2, int C, int lsb_depth, downmix_func downmix) | 192 static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt_mode, const void *x, int len, int offset, int c1, int c2, int C, int lsb_depth, downmix_func downmix) |
| 193 { | 193 { |
| 194 int i, b; | 194 int i, b; |
| 195 const kiss_fft_state *kfft; | 195 const kiss_fft_state *kfft; |
| 196 VARDECL(kiss_fft_cpx, in); | 196 VARDECL(kiss_fft_cpx, in); |
| 197 VARDECL(kiss_fft_cpx, out); | 197 VARDECL(kiss_fft_cpx, out); |
| 198 int N = 480, N2=240; | 198 int N = 480, N2=240; |
| 199 float * OPUS_RESTRICT A = tonal->angle; | 199 float * OPUS_RESTRICT A = tonal->angle; |
| 200 float * OPUS_RESTRICT dA = tonal->d_angle; | 200 float * OPUS_RESTRICT dA = tonal->d_angle; |
| 201 float * OPUS_RESTRICT d2A = tonal->d2_angle; | 201 float * OPUS_RESTRICT d2A = tonal->d2_angle; |
| 202 VARDECL(float, tonality); | 202 VARDECL(float, tonality); |
| (...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 256 in[i].r = (kiss_fft_scalar)(w*tonal->inmem[i]); | 256 in[i].r = (kiss_fft_scalar)(w*tonal->inmem[i]); |
| 257 in[i].i = (kiss_fft_scalar)(w*tonal->inmem[N2+i]); | 257 in[i].i = (kiss_fft_scalar)(w*tonal->inmem[N2+i]); |
| 258 in[N-i-1].r = (kiss_fft_scalar)(w*tonal->inmem[N-i-1]); | 258 in[N-i-1].r = (kiss_fft_scalar)(w*tonal->inmem[N-i-1]); |
| 259 in[N-i-1].i = (kiss_fft_scalar)(w*tonal->inmem[N+N2-i-1]); | 259 in[N-i-1].i = (kiss_fft_scalar)(w*tonal->inmem[N+N2-i-1]); |
| 260 } | 260 } |
| 261 OPUS_MOVE(tonal->inmem, tonal->inmem+ANALYSIS_BUF_SIZE-240, 240); | 261 OPUS_MOVE(tonal->inmem, tonal->inmem+ANALYSIS_BUF_SIZE-240, 240); |
| 262 remaining = len - (ANALYSIS_BUF_SIZE-tonal->mem_fill); | 262 remaining = len - (ANALYSIS_BUF_SIZE-tonal->mem_fill); |
| 263 downmix(x, &tonal->inmem[240], remaining, offset+ANALYSIS_BUF_SIZE-tonal->mem_fill, c1, c2, C); | 263 downmix(x, &tonal->inmem[240], remaining, offset+ANALYSIS_BUF_SIZE-tonal->mem_fill, c1, c2, C); |
| 264 tonal->mem_fill = 240 + remaining; | 264 tonal->mem_fill = 240 + remaining; |
| 265 opus_fft(kfft, in, out); | 265 opus_fft(kfft, in, out); |
| 266 #ifndef FIXED_POINT |
| 267 /* If there's any NaN on the input, the entire output will be NaN, so we only need to check one value. */ |
| 268 if (celt_isnan(out[0].r)) |
| 269 { |
| 270 info->valid = 0; |
| 271 RESTORE_STACK; |
| 272 return; |
| 273 } |
| 274 #endif |
| 266 | 275 |
| 267 for (i=1;i<N2;i++) | 276 for (i=1;i<N2;i++) |
| 268 { | 277 { |
| 269 float X1r, X2r, X1i, X2i; | 278 float X1r, X2r, X1i, X2i; |
| 270 float angle, d_angle, d2_angle; | 279 float angle, d_angle, d2_angle; |
| 271 float angle2, d_angle2, d2_angle2; | 280 float angle2, d_angle2, d2_angle2; |
| 272 float mod1, mod2, avg_mod; | 281 float mod1, mod2, avg_mod; |
| 273 X1r = (float)out[i].r+out[N-i].r; | 282 X1r = (float)out[i].r+out[N-i].r; |
| 274 X1i = (float)out[i].i-out[N-i].i; | 283 X1i = (float)out[i].i-out[N-i].i; |
| 275 X2r = (float)out[i].i+out[N-i].i; | 284 X2r = (float)out[i].i+out[N-i].i; |
| (...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 327 float binE = out[i].r*(float)out[i].r + out[N-i].r*(float)out[N-i].r | 336 float binE = out[i].r*(float)out[i].r + out[N-i].r*(float)out[N-i].r |
| 328 + out[i].i*(float)out[i].i + out[N-i].i*(float)out[N-i].i; | 337 + out[i].i*(float)out[i].i + out[N-i].i*(float)out[N-i].i; |
| 329 #ifdef FIXED_POINT | 338 #ifdef FIXED_POINT |
| 330 /* FIXME: It's probably best to change the BFCC filter initial state instead */ | 339 /* FIXME: It's probably best to change the BFCC filter initial state instead */ |
| 331 binE *= 5.55e-17f; | 340 binE *= 5.55e-17f; |
| 332 #endif | 341 #endif |
| 333 E += binE; | 342 E += binE; |
| 334 tE += binE*tonality[i]; | 343 tE += binE*tonality[i]; |
| 335 nE += binE*2.f*(.5f-noisiness[i]); | 344 nE += binE*2.f*(.5f-noisiness[i]); |
| 336 } | 345 } |
| 346 #ifndef FIXED_POINT |
| 347 /* Check for extreme band energies that could cause NaNs later. */ |
| 348 if (!(E<1e9f) || celt_isnan(E)) |
| 349 { |
| 350 info->valid = 0; |
| 351 RESTORE_STACK; |
| 352 return; |
| 353 } |
| 354 #endif |
| 355 |
| 337 tonal->E[tonal->E_count][b] = E; | 356 tonal->E[tonal->E_count][b] = E; |
| 338 frame_noisiness += nE/(1e-15f+E); | 357 frame_noisiness += nE/(1e-15f+E); |
| 339 | 358 |
| 340 frame_loudness += (float)sqrt(E+1e-10f); | 359 frame_loudness += (float)sqrt(E+1e-10f); |
| 341 logE[b] = (float)log(E+1e-10f); | 360 logE[b] = (float)log(E+1e-10f); |
| 342 tonal->lowE[b] = MIN32(logE[b], tonal->lowE[b]+.01f); | 361 tonal->lowE[b] = MIN32(logE[b], tonal->lowE[b]+.01f); |
| 343 tonal->highE[b] = MAX32(logE[b], tonal->highE[b]-.1f); | 362 tonal->highE[b] = MAX32(logE[b], tonal->highE[b]-.1f); |
| 344 if (tonal->highE[b] < tonal->lowE[b]+1.f) | 363 if (tonal->highE[b] < tonal->lowE[b]+1.f) |
| 345 { | 364 { |
| 346 tonal->highE[b]+=.5f; | 365 tonal->highE[b]+=.5f; |
| (...skipping 257 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 604 info->music_prob = 0; | 623 info->music_prob = 0; |
| 605 #endif | 624 #endif |
| 606 /*for (i=0;i<25;i++) | 625 /*for (i=0;i<25;i++) |
| 607 printf("%f ", features[i]); | 626 printf("%f ", features[i]); |
| 608 printf("\n");*/ | 627 printf("\n");*/ |
| 609 | 628 |
| 610 info->bandwidth = bandwidth; | 629 info->bandwidth = bandwidth; |
| 611 /*printf("%d %d\n", info->bandwidth, info->opus_bandwidth);*/ | 630 /*printf("%d %d\n", info->bandwidth, info->opus_bandwidth);*/ |
| 612 info->noisiness = frame_noisiness; | 631 info->noisiness = frame_noisiness; |
| 613 info->valid = 1; | 632 info->valid = 1; |
| 614 if (info_out!=NULL) | |
| 615 OPUS_COPY(info_out, info, 1); | |
| 616 RESTORE_STACK; | 633 RESTORE_STACK; |
| 617 } | 634 } |
| 618 | 635 |
| 619 void run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *analysis_pcm, | 636 void run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *analysis_pcm, |
| 620 int analysis_frame_size, int frame_size, int c1, int c2, int C, opus_int32 Fs, | 637 int analysis_frame_size, int frame_size, int c1, int c2, int C, opus_int32 Fs, |
| 621 int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info) | 638 int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info) |
| 622 { | 639 { |
| 623 int offset; | 640 int offset; |
| 624 int pcm_len; | 641 int pcm_len; |
| 625 | 642 |
| 626 if (analysis_pcm != NULL) | 643 if (analysis_pcm != NULL) |
| 627 { | 644 { |
| 628 /* Avoid overflow/wrap-around of the analysis buffer */ | 645 /* Avoid overflow/wrap-around of the analysis buffer */ |
| 629 analysis_frame_size = IMIN((DETECT_SIZE-5)*Fs/100, analysis_frame_size); | 646 analysis_frame_size = IMIN((DETECT_SIZE-5)*Fs/100, analysis_frame_size); |
| 630 | 647 |
| 631 pcm_len = analysis_frame_size - analysis->analysis_offset; | 648 pcm_len = analysis_frame_size - analysis->analysis_offset; |
| 632 offset = analysis->analysis_offset; | 649 offset = analysis->analysis_offset; |
| 633 do { | 650 do { |
| 634 tonality_analysis(analysis, NULL, celt_mode, analysis_pcm, IMIN(480, pcm_len), offset, c1, c2, C, lsb_depth, downmix); | 651 tonality_analysis(analysis, celt_mode, analysis_pcm, IMIN(480, pcm_len), offset, c1, c2, C, lsb_depth, downmix); |
| 635 offset += 480; | 652 offset += 480; |
| 636 pcm_len -= 480; | 653 pcm_len -= 480; |
| 637 } while (pcm_len>0); | 654 } while (pcm_len>0); |
| 638 analysis->analysis_offset = analysis_frame_size; | 655 analysis->analysis_offset = analysis_frame_size; |
| 639 | 656 |
| 640 analysis->analysis_offset -= frame_size; | 657 analysis->analysis_offset -= frame_size; |
| 641 } | 658 } |
| 642 | 659 |
| 643 analysis_info->valid = 0; | 660 analysis_info->valid = 0; |
| 644 tonality_get_info(analysis, analysis_info, frame_size); | 661 tonality_get_info(analysis, analysis_info, frame_size); |
| 645 } | 662 } |
| OLD | NEW |