Index: silk/float/pitch_analysis_core_FLP.c |
diff --git a/silk/float/pitch_analysis_core_FLP.c b/silk/float/pitch_analysis_core_FLP.c |
index fbff90c3ffbfc4bfd90d9403db113a4431467205..605ea25fa271affdc204a66f1dc043b26879e6bf 100644 |
--- a/silk/float/pitch_analysis_core_FLP.c |
+++ b/silk/float/pitch_analysis_core_FLP.c |
@@ -8,11 +8,11 @@ this list of conditions and the following disclaimer. |
- Redistributions in binary form must reproduce the above copyright |
notice, this list of conditions and the following disclaimer in the |
documentation and/or other materials provided with the distribution. |
-- Neither the name of Internet Society, IETF or IETF Trust, nor the |
+- Neither the name of Internet Society, IETF or IETF Trust, nor the |
names of specific contributors, may be used to endorse or promote |
products derived from this software without specific prior written |
permission. |
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” |
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
@@ -35,9 +35,9 @@ POSSIBILITY OF SUCH DAMAGE. |
#include "SigProc_FLP.h" |
#include "SigProc_FIX.h" |
#include "pitch_est_defines.h" |
+#include "pitch.h" |
#define SCRATCH_SIZE 22 |
-#define eps 1.192092896e-07f |
/************************************************************/ |
/* Internally used functions */ |
@@ -85,6 +85,7 @@ opus_int silk_pitch_analysis_core_FLP( /* O Voicing estimate: 0 voiced, |
opus_int32 filt_state[ 6 ]; |
silk_float threshold, contour_bias; |
silk_float C[ PE_MAX_NB_SUBFR][ (PE_MAX_LAG >> 1) + 5 ]; |
+ opus_val32 xcorr[ PE_MAX_LAG_MS * 4 - PE_MIN_LAG_MS * 4 + 1 ]; |
silk_float CC[ PE_NB_CBKS_STAGE2_EXT ]; |
const silk_float *target_ptr, *basis_ptr; |
double cross_corr, normalizer, energy, energy_tmp; |
@@ -129,8 +130,6 @@ opus_int silk_pitch_analysis_core_FLP( /* O Voicing estimate: 0 voiced, |
max_lag_4kHz = PE_MAX_LAG_MS * 4; |
max_lag_8kHz = PE_MAX_LAG_MS * 8 - 1; |
- silk_memset(C, 0, sizeof(silk_float) * nb_subfr * ((PE_MAX_LAG >> 1) + 5)); |
- |
/* Resample from input sampled at Fs_kHz to 8 kHz */ |
if( Fs_kHz == 16 ) { |
/* Resample to 16 -> 8 khz */ |
@@ -164,6 +163,7 @@ opus_int silk_pitch_analysis_core_FLP( /* O Voicing estimate: 0 voiced, |
/****************************************************************************** |
* FIRST STAGE, operating in 4 khz |
******************************************************************************/ |
+ silk_memset(C, 0, sizeof(silk_float) * nb_subfr * ((PE_MAX_LAG >> 1) + 5)); |
target_ptr = &frame_4kHz[ silk_LSHIFT( sf_length_4kHz, 2 ) ]; |
for( k = 0; k < nb_subfr >> 1; k++ ) { |
/* Check that we are within range of the array */ |
@@ -176,27 +176,31 @@ opus_int silk_pitch_analysis_core_FLP( /* O Voicing estimate: 0 voiced, |
silk_assert( basis_ptr >= frame_4kHz ); |
silk_assert( basis_ptr + sf_length_8kHz <= frame_4kHz + frame_length_4kHz ); |
+ celt_pitch_xcorr( target_ptr, target_ptr-max_lag_4kHz, xcorr, sf_length_8kHz, max_lag_4kHz - min_lag_4kHz + 1 ); |
+ |
/* Calculate first vector products before loop */ |
- cross_corr = silk_inner_product_FLP( target_ptr, basis_ptr, sf_length_8kHz ); |
- normalizer = silk_energy_FLP( basis_ptr, sf_length_8kHz ) + sf_length_8kHz * 4000.0f; |
+ cross_corr = xcorr[ max_lag_4kHz - min_lag_4kHz ]; |
+ normalizer = silk_energy_FLP( target_ptr, sf_length_8kHz ) + |
+ silk_energy_FLP( basis_ptr, sf_length_8kHz ) + |
+ sf_length_8kHz * 4000.0f; |
- C[ 0 ][ min_lag_4kHz ] += (silk_float)(cross_corr / sqrt(normalizer)); |
+ C[ 0 ][ min_lag_4kHz ] += (silk_float)( 2 * cross_corr / normalizer ); |
/* From now on normalizer is computed recursively */ |
- for(d = min_lag_4kHz + 1; d <= max_lag_4kHz; d++) { |
+ for( d = min_lag_4kHz + 1; d <= max_lag_4kHz; d++ ) { |
basis_ptr--; |
/* Check that we are within range of the array */ |
silk_assert( basis_ptr >= frame_4kHz ); |
silk_assert( basis_ptr + sf_length_8kHz <= frame_4kHz + frame_length_4kHz ); |
- cross_corr = silk_inner_product_FLP(target_ptr, basis_ptr, sf_length_8kHz); |
+ cross_corr = xcorr[ max_lag_4kHz - d ]; |
/* Add contribution of new sample and remove contribution from oldest sample */ |
normalizer += |
basis_ptr[ 0 ] * (double)basis_ptr[ 0 ] - |
basis_ptr[ sf_length_8kHz ] * (double)basis_ptr[ sf_length_8kHz ]; |
- C[ 0 ][ d ] += (silk_float)(cross_corr / sqrt( normalizer )); |
+ C[ 0 ][ d ] += (silk_float)( 2 * cross_corr / normalizer ); |
} |
/* Update target pointer */ |
target_ptr += sf_length_8kHz; |
@@ -214,13 +218,7 @@ opus_int silk_pitch_analysis_core_FLP( /* O Voicing estimate: 0 voiced, |
/* Escape if correlation is very low already here */ |
Cmax = C[ 0 ][ min_lag_4kHz ]; |
- target_ptr = &frame_4kHz[ silk_SMULBB( sf_length_4kHz, nb_subfr ) ]; |
- energy = 1000.0f; |
- for( i = 0; i < silk_LSHIFT( sf_length_4kHz, 2 ); i++ ) { |
- energy += target_ptr[i] * (double)target_ptr[i]; |
- } |
- threshold = Cmax * Cmax; |
- if( energy / 16.0f > threshold ) { |
+ if( Cmax < 0.2f ) { |
silk_memset( pitch_out, 0, nb_subfr * sizeof( opus_int ) ); |
*LTPCorr = 0.0f; |
*lagIndex = 0; |
@@ -287,14 +285,14 @@ opus_int silk_pitch_analysis_core_FLP( /* O Voicing estimate: 0 voiced, |
target_ptr = &frame_8kHz[ PE_LTP_MEM_LENGTH_MS * 8 ]; |
} |
for( k = 0; k < nb_subfr; k++ ) { |
- energy_tmp = silk_energy_FLP( target_ptr, sf_length_8kHz ); |
+ energy_tmp = silk_energy_FLP( target_ptr, sf_length_8kHz ) + 1.0; |
for( j = 0; j < length_d_comp; j++ ) { |
d = d_comp[ j ]; |
basis_ptr = target_ptr - d; |
cross_corr = silk_inner_product_FLP( basis_ptr, target_ptr, sf_length_8kHz ); |
- energy = silk_energy_FLP( basis_ptr, sf_length_8kHz ); |
if( cross_corr > 0.0f ) { |
- C[ k ][ d ] = (silk_float)(cross_corr * cross_corr / (energy * energy_tmp + eps)); |
+ energy = silk_energy_FLP( basis_ptr, sf_length_8kHz ); |
+ C[ k ][ d ] = (silk_float)( 2 * cross_corr / ( energy + energy_tmp ) ); |
} else { |
C[ k ][ d ] = 0.0f; |
} |
@@ -317,7 +315,7 @@ opus_int silk_pitch_analysis_core_FLP( /* O Voicing estimate: 0 voiced, |
} else if( Fs_kHz == 16 ) { |
prevLag = silk_RSHIFT( prevLag, 1 ); |
} |
- prevLag_log2 = silk_log2((silk_float)prevLag); |
+ prevLag_log2 = silk_log2( (silk_float)prevLag ); |
} else { |
prevLag_log2 = 0; |
} |
@@ -356,23 +354,20 @@ opus_int silk_pitch_analysis_core_FLP( /* O Voicing estimate: 0 voiced, |
CBimax_new = i; |
} |
} |
- CCmax_new = silk_max_float(CCmax_new, 0.0f); /* To avoid taking square root of negative number later */ |
- CCmax_new_b = CCmax_new; |
/* Bias towards shorter lags */ |
- lag_log2 = silk_log2((silk_float)d); |
- CCmax_new_b -= PE_SHORTLAG_BIAS * nb_subfr * lag_log2; |
+ lag_log2 = silk_log2( (silk_float)d ); |
+ CCmax_new_b = CCmax_new - PE_SHORTLAG_BIAS * nb_subfr * lag_log2; |
/* Bias towards previous lag */ |
if( prevLag > 0 ) { |
delta_lag_log2_sqr = lag_log2 - prevLag_log2; |
delta_lag_log2_sqr *= delta_lag_log2_sqr; |
- CCmax_new_b -= PE_PREVLAG_BIAS * nb_subfr * (*LTPCorr) * delta_lag_log2_sqr / (delta_lag_log2_sqr + 0.5f); |
+ CCmax_new_b -= PE_PREVLAG_BIAS * nb_subfr * (*LTPCorr) * delta_lag_log2_sqr / ( delta_lag_log2_sqr + 0.5f ); |
} |
- if( CCmax_new_b > CCmax_b && /* Find maximum biased correlation */ |
- CCmax_new > nb_subfr * search_thres2 * search_thres2 && /* Correlation needs to be high enough to be voiced */ |
- silk_CB_lags_stage2[ 0 ][ CBimax_new ] <= min_lag_8kHz /* Lag must be in range */ |
+ if( CCmax_new_b > CCmax_b && /* Find maximum biased correlation */ |
+ CCmax_new > nb_subfr * search_thres2 /* Correlation needs to be high enough to be voiced */ |
) { |
CCmax_b = CCmax_new_b; |
CCmax = CCmax_new; |
@@ -390,6 +385,10 @@ opus_int silk_pitch_analysis_core_FLP( /* O Voicing estimate: 0 voiced, |
return 1; |
} |
+ /* Output normalized correlation */ |
+ *LTPCorr = (silk_float)( CCmax / nb_subfr ); |
+ silk_assert( *LTPCorr >= 0.0f ); |
+ |
if( Fs_kHz > 8 ) { |
/* Search in original signal */ |
@@ -406,8 +405,6 @@ opus_int silk_pitch_analysis_core_FLP( /* O Voicing estimate: 0 voiced, |
end_lag = silk_min_int( lag + 2, max_lag ); |
lag_new = lag; /* to avoid undefined lag */ |
CBimax = 0; /* to avoid undefined lag */ |
- silk_assert( CCmax >= 0.0f ); |
- *LTPCorr = (silk_float)sqrt( CCmax / nb_subfr ); /* Output normalized correlation */ |
CCmax = -1000.0f; |
@@ -430,25 +427,25 @@ opus_int silk_pitch_analysis_core_FLP( /* O Voicing estimate: 0 voiced, |
Lag_CB_ptr = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ]; |
} |
+ target_ptr = &frame[ PE_LTP_MEM_LENGTH_MS * Fs_kHz ]; |
+ energy_tmp = silk_energy_FLP( target_ptr, nb_subfr * sf_length ) + 1.0; |
for( d = start_lag; d <= end_lag; d++ ) { |
for( j = 0; j < nb_cbk_search; j++ ) { |
cross_corr = 0.0; |
- energy = eps; |
+ energy = energy_tmp; |
for( k = 0; k < nb_subfr; k++ ) { |
- energy += energies_st3[ k ][ j ][ lag_counter ]; |
cross_corr += cross_corr_st3[ k ][ j ][ lag_counter ]; |
+ energy += energies_st3[ k ][ j ][ lag_counter ]; |
} |
if( cross_corr > 0.0 ) { |
- CCmax_new = (silk_float)(cross_corr * cross_corr / energy); |
+ CCmax_new = (silk_float)( 2 * cross_corr / energy ); |
/* Reduce depending on flatness of contour */ |
CCmax_new *= 1.0f - contour_bias * j; |
} else { |
CCmax_new = 0.0f; |
} |
- if( CCmax_new > CCmax && |
- ( d + (opus_int)silk_CB_lags_stage3[ 0 ][ j ] ) <= max_lag |
- ) { |
+ if( CCmax_new > CCmax && ( d + (opus_int)silk_CB_lags_stage3[ 0 ][ j ] ) <= max_lag ) { |
CCmax = CCmax_new; |
lag_new = d; |
CBimax = j; |
@@ -464,12 +461,10 @@ opus_int silk_pitch_analysis_core_FLP( /* O Voicing estimate: 0 voiced, |
*lagIndex = (opus_int16)( lag_new - min_lag ); |
*contourIndex = (opus_int8)CBimax; |
} else { /* Fs_kHz == 8 */ |
- /* Save Lags and correlation */ |
- silk_assert( CCmax >= 0.0f ); |
- *LTPCorr = (silk_float)sqrt( CCmax / nb_subfr ); /* Output normalized correlation */ |
+ /* Save Lags */ |
for( k = 0; k < nb_subfr; k++ ) { |
pitch_out[ k ] = lag + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size ); |
- pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], min_lag_8kHz, PE_MAX_LAG_MS * Fs_kHz ); |
+ pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], min_lag_8kHz, PE_MAX_LAG_MS * 8 ); |
} |
*lagIndex = (opus_int16)( lag - min_lag_8kHz ); |
*contourIndex = (opus_int8)CBimax; |
@@ -479,6 +474,19 @@ opus_int silk_pitch_analysis_core_FLP( /* O Voicing estimate: 0 voiced, |
return 0; |
} |
+/*********************************************************************** |
+ * Calculates the correlations used in stage 3 search. In order to cover |
+ * the whole lag codebook for all the searched offset lags (lag +- 2), |
+ * the following correlations are needed in each sub frame: |
+ * |
+ * sf1: lag range [-8,...,7] total 16 correlations |
+ * sf2: lag range [-4,...,4] total 9 correlations |
+ * sf3: lag range [-3,...,4] total 8 correlations |
+ * sf4: lag range [-6,...,8] total 15 correlations |
+ * |
+ * In total 48 correlations. The direct implementation computed in worst |
+ * case 4*12*5 = 240 correlations, but more likely around 120. |
+ ***********************************************************************/ |
static void silk_P_Ana_calc_corr_st3( |
silk_float cross_corr_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ], /* O 3 DIM correlation array */ |
const silk_float frame[], /* I vector to correlate */ |
@@ -487,24 +495,12 @@ static void silk_P_Ana_calc_corr_st3( |
opus_int nb_subfr, /* I number of subframes */ |
opus_int complexity /* I Complexity setting */ |
) |
- /*********************************************************************** |
- Calculates the correlations used in stage 3 search. In order to cover |
- the whole lag codebook for all the searched offset lags (lag +- 2), |
- the following correlations are needed in each sub frame: |
- |
- sf1: lag range [-8,...,7] total 16 correlations |
- sf2: lag range [-4,...,4] total 9 correlations |
- sf3: lag range [-3,....4] total 8 correltions |
- sf4: lag range [-6,....8] total 15 correlations |
- |
- In total 48 correlations. The direct implementation computed in worst case |
- 4*12*5 = 240 correlations, but more likely around 120. |
- **********************************************************************/ |
{ |
const silk_float *target_ptr, *basis_ptr; |
opus_int i, j, k, lag_counter, lag_low, lag_high; |
opus_int nb_cbk_search, delta, idx, cbk_size; |
silk_float scratch_mem[ SCRATCH_SIZE ]; |
+ opus_val32 xcorr[ SCRATCH_SIZE ]; |
const opus_int8 *Lag_range_ptr, *Lag_CB_ptr; |
silk_assert( complexity >= SILK_PE_MIN_COMPLEX ); |
@@ -530,10 +526,12 @@ static void silk_P_Ana_calc_corr_st3( |
/* Calculate the correlations for each subframe */ |
lag_low = matrix_ptr( Lag_range_ptr, k, 0, 2 ); |
lag_high = matrix_ptr( Lag_range_ptr, k, 1, 2 ); |
+ silk_assert(lag_high-lag_low+1 <= SCRATCH_SIZE); |
+ celt_pitch_xcorr( target_ptr, target_ptr - start_lag - lag_high, xcorr, sf_length, lag_high - lag_low + 1 ); |
for( j = lag_low; j <= lag_high; j++ ) { |
basis_ptr = target_ptr - ( start_lag + j ); |
silk_assert( lag_counter < SCRATCH_SIZE ); |
- scratch_mem[ lag_counter ] = (silk_float)silk_inner_product_FLP( target_ptr, basis_ptr, sf_length ); |
+ scratch_mem[ lag_counter ] = xcorr[ lag_high - j ]; |
lag_counter++; |
} |
@@ -552,6 +550,10 @@ static void silk_P_Ana_calc_corr_st3( |
} |
} |
+/********************************************************************/ |
+/* Calculate the energies for first two subframes. The energies are */ |
+/* calculated recursively. */ |
+/********************************************************************/ |
static void silk_P_Ana_calc_energy_st3( |
silk_float energies_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ], /* O 3 DIM correlation array */ |
const silk_float frame[], /* I vector to correlate */ |
@@ -560,10 +562,6 @@ static void silk_P_Ana_calc_energy_st3( |
opus_int nb_subfr, /* I number of subframes */ |
opus_int complexity /* I Complexity setting */ |
) |
-/**************************************************************** |
-Calculate the energies for first two subframes. The energies are |
-calculated recursively. |
-****************************************************************/ |
{ |
const silk_float *target_ptr, *basis_ptr; |
double energy; |