third_party/opus/src/silk/VAD.c - Issue 2195313002: Remove Opus from DEPS and import a local copy

Side by Side Diff: third_party/opus/src/silk/VAD.c

Issue 2195313002: Remove Opus from DEPS and import a local copy (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Local copy of opus, opus/src/.gitignore, opus/DEPS, update README.chromium Created 4 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
(Empty)
	1 /***********************************************************************

	2 Copyright (c) 2006-2011, Skype Limited. All rights reserved.

	3 Redistribution and use in source and binary forms, with or without

	4 modification, are permitted provided that the following conditions

	5 are met:

	6 - Redistributions of source code must retain the above copyright notice,

	7 this list of conditions and the following disclaimer.

	8 - Redistributions in binary form must reproduce the above copyright

	9 notice, this list of conditions and the following disclaimer in the

	10 documentation and/or other materials provided with the distribution.

	11 - Neither the name of Internet Society, IETF or IETF Trust, nor the

	12 names of specific contributors, may be used to endorse or promote

	13 products derived from this software without specific prior written

	14 permission.

	15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"

	16 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

	17 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE

	18 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE

	19 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR

	20 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF

	21 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS

	22 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN

	23 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)

	24 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE

	25 POSSIBILITY OF SUCH DAMAGE.

	26 ***********************************************************************/

	27

	28 #ifdef HAVE_CONFIG_H

	29 #include "config.h"

	30 #endif

	31

	32 #include "main.h"

	33 #include "stack_alloc.h"

	34

	35 /* Silk VAD noise level estimation */

	36 # if !defined(OPUS_X86_MAY_HAVE_SSE4_1)

	37 static OPUS_INLINE void silk_VAD_GetNoiseLevels(

	38 const opus_int32 pX[ VAD_N_BANDS ], /* I subband energies */

	39 silk_VAD_state psSilk_VAD / I/O Pointer to Silk VAD state */

	40 );

	41 #endif

	42

	43 /**********************************/

	44 /* Initialization of the Silk VAD */

	45 /**********************************/

	46 opus_int silk_VAD_Init( /* O Return v alue, 0 if success */

	47 silk_VAD_state psSilk_VAD / I/O Pointer to Silk VAD state */

	48 )

	49 {

	50 opus_int b, ret = 0;

	51

	52 /* reset state memory */

	53 silk_memset( psSilk_VAD, 0, sizeof( silk_VAD_state ) );

	54

	55 /* init noise levels */

	56 /* Initialize array with approx pink noise levels (psd proportional to inver se of frequency) */

	57 for( b = 0; b < VAD_N_BANDS; b++ ) {

	58 psSilk_VAD->NoiseLevelBias[ b ] = silk_max_32( silk_DIV32_16( VAD_NOISE_ LEVELS_BIAS, b + 1 ), 1 );

	59 }

	60

	61 /* Initialize state */

	62 for( b = 0; b < VAD_N_BANDS; b++ ) {

	63 psSilk_VAD->NL[ b ] = silk_MUL( 100, psSilk_VAD->NoiseLevelBias[ b ] );

	64 psSilk_VAD->inv_NL[ b ] = silk_DIV32( silk_int32_MAX, psSilk_VAD->NL[ b ] );

	65 }

	66 psSilk_VAD->counter = 15;

	67

	68 /* init smoothed energy-to-noise ratio*/

	69 for( b = 0; b < VAD_N_BANDS; b++ ) {

	70 psSilk_VAD->NrgRatioSmth_Q8[ b ] = 100 * 256; /* 100 * 256 --> 20 dB SNR */

	71 }

	72

	73 return( ret );

	74 }

	75

	76 /* Weighting factors for tilt measure */

	77 static const opus_int32 tiltWeights[ VAD_N_BANDS ] = { 30000, 6000, -12000, -120 00 };

	78

	79 /***************************************/

	80 /* Get the speech activity level in Q8 */

	81 /***************************************/

	82 opus_int silk_VAD_GetSA_Q8_c( /* O Return v alue, 0 if success */

	83 silk_encoder_state psEncC, / I/O Encoder state */

	84 const opus_int16 pIn[] /* I PCM inpu t */

	85 )

	86 {

	87 opus_int SA_Q15, pSNR_dB_Q7, input_tilt;

	88 opus_int decimated_framelength1, decimated_framelength2;

	89 opus_int decimated_framelength;

	90 opus_int dec_subframe_length, dec_subframe_offset, SNR_Q7, i, b, s;

	91 opus_int32 sumSquared, smooth_coef_Q16;

	92 opus_int16 HPstateTmp;

	93 VARDECL( opus_int16, X );

	94 opus_int32 Xnrg[ VAD_N_BANDS ];

	95 opus_int32 NrgToNoiseRatio_Q8[ VAD_N_BANDS ];

	96 opus_int32 speech_nrg, x_tmp;

	97 opus_int X_offset[ VAD_N_BANDS ];

	98 opus_int ret = 0;

	99 silk_VAD_state *psSilk_VAD = &psEncC->sVAD;

	100 SAVE_STACK;

	101

	102 /* Safety checks */

	103 silk_assert( VAD_N_BANDS == 4 );

	104 silk_assert( MAX_FRAME_LENGTH >= psEncC->frame_length );

	105 silk_assert( psEncC->frame_length <= 512 );

	106 silk_assert( psEncC->frame_length == 8 * silk_RSHIFT( psEncC->frame_length, 3 ) );

	107

	108 /***********************/

	109 /* Filter and Decimate */

	110 /***********************/

	111 decimated_framelength1 = silk_RSHIFT( psEncC->frame_length, 1 );

	112 decimated_framelength2 = silk_RSHIFT( psEncC->frame_length, 2 );

	113 decimated_framelength = silk_RSHIFT( psEncC->frame_length, 3 );

	114 /* Decimate into 4 bands:

	115 0 L 3L L 3L 5L

	116 - -- - -- --

	117 8 8 2 4 4

	118

	119 [0-1 kHz\| temp. \|1-2 kHz\| 2-4 kHz \| 4-8 kHz \|

	120

	121 They're arranged to allow the minimal ( frame_length / 4 ) extra

	122 scratch space during the downsampling process */

	123 X_offset[ 0 ] = 0;

	124 X_offset[ 1 ] = decimated_framelength + decimated_framelength2;

	125 X_offset[ 2 ] = X_offset[ 1 ] + decimated_framelength;

	126 X_offset[ 3 ] = X_offset[ 2 ] + decimated_framelength2;

	127 ALLOC( X, X_offset[ 3 ] + decimated_framelength1, opus_int16 );

	128

	129 /* 0-8 kHz to 0-4 kHz and 4-8 kHz */

	130 silk_ana_filt_bank_1( pIn, &psSilk_VAD->AnaState[ 0 ],

	131 X, &X[ X_offset[ 3 ] ], psEncC->frame_length );

	132

	133 /* 0-4 kHz to 0-2 kHz and 2-4 kHz */

	134 silk_ana_filt_bank_1( X, &psSilk_VAD->AnaState1[ 0 ],

	135 X, &X[ X_offset[ 2 ] ], decimated_framelength1 );

	136

	137 /* 0-2 kHz to 0-1 kHz and 1-2 kHz */

	138 silk_ana_filt_bank_1( X, &psSilk_VAD->AnaState2[ 0 ],

	139 X, &X[ X_offset[ 1 ] ], decimated_framelength2 );

	140

	141 /*********************************************/

	142 /* HP filter on lowest band (differentiator) */

	143 /*********************************************/

	144 X[ decimated_framelength - 1 ] = silk_RSHIFT( X[ decimated_framelength - 1 ] , 1 );

	145 HPstateTmp = X[ decimated_framelength - 1 ];

	146 for( i = decimated_framelength - 1; i > 0; i-- ) {

	147 X[ i - 1 ] = silk_RSHIFT( X[ i - 1 ], 1 );

	148 X[ i ] -= X[ i - 1 ];

	149 }

	150 X[ 0 ] -= psSilk_VAD->HPstate;

	151 psSilk_VAD->HPstate = HPstateTmp;

	152

	153 /*************************************/

	154 /* Calculate the energy in each band */

	155 /*************************************/

	156 for( b = 0; b < VAD_N_BANDS; b++ ) {

	157 /* Find the decimated framelength in the non-uniformly divided bands */

	158 decimated_framelength = silk_RSHIFT( psEncC->frame_length, silk_min_int( VAD_N_BANDS - b, VAD_N_BANDS - 1 ) );

	159

	160 /* Split length into subframe lengths */

	161 dec_subframe_length = silk_RSHIFT( decimated_framelength, VAD_INTERNAL_S UBFRAMES_LOG2 );

	162 dec_subframe_offset = 0;

	163

	164 /* Compute energy per sub-frame */

	165 /* initialize with summed energy of last subframe */

	166 Xnrg[ b ] = psSilk_VAD->XnrgSubfr[ b ];

	167 for( s = 0; s < VAD_INTERNAL_SUBFRAMES; s++ ) {

	168 sumSquared = 0;

	169 for( i = 0; i < dec_subframe_length; i++ ) {

	170 /* The energy will be less than dec_subframe_length * ( silk_int 16_MIN / 8 ) ^ 2. */

	171 /* Therefore we can accumulate with no risk of overflow (unless dec_subframe_length > 128) */

	172 x_tmp = silk_RSHIFT(

	173 X[ X_offset[ b ] + i + dec_subframe_offset ], 3 );

	174 sumSquared = silk_SMLABB( sumSquared, x_tmp, x_tmp );

	175

	176 /* Safety check */

	177 silk_assert( sumSquared >= 0 );

	178 }

	179

	180 /* Add/saturate summed energy of current subframe */

	181 if( s < VAD_INTERNAL_SUBFRAMES - 1 ) {

	182 Xnrg[ b ] = silk_ADD_POS_SAT32( Xnrg[ b ], sumSquared );

	183 } else {

	184 /* Look-ahead subframe */

	185 Xnrg[ b ] = silk_ADD_POS_SAT32( Xnrg[ b ], silk_RSHIFT( sumSquar ed, 1 ) );

	186 }

	187

	188 dec_subframe_offset += dec_subframe_length;

	189 }

	190 psSilk_VAD->XnrgSubfr[ b ] = sumSquared;

	191 }

	192

	193 /********************/

	194 /* Noise estimation */

	195 /********************/

	196 silk_VAD_GetNoiseLevels( &Xnrg[ 0 ], psSilk_VAD );

	197

	198 /***********************************************/

	199 /* Signal-plus-noise to noise ratio estimation */

	200 /***********************************************/

	201 sumSquared = 0;

	202 input_tilt = 0;

	203 for( b = 0; b < VAD_N_BANDS; b++ ) {

	204 speech_nrg = Xnrg[ b ] - psSilk_VAD->NL[ b ];

	205 if( speech_nrg > 0 ) {

	206 /* Divide, with sufficient resolution */

	207 if( ( Xnrg[ b ] & 0xFF800000 ) == 0 ) {

	208 NrgToNoiseRatio_Q8[ b ] = silk_DIV32( silk_LSHIFT( Xnrg[ b ], 8 ), psSilk_VAD->NL[ b ] + 1 );

	209 } else {

	210 NrgToNoiseRatio_Q8[ b ] = silk_DIV32( Xnrg[ b ], silk_RSHIFT( ps Silk_VAD->NL[ b ], 8 ) + 1 );

	211 }

	212

	213 /* Convert to log domain */

	214 SNR_Q7 = silk_lin2log( NrgToNoiseRatio_Q8[ b ] ) - 8 * 128;

	215

	216 /* Sum-of-squares */

	217 sumSquared = silk_SMLABB( sumSquared, SNR_Q7, SNR_Q7 ); /* Q14 */

	218

	219 /* Tilt measure */

	220 if( speech_nrg < ( (opus_int32)1 << 20 ) ) {

	221 /* Scale down SNR value for small subband speech energies */

	222 SNR_Q7 = silk_SMULWB( silk_LSHIFT( silk_SQRT_APPROX( speech_nrg ), 6 ), SNR_Q7 );

	223 }

	224 input_tilt = silk_SMLAWB( input_tilt, tiltWeights[ b ], SNR_Q7 );

	225 } else {

	226 NrgToNoiseRatio_Q8[ b ] = 256;

	227 }

	228 }

	229

	230 /* Mean-of-squares */

	231 sumSquared = silk_DIV32_16( sumSquared, VAD_N_BANDS ); /* Q14 */

	232

	233 /* Root-mean-square approximation, scale to dBs, and write to output pointer */

	234 pSNR_dB_Q7 = (opus_int16)( 3 * silk_SQRT_APPROX( sumSquared ) ); /* Q7 */

	235

	236 /*********************************/

	237 /* Speech Probability Estimation */

	238 /*********************************/

	239 SA_Q15 = silk_sigm_Q15( silk_SMULWB( VAD_SNR_FACTOR_Q16, pSNR_dB_Q7 ) - VAD_ NEGATIVE_OFFSET_Q5 );

	240

	241 /**************************/

	242 /* Frequency Tilt Measure */

	243 /**************************/

	244 psEncC->input_tilt_Q15 = silk_LSHIFT( silk_sigm_Q15( input_tilt ) - 16384, 1 );

	245

	246 /**************************************************/

	247 /* Scale the sigmoid output based on power levels */

	248 /**************************************************/

	249 speech_nrg = 0;

	250 for( b = 0; b < VAD_N_BANDS; b++ ) {

	251 /* Accumulate signal-without-noise energies, higher frequency bands have more weight */

	252 speech_nrg += ( b + 1 ) * silk_RSHIFT( Xnrg[ b ] - psSilk_VAD->NL[ b ], 4 );

	253 }

	254

	255 /* Power scaling */

	256 if( speech_nrg <= 0 ) {

	257 SA_Q15 = silk_RSHIFT( SA_Q15, 1 );

	258 } else if( speech_nrg < 32768 ) {

	259 if( psEncC->frame_length == 10 * psEncC->fs_kHz ) {

	260 speech_nrg = silk_LSHIFT_SAT32( speech_nrg, 16 );

	261 } else {

	262 speech_nrg = silk_LSHIFT_SAT32( speech_nrg, 15 );

	263 }

	264

	265 /* square-root */

	266 speech_nrg = silk_SQRT_APPROX( speech_nrg );

	267 SA_Q15 = silk_SMULWB( 32768 + speech_nrg, SA_Q15 );

	268 }

	269

	270 /* Copy the resulting speech activity in Q8 */

	271 psEncC->speech_activity_Q8 = silk_min_int( silk_RSHIFT( SA_Q15, 7 ), silk_ui nt8_MAX );

	272

	273 /***********************************/

	274 /* Energy Level and SNR estimation */

	275 /***********************************/

	276 /* Smoothing coefficient */

	277 smooth_coef_Q16 = silk_SMULWB( VAD_SNR_SMOOTH_COEF_Q18, silk_SMULWB( (opus_i nt32)SA_Q15, SA_Q15 ) );

	278

	279 if( psEncC->frame_length == 10 * psEncC->fs_kHz ) {

	280 smooth_coef_Q16 >>= 1;

	281 }

	282

	283 for( b = 0; b < VAD_N_BANDS; b++ ) {

	284 /* compute smoothed energy-to-noise ratio per band */

	285 psSilk_VAD->NrgRatioSmth_Q8[ b ] = silk_SMLAWB( psSilk_VAD->NrgRatioSmth _Q8[ b ],

	286 NrgToNoiseRatio_Q8[ b ] - psSilk_VAD->NrgRatioSmth_Q8[ b ], smooth_c oef_Q16 );

	287

	288 /* signal to noise ratio in dB per band */

	289 SNR_Q7 = 3 * ( silk_lin2log( psSilk_VAD->NrgRatioSmth_Q8[b] ) - 8 * 128 );

	290 /* quality = sigmoid( 0.25 * ( SNR_dB - 16 ) ); */

	291 psEncC->input_quality_bands_Q15[ b ] = silk_sigm_Q15( silk_RSHIFT( SNR_Q 7 - 16 * 128, 4 ) );

	292 }

	293

	294 RESTORE_STACK;

	295 return( ret );

	296 }

	297

	298 /**************************/

	299 /* Noise level estimation */

	300 /**************************/

	301 # if !defined(OPUS_X86_MAY_HAVE_SSE4_1)

	302 static OPUS_INLINE

	303 #endif

	304 void silk_VAD_GetNoiseLevels(

	305 const opus_int32 pX[ VAD_N_BANDS ], /* I subband energies */

	306 silk_VAD_state psSilk_VAD / I/O Pointer to Silk VAD state */

	307 )

	308 {

	309 opus_int k;

	310 opus_int32 nl, nrg, inv_nrg;

	311 opus_int coef, min_coef;

	312

	313 /* Initially faster smoothing */

	314 if( psSilk_VAD->counter < 1000 ) { /* 1000 = 20 sec */

	315 min_coef = silk_DIV32_16( silk_int16_MAX, silk_RSHIFT( psSilk_VAD->count er, 4 ) + 1 );

	316 } else {

	317 min_coef = 0;

	318 }

	319

	320 for( k = 0; k < VAD_N_BANDS; k++ ) {

	321 /* Get old noise level estimate for current band */

	322 nl = psSilk_VAD->NL[ k ];

	323 silk_assert( nl >= 0 );

	324

	325 /* Add bias */

	326 nrg = silk_ADD_POS_SAT32( pX[ k ], psSilk_VAD->NoiseLevelBias[ k ] );

	327 silk_assert( nrg > 0 );

	328

	329 /* Invert energies */

	330 inv_nrg = silk_DIV32( silk_int32_MAX, nrg );

	331 silk_assert( inv_nrg >= 0 );

	332

	333 /* Less update when subband energy is high */

	334 if( nrg > silk_LSHIFT( nl, 3 ) ) {

	335 coef = VAD_NOISE_LEVEL_SMOOTH_COEF_Q16 >> 3;

	336 } else if( nrg < nl ) {

	337 coef = VAD_NOISE_LEVEL_SMOOTH_COEF_Q16;

	338 } else {

	339 coef = silk_SMULWB( silk_SMULWW( inv_nrg, nl ), VAD_NOISE_LEVEL_SMOO TH_COEF_Q16 << 1 );

	340 }

	341

	342 /* Initially faster smoothing */

	343 coef = silk_max_int( coef, min_coef );

	344

	345 /* Smooth inverse energies */

	346 psSilk_VAD->inv_NL[ k ] = silk_SMLAWB( psSilk_VAD->inv_NL[ k ], inv_nrg - psSilk_VAD->inv_NL[ k ], coef );

	347 silk_assert( psSilk_VAD->inv_NL[ k ] >= 0 );

	348

	349 /* Compute noise level by inverting again */

	350 nl = silk_DIV32( silk_int32_MAX, psSilk_VAD->inv_NL[ k ] );

	351 silk_assert( nl >= 0 );

	352

	353 /* Limit noise levels (guarantee 7 bits of head room) */

	354 nl = silk_min( nl, 0x00FFFFFF );

	355

	356 /* Store as part of state */

	357 psSilk_VAD->NL[ k ] = nl;

	358 }

	359

	360 /* Increment frame counter */

	361 psSilk_VAD->counter++;

	362 }

OLD	NEW

« no previous file with comments | « third_party/opus/src/silk/SigProc_FIX.h ('k') | third_party/opus/src/silk/VQ_WMat_EC.c » ('j') | no next file with comments »