| OLD | NEW |
| (Empty) | |
| 1 /*********************************************************************** |
| 2 Copyright (c) 2006-2011, Skype Limited. All rights reserved. |
| 3 Redistribution and use in source and binary forms, with or without |
| 4 modification, are permitted provided that the following conditions |
| 5 are met: |
| 6 - Redistributions of source code must retain the above copyright notice, |
| 7 this list of conditions and the following disclaimer. |
| 8 - Redistributions in binary form must reproduce the above copyright |
| 9 notice, this list of conditions and the following disclaimer in the |
| 10 documentation and/or other materials provided with the distribution. |
| 11 - Neither the name of Internet Society, IETF or IETF Trust, nor the |
| 12 names of specific contributors, may be used to endorse or promote |
| 13 products derived from this software without specific prior written |
| 14 permission. |
| 15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” |
| 16 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 17 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 18 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
| 19 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| 20 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| 21 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| 22 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| 23 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 24 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| 25 POSSIBILITY OF SUCH DAMAGE. |
| 26 ***********************************************************************/ |
| 27 |
| 28 #ifdef HAVE_CONFIG_H |
| 29 #include "config.h" |
| 30 #endif |
| 31 #include "API.h" |
| 32 #include "main.h" |
| 33 |
| 34 /************************/ |
| 35 /* Decoder Super Struct */ |
| 36 /************************/ |
| 37 typedef struct { |
| 38 silk_decoder_state channel_state[ DECODER_NUM_CHANNELS ]; |
| 39 stereo_dec_state sStereo; |
| 40 opus_int nChannelsAPI; |
| 41 opus_int nChannelsInternal; |
| 42 opus_int prev_decode_only_middle; |
| 43 } silk_decoder; |
| 44 |
| 45 /*********************/ |
| 46 /* Decoder functions */ |
| 47 /*********************/ |
| 48 |
| 49 opus_int silk_Get_Decoder_Size( /* O Returns error co
de */ |
| 50 opus_int *decSizeBytes /* O Number of bytes
in SILK decoder state */ |
| 51 ) |
| 52 { |
| 53 opus_int ret = SILK_NO_ERROR; |
| 54 |
| 55 *decSizeBytes = sizeof( silk_decoder ); |
| 56 |
| 57 return ret; |
| 58 } |
| 59 |
| 60 /* Reset decoder state */ |
| 61 opus_int silk_InitDecoder( /* O Returns error co
de */ |
| 62 void *decState /* I/O State
*/ |
| 63 ) |
| 64 { |
| 65 opus_int n, ret = SILK_NO_ERROR; |
| 66 silk_decoder_state *channel_state = ((silk_decoder *)decState)->channel_stat
e; |
| 67 |
| 68 for( n = 0; n < DECODER_NUM_CHANNELS; n++ ) { |
| 69 ret = silk_init_decoder( &channel_state[ n ] ); |
| 70 } |
| 71 |
| 72 return ret; |
| 73 } |
| 74 |
| 75 /* Decode a frame */ |
| 76 opus_int silk_Decode( /* O Returns error co
de */ |
| 77 void* decState, /* I/O State
*/ |
| 78 silk_DecControlStruct* decControl, /* I/O Control Structur
e */ |
| 79 opus_int lostFlag, /* I 0: no loss, 1 lo
ss, 2 decode fec */ |
| 80 opus_int newPacketFlag, /* I Indicates first
decoder call for this packet */ |
| 81 ec_dec *psRangeDec, /* I/O Compressor data
structure */ |
| 82 opus_int16 *samplesOut, /* O Decoded output s
peech vector */ |
| 83 opus_int32 *nSamplesOut /* O Number of sample
s decoded */ |
| 84 ) |
| 85 { |
| 86 opus_int i, n, decode_only_middle = 0, ret = SILK_NO_ERROR; |
| 87 opus_int32 nSamplesOutDec, LBRR_symbol; |
| 88 opus_int16 samplesOut1_tmp[ 2 ][ MAX_FS_KHZ * MAX_FRAME_LENGTH_MS + 2 ]; |
| 89 opus_int16 samplesOut2_tmp[ MAX_API_FS_KHZ * MAX_FRAME_LENGTH_MS ]; |
| 90 opus_int32 MS_pred_Q13[ 2 ] = { 0 }; |
| 91 opus_int16 *resample_out_ptr; |
| 92 silk_decoder *psDec = ( silk_decoder * )decState; |
| 93 silk_decoder_state *channel_state = psDec->channel_state; |
| 94 opus_int has_side; |
| 95 opus_int stereo_to_mono; |
| 96 |
| 97 /**********************************/ |
| 98 /* Test if first frame in payload */ |
| 99 /**********************************/ |
| 100 if( newPacketFlag ) { |
| 101 for( n = 0; n < decControl->nChannelsInternal; n++ ) { |
| 102 channel_state[ n ].nFramesDecoded = 0; /* Used to count frames in p
acket */ |
| 103 } |
| 104 } |
| 105 |
| 106 /* If Mono -> Stereo transition in bitstream: init state of second channel *
/ |
| 107 if( decControl->nChannelsInternal > psDec->nChannelsInternal ) { |
| 108 ret += silk_init_decoder( &channel_state[ 1 ] ); |
| 109 } |
| 110 |
| 111 stereo_to_mono = decControl->nChannelsInternal == 1 && psDec->nChannelsInter
nal == 2 && |
| 112 ( decControl->internalSampleRate == 1000*channel_state[ 0 ]
.fs_kHz ); |
| 113 |
| 114 if( channel_state[ 0 ].nFramesDecoded == 0 ) { |
| 115 for( n = 0; n < decControl->nChannelsInternal; n++ ) { |
| 116 opus_int fs_kHz_dec; |
| 117 if( decControl->payloadSize_ms == 0 ) { |
| 118 /* Assuming packet loss, use 10 ms */ |
| 119 channel_state[ n ].nFramesPerPacket = 1; |
| 120 channel_state[ n ].nb_subfr = 2; |
| 121 } else if( decControl->payloadSize_ms == 10 ) { |
| 122 channel_state[ n ].nFramesPerPacket = 1; |
| 123 channel_state[ n ].nb_subfr = 2; |
| 124 } else if( decControl->payloadSize_ms == 20 ) { |
| 125 channel_state[ n ].nFramesPerPacket = 1; |
| 126 channel_state[ n ].nb_subfr = 4; |
| 127 } else if( decControl->payloadSize_ms == 40 ) { |
| 128 channel_state[ n ].nFramesPerPacket = 2; |
| 129 channel_state[ n ].nb_subfr = 4; |
| 130 } else if( decControl->payloadSize_ms == 60 ) { |
| 131 channel_state[ n ].nFramesPerPacket = 3; |
| 132 channel_state[ n ].nb_subfr = 4; |
| 133 } else { |
| 134 silk_assert( 0 ); |
| 135 return SILK_DEC_INVALID_FRAME_SIZE; |
| 136 } |
| 137 fs_kHz_dec = ( decControl->internalSampleRate >> 10 ) + 1; |
| 138 if( fs_kHz_dec != 8 && fs_kHz_dec != 12 && fs_kHz_dec != 16 ) { |
| 139 silk_assert( 0 ); |
| 140 return SILK_DEC_INVALID_SAMPLING_FREQUENCY; |
| 141 } |
| 142 ret += silk_decoder_set_fs( &channel_state[ n ], fs_kHz_dec, decCont
rol->API_sampleRate ); |
| 143 } |
| 144 } |
| 145 |
| 146 if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 && (
psDec->nChannelsAPI == 1 || psDec->nChannelsInternal == 1 ) ) { |
| 147 silk_memset( psDec->sStereo.pred_prev_Q13, 0, sizeof( psDec->sStereo.pre
d_prev_Q13 ) ); |
| 148 silk_memset( psDec->sStereo.sSide, 0, sizeof( psDec->sStereo.sSide ) ); |
| 149 silk_memcpy( &channel_state[ 1 ].resampler_state, &channel_state[ 0 ].re
sampler_state, sizeof( silk_resampler_state_struct ) ); |
| 150 } |
| 151 psDec->nChannelsAPI = decControl->nChannelsAPI; |
| 152 psDec->nChannelsInternal = decControl->nChannelsInternal; |
| 153 |
| 154 if( decControl->API_sampleRate > (opus_int32)MAX_API_FS_KHZ * 1000 || decCon
trol->API_sampleRate < 8000 ) { |
| 155 ret = SILK_DEC_INVALID_SAMPLING_FREQUENCY; |
| 156 return( ret ); |
| 157 } |
| 158 |
| 159 if( lostFlag != FLAG_PACKET_LOST && channel_state[ 0 ].nFramesDecoded == 0 )
{ |
| 160 /* First decoder call for this payload */ |
| 161 /* Decode VAD flags and LBRR flag */ |
| 162 for( n = 0; n < decControl->nChannelsInternal; n++ ) { |
| 163 for( i = 0; i < channel_state[ n ].nFramesPerPacket; i++ ) { |
| 164 channel_state[ n ].VAD_flags[ i ] = ec_dec_bit_logp(psRangeDec,
1); |
| 165 } |
| 166 channel_state[ n ].LBRR_flag = ec_dec_bit_logp(psRangeDec, 1); |
| 167 } |
| 168 /* Decode LBRR flags */ |
| 169 for( n = 0; n < decControl->nChannelsInternal; n++ ) { |
| 170 silk_memset( channel_state[ n ].LBRR_flags, 0, sizeof( channel_state
[ n ].LBRR_flags ) ); |
| 171 if( channel_state[ n ].LBRR_flag ) { |
| 172 if( channel_state[ n ].nFramesPerPacket == 1 ) { |
| 173 channel_state[ n ].LBRR_flags[ 0 ] = 1; |
| 174 } else { |
| 175 LBRR_symbol = ec_dec_icdf( psRangeDec, silk_LBRR_flags_iCDF_
ptr[ channel_state[ n ].nFramesPerPacket - 2 ], 8 ) + 1; |
| 176 for( i = 0; i < channel_state[ n ].nFramesPerPacket; i++ ) { |
| 177 channel_state[ n ].LBRR_flags[ i ] = silk_RSHIFT( LBRR_s
ymbol, i ) & 1; |
| 178 } |
| 179 } |
| 180 } |
| 181 } |
| 182 |
| 183 if( lostFlag == FLAG_DECODE_NORMAL ) { |
| 184 /* Regular decoding: skip all LBRR data */ |
| 185 for( i = 0; i < channel_state[ 0 ].nFramesPerPacket; i++ ) { |
| 186 for( n = 0; n < decControl->nChannelsInternal; n++ ) { |
| 187 if( channel_state[ n ].LBRR_flags[ i ] ) { |
| 188 opus_int pulses[ MAX_FRAME_LENGTH ]; |
| 189 opus_int condCoding; |
| 190 |
| 191 if( decControl->nChannelsInternal == 2 && n == 0 ) { |
| 192 silk_stereo_decode_pred( psRangeDec, MS_pred_Q13 ); |
| 193 if( channel_state[ 1 ].LBRR_flags[ i ] == 0 ) { |
| 194 silk_stereo_decode_mid_only( psRangeDec, &decode
_only_middle ); |
| 195 } |
| 196 } |
| 197 /* Use conditional coding if previous frame available */ |
| 198 if( i > 0 && channel_state[ n ].LBRR_flags[ i - 1 ] ) { |
| 199 condCoding = CODE_CONDITIONALLY; |
| 200 } else { |
| 201 condCoding = CODE_INDEPENDENTLY; |
| 202 } |
| 203 silk_decode_indices( &channel_state[ n ], psRangeDec, i,
1, condCoding ); |
| 204 silk_decode_pulses( psRangeDec, pulses, channel_state[ n
].indices.signalType, |
| 205 channel_state[ n ].indices.quantOffsetType, channel_
state[ n ].frame_length ); |
| 206 } |
| 207 } |
| 208 } |
| 209 } |
| 210 } |
| 211 |
| 212 /* Get MS predictor index */ |
| 213 if( decControl->nChannelsInternal == 2 ) { |
| 214 if( lostFlag == FLAG_DECODE_NORMAL || |
| 215 ( lostFlag == FLAG_DECODE_LBRR && channel_state[ 0 ].LBRR_flags[ cha
nnel_state[ 0 ].nFramesDecoded ] == 1 ) ) |
| 216 { |
| 217 silk_stereo_decode_pred( psRangeDec, MS_pred_Q13 ); |
| 218 /* For LBRR data, decode mid-only flag only if side-channel's LBRR f
lag is false */ |
| 219 if( ( lostFlag == FLAG_DECODE_NORMAL && channel_state[ 1 ].VAD_flags
[ channel_state[ 0 ].nFramesDecoded ] == 0 ) || |
| 220 ( lostFlag == FLAG_DECODE_LBRR && channel_state[ 1 ].LBRR_flags[
channel_state[ 0 ].nFramesDecoded ] == 0 ) ) |
| 221 { |
| 222 silk_stereo_decode_mid_only( psRangeDec, &decode_only_middle ); |
| 223 } else { |
| 224 decode_only_middle = 0; |
| 225 } |
| 226 } else { |
| 227 for( n = 0; n < 2; n++ ) { |
| 228 MS_pred_Q13[ n ] = psDec->sStereo.pred_prev_Q13[ n ]; |
| 229 } |
| 230 } |
| 231 } |
| 232 |
| 233 /* Reset side channel decoder prediction memory for first frame with side co
ding */ |
| 234 if( decControl->nChannelsInternal == 2 && decode_only_middle == 0 && psDec->
prev_decode_only_middle == 1 ) { |
| 235 silk_memset( psDec->channel_state[ 1 ].outBuf, 0, sizeof(psDec->channel_
state[ 1 ].outBuf) ); |
| 236 silk_memset( psDec->channel_state[ 1 ].sLPC_Q14_buf, 0, sizeof(psDec->ch
annel_state[ 1 ].sLPC_Q14_buf) ); |
| 237 psDec->channel_state[ 1 ].lagPrev = 100; |
| 238 psDec->channel_state[ 1 ].LastGainIndex = 10; |
| 239 psDec->channel_state[ 1 ].prevSignalType = TYPE_NO_VOICE_ACTIVITY; |
| 240 psDec->channel_state[ 1 ].first_frame_after_reset = 1; |
| 241 } |
| 242 |
| 243 if( lostFlag == FLAG_DECODE_NORMAL ) { |
| 244 has_side = !decode_only_middle; |
| 245 } else { |
| 246 has_side = !psDec->prev_decode_only_middle |
| 247 || (decControl->nChannelsInternal == 2 && lostFlag == FLAG_DECODE_
LBRR && channel_state[1].LBRR_flags[ channel_state[1].nFramesDecoded ] == 1 ); |
| 248 } |
| 249 /* Call decoder for one frame */ |
| 250 for( n = 0; n < decControl->nChannelsInternal; n++ ) { |
| 251 if( n == 0 || has_side ) { |
| 252 opus_int FrameIndex; |
| 253 opus_int condCoding; |
| 254 |
| 255 FrameIndex = channel_state[ 0 ].nFramesDecoded - n; |
| 256 /* Use independent coding if no previous frame available */ |
| 257 if( FrameIndex <= 0 ) { |
| 258 condCoding = CODE_INDEPENDENTLY; |
| 259 } else if( lostFlag == FLAG_DECODE_LBRR ) { |
| 260 condCoding = channel_state[ n ].LBRR_flags[ FrameIndex - 1 ] ? C
ODE_CONDITIONALLY : CODE_INDEPENDENTLY; |
| 261 } else if( n > 0 && psDec->prev_decode_only_middle ) { |
| 262 /* If we skipped a side frame in this packet, we don't |
| 263 need LTP scaling; the LTP state is well-defined. */ |
| 264 condCoding = CODE_INDEPENDENTLY_NO_LTP_SCALING; |
| 265 } else { |
| 266 condCoding = CODE_CONDITIONALLY; |
| 267 } |
| 268 ret += silk_decode_frame( &channel_state[ n ], psRangeDec, &samplesO
ut1_tmp[ n ][ 2 ], &nSamplesOutDec, lostFlag, condCoding); |
| 269 } else { |
| 270 silk_memset( &samplesOut1_tmp[ n ][ 2 ], 0, nSamplesOutDec * sizeof(
opus_int16 ) ); |
| 271 } |
| 272 channel_state[ n ].nFramesDecoded++; |
| 273 } |
| 274 |
| 275 if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 ) { |
| 276 /* Convert Mid/Side to Left/Right */ |
| 277 silk_stereo_MS_to_LR( &psDec->sStereo, samplesOut1_tmp[ 0 ], samplesOut1
_tmp[ 1 ], MS_pred_Q13, channel_state[ 0 ].fs_kHz, nSamplesOutDec ); |
| 278 } else { |
| 279 /* Buffering */ |
| 280 silk_memcpy( samplesOut1_tmp[ 0 ], psDec->sStereo.sMid, 2 * sizeof( opus
_int16 ) ); |
| 281 silk_memcpy( psDec->sStereo.sMid, &samplesOut1_tmp[ 0 ][ nSamplesOutDec
], 2 * sizeof( opus_int16 ) ); |
| 282 } |
| 283 |
| 284 /* Number of output samples */ |
| 285 *nSamplesOut = silk_DIV32( nSamplesOutDec * decControl->API_sampleRate, silk
_SMULBB( channel_state[ 0 ].fs_kHz, 1000 ) ); |
| 286 |
| 287 /* Set up pointers to temp buffers */ |
| 288 if( decControl->nChannelsAPI == 2 ) { |
| 289 resample_out_ptr = samplesOut2_tmp; |
| 290 } else { |
| 291 resample_out_ptr = samplesOut; |
| 292 } |
| 293 |
| 294 for( n = 0; n < silk_min( decControl->nChannelsAPI, decControl->nChannelsInt
ernal ); n++ ) { |
| 295 |
| 296 /* Resample decoded signal to API_sampleRate */ |
| 297 ret += silk_resampler( &channel_state[ n ].resampler_state, resample_out
_ptr, &samplesOut1_tmp[ n ][ 1 ], nSamplesOutDec ); |
| 298 |
| 299 /* Interleave if stereo output and stereo stream */ |
| 300 if( decControl->nChannelsAPI == 2 ) { |
| 301 for( i = 0; i < *nSamplesOut; i++ ) { |
| 302 samplesOut[ n + 2 * i ] = resample_out_ptr[ i ]; |
| 303 } |
| 304 } |
| 305 } |
| 306 |
| 307 /* Create two channel output from mono stream */ |
| 308 if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 1 ) { |
| 309 if ( stereo_to_mono ){ |
| 310 /* Resample right channel for newly collapsed stereo just in case |
| 311 we weren't doing collapsing when switching to mono */ |
| 312 ret += silk_resampler( &channel_state[ 1 ].resampler_state, resample
_out_ptr, &samplesOut1_tmp[ 0 ][ 1 ], nSamplesOutDec ); |
| 313 |
| 314 for( i = 0; i < *nSamplesOut; i++ ) { |
| 315 samplesOut[ 1 + 2 * i ] = resample_out_ptr[ i ]; |
| 316 } |
| 317 } else { |
| 318 for( i = 0; i < *nSamplesOut; i++ ) { |
| 319 samplesOut[ 1 + 2 * i ] = samplesOut[ 0 + 2 * i ]; |
| 320 } |
| 321 } |
| 322 } |
| 323 |
| 324 /* Export pitch lag, measured at 48 kHz sampling rate */ |
| 325 if( channel_state[ 0 ].prevSignalType == TYPE_VOICED ) { |
| 326 int mult_tab[ 3 ] = { 6, 4, 3 }; |
| 327 decControl->prevPitchLag = channel_state[ 0 ].lagPrev * mult_tab[ ( chan
nel_state[ 0 ].fs_kHz - 8 ) >> 2 ]; |
| 328 } else { |
| 329 decControl->prevPitchLag = 0; |
| 330 } |
| 331 |
| 332 if( lostFlag == FLAG_PACKET_LOST ) { |
| 333 /* On packet loss, remove the gain clamping to prevent having the energy
"bounce back" |
| 334 if we lose packets when the energy is going down */ |
| 335 for ( i = 0; i < psDec->nChannelsInternal; i++ ) |
| 336 psDec->channel_state[ i ].LastGainIndex = 10; |
| 337 } else { |
| 338 psDec->prev_decode_only_middle = decode_only_middle; |
| 339 } |
| 340 return ret; |
| 341 } |
| 342 |
#if 0
/* Getting table of contents for a packet.
 *
 * Reads the header flags from the first payload byte and stores them in
 * Silk_TOC: the bit after the per-frame VAD bits becomes inbandFECFlag, the
 * VAD bits fill VADFlags[], and VADFlag is the OR of all of them.
 * Returns -1 for an empty payload or a frame count outside 0..3,
 * SILK_NO_ERROR otherwise.
 */
opus_int silk_get_TOC(
    const opus_uint8                *payload,           /* I    Payload data                                */
    const opus_int                  nBytesIn,           /* I    Number of input bytes                       */
    const opus_int                  nFramesPerPayload,  /* I    Number of SILK frames per payload           */
    silk_TOC_struct                 *Silk_TOC           /* O    Type of content                             */
)
{
    opus_int frame, bits;

    /* Nothing to parse, or an unsupported number of frames */
    if( nBytesIn < 1 || nFramesPerPayload < 0 || nFramesPerPayload > 3 ) {
        return -1;
    }

    silk_memset( Silk_TOC, 0, sizeof( *Silk_TOC ) );

    /* For stereo, extract the flags for the mid channel */
    bits  = silk_RSHIFT( payload[ 0 ], 7 - nFramesPerPayload );
    bits &= silk_LSHIFT( 1, nFramesPerPayload + 1 ) - 1;

    /* Lowest bit is the FEC flag; the VAD flags follow, last frame first */
    Silk_TOC->inbandFECFlag = bits & 1;
    frame = nFramesPerPayload;
    while( frame-- > 0 ) {
        bits = silk_RSHIFT( bits, 1 );
        Silk_TOC->VADFlags[ frame ] = bits & 1;
        Silk_TOC->VADFlag |= bits & 1;
    }

    return SILK_NO_ERROR;
}
#endif
| OLD | NEW |