| OLD | NEW |
| (Empty) | |
| 1 /* Copyright (c) 2014, Cisco Systems, INC |
| 2 Written by XiangMingZhu WeiZhou MinPeng YanWang |
| 3 |
| 4 Redistribution and use in source and binary forms, with or without |
| 5 modification, are permitted provided that the following conditions |
| 6 are met: |
| 7 |
| 8 - Redistributions of source code must retain the above copyright |
| 9 notice, this list of conditions and the following disclaimer. |
| 10 |
| 11 - Redistributions in binary form must reproduce the above copyright |
| 12 notice, this list of conditions and the following disclaimer in the |
| 13 documentation and/or other materials provided with the distribution. |
| 14 |
| 15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 16 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 17 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 18 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER |
| 19 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
| 20 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
| 21 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
| 22 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF |
| 23 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
| 24 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
| 25 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 26 */ |
| 27 |
| 28 #ifdef HAVE_CONFIG_H |
| 29 #include "config.h" |
| 30 #endif |
| 31 |
| 32 #include <xmmintrin.h> |
| 33 #include <emmintrin.h> |
| 34 #include <smmintrin.h> |
| 35 |
| 36 #include "SigProc_FIX.h" |
| 37 #include "define.h" |
| 38 #include "tuning_parameters.h" |
| 39 #include "pitch.h" |
| 40 #include "celt/x86/x86cpu.h" |
| 41 |
| 42 #define MAX_FRAME_SIZE 384 /* subfr_length * nb_subfr =
( 0.005 * 16000 + 16 ) * 4 = 384 */ |
| 43 |
| 44 #define QA 25 /* Q-domain of the Af_QA[] coefficients used by the recursion in this file */ |
| 45 #define N_BITS_HEAD_ROOM 2 /* Headroom, in bits, targeted when the scaling of C0 is chosen */ |
| 46 #define MIN_RSHIFTS -16 /* Lower bound applied to rshifts */ |
| 47 #define MAX_RSHIFTS (32 - QA) /* Upper bound applied to rshifts */ |
| 48 |
| 49 /* Compute reflection coefficients from input signal (SSE4.1 variant).
Iterates over prediction orders n = 0 .. D-1 on the nb_subfr subframes stacked
in x, updating the correlation rows and the coefficient vector Af_QA, then
writes D coefficients to A_Q16 and the residual energy to *res_nrg with
*res_nrg_Q = -rshifts. The order loop exits early, zeroing the remaining
coefficients, once the updated inverse gain is <= minInvGain_Q30.
The intrinsics in this function are used only in the rshifts <= -2 branch of
the order loop; the other branch is plain scalar code. */ |
| 50 void silk_burg_modified_sse4_1( |
| 51 opus_int32 *res_nrg, /* O Residual energy
*/ |
| 52 opus_int *res_nrg_Q, /* O Residual energy Q va
lue */ |
| 53 opus_int32 A_Q16[], /* O Prediction coefficie
nts (length order) */ |
| 54 const opus_int16 x[], /* I Input signal, length
: nb_subfr * ( D + subfr_length ) */ |
| 55 const opus_int32 minInvGain_Q30, /* I Inverse of max predi
ction gain */ |
| 56 const opus_int subfr_length, /* I Input signal subfram
e length (incl. D preceding samples) */ |
| 57 const opus_int nb_subfr, /* I Number of subframes
stacked in x */ |
| 58 const opus_int D, /* I Order
*/ |
| 59 int arch /* I Run-time architectur
e */ |
| 60 ) |
| 61 { |
| 62 opus_int k, n, s, lz, rshifts, rshifts_extra, reached_max_gain; |
| 63 opus_int32 C0, num, nrg, rc_Q31, invGain_Q30, Atmp_QA, Atmp1, tmp1, tm
p2, x1, x2; |
| 64 const opus_int16 *x_ptr; |
| 65 opus_int32 C_first_row[ SILK_MAX_ORDER_LPC ]; |
| 66 opus_int32 C_last_row[ SILK_MAX_ORDER_LPC ]; |
| 67 opus_int32 Af_QA[ SILK_MAX_ORDER_LPC ]; |
| 68 opus_int32 CAf[ SILK_MAX_ORDER_LPC + 1 ]; |
| 69 opus_int32 CAb[ SILK_MAX_ORDER_LPC + 1 ]; |
| 70 opus_int32 xcorr[ SILK_MAX_ORDER_LPC ]; |
| 71 /* The *_3210 variables are 128-bit vectors, primarily used as four 32-bit lanes */ |
| 72 __m128i FIRST_3210, LAST_3210, ATMP_3210, TMP1_3210, TMP2_3210, T1_3210, T2_
3210, PTR_3210, SUBFR_3210, X1_3210, X2_3210; |
| 73 __m128i CONST1 = _mm_set1_epi32(1); |
| 74 |
| 75 silk_assert( subfr_length * nb_subfr <= MAX_FRAME_SIZE ); |
| 76 |
| 77 /* Compute autocorrelations, added over subframes */ |
| 78 silk_sum_sqr_shift( &C0, &rshifts, x, nb_subfr * subfr_length ); |
| 79 if( rshifts > MAX_RSHIFTS ) { /* Cap rshifts at MAX_RSHIFTS; C0 is shifted left by the excess */ |
| 80 C0 = silk_LSHIFT32( C0, rshifts - MAX_RSHIFTS ); |
| 81 silk_assert( C0 > 0 ); |
| 82 rshifts = MAX_RSHIFTS; |
| 83 } else { /* Otherwise rescale C0 based on its leading-zero count and N_BITS_HEAD_ROOM, keeping rshifts within [MIN_RSHIFTS, MAX_RSHIFTS] */ |
| 84 lz = silk_CLZ32( C0 ) - 1; |
| 85 rshifts_extra = N_BITS_HEAD_ROOM - lz; |
| 86 if( rshifts_extra > 0 ) { |
| 87 rshifts_extra = silk_min( rshifts_extra, MAX_RSHIFTS - rshifts ); |
| 88 C0 = silk_RSHIFT32( C0, rshifts_extra ); |
| 89 } else { |
| 90 rshifts_extra = silk_max( rshifts_extra, MIN_RSHIFTS - rshifts ); |
| 91 C0 = silk_LSHIFT32( C0, -rshifts_extra ); |
| 92 } |
| 93 rshifts += rshifts_extra; |
| 94 } |
| 95 CAb[ 0 ] = CAf[ 0 ] = C0 + silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32
), C0 ) + 1; /* Q(-rshifts) */ |
| 96 silk_memset( C_first_row, 0, SILK_MAX_ORDER_LPC * sizeof( opus_int32 ) ); |
| 97 if( rshifts > 0 ) { /* Right-shift case: each inner product is shifted right by rshifts before being accumulated */ |
| 98 for( s = 0; s < nb_subfr; s++ ) { |
| 99 x_ptr = x + s * subfr_length; |
| 100 for( n = 1; n < D + 1; n++ ) { |
| 101 C_first_row[ n - 1 ] += (opus_int32)silk_RSHIFT64( |
| 102 silk_inner_prod16_aligned_64( x_ptr, x_ptr + n, subfr_length
- n, arch ), rshifts ); |
| 103 } |
| 104 } |
| 105 } else { /* rshifts <= 0: celt_pitch_xcorr is called with length subfr_length - D for D lags (NOTE(review): presumably one sum per lag - confirm in pitch.h); the loop after it accumulates the products for i = n + subfr_length - D .. subfr_length - 1 into xcorr[ n - 1 ], which is then shifted left by -rshifts */ |
| 106 for( s = 0; s < nb_subfr; s++ ) { |
| 107 int i; |
| 108 opus_int32 d; |
| 109 x_ptr = x + s * subfr_length; |
| 110 celt_pitch_xcorr(x_ptr, x_ptr + 1, xcorr, subfr_length - D, D, arch
); |
| 111 for( n = 1; n < D + 1; n++ ) { |
| 112 for ( i = n + subfr_length - D, d = 0; i < subfr_length; i++ ) |
| 113 d = MAC16_16( d, x_ptr[ i ], x_ptr[ i - n ] ); |
| 114 xcorr[ n - 1 ] += d; |
| 115 } |
| 116 for( n = 1; n < D + 1; n++ ) { |
| 117 C_first_row[ n - 1 ] += silk_LSHIFT32( xcorr[ n - 1 ], -rshifts
); |
| 118 } |
| 119 } |
| 120 } |
| 121 silk_memcpy( C_last_row, C_first_row, SILK_MAX_ORDER_LPC * sizeof( opus_int3
2 ) ); |
| 122 |
| 123 /* Initialize (same assignment as the one made before the autocorrelation loops; C0 is not modified in between) */ |
| 124 CAb[ 0 ] = CAf[ 0 ] = C0 + silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32
), C0 ) + 1; /* Q(-rshifts) */ |
| 125 |
| 126 invGain_Q30 = (opus_int32)1 << 30; |
| 127 reached_max_gain = 0; |
| 128 for( n = 0; n < D; n++ ) { |
| 129 /* Update first row of correlation matrix (without first element) */ |
| 130 /* Update last row of correlation matrix (without last element, stored i
n reversed order) */ |
| 131 /* Update C * Af */ |
| 132 /* Update C * flipud(Af) (stored in reversed order) */ |
| 133 if( rshifts > -2 ) { /* Scalar path */ |
| 134 for( s = 0; s < nb_subfr; s++ ) { |
| 135 x_ptr = x + s * subfr_length; |
| 136 x1 = -silk_LSHIFT32( (opus_int32)x_ptr[ n ],
16 - rshifts ); /* Q(16-rshifts) */ |
| 137 x2 = -silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ],
16 - rshifts ); /* Q(16-rshifts) */ |
| 138 tmp1 = silk_LSHIFT32( (opus_int32)x_ptr[ n ],
QA - 16 ); /* Q(QA-16) */ |
| 139 tmp2 = silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ],
QA - 16 ); /* Q(QA-16) */ |
| 140 for( k = 0; k < n; k++ ) { |
| 141 C_first_row[ k ] = silk_SMLAWB( C_first_row[ k ], x1, x_ptr[
n - k - 1 ] ); /* Q( -rshifts ) */ |
| 142 C_last_row[ k ] = silk_SMLAWB( C_last_row[ k ], x2, x_ptr[
subfr_length - n + k ] ); /* Q( -rshifts ) */ |
| 143 Atmp_QA = Af_QA[ k ]; |
| 144 tmp1 = silk_SMLAWB( tmp1, Atmp_QA, x_ptr[ n - k - 1 ]
); /* Q(QA-16) */ |
| 145 tmp2 = silk_SMLAWB( tmp2, Atmp_QA, x_ptr[ subfr_length - n +
k ] ); /* Q(QA-16) */ |
| 146 } |
| 147 tmp1 = silk_LSHIFT32( -tmp1, 32 - QA - rshifts );
/* Q(16-rshifts) */ |
| 148 tmp2 = silk_LSHIFT32( -tmp2, 32 - QA - rshifts );
/* Q(16-rshifts) */ |
| 149 for( k = 0; k <= n; k++ ) { |
| 150 CAf[ k ] = silk_SMLAWB( CAf[ k ], tmp1, x_ptr[ n - k ]
); /* Q( -rshift ) */ |
| 151 CAb[ k ] = silk_SMLAWB( CAb[ k ], tmp2, x_ptr[ subfr_length
- n + k - 1 ] ); /* Q( -rshift ) */ |
| 152 } |
| 153 } |
| 154 } else { /* rshifts <= -2: vectorized path, four values of k per iteration followed by scalar tail loops */ |
| 155 for( s = 0; s < nb_subfr; s++ ) { |
| 156 x_ptr = x + s * subfr_length; |
| 157 x1 = -silk_LSHIFT32( (opus_int32)x_ptr[ n ],
-rshifts ); /* Q( -rshifts ) */ |
| 158 x2 = -silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ],
-rshifts ); /* Q( -rshifts ) */ |
| 159 tmp1 = silk_LSHIFT32( (opus_int32)x_ptr[ n ],
17 ); /* Q17 */ |
| 160 tmp2 = silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ],
17 ); /* Q17 */ |
| 161 /* x1 / x2 are broadcast to every lane; TMP1 / TMP2 collect per-lane partial sums that are folded into tmp1 / tmp2 after the loop */ |
| 162 X1_3210 = _mm_set1_epi32( x1 ); |
| 163 X2_3210 = _mm_set1_epi32( x2 ); |
| 164 TMP1_3210 = _mm_setzero_si128(); |
| 165 TMP2_3210 = _mm_setzero_si128(); |
| 166 for( k = 0; k < n - 3; k += 4 ) { |
| 167 PTR_3210 = OP_CVTEPI16_EPI32_M64( &x_ptr[ n - k - 1 - 3 ]
); |
| 168 SUBFR_3210 = OP_CVTEPI16_EPI32_M64( &x_ptr[ subfr_length - n
+ k ] ); |
| 169 FIRST_3210 = _mm_loadu_si128( (__m128i *)&C_first_row[ k ] )
; |
| 170 PTR_3210 = _mm_shuffle_epi32( PTR_3210, _MM_SHUFFLE( 0, 1
, 2, 3 ) ); |
| 171 LAST_3210 = _mm_loadu_si128( (__m128i *)&C_last_row[ k ] ); |
| 172 ATMP_3210 = _mm_loadu_si128( (__m128i *)&Af_QA[ k ] ); |
| 173 /* Products feeding the C_first_row / C_last_row updates for four consecutive k */ |
| 174 T1_3210 = _mm_mullo_epi32( PTR_3210, X1_3210 ); |
| 175 T2_3210 = _mm_mullo_epi32( SUBFR_3210, X2_3210 ); |
| 176 /* ATMP = ( ( Af_QA >> 7 ) + 1 ) >> 1: a rounded right shift by 8 ( = QA - 17 ), mirroring the silk_RSHIFT_ROUND call in the scalar tail loop */ |
| 177 ATMP_3210 = _mm_srai_epi32( ATMP_3210, 7 ); |
| 178 ATMP_3210 = _mm_add_epi32( ATMP_3210, CONST1 ); |
| 179 ATMP_3210 = _mm_srai_epi32( ATMP_3210, 1 ); |
| 180 |
| 181 FIRST_3210 = _mm_add_epi32( FIRST_3210, T1_3210 ); |
| 182 LAST_3210 = _mm_add_epi32( LAST_3210, T2_3210 ); |
| 183 |
| 184 PTR_3210 = _mm_mullo_epi32( ATMP_3210, PTR_3210 ); |
| 185 SUBFR_3210 = _mm_mullo_epi32( ATMP_3210, SUBFR_3210 ); |
| 186 |
| 187 _mm_storeu_si128( (__m128i *)&C_first_row[ k ], FIRST_3210 )
; |
| 188 _mm_storeu_si128( (__m128i *)&C_last_row[ k ], LAST_3210 ); |
| 189 |
| 190 TMP1_3210 = _mm_add_epi32( TMP1_3210, PTR_3210 ); |
| 191 TMP2_3210 = _mm_add_epi32( TMP2_3210, SUBFR_3210 ); |
| 192 } |
| 193 /* Horizontal add: the unpackhi step adds lanes 2,3 onto lanes 0,1; the 0x0E shufflelo then brings lane 1 down to lane 0, so lane 0 holds the sum of all four lanes */ |
| 194 TMP1_3210 = _mm_add_epi32( TMP1_3210, _mm_unpackhi_epi64(TMP1_32
10, TMP1_3210 ) ); |
| 195 TMP2_3210 = _mm_add_epi32( TMP2_3210, _mm_unpackhi_epi64(TMP2_32
10, TMP2_3210 ) ); |
| 196 TMP1_3210 = _mm_add_epi32( TMP1_3210, _mm_shufflelo_epi16(TMP1_3
210, 0x0E ) ); |
| 197 TMP2_3210 = _mm_add_epi32( TMP2_3210, _mm_shufflelo_epi16(TMP2_3
210, 0x0E ) ); |
| 198 |
| 199 tmp1 += _mm_cvtsi128_si32( TMP1_3210 ); |
| 200 tmp2 += _mm_cvtsi128_si32( TMP2_3210 ); |
| 201 /* Scalar tail: handles the values of k that the 4-wide loop above did not cover */ |
| 202 for( ; k < n; k++ ) { |
| 203 C_first_row[ k ] = silk_MLA( C_first_row[ k ], x1, x_ptr[ n
- k - 1 ] ); /* Q( -rshifts ) */ |
| 204 C_last_row[ k ] = silk_MLA( C_last_row[ k ], x2, x_ptr[ su
bfr_length - n + k ] ); /* Q( -rshifts ) */ |
| 205 Atmp1 = silk_RSHIFT_ROUND( Af_QA[ k ], QA - 17 );
/* Q17 */ |
| 206 tmp1 = silk_MLA( tmp1, x_ptr[ n - k - 1 ], Atmp1
); /* Q17 */ |
| 207 tmp2 = silk_MLA( tmp2, x_ptr[ subfr_length - n + k ], Atmp1
); /* Q17 */ |
| 208 } |
| 209 |
| 210 tmp1 = -tmp1; /* Q17 */ |
| 211 tmp2 = -tmp2; /* Q17 */ |
| 212 |
| 213 { |
| 214 __m128i xmm_tmp1, xmm_tmp2; |
| 215 __m128i xmm_x_ptr_n_k_x2x0, xmm_x_ptr_n_k_x3x1; |
| 216 __m128i xmm_x_ptr_sub_x2x0, xmm_x_ptr_sub_x3x1; |
| 217 |
| 218 xmm_tmp1 = _mm_set1_epi32( tmp1 ); |
| 219 xmm_tmp2 = _mm_set1_epi32( tmp2 ); |
| 220 /* CAf / CAb update for k = 0 .. n: four entries per vector iteration, the rest in the scalar loop that follows */ |
| 221 for( k = 0; k <= n - 3; k += 4 ) { |
| 222 xmm_x_ptr_n_k_x2x0 = OP_CVTEPI16_EPI32_M64( &x_ptr[ n -
k - 3 ] ); |
| 223 xmm_x_ptr_sub_x2x0 = OP_CVTEPI16_EPI32_M64( &x_ptr[ subf
r_length - n + k - 1 ] ); |
| 224 /* Reverse the lanes so that lane i holds x_ptr[ n - k - i ] (assumes OP_CVTEPI16_EPI32_M64 loads four consecutive 16-bit samples into lanes 0..3 - confirm in x86cpu.h) */ |
| 225 xmm_x_ptr_n_k_x2x0 = _mm_shuffle_epi32( xmm_x_ptr_n_k_x2
x0, _MM_SHUFFLE( 0, 1, 2, 3 ) ); |
| 226 |
| 227 xmm_x_ptr_n_k_x2x0 = _mm_slli_epi32( xmm_x_ptr_n_k_x2x0,
-rshifts - 1 ); |
| 228 xmm_x_ptr_sub_x2x0 = _mm_slli_epi32( xmm_x_ptr_sub_x2x0,
-rshifts - 1 ); |
| 229 |
| 230 /* equal shift right 4 bytes, xmm_x_ptr_n_k_x3x1 = _mm_s
rli_si128(xmm_x_ptr_n_k_x2x0, 4)*/ |
| 231 xmm_x_ptr_n_k_x3x1 = _mm_shuffle_epi32( xmm_x_ptr_n_k_x2
x0, _MM_SHUFFLE( 0, 3, 2, 1 ) ); |
| 232 xmm_x_ptr_sub_x3x1 = _mm_shuffle_epi32( xmm_x_ptr_sub_x2
x0, _MM_SHUFFLE( 0, 3, 2, 1 ) ); |
| 233 /* _mm_mul_epi32 multiplies only lanes 0 and 2, giving two 64-bit products; the x3x1 copies were rotated by one lane so that the original lanes 1 and 3 sit in those positions */ |
| 234 xmm_x_ptr_n_k_x2x0 = _mm_mul_epi32( xmm_x_ptr_n_k_x2x0,
xmm_tmp1 ); |
| 235 xmm_x_ptr_n_k_x3x1 = _mm_mul_epi32( xmm_x_ptr_n_k_x3x1,
xmm_tmp1 ); |
| 236 xmm_x_ptr_sub_x2x0 = _mm_mul_epi32( xmm_x_ptr_sub_x2x0,
xmm_tmp2 ); |
| 237 xmm_x_ptr_sub_x3x1 = _mm_mul_epi32( xmm_x_ptr_sub_x3x1,
xmm_tmp2 ); |
| 238 /* Keep bits 16..47 of each 64-bit product: x2x0 results end up in the low 32 bits of each 64-bit half, x3x1 results in the high 32 bits */ |
| 239 xmm_x_ptr_n_k_x2x0 = _mm_srli_epi64( xmm_x_ptr_n_k_x2x0,
16 ); |
| 240 xmm_x_ptr_n_k_x3x1 = _mm_slli_epi64( xmm_x_ptr_n_k_x3x1,
16 ); |
| 241 xmm_x_ptr_sub_x2x0 = _mm_srli_epi64( xmm_x_ptr_sub_x2x0,
16 ); |
| 242 xmm_x_ptr_sub_x3x1 = _mm_slli_epi64( xmm_x_ptr_sub_x3x1,
16 ); |
| 243 /* Mask 0xCC takes 16-bit words 2,3,6,7 (32-bit lanes 1 and 3) from the x3x1 vector and the remaining words from x2x0 */ |
| 244 xmm_x_ptr_n_k_x2x0 = _mm_blend_epi16( xmm_x_ptr_n_k_x2x0
, xmm_x_ptr_n_k_x3x1, 0xCC ); |
| 245 xmm_x_ptr_sub_x2x0 = _mm_blend_epi16( xmm_x_ptr_sub_x2x0
, xmm_x_ptr_sub_x3x1, 0xCC ); |
| 246 /* X1_3210 / PTR_3210 are reused here as scratch registers for CAf[ k..k+3 ] / CAb[ k..k+3 ] */ |
| 247 X1_3210 = _mm_loadu_si128( (__m128i *)&CAf[ k ] ); |
| 248 PTR_3210 = _mm_loadu_si128( (__m128i *)&CAb[ k ] ); |
| 249 |
| 250 X1_3210 = _mm_add_epi32( X1_3210, xmm_x_ptr_n_k_x2x0 ); |
| 251 PTR_3210 = _mm_add_epi32( PTR_3210, xmm_x_ptr_sub_x2x0 )
; |
| 252 |
| 253 _mm_storeu_si128( (__m128i *)&CAf[ k ], X1_3210 ); |
| 254 _mm_storeu_si128( (__m128i *)&CAb[ k ], PTR_3210 ); |
| 255 } |
| 256 /* Scalar tail for the remaining values of k, up to and including n */ |
| 257 for( ; k <= n; k++ ) { |
| 258 CAf[ k ] = silk_SMLAWW( CAf[ k ], tmp1, |
| 259 silk_LSHIFT32( (opus_int32)x_ptr[ n - k ], -rshifts
- 1 ) ); /* Q( -rshift ) */ |
| 260 CAb[ k ] = silk_SMLAWW( CAb[ k ], tmp2, |
| 261 silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n +
k - 1 ], -rshifts - 1 ) ); /* Q( -rshift ) */ |
| 262 } |
| 263 } |
| 264 } |
| 265 } |
| 266 |
| 267 /* Calculate numerator and denominator for the next order reflection (pa
rcor) coefficient */ |
| 268 tmp1 = C_first_row[ n ];
/* Q( -rshifts ) */ |
| 269 tmp2 = C_last_row[ n ];
/* Q( -rshifts ) */ |
| 270 num = 0;
/* Q( -rshifts ) */ |
| 271 nrg = silk_ADD32( CAb[ 0 ], CAf[ 0 ] );
/* Q( 1-rshifts ) */ |
| 272 for( k = 0; k < n; k++ ) { |
| 273 Atmp_QA = Af_QA[ k ]; |
| 274 lz = silk_CLZ32( silk_abs( Atmp_QA ) ) - 1; |
| 275 lz = silk_min( 32 - QA, lz ); |
| 276 Atmp1 = silk_LSHIFT32( Atmp_QA, lz );
/* Q( QA + lz ) */ |
| 277 |
| 278 tmp1 = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( C_last_row[ n - k - 1 ]
, Atmp1 ), 32 - QA - lz ); /* Q( -rshifts ) */ |
| 279 tmp2 = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( C_first_row[ n - k - 1 ]
, Atmp1 ), 32 - QA - lz ); /* Q( -rshifts ) */ |
| 280 num = silk_ADD_LSHIFT32( num, silk_SMMUL( CAb[ n - k ],
Atmp1 ), 32 - QA - lz ); /* Q( -rshifts ) */ |
| 281 nrg = silk_ADD_LSHIFT32( nrg, silk_SMMUL( silk_ADD32( CAb[ k + 1 ]
, CAf[ k + 1 ] ), |
| 282
Atmp1 ), 32 - QA - lz ); /* Q( 1-rshifts ) */ |
| 283 } |
| 284 CAf[ n + 1 ] = tmp1;
/* Q( -rshifts ) */ |
| 285 CAb[ n + 1 ] = tmp2;
/* Q( -rshifts ) */ |
| 286 num = silk_ADD32( num, tmp2 );
/* Q( -rshifts ) */ |
| 287 num = silk_LSHIFT32( -num, 1 );
/* Q( 1-rshifts ) */ |
| 288 |
| 289 /* Calculate the next order reflection (parcor) coefficient */ |
| 290 if( silk_abs( num ) < nrg ) { |
| 291 rc_Q31 = silk_DIV32_varQ( num, nrg, 31 ); |
| 292 } else { /* silk_abs( num ) >= nrg: saturate instead of dividing */ |
| 293 rc_Q31 = ( num > 0 ) ? silk_int32_MAX : silk_int32_MIN; |
| 294 } |
| 295 |
| 296 /* Update inverse prediction gain */ |
| 297 tmp1 = ( (opus_int32)1 << 30 ) - silk_SMMUL( rc_Q31, rc_Q31 ); |
| 298 tmp1 = silk_LSHIFT( silk_SMMUL( invGain_Q30, tmp1 ), 2 ); |
| 299 if( tmp1 <= minInvGain_Q30 ) { |
| 300 /* Max prediction gain exceeded; set reflection coefficient such tha
t max prediction gain is exactly hit */ |
| 301 tmp2 = ( (opus_int32)1 << 30 ) - silk_DIV32_varQ( minInvGain_Q30, in
vGain_Q30, 30 ); /* Q30 */ |
| 302 rc_Q31 = silk_SQRT_APPROX( tmp2 );
/* Q15 */ |
| 303 if( rc_Q31 > 0 ) { |
| 304 /* Newton-Raphson iteration */ |
| 305 rc_Q31 = silk_RSHIFT32( rc_Q31 + silk_DIV32( tmp2, rc_Q31 ), 1 )
; /* Q15 */ |
| 306 rc_Q31 = silk_LSHIFT32( rc_Q31, 16 );
/* Q31 */ |
| 307 if( num < 0 ) { |
| 308 /* Ensure adjusted reflection coefficients has the original
sign */ |
| 309 rc_Q31 = -rc_Q31; |
| 310 } |
| 311 } |
| 312 invGain_Q30 = minInvGain_Q30; |
| 313 reached_max_gain = 1; |
| 314 } else { |
| 315 invGain_Q30 = tmp1; |
| 316 } |
| 317 |
| 318 /* Update the AR coefficients */ |
| 319 for( k = 0; k < (n + 1) >> 1; k++ ) { |
| 320 tmp1 = Af_QA[ k ];
/* QA */ |
| 321 tmp2 = Af_QA[ n - k - 1 ];
/* QA */ |
| 322 Af_QA[ k ] = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( tmp2, rc_Q
31 ), 1 ); /* QA */ |
| 323 Af_QA[ n - k - 1 ] = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( tmp1, rc_Q
31 ), 1 ); /* QA */ |
| 324 } |
| 325 Af_QA[ n ] = silk_RSHIFT32( rc_Q31, 31 - QA );
/* QA */ |
| 326 |
| 327 if( reached_max_gain ) { |
| 328 /* Reached max prediction gain; set remaining coefficients to zero a
nd exit loop */ |
| 329 for( k = n + 1; k < D; k++ ) { |
| 330 Af_QA[ k ] = 0; |
| 331 } |
| 332 break; |
| 333 } |
| 334 |
| 335 /* Update C * Af and C * Ab */ |
| 336 for( k = 0; k <= n + 1; k++ ) { |
| 337 tmp1 = CAf[ k ];
/* Q( -rshifts ) */ |
| 338 tmp2 = CAb[ n - k + 1 ];
/* Q( -rshifts ) */ |
| 339 CAf[ k ] = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( tmp2, rc_Q31
), 1 ); /* Q( -rshifts ) */ |
| 340 CAb[ n - k + 1 ] = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( tmp1, rc_Q31
), 1 ); /* Q( -rshifts ) */ |
| 341 } |
| 342 } |
| 343 /* Outputs: A_Q16[ k ] is the negated Af_QA[ k ] after a rounding shift from QA to Q16; the residual energy is computed differently depending on whether the gain limit was hit */ |
| 344 if( reached_max_gain ) { |
| 345 for( k = 0; k < D; k++ ) { |
| 346 /* Scale coefficients */ |
| 347 A_Q16[ k ] = -silk_RSHIFT_ROUND( Af_QA[ k ], QA - 16 ); |
| 348 } |
| 349 /* Subtract energy of preceding samples from C0 */ |
| 350 if( rshifts > 0 ) { |
| 351 for( s = 0; s < nb_subfr; s++ ) { |
| 352 x_ptr = x + s * subfr_length; |
| 353 C0 -= (opus_int32)silk_RSHIFT64( silk_inner_prod16_aligned_64( x
_ptr, x_ptr, D, arch ), rshifts ); |
| 354 } |
| 355 } else { |
| 356 for( s = 0; s < nb_subfr; s++ ) { |
| 357 x_ptr = x + s * subfr_length; |
| 358 C0 -= silk_LSHIFT32( silk_inner_prod_aligned( x_ptr, x_ptr, D, a
rch ), -rshifts ); |
| 359 } |
| 360 } |
| 361 /* Approximate residual energy */ |
| 362 *res_nrg = silk_LSHIFT( silk_SMMUL( invGain_Q30, C0 ), 2 ); |
| 363 *res_nrg_Q = -rshifts; |
| 364 } else { |
| 365 /* Return residual energy */ |
| 366 nrg = CAf[ 0 ];
/* Q( -rshifts ) */ |
| 367 tmp1 = (opus_int32)1 << 16;
/* Q16 */ |
| 368 for( k = 0; k < D; k++ ) { |
| 369 Atmp1 = silk_RSHIFT_ROUND( Af_QA[ k ], QA - 16 );
/* Q16 */ |
| 370 nrg = silk_SMLAWW( nrg, CAf[ k + 1 ], Atmp1 );
/* Q( -rshifts ) */ |
| 371 tmp1 = silk_SMLAWW( tmp1, Atmp1, Atmp1 );
/* Q16 */ |
| 372 A_Q16[ k ] = -Atmp1; |
| 373 } |
| 374 *res_nrg = silk_SMLAWW( nrg, silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_F
AC, 32 ), C0 ), -tmp1 );/* Q( -rshifts ) */ |
| 375 *res_nrg_Q = -rshifts; |
| 376 } |
| 377 } |
| OLD | NEW |