| OLD | NEW |
| 1 /*********************************************************************** | 1 /* Copyright (c) 2014, Cisco Systems, INC |
| 2 Copyright (c) 2006-2011, Skype Limited. All rights reserved. | 2 Written by XiangMingZhu WeiZhou MinPeng YanWang |
| 3 Redistribution and use in source and binary forms, with or without | 3 |
| 4 modification, are permitted provided that the following conditions | 4 Redistribution and use in source and binary forms, with or without |
| 5 are met: | 5 modification, are permitted provided that the following conditions |
| 6 - Redistributions of source code must retain the above copyright notice, | 6 are met: |
| 7 this list of conditions and the following disclaimer. | 7 |
| 8 - Redistributions in binary form must reproduce the above copyright | 8 - Redistributions of source code must retain the above copyright |
| 9 notice, this list of conditions and the following disclaimer in the | 9 notice, this list of conditions and the following disclaimer. |
| 10 documentation and/or other materials provided with the distribution. | 10 |
| 11 - Neither the name of Internet Society, IETF or IETF Trust, nor the | 11 - Redistributions in binary form must reproduce the above copyright |
| 12 names of specific contributors, may be used to endorse or promote | 12 notice, this list of conditions and the following disclaimer in the |
| 13 products derived from this software without specific prior written | 13 documentation and/or other materials provided with the distribution. |
| 14 permission. | 14 |
| 15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | 15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 16 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | 16 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 17 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | 17 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 18 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | 18 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER |
| 19 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | 19 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
| 20 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | 20 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
| 21 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | 21 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
| 22 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | 22 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF |
| 23 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | 23 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
| 24 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | 24 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
| 25 POSSIBILITY OF SUCH DAMAGE. | 25 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 26 ***********************************************************************/ | 26 */ |
| 27 | 27 |
| 28 #ifdef HAVE_CONFIG_H | 28 #ifdef HAVE_CONFIG_H |
| 29 #include "config.h" | 29 #include "config.h" |
| 30 #endif | 30 #endif |
| 31 | 31 |
| 32 #include <xmmintrin.h> |
| 33 #include <emmintrin.h> |
| 34 #include <smmintrin.h> |
| 32 #include "main.h" | 35 #include "main.h" |
| 36 #include "celt/x86/x86cpu.h" |
| 37 |
| 33 #include "stack_alloc.h" | 38 #include "stack_alloc.h" |
| 34 | 39 |
| 35 typedef struct { | 40 typedef struct { |
| 36 opus_int32 sLPC_Q14[ MAX_SUB_FRAME_LENGTH + NSQ_LPC_BUF_LENGTH ]; | 41 opus_int32 sLPC_Q14[ MAX_SUB_FRAME_LENGTH + NSQ_LPC_BUF_LENGTH ]; |
| 37 opus_int32 RandState[ DECISION_DELAY ]; | 42 opus_int32 RandState[ DECISION_DELAY ]; |
| 38 opus_int32 Q_Q10[ DECISION_DELAY ]; | 43 opus_int32 Q_Q10[ DECISION_DELAY ]; |
| 39 opus_int32 Xq_Q14[ DECISION_DELAY ]; | 44 opus_int32 Xq_Q14[ DECISION_DELAY ]; |
| 40 opus_int32 Pred_Q15[ DECISION_DELAY ]; | 45 opus_int32 Pred_Q15[ DECISION_DELAY ]; |
| 41 opus_int32 Shape_Q14[ DECISION_DELAY ]; | 46 opus_int32 Shape_Q14[ DECISION_DELAY ]; |
| 42 opus_int32 sAR2_Q14[ MAX_SHAPE_LPC_ORDER ]; | 47 opus_int32 sAR2_Q14[ MAX_SHAPE_LPC_ORDER ]; |
| 43 opus_int32 LF_AR_Q14; | 48 opus_int32 LF_AR_Q14; |
| 44 opus_int32 Seed; | 49 opus_int32 Seed; |
| 45 opus_int32 SeedInit; | 50 opus_int32 SeedInit; |
| 46 opus_int32 RD_Q10; | 51 opus_int32 RD_Q10; |
| 47 } NSQ_del_dec_struct; | 52 } NSQ_del_dec_struct; |
| 48 | 53 |
| 49 typedef struct { | 54 typedef struct { |
| 50 opus_int32 Q_Q10; | 55 opus_int32 Q_Q10; |
| 51 opus_int32 RD_Q10; | 56 opus_int32 RD_Q10; |
| 52 opus_int32 xq_Q14; | 57 opus_int32 xq_Q14; |
| 53 opus_int32 LF_AR_Q14; | 58 opus_int32 LF_AR_Q14; |
| 54 opus_int32 sLTP_shp_Q14; | 59 opus_int32 sLTP_shp_Q14; |
| 55 opus_int32 LPC_exc_Q14; | 60 opus_int32 LPC_exc_Q14; |
| 56 } NSQ_sample_struct; | 61 } NSQ_sample_struct; |
| 57 | 62 |
| 58 typedef NSQ_sample_struct NSQ_sample_pair[ 2 ]; | 63 typedef NSQ_sample_struct NSQ_sample_pair[ 2 ]; |
| 59 | 64 |
| 60 static OPUS_INLINE void silk_nsq_del_dec_scale_states( | 65 static OPUS_INLINE void silk_nsq_del_dec_scale_states_sse4_1( |
| 61 const silk_encoder_state *psEncC, /* I Encoder State
*/ | 66 const silk_encoder_state *psEncC, /* I Encoder State
*/ |
| 62 silk_nsq_state *NSQ, /* I/O NSQ state
*/ | 67 silk_nsq_state *NSQ, /* I/O NSQ state
*/ |
| 63 NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision sta
tes */ | 68 NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision sta
tes */ |
| 64 const opus_int32 x_Q3[], /* I Input in Q3
*/ | 69 const opus_int32 x_Q3[], /* I Input in Q3
*/ |
| 65 opus_int32 x_sc_Q10[], /* O Input scaled with 1/
Gain in Q10 */ | 70 opus_int32 x_sc_Q10[], /* O Input scaled with 1/
Gain in Q10 */ |
| 66 const opus_int16 sLTP[], /* I Re-whitened LTP stat
e in Q0 */ | 71 const opus_int16 sLTP[], /* I Re-whitened LTP stat
e in Q0 */ |
| 67 opus_int32 sLTP_Q15[], /* O LTP state matching s
caled input */ | 72 opus_int32 sLTP_Q15[], /* O LTP state matching s
caled input */ |
| 68 opus_int subfr, /* I Subframe number
*/ | 73 opus_int subfr, /* I Subframe number
*/ |
| 69 opus_int nStatesDelayedDecision, /* I Number of del dec st
ates */ | 74 opus_int nStatesDelayedDecision, /* I Number of del dec st
ates */ |
| 70 const opus_int LTP_scale_Q14, /* I LTP state scaling
*/ | 75 const opus_int LTP_scale_Q14, /* I LTP state scaling
*/ |
| 71 const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I
*/ | 76 const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I
*/ |
| 72 const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lag
*/ | 77 const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lag
*/ |
| 73 const opus_int signal_type, /* I Signal type
*/ | 78 const opus_int signal_type, /* I Signal type
*/ |
| 74 const opus_int decisionDelay /* I Decision delay
*/ | 79 const opus_int decisionDelay /* I Decision delay
*/ |
| 75 ); | 80 ); |
| 76 | 81 |
| 77 /******************************************/ | 82 /******************************************/ |
| 78 /* Noise shape quantizer for one subframe */ | 83 /* Noise shape quantizer for one subframe */ |
| 79 /******************************************/ | 84 /******************************************/ |
| 80 static OPUS_INLINE void silk_noise_shape_quantizer_del_dec( | 85 static OPUS_INLINE void silk_noise_shape_quantizer_del_dec_sse4_1( |
| 81 silk_nsq_state *NSQ, /* I/O NSQ state
*/ | 86 silk_nsq_state *NSQ, /* I/O NSQ state
*/ |
| 82 NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states
*/ | 87 NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states
*/ |
| 83 opus_int signalType, /* I Signal type
*/ | 88 opus_int signalType, /* I Signal type
*/ |
| 84 const opus_int32 x_Q10[], /* I
*/ | 89 const opus_int32 x_Q10[], /* I
*/ |
| 85 opus_int8 pulses[], /* O
*/ | 90 opus_int8 pulses[], /* O
*/ |
| 86 opus_int16 xq[], /* O
*/ | 91 opus_int16 xq[], /* O
*/ |
| 87 opus_int32 sLTP_Q15[], /* I/O LTP filter state
*/ | 92 opus_int32 sLTP_Q15[], /* I/O LTP filter state
*/ |
| 88 opus_int32 delayedGain_Q10[], /* I/O Gain delay buffer
*/ | 93 opus_int32 delayedGain_Q10[], /* I/O Gain delay buffer
*/ |
| 89 const opus_int16 a_Q12[], /* I Short term prediction co
efs */ | 94 const opus_int16 a_Q12[], /* I Short term prediction co
efs */ |
| 90 const opus_int16 b_Q14[], /* I Long term prediction coe
fs */ | 95 const opus_int16 b_Q14[], /* I Long term prediction coe
fs */ |
| 91 const opus_int16 AR_shp_Q13[], /* I Noise shaping coefs
*/ | 96 const opus_int16 AR_shp_Q13[], /* I Noise shaping coefs
*/ |
| 92 opus_int lag, /* I Pitch lag
*/ | 97 opus_int lag, /* I Pitch lag
*/ |
| 93 opus_int32 HarmShapeFIRPacked_Q14, /* I
*/ | 98 opus_int32 HarmShapeFIRPacked_Q14, /* I
*/ |
| 94 opus_int Tilt_Q14, /* I Spectral tilt
*/ | 99 opus_int Tilt_Q14, /* I Spectral tilt
*/ |
| 95 opus_int32 LF_shp_Q14, /* I
*/ | 100 opus_int32 LF_shp_Q14, /* I
*/ |
| 96 opus_int32 Gain_Q16, /* I
*/ | 101 opus_int32 Gain_Q16, /* I
*/ |
| 97 opus_int Lambda_Q10, /* I
*/ | 102 opus_int Lambda_Q10, /* I
*/ |
| 98 opus_int offset_Q10, /* I
*/ | 103 opus_int offset_Q10, /* I
*/ |
| 99 opus_int length, /* I Input length
*/ | 104 opus_int length, /* I Input length
*/ |
| 100 opus_int subfr, /* I Subframe number
*/ | 105 opus_int subfr, /* I Subframe number
*/ |
| 101 opus_int shapingLPCOrder, /* I Shaping LPC filter order
*/ | 106 opus_int shapingLPCOrder, /* I Shaping LPC filter order
*/ |
| 102 opus_int predictLPCOrder, /* I Prediction filter order
*/ | 107 opus_int predictLPCOrder, /* I Prediction filter order
*/ |
| 103 opus_int warping_Q16, /* I
*/ | 108 opus_int warping_Q16, /* I
*/ |
| 104 opus_int nStatesDelayedDecision, /* I Number of states in deci
sion tree */ | 109 opus_int nStatesDelayedDecision, /* I Number of states in deci
sion tree */ |
| 105 opus_int *smpl_buf_idx, /* I Index to newest samples
in buffers */ | 110 opus_int *smpl_buf_idx, /* I Index to newest samples
in buffers */ |
| 106 opus_int decisionDelay /* I
*/ | 111 opus_int decisionDelay /* I
*/ |
| 107 ); | 112 ); |
| 108 | 113 |
| 109 void silk_NSQ_del_dec( | 114 void silk_NSQ_del_dec_sse4_1( |
| 110 const silk_encoder_state *psEncC, /* I
/O Encoder State */ | 115 const silk_encoder_state *psEncC, /* I
/O Encoder State */ |
| 111 silk_nsq_state *NSQ, /* I
/O NSQ state */ | 116 silk_nsq_state *NSQ, /* I
/O NSQ state */ |
| 112 SideInfoIndices *psIndices, /* I
/O Quantization Indices */ | 117 SideInfoIndices *psIndices, /* I
/O Quantization Indices */ |
| 113 const opus_int32 x_Q3[], /* I
Prefiltered input signal */ | 118 const opus_int32 x_Q3[], /* I
Prefiltered input signal */ |
| 114 opus_int8 pulses[], /* O
Quantized pulse signal */ | 119 opus_int8 pulses[], /* O
Quantized pulse signal */ |
| 115 const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I
Short term prediction coefs */ | 120 const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I
Short term prediction coefs */ |
| 116 const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I
Long term prediction coefs */ | 121 const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I
Long term prediction coefs */ |
| 117 const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /
* I Noise shaping coefs */ | 122 const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /
* I Noise shaping coefs */ |
| 118 const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I
Long term shaping coefs */ | 123 const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I
Long term shaping coefs */ |
| 119 const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I
Spectral tilt */ | 124 const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I
Spectral tilt */ |
| (...skipping 117 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 237 } | 242 } |
| 238 | 243 |
| 239 subfr = 0; | 244 subfr = 0; |
| 240 } | 245 } |
| 241 | 246 |
| 242 /* Rewhiten with new A coefs */ | 247 /* Rewhiten with new A coefs */ |
| 243 start_idx = psEncC->ltp_mem_length - lag - psEncC->predictLPCOrd
er - LTP_ORDER / 2; | 248 start_idx = psEncC->ltp_mem_length - lag - psEncC->predictLPCOrd
er - LTP_ORDER / 2; |
| 244 silk_assert( start_idx > 0 ); | 249 silk_assert( start_idx > 0 ); |
| 245 | 250 |
| 246 silk_LPC_analysis_filter( &sLTP[ start_idx ], &NSQ->xq[ start_id
x + k * psEncC->subfr_length ], | 251 silk_LPC_analysis_filter( &sLTP[ start_idx ], &NSQ->xq[ start_id
x + k * psEncC->subfr_length ], |
| 247 A_Q12, psEncC->ltp_mem_length - start_idx, psEncC->predictLP
COrder ); | 252 A_Q12, psEncC->ltp_mem_length - start_idx, psEncC->predictLP
COrder, psEncC->arch ); |
| 248 | 253 |
| 249 NSQ->sLTP_buf_idx = psEncC->ltp_mem_length; | 254 NSQ->sLTP_buf_idx = psEncC->ltp_mem_length; |
| 250 NSQ->rewhite_flag = 1; | 255 NSQ->rewhite_flag = 1; |
| 251 } | 256 } |
| 252 } | 257 } |
| 253 | 258 |
| 254 silk_nsq_del_dec_scale_states( psEncC, NSQ, psDelDec, x_Q3, x_sc_Q10, sL
TP, sLTP_Q15, k, | 259 silk_nsq_del_dec_scale_states_sse4_1( psEncC, NSQ, psDelDec, x_Q3, x_sc_
Q10, sLTP, sLTP_Q15, k, |
| 255 psEncC->nStatesDelayedDecision, LTP_scale_Q14, Gains_Q16, pitchL, ps
Indices->signalType, decisionDelay ); | 260 psEncC->nStatesDelayedDecision, LTP_scale_Q14, Gains_Q16, pitchL, ps
Indices->signalType, decisionDelay ); |
| 256 | 261 |
| 257 silk_noise_shape_quantizer_del_dec( NSQ, psDelDec, psIndices->signalType
, x_sc_Q10, pulses, pxq, sLTP_Q15, | 262 silk_noise_shape_quantizer_del_dec_sse4_1( NSQ, psDelDec, psIndices->sig
nalType, x_sc_Q10, pulses, pxq, sLTP_Q15, |
| 258 delayedGain_Q10, A_Q12, B_Q14, AR_shp_Q13, lag, HarmShapeFIRPacked_Q
14, Tilt_Q14[ k ], LF_shp_Q14[ k ], | 263 delayedGain_Q10, A_Q12, B_Q14, AR_shp_Q13, lag, HarmShapeFIRPacked_Q
14, Tilt_Q14[ k ], LF_shp_Q14[ k ], |
| 259 Gains_Q16[ k ], Lambda_Q10, offset_Q10, psEncC->subfr_length, subfr+
+, psEncC->shapingLPCOrder, | 264 Gains_Q16[ k ], Lambda_Q10, offset_Q10, psEncC->subfr_length, subfr+
+, psEncC->shapingLPCOrder, |
| 260 psEncC->predictLPCOrder, psEncC->warping_Q16, psEncC->nStatesDelayed
Decision, &smpl_buf_idx, decisionDelay ); | 265 psEncC->predictLPCOrder, psEncC->warping_Q16, psEncC->nStatesDelayed
Decision, &smpl_buf_idx, decisionDelay ); |
| 261 | 266 |
| 262 x_Q3 += psEncC->subfr_length; | 267 x_Q3 += psEncC->subfr_length; |
| 263 pulses += psEncC->subfr_length; | 268 pulses += psEncC->subfr_length; |
| 264 pxq += psEncC->subfr_length; | 269 pxq += psEncC->subfr_length; |
| 265 } | 270 } |
| 266 | 271 |
| 267 /* Find winner */ | 272 /* Find winner */ |
| (...skipping 28 matching lines...) Expand all Loading... |
| 296 /* Save quantized speech signal */ | 301 /* Save quantized speech signal */ |
| 297 /* DEBUG_STORE_DATA( enc.pcm, &NSQ->xq[psEncC->ltp_mem_length], psEncC->fram
e_length * sizeof( opus_int16 ) ) */ | 302 /* DEBUG_STORE_DATA( enc.pcm, &NSQ->xq[psEncC->ltp_mem_length], psEncC->fram
e_length * sizeof( opus_int16 ) ) */ |
| 298 silk_memmove( NSQ->xq, &NSQ->xq[ psEncC->frame_length ],
psEncC->ltp_mem_length * sizeof( opus_int16 ) ); | 303 silk_memmove( NSQ->xq, &NSQ->xq[ psEncC->frame_length ],
psEncC->ltp_mem_length * sizeof( opus_int16 ) ); |
| 299 silk_memmove( NSQ->sLTP_shp_Q14, &NSQ->sLTP_shp_Q14[ psEncC->frame_length ],
psEncC->ltp_mem_length * sizeof( opus_int32 ) ); | 304 silk_memmove( NSQ->sLTP_shp_Q14, &NSQ->sLTP_shp_Q14[ psEncC->frame_length ],
psEncC->ltp_mem_length * sizeof( opus_int32 ) ); |
| 300 RESTORE_STACK; | 305 RESTORE_STACK; |
| 301 } | 306 } |
| 302 | 307 |
| 303 /******************************************/ | 308 /******************************************/ |
| 304 /* Noise shape quantizer for one subframe */ | 309 /* Noise shape quantizer for one subframe */ |
| 305 /******************************************/ | 310 /******************************************/ |
| 306 static OPUS_INLINE void silk_noise_shape_quantizer_del_dec( | 311 static OPUS_INLINE void silk_noise_shape_quantizer_del_dec_sse4_1( |
| 307 silk_nsq_state *NSQ, /* I/O NSQ state
*/ | 312 silk_nsq_state *NSQ, /* I/O NSQ state
*/ |
| 308 NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states
*/ | 313 NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states
*/ |
| 309 opus_int signalType, /* I Signal type
*/ | 314 opus_int signalType, /* I Signal type
*/ |
| 310 const opus_int32 x_Q10[], /* I
*/ | 315 const opus_int32 x_Q10[], /* I
*/ |
| 311 opus_int8 pulses[], /* O
*/ | 316 opus_int8 pulses[], /* O
*/ |
| 312 opus_int16 xq[], /* O
*/ | 317 opus_int16 xq[], /* O
*/ |
| 313 opus_int32 sLTP_Q15[], /* I/O LTP filter state
*/ | 318 opus_int32 sLTP_Q15[], /* I/O LTP filter state
*/ |
| 314 opus_int32 delayedGain_Q10[], /* I/O Gain delay buffer
*/ | 319 opus_int32 delayedGain_Q10[], /* I/O Gain delay buffer
*/ |
| 315 const opus_int16 a_Q12[], /* I Short term prediction co
efs */ | 320 const opus_int16 a_Q12[], /* I Short term prediction co
efs */ |
| 316 const opus_int16 b_Q14[], /* I Long term prediction coe
fs */ | 321 const opus_int16 b_Q14[], /* I Long term prediction coe
fs */ |
| (...skipping 18 matching lines...) Expand all Loading... |
| 335 opus_int i, j, k, Winner_ind, RDmin_ind, RDmax_ind, last_smple_idx; | 340 opus_int i, j, k, Winner_ind, RDmin_ind, RDmax_ind, last_smple_idx; |
| 336 opus_int32 Winner_rand_state; | 341 opus_int32 Winner_rand_state; |
| 337 opus_int32 LTP_pred_Q14, LPC_pred_Q14, n_AR_Q14, n_LTP_Q14; | 342 opus_int32 LTP_pred_Q14, LPC_pred_Q14, n_AR_Q14, n_LTP_Q14; |
| 338 opus_int32 n_LF_Q14, r_Q10, rr_Q10, rd1_Q10, rd2_Q10, RDmin_Q10, RDmax_Q10
; | 343 opus_int32 n_LF_Q14, r_Q10, rr_Q10, rd1_Q10, rd2_Q10, RDmin_Q10, RDmax_Q10
; |
| 339 opus_int32 q1_Q0, q1_Q10, q2_Q10, exc_Q14, LPC_exc_Q14, xq_Q14, Gain_Q10; | 344 opus_int32 q1_Q0, q1_Q10, q2_Q10, exc_Q14, LPC_exc_Q14, xq_Q14, Gain_Q10; |
| 340 opus_int32 tmp1, tmp2, sLF_AR_shp_Q14; | 345 opus_int32 tmp1, tmp2, sLF_AR_shp_Q14; |
| 341 opus_int32 *pred_lag_ptr, *shp_lag_ptr, *psLPC_Q14; | 346 opus_int32 *pred_lag_ptr, *shp_lag_ptr, *psLPC_Q14; |
| 342 VARDECL( NSQ_sample_pair, psSampleState ); | 347 VARDECL( NSQ_sample_pair, psSampleState ); |
| 343 NSQ_del_dec_struct *psDD; | 348 NSQ_del_dec_struct *psDD; |
| 344 NSQ_sample_struct *psSS; | 349 NSQ_sample_struct *psSS; |
| 350 |
| 351 __m128i a_Q12_0123, a_Q12_4567, a_Q12_89AB, a_Q12_CDEF; |
| 352 __m128i b_Q12_0123, b_sr_Q12_0123; |
| 345 SAVE_STACK; | 353 SAVE_STACK; |
| 346 | 354 |
| 347 silk_assert( nStatesDelayedDecision > 0 ); | 355 silk_assert( nStatesDelayedDecision > 0 ); |
| 348 ALLOC( psSampleState, nStatesDelayedDecision, NSQ_sample_pair ); | 356 ALLOC( psSampleState, nStatesDelayedDecision, NSQ_sample_pair ); |
| 349 | 357 |
| 350 shp_lag_ptr = &NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - lag + HARM_SHAPE_
FIR_TAPS / 2 ]; | 358 shp_lag_ptr = &NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - lag + HARM_SHAPE_
FIR_TAPS / 2 ]; |
| 351 pred_lag_ptr = &sLTP_Q15[ NSQ->sLTP_buf_idx - lag + LTP_ORDER / 2 ]; | 359 pred_lag_ptr = &sLTP_Q15[ NSQ->sLTP_buf_idx - lag + LTP_ORDER / 2 ]; |
| 352 Gain_Q10 = silk_RSHIFT( Gain_Q16, 6 ); | 360 Gain_Q10 = silk_RSHIFT( Gain_Q16, 6 ); |
| 353 | 361 |
| 362 a_Q12_0123 = OP_CVTEPI16_EPI32_M64( a_Q12 ); |
| 363 a_Q12_4567 = OP_CVTEPI16_EPI32_M64( a_Q12 + 4 ); |
| 364 |
| 365 if( opus_likely( predictLPCOrder == 16 ) ) { |
| 366 a_Q12_89AB = OP_CVTEPI16_EPI32_M64( a_Q12 + 8 ); |
| 367 a_Q12_CDEF = OP_CVTEPI16_EPI32_M64( a_Q12 + 12 ); |
| 368 } |
| 369 |
| 370 if( signalType == TYPE_VOICED ){ |
| 371 b_Q12_0123 = OP_CVTEPI16_EPI32_M64( b_Q14 ); |
| 372 b_sr_Q12_0123 = _mm_shuffle_epi32( b_Q12_0123, _MM_SHUFFLE( 0, 3, 2, 1 )
); /* equal shift right 4 bytes */ |
| 373 } |
| 354 for( i = 0; i < length; i++ ) { | 374 for( i = 0; i < length; i++ ) { |
| 355 /* Perform common calculations used in all states */ | 375 /* Perform common calculations used in all states */ |
| 356 | 376 |
| 357 /* Long-term prediction */ | 377 /* Long-term prediction */ |
| 358 if( signalType == TYPE_VOICED ) { | 378 if( signalType == TYPE_VOICED ) { |
| 359 /* Unrolled loop */ | 379 /* Unrolled loop */ |
| 360 /* Avoids introducing a bias because silk_SMLAWB() always rounds to
-inf */ | 380 /* Avoids introducing a bias because silk_SMLAWB() always rounds to
-inf */ |
| 361 LTP_pred_Q14 = 2; | 381 LTP_pred_Q14 = 2; |
| 362 LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ 0 ], b_Q14[
0 ] ); | 382 { |
| 363 LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ -1 ], b_Q14[
1 ] ); | 383 __m128i tmpa, tmpb, pred_lag_ptr_tmp; |
| 364 LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ -2 ], b_Q14[
2 ] ); | 384 pred_lag_ptr_tmp = _mm_loadu_si128( (__m128i *)(&pred_lag_ptr
[ -3 ] ) ); |
| 365 LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ -3 ], b_Q14[
3 ] ); | 385 pred_lag_ptr_tmp = _mm_shuffle_epi32( pred_lag_ptr_tmp, 0x1B
); |
| 366 LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ -4 ], b_Q14[
4 ] ); | 386 tmpa = _mm_mul_epi32( pred_lag_ptr_tmp, b_Q12_012
3 ); |
| 367 LTP_pred_Q14 = silk_LSHIFT( LTP_pred_Q14, 1 );
/* Q13 -> Q14 */ | 387 tmpa = _mm_srli_si128( tmpa, 2 ); |
| 368 pred_lag_ptr++; | 388 |
| 389 pred_lag_ptr_tmp = _mm_shuffle_epi32( pred_lag_ptr_tmp, _MM_SHUF
FLE( 0, 3, 2, 1 ) );/* equal shift right 4 bytes */ |
| 390 pred_lag_ptr_tmp = _mm_mul_epi32( pred_lag_ptr_tmp, b_sr_Q12_
0123 ); |
| 391 pred_lag_ptr_tmp = _mm_srli_si128( pred_lag_ptr_tmp, 2 ); |
| 392 pred_lag_ptr_tmp = _mm_add_epi32( pred_lag_ptr_tmp, tmpa ); |
| 393 |
| 394 tmpb = _mm_shuffle_epi32( pred_lag_ptr_tmp, _MM_SHUFFLE( 0, 0, 3
, 2 ) );/* equal shift right 8 bytes */ |
| 395 pred_lag_ptr_tmp = _mm_add_epi32( pred_lag_ptr_tmp, tmpb ); |
| 396 LTP_pred_Q14 += _mm_cvtsi128_si32( pred_lag_ptr_tmp ); |
| 397 |
| 398 LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ -4 ], b_
Q14[ 4 ] ); |
| 399 LTP_pred_Q14 = silk_LSHIFT( LTP_pred_Q14, 1 );
/* Q13 -> Q14 */ |
| 400 pred_lag_ptr++; |
| 401 } |
| 369 } else { | 402 } else { |
| 370 LTP_pred_Q14 = 0; | 403 LTP_pred_Q14 = 0; |
| 371 } | 404 } |
| 372 | 405 |
| 373 /* Long-term shaping */ | 406 /* Long-term shaping */ |
| 374 if( lag > 0 ) { | 407 if( lag > 0 ) { |
| 375 /* Symmetric, packed FIR coefficients */ | 408 /* Symmetric, packed FIR coefficients */ |
| 376 n_LTP_Q14 = silk_SMULWB( silk_ADD32( shp_lag_ptr[ 0 ], shp_lag_ptr[
-2 ] ), HarmShapeFIRPacked_Q14 ); | 409 n_LTP_Q14 = silk_SMULWB( silk_ADD32( shp_lag_ptr[ 0 ], shp_lag_ptr[
-2 ] ), HarmShapeFIRPacked_Q14 ); |
| 377 n_LTP_Q14 = silk_SMLAWT( n_LTP_Q14, shp_lag_ptr[ -1 ],
HarmShapeFIRPacked_Q14 ); | 410 n_LTP_Q14 = silk_SMLAWT( n_LTP_Q14, shp_lag_ptr[ -1 ],
HarmShapeFIRPacked_Q14 ); |
| 378 n_LTP_Q14 = silk_SUB_LSHIFT32( LTP_pred_Q14, n_LTP_Q14, 2 );
/* Q12 -> Q14 */ | 411 n_LTP_Q14 = silk_SUB_LSHIFT32( LTP_pred_Q14, n_LTP_Q14, 2 );
/* Q12 -> Q14 */ |
| 379 shp_lag_ptr++; | 412 shp_lag_ptr++; |
| 380 } else { | 413 } else { |
| 381 n_LTP_Q14 = 0; | 414 n_LTP_Q14 = 0; |
| 382 } | 415 } |
| 383 | 416 { |
| 384 for( k = 0; k < nStatesDelayedDecision; k++ ) { | 417 __m128i tmpa, tmpb, psLPC_Q14_tmp, a_Q12_tmp; |
| 385 /* Delayed decision state */ | 418 |
| 386 psDD = &psDelDec[ k ]; | 419 for( k = 0; k < nStatesDelayedDecision; k++ ) { |
| 387 | 420 /* Delayed decision state */ |
| 388 /* Sample state */ | 421 psDD = &psDelDec[ k ]; |
| 389 psSS = psSampleState[ k ]; | 422 |
| 390 | 423 /* Sample state */ |
| 391 /* Generate dither */ | 424 psSS = psSampleState[ k ]; |
| 392 psDD->Seed = silk_RAND( psDD->Seed ); | 425 |
| 393 | 426 /* Generate dither */ |
| 394 /* Pointer used in short term prediction and shaping */ | 427 psDD->Seed = silk_RAND( psDD->Seed ); |
| 395 psLPC_Q14 = &psDD->sLPC_Q14[ NSQ_LPC_BUF_LENGTH - 1 + i ]; | 428 |
| 396 /* Short-term prediction */ | 429 /* Pointer used in short term prediction and shaping */ |
| 397 silk_assert( predictLPCOrder == 10 || predictLPCOrder == 16 ); | 430 psLPC_Q14 = &psDD->sLPC_Q14[ NSQ_LPC_BUF_LENGTH - 1 + i ]; |
| 398 /* Avoids introducing a bias because silk_SMLAWB() always rounds to
-inf */ | 431 /* Short-term prediction */ |
| 399 LPC_pred_Q14 = silk_RSHIFT( predictLPCOrder, 1 ); | 432 silk_assert( predictLPCOrder == 10 || predictLPCOrder == 16 ); |
| 400 LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ 0 ], a_Q12[ 0
] ); | 433 /* Avoids introducing a bias because silk_SMLAWB() always rounds
to -inf */ |
| 401 LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -1 ], a_Q12[ 1
] ); | 434 LPC_pred_Q14 = silk_RSHIFT( predictLPCOrder, 1 ); |
| 402 LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -2 ], a_Q12[ 2
] ); | 435 |
| 403 LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -3 ], a_Q12[ 3
] ); | 436 tmpb = _mm_setzero_si128(); |
| 404 LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -4 ], a_Q12[ 4
] ); | 437 |
| 405 LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -5 ], a_Q12[ 5
] ); | 438 /* step 1 */ |
| 406 LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -6 ], a_Q12[ 6
] ); | 439 psLPC_Q14_tmp = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[ -3 ]
) ); /* -3, -2 , -1, 0 */ |
| 407 LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -7 ], a_Q12[ 7
] ); | 440 psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, 0x1B );
/* 0, -1, -2, -3 */ |
| 408 LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -8 ], a_Q12[ 8
] ); | 441 tmpa = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_0123 );
/* 0, -1, -2, -3 * 0123 -> 0*0, 2*-2 */ |
| 409 LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -9 ], a_Q12[ 9
] ); | 442 |
| 410 if( predictLPCOrder == 16 ) { | 443 tmpa = _mm_srli_epi64( tmpa, 16 ); |
| 411 LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -10 ], a_Q1
2[ 10 ] ); | 444 tmpb = _mm_add_epi32( tmpb, tmpa ); |
| 412 LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -11 ], a_Q1
2[ 11 ] ); | 445 |
| 413 LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -12 ], a_Q1
2[ 12 ] ); | 446 psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, _MM_SHUFFLE( 0
, 3, 2, 1 ) ); /* equal shift right 4 bytes */ |
| 414 LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -13 ], a_Q1
2[ 13 ] ); | 447 a_Q12_tmp = _mm_shuffle_epi32( a_Q12_0123, _MM_SHUFFLE(0, 3, 2,
1 ) ); /* equal shift right 4 bytes */ |
| 415 LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -14 ], a_Q1
2[ 14 ] ); | 448 psLPC_Q14_tmp = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_tmp ); /*
1*-1, 3*-3 */ |
| 416 LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -15 ], a_Q1
2[ 15 ] ); | 449 psLPC_Q14_tmp = _mm_srli_epi64( psLPC_Q14_tmp, 16 ); |
| 450 tmpb = _mm_add_epi32( tmpb, psLPC_Q14_tmp ); |
| 451 |
| 452 /* step 2 */ |
| 453 psLPC_Q14_tmp = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[ -7 ]
) ); |
| 454 psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, 0x1B ); |
| 455 tmpa = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_4567 ); |
| 456 tmpa = _mm_srli_epi64( tmpa, 16 ); |
| 457 tmpb = _mm_add_epi32( tmpb, tmpa ); |
| 458 |
| 459 psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, _MM_SHUFFLE( 0
, 3, 2, 1 ) ); /* equal shift right 4 bytes */ |
| 460 a_Q12_tmp = _mm_shuffle_epi32( a_Q12_4567, _MM_SHUFFLE(0, 3, 2,
1 ) ); /* equal shift right 4 bytes */ |
| 461 psLPC_Q14_tmp = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_tmp ); |
| 462 psLPC_Q14_tmp = _mm_srli_epi64( psLPC_Q14_tmp, 16 ); |
| 463 tmpb = _mm_add_epi32( tmpb, psLPC_Q14_tmp ); |
| 464 |
| 465 if ( opus_likely( predictLPCOrder == 16 ) ) |
| 466 { |
| 467 /* step 3 */ |
| 468 psLPC_Q14_tmp = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[ -
11 ] ) ); |
| 469 psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, 0x1B ); |
| 470 tmpa = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_89AB )
; |
| 471 tmpa = _mm_srli_epi64( tmpa, 16 ); |
| 472 tmpb = _mm_add_epi32( tmpb, tmpa ); |
| 473 |
| 474 psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, _MM_SHUFFL
E( 0, 3, 2, 1 ) ); /* equal shift right 4 bytes */ |
| 475 a_Q12_tmp = _mm_shuffle_epi32( a_Q12_89AB, _MM_SHUFFLE(0, 3,
2, 1 ) );/* equal shift right 4 bytes */ |
| 476 psLPC_Q14_tmp = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_tmp ); |
| 477 psLPC_Q14_tmp = _mm_srli_epi64( psLPC_Q14_tmp, 16 ); |
| 478 tmpb = _mm_add_epi32( tmpb, psLPC_Q14_tmp ); |
| 479 |
| 480 /* step 4 */ |
| 481 psLPC_Q14_tmp = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[ -
15 ] ) ); |
| 482 psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, 0x1B ); |
| 483 tmpa = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_CDEF )
; |
| 484 tmpa = _mm_srli_epi64( tmpa, 16 ); |
| 485 tmpb = _mm_add_epi32( tmpb, tmpa ); |
| 486 |
| 487 psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, _MM_SHUFFL
E( 0, 3, 2, 1 ) ); /* equal shift right 4 bytes */ |
| 488 a_Q12_tmp = _mm_shuffle_epi32( a_Q12_CDEF, _MM_SHUFFLE(0, 3,
2, 1 ) ); /* equal shift right 4 bytes */ |
| 489 psLPC_Q14_tmp = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_tmp ); |
| 490 psLPC_Q14_tmp = _mm_srli_epi64( psLPC_Q14_tmp, 16 ); |
| 491 tmpb = _mm_add_epi32( tmpb, psLPC_Q14_tmp ); |
| 492 |
| 493 /* add at last */ |
| 494 /* equal shift right 8 bytes*/ |
| 495 tmpa = _mm_shuffle_epi32( tmpb, _MM_SHUFFLE( 0, 0
, 3, 2 ) ); |
| 496 tmpb = _mm_add_epi32( tmpb, tmpa ); |
| 497 LPC_pred_Q14 += _mm_cvtsi128_si32( tmpb ); |
| 498 } |
| 499 else |
| 500 { |
| 501 /* add at last */ |
| 502 tmpa = _mm_shuffle_epi32( tmpb, _MM_SHUFFLE( 0, 0
, 3, 2 ) ); /* equal shift right 8 bytes*/ |
| 503 tmpb = _mm_add_epi32( tmpb, tmpa ); |
| 504 LPC_pred_Q14 += _mm_cvtsi128_si32( tmpb ); |
| 505 |
| 506 LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -8 ], a
_Q12[ 8 ] ); |
| 507 LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -9 ], a
_Q12[ 9 ] ); |
| 508 } |
| 509 |
| 510 LPC_pred_Q14 = silk_LSHIFT( LPC_pred_Q14, 4 ); /* Q10 -> Q14 */ |
| 511 |
| 512 /* Noise shape feedback */ |
| 513 silk_assert( ( shapingLPCOrder & 1 ) == 0 ); /* check that ord
er is even */ |
| 514 /* Output of lowpass section */ |
| 515 tmp2 = silk_SMLAWB( psLPC_Q14[ 0 ], psDD->sAR2_Q14[ 0 ], warping
_Q16 ); |
| 516 /* Output of allpass section */ |
| 517 tmp1 = silk_SMLAWB( psDD->sAR2_Q14[ 0 ], psDD->sAR2_Q14[ 1 ] - t
mp2, warping_Q16 ); |
| 518 psDD->sAR2_Q14[ 0 ] = tmp2; |
| 519 n_AR_Q14 = silk_RSHIFT( shapingLPCOrder, 1 ); |
| 520 n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp2, AR_shp_Q13[ 0 ] ); |
| 521 /* Loop over allpass sections */ |
| 522 for( j = 2; j < shapingLPCOrder; j += 2 ) { |
| 523 /* Output of allpass section */ |
| 524 tmp2 = silk_SMLAWB( psDD->sAR2_Q14[ j - 1 ], psDD->sAR2_Q14[
j + 0 ] - tmp1, warping_Q16 ); |
| 525 psDD->sAR2_Q14[ j - 1 ] = tmp1; |
| 526 n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp1, AR_shp_Q13[ j - 1 ]
); |
| 527 /* Output of allpass section */ |
| 528 tmp1 = silk_SMLAWB( psDD->sAR2_Q14[ j + 0 ], psDD->sAR2_Q14[
j + 1 ] - tmp2, warping_Q16 ); |
| 529 psDD->sAR2_Q14[ j + 0 ] = tmp2; |
| 530 n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp2, AR_shp_Q13[ j ] ); |
| 531 } |
| 532 psDD->sAR2_Q14[ shapingLPCOrder - 1 ] = tmp1; |
| 533 n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp1, AR_shp_Q13[ shapingLPCOr
der - 1 ] ); |
| 534 |
| 535 n_AR_Q14 = silk_LSHIFT( n_AR_Q14, 1 );
/* Q11 -> Q12 */ |
| 536 n_AR_Q14 = silk_SMLAWB( n_AR_Q14, psDD->LF_AR_Q14, Tilt_Q14 );
/* Q12 */ |
| 537 n_AR_Q14 = silk_LSHIFT( n_AR_Q14, 2 );
/* Q12 -> Q14 */ |
| 538 |
| 539 n_LF_Q14 = silk_SMULWB( psDD->Shape_Q14[ *smpl_buf_idx ], LF_shp
_Q14 ); /* Q12 */ |
| 540 n_LF_Q14 = silk_SMLAWT( n_LF_Q14, psDD->LF_AR_Q14, LF_shp_Q14 );
/* Q12 */ |
| 541 n_LF_Q14 = silk_LSHIFT( n_LF_Q14, 2 );
/* Q12 -> Q14 */ |
| 542 |
| 543 /* Input minus prediction plus noise feedback
*/ |
| 544 /* r = x[ i ] - LTP_pred - LPC_pred + n_AR + n_Tilt + n_LF + n_L
TP */ |
| 545 tmp1 = silk_ADD32( n_AR_Q14, n_LF_Q14 );
/* Q14 */ |
| 546 tmp2 = silk_ADD32( n_LTP_Q14, LPC_pred_Q14 );
/* Q13 */ |
| 547 tmp1 = silk_SUB32( tmp2, tmp1 );
/* Q13 */ |
| 548 tmp1 = silk_RSHIFT_ROUND( tmp1, 4 );
/* Q10 */ |
| 549 |
| 550 r_Q10 = silk_SUB32( x_Q10[ i ], tmp1 );
/* residual error Q10 */ |
| 551 |
| 552 /* Flip sign depending on dither */ |
| 553 if ( psDD->Seed < 0 ) { |
| 554 r_Q10 = -r_Q10; |
| 555 } |
| 556 r_Q10 = silk_LIMIT_32( r_Q10, -(31 << 10), 30 << 10 ); |
| 557 |
| 558 /* Find two quantization level candidates and measure their rate
-distortion */ |
| 559 q1_Q10 = silk_SUB32( r_Q10, offset_Q10 ); |
| 560 q1_Q0 = silk_RSHIFT( q1_Q10, 10 ); |
| 561 if( q1_Q0 > 0 ) { |
| 562 q1_Q10 = silk_SUB32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_
ADJUST_Q10 ); |
| 563 q1_Q10 = silk_ADD32( q1_Q10, offset_Q10 ); |
| 564 q2_Q10 = silk_ADD32( q1_Q10, 1024 ); |
| 565 rd1_Q10 = silk_SMULBB( q1_Q10, Lambda_Q10 ); |
| 566 rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 ); |
| 567 } else if( q1_Q0 == 0 ) { |
| 568 q1_Q10 = offset_Q10; |
| 569 q2_Q10 = silk_ADD32( q1_Q10, 1024 - QUANT_LEVEL_ADJUST_Q10
); |
| 570 rd1_Q10 = silk_SMULBB( q1_Q10, Lambda_Q10 ); |
| 571 rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 ); |
| 572 } else if( q1_Q0 == -1 ) { |
| 573 q2_Q10 = offset_Q10; |
| 574 q1_Q10 = silk_SUB32( q2_Q10, 1024 - QUANT_LEVEL_ADJUST_Q10
); |
| 575 rd1_Q10 = silk_SMULBB( -q1_Q10, Lambda_Q10 ); |
| 576 rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 ); |
| 577 } else { /* q1_Q0 < -1 */ |
| 578 q1_Q10 = silk_ADD32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_
ADJUST_Q10 ); |
| 579 q1_Q10 = silk_ADD32( q1_Q10, offset_Q10 ); |
| 580 q2_Q10 = silk_ADD32( q1_Q10, 1024 ); |
| 581 rd1_Q10 = silk_SMULBB( -q1_Q10, Lambda_Q10 ); |
| 582 rd2_Q10 = silk_SMULBB( -q2_Q10, Lambda_Q10 ); |
| 583 } |
| 584 rr_Q10 = silk_SUB32( r_Q10, q1_Q10 ); |
| 585 rd1_Q10 = silk_RSHIFT( silk_SMLABB( rd1_Q10, rr_Q10, rr_Q10 ), 1
0 ); |
| 586 rr_Q10 = silk_SUB32( r_Q10, q2_Q10 ); |
| 587 rd2_Q10 = silk_RSHIFT( silk_SMLABB( rd2_Q10, rr_Q10, rr_Q10 ), 1
0 ); |
| 588 |
| 589 if( rd1_Q10 < rd2_Q10 ) { |
| 590 psSS[ 0 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd1_Q10 ); |
| 591 psSS[ 1 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd2_Q10 ); |
| 592 psSS[ 0 ].Q_Q10 = q1_Q10; |
| 593 psSS[ 1 ].Q_Q10 = q2_Q10; |
| 594 } else { |
| 595 psSS[ 0 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd2_Q10 ); |
| 596 psSS[ 1 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd1_Q10 ); |
| 597 psSS[ 0 ].Q_Q10 = q2_Q10; |
| 598 psSS[ 1 ].Q_Q10 = q1_Q10; |
| 599 } |
| 600 |
| 601 /* Update states for best quantization */ |
| 602 |
| 603 /* Quantized excitation */ |
| 604 exc_Q14 = silk_LSHIFT32( psSS[ 0 ].Q_Q10, 4 ); |
| 605 if ( psDD->Seed < 0 ) { |
| 606 exc_Q14 = -exc_Q14; |
| 607 } |
| 608 |
| 609 /* Add predictions */ |
| 610 LPC_exc_Q14 = silk_ADD32( exc_Q14, LTP_pred_Q14 ); |
| 611 xq_Q14 = silk_ADD32( LPC_exc_Q14, LPC_pred_Q14 ); |
| 612 |
| 613 /* Update states */ |
| 614 sLF_AR_shp_Q14 = silk_SUB32( xq_Q14, n_AR_Q14 ); |
| 615 psSS[ 0 ].sLTP_shp_Q14 = silk_SUB32( sLF_AR_shp_Q14, n_LF_Q14 ); |
| 616 psSS[ 0 ].LF_AR_Q14 = sLF_AR_shp_Q14; |
| 617 psSS[ 0 ].LPC_exc_Q14 = LPC_exc_Q14; |
| 618 psSS[ 0 ].xq_Q14 = xq_Q14; |
| 619 |
| 620 /* Update states for second best quantization */ |
| 621 |
| 622 /* Quantized excitation */ |
| 623 exc_Q14 = silk_LSHIFT32( psSS[ 1 ].Q_Q10, 4 ); |
| 624 if ( psDD->Seed < 0 ) { |
| 625 exc_Q14 = -exc_Q14; |
| 626 } |
| 627 |
| 628 |
| 629 /* Add predictions */ |
| 630 LPC_exc_Q14 = silk_ADD32( exc_Q14, LTP_pred_Q14 ); |
| 631 xq_Q14 = silk_ADD32( LPC_exc_Q14, LPC_pred_Q14 ); |
| 632 |
| 633 /* Update states */ |
| 634 sLF_AR_shp_Q14 = silk_SUB32( xq_Q14, n_AR_Q14 ); |
| 635 psSS[ 1 ].sLTP_shp_Q14 = silk_SUB32( sLF_AR_shp_Q14, n_LF_Q14 ); |
| 636 psSS[ 1 ].LF_AR_Q14 = sLF_AR_shp_Q14; |
| 637 psSS[ 1 ].LPC_exc_Q14 = LPC_exc_Q14; |
| 638 psSS[ 1 ].xq_Q14 = xq_Q14; |
| 417 } | 639 } |
| 418 LPC_pred_Q14 = silk_LSHIFT( LPC_pred_Q14, 4 );
/* Q10 -> Q14 */ | |
| 419 | |
| 420 /* Noise shape feedback */ | |
| 421 silk_assert( ( shapingLPCOrder & 1 ) == 0 ); /* check that order i
s even */ | |
| 422 /* Output of lowpass section */ | |
| 423 tmp2 = silk_SMLAWB( psLPC_Q14[ 0 ], psDD->sAR2_Q14[ 0 ], warping_Q16
); | |
| 424 /* Output of allpass section */ | |
| 425 tmp1 = silk_SMLAWB( psDD->sAR2_Q14[ 0 ], psDD->sAR2_Q14[ 1 ] - tmp2,
warping_Q16 ); | |
| 426 psDD->sAR2_Q14[ 0 ] = tmp2; | |
| 427 n_AR_Q14 = silk_RSHIFT( shapingLPCOrder, 1 ); | |
| 428 n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp2, AR_shp_Q13[ 0 ] ); | |
| 429 /* Loop over allpass sections */ | |
| 430 for( j = 2; j < shapingLPCOrder; j += 2 ) { | |
| 431 /* Output of allpass section */ | |
| 432 tmp2 = silk_SMLAWB( psDD->sAR2_Q14[ j - 1 ], psDD->sAR2_Q14[ j +
0 ] - tmp1, warping_Q16 ); | |
| 433 psDD->sAR2_Q14[ j - 1 ] = tmp1; | |
| 434 n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp1, AR_shp_Q13[ j - 1 ] ); | |
| 435 /* Output of allpass section */ | |
| 436 tmp1 = silk_SMLAWB( psDD->sAR2_Q14[ j + 0 ], psDD->sAR2_Q14[ j +
1 ] - tmp2, warping_Q16 ); | |
| 437 psDD->sAR2_Q14[ j + 0 ] = tmp2; | |
| 438 n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp2, AR_shp_Q13[ j ] ); | |
| 439 } | |
| 440 psDD->sAR2_Q14[ shapingLPCOrder - 1 ] = tmp1; | |
| 441 n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp1, AR_shp_Q13[ shapingLPCOrder
- 1 ] ); | |
| 442 | |
| 443 n_AR_Q14 = silk_LSHIFT( n_AR_Q14, 1 );
/* Q11 -> Q12 */ | |
| 444 n_AR_Q14 = silk_SMLAWB( n_AR_Q14, psDD->LF_AR_Q14, Tilt_Q14 );
/* Q12 */ | |
| 445 n_AR_Q14 = silk_LSHIFT( n_AR_Q14, 2 );
/* Q12 -> Q14 */ | |
| 446 | |
| 447 n_LF_Q14 = silk_SMULWB( psDD->Shape_Q14[ *smpl_buf_idx ], LF_shp_Q14
); /* Q12 */ | |
| 448 n_LF_Q14 = silk_SMLAWT( n_LF_Q14, psDD->LF_AR_Q14, LF_shp_Q14 );
/* Q12 */ | |
| 449 n_LF_Q14 = silk_LSHIFT( n_LF_Q14, 2 );
/* Q12 -> Q14 */ | |
| 450 | |
| 451 /* Input minus prediction plus noise feedback
*/ | |
| 452 /* r = x[ i ] - LTP_pred - LPC_pred + n_AR + n_Tilt + n_LF + n_LTP
*/ | |
| 453 tmp1 = silk_ADD32( n_AR_Q14, n_LF_Q14 );
/* Q14 */ | |
| 454 tmp2 = silk_ADD32( n_LTP_Q14, LPC_pred_Q14 );
/* Q13 */ | |
| 455 tmp1 = silk_SUB32( tmp2, tmp1 );
/* Q13 */ | |
| 456 tmp1 = silk_RSHIFT_ROUND( tmp1, 4 );
/* Q10 */ | |
| 457 | |
| 458 r_Q10 = silk_SUB32( x_Q10[ i ], tmp1 );
/* residual error Q10 */ | |
| 459 | |
| 460 /* Flip sign depending on dither */ | |
| 461 if ( psDD->Seed < 0 ) { | |
| 462 r_Q10 = -r_Q10; | |
| 463 } | |
| 464 r_Q10 = silk_LIMIT_32( r_Q10, -(31 << 10), 30 << 10 ); | |
| 465 | |
| 466 /* Find two quantization level candidates and measure their rate-dis
tortion */ | |
| 467 q1_Q10 = silk_SUB32( r_Q10, offset_Q10 ); | |
| 468 q1_Q0 = silk_RSHIFT( q1_Q10, 10 ); | |
| 469 if( q1_Q0 > 0 ) { | |
| 470 q1_Q10 = silk_SUB32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJU
ST_Q10 ); | |
| 471 q1_Q10 = silk_ADD32( q1_Q10, offset_Q10 ); | |
| 472 q2_Q10 = silk_ADD32( q1_Q10, 1024 ); | |
| 473 rd1_Q10 = silk_SMULBB( q1_Q10, Lambda_Q10 ); | |
| 474 rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 ); | |
| 475 } else if( q1_Q0 == 0 ) { | |
| 476 q1_Q10 = offset_Q10; | |
| 477 q2_Q10 = silk_ADD32( q1_Q10, 1024 - QUANT_LEVEL_ADJUST_Q10 ); | |
| 478 rd1_Q10 = silk_SMULBB( q1_Q10, Lambda_Q10 ); | |
| 479 rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 ); | |
| 480 } else if( q1_Q0 == -1 ) { | |
| 481 q2_Q10 = offset_Q10; | |
| 482 q1_Q10 = silk_SUB32( q2_Q10, 1024 - QUANT_LEVEL_ADJUST_Q10 ); | |
| 483 rd1_Q10 = silk_SMULBB( -q1_Q10, Lambda_Q10 ); | |
| 484 rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 ); | |
| 485 } else { /* q1_Q0 < -1 */ | |
| 486 q1_Q10 = silk_ADD32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJU
ST_Q10 ); | |
| 487 q1_Q10 = silk_ADD32( q1_Q10, offset_Q10 ); | |
| 488 q2_Q10 = silk_ADD32( q1_Q10, 1024 ); | |
| 489 rd1_Q10 = silk_SMULBB( -q1_Q10, Lambda_Q10 ); | |
| 490 rd2_Q10 = silk_SMULBB( -q2_Q10, Lambda_Q10 ); | |
| 491 } | |
| 492 rr_Q10 = silk_SUB32( r_Q10, q1_Q10 ); | |
| 493 rd1_Q10 = silk_RSHIFT( silk_SMLABB( rd1_Q10, rr_Q10, rr_Q10 ), 10 ); | |
| 494 rr_Q10 = silk_SUB32( r_Q10, q2_Q10 ); | |
| 495 rd2_Q10 = silk_RSHIFT( silk_SMLABB( rd2_Q10, rr_Q10, rr_Q10 ), 10 ); | |
| 496 | |
| 497 if( rd1_Q10 < rd2_Q10 ) { | |
| 498 psSS[ 0 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd1_Q10 ); | |
| 499 psSS[ 1 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd2_Q10 ); | |
| 500 psSS[ 0 ].Q_Q10 = q1_Q10; | |
| 501 psSS[ 1 ].Q_Q10 = q2_Q10; | |
| 502 } else { | |
| 503 psSS[ 0 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd2_Q10 ); | |
| 504 psSS[ 1 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd1_Q10 ); | |
| 505 psSS[ 0 ].Q_Q10 = q2_Q10; | |
| 506 psSS[ 1 ].Q_Q10 = q1_Q10; | |
| 507 } | |
| 508 | |
| 509 /* Update states for best quantization */ | |
| 510 | |
| 511 /* Quantized excitation */ | |
| 512 exc_Q14 = silk_LSHIFT32( psSS[ 0 ].Q_Q10, 4 ); | |
| 513 if ( psDD->Seed < 0 ) { | |
| 514 exc_Q14 = -exc_Q14; | |
| 515 } | |
| 516 | |
| 517 /* Add predictions */ | |
| 518 LPC_exc_Q14 = silk_ADD32( exc_Q14, LTP_pred_Q14 ); | |
| 519 xq_Q14 = silk_ADD32( LPC_exc_Q14, LPC_pred_Q14 ); | |
| 520 | |
| 521 /* Update states */ | |
| 522 sLF_AR_shp_Q14 = silk_SUB32( xq_Q14, n_AR_Q14 ); | |
| 523 psSS[ 0 ].sLTP_shp_Q14 = silk_SUB32( sLF_AR_shp_Q14, n_LF_Q14 ); | |
| 524 psSS[ 0 ].LF_AR_Q14 = sLF_AR_shp_Q14; | |
| 525 psSS[ 0 ].LPC_exc_Q14 = LPC_exc_Q14; | |
| 526 psSS[ 0 ].xq_Q14 = xq_Q14; | |
| 527 | |
| 528 /* Update states for second best quantization */ | |
| 529 | |
| 530 /* Quantized excitation */ | |
| 531 exc_Q14 = silk_LSHIFT32( psSS[ 1 ].Q_Q10, 4 ); | |
| 532 if ( psDD->Seed < 0 ) { | |
| 533 exc_Q14 = -exc_Q14; | |
| 534 } | |
| 535 | |
| 536 | |
| 537 /* Add predictions */ | |
| 538 LPC_exc_Q14 = silk_ADD32( exc_Q14, LTP_pred_Q14 ); | |
| 539 xq_Q14 = silk_ADD32( LPC_exc_Q14, LPC_pred_Q14 ); | |
| 540 | |
| 541 /* Update states */ | |
| 542 sLF_AR_shp_Q14 = silk_SUB32( xq_Q14, n_AR_Q14 ); | |
| 543 psSS[ 1 ].sLTP_shp_Q14 = silk_SUB32( sLF_AR_shp_Q14, n_LF_Q14 ); | |
| 544 psSS[ 1 ].LF_AR_Q14 = sLF_AR_shp_Q14; | |
| 545 psSS[ 1 ].LPC_exc_Q14 = LPC_exc_Q14; | |
| 546 psSS[ 1 ].xq_Q14 = xq_Q14; | |
| 547 } | 640 } |
| 548 | |
| 549 *smpl_buf_idx = ( *smpl_buf_idx - 1 ) & DECISION_DELAY_MASK;
/* Index to newest samples */ | 641 *smpl_buf_idx = ( *smpl_buf_idx - 1 ) & DECISION_DELAY_MASK;
/* Index to newest samples */ |
| 550 last_smple_idx = ( *smpl_buf_idx + decisionDelay ) & DECISION_DELAY_MASK
; /* Index to decisionDelay old samples */ | 642 last_smple_idx = ( *smpl_buf_idx + decisionDelay ) & DECISION_DELAY_MASK
; /* Index to decisionDelay old samples */ |
| 551 | 643 |
| 552 /* Find winner */ | 644 /* Find winner */ |
| 553 RDmin_Q10 = psSampleState[ 0 ][ 0 ].RD_Q10; | 645 RDmin_Q10 = psSampleState[ 0 ][ 0 ].RD_Q10; |
| 554 Winner_ind = 0; | 646 Winner_ind = 0; |
| 555 for( k = 1; k < nStatesDelayedDecision; k++ ) { | 647 for( k = 1; k < nStatesDelayedDecision; k++ ) { |
| 556 if( psSampleState[ k ][ 0 ].RD_Q10 < RDmin_Q10 ) { | 648 if( psSampleState[ k ][ 0 ].RD_Q10 < RDmin_Q10 ) { |
| 557 RDmin_Q10 = psSampleState[ k ][ 0 ].RD_Q10; | 649 RDmin_Q10 = psSampleState[ k ][ 0 ].RD_Q10; |
| 558 Winner_ind = k; | 650 Winner_ind = k; |
| (...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 623 delayedGain_Q10[ *smpl_buf_idx ] = Gain_Q10; | 715 delayedGain_Q10[ *smpl_buf_idx ] = Gain_Q10; |
| 624 } | 716 } |
| 625 /* Update LPC states */ | 717 /* Update LPC states */ |
| 626 for( k = 0; k < nStatesDelayedDecision; k++ ) { | 718 for( k = 0; k < nStatesDelayedDecision; k++ ) { |
| 627 psDD = &psDelDec[ k ]; | 719 psDD = &psDelDec[ k ]; |
| 628 silk_memcpy( psDD->sLPC_Q14, &psDD->sLPC_Q14[ length ], NSQ_LPC_BUF_LENG
TH * sizeof( opus_int32 ) ); | 720 silk_memcpy( psDD->sLPC_Q14, &psDD->sLPC_Q14[ length ], NSQ_LPC_BUF_LENG
TH * sizeof( opus_int32 ) ); |
| 629 } | 721 } |
| 630 RESTORE_STACK; | 722 RESTORE_STACK; |
| 631 } | 723 } |
| 632 | 724 |
| 633 static OPUS_INLINE void silk_nsq_del_dec_scale_states( | 725 static OPUS_INLINE void silk_nsq_del_dec_scale_states_sse4_1( |
| 634 const silk_encoder_state *psEncC, /* I Encoder State
*/ | 726 const silk_encoder_state *psEncC, /* I Encoder State
*/ |
| 635 silk_nsq_state *NSQ, /* I/O NSQ state
*/ | 727 silk_nsq_state *NSQ, /* I/O NSQ state
*/ |
| 636 NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision sta
tes */ | 728 NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision sta
tes */ |
| 637 const opus_int32 x_Q3[], /* I Input in Q3
*/ | 729 const opus_int32 x_Q3[], /* I Input in Q3
*/ |
| 638 opus_int32 x_sc_Q10[], /* O Input scaled with 1/
Gain in Q10 */ | 730 opus_int32 x_sc_Q10[], /* O Input scaled with 1/
Gain in Q10 */ |
| 639 const opus_int16 sLTP[], /* I Re-whitened LTP stat
e in Q0 */ | 731 const opus_int16 sLTP[], /* I Re-whitened LTP stat
e in Q0 */ |
| 640 opus_int32 sLTP_Q15[], /* O LTP state matching s
caled input */ | 732 opus_int32 sLTP_Q15[], /* O LTP state matching s
caled input */ |
| 641 opus_int subfr, /* I Subframe number
*/ | 733 opus_int subfr, /* I Subframe number
*/ |
| 642 opus_int nStatesDelayedDecision, /* I Number of del dec st
ates */ | 734 opus_int nStatesDelayedDecision, /* I Number of del dec st
ates */ |
| 643 const opus_int LTP_scale_Q14, /* I LTP state scaling
*/ | 735 const opus_int LTP_scale_Q14, /* I LTP state scaling
*/ |
| 644 const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I
*/ | 736 const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I
*/ |
| 645 const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lag
*/ | 737 const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lag
*/ |
| 646 const opus_int signal_type, /* I Signal type
*/ | 738 const opus_int signal_type, /* I Signal type
*/ |
| 647 const opus_int decisionDelay /* I Decision delay
*/ | 739 const opus_int decisionDelay /* I Decision delay
*/ |
| 648 ) | 740 ) |
| 649 { | 741 { |
| 650 opus_int i, k, lag; | 742 opus_int i, k, lag; |
| 651 opus_int32 gain_adj_Q16, inv_gain_Q31, inv_gain_Q23; | 743 opus_int32 gain_adj_Q16, inv_gain_Q31, inv_gain_Q23; |
| 652 NSQ_del_dec_struct *psDD; | 744 NSQ_del_dec_struct *psDD; |
| 745 __m128i xmm_inv_gain_Q23, xmm_x_Q3_x2x0, xmm_x_Q3_x3x1; |
| 653 | 746 |
| 654 lag = pitchL[ subfr ]; | 747 lag = pitchL[ subfr ]; |
| 655 inv_gain_Q31 = silk_INVERSE32_varQ( silk_max( Gains_Q16[ subfr ], 1 ), 47 ); | 748 inv_gain_Q31 = silk_INVERSE32_varQ( silk_max( Gains_Q16[ subfr ], 1 ), 47 ); |
| 749 |
| 656 silk_assert( inv_gain_Q31 != 0 ); | 750 silk_assert( inv_gain_Q31 != 0 ); |
| 657 | 751 |
| 658 /* Calculate gain adjustment factor */ | 752 /* Calculate gain adjustment factor */ |
| 659 if( Gains_Q16[ subfr ] != NSQ->prev_gain_Q16 ) { | 753 if( Gains_Q16[ subfr ] != NSQ->prev_gain_Q16 ) { |
| 660 gain_adj_Q16 = silk_DIV32_varQ( NSQ->prev_gain_Q16, Gains_Q16[ subfr ],
16 ); | 754 gain_adj_Q16 = silk_DIV32_varQ( NSQ->prev_gain_Q16, Gains_Q16[ subfr ],
16 ); |
| 661 } else { | 755 } else { |
| 662 gain_adj_Q16 = (opus_int32)1 << 16; | 756 gain_adj_Q16 = (opus_int32)1 << 16; |
| 663 } | 757 } |
| 664 | 758 |
| 665 /* Scale input */ | 759 /* Scale input */ |
| 666 inv_gain_Q23 = silk_RSHIFT_ROUND( inv_gain_Q31, 8 ); | 760 inv_gain_Q23 = silk_RSHIFT_ROUND( inv_gain_Q31, 8 ); |
| 667 for( i = 0; i < psEncC->subfr_length; i++ ) { | 761 |
| 762 /* prepare inv_gain_Q23 in packed 4 32-bits */ |
| 763 xmm_inv_gain_Q23 = _mm_set1_epi32(inv_gain_Q23); |
| 764 |
| 765 for( i = 0; i < psEncC->subfr_length - 3; i += 4 ) { |
| 766 xmm_x_Q3_x2x0 = _mm_loadu_si128( (__m128i *)(&(x_Q3[ i ] ) ) ); |
| 767 /* equal shift right 4 bytes*/ |
| 768 xmm_x_Q3_x3x1 = _mm_shuffle_epi32( xmm_x_Q3_x2x0, _MM_SHUFFLE( 0, 3, 2,
1 ) ); |
| 769 |
| 770 xmm_x_Q3_x2x0 = _mm_mul_epi32( xmm_x_Q3_x2x0, xmm_inv_gain_Q23 ); |
| 771 xmm_x_Q3_x3x1 = _mm_mul_epi32( xmm_x_Q3_x3x1, xmm_inv_gain_Q23 ); |
| 772 |
| 773 xmm_x_Q3_x2x0 = _mm_srli_epi64( xmm_x_Q3_x2x0, 16 ); |
| 774 xmm_x_Q3_x3x1 = _mm_slli_epi64( xmm_x_Q3_x3x1, 16 ); |
| 775 |
| 776 xmm_x_Q3_x2x0 = _mm_blend_epi16( xmm_x_Q3_x2x0, xmm_x_Q3_x3x1, 0xCC ); |
| 777 |
| 778 _mm_storeu_si128( (__m128i *)(&(x_sc_Q10[ i ])), xmm_x_Q3_x2x0 ); |
| 779 } |
| 780 |
| 781 for( ; i < psEncC->subfr_length; i++ ) { |
| 668 x_sc_Q10[ i ] = silk_SMULWW( x_Q3[ i ], inv_gain_Q23 ); | 782 x_sc_Q10[ i ] = silk_SMULWW( x_Q3[ i ], inv_gain_Q23 ); |
| 669 } | 783 } |
| 670 | 784 |
| 671 /* Save inverse gain */ | 785 /* Save inverse gain */ |
| 672 NSQ->prev_gain_Q16 = Gains_Q16[ subfr ]; | 786 NSQ->prev_gain_Q16 = Gains_Q16[ subfr ]; |
| 673 | 787 |
| 674 /* After rewhitening the LTP state is un-scaled, so scale with inv_gain_Q16
*/ | 788 /* After rewhitening the LTP state is un-scaled, so scale with inv_gain_Q16
*/ |
| 675 if( NSQ->rewhite_flag ) { | 789 if( NSQ->rewhite_flag ) { |
| 676 if( subfr == 0 ) { | 790 if( subfr == 0 ) { |
| 677 /* Do LTP downscaling */ | 791 /* Do LTP downscaling */ |
| 678 inv_gain_Q31 = silk_LSHIFT( silk_SMULWB( inv_gain_Q31, LTP_scale_Q14
), 2 ); | 792 inv_gain_Q31 = silk_LSHIFT( silk_SMULWB( inv_gain_Q31, LTP_scale_Q14
), 2 ); |
| 679 } | 793 } |
| 680 for( i = NSQ->sLTP_buf_idx - lag - LTP_ORDER / 2; i < NSQ->sLTP_buf_idx;
i++ ) { | 794 for( i = NSQ->sLTP_buf_idx - lag - LTP_ORDER / 2; i < NSQ->sLTP_buf_idx;
i++ ) { |
| 681 silk_assert( i < MAX_FRAME_LENGTH ); | 795 silk_assert( i < MAX_FRAME_LENGTH ); |
| 682 sLTP_Q15[ i ] = silk_SMULWB( inv_gain_Q31, sLTP[ i ] ); | 796 sLTP_Q15[ i ] = silk_SMULWB( inv_gain_Q31, sLTP[ i ] ); |
| 683 } | 797 } |
| 684 } | 798 } |
| 685 | 799 |
| 686 /* Adjust for changing gain */ | 800 /* Adjust for changing gain */ |
| 687 if( gain_adj_Q16 != (opus_int32)1 << 16 ) { | 801 if( gain_adj_Q16 != (opus_int32)1 << 16 ) { |
| 688 /* Scale long-term shaping state */ | 802 /* Scale long-term shaping state */ |
| 689 for( i = NSQ->sLTP_shp_buf_idx - psEncC->ltp_mem_length; i < NSQ->sLTP_s
hp_buf_idx; i++ ) { | 803 { |
| 690 NSQ->sLTP_shp_Q14[ i ] = silk_SMULWW( gain_adj_Q16, NSQ->sLTP_shp_Q1
4[ i ] ); | 804 __m128i xmm_gain_adj_Q16, xmm_sLTP_shp_Q14_x2x0, xmm_sLTP_shp_Q14_x3
x1; |
| 691 } | |
| 692 | 805 |
| 693 /* Scale long-term prediction state */ | 806 /* prepare gain_adj_Q16 in packed 4 32-bits */ |
| 694 if( signal_type == TYPE_VOICED && NSQ->rewhite_flag == 0 ) { | 807 xmm_gain_adj_Q16 = _mm_set1_epi32( gain_adj_Q16 ); |
| 695 for( i = NSQ->sLTP_buf_idx - lag - LTP_ORDER / 2; i < NSQ->sLTP_buf_
idx - decisionDelay; i++ ) { | 808 |
| 696 sLTP_Q15[ i ] = silk_SMULWW( gain_adj_Q16, sLTP_Q15[ i ] ); | 809 for( i = NSQ->sLTP_shp_buf_idx - psEncC->ltp_mem_length; i < NSQ->sL
TP_shp_buf_idx - 3; i += 4 ) |
| 810 { |
| 811 xmm_sLTP_shp_Q14_x2x0 = _mm_loadu_si128( (__m128i *)(&(NSQ->sLTP
_shp_Q14[ i ] ) ) ); |
| 812 /* equal shift right 4 bytes*/ |
| 813 xmm_sLTP_shp_Q14_x3x1 = _mm_shuffle_epi32( xmm_sLTP_shp_Q14_x2x0
, _MM_SHUFFLE( 0, 3, 2, 1 ) ); |
| 814 |
| 815 xmm_sLTP_shp_Q14_x2x0 = _mm_mul_epi32( xmm_sLTP_shp_Q14_x2x0, xm
m_gain_adj_Q16 ); |
| 816 xmm_sLTP_shp_Q14_x3x1 = _mm_mul_epi32( xmm_sLTP_shp_Q14_x3x1, xm
m_gain_adj_Q16 ); |
| 817 |
| 818 xmm_sLTP_shp_Q14_x2x0 = _mm_srli_epi64( xmm_sLTP_shp_Q14_x2x0, 1
6 ); |
| 819 xmm_sLTP_shp_Q14_x3x1 = _mm_slli_epi64( xmm_sLTP_shp_Q14_x3x1, 1
6 ); |
| 820 |
| 821 xmm_sLTP_shp_Q14_x2x0 = _mm_blend_epi16( xmm_sLTP_shp_Q14_x2x0,
xmm_sLTP_shp_Q14_x3x1, 0xCC ); |
| 822 |
| 823 _mm_storeu_si128( (__m128i *)(&(NSQ->sLTP_shp_Q14[ i ] ) ), xmm_
sLTP_shp_Q14_x2x0 ); |
| 697 } | 824 } |
| 698 } | |
| 699 | 825 |
| 700 for( k = 0; k < nStatesDelayedDecision; k++ ) { | 826 for( ; i < NSQ->sLTP_shp_buf_idx; i++ ) { |
| 701 psDD = &psDelDec[ k ]; | 827 NSQ->sLTP_shp_Q14[ i ] = silk_SMULWW( gain_adj_Q16, NSQ->sLTP_sh
p_Q14[ i ] ); |
| 828 } |
| 702 | 829 |
| 703 /* Scale scalar states */ | 830 /* Scale long-term prediction state */ |
| 704 psDD->LF_AR_Q14 = silk_SMULWW( gain_adj_Q16, psDD->LF_AR_Q14 ); | 831 if( signal_type == TYPE_VOICED && NSQ->rewhite_flag == 0 ) { |
| 832 for( i = NSQ->sLTP_buf_idx - lag - LTP_ORDER / 2; i < NSQ->sLTP_
buf_idx - decisionDelay; i++ ) { |
| 833 sLTP_Q15[ i ] = silk_SMULWW( gain_adj_Q16, sLTP_Q15[ i ] ); |
| 834 } |
| 835 } |
| 705 | 836 |
| 706 /* Scale short-term prediction and shaping states */ | 837 for( k = 0; k < nStatesDelayedDecision; k++ ) { |
| 707 for( i = 0; i < NSQ_LPC_BUF_LENGTH; i++ ) { | 838 psDD = &psDelDec[ k ]; |
| 708 psDD->sLPC_Q14[ i ] = silk_SMULWW( gain_adj_Q16, psDD->sLPC_Q14[
i ] ); | 839 |
| 709 } | 840 /* Scale scalar states */ |
| 710 for( i = 0; i < MAX_SHAPE_LPC_ORDER; i++ ) { | 841 psDD->LF_AR_Q14 = silk_SMULWW( gain_adj_Q16, psDD->LF_AR_Q14 ); |
| 711 psDD->sAR2_Q14[ i ] = silk_SMULWW( gain_adj_Q16, psDD->sAR2_Q14[
i ] ); | 842 |
| 712 } | 843 /* Scale short-term prediction and shaping states */ |
| 713 for( i = 0; i < DECISION_DELAY; i++ ) { | 844 for( i = 0; i < NSQ_LPC_BUF_LENGTH; i++ ) { |
| 714 psDD->Pred_Q15[ i ] = silk_SMULWW( gain_adj_Q16, psDD->Pred_Q15
[ i ] ); | 845 psDD->sLPC_Q14[ i ] = silk_SMULWW( gain_adj_Q16, psDD->sLPC_
Q14[ i ] ); |
| 715 psDD->Shape_Q14[ i ] = silk_SMULWW( gain_adj_Q16, psDD->Shape_Q1
4[ i ] ); | 846 } |
| 847 for( i = 0; i < MAX_SHAPE_LPC_ORDER; i++ ) { |
| 848 psDD->sAR2_Q14[ i ] = silk_SMULWW( gain_adj_Q16, psDD->sAR2_
Q14[ i ] ); |
| 849 } |
| 850 for( i = 0; i < DECISION_DELAY; i++ ) { |
| 851 psDD->Pred_Q15[ i ] = silk_SMULWW( gain_adj_Q16, psDD->Pred
_Q15[ i ] ); |
| 852 psDD->Shape_Q14[ i ] = silk_SMULWW( gain_adj_Q16, psDD->Shap
e_Q14[ i ] ); |
| 853 } |
| 716 } | 854 } |
| 717 } | 855 } |
| 718 } | 856 } |
| 719 } | 857 } |
| OLD | NEW |