1 /*********************************************************************** | 1 /* Copyright (c) 2014, Cisco Systems, INC |
2 Copyright (c) 2006-2011, Skype Limited. All rights reserved. | 2 Written by XiangMingZhu WeiZhou MinPeng YanWang |
3 Redistribution and use in source and binary forms, with or without | 3 |
4 modification, are permitted provided that the following conditions | 4 Redistribution and use in source and binary forms, with or without |
5 are met: | 5 modification, are permitted provided that the following conditions |
6 - Redistributions of source code must retain the above copyright notice, | 6 are met: |
7 this list of conditions and the following disclaimer. | 7 |
8 - Redistributions in binary form must reproduce the above copyright | 8 - Redistributions of source code must retain the above copyright |
9 notice, this list of conditions and the following disclaimer in the | 9 notice, this list of conditions and the following disclaimer. |
10 documentation and/or other materials provided with the distribution. | 10 |
11 - Neither the name of Internet Society, IETF or IETF Trust, nor the | 11 - Redistributions in binary form must reproduce the above copyright |
12 names of specific contributors, may be used to endorse or promote | 12 notice, this list of conditions and the following disclaimer in the |
13 products derived from this software without specific prior written | 13 documentation and/or other materials provided with the distribution. |
14 permission. | 14 |
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | 15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
16 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | 16 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
17 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | 17 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
18 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | 18 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER |
19 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | 19 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
20 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | 20 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
21 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | 21 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
22 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | 22 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF |
23 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | 23 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
24 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | 24 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
25 POSSIBILITY OF SUCH DAMAGE. | 25 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
26 ***********************************************************************/ | 26 */ |
27 | 27 |
28 #ifdef HAVE_CONFIG_H | 28 #ifdef HAVE_CONFIG_H |
29 #include "config.h" | 29 #include "config.h" |
30 #endif | 30 #endif |
31 | 31 |
| 32 #include <xmmintrin.h> |
| 33 #include <emmintrin.h> |
| 34 #include <smmintrin.h> |
32 #include "main.h" | 35 #include "main.h" |
| 36 #include "celt/x86/x86cpu.h" |
| 37 |
33 #include "stack_alloc.h" | 38 #include "stack_alloc.h" |
34 | 39 |
35 typedef struct { | 40 typedef struct { |
36 opus_int32 sLPC_Q14[ MAX_SUB_FRAME_LENGTH + NSQ_LPC_BUF_LENGTH ]; | 41 opus_int32 sLPC_Q14[ MAX_SUB_FRAME_LENGTH + NSQ_LPC_BUF_LENGTH ]; |
37 opus_int32 RandState[ DECISION_DELAY ]; | 42 opus_int32 RandState[ DECISION_DELAY ]; |
38 opus_int32 Q_Q10[ DECISION_DELAY ]; | 43 opus_int32 Q_Q10[ DECISION_DELAY ]; |
39 opus_int32 Xq_Q14[ DECISION_DELAY ]; | 44 opus_int32 Xq_Q14[ DECISION_DELAY ]; |
40 opus_int32 Pred_Q15[ DECISION_DELAY ]; | 45 opus_int32 Pred_Q15[ DECISION_DELAY ]; |
41 opus_int32 Shape_Q14[ DECISION_DELAY ]; | 46 opus_int32 Shape_Q14[ DECISION_DELAY ]; |
42 opus_int32 sAR2_Q14[ MAX_SHAPE_LPC_ORDER ]; | 47 opus_int32 sAR2_Q14[ MAX_SHAPE_LPC_ORDER ]; |
43 opus_int32 LF_AR_Q14; | 48 opus_int32 LF_AR_Q14; |
44 opus_int32 Seed; | 49 opus_int32 Seed; |
45 opus_int32 SeedInit; | 50 opus_int32 SeedInit; |
46 opus_int32 RD_Q10; | 51 opus_int32 RD_Q10; |
47 } NSQ_del_dec_struct; | 52 } NSQ_del_dec_struct; |
48 | 53 |
49 typedef struct { | 54 typedef struct { |
50 opus_int32 Q_Q10; | 55 opus_int32 Q_Q10; |
51 opus_int32 RD_Q10; | 56 opus_int32 RD_Q10; |
52 opus_int32 xq_Q14; | 57 opus_int32 xq_Q14; |
53 opus_int32 LF_AR_Q14; | 58 opus_int32 LF_AR_Q14; |
54 opus_int32 sLTP_shp_Q14; | 59 opus_int32 sLTP_shp_Q14; |
55 opus_int32 LPC_exc_Q14; | 60 opus_int32 LPC_exc_Q14; |
56 } NSQ_sample_struct; | 61 } NSQ_sample_struct; |
57 | 62 |
58 typedef NSQ_sample_struct NSQ_sample_pair[ 2 ]; | 63 typedef NSQ_sample_struct NSQ_sample_pair[ 2 ]; |
59 | 64 |
60 static OPUS_INLINE void silk_nsq_del_dec_scale_states( | 65 static OPUS_INLINE void silk_nsq_del_dec_scale_states_sse4_1( |
61     const silk_encoder_state    *psEncC,              /* I    Encoder State                   */ | 66     const silk_encoder_state    *psEncC,              /* I    Encoder State                   */ |
62     silk_nsq_state              *NSQ,                  /* I/O  NSQ state                       */ | 67     silk_nsq_state              *NSQ,                  /* I/O  NSQ state                       */ |
63     NSQ_del_dec_struct          psDelDec[],            /* I/O  Delayed decision states         */ | 68     NSQ_del_dec_struct          psDelDec[],            /* I/O  Delayed decision states         */ |
64     const opus_int32            x_Q3[],                /* I    Input in Q3                     */ | 69     const opus_int32            x_Q3[],                /* I    Input in Q3                     */ |
65     opus_int32                  x_sc_Q10[],            /* O    Input scaled with 1/Gain in Q10 */ | 70     opus_int32                  x_sc_Q10[],            /* O    Input scaled with 1/Gain in Q10 */ |
66     const opus_int16            sLTP[],                /* I    Re-whitened LTP state in Q0     */ | 71     const opus_int16            sLTP[],                /* I    Re-whitened LTP state in Q0     */ |
67     opus_int32                  sLTP_Q15[],            /* O    LTP state matching scaled input */ | 72     opus_int32                  sLTP_Q15[],            /* O    LTP state matching scaled input */ |
68     opus_int                    subfr,                 /* I    Subframe number                 */ | 73     opus_int                    subfr,                 /* I    Subframe number                 */ |
69     opus_int                    nStatesDelayedDecision, /* I   Number of del dec states        */ | 74     opus_int                    nStatesDelayedDecision, /* I   Number of del dec states        */ |
70     const opus_int              LTP_scale_Q14,         /* I    LTP state scaling               */ | 75     const opus_int              LTP_scale_Q14,         /* I    LTP state scaling               */ |
71     const opus_int32            Gains_Q16[ MAX_NB_SUBFR ], /* I                                */ | 76     const opus_int32            Gains_Q16[ MAX_NB_SUBFR ], /* I                                */ |
72     const opus_int              pitchL[ MAX_NB_SUBFR ], /* I   Pitch lag                       */ | 77     const opus_int              pitchL[ MAX_NB_SUBFR ], /* I   Pitch lag                       */ |
73     const opus_int              signal_type,           /* I    Signal type                     */ | 78     const opus_int              signal_type,           /* I    Signal type                     */ |
74     const opus_int              decisionDelay          /* I    Decision delay                  */ | 79     const opus_int              decisionDelay          /* I    Decision delay                  */ |
75 ); | 80 ); |
76 | 81 |
77 /******************************************/ | 82 /******************************************/ |
78 /* Noise shape quantizer for one subframe */ | 83 /* Noise shape quantizer for one subframe */ |
79 /******************************************/ | 84 /******************************************/ |
80 static OPUS_INLINE void silk_noise_shape_quantizer_del_dec( | 85 static OPUS_INLINE void silk_noise_shape_quantizer_del_dec_sse4_1( |
81     silk_nsq_state      *NSQ,                   /* I/O  NSQ state                          */ | 86     silk_nsq_state      *NSQ,                   /* I/O  NSQ state                          */ |
82     NSQ_del_dec_struct  psDelDec[],             /* I/O  Delayed decision states            */ | 87     NSQ_del_dec_struct  psDelDec[],             /* I/O  Delayed decision states            */ |
83     opus_int            signalType,             /* I    Signal type                        */ | 88     opus_int            signalType,             /* I    Signal type                        */ |
84     const opus_int32    x_Q10[],                /* I                                       */ | 89     const opus_int32    x_Q10[],                /* I                                       */ |
85     opus_int8           pulses[],               /* O                                       */ | 90     opus_int8           pulses[],               /* O                                       */ |
86     opus_int16          xq[],                   /* O                                       */ | 91     opus_int16          xq[],                   /* O                                       */ |
87     opus_int32          sLTP_Q15[],             /* I/O  LTP filter state                   */ | 92     opus_int32          sLTP_Q15[],             /* I/O  LTP filter state                   */ |
88     opus_int32          delayedGain_Q10[],      /* I/O  Gain delay buffer                  */ | 93     opus_int32          delayedGain_Q10[],      /* I/O  Gain delay buffer                  */ |
89     const opus_int16    a_Q12[],                /* I    Short term prediction coefs        */ | 94     const opus_int16    a_Q12[],                /* I    Short term prediction coefs        */ |
90     const opus_int16    b_Q14[],                /* I    Long term prediction coefs         */ | 95     const opus_int16    b_Q14[],                /* I    Long term prediction coefs         */ |
91     const opus_int16    AR_shp_Q13[],           /* I    Noise shaping coefs                */ | 96     const opus_int16    AR_shp_Q13[],           /* I    Noise shaping coefs                */ |
92     opus_int            lag,                    /* I    Pitch lag                          */ | 97     opus_int            lag,                    /* I    Pitch lag                          */ |
93     opus_int32          HarmShapeFIRPacked_Q14, /* I                                       */ | 98     opus_int32          HarmShapeFIRPacked_Q14, /* I                                       */ |
94     opus_int            Tilt_Q14,               /* I    Spectral tilt                      */ | 99     opus_int            Tilt_Q14,               /* I    Spectral tilt                      */ |
95     opus_int32          LF_shp_Q14,             /* I                                       */ | 100    opus_int32          LF_shp_Q14,             /* I                                       */ |
96     opus_int32          Gain_Q16,               /* I                                       */ | 101    opus_int32          Gain_Q16,               /* I                                       */ |
97     opus_int            Lambda_Q10,             /* I                                       */ | 102    opus_int            Lambda_Q10,             /* I                                       */ |
98     opus_int            offset_Q10,             /* I                                       */ | 103    opus_int            offset_Q10,             /* I                                       */ |
99     opus_int            length,                 /* I    Input length                       */ | 104    opus_int            length,                 /* I    Input length                       */ |
100    opus_int            subfr,                  /* I    Subframe number                    */ | 105    opus_int            subfr,                  /* I    Subframe number                    */ |
101    opus_int            shapingLPCOrder,        /* I    Shaping LPC filter order           */ | 106    opus_int            shapingLPCOrder,        /* I    Shaping LPC filter order           */ |
102    opus_int            predictLPCOrder,        /* I    Prediction filter order            */ | 107    opus_int            predictLPCOrder,        /* I    Prediction filter order            */ |
103    opus_int            warping_Q16,            /* I                                       */ | 108    opus_int            warping_Q16,            /* I                                       */ |
104    opus_int            nStatesDelayedDecision, /* I    Number of states in decision tree  */ | 109    opus_int            nStatesDelayedDecision, /* I    Number of states in decision tree  */ |
105    opus_int            *smpl_buf_idx,          /* I    Index to newest samples in buffers */ | 110    opus_int            *smpl_buf_idx,          /* I    Index to newest samples in buffers */ |
106    opus_int            decisionDelay           /* I                                       */ | 111    opus_int            decisionDelay           /* I                                       */ |
107 ); | 112 ); |
108 | 113 |
109 void silk_NSQ_del_dec( | 114 void silk_NSQ_del_dec_sse4_1( |
110    const silk_encoder_state    *psEncC,                                    /* I/O  Encoder State                   */ | 115    const silk_encoder_state    *psEncC,                                    /* I/O  Encoder State                   */ |
111    silk_nsq_state              *NSQ,                                       /* I/O  NSQ state                       */ | 116    silk_nsq_state              *NSQ,                                       /* I/O  NSQ state                       */ |
112    SideInfoIndices             *psIndices,                                 /* I/O  Quantization Indices            */ | 117    SideInfoIndices             *psIndices,                                 /* I/O  Quantization Indices            */ |
113    const opus_int32            x_Q3[],                                     /* I    Prefiltered input signal        */ | 118    const opus_int32            x_Q3[],                                     /* I    Prefiltered input signal        */ |
114    opus_int8                   pulses[],                                   /* O    Quantized pulse signal          */ | 119    opus_int8                   pulses[],                                   /* O    Quantized pulse signal          */ |
115    const opus_int16            PredCoef_Q12[ 2 * MAX_LPC_ORDER ],          /* I    Short term prediction coefs     */ | 120    const opus_int16            PredCoef_Q12[ 2 * MAX_LPC_ORDER ],          /* I    Short term prediction coefs     */ |
116    const opus_int16            LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ],    /* I    Long term prediction coefs      */ | 121    const opus_int16            LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ],    /* I    Long term prediction coefs      */ |
117    const opus_int16            AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I    Noise shaping coefs          */ | 122    const opus_int16            AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I    Noise shaping coefs          */ |
118    const opus_int              HarmShapeGain_Q14[ MAX_NB_SUBFR ],          /* I    Long term shaping coefs         */ | 123    const opus_int              HarmShapeGain_Q14[ MAX_NB_SUBFR ],          /* I    Long term shaping coefs         */ |
119    const opus_int              Tilt_Q14[ MAX_NB_SUBFR ],                   /* I    Spectral tilt                   */ | 124    const opus_int              Tilt_Q14[ MAX_NB_SUBFR ],                   /* I    Spectral tilt                   */ |
(...skipping 117 matching lines...) |
237 } | 242 } |
238 | 243 |
239 subfr = 0; | 244 subfr = 0; |
240 } | 245 } |
241 | 246 |
242 /* Rewhiten with new A coefs */ | 247 /* Rewhiten with new A coefs */ |
243             start_idx = psEncC->ltp_mem_length - lag - psEncC->predictLPCOrder - LTP_ORDER / 2; | 248             start_idx = psEncC->ltp_mem_length - lag - psEncC->predictLPCOrder - LTP_ORDER / 2; |
244 silk_assert( start_idx > 0 ); | 249 silk_assert( start_idx > 0 ); |
245 | 250 |
246             silk_LPC_analysis_filter( &sLTP[ start_idx ], &NSQ->xq[ start_idx + k * psEncC->subfr_length ], | 251             silk_LPC_analysis_filter( &sLTP[ start_idx ], &NSQ->xq[ start_idx + k * psEncC->subfr_length ], |
247                 A_Q12, psEncC->ltp_mem_length - start_idx, psEncC->predictLPCOrder ); | 252                 A_Q12, psEncC->ltp_mem_length - start_idx, psEncC->predictLPCOrder, psEncC->arch ); |
248 | 253 |
249 NSQ->sLTP_buf_idx = psEncC->ltp_mem_length; | 254 NSQ->sLTP_buf_idx = psEncC->ltp_mem_length; |
250 NSQ->rewhite_flag = 1; | 255 NSQ->rewhite_flag = 1; |
251 } | 256 } |
252 } | 257 } |
253 | 258 |
254         silk_nsq_del_dec_scale_states( psEncC, NSQ, psDelDec, x_Q3, x_sc_Q10, sLTP, sLTP_Q15, k, | 259         silk_nsq_del_dec_scale_states_sse4_1( psEncC, NSQ, psDelDec, x_Q3, x_sc_Q10, sLTP, sLTP_Q15, k, |
255             psEncC->nStatesDelayedDecision, LTP_scale_Q14, Gains_Q16, pitchL, psIndices->signalType, decisionDelay ); | 260             psEncC->nStatesDelayedDecision, LTP_scale_Q14, Gains_Q16, pitchL, psIndices->signalType, decisionDelay ); |
256 | 261 |
257         silk_noise_shape_quantizer_del_dec( NSQ, psDelDec, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q15, | 262         silk_noise_shape_quantizer_del_dec_sse4_1( NSQ, psDelDec, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q15, |
258             delayedGain_Q10, A_Q12, B_Q14, AR_shp_Q13, lag, HarmShapeFIRPacked_Q14, Tilt_Q14[ k ], LF_shp_Q14[ k ], | 263             delayedGain_Q10, A_Q12, B_Q14, AR_shp_Q13, lag, HarmShapeFIRPacked_Q14, Tilt_Q14[ k ], LF_shp_Q14[ k ], |
259             Gains_Q16[ k ], Lambda_Q10, offset_Q10, psEncC->subfr_length, subfr++, psEncC->shapingLPCOrder, | 264             Gains_Q16[ k ], Lambda_Q10, offset_Q10, psEncC->subfr_length, subfr++, psEncC->shapingLPCOrder, |
260             psEncC->predictLPCOrder, psEncC->warping_Q16, psEncC->nStatesDelayedDecision, &smpl_buf_idx, decisionDelay ); | 265             psEncC->predictLPCOrder, psEncC->warping_Q16, psEncC->nStatesDelayedDecision, &smpl_buf_idx, decisionDelay ); |
261 | 266 |
262 x_Q3 += psEncC->subfr_length; | 267 x_Q3 += psEncC->subfr_length; |
263 pulses += psEncC->subfr_length; | 268 pulses += psEncC->subfr_length; |
264 pxq += psEncC->subfr_length; | 269 pxq += psEncC->subfr_length; |
265 } | 270 } |
266 | 271 |
267 /* Find winner */ | 272 /* Find winner */ |
(...skipping 28 matching lines...) |
296 /* Save quantized speech signal */ | 301 /* Save quantized speech signal */ |
297     /* DEBUG_STORE_DATA( enc.pcm, &NSQ->xq[psEncC->ltp_mem_length], psEncC->frame_length * sizeof( opus_int16 ) ) */ | 302     /* DEBUG_STORE_DATA( enc.pcm, &NSQ->xq[psEncC->ltp_mem_length], psEncC->frame_length * sizeof( opus_int16 ) ) */ |
298     silk_memmove( NSQ->xq,           &NSQ->xq[           psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int16 ) ); | 303     silk_memmove( NSQ->xq,           &NSQ->xq[           psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int16 ) ); |
299     silk_memmove( NSQ->sLTP_shp_Q14, &NSQ->sLTP_shp_Q14[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int32 ) ); | 304     silk_memmove( NSQ->sLTP_shp_Q14, &NSQ->sLTP_shp_Q14[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int32 ) ); |
300 RESTORE_STACK; | 305 RESTORE_STACK; |
301 } | 306 } |
302 | 307 |
303 /******************************************/ | 308 /******************************************/ |
304 /* Noise shape quantizer for one subframe */ | 309 /* Noise shape quantizer for one subframe */ |
305 /******************************************/ | 310 /******************************************/ |
306 static OPUS_INLINE void silk_noise_shape_quantizer_del_dec( | 311 static OPUS_INLINE void silk_noise_shape_quantizer_del_dec_sse4_1( |
307    silk_nsq_state      *NSQ,               /* I/O  NSQ state               */ | 312    silk_nsq_state      *NSQ,               /* I/O  NSQ state               */ |
308    NSQ_del_dec_struct  psDelDec[],         /* I/O  Delayed decision states */ | 313    NSQ_del_dec_struct  psDelDec[],         /* I/O  Delayed decision states */ |
309    opus_int            signalType,         /* I    Signal type             */ | 314    opus_int            signalType,         /* I    Signal type             */ |
310    const opus_int32    x_Q10[],            /* I                            */ | 315    const opus_int32    x_Q10[],            /* I                            */ |
311    opus_int8           pulses[],           /* O                            */ | 316    opus_int8           pulses[],           /* O                            */ |
312    opus_int16          xq[],               /* O                            */ | 317    opus_int16          xq[],               /* O                            */ |
313    opus_int32          sLTP_Q15[],         /* I/O  LTP filter state        */ | 318    opus_int32          sLTP_Q15[],         /* I/O  LTP filter state        */ |
314    opus_int32          delayedGain_Q10[],  /* I/O  Gain delay buffer       */ | 319    opus_int32          delayedGain_Q10[],  /* I/O  Gain delay buffer       */ |
315    const opus_int16    a_Q12[],            /* I    Short term prediction coefs */ | 320    const opus_int16    a_Q12[],            /* I    Short term prediction coefs */ |
316    const opus_int16    b_Q14[],            /* I    Long term prediction coefs  */ | 321    const opus_int16    b_Q14[],            /* I    Long term prediction coefs  */ |
(...skipping 18 matching lines...) |
335 opus_int i, j, k, Winner_ind, RDmin_ind, RDmax_ind, last_smple_idx; | 340 opus_int i, j, k, Winner_ind, RDmin_ind, RDmax_ind, last_smple_idx; |
336 opus_int32 Winner_rand_state; | 341 opus_int32 Winner_rand_state; |
337 opus_int32 LTP_pred_Q14, LPC_pred_Q14, n_AR_Q14, n_LTP_Q14; | 342 opus_int32 LTP_pred_Q14, LPC_pred_Q14, n_AR_Q14, n_LTP_Q14; |
338     opus_int32   n_LF_Q14, r_Q10, rr_Q10, rd1_Q10, rd2_Q10, RDmin_Q10, RDmax_Q10; | 343     opus_int32   n_LF_Q14, r_Q10, rr_Q10, rd1_Q10, rd2_Q10, RDmin_Q10, RDmax_Q10; |
339 opus_int32 q1_Q0, q1_Q10, q2_Q10, exc_Q14, LPC_exc_Q14, xq_Q14, Gain_Q10; | 344 opus_int32 q1_Q0, q1_Q10, q2_Q10, exc_Q14, LPC_exc_Q14, xq_Q14, Gain_Q10; |
340 opus_int32 tmp1, tmp2, sLF_AR_shp_Q14; | 345 opus_int32 tmp1, tmp2, sLF_AR_shp_Q14; |
341 opus_int32 *pred_lag_ptr, *shp_lag_ptr, *psLPC_Q14; | 346 opus_int32 *pred_lag_ptr, *shp_lag_ptr, *psLPC_Q14; |
342 VARDECL( NSQ_sample_pair, psSampleState ); | 347 VARDECL( NSQ_sample_pair, psSampleState ); |
343 NSQ_del_dec_struct *psDD; | 348 NSQ_del_dec_struct *psDD; |
344 NSQ_sample_struct *psSS; | 349 NSQ_sample_struct *psSS; |
| 350 |
| 351 __m128i a_Q12_0123, a_Q12_4567, a_Q12_89AB, a_Q12_CDEF; |
| 352 __m128i b_Q12_0123, b_sr_Q12_0123; |
345 SAVE_STACK; | 353 SAVE_STACK; |
346 | 354 |
347 silk_assert( nStatesDelayedDecision > 0 ); | 355 silk_assert( nStatesDelayedDecision > 0 ); |
348 ALLOC( psSampleState, nStatesDelayedDecision, NSQ_sample_pair ); | 356 ALLOC( psSampleState, nStatesDelayedDecision, NSQ_sample_pair ); |
349 | 357 |
350     shp_lag_ptr  = &NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - lag + HARM_SHAPE_FIR_TAPS / 2 ]; | 358     shp_lag_ptr  = &NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - lag + HARM_SHAPE_FIR_TAPS / 2 ]; |
351 pred_lag_ptr = &sLTP_Q15[ NSQ->sLTP_buf_idx - lag + LTP_ORDER / 2 ]; | 359 pred_lag_ptr = &sLTP_Q15[ NSQ->sLTP_buf_idx - lag + LTP_ORDER / 2 ]; |
352 Gain_Q10 = silk_RSHIFT( Gain_Q16, 6 ); | 360 Gain_Q10 = silk_RSHIFT( Gain_Q16, 6 ); |
353 | 361 |
| 362 a_Q12_0123 = OP_CVTEPI16_EPI32_M64( a_Q12 ); |
| 363 a_Q12_4567 = OP_CVTEPI16_EPI32_M64( a_Q12 + 4 ); |
| 364 |
| 365 if( opus_likely( predictLPCOrder == 16 ) ) { |
| 366 a_Q12_89AB = OP_CVTEPI16_EPI32_M64( a_Q12 + 8 ); |
| 367 a_Q12_CDEF = OP_CVTEPI16_EPI32_M64( a_Q12 + 12 ); |
| 368 } |
| 369 |
| 370 if( signalType == TYPE_VOICED ){ |
| 371 b_Q12_0123 = OP_CVTEPI16_EPI32_M64( b_Q14 ); |
| 372         b_sr_Q12_0123 = _mm_shuffle_epi32( b_Q12_0123, _MM_SHUFFLE( 0, 3, 2, 1 ) ); /* equal shift right 4 bytes */ |
| 373 } |
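A note on the coefficient setup above, with a small sketch. _mm_mul_epi32 only multiplies the 32-bit elements sitting in lanes 0 and 2 of its operands, which is why the code keeps both b_Q12_0123 and a copy rotated by one lane (b_sr_Q12_0123, the "equal shift right 4 bytes" shuffle): together they cover taps 0/2 and 1/3. The widening itself goes through OP_CVTEPI16_EPI32_M64() from celt/x86/x86cpu.h, which is essentially a sign-extending 64-bit load. A minimal stand-alone equivalent, with an invented helper name, using the intrinsics already included at the top of this file:

    /* Sketch only: roughly what OP_CVTEPI16_EPI32_M64() boils down to on this
     * path -- load four opus_int16 coefficients and sign-extend them into the
     * four 32-bit lanes of an XMM register.  Helper name invented. */
    static OPUS_INLINE __m128i widen4_coefs_sketch( const opus_int16 *c )
    {
        return _mm_cvtepi16_epi32( _mm_loadl_epi64( (const __m128i *)c ) );
    }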
354 for( i = 0; i < length; i++ ) { | 374 for( i = 0; i < length; i++ ) { |
355 /* Perform common calculations used in all states */ | 375 /* Perform common calculations used in all states */ |
356 | 376 |
357 /* Long-term prediction */ | 377 /* Long-term prediction */ |
358 if( signalType == TYPE_VOICED ) { | 378 if( signalType == TYPE_VOICED ) { |
359 /* Unrolled loop */ | 379 /* Unrolled loop */ |
360             /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */ | 380             /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */ |
361             LTP_pred_Q14 = 2; | 381             LTP_pred_Q14 = 2; |
362             LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ 0 ], b_Q14[ 0 ] ); | 382             { |
363             LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ -1 ], b_Q14[ 1 ] ); | 383                 __m128i tmpa, tmpb, pred_lag_ptr_tmp; |
364             LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ -2 ], b_Q14[ 2 ] ); | 384                 pred_lag_ptr_tmp = _mm_loadu_si128( (__m128i *)(&pred_lag_ptr[ -3 ] ) ); |
365             LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ -3 ], b_Q14[ 3 ] ); | 385                 pred_lag_ptr_tmp = _mm_shuffle_epi32( pred_lag_ptr_tmp, 0x1B ); |
366             LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ -4 ], b_Q14[ 4 ] ); | 386                 tmpa             = _mm_mul_epi32( pred_lag_ptr_tmp, b_Q12_0123 ); |
367             LTP_pred_Q14 = silk_LSHIFT( LTP_pred_Q14, 1 );                              /* Q13 -> Q14 */ | 387                 tmpa             = _mm_srli_si128( tmpa, 2 ); |
368             pred_lag_ptr++; | 388 |
| 389                 pred_lag_ptr_tmp = _mm_shuffle_epi32( pred_lag_ptr_tmp, _MM_SHUFFLE( 0, 3, 2, 1 ) );/* equal shift right 4 bytes */ |
| 390                 pred_lag_ptr_tmp = _mm_mul_epi32( pred_lag_ptr_tmp, b_sr_Q12_0123 ); |
| 391                 pred_lag_ptr_tmp = _mm_srli_si128( pred_lag_ptr_tmp, 2 ); |
| 392                 pred_lag_ptr_tmp = _mm_add_epi32( pred_lag_ptr_tmp, tmpa ); |
| 393 |
| 394                 tmpb = _mm_shuffle_epi32( pred_lag_ptr_tmp, _MM_SHUFFLE( 0, 0, 3, 2 ) );/* equal shift right 8 bytes */ |
| 395                 pred_lag_ptr_tmp = _mm_add_epi32( pred_lag_ptr_tmp, tmpb ); |
| 396                 LTP_pred_Q14 += _mm_cvtsi128_si32( pred_lag_ptr_tmp ); |
| 397 |
| 398                 LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ -4 ], b_Q14[ 4 ] ); |
| 399                 LTP_pred_Q14 = silk_LSHIFT( LTP_pred_Q14, 1 );                              /* Q13 -> Q14 */ |
| 400                 pred_lag_ptr++; |
| 401             } |
369 } else { | 402 } else { |
370 LTP_pred_Q14 = 0; | 403 LTP_pred_Q14 = 0; |
371 } | 404 } |
372 | 405 |
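For reference, the scalar code this block replaces (still visible in the old column) is just a 5-tap fixed-point dot product; a compact restatement, with a hypothetical helper name:

    /* Scalar reference for the voiced-path LTP prediction above (sketch only).
     * Each step accumulates roughly ( pred_lag_ptr[ -j ] * b_Q14[ j ] ) >> 16,
     * which is what silk_SMLAWB() computes.  LTP_ORDER is 5. */
    static OPUS_INLINE opus_int32 ltp_pred_ref_Q14( const opus_int32 *pred_lag_ptr, const opus_int16 *b_Q14 )
    {
        opus_int32 pred_Q13 = 2;    /* offset avoids a bias: silk_SMLAWB() always rounds to -inf */
        opus_int   j;
        for( j = 0; j < LTP_ORDER; j++ ) {
            pred_Q13 = silk_SMLAWB( pred_Q13, pred_lag_ptr[ -j ], b_Q14[ j ] );
        }
        return silk_LSHIFT( pred_Q13, 1 );    /* Q13 -> Q14 */
    }

The SSE4.1 path computes taps 0-3 with the two _mm_mul_epi32 calls (even and odd lanes), folds the partial sums with the shuffle-and-add at the end, keeps tap 4 as a scalar silk_SMLAWB(), and advances pred_lag_ptr exactly as the scalar code did.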
373 /* Long-term shaping */ | 406 /* Long-term shaping */ |
374 if( lag > 0 ) { | 407 if( lag > 0 ) { |
375 /* Symmetric, packed FIR coefficients */ | 408 /* Symmetric, packed FIR coefficients */ |
376             n_LTP_Q14 = silk_SMULWB( silk_ADD32( shp_lag_ptr[ 0 ], shp_lag_ptr[ -2 ] ), HarmShapeFIRPacked_Q14 ); | 409             n_LTP_Q14 = silk_SMULWB( silk_ADD32( shp_lag_ptr[ 0 ], shp_lag_ptr[ -2 ] ), HarmShapeFIRPacked_Q14 ); |
377             n_LTP_Q14 = silk_SMLAWT( n_LTP_Q14, shp_lag_ptr[ -1 ], HarmShapeFIRPacked_Q14 ); | 410             n_LTP_Q14 = silk_SMLAWT( n_LTP_Q14, shp_lag_ptr[ -1 ], HarmShapeFIRPacked_Q14 ); |
378             n_LTP_Q14 = silk_SUB_LSHIFT32( LTP_pred_Q14, n_LTP_Q14, 2 );            /* Q12 -> Q14 */ | 411             n_LTP_Q14 = silk_SUB_LSHIFT32( LTP_pred_Q14, n_LTP_Q14, 2 );            /* Q12 -> Q14 */ |
379 shp_lag_ptr++; | 412 shp_lag_ptr++; |
380 } else { | 413 } else { |
381 n_LTP_Q14 = 0; | 414 n_LTP_Q14 = 0; |
382 } | 415 } |
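The two silk_SMULWB() / silk_SMLAWT() lines above are a 3-tap symmetric FIR around the pitch lag with both shaping gains packed into one 32-bit word: the low 16 bits of HarmShapeFIRPacked_Q14 scale the outer taps, the high 16 bits scale the centre tap. A scalar sketch of that arithmetic (illustration only, names invented; the >> 16 matches the fixed-point macros up to rounding):

    /* Illustration of the packed-gain harmonic shaping above. */
    static OPUS_INLINE opus_int32 harm_shape_ref_Q12( const opus_int32 *shp_lag_ptr, opus_int32 HarmShapeFIRPacked_Q14 )
    {
        opus_int32 g_outer_Q14  = (opus_int32)(opus_int16)( HarmShapeFIRPacked_Q14 & 0xFFFF ); /* low 16 bits  */
        opus_int32 g_centre_Q14 = HarmShapeFIRPacked_Q14 >> 16;                                /* high 16 bits */
        return (opus_int32)( ( (opus_int64)silk_ADD32( shp_lag_ptr[ 0 ], shp_lag_ptr[ -2 ] ) * g_outer_Q14 ) >> 16 )
             + (opus_int32)( ( (opus_int64)shp_lag_ptr[ -1 ] * g_centre_Q14 ) >> 16 );
    }

silk_SUB_LSHIFT32( LTP_pred_Q14, n_LTP_Q14, 2 ) then subtracts four times this Q12 value from the LTP prediction, i.e. the shaping term is brought up to Q14 before the subtraction.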
383 | 416 { |
384 for( k = 0; k < nStatesDelayedDecision; k++ ) { | 417 __m128i tmpa, tmpb, psLPC_Q14_tmp, a_Q12_tmp; |
385 /* Delayed decision state */ | 418 |
386 psDD = &psDelDec[ k ]; | 419 for( k = 0; k < nStatesDelayedDecision; k++ ) { |
387 | 420 /* Delayed decision state */ |
388 /* Sample state */ | 421 psDD = &psDelDec[ k ]; |
389 psSS = psSampleState[ k ]; | 422 |
390 | 423 /* Sample state */ |
391 /* Generate dither */ | 424 psSS = psSampleState[ k ]; |
392 psDD->Seed = silk_RAND( psDD->Seed ); | 425 |
393 | 426 /* Generate dither */ |
394 /* Pointer used in short term prediction and shaping */ | 427 psDD->Seed = silk_RAND( psDD->Seed ); |
395 psLPC_Q14 = &psDD->sLPC_Q14[ NSQ_LPC_BUF_LENGTH - 1 + i ]; | 428 |
396 /* Short-term prediction */ | 429 /* Pointer used in short term prediction and shaping */ |
397 silk_assert( predictLPCOrder == 10 || predictLPCOrder == 16 ); | 430 psLPC_Q14 = &psDD->sLPC_Q14[ NSQ_LPC_BUF_LENGTH - 1 + i ]; |
398 /* Avoids introducing a bias because silk_SMLAWB() always rounds to
-inf */ | 431 /* Short-term prediction */ |
399 LPC_pred_Q14 = silk_RSHIFT( predictLPCOrder, 1 ); | 432 silk_assert( predictLPCOrder == 10 || predictLPCOrder == 16 ); |
400 LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ 0 ], a_Q12[ 0
] ); | 433 /* Avoids introducing a bias because silk_SMLAWB() always rounds
to -inf */ |
401 LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -1 ], a_Q12[ 1
] ); | 434 LPC_pred_Q14 = silk_RSHIFT( predictLPCOrder, 1 ); |
402 LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -2 ], a_Q12[ 2
] ); | 435 |
403 LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -3 ], a_Q12[ 3
] ); | 436 tmpb = _mm_setzero_si128(); |
404 LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -4 ], a_Q12[ 4
] ); | 437 |
405 LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -5 ], a_Q12[ 5
] ); | 438 /* step 1 */ |
406 LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -6 ], a_Q12[ 6
] ); | 439 psLPC_Q14_tmp = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[ -3 ]
) ); /* -3, -2 , -1, 0 */ |
407 LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -7 ], a_Q12[ 7
] ); | 440 psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, 0x1B );
/* 0, -1, -2, -3 */ |
408 LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -8 ], a_Q12[ 8
] ); | 441 tmpa = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_0123 );
/* 0, -1, -2, -3 * 0123 -> 0*0, 2*-2 */ |
409 LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -9 ], a_Q12[ 9
] ); | 442 |
410 if( predictLPCOrder == 16 ) { | 443 tmpa = _mm_srli_epi64( tmpa, 16 ); |
411 LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -10 ], a_Q1
2[ 10 ] ); | 444 tmpb = _mm_add_epi32( tmpb, tmpa ); |
412 LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -11 ], a_Q1
2[ 11 ] ); | 445 |
413 LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -12 ], a_Q1
2[ 12 ] ); | 446 psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, _MM_SHUFFLE( 0
, 3, 2, 1 ) ); /* equal shift right 4 bytes */ |
414 LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -13 ], a_Q1
2[ 13 ] ); | 447 a_Q12_tmp = _mm_shuffle_epi32( a_Q12_0123, _MM_SHUFFLE(0, 3, 2,
1 ) ); /* equal shift right 4 bytes */ |
415 LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -14 ], a_Q1
2[ 14 ] ); | 448 psLPC_Q14_tmp = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_tmp ); /*
1*-1, 3*-3 */ |
416 LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -15 ], a_Q1
2[ 15 ] ); | 449 psLPC_Q14_tmp = _mm_srli_epi64( psLPC_Q14_tmp, 16 ); |
| 450 tmpb = _mm_add_epi32( tmpb, psLPC_Q14_tmp ); |
| 451 |
| 452 /* step 2 */ |
| 453 psLPC_Q14_tmp = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[ -7 ]
) ); |
| 454 psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, 0x1B ); |
| 455 tmpa = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_4567 ); |
| 456 tmpa = _mm_srli_epi64( tmpa, 16 ); |
| 457 tmpb = _mm_add_epi32( tmpb, tmpa ); |
| 458 |
| 459 psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, _MM_SHUFFLE( 0
, 3, 2, 1 ) ); /* equal shift right 4 bytes */ |
| 460 a_Q12_tmp = _mm_shuffle_epi32( a_Q12_4567, _MM_SHUFFLE(0, 3, 2,
1 ) ); /* equal shift right 4 bytes */ |
| 461 psLPC_Q14_tmp = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_tmp ); |
| 462 psLPC_Q14_tmp = _mm_srli_epi64( psLPC_Q14_tmp, 16 ); |
| 463 tmpb = _mm_add_epi32( tmpb, psLPC_Q14_tmp ); |
| 464 |
| 465 if ( opus_likely( predictLPCOrder == 16 ) ) |
| 466 { |
| 467 /* step 3 */ |
| 468 psLPC_Q14_tmp = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[ -
11 ] ) ); |
| 469 psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, 0x1B ); |
| 470 tmpa = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_89AB )
; |
| 471 tmpa = _mm_srli_epi64( tmpa, 16 ); |
| 472 tmpb = _mm_add_epi32( tmpb, tmpa ); |
| 473 |
| 474 psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, _MM_SHUFFL
E( 0, 3, 2, 1 ) ); /* equal shift right 4 bytes */ |
| 475 a_Q12_tmp = _mm_shuffle_epi32( a_Q12_89AB, _MM_SHUFFLE(0, 3,
2, 1 ) );/* equal shift right 4 bytes */ |
| 476 psLPC_Q14_tmp = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_tmp ); |
| 477 psLPC_Q14_tmp = _mm_srli_epi64( psLPC_Q14_tmp, 16 ); |
| 478 tmpb = _mm_add_epi32( tmpb, psLPC_Q14_tmp ); |
| 479 |
| 480 /* setp 4 */ |
| 481 psLPC_Q14_tmp = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[ -
15 ] ) ); |
| 482 psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, 0x1B ); |
| 483 tmpa = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_CDEF )
; |
| 484 tmpa = _mm_srli_epi64( tmpa, 16 ); |
| 485 tmpb = _mm_add_epi32( tmpb, tmpa ); |
| 486 |
| 487 psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, _MM_SHUFFL
E( 0, 3, 2, 1 ) ); /* equal shift right 4 bytes */ |
| 488 a_Q12_tmp = _mm_shuffle_epi32( a_Q12_CDEF, _MM_SHUFFLE(0, 3,
2, 1 ) ); /* equal shift right 4 bytes */ |
| 489 psLPC_Q14_tmp = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_tmp ); |
| 490 psLPC_Q14_tmp = _mm_srli_epi64( psLPC_Q14_tmp, 16 ); |
| 491 tmpb = _mm_add_epi32( tmpb, psLPC_Q14_tmp ); |
| 492 |
| 493 /* add at last */ |
| 494 /* equal shift right 8 bytes*/ |
| 495 tmpa = _mm_shuffle_epi32( tmpb, _MM_SHUFFLE( 0, 0
, 3, 2 ) ); |
| 496 tmpb = _mm_add_epi32( tmpb, tmpa ); |
| 497 LPC_pred_Q14 += _mm_cvtsi128_si32( tmpb ); |
| 498 } |
| 499 else |
| 500 { |
| 501 /* add at last */ |
| 502 tmpa = _mm_shuffle_epi32( tmpb, _MM_SHUFFLE( 0, 0
, 3, 2 ) ); /* equal shift right 8 bytes*/ |
| 503 tmpb = _mm_add_epi32( tmpb, tmpa ); |
| 504 LPC_pred_Q14 += _mm_cvtsi128_si32( tmpb ); |
| 505 |
| 506 LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -8 ], a
_Q12[ 8 ] ); |
| 507 LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -9 ], a
_Q12[ 9 ] ); |
| 508 } |
| 509 |
| 510 LPC_pred_Q14 = silk_LSHIFT( LPC_pred_Q14, 4 ); /* Q10 -> Q14 */ |
| 511 |
| 512 /* Noise shape feedback */ |
| 513 silk_assert( ( shapingLPCOrder & 1 ) == 0 ); /* check that ord
er is even */ |
| 514 /* Output of lowpass section */ |
| 515 tmp2 = silk_SMLAWB( psLPC_Q14[ 0 ], psDD->sAR2_Q14[ 0 ], warping
_Q16 ); |
| 516 /* Output of allpass section */ |
| 517 tmp1 = silk_SMLAWB( psDD->sAR2_Q14[ 0 ], psDD->sAR2_Q14[ 1 ] - t
mp2, warping_Q16 ); |
| 518 psDD->sAR2_Q14[ 0 ] = tmp2; |
| 519 n_AR_Q14 = silk_RSHIFT( shapingLPCOrder, 1 ); |
| 520 n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp2, AR_shp_Q13[ 0 ] ); |
| 521 /* Loop over allpass sections */ |
| 522 for( j = 2; j < shapingLPCOrder; j += 2 ) { |
| 523 /* Output of allpass section */ |
| 524 tmp2 = silk_SMLAWB( psDD->sAR2_Q14[ j - 1 ], psDD->sAR2_Q14[
j + 0 ] - tmp1, warping_Q16 ); |
| 525 psDD->sAR2_Q14[ j - 1 ] = tmp1; |
| 526 n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp1, AR_shp_Q13[ j - 1 ]
); |
| 527 /* Output of allpass section */ |
| 528 tmp1 = silk_SMLAWB( psDD->sAR2_Q14[ j + 0 ], psDD->sAR2_Q14[
j + 1 ] - tmp2, warping_Q16 ); |
| 529 psDD->sAR2_Q14[ j + 0 ] = tmp2; |
| 530 n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp2, AR_shp_Q13[ j ] ); |
| 531 } |
| 532 psDD->sAR2_Q14[ shapingLPCOrder - 1 ] = tmp1; |
| 533 n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp1, AR_shp_Q13[ shapingLPCOr
der - 1 ] ); |
| 534 |
| 535 n_AR_Q14 = silk_LSHIFT( n_AR_Q14, 1 );
/* Q11 -> Q12 */ |
| 536 n_AR_Q14 = silk_SMLAWB( n_AR_Q14, psDD->LF_AR_Q14, Tilt_Q14 );
/* Q12 */ |
| 537 n_AR_Q14 = silk_LSHIFT( n_AR_Q14, 2 );
/* Q12 -> Q14 */ |
| 538 |
| 539 n_LF_Q14 = silk_SMULWB( psDD->Shape_Q14[ *smpl_buf_idx ], LF_shp
_Q14 ); /* Q12 */ |
| 540 n_LF_Q14 = silk_SMLAWT( n_LF_Q14, psDD->LF_AR_Q14, LF_shp_Q14 );
/* Q12 */ |
| 541 n_LF_Q14 = silk_LSHIFT( n_LF_Q14, 2 );
/* Q12 -> Q14 */ |
| 542 |
| 543 /* Input minus prediction plus noise feedback
*/ |
| 544 /* r = x[ i ] - LTP_pred - LPC_pred + n_AR + n_Tilt + n_LF + n_L
TP */ |
| 545 tmp1 = silk_ADD32( n_AR_Q14, n_LF_Q14 );
/* Q14 */ |
| 546 tmp2 = silk_ADD32( n_LTP_Q14, LPC_pred_Q14 );
/* Q13 */ |
| 547 tmp1 = silk_SUB32( tmp2, tmp1 );
/* Q13 */ |
| 548 tmp1 = silk_RSHIFT_ROUND( tmp1, 4 );
/* Q10 */ |
| 549 |
| 550 r_Q10 = silk_SUB32( x_Q10[ i ], tmp1 );
/* residual error Q10 */ |
| 551 |
| 552 /* Flip sign depending on dither */ |
| 553 if ( psDD->Seed < 0 ) { |
| 554 r_Q10 = -r_Q10; |
| 555 } |
| 556 r_Q10 = silk_LIMIT_32( r_Q10, -(31 << 10), 30 << 10 ); |
| 557 |
| 558 /* Find two quantization level candidates and measure their rate
-distortion */ |
| 559 q1_Q10 = silk_SUB32( r_Q10, offset_Q10 ); |
| 560 q1_Q0 = silk_RSHIFT( q1_Q10, 10 ); |
| 561 if( q1_Q0 > 0 ) { |
| 562 q1_Q10 = silk_SUB32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_
ADJUST_Q10 ); |
| 563 q1_Q10 = silk_ADD32( q1_Q10, offset_Q10 ); |
| 564 q2_Q10 = silk_ADD32( q1_Q10, 1024 ); |
| 565 rd1_Q10 = silk_SMULBB( q1_Q10, Lambda_Q10 ); |
| 566 rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 ); |
| 567 } else if( q1_Q0 == 0 ) { |
| 568 q1_Q10 = offset_Q10; |
| 569 q2_Q10 = silk_ADD32( q1_Q10, 1024 - QUANT_LEVEL_ADJUST_Q10
); |
| 570 rd1_Q10 = silk_SMULBB( q1_Q10, Lambda_Q10 ); |
| 571 rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 ); |
| 572 } else if( q1_Q0 == -1 ) { |
| 573 q2_Q10 = offset_Q10; |
| 574 q1_Q10 = silk_SUB32( q2_Q10, 1024 - QUANT_LEVEL_ADJUST_Q10
); |
| 575 rd1_Q10 = silk_SMULBB( -q1_Q10, Lambda_Q10 ); |
| 576 rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 ); |
| 577 } else { /* q1_Q0 < -1 */ |
| 578 q1_Q10 = silk_ADD32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_
ADJUST_Q10 ); |
| 579 q1_Q10 = silk_ADD32( q1_Q10, offset_Q10 ); |
| 580 q2_Q10 = silk_ADD32( q1_Q10, 1024 ); |
| 581 rd1_Q10 = silk_SMULBB( -q1_Q10, Lambda_Q10 ); |
| 582 rd2_Q10 = silk_SMULBB( -q2_Q10, Lambda_Q10 ); |
| 583 } |
| 584 rr_Q10 = silk_SUB32( r_Q10, q1_Q10 ); |
| 585 rd1_Q10 = silk_RSHIFT( silk_SMLABB( rd1_Q10, rr_Q10, rr_Q10 ), 1
0 ); |
| 586 rr_Q10 = silk_SUB32( r_Q10, q2_Q10 ); |
| 587 rd2_Q10 = silk_RSHIFT( silk_SMLABB( rd2_Q10, rr_Q10, rr_Q10 ), 1
0 ); |
| 588 |
| 589 if( rd1_Q10 < rd2_Q10 ) { |
| 590 psSS[ 0 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd1_Q10 ); |
| 591 psSS[ 1 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd2_Q10 ); |
| 592 psSS[ 0 ].Q_Q10 = q1_Q10; |
| 593 psSS[ 1 ].Q_Q10 = q2_Q10; |
| 594 } else { |
| 595 psSS[ 0 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd2_Q10 ); |
| 596 psSS[ 1 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd1_Q10 ); |
| 597 psSS[ 0 ].Q_Q10 = q2_Q10; |
| 598 psSS[ 1 ].Q_Q10 = q1_Q10; |
| 599 } |
| 600 |
| 601 /* Update states for best quantization */ |
| 602 |
| 603 /* Quantized excitation */ |
| 604 exc_Q14 = silk_LSHIFT32( psSS[ 0 ].Q_Q10, 4 ); |
| 605 if ( psDD->Seed < 0 ) { |
| 606 exc_Q14 = -exc_Q14; |
| 607 } |
| 608 |
| 609 /* Add predictions */ |
| 610 LPC_exc_Q14 = silk_ADD32( exc_Q14, LTP_pred_Q14 ); |
| 611 xq_Q14 = silk_ADD32( LPC_exc_Q14, LPC_pred_Q14 ); |
| 612 |
| 613 /* Update states */ |
| 614 sLF_AR_shp_Q14 = silk_SUB32( xq_Q14, n_AR_Q14 ); |
| 615 psSS[ 0 ].sLTP_shp_Q14 = silk_SUB32( sLF_AR_shp_Q14, n_LF_Q14 ); |
| 616 psSS[ 0 ].LF_AR_Q14 = sLF_AR_shp_Q14; |
| 617 psSS[ 0 ].LPC_exc_Q14 = LPC_exc_Q14; |
| 618 psSS[ 0 ].xq_Q14 = xq_Q14; |
| 619 |
| 620 /* Update states for second best quantization */ |
| 621 |
| 622 /* Quantized excitation */ |
| 623 exc_Q14 = silk_LSHIFT32( psSS[ 1 ].Q_Q10, 4 ); |
| 624 if ( psDD->Seed < 0 ) { |
| 625 exc_Q14 = -exc_Q14; |
| 626 } |
| 627 |
| 628 |
| 629 /* Add predictions */ |
| 630 LPC_exc_Q14 = silk_ADD32( exc_Q14, LTP_pred_Q14 ); |
| 631 xq_Q14 = silk_ADD32( LPC_exc_Q14, LPC_pred_Q14 ); |
| 632 |
| 633 /* Update states */ |
| 634 sLF_AR_shp_Q14 = silk_SUB32( xq_Q14, n_AR_Q14 ); |
| 635 psSS[ 1 ].sLTP_shp_Q14 = silk_SUB32( sLF_AR_shp_Q14, n_LF_Q14 ); |
| 636 psSS[ 1 ].LF_AR_Q14 = sLF_AR_shp_Q14; |
| 637 psSS[ 1 ].LPC_exc_Q14 = LPC_exc_Q14; |
| 638 psSS[ 1 ].xq_Q14 = xq_Q14; |
417 } | 639 } |
418 LPC_pred_Q14 = silk_LSHIFT( LPC_pred_Q14, 4 );
/* Q10 -> Q14 */ | |
419 | |
420 /* Noise shape feedback */ | |
421 silk_assert( ( shapingLPCOrder & 1 ) == 0 ); /* check that order i
s even */ | |
422 /* Output of lowpass section */ | |
423 tmp2 = silk_SMLAWB( psLPC_Q14[ 0 ], psDD->sAR2_Q14[ 0 ], warping_Q16
); | |
424 /* Output of allpass section */ | |
425 tmp1 = silk_SMLAWB( psDD->sAR2_Q14[ 0 ], psDD->sAR2_Q14[ 1 ] - tmp2,
warping_Q16 ); | |
426 psDD->sAR2_Q14[ 0 ] = tmp2; | |
427 n_AR_Q14 = silk_RSHIFT( shapingLPCOrder, 1 ); | |
428 n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp2, AR_shp_Q13[ 0 ] ); | |
429 /* Loop over allpass sections */ | |
430 for( j = 2; j < shapingLPCOrder; j += 2 ) { | |
431 /* Output of allpass section */ | |
432 tmp2 = silk_SMLAWB( psDD->sAR2_Q14[ j - 1 ], psDD->sAR2_Q14[ j +
0 ] - tmp1, warping_Q16 ); | |
433 psDD->sAR2_Q14[ j - 1 ] = tmp1; | |
434 n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp1, AR_shp_Q13[ j - 1 ] ); | |
435 /* Output of allpass section */ | |
436 tmp1 = silk_SMLAWB( psDD->sAR2_Q14[ j + 0 ], psDD->sAR2_Q14[ j +
1 ] - tmp2, warping_Q16 ); | |
437 psDD->sAR2_Q14[ j + 0 ] = tmp2; | |
438 n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp2, AR_shp_Q13[ j ] ); | |
439 } | |
440 psDD->sAR2_Q14[ shapingLPCOrder - 1 ] = tmp1; | |
441 n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp1, AR_shp_Q13[ shapingLPCOrder
- 1 ] ); | |
442 | |
443 n_AR_Q14 = silk_LSHIFT( n_AR_Q14, 1 );
/* Q11 -> Q12 */ | |
444 n_AR_Q14 = silk_SMLAWB( n_AR_Q14, psDD->LF_AR_Q14, Tilt_Q14 );
/* Q12 */ | |
445 n_AR_Q14 = silk_LSHIFT( n_AR_Q14, 2 );
/* Q12 -> Q14 */ | |
446 | |
447 n_LF_Q14 = silk_SMULWB( psDD->Shape_Q14[ *smpl_buf_idx ], LF_shp_Q14
); /* Q12 */ | |
448 n_LF_Q14 = silk_SMLAWT( n_LF_Q14, psDD->LF_AR_Q14, LF_shp_Q14 );
/* Q12 */ | |
449 n_LF_Q14 = silk_LSHIFT( n_LF_Q14, 2 );
/* Q12 -> Q14 */ | |
450 | |
451 /* Input minus prediction plus noise feedback
*/ | |
452 /* r = x[ i ] - LTP_pred - LPC_pred + n_AR + n_Tilt + n_LF + n_LTP
*/ | |
453 tmp1 = silk_ADD32( n_AR_Q14, n_LF_Q14 );
/* Q14 */ | |
454 tmp2 = silk_ADD32( n_LTP_Q14, LPC_pred_Q14 );
/* Q13 */ | |
455 tmp1 = silk_SUB32( tmp2, tmp1 );
/* Q13 */ | |
456 tmp1 = silk_RSHIFT_ROUND( tmp1, 4 );
/* Q10 */ | |
457 | |
458 r_Q10 = silk_SUB32( x_Q10[ i ], tmp1 );
/* residual error Q10 */ | |
459 | |
460 /* Flip sign depending on dither */ | |
461 if ( psDD->Seed < 0 ) { | |
462 r_Q10 = -r_Q10; | |
463 } | |
464 r_Q10 = silk_LIMIT_32( r_Q10, -(31 << 10), 30 << 10 ); | |
465 | |
466 /* Find two quantization level candidates and measure their rate-dis
tortion */ | |
467 q1_Q10 = silk_SUB32( r_Q10, offset_Q10 ); | |
468 q1_Q0 = silk_RSHIFT( q1_Q10, 10 ); | |
469 if( q1_Q0 > 0 ) { | |
470 q1_Q10 = silk_SUB32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJU
ST_Q10 ); | |
471 q1_Q10 = silk_ADD32( q1_Q10, offset_Q10 ); | |
472 q2_Q10 = silk_ADD32( q1_Q10, 1024 ); | |
473 rd1_Q10 = silk_SMULBB( q1_Q10, Lambda_Q10 ); | |
474 rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 ); | |
475 } else if( q1_Q0 == 0 ) { | |
476 q1_Q10 = offset_Q10; | |
477 q2_Q10 = silk_ADD32( q1_Q10, 1024 - QUANT_LEVEL_ADJUST_Q10 ); | |
478 rd1_Q10 = silk_SMULBB( q1_Q10, Lambda_Q10 ); | |
479 rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 ); | |
480 } else if( q1_Q0 == -1 ) { | |
481 q2_Q10 = offset_Q10; | |
482 q1_Q10 = silk_SUB32( q2_Q10, 1024 - QUANT_LEVEL_ADJUST_Q10 ); | |
483 rd1_Q10 = silk_SMULBB( -q1_Q10, Lambda_Q10 ); | |
484 rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 ); | |
485 } else { /* q1_Q0 < -1 */ | |
486 q1_Q10 = silk_ADD32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJU
ST_Q10 ); | |
487 q1_Q10 = silk_ADD32( q1_Q10, offset_Q10 ); | |
488 q2_Q10 = silk_ADD32( q1_Q10, 1024 ); | |
489 rd1_Q10 = silk_SMULBB( -q1_Q10, Lambda_Q10 ); | |
490 rd2_Q10 = silk_SMULBB( -q2_Q10, Lambda_Q10 ); | |
491 } | |
492 rr_Q10 = silk_SUB32( r_Q10, q1_Q10 ); | |
493 rd1_Q10 = silk_RSHIFT( silk_SMLABB( rd1_Q10, rr_Q10, rr_Q10 ), 10 ); | |
494 rr_Q10 = silk_SUB32( r_Q10, q2_Q10 ); | |
495 rd2_Q10 = silk_RSHIFT( silk_SMLABB( rd2_Q10, rr_Q10, rr_Q10 ), 10 ); | |
496 | |
497 if( rd1_Q10 < rd2_Q10 ) { | |
498 psSS[ 0 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd1_Q10 ); | |
499 psSS[ 1 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd2_Q10 ); | |
500 psSS[ 0 ].Q_Q10 = q1_Q10; | |
501 psSS[ 1 ].Q_Q10 = q2_Q10; | |
502 } else { | |
503 psSS[ 0 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd2_Q10 ); | |
504 psSS[ 1 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd1_Q10 ); | |
505 psSS[ 0 ].Q_Q10 = q2_Q10; | |
506 psSS[ 1 ].Q_Q10 = q1_Q10; | |
507 } | |
508 | |
509 /* Update states for best quantization */ | |
510 | |
511 /* Quantized excitation */ | |
512 exc_Q14 = silk_LSHIFT32( psSS[ 0 ].Q_Q10, 4 ); | |
513 if ( psDD->Seed < 0 ) { | |
514 exc_Q14 = -exc_Q14; | |
515 } | |
516 | |
517 /* Add predictions */ | |
518 LPC_exc_Q14 = silk_ADD32( exc_Q14, LTP_pred_Q14 ); | |
519 xq_Q14 = silk_ADD32( LPC_exc_Q14, LPC_pred_Q14 ); | |
520 | |
521 /* Update states */ | |
522 sLF_AR_shp_Q14 = silk_SUB32( xq_Q14, n_AR_Q14 ); | |
523 psSS[ 0 ].sLTP_shp_Q14 = silk_SUB32( sLF_AR_shp_Q14, n_LF_Q14 ); | |
524 psSS[ 0 ].LF_AR_Q14 = sLF_AR_shp_Q14; | |
525 psSS[ 0 ].LPC_exc_Q14 = LPC_exc_Q14; | |
526 psSS[ 0 ].xq_Q14 = xq_Q14; | |
527 | |
528 /* Update states for second best quantization */ | |
529 | |
530 /* Quantized excitation */ | |
531 exc_Q14 = silk_LSHIFT32( psSS[ 1 ].Q_Q10, 4 ); | |
532 if ( psDD->Seed < 0 ) { | |
533 exc_Q14 = -exc_Q14; | |
534 } | |
535 | |
536 | |
537 /* Add predictions */ | |
538 LPC_exc_Q14 = silk_ADD32( exc_Q14, LTP_pred_Q14 ); | |
539 xq_Q14 = silk_ADD32( LPC_exc_Q14, LPC_pred_Q14 ); | |
540 | |
541 /* Update states */ | |
542 sLF_AR_shp_Q14 = silk_SUB32( xq_Q14, n_AR_Q14 ); | |
543 psSS[ 1 ].sLTP_shp_Q14 = silk_SUB32( sLF_AR_shp_Q14, n_LF_Q14 ); | |
544 psSS[ 1 ].LF_AR_Q14 = sLF_AR_shp_Q14; | |
545 psSS[ 1 ].LPC_exc_Q14 = LPC_exc_Q14; | |
546 psSS[ 1 ].xq_Q14 = xq_Q14; | |
547 } | 640 } |
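The short-term prediction in the loop above follows the same pattern as the LTP block: each _mm_mul_epi32 / _mm_srli_epi64( ..., 16 ) pair produces two of the ( state * coef ) >> 16 terms that scalar silk_SMLAWB() would add, the partial sums are gathered in 32-bit lanes 0 and 2 of tmpb, and the final shuffle-and-add folds those two lanes together before _mm_cvtsi128_si32() extracts the result (the order-10 branch does this for eight taps and keeps taps 8 and 9 scalar). A stand-alone sketch of that dot-product idiom, with invented names:

    /* Sketch: accumulate ( x[ n ] * c[ n ] ) >> 16 for four terms and reduce
     * lanes 0 and 2 to a scalar, the way the block above does.  c_rot must be
     * c rotated by one lane (as a_Q12_tmp / b_sr_Q12_0123 are above). */
    static OPUS_INLINE opus_int32 dot4_shift16_sketch( const opus_int32 *x, __m128i c, __m128i c_rot )
    {
        __m128i v, p_even, p_odd, acc;
        v      = _mm_loadu_si128( (const __m128i *)x );                        /* x0 x1 x2 x3 */
        p_even = _mm_srli_epi64( _mm_mul_epi32( v, c ), 16 );                  /* (x0*c0)>>16, (x2*c2)>>16 */
        v      = _mm_shuffle_epi32( v, _MM_SHUFFLE( 0, 3, 2, 1 ) );            /* rotate: x1 x2 x3 x0 */
        p_odd  = _mm_srli_epi64( _mm_mul_epi32( v, c_rot ), 16 );              /* (x1*c1)>>16, (x3*c3)>>16 */
        acc    = _mm_add_epi32( p_even, p_odd );                               /* partial sums in lanes 0 and 2 */
        acc    = _mm_add_epi32( acc, _mm_shuffle_epi32( acc, _MM_SHUFFLE( 0, 0, 3, 2 ) ) );
        return _mm_cvtsi128_si32( acc );                                       /* lane 0 + lane 2 */
    }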
548 | |
549 *smpl_buf_idx = ( *smpl_buf_idx - 1 ) & DECISION_DELAY_MASK;
/* Index to newest samples */ | 641 *smpl_buf_idx = ( *smpl_buf_idx - 1 ) & DECISION_DELAY_MASK;
/* Index to newest samples */ |
550 last_smple_idx = ( *smpl_buf_idx + decisionDelay ) & DECISION_DELAY_MASK
; /* Index to decisionDelay old samples */ | 642 last_smple_idx = ( *smpl_buf_idx + decisionDelay ) & DECISION_DELAY_MASK
; /* Index to decisionDelay old samples */ |
551 | 643 |
552 /* Find winner */ | 644 /* Find winner */ |
553 RDmin_Q10 = psSampleState[ 0 ][ 0 ].RD_Q10; | 645 RDmin_Q10 = psSampleState[ 0 ][ 0 ].RD_Q10; |
554 Winner_ind = 0; | 646 Winner_ind = 0; |
555 for( k = 1; k < nStatesDelayedDecision; k++ ) { | 647 for( k = 1; k < nStatesDelayedDecision; k++ ) { |
556 if( psSampleState[ k ][ 0 ].RD_Q10 < RDmin_Q10 ) { | 648 if( psSampleState[ k ][ 0 ].RD_Q10 < RDmin_Q10 ) { |
557 RDmin_Q10 = psSampleState[ k ][ 0 ].RD_Q10; | 649 RDmin_Q10 = psSampleState[ k ][ 0 ].RD_Q10; |
558 Winner_ind = k; | 650 Winner_ind = k; |
(...skipping 64 matching lines...) |
623 delayedGain_Q10[ *smpl_buf_idx ] = Gain_Q10; | 715 delayedGain_Q10[ *smpl_buf_idx ] = Gain_Q10; |
624 } | 716 } |
625 /* Update LPC states */ | 717 /* Update LPC states */ |
626 for( k = 0; k < nStatesDelayedDecision; k++ ) { | 718 for( k = 0; k < nStatesDelayedDecision; k++ ) { |
627 psDD = &psDelDec[ k ]; | 719 psDD = &psDelDec[ k ]; |
628 silk_memcpy( psDD->sLPC_Q14, &psDD->sLPC_Q14[ length ], NSQ_LPC_BUF_LENG
TH * sizeof( opus_int32 ) ); | 720 silk_memcpy( psDD->sLPC_Q14, &psDD->sLPC_Q14[ length ], NSQ_LPC_BUF_LENG
TH * sizeof( opus_int32 ) ); |
629 } | 721 } |
630 RESTORE_STACK; | 722 RESTORE_STACK; |
631 } | 723 } |
632 | 724 |
633 static OPUS_INLINE void silk_nsq_del_dec_scale_states( | 725 static OPUS_INLINE void silk_nsq_del_dec_scale_states_sse4_1( |
634     const silk_encoder_state    *psEncC,              /* I    Encoder State                   */ | 726     const silk_encoder_state    *psEncC,              /* I    Encoder State                   */ |
635     silk_nsq_state              *NSQ,                  /* I/O  NSQ state                       */ | 727     silk_nsq_state              *NSQ,                  /* I/O  NSQ state                       */ |
636     NSQ_del_dec_struct          psDelDec[],            /* I/O  Delayed decision states         */ | 728     NSQ_del_dec_struct          psDelDec[],            /* I/O  Delayed decision states         */ |
637     const opus_int32            x_Q3[],                /* I    Input in Q3                     */ | 729     const opus_int32            x_Q3[],                /* I    Input in Q3                     */ |
638     opus_int32                  x_sc_Q10[],            /* O    Input scaled with 1/Gain in Q10 */ | 730     opus_int32                  x_sc_Q10[],            /* O    Input scaled with 1/Gain in Q10 */ |
639     const opus_int16            sLTP[],                /* I    Re-whitened LTP state in Q0     */ | 731     const opus_int16            sLTP[],                /* I    Re-whitened LTP state in Q0     */ |
640     opus_int32                  sLTP_Q15[],            /* O    LTP state matching scaled input */ | 732     opus_int32                  sLTP_Q15[],            /* O    LTP state matching scaled input */ |
641     opus_int                    subfr,                 /* I    Subframe number                 */ | 733     opus_int                    subfr,                 /* I    Subframe number                 */ |
642     opus_int                    nStatesDelayedDecision, /* I   Number of del dec states        */ | 734     opus_int                    nStatesDelayedDecision, /* I   Number of del dec states        */ |
643     const opus_int              LTP_scale_Q14,         /* I    LTP state scaling               */ | 735     const opus_int              LTP_scale_Q14,         /* I    LTP state scaling               */ |
644     const opus_int32            Gains_Q16[ MAX_NB_SUBFR ], /* I                                */ | 736     const opus_int32            Gains_Q16[ MAX_NB_SUBFR ], /* I                                */ |
645     const opus_int              pitchL[ MAX_NB_SUBFR ], /* I   Pitch lag                       */ | 737     const opus_int              pitchL[ MAX_NB_SUBFR ], /* I   Pitch lag                       */ |
646     const opus_int              signal_type,           /* I    Signal type                     */ | 738     const opus_int              signal_type,           /* I    Signal type                     */ |
647     const opus_int              decisionDelay          /* I    Decision delay                  */ | 739     const opus_int              decisionDelay          /* I    Decision delay                  */ |
648 ) | 740 ) |
649 { | 741 { |
650 opus_int i, k, lag; | 742 opus_int i, k, lag; |
651 opus_int32 gain_adj_Q16, inv_gain_Q31, inv_gain_Q23; | 743 opus_int32 gain_adj_Q16, inv_gain_Q31, inv_gain_Q23; |
652 NSQ_del_dec_struct *psDD; | 744 NSQ_del_dec_struct *psDD; |
| 745 __m128i xmm_inv_gain_Q23, xmm_x_Q3_x2x0, xmm_x_Q3_x3x1; |
653 | 746 |
654 lag = pitchL[ subfr ]; | 747 lag = pitchL[ subfr ]; |
655 inv_gain_Q31 = silk_INVERSE32_varQ( silk_max( Gains_Q16[ subfr ], 1 ), 47 ); | 748 inv_gain_Q31 = silk_INVERSE32_varQ( silk_max( Gains_Q16[ subfr ], 1 ), 47 ); |
| 749 |
656 silk_assert( inv_gain_Q31 != 0 ); | 750 silk_assert( inv_gain_Q31 != 0 ); |
657 | 751 |
658 /* Calculate gain adjustment factor */ | 752 /* Calculate gain adjustment factor */ |
659 if( Gains_Q16[ subfr ] != NSQ->prev_gain_Q16 ) { | 753 if( Gains_Q16[ subfr ] != NSQ->prev_gain_Q16 ) { |
660         gain_adj_Q16 = silk_DIV32_varQ( NSQ->prev_gain_Q16, Gains_Q16[ subfr ], 16 ); | 754         gain_adj_Q16 = silk_DIV32_varQ( NSQ->prev_gain_Q16, Gains_Q16[ subfr ], 16 ); |
661 } else { | 755 } else { |
662 gain_adj_Q16 = (opus_int32)1 << 16; | 756 gain_adj_Q16 = (opus_int32)1 << 16; |
663 } | 757 } |
664 | 758 |
665 /* Scale input */ | 759 /* Scale input */ |
666 inv_gain_Q23 = silk_RSHIFT_ROUND( inv_gain_Q31, 8 ); | 760 inv_gain_Q23 = silk_RSHIFT_ROUND( inv_gain_Q31, 8 ); |
667 for( i = 0; i < psEncC->subfr_length; i++ ) { | 761 |
| 762 /* prepare inv_gain_Q23 in packed 4 32-bits */ |
| 763 xmm_inv_gain_Q23 = _mm_set1_epi32(inv_gain_Q23); |
| 764 |
| 765 for( i = 0; i < psEncC->subfr_length - 3; i += 4 ) { |
| 766 xmm_x_Q3_x2x0 = _mm_loadu_si128( (__m128i *)(&(x_Q3[ i ] ) ) ); |
| 767 /* equal shift right 4 bytes*/ |
| 768         xmm_x_Q3_x3x1 = _mm_shuffle_epi32( xmm_x_Q3_x2x0, _MM_SHUFFLE( 0, 3, 2, 1 ) ); |
| 769 |
| 770 xmm_x_Q3_x2x0 = _mm_mul_epi32( xmm_x_Q3_x2x0, xmm_inv_gain_Q23 ); |
| 771 xmm_x_Q3_x3x1 = _mm_mul_epi32( xmm_x_Q3_x3x1, xmm_inv_gain_Q23 ); |
| 772 |
| 773 xmm_x_Q3_x2x0 = _mm_srli_epi64( xmm_x_Q3_x2x0, 16 ); |
| 774 xmm_x_Q3_x3x1 = _mm_slli_epi64( xmm_x_Q3_x3x1, 16 ); |
| 775 |
| 776 xmm_x_Q3_x2x0 = _mm_blend_epi16( xmm_x_Q3_x2x0, xmm_x_Q3_x3x1, 0xCC ); |
| 777 |
| 778 _mm_storeu_si128( (__m128i *)(&(x_sc_Q10[ i ])), xmm_x_Q3_x2x0 ); |
| 779 } |
| 780 |
| 781 for( ; i < psEncC->subfr_length; i++ ) { |
668 x_sc_Q10[ i ] = silk_SMULWW( x_Q3[ i ], inv_gain_Q23 ); | 782 x_sc_Q10[ i ] = silk_SMULWW( x_Q3[ i ], inv_gain_Q23 ); |
669 } | 783 } |
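The vectorised loop above is a 4-wide silk_SMULWW(), i.e. roughly out[ i ] = (opus_int32)( ( (opus_int64)x_Q3[ i ] * inv_gain_Q23 ) >> 16 ). Because _mm_mul_epi32 only handles lanes 0 and 2, the even lanes are multiplied directly and the odd lanes through a rotated copy; _mm_srli_epi64( ..., 16 ) drops the even results into lanes 0/2, _mm_slli_epi64( ..., 16 ) lifts the odd results into lanes 1/3, and _mm_blend_epi16 with mask 0xCC (binary 11001100, i.e. 16-bit words 2-3 and 6-7 taken from the second operand) re-interleaves the four 32-bit results. A minimal sketch of the same idiom, with an invented helper name:

    /* Sketch: 4-wide silk_SMULWW( a[ n ], b ), illustration only. */
    static OPUS_INLINE __m128i smulww4_sketch( const opus_int32 *a, opus_int32 b )
    {
        __m128i vb   = _mm_set1_epi32( b );
        __m128i even = _mm_loadu_si128( (const __m128i *)a );                 /* a0 a1 a2 a3 */
        __m128i odd  = _mm_shuffle_epi32( even, _MM_SHUFFLE( 0, 3, 2, 1 ) );  /* a1 a2 a3 a0 */
        even = _mm_srli_epi64( _mm_mul_epi32( even, vb ), 16 );               /* results land in lanes 0 and 2 */
        odd  = _mm_slli_epi64( _mm_mul_epi32( odd,  vb ), 16 );               /* results land in lanes 1 and 3 */
        return _mm_blend_epi16( even, odd, 0xCC );                            /* interleave the four outputs */
    }

The scalar remainder loop then covers subframe lengths that are not a multiple of four, as above.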
670 | 784 |
671 /* Save inverse gain */ | 785 /* Save inverse gain */ |
672 NSQ->prev_gain_Q16 = Gains_Q16[ subfr ]; | 786 NSQ->prev_gain_Q16 = Gains_Q16[ subfr ]; |
673 | 787 |
674 /* After rewhitening the LTP state is un-scaled, so scale with inv_gain_Q16
*/ | 788 /* After rewhitening the LTP state is un-scaled, so scale with inv_gain_Q16
*/ |
675 if( NSQ->rewhite_flag ) { | 789 if( NSQ->rewhite_flag ) { |
676 if( subfr == 0 ) { | 790 if( subfr == 0 ) { |
677 /* Do LTP downscaling */ | 791 /* Do LTP downscaling */ |
678 inv_gain_Q31 = silk_LSHIFT( silk_SMULWB( inv_gain_Q31, LTP_scale_Q14
), 2 ); | 792 inv_gain_Q31 = silk_LSHIFT( silk_SMULWB( inv_gain_Q31, LTP_scale_Q14
), 2 ); |
679 } | 793 } |
680 for( i = NSQ->sLTP_buf_idx - lag - LTP_ORDER / 2; i < NSQ->sLTP_buf_idx;
i++ ) { | 794 for( i = NSQ->sLTP_buf_idx - lag - LTP_ORDER / 2; i < NSQ->sLTP_buf_idx;
i++ ) { |
681 silk_assert( i < MAX_FRAME_LENGTH ); | 795 silk_assert( i < MAX_FRAME_LENGTH ); |
682 sLTP_Q15[ i ] = silk_SMULWB( inv_gain_Q31, sLTP[ i ] ); | 796 sLTP_Q15[ i ] = silk_SMULWB( inv_gain_Q31, sLTP[ i ] ); |
683 } | 797 } |
684 } | 798 } |
685 | 799 |
686 /* Adjust for changing gain */ | 800 /* Adjust for changing gain */ |
687 if( gain_adj_Q16 != (opus_int32)1 << 16 ) { | 801 if( gain_adj_Q16 != (opus_int32)1 << 16 ) { |
688 /* Scale long-term shaping state */ | 802 /* Scale long-term shaping state */ |
689 for( i = NSQ->sLTP_shp_buf_idx - psEncC->ltp_mem_length; i < NSQ->sLTP_s
hp_buf_idx; i++ ) { | 803 { |
690 NSQ->sLTP_shp_Q14[ i ] = silk_SMULWW( gain_adj_Q16, NSQ->sLTP_shp_Q1
4[ i ] ); | 804 __m128i xmm_gain_adj_Q16, xmm_sLTP_shp_Q14_x2x0, xmm_sLTP_shp_Q14_x3
x1; |
691 } | |
692 | 805 |
693 /* Scale long-term prediction state */ | 806 /* prepare gain_adj_Q16 in packed 4 32-bits */ |
694 if( signal_type == TYPE_VOICED && NSQ->rewhite_flag == 0 ) { | 807 xmm_gain_adj_Q16 = _mm_set1_epi32( gain_adj_Q16 ); |
695 for( i = NSQ->sLTP_buf_idx - lag - LTP_ORDER / 2; i < NSQ->sLTP_buf_
idx - decisionDelay; i++ ) { | 808 |
696 sLTP_Q15[ i ] = silk_SMULWW( gain_adj_Q16, sLTP_Q15[ i ] ); | 809 for( i = NSQ->sLTP_shp_buf_idx - psEncC->ltp_mem_length; i < NSQ->sL
TP_shp_buf_idx - 3; i += 4 ) |
| 810 { |
| 811 xmm_sLTP_shp_Q14_x2x0 = _mm_loadu_si128( (__m128i *)(&(NSQ->sLTP
_shp_Q14[ i ] ) ) ); |
| 812 /* equal shift right 4 bytes*/ |
| 813 xmm_sLTP_shp_Q14_x3x1 = _mm_shuffle_epi32( xmm_sLTP_shp_Q14_x2x0
, _MM_SHUFFLE( 0, 3, 2, 1 ) ); |
| 814 |
| 815 xmm_sLTP_shp_Q14_x2x0 = _mm_mul_epi32( xmm_sLTP_shp_Q14_x2x0, xm
m_gain_adj_Q16 ); |
| 816 xmm_sLTP_shp_Q14_x3x1 = _mm_mul_epi32( xmm_sLTP_shp_Q14_x3x1, xm
m_gain_adj_Q16 ); |
| 817 |
| 818 xmm_sLTP_shp_Q14_x2x0 = _mm_srli_epi64( xmm_sLTP_shp_Q14_x2x0, 1
6 ); |
| 819 xmm_sLTP_shp_Q14_x3x1 = _mm_slli_epi64( xmm_sLTP_shp_Q14_x3x1, 1
6 ); |
| 820 |
| 821 xmm_sLTP_shp_Q14_x2x0 = _mm_blend_epi16( xmm_sLTP_shp_Q14_x2x0,
xmm_sLTP_shp_Q14_x3x1, 0xCC ); |
| 822 |
| 823 _mm_storeu_si128( (__m128i *)(&(NSQ->sLTP_shp_Q14[ i ] ) ), xmm_
sLTP_shp_Q14_x2x0 ); |
697 } | 824 } |
698 } | |
699 | 825 |
700 for( k = 0; k < nStatesDelayedDecision; k++ ) { | 826 for( ; i < NSQ->sLTP_shp_buf_idx; i++ ) { |
701 psDD = &psDelDec[ k ]; | 827 NSQ->sLTP_shp_Q14[ i ] = silk_SMULWW( gain_adj_Q16, NSQ->sLTP_sh
p_Q14[ i ] ); |
| 828 } |
702 | 829 |
703 /* Scale scalar states */ | 830 /* Scale long-term prediction state */ |
704 psDD->LF_AR_Q14 = silk_SMULWW( gain_adj_Q16, psDD->LF_AR_Q14 ); | 831 if( signal_type == TYPE_VOICED && NSQ->rewhite_flag == 0 ) { |
| 832 for( i = NSQ->sLTP_buf_idx - lag - LTP_ORDER / 2; i < NSQ->sLTP_
buf_idx - decisionDelay; i++ ) { |
| 833 sLTP_Q15[ i ] = silk_SMULWW( gain_adj_Q16, sLTP_Q15[ i ] ); |
| 834 } |
| 835 } |
705 | 836 |
706 /* Scale short-term prediction and shaping states */ | 837 for( k = 0; k < nStatesDelayedDecision; k++ ) { |
707 for( i = 0; i < NSQ_LPC_BUF_LENGTH; i++ ) { | 838 psDD = &psDelDec[ k ]; |
708 psDD->sLPC_Q14[ i ] = silk_SMULWW( gain_adj_Q16, psDD->sLPC_Q14[
i ] ); | 839 |
709 } | 840 /* Scale scalar states */ |
710 for( i = 0; i < MAX_SHAPE_LPC_ORDER; i++ ) { | 841 psDD->LF_AR_Q14 = silk_SMULWW( gain_adj_Q16, psDD->LF_AR_Q14 ); |
711 psDD->sAR2_Q14[ i ] = silk_SMULWW( gain_adj_Q16, psDD->sAR2_Q14[
i ] ); | 842 |
712 } | 843 /* Scale short-term prediction and shaping states */ |
713 for( i = 0; i < DECISION_DELAY; i++ ) { | 844 for( i = 0; i < NSQ_LPC_BUF_LENGTH; i++ ) { |
714 psDD->Pred_Q15[ i ] = silk_SMULWW( gain_adj_Q16, psDD->Pred_Q15
[ i ] ); | 845 psDD->sLPC_Q14[ i ] = silk_SMULWW( gain_adj_Q16, psDD->sLPC_
Q14[ i ] ); |
715 psDD->Shape_Q14[ i ] = silk_SMULWW( gain_adj_Q16, psDD->Shape_Q1
4[ i ] ); | 846 } |
| 847 for( i = 0; i < MAX_SHAPE_LPC_ORDER; i++ ) { |
| 848 psDD->sAR2_Q14[ i ] = silk_SMULWW( gain_adj_Q16, psDD->sAR2_
Q14[ i ] ); |
| 849 } |
| 850 for( i = 0; i < DECISION_DELAY; i++ ) { |
| 851 psDD->Pred_Q15[ i ] = silk_SMULWW( gain_adj_Q16, psDD->Pred
_Q15[ i ] ); |
| 852 psDD->Shape_Q14[ i ] = silk_SMULWW( gain_adj_Q16, psDD->Shap
e_Q14[ i ] ); |
| 853 } |
716 } | 854 } |
717 } | 855 } |
718 } | 856 } |
719 } | 857 } |