Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(29)

Side by Side Diff: silk/x86/NSQ_del_dec_sse.c

Issue 882843002: Update to opus-HEAD-66611f1. (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/opus.git@master
Patch Set: Add the contents of Makefile.mips back. Created 5 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « silk/tuning_parameters.h ('k') | silk/x86/NSQ_sse.c » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /*********************************************************************** 1 /* Copyright (c) 2014, Cisco Systems, INC
2 Copyright (c) 2006-2011, Skype Limited. All rights reserved. 2 Written by XiangMingZhu WeiZhou MinPeng YanWang
3 Redistribution and use in source and binary forms, with or without 3
4 modification, are permitted provided that the following conditions 4 Redistribution and use in source and binary forms, with or without
5 are met: 5 modification, are permitted provided that the following conditions
6 - Redistributions of source code must retain the above copyright notice, 6 are met:
7 this list of conditions and the following disclaimer. 7
8 - Redistributions in binary form must reproduce the above copyright 8 - Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer in the 9 notice, this list of conditions and the following disclaimer.
10 documentation and/or other materials provided with the distribution. 10
11 - Neither the name of Internet Society, IETF or IETF Trust, nor the 11 - Redistributions in binary form must reproduce the above copyright
12 names of specific contributors, may be used to endorse or promote 12 notice, this list of conditions and the following disclaimer in the
13 products derived from this software without specific prior written 13 documentation and/or other materials provided with the distribution.
14 permission. 14
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 18 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
19 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 19 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 20 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 21 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 22 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 24 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 POSSIBILITY OF SUCH DAMAGE. 25 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 ***********************************************************************/ 26 */
27 27
28 #ifdef HAVE_CONFIG_H 28 #ifdef HAVE_CONFIG_H
29 #include "config.h" 29 #include "config.h"
30 #endif 30 #endif
31 31
32 #include <xmmintrin.h>
33 #include <emmintrin.h>
34 #include <smmintrin.h>
32 #include "main.h" 35 #include "main.h"
36 #include "celt/x86/x86cpu.h"
37
33 #include "stack_alloc.h" 38 #include "stack_alloc.h"
34 39
35 typedef struct { 40 typedef struct {
36 opus_int32 sLPC_Q14[ MAX_SUB_FRAME_LENGTH + NSQ_LPC_BUF_LENGTH ]; 41 opus_int32 sLPC_Q14[ MAX_SUB_FRAME_LENGTH + NSQ_LPC_BUF_LENGTH ];
37 opus_int32 RandState[ DECISION_DELAY ]; 42 opus_int32 RandState[ DECISION_DELAY ];
38 opus_int32 Q_Q10[ DECISION_DELAY ]; 43 opus_int32 Q_Q10[ DECISION_DELAY ];
39 opus_int32 Xq_Q14[ DECISION_DELAY ]; 44 opus_int32 Xq_Q14[ DECISION_DELAY ];
40 opus_int32 Pred_Q15[ DECISION_DELAY ]; 45 opus_int32 Pred_Q15[ DECISION_DELAY ];
41 opus_int32 Shape_Q14[ DECISION_DELAY ]; 46 opus_int32 Shape_Q14[ DECISION_DELAY ];
42 opus_int32 sAR2_Q14[ MAX_SHAPE_LPC_ORDER ]; 47 opus_int32 sAR2_Q14[ MAX_SHAPE_LPC_ORDER ];
43 opus_int32 LF_AR_Q14; 48 opus_int32 LF_AR_Q14;
44 opus_int32 Seed; 49 opus_int32 Seed;
45 opus_int32 SeedInit; 50 opus_int32 SeedInit;
46 opus_int32 RD_Q10; 51 opus_int32 RD_Q10;
47 } NSQ_del_dec_struct; 52 } NSQ_del_dec_struct;
48 53
49 typedef struct { 54 typedef struct {
50 opus_int32 Q_Q10; 55 opus_int32 Q_Q10;
51 opus_int32 RD_Q10; 56 opus_int32 RD_Q10;
52 opus_int32 xq_Q14; 57 opus_int32 xq_Q14;
53 opus_int32 LF_AR_Q14; 58 opus_int32 LF_AR_Q14;
54 opus_int32 sLTP_shp_Q14; 59 opus_int32 sLTP_shp_Q14;
55 opus_int32 LPC_exc_Q14; 60 opus_int32 LPC_exc_Q14;
56 } NSQ_sample_struct; 61 } NSQ_sample_struct;
57 62
58 typedef NSQ_sample_struct NSQ_sample_pair[ 2 ]; 63 typedef NSQ_sample_struct NSQ_sample_pair[ 2 ];
59 64
60 static OPUS_INLINE void silk_nsq_del_dec_scale_states( 65 static OPUS_INLINE void silk_nsq_del_dec_scale_states_sse4_1(
61 const silk_encoder_state *psEncC, /* I Encoder State */ 66 const silk_encoder_state *psEncC, /* I Encoder State */
62 silk_nsq_state *NSQ, /* I/O NSQ state */ 67 silk_nsq_state *NSQ, /* I/O NSQ state */
63 NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states */ 68 NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states */
64 const opus_int32 x_Q3[], /* I Input in Q3 */ 69 const opus_int32 x_Q3[], /* I Input in Q3 */
65 opus_int32 x_sc_Q10[], /* O Input scaled with 1/Gain in Q10 */ 70 opus_int32 x_sc_Q10[], /* O Input scaled with 1/Gain in Q10 */
66 const opus_int16 sLTP[], /* I Re-whitened LTP state in Q0 */ 71 const opus_int16 sLTP[], /* I Re-whitened LTP state in Q0 */
67 opus_int32 sLTP_Q15[], /* O LTP state matching scaled input */ 72 opus_int32 sLTP_Q15[], /* O LTP state matching scaled input */
68 opus_int subfr, /* I Subframe number */ 73 opus_int subfr, /* I Subframe number */
69 opus_int nStatesDelayedDecision, /* I Number of del dec states */ 74 opus_int nStatesDelayedDecision, /* I Number of del dec states */
70 const opus_int LTP_scale_Q14, /* I LTP state scaling */ 75 const opus_int LTP_scale_Q14, /* I LTP state scaling */
71 const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I */ 76 const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I */
72 const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lag */ 77 const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lag */
73 const opus_int signal_type, /* I Signal type */ 78 const opus_int signal_type, /* I Signal type */
74 const opus_int decisionDelay /* I Decision delay */ 79 const opus_int decisionDelay /* I Decision delay */
75 ); 80 );
76 81
77 /******************************************/ 82 /******************************************/
78 /* Noise shape quantizer for one subframe */ 83 /* Noise shape quantizer for one subframe */
79 /******************************************/ 84 /******************************************/
80 static OPUS_INLINE void silk_noise_shape_quantizer_del_dec( 85 static OPUS_INLINE void silk_noise_shape_quantizer_del_dec_sse4_1(
81 silk_nsq_state *NSQ, /* I/O NSQ state */ 86 silk_nsq_state *NSQ, /* I/O NSQ state */
82 NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states */ 87 NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states */
83 opus_int signalType, /* I Signal type */ 88 opus_int signalType, /* I Signal type */
84 const opus_int32 x_Q10[], /* I */ 89 const opus_int32 x_Q10[], /* I */
85 opus_int8 pulses[], /* O */ 90 opus_int8 pulses[], /* O */
86 opus_int16 xq[], /* O */ 91 opus_int16 xq[], /* O */
87 opus_int32 sLTP_Q15[], /* I/O LTP filter state */ 92 opus_int32 sLTP_Q15[], /* I/O LTP filter state */
88 opus_int32 delayedGain_Q10[], /* I/O Gain delay buffer */ 93 opus_int32 delayedGain_Q10[], /* I/O Gain delay buffer */
89 const opus_int16 a_Q12[], /* I Short term prediction coefs */ 94 const opus_int16 a_Q12[], /* I Short term prediction coefs */
90 const opus_int16 b_Q14[], /* I Long term prediction coefs */ 95 const opus_int16 b_Q14[], /* I Long term prediction coefs */
91 const opus_int16 AR_shp_Q13[], /* I Noise shaping coefs */ 96 const opus_int16 AR_shp_Q13[], /* I Noise shaping coefs */
92 opus_int lag, /* I Pitch lag */ 97 opus_int lag, /* I Pitch lag */
93 opus_int32 HarmShapeFIRPacked_Q14, /* I */ 98 opus_int32 HarmShapeFIRPacked_Q14, /* I */
94 opus_int Tilt_Q14, /* I Spectral tilt */ 99 opus_int Tilt_Q14, /* I Spectral tilt */
95 opus_int32 LF_shp_Q14, /* I */ 100 opus_int32 LF_shp_Q14, /* I */
96 opus_int32 Gain_Q16, /* I */ 101 opus_int32 Gain_Q16, /* I */
97 opus_int Lambda_Q10, /* I */ 102 opus_int Lambda_Q10, /* I */
98 opus_int offset_Q10, /* I */ 103 opus_int offset_Q10, /* I */
99 opus_int length, /* I Input length */ 104 opus_int length, /* I Input length */
100 opus_int subfr, /* I Subframe number */ 105 opus_int subfr, /* I Subframe number */
101 opus_int shapingLPCOrder, /* I Shaping LPC filter order */ 106 opus_int shapingLPCOrder, /* I Shaping LPC filter order */
102 opus_int predictLPCOrder, /* I Prediction filter order */ 107 opus_int predictLPCOrder, /* I Prediction filter order */
103 opus_int warping_Q16, /* I */ 108 opus_int warping_Q16, /* I */
104 opus_int nStatesDelayedDecision, /* I Number of states in decision tree */ 109 opus_int nStatesDelayedDecision, /* I Number of states in decision tree */
105 opus_int *smpl_buf_idx, /* I Index to newest samples in buffers */ 110 opus_int *smpl_buf_idx, /* I Index to newest samples in buffers */
106 opus_int decisionDelay /* I */ 111 opus_int decisionDelay /* I */
107 ); 112 );
108 113
109 void silk_NSQ_del_dec( 114 void silk_NSQ_del_dec_sse4_1(
110 const silk_encoder_state *psEncC, /* I /O Encoder State */ 115 const silk_encoder_state *psEncC, /* I /O Encoder State */
111 silk_nsq_state *NSQ, /* I /O NSQ state */ 116 silk_nsq_state *NSQ, /* I /O NSQ state */
112 SideInfoIndices *psIndices, /* I /O Quantization Indices */ 117 SideInfoIndices *psIndices, /* I /O Quantization Indices */
113 const opus_int32 x_Q3[], /* I Prefiltered input signal */ 118 const opus_int32 x_Q3[], /* I Prefiltered input signal */
114 opus_int8 pulses[], /* O Quantized pulse signal */ 119 opus_int8 pulses[], /* O Quantized pulse signal */
115 const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ 120 const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */
116 const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ 121 const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */
117 const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ 122 const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */
118 const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ 123 const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */
119 const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ 124 const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */
(...skipping 117 matching lines...) Expand 10 before | Expand all | Expand 10 after
237 } 242 }
238 243
239 subfr = 0; 244 subfr = 0;
240 } 245 }
241 246
242 /* Rewhiten with new A coefs */ 247 /* Rewhiten with new A coefs */
243 start_idx = psEncC->ltp_mem_length - lag - psEncC->predictLPCOrd er - LTP_ORDER / 2; 248 start_idx = psEncC->ltp_mem_length - lag - psEncC->predictLPCOrd er - LTP_ORDER / 2;
244 silk_assert( start_idx > 0 ); 249 silk_assert( start_idx > 0 );
245 250
246 silk_LPC_analysis_filter( &sLTP[ start_idx ], &NSQ->xq[ start_id x + k * psEncC->subfr_length ], 251 silk_LPC_analysis_filter( &sLTP[ start_idx ], &NSQ->xq[ start_id x + k * psEncC->subfr_length ],
247 A_Q12, psEncC->ltp_mem_length - start_idx, psEncC->predictLP COrder ); 252 A_Q12, psEncC->ltp_mem_length - start_idx, psEncC->predictLP COrder, psEncC->arch );
248 253
249 NSQ->sLTP_buf_idx = psEncC->ltp_mem_length; 254 NSQ->sLTP_buf_idx = psEncC->ltp_mem_length;
250 NSQ->rewhite_flag = 1; 255 NSQ->rewhite_flag = 1;
251 } 256 }
252 } 257 }
253 258
254 silk_nsq_del_dec_scale_states( psEncC, NSQ, psDelDec, x_Q3, x_sc_Q10, sL TP, sLTP_Q15, k, 259 silk_nsq_del_dec_scale_states_sse4_1( psEncC, NSQ, psDelDec, x_Q3, x_sc_ Q10, sLTP, sLTP_Q15, k,
255 psEncC->nStatesDelayedDecision, LTP_scale_Q14, Gains_Q16, pitchL, ps Indices->signalType, decisionDelay ); 260 psEncC->nStatesDelayedDecision, LTP_scale_Q14, Gains_Q16, pitchL, ps Indices->signalType, decisionDelay );
256 261
257 silk_noise_shape_quantizer_del_dec( NSQ, psDelDec, psIndices->signalType , x_sc_Q10, pulses, pxq, sLTP_Q15, 262 silk_noise_shape_quantizer_del_dec_sse4_1( NSQ, psDelDec, psIndices->sig nalType, x_sc_Q10, pulses, pxq, sLTP_Q15,
258 delayedGain_Q10, A_Q12, B_Q14, AR_shp_Q13, lag, HarmShapeFIRPacked_Q 14, Tilt_Q14[ k ], LF_shp_Q14[ k ], 263 delayedGain_Q10, A_Q12, B_Q14, AR_shp_Q13, lag, HarmShapeFIRPacked_Q 14, Tilt_Q14[ k ], LF_shp_Q14[ k ],
259 Gains_Q16[ k ], Lambda_Q10, offset_Q10, psEncC->subfr_length, subfr+ +, psEncC->shapingLPCOrder, 264 Gains_Q16[ k ], Lambda_Q10, offset_Q10, psEncC->subfr_length, subfr+ +, psEncC->shapingLPCOrder,
260 psEncC->predictLPCOrder, psEncC->warping_Q16, psEncC->nStatesDelayed Decision, &smpl_buf_idx, decisionDelay ); 265 psEncC->predictLPCOrder, psEncC->warping_Q16, psEncC->nStatesDelayed Decision, &smpl_buf_idx, decisionDelay );
261 266
262 x_Q3 += psEncC->subfr_length; 267 x_Q3 += psEncC->subfr_length;
263 pulses += psEncC->subfr_length; 268 pulses += psEncC->subfr_length;
264 pxq += psEncC->subfr_length; 269 pxq += psEncC->subfr_length;
265 } 270 }
266 271
267 /* Find winner */ 272 /* Find winner */
(...skipping 28 matching lines...) Expand all
296 /* Save quantized speech signal */ 301 /* Save quantized speech signal */
297 /* DEBUG_STORE_DATA( enc.pcm, &NSQ->xq[psEncC->ltp_mem_length], psEncC->fram e_length * sizeof( opus_int16 ) ) */ 302 /* DEBUG_STORE_DATA( enc.pcm, &NSQ->xq[psEncC->ltp_mem_length], psEncC->fram e_length * sizeof( opus_int16 ) ) */
298 silk_memmove( NSQ->xq, &NSQ->xq[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int16 ) ); 303 silk_memmove( NSQ->xq, &NSQ->xq[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int16 ) );
299 silk_memmove( NSQ->sLTP_shp_Q14, &NSQ->sLTP_shp_Q14[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int32 ) ); 304 silk_memmove( NSQ->sLTP_shp_Q14, &NSQ->sLTP_shp_Q14[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int32 ) );
300 RESTORE_STACK; 305 RESTORE_STACK;
301 } 306 }
302 307
303 /******************************************/ 308 /******************************************/
304 /* Noise shape quantizer for one subframe */ 309 /* Noise shape quantizer for one subframe */
305 /******************************************/ 310 /******************************************/
306 static OPUS_INLINE void silk_noise_shape_quantizer_del_dec( 311 static OPUS_INLINE void silk_noise_shape_quantizer_del_dec_sse4_1(
307 silk_nsq_state *NSQ, /* I/O NSQ state */ 312 silk_nsq_state *NSQ, /* I/O NSQ state */
308 NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states */ 313 NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states */
309 opus_int signalType, /* I Signal type */ 314 opus_int signalType, /* I Signal type */
310 const opus_int32 x_Q10[], /* I */ 315 const opus_int32 x_Q10[], /* I */
311 opus_int8 pulses[], /* O */ 316 opus_int8 pulses[], /* O */
312 opus_int16 xq[], /* O */ 317 opus_int16 xq[], /* O */
313 opus_int32 sLTP_Q15[], /* I/O LTP filter state */ 318 opus_int32 sLTP_Q15[], /* I/O LTP filter state */
314 opus_int32 delayedGain_Q10[], /* I/O Gain delay buffer */ 319 opus_int32 delayedGain_Q10[], /* I/O Gain delay buffer */
315 const opus_int16 a_Q12[], /* I Short term prediction coefs */ 320 const opus_int16 a_Q12[], /* I Short term prediction coefs */
316 const opus_int16 b_Q14[], /* I Long term prediction coefs */ 321 const opus_int16 b_Q14[], /* I Long term prediction coefs */
(...skipping 18 matching lines...) Expand all
335 opus_int i, j, k, Winner_ind, RDmin_ind, RDmax_ind, last_smple_idx; 340 opus_int i, j, k, Winner_ind, RDmin_ind, RDmax_ind, last_smple_idx;
336 opus_int32 Winner_rand_state; 341 opus_int32 Winner_rand_state;
337 opus_int32 LTP_pred_Q14, LPC_pred_Q14, n_AR_Q14, n_LTP_Q14; 342 opus_int32 LTP_pred_Q14, LPC_pred_Q14, n_AR_Q14, n_LTP_Q14;
338 opus_int32 n_LF_Q14, r_Q10, rr_Q10, rd1_Q10, rd2_Q10, RDmin_Q10, RDmax_Q10 ; 343 opus_int32 n_LF_Q14, r_Q10, rr_Q10, rd1_Q10, rd2_Q10, RDmin_Q10, RDmax_Q10 ;
339 opus_int32 q1_Q0, q1_Q10, q2_Q10, exc_Q14, LPC_exc_Q14, xq_Q14, Gain_Q10; 344 opus_int32 q1_Q0, q1_Q10, q2_Q10, exc_Q14, LPC_exc_Q14, xq_Q14, Gain_Q10;
340 opus_int32 tmp1, tmp2, sLF_AR_shp_Q14; 345 opus_int32 tmp1, tmp2, sLF_AR_shp_Q14;
341 opus_int32 *pred_lag_ptr, *shp_lag_ptr, *psLPC_Q14; 346 opus_int32 *pred_lag_ptr, *shp_lag_ptr, *psLPC_Q14;
342 VARDECL( NSQ_sample_pair, psSampleState ); 347 VARDECL( NSQ_sample_pair, psSampleState );
343 NSQ_del_dec_struct *psDD; 348 NSQ_del_dec_struct *psDD;
344 NSQ_sample_struct *psSS; 349 NSQ_sample_struct *psSS;
350
351 __m128i a_Q12_0123, a_Q12_4567, a_Q12_89AB, a_Q12_CDEF;
352 __m128i b_Q12_0123, b_sr_Q12_0123;
345 SAVE_STACK; 353 SAVE_STACK;
346 354
347 silk_assert( nStatesDelayedDecision > 0 ); 355 silk_assert( nStatesDelayedDecision > 0 );
348 ALLOC( psSampleState, nStatesDelayedDecision, NSQ_sample_pair ); 356 ALLOC( psSampleState, nStatesDelayedDecision, NSQ_sample_pair );
349 357
350 shp_lag_ptr = &NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - lag + HARM_SHAPE_ FIR_TAPS / 2 ]; 358 shp_lag_ptr = &NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - lag + HARM_SHAPE_ FIR_TAPS / 2 ];
351 pred_lag_ptr = &sLTP_Q15[ NSQ->sLTP_buf_idx - lag + LTP_ORDER / 2 ]; 359 pred_lag_ptr = &sLTP_Q15[ NSQ->sLTP_buf_idx - lag + LTP_ORDER / 2 ];
352 Gain_Q10 = silk_RSHIFT( Gain_Q16, 6 ); 360 Gain_Q10 = silk_RSHIFT( Gain_Q16, 6 );
353 361
362 a_Q12_0123 = OP_CVTEPI16_EPI32_M64( a_Q12 );
363 a_Q12_4567 = OP_CVTEPI16_EPI32_M64( a_Q12 + 4 );
364
365 if( opus_likely( predictLPCOrder == 16 ) ) {
366 a_Q12_89AB = OP_CVTEPI16_EPI32_M64( a_Q12 + 8 );
367 a_Q12_CDEF = OP_CVTEPI16_EPI32_M64( a_Q12 + 12 );
368 }
369
370 if( signalType == TYPE_VOICED ){
371 b_Q12_0123 = OP_CVTEPI16_EPI32_M64( b_Q14 );
372 b_sr_Q12_0123 = _mm_shuffle_epi32( b_Q12_0123, _MM_SHUFFLE( 0, 3, 2, 1 ) ); /* equal shift right 4 bytes */
373 }
354 for( i = 0; i < length; i++ ) { 374 for( i = 0; i < length; i++ ) {
355 /* Perform common calculations used in all states */ 375 /* Perform common calculations used in all states */
356 376
357 /* Long-term prediction */ 377 /* Long-term prediction */
358 if( signalType == TYPE_VOICED ) { 378 if( signalType == TYPE_VOICED ) {
359 /* Unrolled loop */ 379 /* Unrolled loop */
360 /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */ 380 /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
361 LTP_pred_Q14 = 2; 381 LTP_pred_Q14 = 2;
362 LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ 0 ], b_Q14[ 0 ] ); 382 {
363 LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ -1 ], b_Q14[ 1 ] ); 383 __m128i tmpa, tmpb, pred_lag_ptr_tmp;
364 LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ -2 ], b_Q14[ 2 ] ); 384 pred_lag_ptr_tmp = _mm_loadu_si128( (__m128i *)(&pred_lag_ptr [ -3 ] ) );
365 LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ -3 ], b_Q14[ 3 ] ); 385 pred_lag_ptr_tmp = _mm_shuffle_epi32( pred_lag_ptr_tmp, 0x1B );
366 LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ -4 ], b_Q14[ 4 ] ); 386 tmpa = _mm_mul_epi32( pred_lag_ptr_tmp, b_Q12_012 3 );
367 LTP_pred_Q14 = silk_LSHIFT( LTP_pred_Q14, 1 ); /* Q13 -> Q14 */ 387 tmpa = _mm_srli_si128( tmpa, 2 );
368 pred_lag_ptr++; 388
389 pred_lag_ptr_tmp = _mm_shuffle_epi32( pred_lag_ptr_tmp, _MM_SHUF FLE( 0, 3, 2, 1 ) );/* equal shift right 4 bytes */
390 pred_lag_ptr_tmp = _mm_mul_epi32( pred_lag_ptr_tmp, b_sr_Q12_ 0123 );
391 pred_lag_ptr_tmp = _mm_srli_si128( pred_lag_ptr_tmp, 2 );
392 pred_lag_ptr_tmp = _mm_add_epi32( pred_lag_ptr_tmp, tmpa );
393
394 tmpb = _mm_shuffle_epi32( pred_lag_ptr_tmp, _MM_SHUFFLE( 0, 0, 3 , 2 ) );/* equal shift right 8 bytes */
395 pred_lag_ptr_tmp = _mm_add_epi32( pred_lag_ptr_tmp, tmpb );
396 LTP_pred_Q14 += _mm_cvtsi128_si32( pred_lag_ptr_tmp );
397
398 LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ -4 ], b_ Q14[ 4 ] );
399 LTP_pred_Q14 = silk_LSHIFT( LTP_pred_Q14, 1 ); /* Q13 -> Q14 */
400 pred_lag_ptr++;
401 }
369 } else { 402 } else {
370 LTP_pred_Q14 = 0; 403 LTP_pred_Q14 = 0;
371 } 404 }
372 405
373 /* Long-term shaping */ 406 /* Long-term shaping */
374 if( lag > 0 ) { 407 if( lag > 0 ) {
375 /* Symmetric, packed FIR coefficients */ 408 /* Symmetric, packed FIR coefficients */
376 n_LTP_Q14 = silk_SMULWB( silk_ADD32( shp_lag_ptr[ 0 ], shp_lag_ptr[ -2 ] ), HarmShapeFIRPacked_Q14 ); 409 n_LTP_Q14 = silk_SMULWB( silk_ADD32( shp_lag_ptr[ 0 ], shp_lag_ptr[ -2 ] ), HarmShapeFIRPacked_Q14 );
377 n_LTP_Q14 = silk_SMLAWT( n_LTP_Q14, shp_lag_ptr[ -1 ], HarmShapeFIRPacked_Q14 ); 410 n_LTP_Q14 = silk_SMLAWT( n_LTP_Q14, shp_lag_ptr[ -1 ], HarmShapeFIRPacked_Q14 );
378 n_LTP_Q14 = silk_SUB_LSHIFT32( LTP_pred_Q14, n_LTP_Q14, 2 ); /* Q12 -> Q14 */ 411 n_LTP_Q14 = silk_SUB_LSHIFT32( LTP_pred_Q14, n_LTP_Q14, 2 ); /* Q12 -> Q14 */
379 shp_lag_ptr++; 412 shp_lag_ptr++;
380 } else { 413 } else {
381 n_LTP_Q14 = 0; 414 n_LTP_Q14 = 0;
382 } 415 }
383 416 {
384 for( k = 0; k < nStatesDelayedDecision; k++ ) { 417 __m128i tmpa, tmpb, psLPC_Q14_tmp, a_Q12_tmp;
385 /* Delayed decision state */ 418
386 psDD = &psDelDec[ k ]; 419 for( k = 0; k < nStatesDelayedDecision; k++ ) {
387 420 /* Delayed decision state */
388 /* Sample state */ 421 psDD = &psDelDec[ k ];
389 psSS = psSampleState[ k ]; 422
390 423 /* Sample state */
391 /* Generate dither */ 424 psSS = psSampleState[ k ];
392 psDD->Seed = silk_RAND( psDD->Seed ); 425
393 426 /* Generate dither */
394 /* Pointer used in short term prediction and shaping */ 427 psDD->Seed = silk_RAND( psDD->Seed );
395 psLPC_Q14 = &psDD->sLPC_Q14[ NSQ_LPC_BUF_LENGTH - 1 + i ]; 428
396 /* Short-term prediction */ 429 /* Pointer used in short term prediction and shaping */
397 silk_assert( predictLPCOrder == 10 || predictLPCOrder == 16 ); 430 psLPC_Q14 = &psDD->sLPC_Q14[ NSQ_LPC_BUF_LENGTH - 1 + i ];
398 /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */ 431 /* Short-term prediction */
399 LPC_pred_Q14 = silk_RSHIFT( predictLPCOrder, 1 ); 432 silk_assert( predictLPCOrder == 10 || predictLPCOrder == 16 );
400 LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ 0 ], a_Q12[ 0 ] ); 433 /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
401 LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -1 ], a_Q12[ 1 ] ); 434 LPC_pred_Q14 = silk_RSHIFT( predictLPCOrder, 1 );
402 LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -2 ], a_Q12[ 2 ] ); 435
403 LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -3 ], a_Q12[ 3 ] ); 436 tmpb = _mm_setzero_si128();
404 LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -4 ], a_Q12[ 4 ] ); 437
405 LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -5 ], a_Q12[ 5 ] ); 438 /* step 1 */
406 LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -6 ], a_Q12[ 6 ] ); 439 psLPC_Q14_tmp = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[ -3 ] ) ); /* -3, -2 , -1, 0 */
407 LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -7 ], a_Q12[ 7 ] ); 440 psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, 0x1B ); /* 0, -1, -2, -3 */
408 LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -8 ], a_Q12[ 8 ] ); 441 tmpa = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_0123 ); /* 0, -1, -2, -3 * 0123 -> 0*0, 2*-2 */
409 LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -9 ], a_Q12[ 9 ] ); 442
410 if( predictLPCOrder == 16 ) { 443 tmpa = _mm_srli_epi64( tmpa, 16 );
411 LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -10 ], a_Q1 2[ 10 ] ); 444 tmpb = _mm_add_epi32( tmpb, tmpa );
412 LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -11 ], a_Q1 2[ 11 ] ); 445
413 LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -12 ], a_Q1 2[ 12 ] ); 446 psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, _MM_SHUFFLE( 0 , 3, 2, 1 ) ); /* equal shift right 4 bytes */
414 LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -13 ], a_Q1 2[ 13 ] ); 447 a_Q12_tmp = _mm_shuffle_epi32( a_Q12_0123, _MM_SHUFFLE(0, 3, 2, 1 ) ); /* equal shift right 4 bytes */
415 LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -14 ], a_Q1 2[ 14 ] ); 448 psLPC_Q14_tmp = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_tmp ); /* 1*-1, 3*-3 */
416 LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -15 ], a_Q1 2[ 15 ] ); 449 psLPC_Q14_tmp = _mm_srli_epi64( psLPC_Q14_tmp, 16 );
450 tmpb = _mm_add_epi32( tmpb, psLPC_Q14_tmp );
451
452 /* step 2 */
453 psLPC_Q14_tmp = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[ -7 ] ) );
454 psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, 0x1B );
455 tmpa = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_4567 );
456 tmpa = _mm_srli_epi64( tmpa, 16 );
457 tmpb = _mm_add_epi32( tmpb, tmpa );
458
459 psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, _MM_SHUFFLE( 0 , 3, 2, 1 ) ); /* equal shift right 4 bytes */
460 a_Q12_tmp = _mm_shuffle_epi32( a_Q12_4567, _MM_SHUFFLE(0, 3, 2, 1 ) ); /* equal shift right 4 bytes */
461 psLPC_Q14_tmp = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_tmp );
462 psLPC_Q14_tmp = _mm_srli_epi64( psLPC_Q14_tmp, 16 );
463 tmpb = _mm_add_epi32( tmpb, psLPC_Q14_tmp );
464
465 if ( opus_likely( predictLPCOrder == 16 ) )
466 {
467 /* step 3 */
468 psLPC_Q14_tmp = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[ - 11 ] ) );
469 psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, 0x1B );
470 tmpa = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_89AB ) ;
471 tmpa = _mm_srli_epi64( tmpa, 16 );
472 tmpb = _mm_add_epi32( tmpb, tmpa );
473
474 psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, _MM_SHUFFL E( 0, 3, 2, 1 ) ); /* equal shift right 4 bytes */
475 a_Q12_tmp = _mm_shuffle_epi32( a_Q12_89AB, _MM_SHUFFLE(0, 3, 2, 1 ) );/* equal shift right 4 bytes */
476 psLPC_Q14_tmp = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_tmp );
477 psLPC_Q14_tmp = _mm_srli_epi64( psLPC_Q14_tmp, 16 );
478 tmpb = _mm_add_epi32( tmpb, psLPC_Q14_tmp );
479
480 /* step 4 */
481 psLPC_Q14_tmp = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[ - 15 ] ) );
482 psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, 0x1B );
483 tmpa = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_CDEF ) ;
484 tmpa = _mm_srli_epi64( tmpa, 16 );
485 tmpb = _mm_add_epi32( tmpb, tmpa );
486
487 psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, _MM_SHUFFL E( 0, 3, 2, 1 ) ); /* equal shift right 4 bytes */
488 a_Q12_tmp = _mm_shuffle_epi32( a_Q12_CDEF, _MM_SHUFFLE(0, 3, 2, 1 ) ); /* equal shift right 4 bytes */
489 psLPC_Q14_tmp = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_tmp );
490 psLPC_Q14_tmp = _mm_srli_epi64( psLPC_Q14_tmp, 16 );
491 tmpb = _mm_add_epi32( tmpb, psLPC_Q14_tmp );
492
493 /* add at last */
494 /* equal shift right 8 bytes*/
495 tmpa = _mm_shuffle_epi32( tmpb, _MM_SHUFFLE( 0, 0 , 3, 2 ) );
496 tmpb = _mm_add_epi32( tmpb, tmpa );
497 LPC_pred_Q14 += _mm_cvtsi128_si32( tmpb );
498 }
499 else
500 {
501 /* add at last */
502 tmpa = _mm_shuffle_epi32( tmpb, _MM_SHUFFLE( 0, 0 , 3, 2 ) ); /* equal shift right 8 bytes*/
503 tmpb = _mm_add_epi32( tmpb, tmpa );
504 LPC_pred_Q14 += _mm_cvtsi128_si32( tmpb );
505
506 LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -8 ], a _Q12[ 8 ] );
507 LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -9 ], a _Q12[ 9 ] );
508 }
509
510 LPC_pred_Q14 = silk_LSHIFT( LPC_pred_Q14, 4 ); /* Q10 -> Q14 */
511
512 /* Noise shape feedback */
513 silk_assert( ( shapingLPCOrder & 1 ) == 0 ); /* check that order is even */
514 /* Output of lowpass section */
515 tmp2 = silk_SMLAWB( psLPC_Q14[ 0 ], psDD->sAR2_Q14[ 0 ], warping _Q16 );
516 /* Output of allpass section */
517 tmp1 = silk_SMLAWB( psDD->sAR2_Q14[ 0 ], psDD->sAR2_Q14[ 1 ] - t mp2, warping_Q16 );
518 psDD->sAR2_Q14[ 0 ] = tmp2;
519 n_AR_Q14 = silk_RSHIFT( shapingLPCOrder, 1 );
520 n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp2, AR_shp_Q13[ 0 ] );
521 /* Loop over allpass sections */
522 for( j = 2; j < shapingLPCOrder; j += 2 ) {
523 /* Output of allpass section */
524 tmp2 = silk_SMLAWB( psDD->sAR2_Q14[ j - 1 ], psDD->sAR2_Q14[ j + 0 ] - tmp1, warping_Q16 );
525 psDD->sAR2_Q14[ j - 1 ] = tmp1;
526 n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp1, AR_shp_Q13[ j - 1 ] );
527 /* Output of allpass section */
528 tmp1 = silk_SMLAWB( psDD->sAR2_Q14[ j + 0 ], psDD->sAR2_Q14[ j + 1 ] - tmp2, warping_Q16 );
529 psDD->sAR2_Q14[ j + 0 ] = tmp2;
530 n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp2, AR_shp_Q13[ j ] );
531 }
532 psDD->sAR2_Q14[ shapingLPCOrder - 1 ] = tmp1;
533 n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp1, AR_shp_Q13[ shapingLPCOr der - 1 ] );
534
535 n_AR_Q14 = silk_LSHIFT( n_AR_Q14, 1 ); /* Q11 -> Q12 */
536 n_AR_Q14 = silk_SMLAWB( n_AR_Q14, psDD->LF_AR_Q14, Tilt_Q14 ); /* Q12 */
537 n_AR_Q14 = silk_LSHIFT( n_AR_Q14, 2 ); /* Q12 -> Q14 */
538
539 n_LF_Q14 = silk_SMULWB( psDD->Shape_Q14[ *smpl_buf_idx ], LF_shp _Q14 ); /* Q12 */
540 n_LF_Q14 = silk_SMLAWT( n_LF_Q14, psDD->LF_AR_Q14, LF_shp_Q14 ); /* Q12 */
541 n_LF_Q14 = silk_LSHIFT( n_LF_Q14, 2 ); /* Q12 -> Q14 */
542
543 /* Input minus prediction plus noise feedback */
544 /* r = x[ i ] - LTP_pred - LPC_pred + n_AR + n_Tilt + n_LF + n_L TP */
545 tmp1 = silk_ADD32( n_AR_Q14, n_LF_Q14 ); /* Q14 */
546 tmp2 = silk_ADD32( n_LTP_Q14, LPC_pred_Q14 ); /* Q13 */
547 tmp1 = silk_SUB32( tmp2, tmp1 ); /* Q13 */
548 tmp1 = silk_RSHIFT_ROUND( tmp1, 4 ); /* Q10 */
549
550 r_Q10 = silk_SUB32( x_Q10[ i ], tmp1 ); /* residual error Q10 */
551
552 /* Flip sign depending on dither */
553 if ( psDD->Seed < 0 ) {
554 r_Q10 = -r_Q10;
555 }
556 r_Q10 = silk_LIMIT_32( r_Q10, -(31 << 10), 30 << 10 );
557
558 /* Find two quantization level candidates and measure their rate -distortion */
559 q1_Q10 = silk_SUB32( r_Q10, offset_Q10 );
560 q1_Q0 = silk_RSHIFT( q1_Q10, 10 );
561 if( q1_Q0 > 0 ) {
562 q1_Q10 = silk_SUB32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ ADJUST_Q10 );
563 q1_Q10 = silk_ADD32( q1_Q10, offset_Q10 );
564 q2_Q10 = silk_ADD32( q1_Q10, 1024 );
565 rd1_Q10 = silk_SMULBB( q1_Q10, Lambda_Q10 );
566 rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 );
567 } else if( q1_Q0 == 0 ) {
568 q1_Q10 = offset_Q10;
569 q2_Q10 = silk_ADD32( q1_Q10, 1024 - QUANT_LEVEL_ADJUST_Q10 );
570 rd1_Q10 = silk_SMULBB( q1_Q10, Lambda_Q10 );
571 rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 );
572 } else if( q1_Q0 == -1 ) {
573 q2_Q10 = offset_Q10;
574 q1_Q10 = silk_SUB32( q2_Q10, 1024 - QUANT_LEVEL_ADJUST_Q10 );
575 rd1_Q10 = silk_SMULBB( -q1_Q10, Lambda_Q10 );
576 rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 );
577 } else { /* q1_Q0 < -1 */
578 q1_Q10 = silk_ADD32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ ADJUST_Q10 );
579 q1_Q10 = silk_ADD32( q1_Q10, offset_Q10 );
580 q2_Q10 = silk_ADD32( q1_Q10, 1024 );
581 rd1_Q10 = silk_SMULBB( -q1_Q10, Lambda_Q10 );
582 rd2_Q10 = silk_SMULBB( -q2_Q10, Lambda_Q10 );
583 }
584 rr_Q10 = silk_SUB32( r_Q10, q1_Q10 );
585 rd1_Q10 = silk_RSHIFT( silk_SMLABB( rd1_Q10, rr_Q10, rr_Q10 ), 1 0 );
586 rr_Q10 = silk_SUB32( r_Q10, q2_Q10 );
587 rd2_Q10 = silk_RSHIFT( silk_SMLABB( rd2_Q10, rr_Q10, rr_Q10 ), 1 0 );
588
589 if( rd1_Q10 < rd2_Q10 ) {
590 psSS[ 0 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd1_Q10 );
591 psSS[ 1 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd2_Q10 );
592 psSS[ 0 ].Q_Q10 = q1_Q10;
593 psSS[ 1 ].Q_Q10 = q2_Q10;
594 } else {
595 psSS[ 0 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd2_Q10 );
596 psSS[ 1 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd1_Q10 );
597 psSS[ 0 ].Q_Q10 = q2_Q10;
598 psSS[ 1 ].Q_Q10 = q1_Q10;
599 }
600
601 /* Update states for best quantization */
602
603 /* Quantized excitation */
604 exc_Q14 = silk_LSHIFT32( psSS[ 0 ].Q_Q10, 4 );
605 if ( psDD->Seed < 0 ) {
606 exc_Q14 = -exc_Q14;
607 }
608
609 /* Add predictions */
610 LPC_exc_Q14 = silk_ADD32( exc_Q14, LTP_pred_Q14 );
611 xq_Q14 = silk_ADD32( LPC_exc_Q14, LPC_pred_Q14 );
612
613 /* Update states */
614 sLF_AR_shp_Q14 = silk_SUB32( xq_Q14, n_AR_Q14 );
615 psSS[ 0 ].sLTP_shp_Q14 = silk_SUB32( sLF_AR_shp_Q14, n_LF_Q14 );
616 psSS[ 0 ].LF_AR_Q14 = sLF_AR_shp_Q14;
617 psSS[ 0 ].LPC_exc_Q14 = LPC_exc_Q14;
618 psSS[ 0 ].xq_Q14 = xq_Q14;
619
620 /* Update states for second best quantization */
621
622 /* Quantized excitation */
623 exc_Q14 = silk_LSHIFT32( psSS[ 1 ].Q_Q10, 4 );
624 if ( psDD->Seed < 0 ) {
625 exc_Q14 = -exc_Q14;
626 }
627
628
629 /* Add predictions */
630 LPC_exc_Q14 = silk_ADD32( exc_Q14, LTP_pred_Q14 );
631 xq_Q14 = silk_ADD32( LPC_exc_Q14, LPC_pred_Q14 );
632
633 /* Update states */
634 sLF_AR_shp_Q14 = silk_SUB32( xq_Q14, n_AR_Q14 );
635 psSS[ 1 ].sLTP_shp_Q14 = silk_SUB32( sLF_AR_shp_Q14, n_LF_Q14 );
636 psSS[ 1 ].LF_AR_Q14 = sLF_AR_shp_Q14;
637 psSS[ 1 ].LPC_exc_Q14 = LPC_exc_Q14;
638 psSS[ 1 ].xq_Q14 = xq_Q14;
417 } 639 }
418 LPC_pred_Q14 = silk_LSHIFT( LPC_pred_Q14, 4 ); /* Q10 -> Q14 */
419
420 /* Noise shape feedback */
421 silk_assert( ( shapingLPCOrder & 1 ) == 0 ); /* check that order is even */
422 /* Output of lowpass section */
423 tmp2 = silk_SMLAWB( psLPC_Q14[ 0 ], psDD->sAR2_Q14[ 0 ], warping_Q16 );
424 /* Output of allpass section */
425 tmp1 = silk_SMLAWB( psDD->sAR2_Q14[ 0 ], psDD->sAR2_Q14[ 1 ] - tmp2, warping_Q16 );
426 psDD->sAR2_Q14[ 0 ] = tmp2;
427 n_AR_Q14 = silk_RSHIFT( shapingLPCOrder, 1 );
428 n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp2, AR_shp_Q13[ 0 ] );
429 /* Loop over allpass sections */
430 for( j = 2; j < shapingLPCOrder; j += 2 ) {
431 /* Output of allpass section */
432 tmp2 = silk_SMLAWB( psDD->sAR2_Q14[ j - 1 ], psDD->sAR2_Q14[ j + 0 ] - tmp1, warping_Q16 );
433 psDD->sAR2_Q14[ j - 1 ] = tmp1;
434 n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp1, AR_shp_Q13[ j - 1 ] );
435 /* Output of allpass section */
436 tmp1 = silk_SMLAWB( psDD->sAR2_Q14[ j + 0 ], psDD->sAR2_Q14[ j + 1 ] - tmp2, warping_Q16 );
437 psDD->sAR2_Q14[ j + 0 ] = tmp2;
438 n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp2, AR_shp_Q13[ j ] );
439 }
440 psDD->sAR2_Q14[ shapingLPCOrder - 1 ] = tmp1;
441 n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp1, AR_shp_Q13[ shapingLPCOrder - 1 ] );
442
443 n_AR_Q14 = silk_LSHIFT( n_AR_Q14, 1 ); /* Q11 -> Q12 */
444 n_AR_Q14 = silk_SMLAWB( n_AR_Q14, psDD->LF_AR_Q14, Tilt_Q14 ); /* Q12 */
445 n_AR_Q14 = silk_LSHIFT( n_AR_Q14, 2 ); /* Q12 -> Q14 */
446
447 n_LF_Q14 = silk_SMULWB( psDD->Shape_Q14[ *smpl_buf_idx ], LF_shp_Q14 ); /* Q12 */
448 n_LF_Q14 = silk_SMLAWT( n_LF_Q14, psDD->LF_AR_Q14, LF_shp_Q14 ); /* Q12 */
449 n_LF_Q14 = silk_LSHIFT( n_LF_Q14, 2 ); /* Q12 -> Q14 */
450
451 /* Input minus prediction plus noise feedback */
452 /* r = x[ i ] - LTP_pred - LPC_pred + n_AR + n_Tilt + n_LF + n_LTP */
453 tmp1 = silk_ADD32( n_AR_Q14, n_LF_Q14 ); /* Q14 */
454 tmp2 = silk_ADD32( n_LTP_Q14, LPC_pred_Q14 ); /* Q13 */
455 tmp1 = silk_SUB32( tmp2, tmp1 ); /* Q13 */
456 tmp1 = silk_RSHIFT_ROUND( tmp1, 4 ); /* Q10 */
457
458 r_Q10 = silk_SUB32( x_Q10[ i ], tmp1 ); /* residual error Q10 */
459
460 /* Flip sign depending on dither */
461 if ( psDD->Seed < 0 ) {
462 r_Q10 = -r_Q10;
463 }
464 r_Q10 = silk_LIMIT_32( r_Q10, -(31 << 10), 30 << 10 );
465
466 /* Find two quantization level candidates and measure their rate-distortion */
467 q1_Q10 = silk_SUB32( r_Q10, offset_Q10 );
468 q1_Q0 = silk_RSHIFT( q1_Q10, 10 );
469 if( q1_Q0 > 0 ) {
470 q1_Q10 = silk_SUB32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJU ST_Q10 );
471 q1_Q10 = silk_ADD32( q1_Q10, offset_Q10 );
472 q2_Q10 = silk_ADD32( q1_Q10, 1024 );
473 rd1_Q10 = silk_SMULBB( q1_Q10, Lambda_Q10 );
474 rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 );
475 } else if( q1_Q0 == 0 ) {
476 q1_Q10 = offset_Q10;
477 q2_Q10 = silk_ADD32( q1_Q10, 1024 - QUANT_LEVEL_ADJUST_Q10 );
478 rd1_Q10 = silk_SMULBB( q1_Q10, Lambda_Q10 );
479 rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 );
480 } else if( q1_Q0 == -1 ) {
481 q2_Q10 = offset_Q10;
482 q1_Q10 = silk_SUB32( q2_Q10, 1024 - QUANT_LEVEL_ADJUST_Q10 );
483 rd1_Q10 = silk_SMULBB( -q1_Q10, Lambda_Q10 );
484 rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 );
485 } else { /* q1_Q0 < -1 */
486 q1_Q10 = silk_ADD32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJU ST_Q10 );
487 q1_Q10 = silk_ADD32( q1_Q10, offset_Q10 );
488 q2_Q10 = silk_ADD32( q1_Q10, 1024 );
489 rd1_Q10 = silk_SMULBB( -q1_Q10, Lambda_Q10 );
490 rd2_Q10 = silk_SMULBB( -q2_Q10, Lambda_Q10 );
491 }
492 rr_Q10 = silk_SUB32( r_Q10, q1_Q10 );
493 rd1_Q10 = silk_RSHIFT( silk_SMLABB( rd1_Q10, rr_Q10, rr_Q10 ), 10 );
494 rr_Q10 = silk_SUB32( r_Q10, q2_Q10 );
495 rd2_Q10 = silk_RSHIFT( silk_SMLABB( rd2_Q10, rr_Q10, rr_Q10 ), 10 );
496
497 if( rd1_Q10 < rd2_Q10 ) {
498 psSS[ 0 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd1_Q10 );
499 psSS[ 1 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd2_Q10 );
500 psSS[ 0 ].Q_Q10 = q1_Q10;
501 psSS[ 1 ].Q_Q10 = q2_Q10;
502 } else {
503 psSS[ 0 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd2_Q10 );
504 psSS[ 1 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd1_Q10 );
505 psSS[ 0 ].Q_Q10 = q2_Q10;
506 psSS[ 1 ].Q_Q10 = q1_Q10;
507 }
508
509 /* Update states for best quantization */
510
511 /* Quantized excitation */
512 exc_Q14 = silk_LSHIFT32( psSS[ 0 ].Q_Q10, 4 );
513 if ( psDD->Seed < 0 ) {
514 exc_Q14 = -exc_Q14;
515 }
516
517 /* Add predictions */
518 LPC_exc_Q14 = silk_ADD32( exc_Q14, LTP_pred_Q14 );
519 xq_Q14 = silk_ADD32( LPC_exc_Q14, LPC_pred_Q14 );
520
521 /* Update states */
522 sLF_AR_shp_Q14 = silk_SUB32( xq_Q14, n_AR_Q14 );
523 psSS[ 0 ].sLTP_shp_Q14 = silk_SUB32( sLF_AR_shp_Q14, n_LF_Q14 );
524 psSS[ 0 ].LF_AR_Q14 = sLF_AR_shp_Q14;
525 psSS[ 0 ].LPC_exc_Q14 = LPC_exc_Q14;
526 psSS[ 0 ].xq_Q14 = xq_Q14;
527
528 /* Update states for second best quantization */
529
530 /* Quantized excitation */
531 exc_Q14 = silk_LSHIFT32( psSS[ 1 ].Q_Q10, 4 );
532 if ( psDD->Seed < 0 ) {
533 exc_Q14 = -exc_Q14;
534 }
535
536
537 /* Add predictions */
538 LPC_exc_Q14 = silk_ADD32( exc_Q14, LTP_pred_Q14 );
539 xq_Q14 = silk_ADD32( LPC_exc_Q14, LPC_pred_Q14 );
540
541 /* Update states */
542 sLF_AR_shp_Q14 = silk_SUB32( xq_Q14, n_AR_Q14 );
543 psSS[ 1 ].sLTP_shp_Q14 = silk_SUB32( sLF_AR_shp_Q14, n_LF_Q14 );
544 psSS[ 1 ].LF_AR_Q14 = sLF_AR_shp_Q14;
545 psSS[ 1 ].LPC_exc_Q14 = LPC_exc_Q14;
546 psSS[ 1 ].xq_Q14 = xq_Q14;
547 } 640 }
548
549 *smpl_buf_idx = ( *smpl_buf_idx - 1 ) & DECISION_DELAY_MASK; /* Index to newest samples */ 641 *smpl_buf_idx = ( *smpl_buf_idx - 1 ) & DECISION_DELAY_MASK; /* Index to newest samples */
550 last_smple_idx = ( *smpl_buf_idx + decisionDelay ) & DECISION_DELAY_MASK ; /* Index to decisionDelay old samples */ 642 last_smple_idx = ( *smpl_buf_idx + decisionDelay ) & DECISION_DELAY_MASK ; /* Index to decisionDelay old samples */
551 643
552 /* Find winner */ 644 /* Find winner */
553 RDmin_Q10 = psSampleState[ 0 ][ 0 ].RD_Q10; 645 RDmin_Q10 = psSampleState[ 0 ][ 0 ].RD_Q10;
554 Winner_ind = 0; 646 Winner_ind = 0;
555 for( k = 1; k < nStatesDelayedDecision; k++ ) { 647 for( k = 1; k < nStatesDelayedDecision; k++ ) {
556 if( psSampleState[ k ][ 0 ].RD_Q10 < RDmin_Q10 ) { 648 if( psSampleState[ k ][ 0 ].RD_Q10 < RDmin_Q10 ) {
557 RDmin_Q10 = psSampleState[ k ][ 0 ].RD_Q10; 649 RDmin_Q10 = psSampleState[ k ][ 0 ].RD_Q10;
558 Winner_ind = k; 650 Winner_ind = k;
(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after
623 delayedGain_Q10[ *smpl_buf_idx ] = Gain_Q10; 715 delayedGain_Q10[ *smpl_buf_idx ] = Gain_Q10;
624 } 716 }
625 /* Update LPC states */ 717 /* Update LPC states */
626 for( k = 0; k < nStatesDelayedDecision; k++ ) { 718 for( k = 0; k < nStatesDelayedDecision; k++ ) {
627 psDD = &psDelDec[ k ]; 719 psDD = &psDelDec[ k ];
628 silk_memcpy( psDD->sLPC_Q14, &psDD->sLPC_Q14[ length ], NSQ_LPC_BUF_LENG TH * sizeof( opus_int32 ) ); 720 silk_memcpy( psDD->sLPC_Q14, &psDD->sLPC_Q14[ length ], NSQ_LPC_BUF_LENG TH * sizeof( opus_int32 ) );
629 } 721 }
630 RESTORE_STACK; 722 RESTORE_STACK;
631 } 723 }
632 724
633 static OPUS_INLINE void silk_nsq_del_dec_scale_states( 725 static OPUS_INLINE void silk_nsq_del_dec_scale_states_sse4_1(
634 const silk_encoder_state *psEncC, /* I Encoder State */ 726 const silk_encoder_state *psEncC, /* I Encoder State */
635 silk_nsq_state *NSQ, /* I/O NSQ state */ 727 silk_nsq_state *NSQ, /* I/O NSQ state */
636 NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states */ 728 NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states */
637 const opus_int32 x_Q3[], /* I Input in Q3 */ 729 const opus_int32 x_Q3[], /* I Input in Q3 */
638 opus_int32 x_sc_Q10[], /* O Input scaled with 1/Gain in Q10 */ 730 opus_int32 x_sc_Q10[], /* O Input scaled with 1/Gain in Q10 */
639 const opus_int16 sLTP[], /* I Re-whitened LTP state in Q0 */ 731 const opus_int16 sLTP[], /* I Re-whitened LTP state in Q0 */
640 opus_int32 sLTP_Q15[], /* O LTP state matching scaled input */ 732 opus_int32 sLTP_Q15[], /* O LTP state matching scaled input */
641 opus_int subfr, /* I Subframe number */ 733 opus_int subfr, /* I Subframe number */
642 opus_int nStatesDelayedDecision, /* I Number of del dec states */ 734 opus_int nStatesDelayedDecision, /* I Number of del dec states */
643 const opus_int LTP_scale_Q14, /* I LTP state scaling */ 735 const opus_int LTP_scale_Q14, /* I LTP state scaling */
644 const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I */ 736 const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I */
645 const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lag */ 737 const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lag */
646 const opus_int signal_type, /* I Signal type */ 738 const opus_int signal_type, /* I Signal type */
647 const opus_int decisionDelay /* I Decision delay */ 739 const opus_int decisionDelay /* I Decision delay */
648 ) 740 )
649 { 741 {
650 opus_int i, k, lag; 742 opus_int i, k, lag;
651 opus_int32 gain_adj_Q16, inv_gain_Q31, inv_gain_Q23; 743 opus_int32 gain_adj_Q16, inv_gain_Q31, inv_gain_Q23;
652 NSQ_del_dec_struct *psDD; 744 NSQ_del_dec_struct *psDD;
745 __m128i xmm_inv_gain_Q23, xmm_x_Q3_x2x0, xmm_x_Q3_x3x1;
653 746
654 lag = pitchL[ subfr ]; 747 lag = pitchL[ subfr ];
655 inv_gain_Q31 = silk_INVERSE32_varQ( silk_max( Gains_Q16[ subfr ], 1 ), 47 ); 748 inv_gain_Q31 = silk_INVERSE32_varQ( silk_max( Gains_Q16[ subfr ], 1 ), 47 );
749
656 silk_assert( inv_gain_Q31 != 0 ); 750 silk_assert( inv_gain_Q31 != 0 );
657 751
658 /* Calculate gain adjustment factor */ 752 /* Calculate gain adjustment factor */
659 if( Gains_Q16[ subfr ] != NSQ->prev_gain_Q16 ) { 753 if( Gains_Q16[ subfr ] != NSQ->prev_gain_Q16 ) {
660 gain_adj_Q16 = silk_DIV32_varQ( NSQ->prev_gain_Q16, Gains_Q16[ subfr ], 16 ); 754 gain_adj_Q16 = silk_DIV32_varQ( NSQ->prev_gain_Q16, Gains_Q16[ subfr ], 16 );
661 } else { 755 } else {
662 gain_adj_Q16 = (opus_int32)1 << 16; 756 gain_adj_Q16 = (opus_int32)1 << 16;
663 } 757 }
664 758
665 /* Scale input */ 759 /* Scale input */
666 inv_gain_Q23 = silk_RSHIFT_ROUND( inv_gain_Q31, 8 ); 760 inv_gain_Q23 = silk_RSHIFT_ROUND( inv_gain_Q31, 8 );
667 for( i = 0; i < psEncC->subfr_length; i++ ) { 761
762 /* prepare inv_gain_Q23 in packed 4 32-bits */
763 xmm_inv_gain_Q23 = _mm_set1_epi32(inv_gain_Q23);
764
765 for( i = 0; i < psEncC->subfr_length - 3; i += 4 ) {
766 xmm_x_Q3_x2x0 = _mm_loadu_si128( (__m128i *)(&(x_Q3[ i ] ) ) );
767 /* equal shift right 4 bytes*/
768 xmm_x_Q3_x3x1 = _mm_shuffle_epi32( xmm_x_Q3_x2x0, _MM_SHUFFLE( 0, 3, 2, 1 ) );
769
770 xmm_x_Q3_x2x0 = _mm_mul_epi32( xmm_x_Q3_x2x0, xmm_inv_gain_Q23 );
771 xmm_x_Q3_x3x1 = _mm_mul_epi32( xmm_x_Q3_x3x1, xmm_inv_gain_Q23 );
772
773 xmm_x_Q3_x2x0 = _mm_srli_epi64( xmm_x_Q3_x2x0, 16 );
774 xmm_x_Q3_x3x1 = _mm_slli_epi64( xmm_x_Q3_x3x1, 16 );
775
776 xmm_x_Q3_x2x0 = _mm_blend_epi16( xmm_x_Q3_x2x0, xmm_x_Q3_x3x1, 0xCC );
777
778 _mm_storeu_si128( (__m128i *)(&(x_sc_Q10[ i ])), xmm_x_Q3_x2x0 );
779 }
780
781 for( ; i < psEncC->subfr_length; i++ ) {
668 x_sc_Q10[ i ] = silk_SMULWW( x_Q3[ i ], inv_gain_Q23 ); 782 x_sc_Q10[ i ] = silk_SMULWW( x_Q3[ i ], inv_gain_Q23 );
669 } 783 }
670 784
671 /* Save inverse gain */ 785 /* Save inverse gain */
672 NSQ->prev_gain_Q16 = Gains_Q16[ subfr ]; 786 NSQ->prev_gain_Q16 = Gains_Q16[ subfr ];
673 787
674 /* After rewhitening the LTP state is un-scaled, so scale with inv_gain_Q16 */ 788 /* After rewhitening the LTP state is un-scaled, so scale with inv_gain_Q16 */
675 if( NSQ->rewhite_flag ) { 789 if( NSQ->rewhite_flag ) {
676 if( subfr == 0 ) { 790 if( subfr == 0 ) {
677 /* Do LTP downscaling */ 791 /* Do LTP downscaling */
678 inv_gain_Q31 = silk_LSHIFT( silk_SMULWB( inv_gain_Q31, LTP_scale_Q14 ), 2 ); 792 inv_gain_Q31 = silk_LSHIFT( silk_SMULWB( inv_gain_Q31, LTP_scale_Q14 ), 2 );
679 } 793 }
680 for( i = NSQ->sLTP_buf_idx - lag - LTP_ORDER / 2; i < NSQ->sLTP_buf_idx; i++ ) { 794 for( i = NSQ->sLTP_buf_idx - lag - LTP_ORDER / 2; i < NSQ->sLTP_buf_idx; i++ ) {
681 silk_assert( i < MAX_FRAME_LENGTH ); 795 silk_assert( i < MAX_FRAME_LENGTH );
682 sLTP_Q15[ i ] = silk_SMULWB( inv_gain_Q31, sLTP[ i ] ); 796 sLTP_Q15[ i ] = silk_SMULWB( inv_gain_Q31, sLTP[ i ] );
683 } 797 }
684 } 798 }
685 799
686 /* Adjust for changing gain */ 800 /* Adjust for changing gain */
687 if( gain_adj_Q16 != (opus_int32)1 << 16 ) { 801 if( gain_adj_Q16 != (opus_int32)1 << 16 ) {
688 /* Scale long-term shaping state */ 802 /* Scale long-term shaping state */
689 for( i = NSQ->sLTP_shp_buf_idx - psEncC->ltp_mem_length; i < NSQ->sLTP_s hp_buf_idx; i++ ) { 803 {
690 NSQ->sLTP_shp_Q14[ i ] = silk_SMULWW( gain_adj_Q16, NSQ->sLTP_shp_Q1 4[ i ] ); 804 __m128i xmm_gain_adj_Q16, xmm_sLTP_shp_Q14_x2x0, xmm_sLTP_shp_Q14_x3 x1;
691 }
692 805
693 /* Scale long-term prediction state */ 806 /* prepare gain_adj_Q16 in packed 4 32-bits */
694 if( signal_type == TYPE_VOICED && NSQ->rewhite_flag == 0 ) { 807 xmm_gain_adj_Q16 = _mm_set1_epi32( gain_adj_Q16 );
695 for( i = NSQ->sLTP_buf_idx - lag - LTP_ORDER / 2; i < NSQ->sLTP_buf_ idx - decisionDelay; i++ ) { 808
696 sLTP_Q15[ i ] = silk_SMULWW( gain_adj_Q16, sLTP_Q15[ i ] ); 809 for( i = NSQ->sLTP_shp_buf_idx - psEncC->ltp_mem_length; i < NSQ->sL TP_shp_buf_idx - 3; i += 4 )
810 {
811 xmm_sLTP_shp_Q14_x2x0 = _mm_loadu_si128( (__m128i *)(&(NSQ->sLTP _shp_Q14[ i ] ) ) );
812 /* equal shift right 4 bytes*/
813 xmm_sLTP_shp_Q14_x3x1 = _mm_shuffle_epi32( xmm_sLTP_shp_Q14_x2x0 , _MM_SHUFFLE( 0, 3, 2, 1 ) );
814
815 xmm_sLTP_shp_Q14_x2x0 = _mm_mul_epi32( xmm_sLTP_shp_Q14_x2x0, xm m_gain_adj_Q16 );
816 xmm_sLTP_shp_Q14_x3x1 = _mm_mul_epi32( xmm_sLTP_shp_Q14_x3x1, xm m_gain_adj_Q16 );
817
818 xmm_sLTP_shp_Q14_x2x0 = _mm_srli_epi64( xmm_sLTP_shp_Q14_x2x0, 1 6 );
819 xmm_sLTP_shp_Q14_x3x1 = _mm_slli_epi64( xmm_sLTP_shp_Q14_x3x1, 1 6 );
820
821 xmm_sLTP_shp_Q14_x2x0 = _mm_blend_epi16( xmm_sLTP_shp_Q14_x2x0, xmm_sLTP_shp_Q14_x3x1, 0xCC );
822
823 _mm_storeu_si128( (__m128i *)(&(NSQ->sLTP_shp_Q14[ i ] ) ), xmm_ sLTP_shp_Q14_x2x0 );
697 } 824 }
698 }
699 825
700 for( k = 0; k < nStatesDelayedDecision; k++ ) { 826 for( ; i < NSQ->sLTP_shp_buf_idx; i++ ) {
701 psDD = &psDelDec[ k ]; 827 NSQ->sLTP_shp_Q14[ i ] = silk_SMULWW( gain_adj_Q16, NSQ->sLTP_sh p_Q14[ i ] );
828 }
702 829
703 /* Scale scalar states */ 830 /* Scale long-term prediction state */
704 psDD->LF_AR_Q14 = silk_SMULWW( gain_adj_Q16, psDD->LF_AR_Q14 ); 831 if( signal_type == TYPE_VOICED && NSQ->rewhite_flag == 0 ) {
832 for( i = NSQ->sLTP_buf_idx - lag - LTP_ORDER / 2; i < NSQ->sLTP_ buf_idx - decisionDelay; i++ ) {
833 sLTP_Q15[ i ] = silk_SMULWW( gain_adj_Q16, sLTP_Q15[ i ] );
834 }
835 }
705 836
706 /* Scale short-term prediction and shaping states */ 837 for( k = 0; k < nStatesDelayedDecision; k++ ) {
707 for( i = 0; i < NSQ_LPC_BUF_LENGTH; i++ ) { 838 psDD = &psDelDec[ k ];
708 psDD->sLPC_Q14[ i ] = silk_SMULWW( gain_adj_Q16, psDD->sLPC_Q14[ i ] ); 839
709 } 840 /* Scale scalar states */
710 for( i = 0; i < MAX_SHAPE_LPC_ORDER; i++ ) { 841 psDD->LF_AR_Q14 = silk_SMULWW( gain_adj_Q16, psDD->LF_AR_Q14 );
711 psDD->sAR2_Q14[ i ] = silk_SMULWW( gain_adj_Q16, psDD->sAR2_Q14[ i ] ); 842
712 } 843 /* Scale short-term prediction and shaping states */
713 for( i = 0; i < DECISION_DELAY; i++ ) { 844 for( i = 0; i < NSQ_LPC_BUF_LENGTH; i++ ) {
714 psDD->Pred_Q15[ i ] = silk_SMULWW( gain_adj_Q16, psDD->Pred_Q15 [ i ] ); 845 psDD->sLPC_Q14[ i ] = silk_SMULWW( gain_adj_Q16, psDD->sLPC_ Q14[ i ] );
715 psDD->Shape_Q14[ i ] = silk_SMULWW( gain_adj_Q16, psDD->Shape_Q1 4[ i ] ); 846 }
847 for( i = 0; i < MAX_SHAPE_LPC_ORDER; i++ ) {
848 psDD->sAR2_Q14[ i ] = silk_SMULWW( gain_adj_Q16, psDD->sAR2_ Q14[ i ] );
849 }
850 for( i = 0; i < DECISION_DELAY; i++ ) {
851 psDD->Pred_Q15[ i ] = silk_SMULWW( gain_adj_Q16, psDD->Pred _Q15[ i ] );
852 psDD->Shape_Q14[ i ] = silk_SMULWW( gain_adj_Q16, psDD->Shap e_Q14[ i ] );
853 }
716 } 854 }
717 } 855 }
718 } 856 }
719 } 857 }
OLDNEW
« no previous file with comments | « silk/tuning_parameters.h ('k') | silk/x86/NSQ_sse.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698