Chromium Code Reviews

Unified Diff: silk/fixed/x86/burg_modified_FIX_sse.c

Issue 882843002: Update to opus-HEAD-66611f1. (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/opus.git@master
Patch Set: Add the contents of Makefile.mips back. Created 5 years, 10 months ago
-/***********************************************************************
-Copyright (c) 2006-2011, Skype Limited. All rights reserved.
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions
-are met:
-- Redistributions of source code must retain the above copyright notice,
-this list of conditions and the following disclaimer.
-- Redistributions in binary form must reproduce the above copyright
-notice, this list of conditions and the following disclaimer in the
-documentation and/or other materials provided with the distribution.
-- Neither the name of Internet Society, IETF or IETF Trust, nor the
-names of specific contributors, may be used to endorse or promote
-products derived from this software without specific prior written
-permission.
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-***********************************************************************/
+/* Copyright (c) 2014, Cisco Systems, INC
+   Written by XiangMingZhu WeiZhou MinPeng YanWang
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
 #ifdef HAVE_CONFIG_H
 #include "config.h"
 #endif

+#include <xmmintrin.h>
+#include <emmintrin.h>
+#include <smmintrin.h>
+
 #include "SigProc_FIX.h"
 #include "define.h"
 #include "tuning_parameters.h"
 #include "pitch.h"
+#include "celt/x86/x86cpu.h"

 #define MAX_FRAME_SIZE 384 /* subfr_length * nb_subfr = ( 0.005 * 16000 + 16 ) * 4 = 384 */

 #define QA               25
 #define N_BITS_HEAD_ROOM 2
 #define MIN_RSHIFTS      -16
 #define MAX_RSHIFTS      (32 - QA)

 /* Compute reflection coefficients from input signal */
-void silk_burg_modified(
+void silk_burg_modified_sse4_1(
     opus_int32                  *res_nrg,           /* O    Residual energy                                         */
     opus_int                    *res_nrg_Q,         /* O    Residual energy Q value                                 */
     opus_int32                  A_Q16[],            /* O    Prediction coefficients (length order)                  */
     const opus_int16            x[],                /* I    Input signal, length: nb_subfr * ( D + subfr_length )   */
     const opus_int32            minInvGain_Q30,     /* I    Inverse of max prediction gain                          */
     const opus_int              subfr_length,       /* I    Input signal subframe length (incl. D preceding samples)*/
     const opus_int              nb_subfr,           /* I    Number of subframes stacked in x                        */
     const opus_int              D,                  /* I    Order                                                   */
     int                         arch                /* I    Run-time architecture                                   */
 )
 {
     opus_int         k, n, s, lz, rshifts, rshifts_extra, reached_max_gain;
     opus_int32       C0, num, nrg, rc_Q31, invGain_Q30, Atmp_QA, Atmp1, tmp1, tmp2, x1, x2;
     const opus_int16 *x_ptr;
     opus_int32       C_first_row[ SILK_MAX_ORDER_LPC ];
     opus_int32       C_last_row[ SILK_MAX_ORDER_LPC ];
     opus_int32       Af_QA[ SILK_MAX_ORDER_LPC ];
     opus_int32       CAf[ SILK_MAX_ORDER_LPC + 1 ];
     opus_int32       CAb[ SILK_MAX_ORDER_LPC + 1 ];
     opus_int32       xcorr[ SILK_MAX_ORDER_LPC ];

+    __m128i FIRST_3210, LAST_3210, ATMP_3210, TMP1_3210, TMP2_3210, T1_3210, T2_3210, PTR_3210, SUBFR_3210, X1_3210, X2_3210;
+    __m128i CONST1 = _mm_set1_epi32(1);
+
     silk_assert( subfr_length * nb_subfr <= MAX_FRAME_SIZE );

     /* Compute autocorrelations, added over subframes */
     silk_sum_sqr_shift( &C0, &rshifts, x, nb_subfr * subfr_length );
     if( rshifts > MAX_RSHIFTS ) {
         C0 = silk_LSHIFT32( C0, rshifts - MAX_RSHIFTS );
         silk_assert( C0 > 0 );
         rshifts = MAX_RSHIFTS;
     } else {
         lz = silk_CLZ32( C0 ) - 1;
         rshifts_extra = N_BITS_HEAD_ROOM - lz;
         if( rshifts_extra > 0 ) {
             rshifts_extra = silk_min( rshifts_extra, MAX_RSHIFTS - rshifts );
             C0 = silk_RSHIFT32( C0, rshifts_extra );
         } else {
             rshifts_extra = silk_max( rshifts_extra, MIN_RSHIFTS - rshifts );
             C0 = silk_LSHIFT32( C0, -rshifts_extra );
         }
         rshifts += rshifts_extra;
     }
     CAb[ 0 ] = CAf[ 0 ] = C0 + silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ) + 1;    /* Q(-rshifts) */
     silk_memset( C_first_row, 0, SILK_MAX_ORDER_LPC * sizeof( opus_int32 ) );
     if( rshifts > 0 ) {
         for( s = 0; s < nb_subfr; s++ ) {
             x_ptr = x + s * subfr_length;
             for( n = 1; n < D + 1; n++ ) {
                 C_first_row[ n - 1 ] += (opus_int32)silk_RSHIFT64(
-                    silk_inner_prod16_aligned_64( x_ptr, x_ptr + n, subfr_length - n ), rshifts );
+                    silk_inner_prod16_aligned_64( x_ptr, x_ptr + n, subfr_length - n, arch ), rshifts );
             }
         }
     } else {
         for( s = 0; s < nb_subfr; s++ ) {
             int i;
             opus_int32 d;
             x_ptr = x + s * subfr_length;
             celt_pitch_xcorr( x_ptr, x_ptr + 1, xcorr, subfr_length - D, D, arch );
             for( n = 1; n < D + 1; n++ ) {
                 for( i = n + subfr_length - D, d = 0; i < subfr_length; i++ )
(...skipping 38 matching lines...)
                     CAb[ k ] = silk_SMLAWB( CAb[ k ], tmp2, x_ptr[ subfr_length - n + k - 1 ] ); /* Q( -rshift ) */
                 }
             }
         } else {
             for( s = 0; s < nb_subfr; s++ ) {
                 x_ptr = x + s * subfr_length;
                 x1 = -silk_LSHIFT32( (opus_int32)x_ptr[ n ], -rshifts );                        /* Q( -rshifts ) */
                 x2 = -silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], -rshifts );     /* Q( -rshifts ) */
                 tmp1 = silk_LSHIFT32( (opus_int32)x_ptr[ n ], 17 );                             /* Q17 */
                 tmp2 = silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], 17 );          /* Q17 */
-                for( k = 0; k < n; k++ ) {
+
+                X1_3210   = _mm_set1_epi32( x1 );
+                X2_3210   = _mm_set1_epi32( x2 );
+                TMP1_3210 = _mm_setzero_si128();
+                TMP2_3210 = _mm_setzero_si128();
+                for( k = 0; k < n - 3; k += 4 ) {
+                    PTR_3210   = OP_CVTEPI16_EPI32_M64( &x_ptr[ n - k - 1 - 3 ] );
+                    SUBFR_3210 = OP_CVTEPI16_EPI32_M64( &x_ptr[ subfr_length - n + k ] );
+                    FIRST_3210 = _mm_loadu_si128( (__m128i *)&C_first_row[ k ] );
+                    PTR_3210   = _mm_shuffle_epi32( PTR_3210, _MM_SHUFFLE( 0, 1, 2, 3 ) );
+                    LAST_3210  = _mm_loadu_si128( (__m128i *)&C_last_row[ k ] );
+                    ATMP_3210  = _mm_loadu_si128( (__m128i *)&Af_QA[ k ] );
+
+                    T1_3210 = _mm_mullo_epi32( PTR_3210, X1_3210 );
+                    T2_3210 = _mm_mullo_epi32( SUBFR_3210, X2_3210 );
+
+                    ATMP_3210 = _mm_srai_epi32( ATMP_3210, 7 );
+                    ATMP_3210 = _mm_add_epi32( ATMP_3210, CONST1 );
+                    ATMP_3210 = _mm_srai_epi32( ATMP_3210, 1 );
+
+                    FIRST_3210 = _mm_add_epi32( FIRST_3210, T1_3210 );
+                    LAST_3210  = _mm_add_epi32( LAST_3210, T2_3210 );
+
+                    PTR_3210   = _mm_mullo_epi32( ATMP_3210, PTR_3210 );
+                    SUBFR_3210 = _mm_mullo_epi32( ATMP_3210, SUBFR_3210 );
+
+                    _mm_storeu_si128( (__m128i *)&C_first_row[ k ], FIRST_3210 );
+                    _mm_storeu_si128( (__m128i *)&C_last_row[ k ], LAST_3210 );
+
+                    TMP1_3210 = _mm_add_epi32( TMP1_3210, PTR_3210 );
+                    TMP2_3210 = _mm_add_epi32( TMP2_3210, SUBFR_3210 );
+                }
+
+                TMP1_3210 = _mm_add_epi32( TMP1_3210, _mm_unpackhi_epi64( TMP1_3210, TMP1_3210 ) );
+                TMP2_3210 = _mm_add_epi32( TMP2_3210, _mm_unpackhi_epi64( TMP2_3210, TMP2_3210 ) );
+                TMP1_3210 = _mm_add_epi32( TMP1_3210, _mm_shufflelo_epi16( TMP1_3210, 0x0E ) );
+                TMP2_3210 = _mm_add_epi32( TMP2_3210, _mm_shufflelo_epi16( TMP2_3210, 0x0E ) );
+
+                tmp1 += _mm_cvtsi128_si32( TMP1_3210 );
+                tmp2 += _mm_cvtsi128_si32( TMP2_3210 );
+
+                for( ; k < n; k++ ) {
                     C_first_row[ k ] = silk_MLA( C_first_row[ k ], x1, x_ptr[ n - k - 1 ] );            /* Q( -rshifts ) */
                     C_last_row[ k ]  = silk_MLA( C_last_row[ k ],  x2, x_ptr[ subfr_length - n + k ] ); /* Q( -rshifts ) */
                     Atmp1 = silk_RSHIFT_ROUND( Af_QA[ k ], QA - 17 );                                   /* Q17 */
                     tmp1 = silk_MLA( tmp1, x_ptr[ n - k - 1 ], Atmp1 );                                 /* Q17 */
                     tmp2 = silk_MLA( tmp2, x_ptr[ subfr_length - n + k ], Atmp1 );                      /* Q17 */
                 }
-                tmp1 = -tmp1;                /* Q17 */
-                tmp2 = -tmp2;                /* Q17 */
-                for( k = 0; k <= n; k++ ) {
-                    CAf[ k ] = silk_SMLAWW( CAf[ k ], tmp1,
-                        silk_LSHIFT32( (opus_int32)x_ptr[ n - k ], -rshifts - 1 ) );                    /* Q( -rshift ) */
-                    CAb[ k ] = silk_SMLAWW( CAb[ k ], tmp2,
-                        silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n + k - 1 ], -rshifts - 1 ) ); /* Q( -rshift ) */
+
+                tmp1 = -tmp1;                /* Q17 */
+                tmp2 = -tmp2;                /* Q17 */
+
+                {
+                    __m128i xmm_tmp1, xmm_tmp2;
+                    __m128i xmm_x_ptr_n_k_x2x0, xmm_x_ptr_n_k_x3x1;
+                    __m128i xmm_x_ptr_sub_x2x0, xmm_x_ptr_sub_x3x1;
+
+                    xmm_tmp1 = _mm_set1_epi32( tmp1 );
+                    xmm_tmp2 = _mm_set1_epi32( tmp2 );
+
+                    for( k = 0; k <= n - 3; k += 4 ) {
+                        xmm_x_ptr_n_k_x2x0 = OP_CVTEPI16_EPI32_M64( &x_ptr[ n - k - 3 ] );
+                        xmm_x_ptr_sub_x2x0 = OP_CVTEPI16_EPI32_M64( &x_ptr[ subfr_length - n + k - 1 ] );
+
+                        xmm_x_ptr_n_k_x2x0 = _mm_shuffle_epi32( xmm_x_ptr_n_k_x2x0, _MM_SHUFFLE( 0, 1, 2, 3 ) );
+
+                        xmm_x_ptr_n_k_x2x0 = _mm_slli_epi32( xmm_x_ptr_n_k_x2x0, -rshifts - 1 );
+                        xmm_x_ptr_sub_x2x0 = _mm_slli_epi32( xmm_x_ptr_sub_x2x0, -rshifts - 1 );
+
+                        /* equal shift right 4 bytes, xmm_x_ptr_n_k_x3x1 = _mm_srli_si128(xmm_x_ptr_n_k_x2x0, 4) */
+                        xmm_x_ptr_n_k_x3x1 = _mm_shuffle_epi32( xmm_x_ptr_n_k_x2x0, _MM_SHUFFLE( 0, 3, 2, 1 ) );
+                        xmm_x_ptr_sub_x3x1 = _mm_shuffle_epi32( xmm_x_ptr_sub_x2x0, _MM_SHUFFLE( 0, 3, 2, 1 ) );
+
+                        xmm_x_ptr_n_k_x2x0 = _mm_mul_epi32( xmm_x_ptr_n_k_x2x0, xmm_tmp1 );
+                        xmm_x_ptr_n_k_x3x1 = _mm_mul_epi32( xmm_x_ptr_n_k_x3x1, xmm_tmp1 );
+                        xmm_x_ptr_sub_x2x0 = _mm_mul_epi32( xmm_x_ptr_sub_x2x0, xmm_tmp2 );
+                        xmm_x_ptr_sub_x3x1 = _mm_mul_epi32( xmm_x_ptr_sub_x3x1, xmm_tmp2 );
+
+                        xmm_x_ptr_n_k_x2x0 = _mm_srli_epi64( xmm_x_ptr_n_k_x2x0, 16 );
+                        xmm_x_ptr_n_k_x3x1 = _mm_slli_epi64( xmm_x_ptr_n_k_x3x1, 16 );
+                        xmm_x_ptr_sub_x2x0 = _mm_srli_epi64( xmm_x_ptr_sub_x2x0, 16 );
+                        xmm_x_ptr_sub_x3x1 = _mm_slli_epi64( xmm_x_ptr_sub_x3x1, 16 );
+
+                        xmm_x_ptr_n_k_x2x0 = _mm_blend_epi16( xmm_x_ptr_n_k_x2x0, xmm_x_ptr_n_k_x3x1, 0xCC );
+                        xmm_x_ptr_sub_x2x0 = _mm_blend_epi16( xmm_x_ptr_sub_x2x0, xmm_x_ptr_sub_x3x1, 0xCC );
+
+                        X1_3210  = _mm_loadu_si128( (__m128i *)&CAf[ k ] );
+                        PTR_3210 = _mm_loadu_si128( (__m128i *)&CAb[ k ] );
+
+                        X1_3210  = _mm_add_epi32( X1_3210, xmm_x_ptr_n_k_x2x0 );
+                        PTR_3210 = _mm_add_epi32( PTR_3210, xmm_x_ptr_sub_x2x0 );
+
+                        _mm_storeu_si128( (__m128i *)&CAf[ k ], X1_3210 );
+                        _mm_storeu_si128( (__m128i *)&CAb[ k ], PTR_3210 );
+                    }
+
+                    for( ; k <= n; k++ ) {
+                        CAf[ k ] = silk_SMLAWW( CAf[ k ], tmp1,
+                            silk_LSHIFT32( (opus_int32)x_ptr[ n - k ], -rshifts - 1 ) );                    /* Q( -rshift ) */
+                        CAb[ k ] = silk_SMLAWW( CAb[ k ], tmp2,
+                            silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n + k - 1 ], -rshifts - 1 ) ); /* Q( -rshift ) */
+                    }
                 }
             }
         }

         /* Calculate nominator and denominator for the next order reflection (parcor) coefficient */
         tmp1 = C_first_row[ n ];                                        /* Q( -rshifts ) */
         tmp2 = C_last_row[ n ];                                         /* Q( -rshifts ) */
         num  = 0;                                                       /* Q( -rshifts ) */
         nrg  = silk_ADD32( CAb[ 0 ], CAf[ 0 ] );                        /* Q( 1-rshifts ) */
         for( k = 0; k < n; k++ ) {
(...skipping 68 matching lines...)

     if( reached_max_gain ) {
         for( k = 0; k < D; k++ ) {
             /* Scale coefficients */
             A_Q16[ k ] = -silk_RSHIFT_ROUND( Af_QA[ k ], QA - 16 );
         }
         /* Subtract energy of preceding samples from C0 */
         if( rshifts > 0 ) {
             for( s = 0; s < nb_subfr; s++ ) {
                 x_ptr = x + s * subfr_length;
-                C0 -= (opus_int32)silk_RSHIFT64( silk_inner_prod16_aligned_64( x_ptr, x_ptr, D ), rshifts );
+                C0 -= (opus_int32)silk_RSHIFT64( silk_inner_prod16_aligned_64( x_ptr, x_ptr, D, arch ), rshifts );
             }
         } else {
             for( s = 0; s < nb_subfr; s++ ) {
                 x_ptr = x + s * subfr_length;
-                C0 -= silk_LSHIFT32( silk_inner_prod_aligned( x_ptr, x_ptr, D ), -rshifts );
+                C0 -= silk_LSHIFT32( silk_inner_prod_aligned( x_ptr, x_ptr, D, arch ), -rshifts );
             }
         }
         /* Approximate residual energy */
         *res_nrg = silk_LSHIFT( silk_SMMUL( invGain_Q30, C0 ), 2 );
         *res_nrg_Q = -rshifts;
     } else {
         /* Return residual energy */
         nrg  = CAf[ 0 ];                                                /* Q( -rshifts ) */
         tmp1 = (opus_int32)1 << 16;                                     /* Q16 */
         for( k = 0; k < D; k++ ) {
             Atmp1 = silk_RSHIFT_ROUND( Af_QA[ k ], QA - 16 );           /* Q16 */
             nrg  = silk_SMLAWW( nrg, CAf[ k + 1 ], Atmp1 );             /* Q( -rshifts ) */
             tmp1 = silk_SMLAWW( tmp1, Atmp1, Atmp1 );                   /* Q16 */
             A_Q16[ k ] = -Atmp1;
         }
         *res_nrg = silk_SMLAWW( nrg, silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ), -tmp1 );  /* Q( -rshifts ) */
         *res_nrg_Q = -rshifts;
     }
 }
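
Notes on the SSE4.1 vectorization (reviewer sketches; standalone illustrations, not part of the patch):

The first vectorized loop carries four partial sums per 32-bit lane in TMP1_3210 and TMP2_3210 and reduces them to a scalar after the loop with an _mm_unpackhi_epi64 / _mm_shufflelo_epi16 pair. A minimal standalone sketch of that horizontal-add idiom, with illustrative names:

    #include <emmintrin.h>  /* SSE2 */
    #include <stdio.h>

    static int hsum_epi32( __m128i v )
    {
        /* lanes [d c b a]: fold the high 64 bits onto the low ones,
           so lane 0 = a+c and lane 1 = b+d */
        v = _mm_add_epi32( v, _mm_unpackhi_epi64( v, v ) );
        /* _mm_shufflelo_epi16( v, 0x0E ) copies 16-bit words 2..3
           (32-bit lane 1) into words 0..1 (lane 0), adding b+d onto a+c */
        v = _mm_add_epi32( v, _mm_shufflelo_epi16( v, 0x0E ) );
        return _mm_cvtsi128_si32( v );
    }

    int main( void )
    {
        printf( "%d\n", hsum_epi32( _mm_set_epi32( 4, 3, 2, 1 ) ) ); /* prints 10 */
        return 0;
    }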
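In the same loop, ATMP_3210 is built with an arithmetic shift right by 7, an add of CONST1, and a shift right by 1, which matches the scalar cleanup loop's silk_RSHIFT_ROUND( Af_QA[ k ], QA - 17 ): with QA == 25 that is a rounding right-shift by 8, and silk_RSHIFT_ROUND expands to exactly this shift/add/shift form for shifts greater than 1. A scalar sketch of the equivalence (helper name is hypothetical):

    #include <stdint.h>

    /* Rounding right-shift by 8: the srai-7 / add-1 / srai-1 sequence
       the vector loop applies to each Af_QA lane. */
    static int32_t rshift_round8( int32_t x )
    {
        return ( ( x >> 7 ) + 1 ) >> 1;
    }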
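The second vectorized loop emulates silk_SMLAWW( a, b, c ), which effectively computes a + (((opus_int64)b * c) >> 16), across four lanes even though _mm_mul_epi32 only multiplies lanes 0 and 2: the *_x3x1 copies (the input rotated one lane right) feed lanes 1 and 3 through a second multiply, the 64-bit shifts by 16 line up bits 16..47 of each product, and _mm_blend_epi16 with mask 0xCC (16-bit words 2-3 and 6-7, i.e. 32-bit lanes 1 and 3, taken from the second operand) merges the even and odd results. A scalar reference for what each lane ends up holding, assuming SILK's usual fixed-point meaning of silk_SMLAWW:

    #include <stdint.h>

    /* Middle 32 bits of the 64-bit product, added into the accumulator:
       the per-lane result of the mul/shift/blend sequence above. */
    static int32_t smlaww_ref( int32_t a, int32_t b, int32_t c )
    {
        return a + (int32_t)( ( (int64_t)b * c ) >> 16 );
    }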

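Both loops load 16-bit samples through OP_CVTEPI16_EPI32_M64 from the newly included celt/x86/x86cpu.h. Assuming the macro keeps the shape it has upstream in opus, it reads 64 bits (four opus_int16) and sign-extends them into four opus_int32 lanes; a sketch of that shape:

    #include <smmintrin.h>  /* SSE4.1: _mm_cvtepi16_epi32 */

    /* Load four 16-bit samples and sign-extend each to a 32-bit lane,
       roughly what OP_CVTEPI16_EPI32_M64( &x_ptr[ ... ] ) expands to. */
    static __m128i load4_s16_to_s32( const short *p )
    {
        return _mm_cvtepi16_epi32( _mm_loadl_epi64( (const __m128i *)p ) );
    }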