Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(447)

Side by Side Diff: silk/x86/VQ_WMat_EC_sse.c

Issue 882843002: Update to opus-HEAD-66611f1. (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/opus.git@master
Patch Set: Add the contents of Makefile.mips back. Created 5 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « silk/x86/VAD_sse.c ('k') | silk/x86/main_sse.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /*********************************************************************** 1 /* Copyright (c) 2014, Cisco Systems, INC
2 Copyright (c) 2006-2011, Skype Limited. All rights reserved. 2 Written by XiangMingZhu WeiZhou MinPeng YanWang
3 Redistribution and use in source and binary forms, with or without 3
4 modification, are permitted provided that the following conditions 4 Redistribution and use in source and binary forms, with or without
5 are met: 5 modification, are permitted provided that the following conditions
6 - Redistributions of source code must retain the above copyright notice, 6 are met:
7 this list of conditions and the following disclaimer. 7
8 - Redistributions in binary form must reproduce the above copyright 8 - Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer in the 9 notice, this list of conditions and the following disclaimer.
10 documentation and/or other materials provided with the distribution. 10
11 - Neither the name of Internet Society, IETF or IETF Trust, nor the 11 - Redistributions in binary form must reproduce the above copyright
12 names of specific contributors, may be used to endorse or promote 12 notice, this list of conditions and the following disclaimer in the
13 products derived from this software without specific prior written 13 documentation and/or other materials provided with the distribution.
14 permission. 14
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 18 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
19 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 19 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 20 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 21 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 22 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 24 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 POSSIBILITY OF SUCH DAMAGE. 25 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 ***********************************************************************/ 26 */
27 27
28 #ifdef HAVE_CONFIG_H 28 #ifdef HAVE_CONFIG_H
29 #include "config.h" 29 #include "config.h"
30 #endif 30 #endif
31 31
32 #include <xmmintrin.h>
33 #include <emmintrin.h>
34 #include <smmintrin.h>
32 #include "main.h" 35 #include "main.h"
36 #include "celt/x86/x86cpu.h"
33 37
34 /* Entropy constrained matrix-weighted VQ, hard-coded to 5-element vectors, for a single input data vector */ 38 /* Entropy constrained matrix-weighted VQ, hard-coded to 5-element vectors, for a single input data vector */
35 void silk_VQ_WMat_EC( 39 void silk_VQ_WMat_EC_sse4_1(
36 opus_int8 *ind, /* O index of best codebook vector */ 40 opus_int8 *ind, /* O index of best codebook vector */
37 opus_int32 *rate_dist_Q14, /* O best weighted quant error + mu * rate */ 41 opus_int32 *rate_dist_Q14, /* O best weighted quant error + mu * rate */
38 opus_int *gain_Q7, /* O sum of absolute LTP coefficients */ 42 opus_int *gain_Q7, /* O sum of absolute LTP coefficients */
39 const opus_int16 *in_Q14, /* I input vector to be quantized */ 43 const opus_int16 *in_Q14, /* I input vector to be quantized */
40 const opus_int32 *W_Q18, /* I weighting matrix */ 44 const opus_int32 *W_Q18, /* I weighting matrix */
41 const opus_int8 *cb_Q7, /* I codebook */ 45 const opus_int8 *cb_Q7, /* I codebook */
42 const opus_uint8 *cb_gain_Q7, /* I codebook effective gain */ 46 const opus_uint8 *cb_gain_Q7, /* I codebook effective gain */
43 const opus_uint8 *cl_Q5, /* I code length for each codebook vector */ 47 const opus_uint8 *cl_Q5, /* I code length for each codebook vector */
44 const opus_int mu_Q9, /* I tradeoff betw. weighted error and rate */ 48 const opus_int mu_Q9, /* I tradeoff betw. weighted error and rate */
45 const opus_int32 max_gain_Q7, /* I maximum sum of absolute LTP coefficients */ 49 const opus_int32 max_gain_Q7, /* I maximum sum of absolute LTP coefficients */
46 opus_int L /* I number of vectors in codebook */ 50 opus_int L /* I number of vectors in codebook */
47 ) 51 )
48 { 52 {
49 opus_int k, gain_tmp_Q7; 53 opus_int k, gain_tmp_Q7;
50 const opus_int8 *cb_row_Q7; 54 const opus_int8 *cb_row_Q7;
51 opus_int16 diff_Q14[ 5 ]; 55 opus_int16 diff_Q14[ 5 ];
52 opus_int32 sum1_Q14, sum2_Q16; 56 opus_int32 sum1_Q14, sum2_Q16;
53 57
58 __m128i C_tmp1, C_tmp2, C_tmp3, C_tmp4, C_tmp5;
54 /* Loop over codebook */ 59 /* Loop over codebook */
55 *rate_dist_Q14 = silk_int32_MAX; 60 *rate_dist_Q14 = silk_int32_MAX;
56 cb_row_Q7 = cb_Q7; 61 cb_row_Q7 = cb_Q7;
57 for( k = 0; k < L; k++ ) { 62 for( k = 0; k < L; k++ ) {
58 » gain_tmp_Q7 = cb_gain_Q7[k]; 63 gain_tmp_Q7 = cb_gain_Q7[k];
59 64
60 diff_Q14[ 0 ] = in_Q14[ 0 ] - silk_LSHIFT( cb_row_Q7[ 0 ], 7 ); 65 diff_Q14[ 0 ] = in_Q14[ 0 ] - silk_LSHIFT( cb_row_Q7[ 0 ], 7 );
61 diff_Q14[ 1 ] = in_Q14[ 1 ] - silk_LSHIFT( cb_row_Q7[ 1 ], 7 ); 66
62 diff_Q14[ 2 ] = in_Q14[ 2 ] - silk_LSHIFT( cb_row_Q7[ 2 ], 7 ); 67 C_tmp1 = OP_CVTEPI16_EPI32_M64( &in_Q14[ 1 ] );
63 diff_Q14[ 3 ] = in_Q14[ 3 ] - silk_LSHIFT( cb_row_Q7[ 3 ], 7 ); 68 C_tmp2 = OP_CVTEPI8_EPI32_M32( &cb_row_Q7[ 1 ] );
64 diff_Q14[ 4 ] = in_Q14[ 4 ] - silk_LSHIFT( cb_row_Q7[ 4 ], 7 ); 69 C_tmp2 = _mm_slli_epi32( C_tmp2, 7 );
70 C_tmp1 = _mm_sub_epi32( C_tmp1, C_tmp2 );
71
72 diff_Q14[ 1 ] = _mm_extract_epi16( C_tmp1, 0 );
73 diff_Q14[ 2 ] = _mm_extract_epi16( C_tmp1, 2 );
74 diff_Q14[ 3 ] = _mm_extract_epi16( C_tmp1, 4 );
75 diff_Q14[ 4 ] = _mm_extract_epi16( C_tmp1, 6 );
65 76
66 /* Weighted rate */ 77 /* Weighted rate */
67 sum1_Q14 = silk_SMULBB( mu_Q9, cl_Q5[ k ] ); 78 sum1_Q14 = silk_SMULBB( mu_Q9, cl_Q5[ k ] );
68 79
69 » » /* Penalty for too large gain */ 80 /* Penalty for too large gain */
70 » » sum1_Q14 = silk_ADD_LSHIFT32( sum1_Q14, silk_max( silk_SUB32( gain_tmp_Q7, max_gain_Q7 ), 0 ), 10 ); 81 sum1_Q14 = silk_ADD_LSHIFT32( sum1_Q14, silk_max( silk_SUB32( gain_tmp_Q7, max_gain_Q7 ), 0 ), 10 );
71 82
72 silk_assert( sum1_Q14 >= 0 ); 83 silk_assert( sum1_Q14 >= 0 );
73 84
74 /* first row of W_Q18 */ 85 /* first row of W_Q18 */
75 sum2_Q16 = silk_SMULWB( W_Q18[ 1 ], diff_Q14[ 1 ] ); 86 C_tmp3 = _mm_loadu_si128( (__m128i *)(&W_Q18[ 1 ] ) );
76 sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 2 ], diff_Q14[ 2 ] ); 87 C_tmp4 = _mm_mul_epi32( C_tmp3, C_tmp1 );
77 sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 3 ], diff_Q14[ 3 ] ); 88 C_tmp4 = _mm_srli_si128( C_tmp4, 2 );
78 sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 4 ], diff_Q14[ 4 ] ); 89
79 sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 ); 90 C_tmp1 = _mm_shuffle_epi32( C_tmp1, _MM_SHUFFLE( 0, 3, 2, 1 ) ); /* shift right 4 bytes */
91 C_tmp3 = _mm_shuffle_epi32( C_tmp3, _MM_SHUFFLE( 0, 3, 2, 1 ) ); /* shift right 4 bytes */
92
93 C_tmp5 = _mm_mul_epi32( C_tmp3, C_tmp1 );
94 C_tmp5 = _mm_srli_si128( C_tmp5, 2 );
95
96 C_tmp5 = _mm_add_epi32( C_tmp4, C_tmp5 );
97 C_tmp5 = _mm_slli_epi32( C_tmp5, 1 );
98
99 C_tmp5 = _mm_add_epi32( C_tmp5, _mm_shuffle_epi32( C_tmp5, _MM_SHUFFLE( 0, 0, 0, 2 ) ) );
100 sum2_Q16 = _mm_cvtsi128_si32( C_tmp5 );
101
80 sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 0 ], diff_Q14[ 0 ] ); 102 sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 0 ], diff_Q14[ 0 ] );
81 sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 0 ] ); 103 sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 0 ] );
82 104
83 /* second row of W_Q18 */ 105 /* second row of W_Q18 */
84 sum2_Q16 = silk_SMULWB( W_Q18[ 7 ], diff_Q14[ 2 ] ); 106 sum2_Q16 = silk_SMULWB( W_Q18[ 7 ], diff_Q14[ 2 ] );
85 sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 8 ], diff_Q14[ 3 ] ); 107 sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 8 ], diff_Q14[ 3 ] );
86 sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 9 ], diff_Q14[ 4 ] ); 108 sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 9 ], diff_Q14[ 4 ] );
87 sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 ); 109 sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 );
88 sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 6 ], diff_Q14[ 1 ] ); 110 sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 6 ], diff_Q14[ 1 ] );
89 sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 1 ] ); 111 sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 1 ] );
(...skipping 14 matching lines...) Expand all
104 /* last row of W_Q18 */ 126 /* last row of W_Q18 */
105 sum2_Q16 = silk_SMULWB( W_Q18[ 24 ], diff_Q14[ 4 ] ); 127 sum2_Q16 = silk_SMULWB( W_Q18[ 24 ], diff_Q14[ 4 ] );
106 sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 4 ] ); 128 sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 4 ] );
107 129
108 silk_assert( sum1_Q14 >= 0 ); 130 silk_assert( sum1_Q14 >= 0 );
109 131
110 /* find best */ 132 /* find best */
111 if( sum1_Q14 < *rate_dist_Q14 ) { 133 if( sum1_Q14 < *rate_dist_Q14 ) {
112 *rate_dist_Q14 = sum1_Q14; 134 *rate_dist_Q14 = sum1_Q14;
113 *ind = (opus_int8)k; 135 *ind = (opus_int8)k;
114 » » » *gain_Q7 = gain_tmp_Q7; 136 *gain_Q7 = gain_tmp_Q7;
115 } 137 }
116 138
117 /* Go to next cbk vector */ 139 /* Go to next cbk vector */
118 cb_row_Q7 += LTP_ORDER; 140 cb_row_Q7 += LTP_ORDER;
119 } 141 }
120 } 142 }
OLDNEW
« no previous file with comments | « silk/x86/VAD_sse.c ('k') | silk/x86/main_sse.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698