silk/x86/VQ_WMat_EC_sse.c - Issue 882843002: Update to opus-HEAD-66611f1.

Side by Side Diff: silk/x86/VQ_WMat_EC_sse.c

Issue 882843002: Update to opus-HEAD-66611f1. (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/opus.git@master

Patch Set: Created 5 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /***********************************************************************	1 /* Copyright (c) 2014, Cisco Systems, INC

2 Copyright (c) 2006-2011, Skype Limited. All rights reserved.	2 Written by XiangMingZhu WeiZhou MinPeng YanWang

3 Redistribution and use in source and binary forms, with or without	3

4 modification, are permitted provided that the following conditions	4 Redistribution and use in source and binary forms, with or without

5 are met:	5 modification, are permitted provided that the following conditions

6 - Redistributions of source code must retain the above copyright notice,	6 are met:

7 this list of conditions and the following disclaimer.	7

8 - Redistributions in binary form must reproduce the above copyright	8 - Redistributions of source code must retain the above copyright

9 notice, this list of conditions and the following disclaimer in the	9 notice, this list of conditions and the following disclaimer.

10 documentation and/or other materials provided with the distribution.	10

11 - Neither the name of Internet Society, IETF or IETF Trust, nor the	11 - Redistributions in binary form must reproduce the above copyright

12 names of specific contributors, may be used to endorse or promote	12 notice, this list of conditions and the following disclaimer in the

13 products derived from this software without specific prior written	13 documentation and/or other materials provided with the distribution.

14 permission.	14

15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"	15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

16 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE	16 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

17 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE	17 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR

18 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE	18 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER

19 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR	19 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,

20 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF	20 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,

21 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS	21 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR

22 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN	22 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF

23 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)	23 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING

24 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE	24 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS

25 POSSIBILITY OF SUCH DAMAGE.	25 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

26 ***********************************************************************/	26 */

27	27

28 #ifdef HAVE_CONFIG_H	28 #ifdef HAVE_CONFIG_H

29 #include "config.h"	29 #include "config.h"

30 #endif	30 #endif

31	31

	32 #include <xmmintrin.h>

	33 #include <emmintrin.h>

	34 #include <smmintrin.h>

32 #include "main.h"	35 #include "main.h"

	36 #include "celt/x86/x86cpu.h"

33	37

34 /* Entropy constrained matrix-weighted VQ, hard-coded to 5-element vectors, for a single input data vector */	38 /* Entropy constrained matrix-weighted VQ, hard-coded to 5-element vectors, for a single input data vector */

35 void silk_VQ_WMat_EC(	39 void silk_VQ_WMat_EC_sse4_1(

36 opus_int8 *ind, /* O index of best codebook vector */	40 opus_int8 *ind, /* O index of best codebook vector */

37 opus_int32 *rate_dist_Q14, /* O best weighted quant error + mu * rate */	41 opus_int32 *rate_dist_Q14, /* O best weighted quant error + mu * rate */

38 opus_int *gain_Q7, /* O sum of absolute LTP coefficients */	42 opus_int *gain_Q7, /* O sum of absolute LTP coefficients */

39 const opus_int16 *in_Q14, /* I input vector to be quantized */	43 const opus_int16 *in_Q14, /* I input vector to be quantized */

40 const opus_int32 *W_Q18, /* I weighting matrix */	44 const opus_int32 *W_Q18, /* I weighting matrix */

41 const opus_int8 *cb_Q7, /* I codebook */	45 const opus_int8 *cb_Q7, /* I codebook */

42 const opus_uint8 *cb_gain_Q7, /* I codebook effective gain */	46 const opus_uint8 *cb_gain_Q7, /* I codebook effective gain */

43 const opus_uint8 *cl_Q5, /* I code length for each codebook vector */	47 const opus_uint8 *cl_Q5, /* I code length for each codebook vector */

44 const opus_int mu_Q9, /* I tradeoff betw. weighted error and rate */	48 const opus_int mu_Q9, /* I tradeoff betw. weighted error and rate */

45 const opus_int32 max_gain_Q7, /* I maximum sum of absolute LTP coefficients */	49 const opus_int32 max_gain_Q7, /* I maximum sum of absolute LTP coefficients */

46 opus_int L /* I number of vectors in codebook */	50 opus_int L /* I number of vectors in codebook */

47 )	51 )

48 {	52 {

49 opus_int k, gain_tmp_Q7;	53 opus_int k, gain_tmp_Q7;

50 const opus_int8 *cb_row_Q7;	54 const opus_int8 *cb_row_Q7;

51 opus_int16 diff_Q14[ 5 ];	55 opus_int16 diff_Q14[ 5 ];

52 opus_int32 sum1_Q14, sum2_Q16;	56 opus_int32 sum1_Q14, sum2_Q16;

53	57

	58 __m128i C_tmp1, C_tmp2, C_tmp3, C_tmp4, C_tmp5;

54 /* Loop over codebook */	59 /* Loop over codebook */

55 *rate_dist_Q14 = silk_int32_MAX;	60 *rate_dist_Q14 = silk_int32_MAX;

56 cb_row_Q7 = cb_Q7;	61 cb_row_Q7 = cb_Q7;

57 for( k = 0; k < L; k++ ) {	62 for( k = 0; k < L; k++ ) {

58 » gain_tmp_Q7 = cb_gain_Q7[k];	63 gain_tmp_Q7 = cb_gain_Q7[k];

59	64

60 diff_Q14[ 0 ] = in_Q14[ 0 ] - silk_LSHIFT( cb_row_Q7[ 0 ], 7 );	65 diff_Q14[ 0 ] = in_Q14[ 0 ] - silk_LSHIFT( cb_row_Q7[ 0 ], 7 );

61 diff_Q14[ 1 ] = in_Q14[ 1 ] - silk_LSHIFT( cb_row_Q7[ 1 ], 7 );	66

62 diff_Q14[ 2 ] = in_Q14[ 2 ] - silk_LSHIFT( cb_row_Q7[ 2 ], 7 );	67 C_tmp1 = OP_CVTEPI16_EPI32_M64( &in_Q14[ 1 ] );

63 diff_Q14[ 3 ] = in_Q14[ 3 ] - silk_LSHIFT( cb_row_Q7[ 3 ], 7 );	68 C_tmp2 = OP_CVTEPI8_EPI32_M32( &cb_row_Q7[ 1 ] );

64 diff_Q14[ 4 ] = in_Q14[ 4 ] - silk_LSHIFT( cb_row_Q7[ 4 ], 7 );	69 C_tmp2 = _mm_slli_epi32( C_tmp2, 7 );

	70 C_tmp1 = _mm_sub_epi32( C_tmp1, C_tmp2 );

	71

	72 diff_Q14[ 1 ] = _mm_extract_epi16( C_tmp1, 0 );

	73 diff_Q14[ 2 ] = _mm_extract_epi16( C_tmp1, 2 );

	74 diff_Q14[ 3 ] = _mm_extract_epi16( C_tmp1, 4 );

	75 diff_Q14[ 4 ] = _mm_extract_epi16( C_tmp1, 6 );

65	76

66 /* Weighted rate */	77 /* Weighted rate */

67 sum1_Q14 = silk_SMULBB( mu_Q9, cl_Q5[ k ] );	78 sum1_Q14 = silk_SMULBB( mu_Q9, cl_Q5[ k ] );

68	79

69 » » /* Penalty for too large gain */	80 /* Penalty for too large gain */

70 » » sum1_Q14 = silk_ADD_LSHIFT32( sum1_Q14, silk_max( silk_SUB32( gain_tmp_Q7, max_gain_Q7 ), 0 ), 10 );	81 sum1_Q14 = silk_ADD_LSHIFT32( sum1_Q14, silk_max( silk_SUB32( gain_tmp_Q7, max_gain_Q7 ), 0 ), 10 );

71	82

72 silk_assert( sum1_Q14 >= 0 );	83 silk_assert( sum1_Q14 >= 0 );

73	84

74 /* first row of W_Q18 */	85 /* first row of W_Q18 */

75 sum2_Q16 = silk_SMULWB( W_Q18[ 1 ], diff_Q14[ 1 ] );	86 C_tmp3 = _mm_loadu_si128( (__m128i *)(&W_Q18[ 1 ] ) );

76 sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 2 ], diff_Q14[ 2 ] );	87 C_tmp4 = _mm_mul_epi32( C_tmp3, C_tmp1 );

77 sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 3 ], diff_Q14[ 3 ] );	88 C_tmp4 = _mm_srli_si128( C_tmp4, 2 );

78 sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 4 ], diff_Q14[ 4 ] );	89

79 sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 );	90 C_tmp1 = _mm_shuffle_epi32( C_tmp1, _MM_SHUFFLE( 0, 3, 2, 1 ) ); /* shift right 4 bytes */

	91 C_tmp3 = _mm_shuffle_epi32( C_tmp3, _MM_SHUFFLE( 0, 3, 2, 1 ) ); /* shift right 4 bytes */

	92

	93 C_tmp5 = _mm_mul_epi32( C_tmp3, C_tmp1 );

	94 C_tmp5 = _mm_srli_si128( C_tmp5, 2 );

	95

	96 C_tmp5 = _mm_add_epi32( C_tmp4, C_tmp5 );

	97 C_tmp5 = _mm_slli_epi32( C_tmp5, 1 );

	98

	99 C_tmp5 = _mm_add_epi32( C_tmp5, _mm_shuffle_epi32( C_tmp5, _MM_SHUFFLE( 0, 0, 0, 2 ) ) );

	100 sum2_Q16 = _mm_cvtsi128_si32( C_tmp5 );

	101

80 sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 0 ], diff_Q14[ 0 ] );	102 sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 0 ], diff_Q14[ 0 ] );

81 sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 0 ] );	103 sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 0 ] );

82	104

83 /* second row of W_Q18 */	105 /* second row of W_Q18 */

84 sum2_Q16 = silk_SMULWB( W_Q18[ 7 ], diff_Q14[ 2 ] );	106 sum2_Q16 = silk_SMULWB( W_Q18[ 7 ], diff_Q14[ 2 ] );

85 sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 8 ], diff_Q14[ 3 ] );	107 sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 8 ], diff_Q14[ 3 ] );

86 sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 9 ], diff_Q14[ 4 ] );	108 sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 9 ], diff_Q14[ 4 ] );

87 sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 );	109 sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 );

88 sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 6 ], diff_Q14[ 1 ] );	110 sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 6 ], diff_Q14[ 1 ] );

89 sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 1 ] );	111 sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 1 ] );

(...skipping 14 matching lines...) Expand all Loading...
104 /* last row of W_Q18 */	126 /* last row of W_Q18 */

105 sum2_Q16 = silk_SMULWB( W_Q18[ 24 ], diff_Q14[ 4 ] );	127 sum2_Q16 = silk_SMULWB( W_Q18[ 24 ], diff_Q14[ 4 ] );

106 sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 4 ] );	128 sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 4 ] );

107	129

108 silk_assert( sum1_Q14 >= 0 );	130 silk_assert( sum1_Q14 >= 0 );

109	131

110 /* find best */	132 /* find best */

111 if( sum1_Q14 < *rate_dist_Q14 ) {	133 if( sum1_Q14 < *rate_dist_Q14 ) {

112 *rate_dist_Q14 = sum1_Q14;	134 *rate_dist_Q14 = sum1_Q14;

113 *ind = (opus_int8)k;	135 *ind = (opus_int8)k;

114 » » » *gain_Q7 = gain_tmp_Q7;	136 *gain_Q7 = gain_tmp_Q7;

115 }	137 }

116	138

117 /* Go to next cbk vector */	139 /* Go to next cbk vector */

118 cb_row_Q7 += LTP_ORDER;	140 cb_row_Q7 += LTP_ORDER;

119 }	141 }

120 }	142 }

OLD	NEW

« doc/release.txt ('K') | « silk/x86/VAD_sse.c ('k') | silk/x86/main_sse.h » ('j') | update_version » ('J')