OLD | NEW |
1 /* Copyright (c) 2013 Jean-Marc Valin and John Ridges */ | 1 /* Copyright (c) 2013 Jean-Marc Valin and John Ridges |
| 2 Copyright (c) 2014, Cisco Systems, INC MingXiang WeiZhou MinPeng YanWang*/ |
2 /** | 3 /** |
3 @file pitch_sse.h | 4 @file pitch_sse.h |
4 @brief Pitch analysis | 5 @brief Pitch analysis |
5 */ | 6 */ |
6 | 7 |
7 /* | 8 /* |
8 Redistribution and use in source and binary forms, with or without | 9 Redistribution and use in source and binary forms, with or without |
9 modification, are permitted provided that the following conditions | 10 modification, are permitted provided that the following conditions |
10 are met: | 11 are met: |
11 | 12 |
(...skipping 13 matching lines...) |
25 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | 26 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
26 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | 27 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF |
27 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | 28 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
28 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | 29 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
29 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 30 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
30 */ | 31 */ |
31 | 32 |
32 #ifndef PITCH_SSE_H | 33 #ifndef PITCH_SSE_H |
33 #define PITCH_SSE_H | 34 #define PITCH_SSE_H |
34 | 35 |
| 36 #if defined(HAVE_CONFIG_H) |
| 37 #include "config.h" |
| 38 #endif |
| 39 |
| 40 #if defined(OPUS_X86_MAY_HAVE_SSE4_1) || defined(OPUS_X86_MAY_HAVE_SSE2) |
| 41 #if defined(OPUS_X86_MAY_HAVE_SSE4_1) |
| 42 void xcorr_kernel_sse4_1( |
| 43 const opus_int16 *x, |
| 44 const opus_int16 *y, |
| 45 opus_val32 sum[4], |
| 46 int len); |
| 47 |
| 48 extern void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])( |
| 49 const opus_int16 *x, |
| 50 const opus_int16 *y, |
| 51 opus_val32 sum[4], |
| 52 int len); |
| 53 |
| 54 #define OVERRIDE_XCORR_KERNEL |
| 55 #define xcorr_kernel(x, y, sum, len, arch) \ |
| 56 ((*XCORR_KERNEL_IMPL[(arch) & OPUS_ARCHMASK])(x, y, sum, len)) |
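
Note on the dispatch above: xcorr_kernel now indirects through XCORR_KERNEL_IMPL, a function-pointer table indexed by arch & OPUS_ARCHMASK. A minimal sketch of how that table could be populated in a companion .c file, assuming four x86 arch levels (OPUS_ARCHMASK == 3) and a portable fallback named xcorr_kernel_c; the layout is illustrative, not part of this header:

void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])(
      const opus_int16 *x, const opus_int16 *y,
      opus_val32 sum[4], int len) = {
   xcorr_kernel_c,      /* arch 0: portable C */
   xcorr_kernel_c,      /* arch 1 */
   xcorr_kernel_c,      /* arch 2 */
   xcorr_kernel_sse4_1  /* arch 3: SSE4.1 */
};
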
| 57 |
| 58 opus_val32 celt_inner_prod_sse4_1( |
| 59 const opus_int16 *x, |
| 60 const opus_int16 *y, |
| 61 int N); |
| 62 #endif |
| 63 |
| 64 #if defined(OPUS_X86_MAY_HAVE_SSE2) |
| 65 opus_val32 celt_inner_prod_sse2( |
| 66 const opus_int16 *x, |
| 67 const opus_int16 *y, |
| 68 int N); |
| 69 #endif |
| 70 |
| 71 extern opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])( |
| 72 const opus_int16 *x, |
| 73 const opus_int16 *y, |
| 74 int N); |
| 75 |
| 76 #define OVERRIDE_CELT_INNER_PROD |
| 77 #define celt_inner_prod(x, y, N, arch) \ |
| 78 ((*CELT_INNER_PROD_IMPL[(arch) & OPUS_ARCHMASK])(x, y, N)) |
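
A hedged usage sketch for the two dispatch macros: callers thread an arch value through (in Opus this comes from run-time CPU detection; the opus_select_arch() name is used here as an assumption, not something this patch introduces) and the table selects the fastest available implementation:

   int arch = opus_select_arch();                       /* assumed: detected CPU level */
   opus_val32 energy = celt_inner_prod(x, x, N, arch);  /* sum of x[i]^2 */
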
| 79 #else |
| 80 |
35 #include <xmmintrin.h> | 81 #include <xmmintrin.h> |
36 #include "arch.h" | 82 #include "arch.h" |
37 | 83 |
38 #define OVERRIDE_XCORR_KERNEL | 84 #define OVERRIDE_XCORR_KERNEL |
39 static OPUS_INLINE void xcorr_kernel(const opus_val16 *x, const opus_val16 *y, opus_val32 sum[4], int len) | 85 static OPUS_INLINE void xcorr_kernel_sse(const opus_val16 *x, const opus_val16 *y, opus_val32 sum[4], int len) |
40 { | 86 { |
41 int j; | 87 int j; |
42 __m128 xsum1, xsum2; | 88 __m128 xsum1, xsum2; |
43 xsum1 = _mm_loadu_ps(sum); | 89 xsum1 = _mm_loadu_ps(sum); |
44 xsum2 = _mm_setzero_ps(); | 90 xsum2 = _mm_setzero_ps(); |
45 | 91 |
46 for (j = 0; j < len-3; j += 4) | 92 for (j = 0; j < len-3; j += 4) |
47 { | 93 { |
48 __m128 x0 = _mm_loadu_ps(x+j); | 94 __m128 x0 = _mm_loadu_ps(x+j); |
49 __m128 yj = _mm_loadu_ps(y+j); | 95 __m128 yj = _mm_loadu_ps(y+j); |
(...skipping 14 matching lines...) |
64 xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j))); | 110 xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j))); |
65 if (++j < len) | 111 if (++j < len) |
66 { | 112 { |
67 xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j))); | 113 xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j))); |
68 } | 114 } |
69 } | 115 } |
70 } | 116 } |
71 _mm_storeu_ps(sum,_mm_add_ps(xsum1,xsum2)); | 117 _mm_storeu_ps(sum,_mm_add_ps(xsum1,xsum2)); |
72 } | 118 } |
73 | 119 |
| 120 #define xcorr_kernel(_x, _y, _z, len, arch) \ |
| 121 ((void)(arch),xcorr_kernel_sse(_x, _y, _z, len)) |
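
For reference, the kernel's contract as celt_pitch_xcorr uses it (variable names here are illustrative): it accumulates into sum[0..3] the correlations of x against y at four consecutive lags, so the caller must initialize sum first:

   opus_val32 sum[4] = {0, 0, 0, 0};
   xcorr_kernel(x, y + lag, sum, len, arch);
   /* now sum[k] == dot(x[0..len-1], y[lag+k .. lag+k+len-1]) for k = 0..3 */
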
| 122 |
74 #define OVERRIDE_DUAL_INNER_PROD | 123 #define OVERRIDE_DUAL_INNER_PROD |
75 static OPUS_INLINE void dual_inner_prod(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02, | 124 static OPUS_INLINE void dual_inner_prod(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02, |
76 int N, opus_val32 *xy1, opus_val32 *xy2) | 125 int N, opus_val32 *xy1, opus_val32 *xy2) |
77 { | 126 { |
78 int i; | 127 int i; |
79 __m128 xsum1, xsum2; | 128 __m128 xsum1, xsum2; |
80 xsum1 = _mm_setzero_ps(); | 129 xsum1 = _mm_setzero_ps(); |
81 xsum2 = _mm_setzero_ps(); | 130 xsum2 = _mm_setzero_ps(); |
82 for (i=0;i<N-3;i+=4) | 131 for (i=0;i<N-3;i+=4) |
83 { | 132 { |
(...skipping 10 matching lines...) |
94 xsum2 = _mm_add_ps(xsum2, _mm_movehl_ps(xsum2, xsum2)); | 143 xsum2 = _mm_add_ps(xsum2, _mm_movehl_ps(xsum2, xsum2)); |
95 xsum2 = _mm_add_ss(xsum2, _mm_shuffle_ps(xsum2, xsum2, 0x55)); | 144 xsum2 = _mm_add_ss(xsum2, _mm_shuffle_ps(xsum2, xsum2, 0x55)); |
96 _mm_store_ss(xy2, xsum2); | 145 _mm_store_ss(xy2, xsum2); |
97 for (;i<N;i++) | 146 for (;i<N;i++) |
98 { | 147 { |
99 *xy1 = MAC16_16(*xy1, x[i], y01[i]); | 148 *xy1 = MAC16_16(*xy1, x[i], y01[i]); |
100 *xy2 = MAC16_16(*xy2, x[i], y02[i]); | 149 *xy2 = MAC16_16(*xy2, x[i], y02[i]); |
101 } | 150 } |
102 } | 151 } |
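
For review, a scalar reference of what dual_inner_prod computes (sketch only; float results may differ slightly because the vector version sums in a different order):

static void dual_inner_prod_ref(const opus_val16 *x, const opus_val16 *y01,
      const opus_val16 *y02, int N, opus_val32 *xy1, opus_val32 *xy2)
{
   int i;
   opus_val32 s1 = 0, s2 = 0;
   for (i = 0; i < N; i++)
   {
      s1 = MAC16_16(s1, x[i], y01[i]);   /* s1 += x[i]*y01[i] */
      s2 = MAC16_16(s2, x[i], y02[i]);   /* s2 += x[i]*y02[i] */
   }
   *xy1 = s1;
   *xy2 = s2;
}
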
103 | 152 |
| 153 #define OVERRIDE_CELT_INNER_PROD |
 | 154 static OPUS_INLINE opus_val32 celt_inner_prod_sse(const opus_val16 *x, const opus_val16 *y, |
 | 155 int N) |
| 156 { |
| 157 int i; |
| 158 float xy; |
| 159 __m128 sum; |
| 160 sum = _mm_setzero_ps(); |
| 161 /* FIXME: We should probably go 8-way and use 2 sums. */ |
| 162 for (i=0;i<N-3;i+=4) |
| 163 { |
| 164 __m128 xi = _mm_loadu_ps(x+i); |
| 165 __m128 yi = _mm_loadu_ps(y+i); |
| 166 sum = _mm_add_ps(sum,_mm_mul_ps(xi, yi)); |
| 167 } |
| 168 /* Horizontal sum */ |
| 169 sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum)); |
| 170 sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55)); |
| 171 _mm_store_ss(&xy, sum); |
| 172 for (;i<N;i++) |
| 173 { |
| 174 xy = MAC16_16(xy, x[i], y[i]); |
| 175 } |
| 176 return xy; |
| 177 } |
| 178 |
| 179 # define celt_inner_prod(_x, _y, len, arch) \ |
| 180 ((void)(arch),celt_inner_prod_sse(_x, _y, len)) |
| 181 |
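
On the FIXME in celt_inner_prod_sse: an untested sketch of the suggested 8-way version, using two independent accumulators so consecutive addps operations do not serialize on each other; the horizontal sum and scalar tail are unchanged:

static OPUS_INLINE opus_val32 celt_inner_prod_sse_8way(const opus_val16 *x,
      const opus_val16 *y, int N)
{
   int i;
   float xy;
   __m128 sum1 = _mm_setzero_ps();
   __m128 sum2 = _mm_setzero_ps();
   for (i = 0; i < N - 7; i += 8)
   {
      sum1 = _mm_add_ps(sum1, _mm_mul_ps(_mm_loadu_ps(x + i), _mm_loadu_ps(y + i)));
      sum2 = _mm_add_ps(sum2, _mm_mul_ps(_mm_loadu_ps(x + i + 4), _mm_loadu_ps(y + i + 4)));
   }
   sum1 = _mm_add_ps(sum1, sum2);
   /* Horizontal sum, as in the 4-way version above */
   sum1 = _mm_add_ps(sum1, _mm_movehl_ps(sum1, sum1));
   sum1 = _mm_add_ss(sum1, _mm_shuffle_ps(sum1, sum1, 0x55));
   _mm_store_ss(&xy, sum1);
   for (; i < N; i++)
      xy = MAC16_16(xy, x[i], y[i]);
   return xy;
}
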
104 #define OVERRIDE_COMB_FILTER_CONST | 182 #define OVERRIDE_COMB_FILTER_CONST |
105 static OPUS_INLINE void comb_filter_const(opus_val32 *y, opus_val32 *x, int T, int N, | 183 static OPUS_INLINE void comb_filter_const(opus_val32 *y, opus_val32 *x, int T, int N, |
106 opus_val16 g10, opus_val16 g11, opus_val16 g12) | 184 opus_val16 g10, opus_val16 g11, opus_val16 g12) |
107 { | 185 { |
108 int i; | 186 int i; |
109 __m128 x0v; | 187 __m128 x0v; |
110 __m128 g10v, g11v, g12v; | 188 __m128 g10v, g11v, g12v; |
111 g10v = _mm_load1_ps(&g10); | 189 g10v = _mm_load1_ps(&g10); |
112 g11v = _mm_load1_ps(&g11); | 190 g11v = _mm_load1_ps(&g11); |
113 g12v = _mm_load1_ps(&g12); | 191 g12v = _mm_load1_ps(&g12); |
(...skipping 33 matching lines...) |
147 { | 225 { |
148 y[i] = x[i] | 226 y[i] = x[i] |
149 + MULT16_32_Q15(g10,x[i-T]) | 227 + MULT16_32_Q15(g10,x[i-T]) |
150 + MULT16_32_Q15(g11,ADD32(x[i-T+1],x[i-T-1])) | 228 + MULT16_32_Q15(g11,ADD32(x[i-T+1],x[i-T-1])) |
151 + MULT16_32_Q15(g12,ADD32(x[i-T+2],x[i-T-2])); | 229 + MULT16_32_Q15(g12,ADD32(x[i-T+2],x[i-T-2])); |
152 } | 230 } |
153 #endif | 231 #endif |
154 } | 232 } |
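
For clarity, the per-sample recurrence that comb_filter_const vectorizes, matching the scalar remainder loop above (in the float build MULT16_32_Q15(a,b) reduces to a*b):

   /* 5-tap comb filter centered on the pitch lag T */
   y[i] = x[i] + g10*x[i-T]
               + g11*(x[i-T+1] + x[i-T-1])
               + g12*(x[i-T+2] + x[i-T-2]);
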
155 | 233 |
156 #endif | 234 #endif |
| 235 #endif |