Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(568)

Side by Side Diff: celt/x86/pitch_sse.h

Issue 882843002: Update to opus-HEAD-66611f1. (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/opus.git@master
Patch Set: Add the contents of Makefile.mips back. Created 5 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « celt/x86/celt_lpc_sse.c ('k') | celt/x86/pitch_sse.c » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* Copyright (c) 2013 Jean-Marc Valin and John Ridges */ 1 /* Copyright (c) 2013 Jean-Marc Valin and John Ridges
2 Copyright (c) 2014, Cisco Systems, INC MingXiang WeiZhou MinPeng YanWang*/
2 /** 3 /**
3 @file pitch_sse.h 4 @file pitch_sse.h
4 @brief Pitch analysis 5 @brief Pitch analysis
5 */ 6 */
6 7
7 /* 8 /*
8 Redistribution and use in source and binary forms, with or without 9 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions 10 modification, are permitted provided that the following conditions
10 are met: 11 are met:
11 12
(...skipping 13 matching lines...) Expand all
25 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 26 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
26 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 27 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
27 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 28 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
28 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 29 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
29 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 */ 31 */
31 32
32 #ifndef PITCH_SSE_H 33 #ifndef PITCH_SSE_H
33 #define PITCH_SSE_H 34 #define PITCH_SSE_H
34 35
36 #if defined(HAVE_CONFIG_H)
37 #include "config.h"
38 #endif
39
40 #if defined(OPUS_X86_MAY_HAVE_SSE4_1) || defined(OPUS_X86_MAY_HAVE_SSE2)
41 #if defined(OPUS_X86_MAY_HAVE_SSE4_1)
42 void xcorr_kernel_sse4_1(
43 const opus_int16 *x,
44 const opus_int16 *y,
45 opus_val32 sum[4],
46 int len);
47
48 extern void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])(
49 const opus_int16 *x,
50 const opus_int16 *y,
51 opus_val32 sum[4],
52 int len);
53
54 #define OVERRIDE_XCORR_KERNEL
55 #define xcorr_kernel(x, y, sum, len, arch) \
56 ((*XCORR_KERNEL_IMPL[(arch) & OPUS_ARCHMASK])(x, y, sum, len))
57
58 opus_val32 celt_inner_prod_sse4_1(
59 const opus_int16 *x,
60 const opus_int16 *y,
61 int N);
62 #endif
63
64 #if defined(OPUS_X86_MAY_HAVE_SSE2)
65 opus_val32 celt_inner_prod_sse2(
66 const opus_int16 *x,
67 const opus_int16 *y,
68 int N);
69 #endif
70
71 extern opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(
72 const opus_int16 *x,
73 const opus_int16 *y,
74 int N);
75
76 #define OVERRIDE_CELT_INNER_PROD
77 #define celt_inner_prod(x, y, N, arch) \
78 ((*CELT_INNER_PROD_IMPL[(arch) & OPUS_ARCHMASK])(x, y, N))
79 #else
80
35 #include <xmmintrin.h> 81 #include <xmmintrin.h>
36 #include "arch.h" 82 #include "arch.h"
37 83
38 #define OVERRIDE_XCORR_KERNEL 84 #define OVERRIDE_XCORR_KERNEL
39 static OPUS_INLINE void xcorr_kernel(const opus_val16 *x, const opus_val16 *y, opus_val32 sum[4], int len) 85 static OPUS_INLINE void xcorr_kernel_sse(const opus_val16 *x, const opus_val16 *y, opus_val32 sum[4], int len)
40 { 86 {
41 int j; 87 int j;
42 __m128 xsum1, xsum2; 88 __m128 xsum1, xsum2;
43 xsum1 = _mm_loadu_ps(sum); 89 xsum1 = _mm_loadu_ps(sum);
44 xsum2 = _mm_setzero_ps(); 90 xsum2 = _mm_setzero_ps();
45 91
46 for (j = 0; j < len-3; j += 4) 92 for (j = 0; j < len-3; j += 4)
47 { 93 {
48 __m128 x0 = _mm_loadu_ps(x+j); 94 __m128 x0 = _mm_loadu_ps(x+j);
49 __m128 yj = _mm_loadu_ps(y+j); 95 __m128 yj = _mm_loadu_ps(y+j);
(...skipping 14 matching lines...) Expand all
64 xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j))); 110 xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j)));
65 if (++j < len) 111 if (++j < len)
66 { 112 {
67 xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j))); 113 xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j)));
68 } 114 }
69 } 115 }
70 } 116 }
71 _mm_storeu_ps(sum,_mm_add_ps(xsum1,xsum2)); 117 _mm_storeu_ps(sum,_mm_add_ps(xsum1,xsum2));
72 } 118 }
73 119
120 #define xcorr_kernel(_x, _y, _z, len, arch) \
121 ((void)(arch),xcorr_kernel_sse(_x, _y, _z, len))
122
74 #define OVERRIDE_DUAL_INNER_PROD 123 #define OVERRIDE_DUAL_INNER_PROD
75 static OPUS_INLINE void dual_inner_prod(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02, 124 static OPUS_INLINE void dual_inner_prod(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
76 int N, opus_val32 *xy1, opus_val32 *xy2) 125 int N, opus_val32 *xy1, opus_val32 *xy2)
77 { 126 {
78 int i; 127 int i;
79 __m128 xsum1, xsum2; 128 __m128 xsum1, xsum2;
80 xsum1 = _mm_setzero_ps(); 129 xsum1 = _mm_setzero_ps();
81 xsum2 = _mm_setzero_ps(); 130 xsum2 = _mm_setzero_ps();
82 for (i=0;i<N-3;i+=4) 131 for (i=0;i<N-3;i+=4)
83 { 132 {
(...skipping 10 matching lines...) Expand all
94 xsum2 = _mm_add_ps(xsum2, _mm_movehl_ps(xsum2, xsum2)); 143 xsum2 = _mm_add_ps(xsum2, _mm_movehl_ps(xsum2, xsum2));
95 xsum2 = _mm_add_ss(xsum2, _mm_shuffle_ps(xsum2, xsum2, 0x55)); 144 xsum2 = _mm_add_ss(xsum2, _mm_shuffle_ps(xsum2, xsum2, 0x55));
96 _mm_store_ss(xy2, xsum2); 145 _mm_store_ss(xy2, xsum2);
97 for (;i<N;i++) 146 for (;i<N;i++)
98 { 147 {
99 *xy1 = MAC16_16(*xy1, x[i], y01[i]); 148 *xy1 = MAC16_16(*xy1, x[i], y01[i]);
100 *xy2 = MAC16_16(*xy2, x[i], y02[i]); 149 *xy2 = MAC16_16(*xy2, x[i], y02[i]);
101 } 150 }
102 } 151 }
103 152
153 #define OVERRIDE_CELT_INNER_PROD
154 static OPUS_INLINE opus_val32 celt_inner_prod_sse(const opus_val16 *x, const opus_val16 *y,
155 int N)
156 {
157 int i;
158 float xy;
159 __m128 sum;
160 sum = _mm_setzero_ps();
161 /* FIXME: We should probably go 8-way and use 2 sums. */
162 for (i=0;i<N-3;i+=4)
163 {
164 __m128 xi = _mm_loadu_ps(x+i);
165 __m128 yi = _mm_loadu_ps(y+i);
166 sum = _mm_add_ps(sum,_mm_mul_ps(xi, yi));
167 }
168 /* Horizontal sum */
169 sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum));
170 sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55));
171 _mm_store_ss(&xy, sum);
172 for (;i<N;i++)
173 {
174 xy = MAC16_16(xy, x[i], y[i]);
175 }
176 return xy;
177 }
178
179 # define celt_inner_prod(_x, _y, len, arch) \
180 ((void)(arch),celt_inner_prod_sse(_x, _y, len))
181
104 #define OVERRIDE_COMB_FILTER_CONST 182 #define OVERRIDE_COMB_FILTER_CONST
105 static OPUS_INLINE void comb_filter_const(opus_val32 *y, opus_val32 *x, int T, int N, 183 static OPUS_INLINE void comb_filter_const(opus_val32 *y, opus_val32 *x, int T, int N,
106 opus_val16 g10, opus_val16 g11, opus_val16 g12) 184 opus_val16 g10, opus_val16 g11, opus_val16 g12)
107 { 185 {
108 int i; 186 int i;
109 __m128 x0v; 187 __m128 x0v;
110 __m128 g10v, g11v, g12v; 188 __m128 g10v, g11v, g12v;
111 g10v = _mm_load1_ps(&g10); 189 g10v = _mm_load1_ps(&g10);
112 g11v = _mm_load1_ps(&g11); 190 g11v = _mm_load1_ps(&g11);
113 g12v = _mm_load1_ps(&g12); 191 g12v = _mm_load1_ps(&g12);
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after
147 { 225 {
148 y[i] = x[i] 226 y[i] = x[i]
149 + MULT16_32_Q15(g10,x[i-T]) 227 + MULT16_32_Q15(g10,x[i-T])
150 + MULT16_32_Q15(g11,ADD32(x[i-T+1],x[i-T-1])) 228 + MULT16_32_Q15(g11,ADD32(x[i-T+1],x[i-T-1]))
151 + MULT16_32_Q15(g12,ADD32(x[i-T+2],x[i-T-2])); 229 + MULT16_32_Q15(g12,ADD32(x[i-T+2],x[i-T-2]));
152 } 230 }
153 #endif 231 #endif
154 } 232 }
155 233
156 #endif 234 #endif
235 #endif
OLDNEW
« no previous file with comments | « celt/x86/celt_lpc_sse.c ('k') | celt/x86/pitch_sse.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698