Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(670)

Side by Side Diff: celt/x86/pitch_sse.h

Issue 107243004: Updating Opus to release 1.1 (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/opus
Patch Set: Created 7 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « celt/stack_alloc.h ('k') | celt_headers.mk » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* Copyright (c) 2013 Jean-Marc Valin and John Ridges */ 1 /* Copyright (c) 2013 Jean-Marc Valin and John Ridges */
2 /** 2 /**
3 @file pitch_sse.h 3 @file pitch_sse.h
4 @brief Pitch analysis 4 @brief Pitch analysis
5 */ 5 */
6 6
7 /* 7 /*
8 Redistribution and use in source and binary forms, with or without 8 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions 9 modification, are permitted provided that the following conditions
10 are met: 10 are met:
(...skipping 18 matching lines...) Expand all
29 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 */ 30 */
31 31
32 #ifndef PITCH_SSE_H 32 #ifndef PITCH_SSE_H
33 #define PITCH_SSE_H 33 #define PITCH_SSE_H
34 34
35 #include <xmmintrin.h> 35 #include <xmmintrin.h>
36 #include "arch.h" 36 #include "arch.h"
37 37
38 #define OVERRIDE_XCORR_KERNEL 38 #define OVERRIDE_XCORR_KERNEL
39 static inline void xcorr_kernel(const opus_val16 *x, const opus_val16 *y, opus_val32 sum[4], int len) 39 static OPUS_INLINE void xcorr_kernel(const opus_val16 *x, const opus_val16 *y, opus_val32 sum[4], int len)
40 { 40 {
41 int j; 41 int j;
42 __m128 xsum1, xsum2; 42 __m128 xsum1, xsum2;
43 xsum1 = _mm_loadu_ps(sum); 43 xsum1 = _mm_loadu_ps(sum);
44 xsum2 = _mm_setzero_ps(); 44 xsum2 = _mm_setzero_ps();
45 45
46 for (j = 0; j < len-3; j += 4) 46 for (j = 0; j < len-3; j += 4)
47 { 47 {
48 __m128 x0 = _mm_loadu_ps(x+j); 48 __m128 x0 = _mm_loadu_ps(x+j);
49 __m128 yj = _mm_loadu_ps(y+j); 49 __m128 yj = _mm_loadu_ps(y+j);
(...skipping 15 matching lines...) Expand all
65 if (++j < len) 65 if (++j < len)
66 { 66 {
67 xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j))); 67 xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j)));
68 } 68 }
69 } 69 }
70 } 70 }
71 _mm_storeu_ps(sum,_mm_add_ps(xsum1,xsum2)); 71 _mm_storeu_ps(sum,_mm_add_ps(xsum1,xsum2));
72 } 72 }
73 73
74 #define OVERRIDE_DUAL_INNER_PROD 74 #define OVERRIDE_DUAL_INNER_PROD
75 static inline void dual_inner_prod(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02, 75 static OPUS_INLINE void dual_inner_prod(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
76 int N, opus_val32 *xy1, opus_val32 *xy2) 76 int N, opus_val32 *xy1, opus_val32 *xy2)
77 { 77 {
78 int i; 78 int i;
79 __m128 xsum1, xsum2; 79 __m128 xsum1, xsum2;
80 xsum1 = _mm_setzero_ps(); 80 xsum1 = _mm_setzero_ps();
81 xsum2 = _mm_setzero_ps(); 81 xsum2 = _mm_setzero_ps();
82 for (i=0;i<N-3;i+=4) 82 for (i=0;i<N-3;i+=4)
83 { 83 {
84 __m128 xi = _mm_loadu_ps(x+i); 84 __m128 xi = _mm_loadu_ps(x+i);
85 __m128 y1i = _mm_loadu_ps(y01+i); 85 __m128 y1i = _mm_loadu_ps(y01+i);
86 __m128 y2i = _mm_loadu_ps(y02+i); 86 __m128 y2i = _mm_loadu_ps(y02+i);
87 xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(xi, y1i)); 87 xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(xi, y1i));
88 xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(xi, y2i)); 88 xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(xi, y2i));
89 } 89 }
90 /* Horizontal sum */ 90 /* Horizontal sum */
91 xsum1 = _mm_add_ps(xsum1, _mm_movehl_ps(xsum1, xsum1)); 91 xsum1 = _mm_add_ps(xsum1, _mm_movehl_ps(xsum1, xsum1));
92 xsum1 = _mm_add_ss(xsum1, _mm_shuffle_ps(xsum1, xsum1, 0x55)); 92 xsum1 = _mm_add_ss(xsum1, _mm_shuffle_ps(xsum1, xsum1, 0x55));
93 _mm_store_ss(xy1, xsum1); 93 _mm_store_ss(xy1, xsum1);
94 xsum2 = _mm_add_ps(xsum2, _mm_movehl_ps(xsum2, xsum2)); 94 xsum2 = _mm_add_ps(xsum2, _mm_movehl_ps(xsum2, xsum2));
95 xsum2 = _mm_add_ss(xsum2, _mm_shuffle_ps(xsum2, xsum2, 0x55)); 95 xsum2 = _mm_add_ss(xsum2, _mm_shuffle_ps(xsum2, xsum2, 0x55));
96 _mm_store_ss(xy2, xsum2); 96 _mm_store_ss(xy2, xsum2);
97 for (;i<N;i++) 97 for (;i<N;i++)
98 { 98 {
99 *xy1 = MAC16_16(*xy1, x[i], y01[i]); 99 *xy1 = MAC16_16(*xy1, x[i], y01[i]);
100 *xy2 = MAC16_16(*xy2, x[i], y02[i]); 100 *xy2 = MAC16_16(*xy2, x[i], y02[i]);
101 } 101 }
102 } 102 }
103 103
104 #define OVERRIDE_COMB_FILTER_CONST 104 #define OVERRIDE_COMB_FILTER_CONST
105 static inline void comb_filter_const(opus_val32 *y, opus_val32 *x, int T, int N, 105 static OPUS_INLINE void comb_filter_const(opus_val32 *y, opus_val32 *x, int T, int N,
106 opus_val16 g10, opus_val16 g11, opus_val16 g12) 106 opus_val16 g10, opus_val16 g11, opus_val16 g12)
107 { 107 {
108 int i; 108 int i;
109 __m128 x0v; 109 __m128 x0v;
110 __m128 g10v, g11v, g12v; 110 __m128 g10v, g11v, g12v;
111 g10v = _mm_load1_ps(&g10); 111 g10v = _mm_load1_ps(&g10);
112 g11v = _mm_load1_ps(&g11); 112 g11v = _mm_load1_ps(&g11);
113 g12v = _mm_load1_ps(&g12); 113 g12v = _mm_load1_ps(&g12);
114 x0v = _mm_loadu_ps(&x[-T-2]); 114 x0v = _mm_loadu_ps(&x[-T-2]);
115 for (i=0;i<N-3;i+=4) 115 for (i=0;i<N-3;i+=4)
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after
147 { 147 {
148 y[i] = x[i] 148 y[i] = x[i]
149 + MULT16_32_Q15(g10,x[i-T]) 149 + MULT16_32_Q15(g10,x[i-T])
150 + MULT16_32_Q15(g11,ADD32(x[i-T+1],x[i-T-1])) 150 + MULT16_32_Q15(g11,ADD32(x[i-T+1],x[i-T-1]))
151 + MULT16_32_Q15(g12,ADD32(x[i-T+2],x[i-T-2])); 151 + MULT16_32_Q15(g12,ADD32(x[i-T+2],x[i-T-2]));
152 } 152 }
153 #endif 153 #endif
154 } 154 }
155 155
156 #endif 156 #endif
OLDNEW
« no previous file with comments | « celt/stack_alloc.h ('k') | celt_headers.mk » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698