Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(87)

Side by Side Diff: third_party/opus/src/celt/x86/celt_lpc_sse.c

Issue 2962373002: [Opus] Update to v1.2.1 (Closed)
Patch Set: Pre-increment instead of post-increment (created 3 years, 5 months ago)
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « third_party/opus/src/celt/x86/celt_lpc_sse.h ('k') | third_party/opus/src/celt/x86/vq_sse.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD NEW
1 /* Copyright (c) 2014, Cisco Systems, INC 1 /* Copyright (c) 2014, Cisco Systems, INC
2 Written by XiangMingZhu WeiZhou MinPeng YanWang 2 Written by XiangMingZhu WeiZhou MinPeng YanWang
3 3
4 Redistribution and use in source and binary forms, with or without 4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions 5 modification, are permitted provided that the following conditions
6 are met: 6 are met:
7 7
8 - Redistributions of source code must retain the above copyright 8 - Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer. 9 notice, this list of conditions and the following disclaimer.
10 10
(...skipping 22 matching lines...) Expand all
33 #include <emmintrin.h> 33 #include <emmintrin.h>
34 #include <smmintrin.h> 34 #include <smmintrin.h>
35 #include "celt_lpc.h" 35 #include "celt_lpc.h"
36 #include "stack_alloc.h" 36 #include "stack_alloc.h"
37 #include "mathops.h" 37 #include "mathops.h"
38 #include "pitch.h" 38 #include "pitch.h"
39 #include "x86cpu.h" 39 #include "x86cpu.h"
40 40
41 #if defined(FIXED_POINT) 41 #if defined(FIXED_POINT)
42 42
43 void celt_fir_sse4_1(const opus_val16 *_x, 43 void celt_fir_sse4_1(const opus_val16 *x,
44 const opus_val16 *num, 44 const opus_val16 *num,
45 opus_val16 *_y, 45 opus_val16 *y,
46 int N, 46 int N,
47 int ord, 47 int ord,
48 opus_val16 *mem,
49 int arch) 48 int arch)
50 { 49 {
51 int i,j; 50 int i,j;
52 VARDECL(opus_val16, rnum); 51 VARDECL(opus_val16, rnum);
53 VARDECL(opus_val16, x);
54 52
55 __m128i vecNoA; 53 __m128i vecNoA;
56 opus_int32 noA ; 54 opus_int32 noA ;
57 SAVE_STACK; 55 SAVE_STACK;
58 56
59 ALLOC(rnum, ord, opus_val16); 57 ALLOC(rnum, ord, opus_val16);
60 ALLOC(x, N+ord, opus_val16);
61 for(i=0;i<ord;i++) 58 for(i=0;i<ord;i++)
62 rnum[i] = num[ord-i-1]; 59 rnum[i] = num[ord-i-1];
63 for(i=0;i<ord;i++)
64 x[i] = mem[ord-i-1];
65
66 for (i=0;i<N-7;i+=8)
67 {
68 x[i+ord ]=_x[i ];
69 x[i+ord+1]=_x[i+1];
70 x[i+ord+2]=_x[i+2];
71 x[i+ord+3]=_x[i+3];
72 x[i+ord+4]=_x[i+4];
73 x[i+ord+5]=_x[i+5];
74 x[i+ord+6]=_x[i+6];
75 x[i+ord+7]=_x[i+7];
76 }
77
78 for (;i<N-3;i+=4)
79 {
80 x[i+ord ]=_x[i ];
81 x[i+ord+1]=_x[i+1];
82 x[i+ord+2]=_x[i+2];
83 x[i+ord+3]=_x[i+3];
84 }
85
86 for (;i<N;i++)
87 x[i+ord]=_x[i];
88
89 for(i=0;i<ord;i++)
90 mem[i] = _x[N-i-1];
91 #ifdef SMALL_FOOTPRINT
92 for (i=0;i<N;i++)
93 {
94 opus_val32 sum = SHL32(EXTEND32(_x[i]), SIG_SHIFT);
95 for (j=0;j<ord;j++)
96 {
97 sum = MAC16_16(sum,rnum[j],x[i+j]);
98 }
99 _y[i] = SATURATE16(PSHR32(sum, SIG_SHIFT));
100 }
101 #else
102 noA = EXTEND32(1) << SIG_SHIFT >> 1; 60 noA = EXTEND32(1) << SIG_SHIFT >> 1;
103 vecNoA = _mm_set_epi32(noA, noA, noA, noA); 61 vecNoA = _mm_set_epi32(noA, noA, noA, noA);
104 62
105 for (i=0;i<N-3;i+=4) 63 for (i=0;i<N-3;i+=4)
106 { 64 {
107 opus_val32 sums[4] = {0}; 65 opus_val32 sums[4] = {0};
108 __m128i vecSum, vecX; 66 __m128i vecSum, vecX;
109 67
110 xcorr_kernel(rnum, x+i, sums, ord, arch); 68 xcorr_kernel(rnum, x+i-ord, sums, ord, arch);
111 69
112 vecSum = _mm_loadu_si128((__m128i *)sums); 70 vecSum = _mm_loadu_si128((__m128i *)sums);
113 vecSum = _mm_add_epi32(vecSum, vecNoA); 71 vecSum = _mm_add_epi32(vecSum, vecNoA);
114 vecSum = _mm_srai_epi32(vecSum, SIG_SHIFT); 72 vecSum = _mm_srai_epi32(vecSum, SIG_SHIFT);
115 vecX = OP_CVTEPI16_EPI32_M64(_x + i); 73 vecX = OP_CVTEPI16_EPI32_M64(x + i);
116 vecSum = _mm_add_epi32(vecSum, vecX); 74 vecSum = _mm_add_epi32(vecSum, vecX);
117 vecSum = _mm_packs_epi32(vecSum, vecSum); 75 vecSum = _mm_packs_epi32(vecSum, vecSum);
118 _mm_storel_epi64((__m128i *)(_y + i), vecSum); 76 _mm_storel_epi64((__m128i *)(y + i), vecSum);
119 } 77 }
120 for (;i<N;i++) 78 for (;i<N;i++)
121 { 79 {
122 opus_val32 sum = 0; 80 opus_val32 sum = 0;
123 for (j=0;j<ord;j++) 81 for (j=0;j<ord;j++)
124 sum = MAC16_16(sum, rnum[j], x[i + j]); 82 sum = MAC16_16(sum, rnum[j], x[i+j-ord]);
125 _y[i] = SATURATE16(ADD32(EXTEND32(_x[i]), PSHR32(sum, SIG_SHIFT))); 83 y[i] = SATURATE16(ADD32(EXTEND32(x[i]), PSHR32(sum, SIG_SHIFT)));
126 } 84 }
127 85
128 #endif
129 RESTORE_STACK; 86 RESTORE_STACK;
130 } 87 }
131 88
132 #endif 89 #endif
OLD NEW
« no previous file with comments | « third_party/opus/src/celt/x86/celt_lpc_sse.h ('k') | third_party/opus/src/celt/x86/vq_sse.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698