Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(272)

Side by Side Diff: src/libFLAC/fixed_intrin_ssse3.c

Issue 1961133002: Update FLAC to 1.3.1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/flac.git@master
Patch Set: build config tweaks for Windows Created 4 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/libFLAC/fixed_intrin_sse2.c ('k') | src/libFLAC/float.c » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 /* libFLAC - Free Lossless Audio Codec library
2 * Copyright (C) 2000-2009 Josh Coalson
3 * Copyright (C) 2011-2014 Xiph.Org Foundation
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * - Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * - Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * - Neither the name of the Xiph.org Foundation nor the names of its
17 * contributors may be used to endorse or promote products derived from
18 * this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
24 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
25 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
26 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
27 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
28 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
29 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 #ifdef HAVE_CONFIG_H
34 # include <config.h>
35 #endif
36
37 #ifndef FLAC__INTEGER_ONLY_LIBRARY
38 #ifndef FLAC__NO_ASM
39 #if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN
40 #include "private/fixed.h"
41 #ifdef FLAC__SSSE3_SUPPORTED
42
43 #include <tmmintrin.h> /* SSSE3 */
44 #include <math.h>
45 #include "private/macros.h"
46 #include "share/compat.h"
47 #include "FLAC/assert.h"
48
/*
 * m128i_to_i64(dest, src): copy the low 64 bits of the __m128i value `src`
 * into the 64-bit integer lvalue `dest`.
 *
 * When FLAC__CPU_IA32 is defined the value is written through memory with
 * _mm_storel_epi64; otherwise it is moved directly with _mm_cvtsi128_si64
 * (presumably because that intrinsic is only provided on 64-bit targets).
 *
 * Arguments and the expansion are parenthesized so that expressions such as
 * array elements or conditional operands can be passed safely.
 */
#ifdef FLAC__CPU_IA32
#define m128i_to_i64(dest, src) _mm_storel_epi64((__m128i*)&(dest), (src))
#else
#define m128i_to_i64(dest, src) ((dest) = _mm_cvtsi128_si64(src))
#endif
54
55 FLAC__SSE_TARGET("ssse3")
56 unsigned FLAC__fixed_compute_best_predictor_intrin_ssse3(const FLAC__int32 data[ ], unsigned data_len, FLAC__float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER + 1])
57 {
58 FLAC__uint32 total_error_0, total_error_1, total_error_2, total_error_3, total_error_4;
59 unsigned i, order;
60
61 __m128i total_err0, total_err1, total_err2;
62
63 {
64 FLAC__int32 itmp;
65 __m128i last_error;
66
67 last_error = _mm_cvtsi32_si128(data[-1]); // 0 0 0 le0
68 itmp = data[-2];
69 last_error = _mm_shuffle_epi32(last_error, _MM_SHUFFLE(2,1,0,0)) ;
70 last_error = _mm_sub_epi32(last_error, _mm_cvtsi32_si128(itmp)); // 0 0 le0 le1
71 itmp -= data[-3];
72 last_error = _mm_shuffle_epi32(last_error, _MM_SHUFFLE(2,1,0,0)) ;
73 last_error = _mm_sub_epi32(last_error, _mm_cvtsi32_si128(itmp)); // 0 le0 le1 le2
74 itmp -= data[-3] - data[-4];
75 last_error = _mm_shuffle_epi32(last_error, _MM_SHUFFLE(2,1,0,0)) ;
76 last_error = _mm_sub_epi32(last_error, _mm_cvtsi32_si128(itmp)); // le0 le1 le2 le3
77
78 total_err0 = total_err1 = _mm_setzero_si128();
79 for(i = 0; i < data_len; i++) {
80 __m128i err0, err1;
81 err0 = _mm_cvtsi32_si128(data[i]); // 0 0 0 e0
82 err1 = _mm_shuffle_epi32(err0, _MM_SHUFFLE(0,0,0,0)); // e0 e0 e0 e0
83 #if 1 /* OPT_SSE */
84 err1 = _mm_sub_epi32(err1, last_error);
85 last_error = _mm_srli_si128(last_error, 4); // 0 le0 le1 le2
86 err1 = _mm_sub_epi32(err1, last_error);
87 last_error = _mm_srli_si128(last_error, 4); // 0 0 le0 le1
88 err1 = _mm_sub_epi32(err1, last_error);
89 last_error = _mm_srli_si128(last_error, 4); // 0 0 0 le0
90 err1 = _mm_sub_epi32(err1, last_error); // e1 e2 e3 e4
91 #else
92 last_error = _mm_add_epi32(last_error, _mm_srli_si128(la st_error, 8)); // le0 le1 le2+le0 le3+le1
93 last_error = _mm_add_epi32(last_error, _mm_srli_si128(la st_error, 4)); // le0 le1+le0 le2+le0+le1 le3+le1+le2+le0
94 err1 = _mm_sub_epi32(err1, last_error); // e1 e2 e3 e4
95 #endif
96 last_error = _mm_alignr_epi8(err0, err1, 4); // e0 e1 e2 e3
97
98 err0 = _mm_abs_epi32(err0);
99 err1 = _mm_abs_epi32(err1);
100
101 total_err0 = _mm_add_epi32(total_err0, err0); // 0 0 0 te0
102 total_err1 = _mm_add_epi32(total_err1, err1); // te1 te2 te3 te4
103 }
104 }
105
106 total_error_0 = _mm_cvtsi128_si32(total_err0);
107 total_err2 = total_err1; // te1 te2 te3 te4
108 total_err1 = _mm_srli_si128(total_err1, 8); // 0 0 te1 te2
109 total_error_4 = _mm_cvtsi128_si32(total_err2);
110 total_error_2 = _mm_cvtsi128_si32(total_err1);
111 total_err2 = _mm_srli_si128(total_err2, 4); // 0 te1 te2 te3
112 total_err1 = _mm_srli_si128(total_err1, 4); // 0 0 0 te1
113 total_error_3 = _mm_cvtsi128_si32(total_err2);
114 total_error_1 = _mm_cvtsi128_si32(total_err1);
115
116 /* prefer higher order */
117 if(total_error_0 < flac_min(flac_min(flac_min(total_error_1, total_error _2), total_error_3), total_error_4))
118 order = 0;
119 else if(total_error_1 < flac_min(flac_min(total_error_2, total_error_3), total_error_4))
120 order = 1;
121 else if(total_error_2 < flac_min(total_error_3, total_error_4))
122 order = 2;
123 else if(total_error_3 < total_error_4)
124 order = 3;
125 else
126 order = 4;
127
128 /* Estimate the expected number of bits per residual signal sample. */
129 /* 'total_error*' is linearly related to the variance of the residual */
130 /* signal, so we use it directly to compute E(|x|) */
131 FLAC__ASSERT(data_len > 0 || total_error_0 == 0);
132 FLAC__ASSERT(data_len > 0 || total_error_1 == 0);
133 FLAC__ASSERT(data_len > 0 || total_error_2 == 0);
134 FLAC__ASSERT(data_len > 0 || total_error_3 == 0);
135 FLAC__ASSERT(data_len > 0 || total_error_4 == 0);
136
137 residual_bits_per_sample[0] = (FLAC__float)((total_error_0 > 0) ? log(M_ LN2 * (FLAC__double)total_error_0 / (FLAC__double)data_len) / M_LN2 : 0.0);
138 residual_bits_per_sample[1] = (FLAC__float)((total_error_1 > 0) ? log(M_ LN2 * (FLAC__double)total_error_1 / (FLAC__double)data_len) / M_LN2 : 0.0);
139 residual_bits_per_sample[2] = (FLAC__float)((total_error_2 > 0) ? log(M_ LN2 * (FLAC__double)total_error_2 / (FLAC__double)data_len) / M_LN2 : 0.0);
140 residual_bits_per_sample[3] = (FLAC__float)((total_error_3 > 0) ? log(M_ LN2 * (FLAC__double)total_error_3 / (FLAC__double)data_len) / M_LN2 : 0.0);
141 residual_bits_per_sample[4] = (FLAC__float)((total_error_4 > 0) ? log(M_ LN2 * (FLAC__double)total_error_4 / (FLAC__double)data_len) / M_LN2 : 0.0);
142
143 return order;
144 }
145
146 FLAC__SSE_TARGET("ssse3")
147 unsigned FLAC__fixed_compute_best_predictor_wide_intrin_ssse3(const FLAC__int32 data[], unsigned data_len, FLAC__float residual_bits_per_sample[FLAC__MAX_FIXED_ ORDER + 1])
148 {
149 FLAC__uint64 total_error_0, total_error_1, total_error_2, total_error_3, total_error_4;
150 unsigned i, order;
151
152 __m128i total_err0, total_err1, total_err3;
153
154 {
155 FLAC__int32 itmp;
156 __m128i last_error, zero = _mm_setzero_si128();
157
158 last_error = _mm_cvtsi32_si128(data[-1]); // 0 0 0 le0
159 itmp = data[-2];
160 last_error = _mm_shuffle_epi32(last_error, _MM_SHUFFLE(2,1,0,0)) ;
161 last_error = _mm_sub_epi32(last_error, _mm_cvtsi32_si128(itmp)); // 0 0 le0 le1
162 itmp -= data[-3];
163 last_error = _mm_shuffle_epi32(last_error, _MM_SHUFFLE(2,1,0,0)) ;
164 last_error = _mm_sub_epi32(last_error, _mm_cvtsi32_si128(itmp)); // 0 le0 le1 le2
165 itmp -= data[-3] - data[-4];
166 last_error = _mm_shuffle_epi32(last_error, _MM_SHUFFLE(2,1,0,0)) ;
167 last_error = _mm_sub_epi32(last_error, _mm_cvtsi32_si128(itmp)); // le0 le1 le2 le3
168
169 total_err0 = total_err1 = total_err3 = _mm_setzero_si128();
170 for(i = 0; i < data_len; i++) {
171 __m128i err0, err1;
172 err0 = _mm_cvtsi32_si128(data[i]); // 0 0 0 e0
173 err1 = _mm_shuffle_epi32(err0, _MM_SHUFFLE(0,0,0,0)); // e0 e0 e0 e0
174 #if 1 /* OPT_SSE */
175 err1 = _mm_sub_epi32(err1, last_error);
176 last_error = _mm_srli_si128(last_error, 4); // 0 le0 le1 le2
177 err1 = _mm_sub_epi32(err1, last_error);
178 last_error = _mm_srli_si128(last_error, 4); // 0 0 le0 le1
179 err1 = _mm_sub_epi32(err1, last_error);
180 last_error = _mm_srli_si128(last_error, 4); // 0 0 0 le0
181 err1 = _mm_sub_epi32(err1, last_error); // e1 e2 e3 e4
182 #else
183 last_error = _mm_add_epi32(last_error, _mm_srli_si128(la st_error, 8)); // le0 le1 le2+le0 le3+le1
184 last_error = _mm_add_epi32(last_error, _mm_srli_si128(la st_error, 4)); // le0 le1+le0 le2+le0+le1 le3+le1+le2+le0
185 err1 = _mm_sub_epi32(err1, last_error); // e1 e2 e3 e4
186 #endif
187 last_error = _mm_alignr_epi8(err0, err1, 4); // e0 e1 e2 e3
188
189 err0 = _mm_abs_epi32(err0);
190 err1 = _mm_abs_epi32(err1); // |e1| |e2| |e3| |e4|
191
192 total_err0 = _mm_add_epi64(total_err0, err0); // 0 te0
193 err0 = _mm_unpacklo_epi32(err1, zero); // 0 |e3| 0 |e4|
194 err1 = _mm_unpackhi_epi32(err1, zero); // 0 |e1| 0 |e2|
195 total_err3 = _mm_add_epi64(total_err3, err0); // te3 te4
196 total_err1 = _mm_add_epi64(total_err1, err1); // te1 te2
197 }
198 }
199
200 m128i_to_i64(total_error_0, total_err0);
201 m128i_to_i64(total_error_4, total_err3);
202 m128i_to_i64(total_error_2, total_err1);
203 total_err3 = _mm_srli_si128(total_err3, 8); // 0 te3
204 total_err1 = _mm_srli_si128(total_err1, 8); // 0 te1
205 m128i_to_i64(total_error_3, total_err3);
206 m128i_to_i64(total_error_1, total_err1);
207
208 /* prefer higher order */
209 if(total_error_0 < flac_min(flac_min(flac_min(total_error_1, total_error _2), total_error_3), total_error_4))
210 order = 0;
211 else if(total_error_1 < flac_min(flac_min(total_error_2, total_error_3), total_error_4))
212 order = 1;
213 else if(total_error_2 < flac_min(total_error_3, total_error_4))
214 order = 2;
215 else if(total_error_3 < total_error_4)
216 order = 3;
217 else
218 order = 4;
219
220 /* Estimate the expected number of bits per residual signal sample. */
221 /* 'total_error*' is linearly related to the variance of the residual */
222 /* signal, so we use it directly to compute E(|x|) */
223 FLAC__ASSERT(data_len > 0 || total_error_0 == 0);
224 FLAC__ASSERT(data_len > 0 || total_error_1 == 0);
225 FLAC__ASSERT(data_len > 0 || total_error_2 == 0);
226 FLAC__ASSERT(data_len > 0 || total_error_3 == 0);
227 FLAC__ASSERT(data_len > 0 || total_error_4 == 0);
228
229 residual_bits_per_sample[0] = (FLAC__float)((total_error_0 > 0) ? log(M_ LN2 * (FLAC__double)total_error_0 / (FLAC__double)data_len) / M_LN2 : 0.0);
230 residual_bits_per_sample[1] = (FLAC__float)((total_error_1 > 0) ? log(M_ LN2 * (FLAC__double)total_error_1 / (FLAC__double)data_len) / M_LN2 : 0.0);
231 residual_bits_per_sample[2] = (FLAC__float)((total_error_2 > 0) ? log(M_ LN2 * (FLAC__double)total_error_2 / (FLAC__double)data_len) / M_LN2 : 0.0);
232 residual_bits_per_sample[3] = (FLAC__float)((total_error_3 > 0) ? log(M_ LN2 * (FLAC__double)total_error_3 / (FLAC__double)data_len) / M_LN2 : 0.0);
233 residual_bits_per_sample[4] = (FLAC__float)((total_error_4 > 0) ? log(M_ LN2 * (FLAC__double)total_error_4 / (FLAC__double)data_len) / M_LN2 : 0.0);
234
235 return order;
236 }
237
238 #endif /* FLAC__SSSE3_SUPPORTED */
239 #endif /* (FLAC__CPU_IA32 || FLAC__CPU_X86_64) && FLAC__HAS_X86INTRIN */
240 #endif /* FLAC__NO_ASM */
241 #endif /* FLAC__INTEGER_ONLY_LIBRARY */
OLDNEW
« no previous file with comments | « src/libFLAC/fixed_intrin_sse2.c ('k') | src/libFLAC/float.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698