Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(346)

Side by Side Diff: src/libFLAC/ia32/fixed_asm.nasm

Issue 1961133002: Update FLAC to 1.3.1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/flac.git@master
Patch Set: build config tweaks for Windows Created 4 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/libFLAC/ia32/cpu_asm.nasm ('k') | src/libFLAC/ia32/lpc_asm.nasm » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 ; vim:filetype=nasm ts=8
2
3 ; libFLAC - Free Lossless Audio Codec library
4 ; Copyright (C) 2001-2009 Josh Coalson
5 ; Copyright (C) 2011-2014 Xiph.Org Foundation
6 ;
7 ; Redistribution and use in source and binary forms, with or without
8 ; modification, are permitted provided that the following conditions
9 ; are met:
10 ;
11 ; - Redistributions of source code must retain the above copyright
12 ; notice, this list of conditions and the following disclaimer.
13 ;
14 ; - Redistributions in binary form must reproduce the above copyright
15 ; notice, this list of conditions and the following disclaimer in the
16 ; documentation and/or other materials provided with the distribution.
17 ;
18 ; - Neither the name of the Xiph.org Foundation nor the names of its
19 ; contributors may be used to endorse or promote products derived from
20 ; this software without specific prior written permission.
21 ;
22 ; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23 ; ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 ; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25 ; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
26 ; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
27 ; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
28 ; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
29 ; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
30 ; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
31 ; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
32 ; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33
34 %include "nasm.h"
35
36 data_section ; macro not defined in this file (presumably from nasm.h); nothing is emitted before code_section below
37
38 cglobal FLAC__fixed_compute_best_predictor_asm_ia32_mmx_cmov ; macro not defined in this file; presumably exports the routine's symbol -- confirm in nasm.h
39
40 code_section ; macro not defined in this file (presumably from nasm.h); the routine below is assembled after it
41
42 ; **********************************************************************
43 ;
44 ; unsigned FLAC__fixed_compute_best_predictor(const FLAC__int32 *data, unsigned data_len, FLAC__float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1])
45 ; {
46 ; FLAC__int32 last_error_0 = data[-1];
47 ; FLAC__int32 last_error_1 = data[-1] - data[-2];
48 ; FLAC__int32 last_error_2 = last_error_1 - (data[-2] - data[-3]);
49 ; FLAC__int32 last_error_3 = last_error_2 - (data[-2] - 2*data[-3] + data[-4]);
50 ; FLAC__int32 error, save;
51 ; FLAC__uint32 total_error_0 = 0, total_error_1 = 0, total_error_2 = 0, total_error_3 = 0, total_error_4 = 0;
52 ; unsigned i, order;
53 ;
54 ; for(i = 0; i < data_len; i++) {
55 ; error = data[i] ; total_error_0 += local_abs(error); save = error;
56 ; error -= last_error_0; total_error_1 += local_abs(error); last_error_0 = save; save = error;
57 ; error -= last_error_1; total_error_2 += local_abs(error); last_error_1 = save; save = error;
58 ; error -= last_error_2; total_error_3 += local_abs(error); last_error_2 = save; save = error;
59 ; error -= last_error_3; total_error_4 += local_abs(error); last_error_3 = save;
60 ; }
61 ;
62 ; if(total_error_0 < min(min(min(total_error_1, total_error_2), total_error_3), total_error_4))
63 ; order = 0;
64 ; else if(total_error_1 < min(min(total_error_2, total_error_3), total_error_4))
65 ; order = 1;
66 ; else if(total_error_2 < min(total_error_3, total_error_4))
67 ; order = 2;
68 ; else if(total_error_3 < total_error_4)
69 ; order = 3;
70 ; else
71 ; order = 4;
72 ;
73 ; residual_bits_per_sample[0] = (FLAC__float)((data_len > 0 && total_error_0 > 0) ? log(M_LN2 * (FLAC__double)total_error_0 / (FLAC__double)data_len) / M_LN2 : 0.0);
74 ; residual_bits_per_sample[1] = (FLAC__float)((data_len > 0 && total_error_1 > 0) ? log(M_LN2 * (FLAC__double)total_error_1 / (FLAC__double)data_len) / M_LN2 : 0.0);
75 ; residual_bits_per_sample[2] = (FLAC__float)((data_len > 0 && total_error_2 > 0) ? log(M_LN2 * (FLAC__double)total_error_2 / (FLAC__double)data_len) / M_LN2 : 0.0);
76 ; residual_bits_per_sample[3] = (FLAC__float)((data_len > 0 && total_error_3 > 0) ? log(M_LN2 * (FLAC__double)total_error_3 / (FLAC__double)data_len) / M_LN2 : 0.0);
77 ; residual_bits_per_sample[4] = (FLAC__float)((data_len > 0 && total_error_4 > 0) ? log(M_LN2 * (FLAC__double)total_error_4 / (FLAC__double)data_len) / M_LN2 : 0.0);
78 ;
79 ; return order;
80 ; }
81 ALIGN 16 ; align the entry point to a 16-byte boundary
82 cident FLAC__fixed_compute_best_predictor_asm_ia32_mmx_cmov ; entry label; cident is not defined in this file (presumably a nasm.h macro)
83 ; Accumulates sum(|residual|) for fixed-predictor orders 0..4 over data[0..data_len-1], stores one bits-per-sample estimate per order, and returns in eax the order with the smallest sum (ties go to the higher order).
84 ; esp + 36 == data[]
85 ; esp + 40 == data_len
86 ; esp + 44 == residual_bits_per_sample[]
87 ; (these offsets hold after the prologue below: 4-byte return address + 4 pushed registers + 16 bytes of locals = 36)
88 push ebp ; ebp, ebx, esi and edi are all overwritten below, so save them here and restore them at .end
89 push ebx
90 push esi
91 push edi
92 sub esp, byte 16 ; reserve 16 bytes of scratch space (only the first 8 are used)
93 ; qword [esp] == temp space for loading FLAC__uint64s to FPU regs
94
95 ; ebx == &data[i]
96 ; ecx == loop counter (counts down from data_len to 0)
97 ; ebp == order
98 ; mm0 == total_error_1:total_error_0
99 ; mm1 == total_error_2:total_error_3
100 ; mm2 == :total_error_4 (only the low dword is ever read)
101 ; mm3 == last_error_1:last_error_0
102 ; mm4 == last_error_2:last_error_3
103
104 mov ecx, [esp + 40] ; ecx = data_len
105 test ecx, ecx
106 jz near .data_len_is_0 ; nothing to scan: skip all MMX/x87 work
107
108 mov ebx, [esp + 36] ; ebx = data[]
109 movd mm3, [ebx - 4] ; mm3 = 0:last_error_0 (the four samples before data[0] are read here, so they must be readable)
110 movd mm2, [ebx - 8] ; mm2 = 0:data[-2]
111 movd mm1, [ebx - 12] ; mm1 = 0:data[-3]
112 movd mm0, [ebx - 16] ; mm0 = 0:data[-4]
113 movq mm5, mm3 ; mm5 = 0:last_error_0
114 psubd mm5, mm2 ; mm5 = 0:last_error_1
115 punpckldq mm3, mm5 ; mm3 = last_error_1:last_error_0
116 psubd mm2, mm1 ; mm2 = 0:data[-2] - data[-3]
117 psubd mm5, mm2 ; mm5 = 0:last_error_2
118 movq mm4, mm5 ; mm4 = 0:last_error_2
119 psubd mm4, mm2 ; mm4 = 0:last_error_2 - (data[-2] - data[-3])
120 paddd mm4, mm1 ; mm4 = 0:last_error_2 - (data[-2] - 2 * data[-3])
121 psubd mm4, mm0 ; mm4 = 0:last_error_3
122 punpckldq mm4, mm5 ; mm4 = last_error_2:last_error_3
123 pxor mm0, mm0 ; mm0 = total_error_1:total_error_0 (both zeroed)
124 pxor mm1, mm1 ; mm1 = total_error_2:total_error_3 (both zeroed)
125 pxor mm2, mm2 ; mm2 = 0:total_error_4
126
127 ALIGN 16 ; align the loop head
128 .loop:
129 movd mm7, [ebx] ; mm7 = 0:error_0 (error_0 is the sample data[i] itself)
130 add ebx, byte 4 ; advance to the next sample
131 movq mm6, mm7 ; mm6 = 0:error_0
132 psubd mm7, mm3 ; mm7 = :error_1 (high dword is a don't-care)
133 punpckldq mm6, mm7 ; mm6 = error_1:error_0
134 movq mm5, mm6 ; mm5 = error_1:error_0
135 movq mm7, mm6 ; mm7 = error_1:error_0
136 psubd mm5, mm3 ; mm5 = error_2: (low dword is a don't-care)
137 movq mm3, mm6 ; mm3 = error_1:error_0 (these become last_error_1:last_error_0 for the next iteration)
138 psrad mm6, 31 ; mm6 = per-dword sign mask (0 or -1)
139 pxor mm7, mm6 ; (x ^ mask) - mask == abs(x) for each dword
140 psubd mm7, mm6 ; mm7 = abs(error_1):abs(error_0)
141 paddd mm0, mm7 ; mm0 = total_error_1:total_error_0
142 movq mm6, mm5 ; mm6 = error_2:
143 psubd mm5, mm4 ; mm5 = error_3:
144 punpckhdq mm5, mm6 ; mm5 = error_2:error_3
145 movq mm7, mm5 ; mm7 = error_2:error_3
146 movq mm6, mm5 ; mm6 = error_2:error_3
147 psubd mm5, mm4 ; mm5 = :error_4
148 movq mm4, mm6 ; mm4 = error_2:error_3 (these become last_error_2:last_error_3 for the next iteration)
149 psrad mm6, 31 ; mm6 = per-dword sign mask
150 pxor mm7, mm6
151 psubd mm7, mm6 ; mm7 = abs(error_2):abs(error_3)
152 paddd mm1, mm7 ; mm1 = total_error_2:total_error_3
153 movq mm6, mm5 ; mm6 = :error_4
154 psrad mm5, 31 ; mm5 = per-dword sign mask
155 pxor mm6, mm5
156 psubd mm6, mm5 ; mm6 = :abs(error_4)
157 paddd mm2, mm6 ; mm2 = :total_error_4
158
159 dec ecx
160 jnz short .loop ; repeat until all data_len samples are consumed
161
162 ; if(total_error_0 < min(min(min(total_error_1, total_error_2), total_error_3), total_error_4))
163 ; order = 0;
164 ; else if(total_error_1 < min(min(total_error_2, total_error_3), total_error_4))
165 ; order = 1;
166 ; else if(total_error_2 < min(total_error_3, total_error_4))
167 ; order = 2;
168 ; else if(total_error_3 < total_error_4)
169 ; order = 3;
170 ; else
171 ; order = 4;
172 movq mm3, mm0 ; mm3 = total_error_1:total_error_0
173 movd edi, mm2 ; edi = total_error_4
174 movd esi, mm1 ; esi = total_error_3
175 movd eax, mm0 ; eax = total_error_0
176 punpckhdq mm1, mm1 ; mm1 = total_error_2:total_error_2
177 punpckhdq mm3, mm3 ; mm3 = total_error_1:total_error_1
178 movd edx, mm1 ; edx = total_error_2
179 movd ecx, mm3 ; ecx = total_error_1
180
181 xor ebx, ebx ; ebx = candidate order, incremented before each compare
182 xor ebp, ebp ; ebp = best order so far (starts at 0)
183 inc ebx
184 cmp ecx, eax ; unsigned compares throughout; cmovbe (<=) makes a tie select the higher order
185 cmovb eax, ecx ; eax = min(total_error_0, total_error_1)
186 cmovbe ebp, ebx
187 inc ebx
188 cmp edx, eax
189 cmovb eax, edx ; eax = min(total_error_0, total_error_1, total_error_2)
190 cmovbe ebp, ebx
191 inc ebx
192 cmp esi, eax
193 cmovb eax, esi ; eax = min(total_error_0, total_error_1, total_error_2, total_error_3)
194 cmovbe ebp, ebx
195 inc ebx
196 cmp edi, eax
197 cmovb eax, edi ; eax = min(total_error_0, total_error_1, total_error_2, total_error_3, total_error_4)
198 cmovbe ebp, ebx
199 movd ebx, mm0 ; ebx = total_error_0 (re-read from mm0 because eax now holds the running minimum)
200 emms ; leave MMX state before the x87 instructions below
201
202 ; residual_bits_per_sample[0] = (FLAC__float)((data_len > 0 && total_error_0 > 0) ? log(M_LN2 * (FLAC__double)total_error_0 / (FLAC__double)data_len) / M_LN2 : 0.0);
203 ; residual_bits_per_sample[1] = (FLAC__float)((data_len > 0 && total_error_1 > 0) ? log(M_LN2 * (FLAC__double)total_error_1 / (FLAC__double)data_len) / M_LN2 : 0.0);
204 ; residual_bits_per_sample[2] = (FLAC__float)((data_len > 0 && total_error_2 > 0) ? log(M_LN2 * (FLAC__double)total_error_2 / (FLAC__double)data_len) / M_LN2 : 0.0);
205 ; residual_bits_per_sample[3] = (FLAC__float)((data_len > 0 && total_error_3 > 0) ? log(M_LN2 * (FLAC__double)total_error_3 / (FLAC__double)data_len) / M_LN2 : 0.0);
206 ; residual_bits_per_sample[4] = (FLAC__float)((data_len > 0 && total_error_4 > 0) ? log(M_LN2 * (FLAC__double)total_error_4 / (FLAC__double)data_len) / M_LN2 : 0.0);
207 xor eax, eax ; eax = 0: used as the high dword of the 64-bit temp and as the bit pattern of 0.0f
208 fild dword [esp + 40] ; ST = data_len (NOTE: assumes data_len is <2gigs)
209 .rbps_0:
210 test ebx, ebx
211 jz .total_error_0_is_0
212 fld1 ; ST = 1.0 data_len
213 mov [esp], ebx
214 mov [esp + 4], eax ; [esp] = (FLAC__uint64)total_error_0
215 mov ebx, [esp + 44] ; ebx = residual_bits_per_sample[] (total_error_0 is already stored at [esp])
216 fild qword [esp] ; ST = total_error_0 1.0 data_len
217 fdiv st2 ; ST = total_error_0/data_len 1.0 data_len
218 fldln2 ; ST = ln2 total_error_0/data_len 1.0 data_len
219 fmulp st1 ; ST = ln2*total_error_0/data_len 1.0 data_len
220 fyl2x ; ST = log2(ln2*total_error_0/data_len) data_len
221 fstp dword [ebx] ; residual_bits_per_sample[0] = log2(ln2*total_error_0/data_len) ST = data_len
222 jmp short .rbps_1
223 .total_error_0_is_0:
224 mov ebx, [esp + 44] ; ebx = residual_bits_per_sample[]
225 mov [ebx], eax ; residual_bits_per_sample[0] = 0.0
226 .rbps_1:
227 test ecx, ecx
228 jz .total_error_1_is_0
229 fld1 ; ST = 1.0 data_len
230 mov [esp], ecx
231 mov [esp + 4], eax ; [esp] = (FLAC__uint64)total_error_1
232 fild qword [esp] ; ST = total_error_1 1.0 data_len
233 fdiv st2 ; ST = total_error_1/data_len 1.0 data_len
234 fldln2 ; ST = ln2 total_error_1/data_len 1.0 data_len
235 fmulp st1 ; ST = ln2*total_error_1/data_len 1.0 data_len
236 fyl2x ; ST = log2(ln2*total_error_1/data_len) data_len
237 fstp dword [ebx + 4] ; residual_bits_per_sample[1] = log2(ln2*total_error_1/data_len) ST = data_len
238 jmp short .rbps_2
239 .total_error_1_is_0:
240 mov [ebx + 4], eax ; residual_bits_per_sample[1] = 0.0
241 .rbps_2:
242 test edx, edx
243 jz .total_error_2_is_0
244 fld1 ; ST = 1.0 data_len
245 mov [esp], edx
246 mov [esp + 4], eax ; [esp] = (FLAC__uint64)total_error_2
247 fild qword [esp] ; ST = total_error_2 1.0 data_len
248 fdiv st2 ; ST = total_error_2/data_len 1.0 data_len
249 fldln2 ; ST = ln2 total_error_2/data_len 1.0 data_len
250 fmulp st1 ; ST = ln2*total_error_2/data_len 1.0 data_len
251 fyl2x ; ST = log2(ln2*total_error_2/data_len) data_len
252 fstp dword [ebx + 8] ; residual_bits_per_sample[2] = log2(ln2*total_error_2/data_len) ST = data_len
253 jmp short .rbps_3
254 .total_error_2_is_0:
255 mov [ebx + 8], eax ; residual_bits_per_sample[2] = 0.0
256 .rbps_3:
257 test esi, esi
258 jz .total_error_3_is_0
259 fld1 ; ST = 1.0 data_len
260 mov [esp], esi
261 mov [esp + 4], eax ; [esp] = (FLAC__uint64)total_error_3
262 fild qword [esp] ; ST = total_error_3 1.0 data_len
263 fdiv st2 ; ST = total_error_3/data_len 1.0 data_len
264 fldln2 ; ST = ln2 total_error_3/data_len 1.0 data_len
265 fmulp st1 ; ST = ln2*total_error_3/data_len 1.0 data_len
266 fyl2x ; ST = log2(ln2*total_error_3/data_len) data_len
267 fstp dword [ebx + 12] ; residual_bits_per_sample[3] = log2(ln2*total_error_3/data_len) ST = data_len
268 jmp short .rbps_4
269 .total_error_3_is_0:
270 mov [ebx + 12], eax ; residual_bits_per_sample[3] = 0.0
271 .rbps_4:
272 test edi, edi
273 jz .total_error_4_is_0
274 fld1 ; ST = 1.0 data_len
275 mov [esp], edi
276 mov [esp + 4], eax ; [esp] = (FLAC__uint64)total_error_4
277 fild qword [esp] ; ST = total_error_4 1.0 data_len
278 fdiv st2 ; ST = total_error_4/data_len 1.0 data_len
279 fldln2 ; ST = ln2 total_error_4/data_len 1.0 data_len
280 fmulp st1 ; ST = ln2*total_error_4/data_len 1.0 data_len
281 fyl2x ; ST = log2(ln2*total_error_4/data_len) data_len
282 fstp dword [ebx + 16] ; residual_bits_per_sample[4] = log2(ln2*total_error_4/data_len) ST = data_len
283 jmp short .rbps_end
284 .total_error_4_is_0:
285 mov [ebx + 16], eax ; residual_bits_per_sample[4] = 0.0
286 .rbps_end:
287 fstp st0 ; ST = [empty] (pops the data_len loaded before .rbps_0)
288 jmp short .end
289 .data_len_is_0:
290 ; data_len == 0, so residual_bits_per_sample[*] = 0.0
291 xor ebp, ebp ; ebp = 0, stored below as the bit pattern of 0.0f
292 mov edi, [esp + 44] ; edi = residual_bits_per_sample[]
293 mov [edi], ebp
294 mov [edi + 4], ebp
295 mov [edi + 8], ebp
296 mov [edi + 12], ebp
297 mov [edi + 16], ebp
298 add ebp, byte 4 ; order = 4
299
300 .end:
301 mov eax, ebp ; return order
302 add esp, byte 16 ; release the scratch space reserved in the prologue
303 pop edi ; restore saved registers in reverse push order
304 pop esi
305 pop ebx
306 pop ebp
307 ret
308
309 ; end
OLDNEW
« no previous file with comments | « src/libFLAC/ia32/cpu_asm.nasm ('k') | src/libFLAC/ia32/lpc_asm.nasm » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698