src/libFLAC/ia32/fixed_asm.nasm - Issue 1961133002: Update FLAC to 1.3.1

Side by Side Diff: src/libFLAC/ia32/fixed_asm.nasm

Issue 1961133002: Update FLAC to 1.3.1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/flac.git@master

Patch Set: build config tweaks for Windows Created 4 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
(Empty)
	1 ; vim:filetype=nasm ts=8

	2

	3 ; libFLAC - Free Lossless Audio Codec library

	4 ; Copyright (C) 2001-2009 Josh Coalson

	5 ; Copyright (C) 2011-2014 Xiph.Org Foundation

	6 ;

	7 ; Redistribution and use in source and binary forms, with or without

	8 ; modification, are permitted provided that the following conditions

	9 ; are met:

	10 ;

	11 ; - Redistributions of source code must retain the above copyright

	12 ; notice, this list of conditions and the following disclaimer.

	13 ;

	14 ; - Redistributions in binary form must reproduce the above copyright

	15 ; notice, this list of conditions and the following disclaimer in the

	16 ; documentation and/or other materials provided with the distribution.

	17 ;

	18 ; - Neither the name of the Xiph.org Foundation nor the names of its

	19 ; contributors may be used to endorse or promote products derived from

	20 ; this software without specific prior written permission.

	21 ;

	22 ; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

	23 ; ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

	24 ; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR

	25 ; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR

	26 ; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,

	27 ; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,

	28 ; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR

	29 ; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF

	30 ; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING

	31 ; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS

	32 ; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

	33

	34 %include "nasm.h"

	35

	36 data_section ; NOTE(review): not defined in this file - presumably a section-selecting macro from nasm.h; no data is declared after it here

	37

	38 cglobal FLAC__fixed_compute_best_predictor_asm_ia32_mmx_cmov ; NOTE(review): presumably a nasm.h macro that exports this symbol (confirm); the routine body itself starts at the matching cident line

	39

	40 code_section ; NOTE(review): presumably a nasm.h macro selecting the code section - confirm in nasm.h

	41

	42 ; **********************************************************************

	43 ;

	44 ; unsigned FLAC__fixed_compute_best_predictor(const FLAC__int32 *data, unsigned data_len, FLAC__float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1])

	45 ; {

	46 ; FLAC__int32 last_error_0 = data[-1];

	47 ; FLAC__int32 last_error_1 = data[-1] - data[-2];

	48 ; FLAC__int32 last_error_2 = last_error_1 - (data[-2] - data[-3]);

	49 ; FLAC__int32 last_error_3 = last_error_2 - (data[-2] - 2*data[-3] + data[-4]);

	50 ; FLAC__int32 error, save;

	51 ; FLAC__uint32 total_error_0 = 0, total_error_1 = 0, total_error_2 = 0, total_error_3 = 0, total_error_4 = 0;

	52 ; unsigned i, order;

	53 ;

	54 ; for(i = 0; i < data_len; i++) {

	55 ; error = data[i] ; total_error_0 += local_abs(error); save = error;

	56 ; error -= last_error_0; total_error_1 += local_abs(error); last_error_0 = save; save = error;

	57 ; error -= last_error_1; total_error_2 += local_abs(error); last_error_1 = save; save = error;

	58 ; error -= last_error_2; total_error_3 += local_abs(error); last_error_2 = save; save = error;

	59 ; error -= last_error_3; total_error_4 += local_abs(error); last_error_3 = save;

	60 ; }

	61 ;

	62 ; if(total_error_0 < min(min(min(total_error_1, total_error_2), total_error_3), total_error_4))

	63 ; order = 0;

	64 ; else if(total_error_1 < min(min(total_error_2, total_error_3), total_error_4))

	65 ; order = 1;

	66 ; else if(total_error_2 < min(total_error_3, total_error_4))

	67 ; order = 2;

	68 ; else if(total_error_3 < total_error_4)

	69 ; order = 3;

	70 ; else

	71 ; order = 4;

	72 ;

	73 ; residual_bits_per_sample[0] = (FLAC__float)((data_len > 0 && total_error_0 > 0) ? log(M_LN2 * (FLAC__double)total_error_0 / (FLAC__double)data_len) / M_LN2 : 0.0);

	74 ; residual_bits_per_sample[1] = (FLAC__float)((data_len > 0 && total_error_1 > 0) ? log(M_LN2 * (FLAC__double)total_error_1 / (FLAC__double)data_len) / M_LN2 : 0.0);

	75 ; residual_bits_per_sample[2] = (FLAC__float)((data_len > 0 && total_error_2 > 0) ? log(M_LN2 * (FLAC__double)total_error_2 / (FLAC__double)data_len) / M_LN2 : 0.0);

	76 ; residual_bits_per_sample[3] = (FLAC__float)((data_len > 0 && total_error_3 > 0) ? log(M_LN2 * (FLAC__double)total_error_3 / (FLAC__double)data_len) / M_LN2 : 0.0);

	77 ; residual_bits_per_sample[4] = (FLAC__float)((data_len > 0 && total_error_4 > 0) ? log(M_LN2 * (FLAC__double)total_error_4 / (FLAC__double)data_len) / M_LN2 : 0.0);

	78 ;

	79 ; return order;

	80 ; }

	81 ALIGN 16

	82 cident FLAC__fixed_compute_best_predictor_asm_ia32_mmx_cmov

	83

	84 ; MMX/CMOV implementation of the C reference in the comment block above. Stack arguments are addressed relative to esp as it stands after the prologue below (return address + 4 pushes + 16 scratch bytes = 36 bytes): esp + 36 == data[]

	85 ; esp + 40 == data_len

	86 ; esp + 44 == residual_bits_per_sample[] (five 32-bit floats are written, indices 0..4)

	87

	88 push ebp ; ebp, ebx, esi and edi are all overwritten below; they are saved here and popped again at .end

	89 push ebx

	90 push esi

	91 push edi

	92 sub esp, byte 16 ; reserve 16 bytes of stack scratch space (released at .end)

	93 ; qword [esp] == temp space for loading FLAC__uint64s to FPU regs (only [esp] and [esp + 4] are written in this routine)

	94

	95 ; ebx == &data[i] (reused after the loop for total_error_0, then for residual_bits_per_sample[])

	96 ; ecx == loop counter (counts down from data_len; reused after the loop for total_error_1)

	97 ; ebp == order

	98 ; mm0 == total_error_1:total_error_0 (notation throughout is high dword:low dword)

	99 ; mm1 == total_error_2:total_error_3

	100 ; mm2 == (don't care):total_error_4

	101 ; mm3 == last_error_1:last_error_0

	102 ; mm4 == last_error_2:last_error_3

	103

	104 mov ecx, [esp + 40] ; ecx = data_len

	105 test ecx, ecx

	106 jz near .data_len_is_0 ; data_len == 0: skip the scan, zero all outputs and return order 4

	107

	108 mov ebx, [esp + 36] ; ebx = data[]

	109 movd mm3, [ebx - 4] ; mm3 = 0:last_error_0 (= data[-1])

	110 movd mm2, [ebx - 8] ; mm2 = 0:data[-2]

	111 movd mm1, [ebx - 12] ; mm1 = 0:data[-3]

	112 movd mm0, [ebx - 16] ; mm0 = 0:data[-4] (so the four samples preceding data[] must be readable)

	113 movq mm5, mm3 ; mm5 = 0:last_error_0

	114 psubd mm5, mm2 ; mm5 = 0:last_error_1

	115 punpckldq mm3, mm5 ; mm3 = last_error_1:last_error_0

	116 psubd mm2, mm1 ; mm2 = 0:data[-2] - data[-3]

	117 psubd mm5, mm2 ; mm5 = 0:last_error_2

	118 movq mm4, mm5 ; mm4 = 0:last_error_2

	119 psubd mm4, mm2 ; mm4 = 0:last_error_2 - (data[-2] - data[-3])

	120 paddd mm4, mm1 ; mm4 = 0:last_error_2 - (data[-2] - 2 * data[-3])

	121 psubd mm4, mm0 ; mm4 = 0:last_error_3

	122 punpckldq mm4, mm5 ; mm4 = last_error_2:last_error_3

	123 pxor mm0, mm0 ; mm0 = total_error_1:total_error_0 (both cleared)

	124 pxor mm1, mm1 ; mm1 = total_error_2:total_error_3 (both cleared)

	125 pxor mm2, mm2 ; mm2 = 0:total_error_4 (cleared)

	126

	127 ALIGN 16

	128 .loop:

	129 movd mm7, [ebx] ; mm7 = 0:error_0 (error_0 is the current sample)

	130 add ebx, byte 4 ; advance ebx to the next sample

	131 movq mm6, mm7 ; mm6 = 0:error_0

	132 psubd mm7, mm3 ; mm7 = (junk):error_1 (error_1 = error_0 - last_error_0)

	133 punpckldq mm6, mm7 ; mm6 = error_1:error_0

	134 movq mm5, mm6 ; mm5 = error_1:error_0

	135 movq mm7, mm6 ; mm7 = error_1:error_0

	136 psubd mm5, mm3 ; mm5 = error_2:(junk) (error_2 = error_1 - last_error_1)

	137 movq mm3, mm6 ; mm3 = error_1:error_0 (serves as last_error_1:last_error_0 for the next sample)

	138 psrad mm6, 31 ; mm6 = sign masks: each dword is all ones if negative, else zero

	139 pxor mm7, mm6 ; abs, step 1: x ^ mask

	140 psubd mm7, mm6 ; mm7 = abs(error_1):abs(error_0) (step 2: (x ^ mask) - mask)

	141 paddd mm0, mm7 ; mm0 = total_error_1:total_error_0

	142 movq mm6, mm5 ; mm6 = error_2:(junk)

	143 psubd mm5, mm4 ; mm5 = error_3:(junk) (error_3 = error_2 - last_error_2)

	144 punpckhdq mm5, mm6 ; mm5 = error_2:error_3

	145 movq mm7, mm5 ; mm7 = error_2:error_3

	146 movq mm6, mm5 ; mm6 = error_2:error_3

	147 psubd mm5, mm4 ; mm5 = (junk):error_4 (error_4 = error_3 - last_error_3)

	148 movq mm4, mm6 ; mm4 = error_2:error_3 (serves as last_error_2:last_error_3 for the next sample)

	149 psrad mm6, 31 ; mm6 = sign masks of error_2:error_3

	150 pxor mm7, mm6 ; abs, step 1: x ^ mask

	151 psubd mm7, mm6 ; mm7 = abs(error_2):abs(error_3)

	152 paddd mm1, mm7 ; mm1 = total_error_2:total_error_3

	153 movq mm6, mm5 ; mm6 = (junk):error_4

	154 psrad mm5, 31 ; mm5 = sign masks of (junk):error_4

	155 pxor mm6, mm5 ; abs, step 1: x ^ mask

	156 psubd mm6, mm5 ; mm6 = (junk):abs(error_4)

	157 paddd mm2, mm6 ; mm2 = (junk):total_error_4 (only the low dword is read after the loop)

	158

	159 dec ecx ; one fewer sample remaining

	160 jnz short .loop ; repeat until all data_len samples have been accumulated

	161

	162 ; if(total_error_0 < min(min(min(total_error_1, total_error_2), total_error_3), total_error_4))

	163 ; order = 0;

	164 ; else if(total_error_1 < min(min(total_error_2, total_error_3), total_error_4))

	165 ; order = 1;

	166 ; else if(total_error_2 < min(total_error_3, total_error_4))

	167 ; order = 2;

	168 ; else if(total_error_3 < total_error_4)

	169 ; order = 3;

	170 ; else

	171 ; order = 4;

	172 movq mm3, mm0 ; mm3 = total_error_1:total_error_0

	173 movd edi, mm2 ; edi = total_error_4

	174 movd esi, mm1 ; esi = total_error_3

	175 movd eax, mm0 ; eax = total_error_0

	176 punpckhdq mm1, mm1 ; mm1 = total_error_2:total_error_2

	177 punpckhdq mm3, mm3 ; mm3 = total_error_1:total_error_1

	178 movd edx, mm1 ; edx = total_error_2

	179 movd ecx, mm3 ; ecx = total_error_1

	180

	181 xor ebx, ebx ; ebx = candidate order, incremented before each comparison

	182 xor ebp, ebp ; ebp = order = 0 (eax starts as total_error_0, the running minimum)

	183 inc ebx ; candidate order = 1

	184 cmp ecx, eax

	185 cmovb eax, ecx ; eax = min(total_error_0, total_error_1)

	186 cmovbe ebp, ebx ; order = 1 if total_error_1 <= previous minimum (unsigned; a tie selects the higher order)

	187 inc ebx ; candidate order = 2

	188 cmp edx, eax

	189 cmovb eax, edx ; eax = min(total_error_0, total_error_1, total_error_2)

	190 cmovbe ebp, ebx ; order = 2 if total_error_2 <= previous minimum

	191 inc ebx ; candidate order = 3

	192 cmp esi, eax

	193 cmovb eax, esi ; eax = min(total_error_0, total_error_1, total_error_2, total_error_3)

	194 cmovbe ebp, ebx ; order = 3 if total_error_3 <= previous minimum

	195 inc ebx ; candidate order = 4

	196 cmp edi, eax

	197 cmovb eax, edi ; eax = min(total_error_0, total_error_1, total_error_2, total_error_3, total_error_4)

	198 cmovbe ebp, ebx ; order = 4 if total_error_4 <= previous minimum

	199 movd ebx, mm0 ; ebx = total_error_0 (eax now holds the running minimum instead)

	200 emms ; empty the MMX state before the x87 code below

	201

	202 ; residual_bits_per_sample[0] = (FLAC__float)((data_len > 0 && total_error_0 > 0) ? log(M_LN2 * (FLAC__double)total_error_0 / (FLAC__double)data_len) / M_LN2 : 0.0);

	203 ; residual_bits_per_sample[1] = (FLAC__float)((data_len > 0 && total_error_1 > 0) ? log(M_LN2 * (FLAC__double)total_error_1 / (FLAC__double)data_len) / M_LN2 : 0.0);

	204 ; residual_bits_per_sample[2] = (FLAC__float)((data_len > 0 && total_error_2 > 0) ? log(M_LN2 * (FLAC__double)total_error_2 / (FLAC__double)data_len) / M_LN2 : 0.0);

	205 ; residual_bits_per_sample[3] = (FLAC__float)((data_len > 0 && total_error_3 > 0) ? log(M_LN2 * (FLAC__double)total_error_3 / (FLAC__double)data_len) / M_LN2 : 0.0);

	206 ; residual_bits_per_sample[4] = (FLAC__float)((data_len > 0 && total_error_4 > 0) ? log(M_LN2 * (FLAC__double)total_error_4 / (FLAC__double)data_len) / M_LN2 : 0.0);

	207 xor eax, eax ; eax = 0: used below as the high dword of each 64-bit temp and as the bit pattern stored for 0.0

	208 fild dword [esp + 40] ; ST = data_len (NOTE: assumes data_len is <2gigs, since this is a signed 32-bit load)

	209 .rbps_0:

	210 test ebx, ebx

	211 jz .total_error_0_is_0 ; total_error_0 == 0: store 0.0 instead of taking a log

	212 fld1 ; ST = 1.0 data_len

	213 mov [esp], ebx

	214 mov [esp + 4], eax ; [esp] = (FLAC__uint64)total_error_0

	215 mov ebx, [esp + 44] ; ebx = residual_bits_per_sample[] (total_error_0 is already stored at [esp])

	216 fild qword [esp] ; ST = total_error_0 1.0 data_len

	217 fdiv st2 ; ST = total_error_0/data_len 1.0 data_len

	218 fldln2 ; ST = ln2 total_error_0/data_len 1.0 data_len

	219 fmulp st1 ; ST = ln2*total_error_0/data_len 1.0 data_len

	220 fyl2x ; ST = log2(ln2*total_error_0/data_len) data_len

	221 fstp dword [ebx] ; residual_bits_per_sample[0] = log2(ln2*total_error_0/data_len) ST = data_len

	222 jmp short .rbps_1

	223 .total_error_0_is_0:

	224 mov ebx, [esp + 44] ; ebx = residual_bits_per_sample[]

	225 mov [ebx], eax ; residual_bits_per_sample[0] = 0.0

	226 .rbps_1:

	227 test ecx, ecx

	228 jz .total_error_1_is_0 ; total_error_1 == 0: store 0.0 instead of taking a log

	229 fld1 ; ST = 1.0 data_len

	230 mov [esp], ecx

	231 mov [esp + 4], eax ; [esp] = (FLAC__uint64)total_error_1

	232 fild qword [esp] ; ST = total_error_1 1.0 data_len

	233 fdiv st2 ; ST = total_error_1/data_len 1.0 data_len

	234 fldln2 ; ST = ln2 total_error_1/data_len 1.0 data_len

	235 fmulp st1 ; ST = ln2*total_error_1/data_len 1.0 data_len

	236 fyl2x ; ST = log2(ln2*total_error_1/data_len) data_len

	237 fstp dword [ebx + 4] ; residual_bits_per_sample[1] = log2(ln2*total_error_1/data_len) ST = data_len

	238 jmp short .rbps_2

	239 .total_error_1_is_0:

	240 mov [ebx + 4], eax ; residual_bits_per_sample[1] = 0.0

	241 .rbps_2:

	242 test edx, edx

	243 jz .total_error_2_is_0 ; total_error_2 == 0: store 0.0 instead of taking a log

	244 fld1 ; ST = 1.0 data_len

	245 mov [esp], edx

	246 mov [esp + 4], eax ; [esp] = (FLAC__uint64)total_error_2

	247 fild qword [esp] ; ST = total_error_2 1.0 data_len

	248 fdiv st2 ; ST = total_error_2/data_len 1.0 data_len

	249 fldln2 ; ST = ln2 total_error_2/data_len 1.0 data_len

	250 fmulp st1 ; ST = ln2*total_error_2/data_len 1.0 data_len

	251 fyl2x ; ST = log2(ln2*total_error_2/data_len) data_len

	252 fstp dword [ebx + 8] ; residual_bits_per_sample[2] = log2(ln2*total_error_2/data_len) ST = data_len

	253 jmp short .rbps_3

	254 .total_error_2_is_0:

	255 mov [ebx + 8], eax ; residual_bits_per_sample[2] = 0.0

	256 .rbps_3:

	257 test esi, esi

	258 jz .total_error_3_is_0 ; total_error_3 == 0: store 0.0 instead of taking a log

	259 fld1 ; ST = 1.0 data_len

	260 mov [esp], esi

	261 mov [esp + 4], eax ; [esp] = (FLAC__uint64)total_error_3

	262 fild qword [esp] ; ST = total_error_3 1.0 data_len

	263 fdiv st2 ; ST = total_error_3/data_len 1.0 data_len

	264 fldln2 ; ST = ln2 total_error_3/data_len 1.0 data_len

	265 fmulp st1 ; ST = ln2*total_error_3/data_len 1.0 data_len

	266 fyl2x ; ST = log2(ln2*total_error_3/data_len) data_len

	267 fstp dword [ebx + 12] ; residual_bits_per_sample[3] = log2(ln2*total_error_3/data_len) ST = data_len

	268 jmp short .rbps_4

	269 .total_error_3_is_0:

	270 mov [ebx + 12], eax ; residual_bits_per_sample[3] = 0.0

	271 .rbps_4:

	272 test edi, edi

	273 jz .total_error_4_is_0 ; total_error_4 == 0: store 0.0 instead of taking a log

	274 fld1 ; ST = 1.0 data_len

	275 mov [esp], edi

	276 mov [esp + 4], eax ; [esp] = (FLAC__uint64)total_error_4

	277 fild qword [esp] ; ST = total_error_4 1.0 data_len

	278 fdiv st2 ; ST = total_error_4/data_len 1.0 data_len

	279 fldln2 ; ST = ln2 total_error_4/data_len 1.0 data_len

	280 fmulp st1 ; ST = ln2*total_error_4/data_len 1.0 data_len

	281 fyl2x ; ST = log2(ln2*total_error_4/data_len) data_len

	282 fstp dword [ebx + 16] ; residual_bits_per_sample[4] = log2(ln2*total_error_4/data_len) ST = data_len

	283 jmp short .rbps_end

	284 .total_error_4_is_0:

	285 mov [ebx + 16], eax ; residual_bits_per_sample[4] = 0.0

	286 .rbps_end:

	287 fstp st0 ; ST = [empty] (pops the data_len loaded before .rbps_0)

	288 jmp short .end

	289 .data_len_is_0:

	290 ; data_len == 0, so residual_bits_per_sample[*] = 0.0

	291 xor ebp, ebp ; ebp = 0, the bit pattern stored for 0.0

	292 mov edi, [esp + 44] ; edi = residual_bits_per_sample[]

	293 mov [edi], ebp ; residual_bits_per_sample[0] = 0.0

	294 mov [edi + 4], ebp ; residual_bits_per_sample[1] = 0.0

	295 mov [edi + 8], ebp ; residual_bits_per_sample[2] = 0.0

	296 mov [edi + 12], ebp ; residual_bits_per_sample[3] = 0.0

	297 mov [edi + 16], ebp ; residual_bits_per_sample[4] = 0.0

	298 add ebp, byte 4 ; order = 4

	299

	300 .end:

	301 mov eax, ebp ; return order

	302 add esp, byte 16 ; release the 16 bytes of scratch space

	303 pop edi ; restore the saved registers in reverse push order

	304 pop esi

	305 pop ebx

	306 pop ebp

	307 ret

	308

	309 ; end

OLD	NEW

« no previous file with comments | « src/libFLAC/ia32/cpu_asm.nasm ('k') | src/libFLAC/ia32/lpc_asm.nasm » ('j') | no next file with comments »