| Index: mozilla/security/nss/lib/freebl/mpi/mpi_x86_asm.c
|
| ===================================================================
|
| --- mozilla/security/nss/lib/freebl/mpi/mpi_x86_asm.c (revision 191424)
|
| +++ mozilla/security/nss/lib/freebl/mpi/mpi_x86_asm.c (working copy)
|
| @@ -1,535 +0,0 @@
|
| -/*
|
| - * mpi_x86_asm.c - MSVC inline assembly implementation of s_mpv_ functions.
|
| - *
|
| - * This Source Code Form is subject to the terms of the Mozilla Public
|
| - * License, v. 2.0. If a copy of the MPL was not distributed with this
|
| - * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
| -
|
| -#include "mpi-priv.h"
|
| -
|
| -static int is_sse = -1; /* SSE2 dispatch cache used by the routines below: negative = not probed yet, 0 = take the plain x86 path, positive = take the SSE2 path */
|
| -extern unsigned long s_mpi_is_sse2(); /* called by a routine below when is_sse is negative; its return value is stored into is_sse */
|
| -
|
| -/*
|
| - * s_mpv_mul_d: c[0 .. a_len] = a[0 .. a_len-1] * b
|
| - *
|
| - * Each of the a_len words of a is multiplied by the single word b, the
|
| - * running carry is added, the low 32 bits are stored to the next word of c
|
| - * and the high 32 bits become the next carry. After the loop the final
|
| - * carry is stored (not added) at c[a_len], so a_len + 1 words of c are
|
| - * written; the previous contents of c are never read. Words are handled
|
| - * as 32-bit quantities (pointers advance by 4 bytes per word).
|
| - *
|
| - * Dispatch: is_sse is tested on entry. If it is negative, s_mpi_is_sse2 is
|
| - * called and its result is stored in is_sse. A positive value selects the
|
| - * SSE2 path; otherwise the plain x86 path runs.
|
| - *
|
| - * The function is naked: each path builds and tears down its own frame and
|
| - * returns with a plain ret, leaving the arguments for the caller to remove.
|
| - *
|
| - * Stack frame of the x86 path (after sub esp,28 and its three pushes):
|
| - * ebp - 40: caller's ebx
|
| - * ebp - 36: caller's esi
|
| - * ebp - 32: caller's edi
|
| - * ebp - 28 .. ebp - 4: reserved by sub esp,28; not referenced
|
| - * ebp + 0: caller's ebp
|
| - * ebp + 4: return address
|
| - * ebp + 8: a argument
|
| - * ebp + 12: a_len argument
|
| - * ebp + 16: b argument
|
| - * ebp + 20: c argument
|
| - * The SSE2 path reserves no scratch space and does not save ebx:
|
| - * caller's edi is at ebp - 4 and caller's esi at ebp - 8.
|
| - *
|
| - * registers (x86 path):
|
| - * eax: a[i], then low half of the product (the word stored to c)
|
| - * ebx: carry
|
| - * ecx: a_len (counts down to zero)
|
| - * edx: b, then high half of the product
|
| - * esi: a ptr
|
| - * edi: c ptr
|
| - * registers (SSE2 path):
|
| - * mm0: a[i], then the 64-bit product a[i] * b
|
| - * mm1: b
|
| - * mm2: 64-bit accumulator; its low word is stored, then it is shifted
|
| - * right by 32 so that only the carry remains
|
| - * ecx, esi, edi: as in the x86 path
|
| - * emms is executed before the SSE2 path returns.
|
| - */
|
| -__declspec(naked) void
|
| -s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
|
| -{
|
| - __asm {
|
| - mov eax, is_sse
|
| - cmp eax, 0
|
| - je s_mpv_mul_d_x86
|
| - jg s_mpv_mul_d_sse2
|
| - call s_mpi_is_sse2
|
| - mov is_sse, eax
|
| - cmp eax, 0
|
| - jg s_mpv_mul_d_sse2
|
| -s_mpv_mul_d_x86:
|
| - push ebp
|
| - mov ebp,esp
|
| - sub esp,28
|
| - push edi
|
| - push esi
|
| - push ebx
|
| - mov ebx,0 ; carry = 0
|
| - mov ecx,[ebp+12] ; ecx = a_len
|
| - mov edi,[ebp+20]
|
| - cmp ecx,0
|
| - je L_2 ; jmp if a_len == 0
|
| - mov esi,[ebp+8] ; esi = a
|
| - cld
|
| -L_1:
|
| - lodsd ; eax = [ds:esi]; esi += 4
|
| - mov edx,[ebp+16] ; edx = b
|
| - mul edx ; edx:eax = Phi:Plo = a_i * b
|
| -
|
| - add eax,ebx ; add carry (ebx) to edx:eax
|
| - adc edx,0
|
| - mov ebx,edx ; high half of product becomes next carry
|
| -
|
| - stosd ; [es:edi] = ax; edi += 4;
|
| - dec ecx ; --a_len
|
| - jnz L_1 ; jmp if a_len != 0
|
| -L_2:
|
| - mov [edi],ebx ; *c = carry
|
| - pop ebx
|
| - pop esi
|
| - pop edi
|
| - leave
|
| - ret
|
| - nop
|
| -s_mpv_mul_d_sse2:
|
| - push ebp
|
| - mov ebp, esp
|
| - push edi
|
| - push esi
|
| - psubq mm2, mm2 ; carry = 0
|
| - mov ecx, [ebp+12] ; ecx = a_len
|
| - movd mm1, [ebp+16] ; mm1 = b
|
| - mov edi, [ebp+20]
|
| - cmp ecx, 0
|
| - je L_6 ; jmp if a_len == 0
|
| - mov esi, [ebp+8] ; esi = a
|
| - cld
|
| -L_5:
|
| - movd mm0, [esi] ; mm0 = *a++
|
| - add esi, 4
|
| - pmuludq mm0, mm1 ; mm0 = b * *a++
|
| - paddq mm2, mm0 ; add the carry
|
| - movd [edi], mm2 ; store the 32bit result
|
| - add edi, 4
|
| - psrlq mm2, 32 ; save the carry
|
| - dec ecx ; --a_len
|
| - jnz L_5 ; jmp if a_len != 0
|
| -L_6:
|
| - movd [edi], mm2 ; *c = carry
|
| - emms
|
| - pop esi
|
| - pop edi
|
| - leave
|
| - ret
|
| - nop
|
| - }
|
| -}
|
| -
|
| -/*
|
| - * s_mpv_mul_d_add: c[0 .. a_len-1] += a[0 .. a_len-1] * b, carry to c[a_len]
|
| - *
|
| - * For each of the a_len words of a, the product a[i] * b plus the running
|
| - * carry plus the current word c[i] is formed; the low 32 bits are written
|
| - * back to c[i] and the high 32 bits become the next carry. After the loop
|
| - * the final carry is stored (it overwrites, it is not added) at c[a_len].
|
| - * Words are handled as 32-bit quantities (pointers advance by 4 bytes).
|
| - *
|
| - * Dispatch: is_sse is tested on entry. If it is negative, s_mpi_is_sse2 is
|
| - * called and its result is stored in is_sse. A positive value selects the
|
| - * SSE2 path; otherwise the plain x86 path runs.
|
| - *
|
| - * The function is naked: each path builds and tears down its own frame and
|
| - * returns with a plain ret, leaving the arguments for the caller to remove.
|
| - *
|
| - * Stack frame of the x86 path (after sub esp,28 and its three pushes):
|
| - * ebp - 40: caller's ebx
|
| - * ebp - 36: caller's esi
|
| - * ebp - 32: caller's edi
|
| - * ebp - 28 .. ebp - 4: reserved by sub esp,28; not referenced
|
| - * ebp + 0: caller's ebp
|
| - * ebp + 4: return address
|
| - * ebp + 8: a argument
|
| - * ebp + 12: a_len argument
|
| - * ebp + 16: b argument
|
| - * ebp + 20: c argument
|
| - * The SSE2 path reserves no scratch space and does not save ebx:
|
| - * caller's edi is at ebp - 4 and caller's esi at ebp - 8.
|
| - *
|
| - * registers (x86 path):
|
| - * eax: a[i], then low half of the running sum (the word stored to c)
|
| - * ebx: carry; also used briefly to hold the word loaded from c
|
| - * ecx: a_len (counts down to zero)
|
| - * edx: b, then high half of the running sum
|
| - * esi: a ptr
|
| - * edi: c ptr
|
| - * registers (SSE2 path):
|
| - * mm0: a[i], then the 64-bit product, then the word loaded from c
|
| - * mm1: b
|
| - * mm2: 64-bit accumulator; its low word is stored, then it is shifted
|
| - * right by 32 so that only the carry remains
|
| - * ecx, esi, edi: as in the x86 path
|
| - * emms is executed before the SSE2 path returns.
|
| - */
|
| -__declspec(naked) void
|
| -s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
|
| -{
|
| - __asm {
|
| - mov eax, is_sse
|
| - cmp eax, 0
|
| - je s_mpv_mul_d_add_x86
|
| - jg s_mpv_mul_d_add_sse2
|
| - call s_mpi_is_sse2
|
| - mov is_sse, eax
|
| - cmp eax, 0
|
| - jg s_mpv_mul_d_add_sse2
|
| -s_mpv_mul_d_add_x86:
|
| - push ebp
|
| - mov ebp,esp
|
| - sub esp,28
|
| - push edi
|
| - push esi
|
| - push ebx
|
| - mov ebx,0 ; carry = 0
|
| - mov ecx,[ebp+12] ; ecx = a_len
|
| - mov edi,[ebp+20]
|
| - cmp ecx,0
|
| - je L_11 ; jmp if a_len == 0
|
| - mov esi,[ebp+8] ; esi = a
|
| - cld
|
| -L_10:
|
| - lodsd ; eax = [ds:esi]; esi += 4
|
| - mov edx,[ebp+16] ; edx = b
|
| - mul edx ; edx:eax = Phi:Plo = a_i * b
|
| -
|
| - add eax,ebx ; add carry (ebx) to edx:eax
|
| - adc edx,0
|
| - mov ebx,[edi] ; add in current word from *c
|
| - add eax,ebx
|
| - adc edx,0
|
| - mov ebx,edx ; high half of product becomes next carry
|
| -
|
| - stosd ; [es:edi] = ax; edi += 4;
|
| - dec ecx ; --a_len
|
| - jnz L_10 ; jmp if a_len != 0
|
| -L_11:
|
| - mov [edi],ebx ; *c = carry
|
| - pop ebx
|
| - pop esi
|
| - pop edi
|
| - leave
|
| - ret
|
| - nop
|
| -s_mpv_mul_d_add_sse2:
|
| - push ebp
|
| - mov ebp, esp
|
| - push edi
|
| - push esi
|
| - psubq mm2, mm2 ; carry = 0
|
| - mov ecx, [ebp+12] ; ecx = a_len
|
| - movd mm1, [ebp+16] ; mm1 = b
|
| - mov edi, [ebp+20]
|
| - cmp ecx, 0
|
| - je L_16 ; jmp if a_len == 0
|
| - mov esi, [ebp+8] ; esi = a
|
| - cld
|
| -L_15:
|
| - movd mm0, [esi] ; mm0 = *a++
|
| - add esi, 4
|
| - pmuludq mm0, mm1 ; mm0 = b * *a++
|
| - paddq mm2, mm0 ; add the carry
|
| - movd mm0, [edi]
|
| - paddq mm2, mm0 ; add the carry
|
| - movd [edi], mm2 ; store the 32bit result
|
| - add edi, 4
|
| - psrlq mm2, 32 ; save the carry
|
| - dec ecx ; --a_len
|
| - jnz L_15 ; jmp if a_len != 0
|
| -L_16:
|
| - movd [edi], mm2 ; *c = carry
|
| - emms
|
| - pop esi
|
| - pop edi
|
| - leave
|
| - ret
|
| - nop
|
| - }
|
| -}
|
| -
|
| -/*
|
| - * s_mpv_mul_d_add_prop: c += a[0 .. a_len-1] * b, propagating the carry
|
| - *
|
| - * For each of the a_len words of a, the product a[i] * b plus the running
|
| - * carry plus the current word c[i] is formed; the low 32 bits are written
|
| - * back to c[i] and the high 32 bits become the next carry. After the loop,
|
| - * if the carry is non-zero it is added into c[a_len]; while that addition
|
| - * carries out, 1 is added to each following word of c in turn. If the
|
| - * carry is zero, nothing beyond c[a_len-1] is touched.
|
| - * Words are handled as 32-bit quantities (pointers advance by 4 bytes).
|
| - *
|
| - * NOTE(review): the propagation loop has no length bound; presumably the
|
| - * caller guarantees c is long enough to absorb the carry -- confirm.
|
| - *
|
| - * Dispatch: is_sse is tested on entry. If it is negative, s_mpi_is_sse2 is
|
| - * called and its result is stored in is_sse. A positive value selects the
|
| - * SSE2 path; otherwise the plain x86 path runs.
|
| - *
|
| - * The function is naked: each path builds and tears down its own frame and
|
| - * returns with a plain ret, leaving the arguments for the caller to remove.
|
| - *
|
| - * Stack frame of the x86 path (after sub esp,28 and its three pushes):
|
| - * ebp - 40: caller's ebx
|
| - * ebp - 36: caller's esi
|
| - * ebp - 32: caller's edi
|
| - * ebp - 28 .. ebp - 4: reserved by sub esp,28; not referenced
|
| - * ebp + 0: caller's ebp
|
| - * ebp + 4: return address
|
| - * ebp + 8: a argument
|
| - * ebp + 12: a_len argument
|
| - * ebp + 16: b argument
|
| - * ebp + 20: c argument
|
| - * The SSE2 path reserves no scratch space: caller's edi is at ebp - 4,
|
| - * caller's esi at ebp - 8 and caller's ebx at ebp - 12.
|
| - *
|
| - * registers (x86 path):
|
| - * eax: a[i], then low half of the running sum (the word stored to c);
|
| - * in the propagation tail, the word of c being updated
|
| - * ebx: carry; also used briefly to hold the word loaded from c
|
| - * ecx: a_len (counts down to zero)
|
| - * edx: b, then high half of the running sum
|
| - * esi: a ptr
|
| - * edi: c ptr
|
| - * registers (SSE2 path):
|
| - * mm0: a[i], then the 64-bit product a[i] * b
|
| - * mm1: b
|
| - * mm2: 64-bit accumulator; its low word is stored, then it is shifted
|
| - * right by 32 so that only the carry remains
|
| - * mm3: the word loaded from c
|
| - * ebx: final carry, copied out of mm2 for the propagation tail
|
| - * eax, ecx, esi, edi: as in the x86 path
|
| - * emms is executed before the SSE2 path returns.
|
| - */
|
| -__declspec(naked) void
|
| -s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
|
| -{
|
| - __asm {
|
| - mov eax, is_sse
|
| - cmp eax, 0
|
| - je s_mpv_mul_d_add_prop_x86
|
| - jg s_mpv_mul_d_add_prop_sse2
|
| - call s_mpi_is_sse2
|
| - mov is_sse, eax
|
| - cmp eax, 0
|
| - jg s_mpv_mul_d_add_prop_sse2
|
| -s_mpv_mul_d_add_prop_x86:
|
| - push ebp
|
| - mov ebp,esp
|
| - sub esp,28
|
| - push edi
|
| - push esi
|
| - push ebx
|
| - mov ebx,0 ; carry = 0
|
| - mov ecx,[ebp+12] ; ecx = a_len
|
| - mov edi,[ebp+20]
|
| - cmp ecx,0
|
| - je L_21 ; jmp if a_len == 0
|
| - cld
|
| - mov esi,[ebp+8] ; esi = a
|
| -L_20:
|
| - lodsd ; eax = [ds:esi]; esi += 4
|
| - mov edx,[ebp+16] ; edx = b
|
| - mul edx ; edx:eax = Phi:Plo = a_i * b
|
| -
|
| - add eax,ebx ; add carry (ebx) to edx:eax
|
| - adc edx,0
|
| - mov ebx,[edi] ; add in current word from *c
|
| - add eax,ebx
|
| - adc edx,0
|
| - mov ebx,edx ; high half of product becomes next carry
|
| -
|
| - stosd ; [es:edi] = ax; edi += 4;
|
| - dec ecx ; --a_len
|
| - jnz L_20 ; jmp if a_len != 0
|
| -L_21:
|
| - cmp ebx,0 ; is carry zero?
|
| - jz L_23
|
| - mov eax,[edi] ; add in current word from *c
|
| - add eax,ebx
|
| - stosd ; [es:edi] = ax; edi += 4;
|
| - jnc L_23
|
| -L_22:
|
| - mov eax,[edi] ; add in current word from *c
|
| - adc eax,0
|
| - stosd ; [es:edi] = ax; edi += 4;
|
| - jc L_22
|
| -L_23:
|
| - pop ebx
|
| - pop esi
|
| - pop edi
|
| - leave
|
| - ret
|
| - nop
|
| -s_mpv_mul_d_add_prop_sse2:
|
| - push ebp
|
| - mov ebp, esp
|
| - push edi
|
| - push esi
|
| - push ebx
|
| - psubq mm2, mm2 ; carry = 0
|
| - mov ecx, [ebp+12] ; ecx = a_len
|
| - movd mm1, [ebp+16] ; mm1 = b
|
| - mov edi, [ebp+20]
|
| - cmp ecx, 0
|
| - je L_26 ; jmp if a_len == 0
|
| - mov esi, [ebp+8] ; esi = a
|
| - cld
|
| -L_25:
|
| - movd mm0, [esi] ; mm0 = *a++
|
| - movd mm3, [edi] ; fetch the sum
|
| - add esi, 4
|
| - pmuludq mm0, mm1 ; mm0 = b * *a++
|
| - paddq mm2, mm0 ; add the carry
|
| - paddq mm2, mm3 ; add *c++
|
| - movd [edi], mm2 ; store the 32bit result
|
| - add edi, 4
|
| - psrlq mm2, 32 ; save the carry
|
| - dec ecx ; --a_len
|
| - jnz L_25 ; jmp if a_len != 0
|
| -L_26:
|
| - movd ebx, mm2
|
| - cmp ebx, 0 ; is carry zero?
|
| - jz L_28
|
| - mov eax, [edi]
|
| - add eax, ebx
|
| - stosd
|
| - jnc L_28
|
| -L_27:
|
| - mov eax, [edi] ; add in current word from *c
|
| - adc eax, 0
|
| - stosd ; [es:edi] = ax; edi += 4;
|
| - jc L_27
|
| -L_28:
|
| - emms
|
| - pop ebx
|
| - pop esi
|
| - pop edi
|
| - leave
|
| - ret
|
| - nop
|
| - }
|
| -}
|
| -
|
| -/*
|
| - * s_mpv_sqr_add_prop: add the square of every word of a into sqrs
|
| - *
|
| - * For each of the a_len words of a, the 64-bit square a[i] * a[i] plus the
|
| - * running carry is added to the two consecutive words of sqrs at the
|
| - * current output position, which then advances by two words (8 bytes).
|
| - * The carry out of the high word feeds the next iteration. After the loop,
|
| - * if the carry is non-zero it is added into the next word of sqrs; while
|
| - * that addition carries out, 1 is added to each following word in turn.
|
| - * Words are handled as 32-bit quantities.
|
| - *
|
| - * NOTE(review): the propagation loop has no length bound; presumably the
|
| - * caller guarantees sqrs is long enough to absorb the carry -- confirm.
|
| - *
|
| - * Dispatch: is_sse is tested on entry. If it is negative, s_mpi_is_sse2 is
|
| - * called and its result is stored in is_sse. A positive value selects the
|
| - * SSE2 path; otherwise the plain x86 path runs.
|
| - *
|
| - * The function is naked: each path builds and tears down its own frame and
|
| - * returns with a plain ret, leaving the arguments for the caller to remove.
|
| - *
|
| - * Stack frame of the x86 path (after sub esp,12 and its three pushes):
|
| - * ebp - 24: caller's ebx
|
| - * ebp - 20: caller's esi
|
| - * ebp - 16: caller's edi
|
| - * ebp - 12 .. ebp - 4: reserved by sub esp,12; not referenced (the carry
|
| - * is kept in ebx and the count in ecx)
|
| - * ebp + 0: caller's ebp
|
| - * ebp + 4: return address
|
| - * ebp + 8: a argument (pa)
|
| - * ebp + 12: a_len argument
|
| - * ebp + 16: sqrs argument (ps)
|
| - * The SSE2 path reserves no scratch space: caller's edi is at ebp - 4,
|
| - * caller's esi at ebp - 8 and caller's ebx at ebp - 12.
|
| - *
|
| - * registers (x86 path):
|
| - * eax: a[i], then low half of the sum, then high half (each stored in turn);
|
| - * in the propagation tail, the word of sqrs being updated
|
| - * ebx: carry; also used briefly to hold the words loaded from sqrs
|
| - * ecx: a_len (counts down to zero)
|
| - * edx: high half of the square / running sum
|
| - * esi: a ptr
|
| - * edi: sqrs ptr
|
| - * registers (SSE2 path):
|
| - * mm0: a[i], then its 64-bit square
|
| - * mm2: 64-bit accumulator; shifted right by 32 after each word is stored
|
| - * mm3: the word loaded from sqrs (low word, then high word)
|
| - * ebx: final carry, copied out of mm2 for the propagation tail
|
| - * eax, ecx, esi, edi: as in the x86 path
|
| - * emms is executed before the SSE2 path returns.
|
| - */
|
| -__declspec(naked) void
|
| -s_mpv_sqr_add_prop(const mp_digit *a, mp_size a_len, mp_digit *sqrs)
|
| -{
|
| - __asm {
|
| - mov eax, is_sse
|
| - cmp eax, 0
|
| - je s_mpv_sqr_add_prop_x86
|
| - jg s_mpv_sqr_add_prop_sse2
|
| - call s_mpi_is_sse2
|
| - mov is_sse, eax
|
| - cmp eax, 0
|
| - jg s_mpv_sqr_add_prop_sse2
|
| -s_mpv_sqr_add_prop_x86:
|
| - push ebp
|
| - mov ebp,esp
|
| - sub esp,12
|
| - push edi
|
| - push esi
|
| - push ebx
|
| - mov ebx,0 ; carry = 0
|
| - mov ecx,[ebp+12] ; a_len
|
| - mov edi,[ebp+16] ; edi = ps
|
| - cmp ecx,0
|
| - je L_31 ; jump if a_len == 0
|
| - cld
|
| - mov esi,[ebp+8] ; esi = pa
|
| -L_30:
|
| - lodsd ; eax = [ds:si]; si += 4;
|
| - mul eax
|
| -
|
| - add eax,ebx ; add "carry"
|
| - adc edx,0
|
| - mov ebx,[edi]
|
| - add eax,ebx ; add low word from result
|
| - mov ebx,[edi+4]
|
| - stosd ; [es:di] = eax; di += 4;
|
| - adc edx,ebx ; add high word from result
|
| - mov ebx,0
|
| - mov eax,edx
|
| - adc ebx,0
|
| - stosd ; [es:di] = eax; di += 4;
|
| - dec ecx ; --a_len
|
| - jnz L_30 ; jmp if a_len != 0
|
| -L_31:
|
| - cmp ebx,0 ; is carry zero?
|
| - jz L_34
|
| - mov eax,[edi] ; add in current word from *c
|
| - add eax,ebx
|
| - stosd ; [es:edi] = ax; edi += 4;
|
| - jnc L_34
|
| -L_32:
|
| - mov eax,[edi] ; add in current word from *c
|
| - adc eax,0
|
| - stosd ; [es:edi] = ax; edi += 4;
|
| - jc L_32
|
| -L_34:
|
| - pop ebx
|
| - pop esi
|
| - pop edi
|
| - leave
|
| - ret
|
| - nop
|
| -s_mpv_sqr_add_prop_sse2:
|
| - push ebp
|
| - mov ebp, esp
|
| - push edi
|
| - push esi
|
| - push ebx
|
| - psubq mm2, mm2 ; carry = 0
|
| - mov ecx, [ebp+12] ; ecx = a_len
|
| - mov edi, [ebp+16]
|
| - cmp ecx, 0
|
| - je L_36 ; jmp if a_len == 0
|
| - mov esi, [ebp+8] ; esi = a
|
| - cld
|
| -L_35:
|
| - movd mm0, [esi] ; mm0 = *a
|
| - movd mm3, [edi] ; fetch the sum
|
| - add esi, 4
|
| - pmuludq mm0, mm0 ; mm0 = sqr(a)
|
| - paddq mm2, mm0 ; add the carry
|
| - paddq mm2, mm3 ; add the low word
|
| - movd mm3, [edi+4]
|
| - movd [edi], mm2 ; store the 32bit result
|
| - psrlq mm2, 32
|
| - paddq mm2, mm3 ; add the high word
|
| - movd [edi+4], mm2 ; store the 32bit result
|
| - psrlq mm2, 32 ; save the carry.
|
| - add edi, 8
|
| - dec ecx ; --a_len
|
| - jnz L_35 ; jmp if a_len != 0
|
| -L_36:
|
| - movd ebx, mm2
|
| - cmp ebx, 0 ; is carry zero?
|
| - jz L_38
|
| - mov eax, [edi]
|
| - add eax, ebx
|
| - stosd
|
| - jnc L_38
|
| -L_37:
|
| - mov eax, [edi] ; add in current word from *c
|
| - adc eax, 0
|
| - stosd ; [es:edi] = ax; edi += 4;
|
| - jc L_37
|
| -L_38:
|
| - emms
|
| - pop ebx
|
| - pop esi
|
| - pop edi
|
| - leave
|
| - ret
|
| - nop
|
| - }
|
| -}
|
| -
|
| -/*
|
| - * Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized
|
| - * so its high bit is 1. This code is from NSPR.
|
| - *
|
| - * The quotient is stored through qp and the remainder through rp, and the
|
| - * function always returns zero. The work is done by a single x86 div
|
| - * instruction with edx:eax = Nhi:Nlo; no range check is made first. That
|
| - * instruction raises a divide-error exception when the divisor is zero or
|
| - * the quotient does not fit in 32 bits (that is, when Nhi >= divisor).
|
| - * NOTE(review): presumably callers guarantee Nhi < divisor -- confirm.
|
| - *
|
| - * Dump of assembler code for function s_mpv_div_2dx1d:
|
| - *
|
| - * Stack layout after the initial push ebx (no ebp frame is built):
|
| - * esp + 0: Caller's ebx
|
| - * esp + 4: return address
|
| - * esp + 8: Nhi argument
|
| - * esp + 12: Nlo argument
|
| - * esp + 16: divisor argument
|
| - * esp + 20: qp argument
|
| - * esp + 24: rp argument
|
| - * registers:
|
| - * eax: Nlo, then the quotient, then the zero return value
|
| - * ebx: divisor, then qp, then rp (caller's value is saved and restored)
|
| - * edx: Nhi, then the remainder
|
| - * ecx, esi, edi: not used
|
| - */
|
| -__declspec(naked) mp_err
|
| -s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
|
| - mp_digit *qp, mp_digit *rp)
|
| -{
|
| - __asm {
|
| - push ebx
|
| - mov edx,[esp+8]
|
| - mov eax,[esp+12]
|
| - mov ebx,[esp+16]
|
| - div ebx
|
| - mov ebx,[esp+20]
|
| - mov [ebx],eax
|
| - mov ebx,[esp+24]
|
| - mov [ebx],edx
|
| - xor eax,eax ; return zero
|
| - pop ebx
|
| - ret
|
| - nop
|
| - }
|
| -}
|
|
|