Index: mozilla/security/nss/lib/freebl/mpi/mpi_x86_asm.c |
=================================================================== |
--- mozilla/security/nss/lib/freebl/mpi/mpi_x86_asm.c (revision 191424) |
+++ mozilla/security/nss/lib/freebl/mpi/mpi_x86_asm.c (working copy) |
@@ -1,535 +0,0 @@ |
-/* |
- * mpi_x86_asm.c - MSVC inline assembly implementation of s_mpv_ functions. |
- * |
- * This Source Code Form is subject to the terms of the Mozilla Public |
- * License, v. 2.0. If a copy of the MPL was not distributed with this |
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
- |
-#include "mpi-priv.h" |
- |
-static int is_sse = -1; /* < 0: not probed yet, 0: take the x86 paths, > 0: take the SSE2 paths */ |
-extern unsigned long s_mpi_is_sse2(); /* defined elsewhere; the functions below cache its result in is_sse */ |
- |
-/* |
- * s_mpv_mul_d: for i < a_len, c[i] = low 32 bits of (a[i] * b + carry); c[a_len] = final carry. |
- * Digits are handled as 32-bit words. With a_len == 0 only c[0] = 0 is written. |
- * Dispatch: is_sse == 0 -> x86 path, > 0 -> SSE2 path, < 0 -> call s_mpi_is_sse2 and cache it first. |
- * x86-path frame (the SSE2 path reserves no locals; there edi is at ebp - 4 and esi at ebp - 8): |
- * ebp - 40: caller's ebx |
- * ebp - 36: caller's esi |
- * ebp - 32: caller's edi |
- * ebp - 28 .. ebp - 4: reserved by "sub esp,28"; never referenced |
- * ebp + 0: caller's ebp |
- * ebp + 4: return address |
- * ebp + 8: a argument |
- * ebp + 12: a_len argument |
- * ebp + 16: b argument |
- * ebp + 20: c argument |
- * registers (x86 path; the SSE2 path uses mm0 = product, mm1 = b, mm2 = running sum / carry): |
- * eax: current digit of a, then low half of the product plus carry |
- * ebx: carry |
- * ecx: a_len (loop counter) |
- * edx: b, then high half of the product |
- * esi: a ptr |
- * edi: c ptr (the stosd notes below say "ax"; the full eax is stored) |
- * Returns with a plain "ret": the arguments stay on the stack for the caller to remove. |
- */ |
-__declspec(naked) void |
-s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c) |
-{ |
- __asm { |
- mov eax, is_sse |
- cmp eax, 0 |
- je s_mpv_mul_d_x86 |
- jg s_mpv_mul_d_sse2 |
- call s_mpi_is_sse2 |
- mov is_sse, eax |
- cmp eax, 0 |
- jg s_mpv_mul_d_sse2 |
-s_mpv_mul_d_x86: |
- push ebp |
- mov ebp,esp |
- sub esp,28 |
- push edi |
- push esi |
- push ebx |
- mov ebx,0 ; carry = 0 |
- mov ecx,[ebp+12] ; ecx = a_len |
- mov edi,[ebp+20] |
- cmp ecx,0 |
- je L_2 ; jmp if a_len == 0 |
- mov esi,[ebp+8] ; esi = a |
- cld |
-L_1: |
- lodsd ; eax = [ds:esi]; esi += 4 |
- mov edx,[ebp+16] ; edx = b |
- mul edx ; edx:eax = Phi:Plo = a_i * b |
- |
- add eax,ebx ; add carry (ebx) to edx:eax |
- adc edx,0 |
- mov ebx,edx ; high half of product becomes next carry |
- |
- stosd ; [es:edi] = ax; edi += 4; |
- dec ecx ; --a_len |
- jnz L_1 ; jmp if a_len != 0 |
-L_2: |
- mov [edi],ebx ; *c = carry |
- pop ebx |
- pop esi |
- pop edi |
- leave |
- ret |
- nop |
-s_mpv_mul_d_sse2: |
- push ebp |
- mov ebp, esp |
- push edi |
- push esi |
- psubq mm2, mm2 ; carry = 0 |
- mov ecx, [ebp+12] ; ecx = a_len |
- movd mm1, [ebp+16] ; mm1 = b |
- mov edi, [ebp+20] |
- cmp ecx, 0 |
- je L_6 ; jmp if a_len == 0 |
- mov esi, [ebp+8] ; esi = a |
- cld |
-L_5: |
- movd mm0, [esi] ; mm0 = *a++ |
- add esi, 4 |
- pmuludq mm0, mm1 ; mm0 = b * *a++ |
- paddq mm2, mm0 ; add the carry |
- movd [edi], mm2 ; store the 32bit result |
- add edi, 4 |
- psrlq mm2, 32 ; save the carry |
- dec ecx ; --a_len |
- jnz L_5 ; jmp if a_len != 0 |
-L_6: |
- movd [edi], mm2 ; *c = carry |
- emms |
- pop esi |
- pop edi |
- leave |
- ret |
- nop |
- } |
-} |
- |
-/* |
- * s_mpv_mul_d_add: for i < a_len, c[i] = low 32 bits of (a[i] * b + c[i] + carry); the high |
- * 32 bits become the next carry. The final carry is then stored into (not added to) c[a_len]. |
- * Dispatch: is_sse == 0 -> x86 path, > 0 -> SSE2 path, < 0 -> call s_mpi_is_sse2 and cache it first. |
- * x86-path frame (the SSE2 path reserves no locals; there edi is at ebp - 4 and esi at ebp - 8): |
- * ebp - 40: caller's ebx |
- * ebp - 36: caller's esi |
- * ebp - 32: caller's edi |
- * ebp - 28 .. ebp - 4: reserved by "sub esp,28"; never referenced |
- * ebp + 0: caller's ebp |
- * ebp + 4: return address |
- * ebp + 8: a argument |
- * ebp + 12: a_len argument |
- * ebp + 16: b argument |
- * ebp + 20: c argument |
- * registers (x86 path; the SSE2 path uses mm0 = product, then word of c; mm1 = b; mm2 = running sum / carry): |
- * eax: current digit of a, then low half of the running sum |
- * ebx: carry; also scratch for the current word of c |
- * ecx: a_len (loop counter) |
- * edx: b, then high half of the running sum |
- * esi: a ptr |
- * edi: c ptr (the stosd notes below say "ax"; the full eax is stored) |
- * Returns with a plain "ret": the arguments stay on the stack for the caller to remove. |
- */ |
-__declspec(naked) void |
-s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c) |
-{ |
- __asm { |
- mov eax, is_sse |
- cmp eax, 0 |
- je s_mpv_mul_d_add_x86 |
- jg s_mpv_mul_d_add_sse2 |
- call s_mpi_is_sse2 |
- mov is_sse, eax |
- cmp eax, 0 |
- jg s_mpv_mul_d_add_sse2 |
-s_mpv_mul_d_add_x86: |
- push ebp |
- mov ebp,esp |
- sub esp,28 |
- push edi |
- push esi |
- push ebx |
- mov ebx,0 ; carry = 0 |
- mov ecx,[ebp+12] ; ecx = a_len |
- mov edi,[ebp+20] |
- cmp ecx,0 |
- je L_11 ; jmp if a_len == 0 |
- mov esi,[ebp+8] ; esi = a |
- cld |
-L_10: |
- lodsd ; eax = [ds:esi]; esi += 4 |
- mov edx,[ebp+16] ; edx = b |
- mul edx ; edx:eax = Phi:Plo = a_i * b |
- |
- add eax,ebx ; add carry (ebx) to edx:eax |
- adc edx,0 |
- mov ebx,[edi] ; add in current word from *c |
- add eax,ebx |
- adc edx,0 |
- mov ebx,edx ; high half of product becomes next carry |
- |
- stosd ; [es:edi] = ax; edi += 4; |
- dec ecx ; --a_len |
- jnz L_10 ; jmp if a_len != 0 |
-L_11: |
- mov [edi],ebx ; *c = carry |
- pop ebx |
- pop esi |
- pop edi |
- leave |
- ret |
- nop |
-s_mpv_mul_d_add_sse2: |
- push ebp |
- mov ebp, esp |
- push edi |
- push esi |
- psubq mm2, mm2 ; carry = 0 |
- mov ecx, [ebp+12] ; ecx = a_len |
- movd mm1, [ebp+16] ; mm1 = b |
- mov edi, [ebp+20] |
- cmp ecx, 0 |
- je L_16 ; jmp if a_len == 0 |
- mov esi, [ebp+8] ; esi = a |
- cld |
-L_15: |
- movd mm0, [esi] ; mm0 = *a++ |
- add esi, 4 |
- pmuludq mm0, mm1 ; mm0 = b * *a++ |
- paddq mm2, mm0 ; add the carry |
- movd mm0, [edi] |
- paddq mm2, mm0 ; add the carry |
- movd [edi], mm2 ; store the 32bit result |
- add edi, 4 |
- psrlq mm2, 32 ; save the carry |
- dec ecx ; --a_len |
- jnz L_15 ; jmp if a_len != 0 |
-L_16: |
- movd [edi], mm2 ; *c = carry |
- emms |
- pop esi |
- pop edi |
- leave |
- ret |
- nop |
- } |
-} |
- |
-/* |
- * s_mpv_mul_d_add_prop: for i < a_len, c[i] = low 32 bits of (a[i] * b + c[i] + carry). If the final carry |
- * is nonzero it is added to c[a_len] and rippled through following words until it clears (no length check). |
- * Dispatch: is_sse == 0 -> x86 path, > 0 -> SSE2 path, < 0 -> call s_mpi_is_sse2 and cache it first. |
- * x86-path frame (the SSE2 path reserves no locals; there edi is at ebp - 4, esi at ebp - 8, ebx at ebp - 12): |
- * ebp - 40: caller's ebx |
- * ebp - 36: caller's esi |
- * ebp - 32: caller's edi |
- * ebp - 28 .. ebp - 4: reserved by "sub esp,28"; never referenced |
- * ebp + 0: caller's ebp |
- * ebp + 4: return address |
- * ebp + 8: a argument |
- * ebp + 12: a_len argument |
- * ebp + 16: b argument |
- * ebp + 20: c argument |
- * registers (x86 path; the SSE2 path uses mm0 = product, mm1 = b, mm2 = running sum / carry, mm3 = word of c): |
- * eax: current digit of a, then low half of the running sum; word of c while rippling the carry |
- * ebx: carry; also scratch for the current word of c |
- * ecx: a_len (loop counter) |
- * edx: b, then high half of the running sum |
- * esi: a ptr |
- * edi: c ptr (the stosd notes below say "ax"; the full eax is stored) |
- * Returns with a plain "ret": the arguments stay on the stack for the caller to remove. |
- */ |
-__declspec(naked) void |
-s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c) |
-{ |
- __asm { |
- mov eax, is_sse |
- cmp eax, 0 |
- je s_mpv_mul_d_add_prop_x86 |
- jg s_mpv_mul_d_add_prop_sse2 |
- call s_mpi_is_sse2 |
- mov is_sse, eax |
- cmp eax, 0 |
- jg s_mpv_mul_d_add_prop_sse2 |
-s_mpv_mul_d_add_prop_x86: |
- push ebp |
- mov ebp,esp |
- sub esp,28 |
- push edi |
- push esi |
- push ebx |
- mov ebx,0 ; carry = 0 |
- mov ecx,[ebp+12] ; ecx = a_len |
- mov edi,[ebp+20] |
- cmp ecx,0 |
- je L_21 ; jmp if a_len == 0 |
- cld |
- mov esi,[ebp+8] ; esi = a |
-L_20: |
- lodsd ; eax = [ds:esi]; esi += 4 |
- mov edx,[ebp+16] ; edx = b |
- mul edx ; edx:eax = Phi:Plo = a_i * b |
- |
- add eax,ebx ; add carry (ebx) to edx:eax |
- adc edx,0 |
- mov ebx,[edi] ; add in current word from *c |
- add eax,ebx |
- adc edx,0 |
- mov ebx,edx ; high half of product becomes next carry |
- |
- stosd ; [es:edi] = ax; edi += 4; |
- dec ecx ; --a_len |
- jnz L_20 ; jmp if a_len != 0 |
-L_21: |
- cmp ebx,0 ; is carry zero? |
- jz L_23 |
- mov eax,[edi] ; add in current word from *c |
- add eax,ebx |
- stosd ; [es:edi] = ax; edi += 4; |
- jnc L_23 |
-L_22: |
- mov eax,[edi] ; add in current word from *c |
- adc eax,0 |
- stosd ; [es:edi] = ax; edi += 4; |
- jc L_22 |
-L_23: |
- pop ebx |
- pop esi |
- pop edi |
- leave |
- ret |
- nop |
-s_mpv_mul_d_add_prop_sse2: |
- push ebp |
- mov ebp, esp |
- push edi |
- push esi |
- push ebx |
- psubq mm2, mm2 ; carry = 0 |
- mov ecx, [ebp+12] ; ecx = a_len |
- movd mm1, [ebp+16] ; mm1 = b |
- mov edi, [ebp+20] |
- cmp ecx, 0 |
- je L_26 ; jmp if a_len == 0 |
- mov esi, [ebp+8] ; esi = a |
- cld |
-L_25: |
- movd mm0, [esi] ; mm0 = *a++ |
- movd mm3, [edi] ; fetch the sum |
- add esi, 4 |
- pmuludq mm0, mm1 ; mm0 = b * *a++ |
- paddq mm2, mm0 ; add the carry |
- paddq mm2, mm3 ; add *c++ |
- movd [edi], mm2 ; store the 32bit result |
- add edi, 4 |
- psrlq mm2, 32 ; save the carry |
- dec ecx ; --a_len |
- jnz L_25 ; jmp if a_len != 0 |
-L_26: |
- movd ebx, mm2 |
- cmp ebx, 0 ; is carry zero? |
- jz L_28 |
- mov eax, [edi] |
- add eax, ebx |
- stosd |
- jnc L_28 |
-L_27: |
- mov eax, [edi] ; add in current word from *c |
- adc eax, 0 |
- stosd ; [es:edi] = ax; edi += 4; |
- jc L_27 |
-L_28: |
- emms |
- pop ebx |
- pop esi |
- pop edi |
- leave |
- ret |
- nop |
- } |
-} |
- |
-/* |
- * s_mpv_sqr_add_prop: for i < a_len, adds the 64-bit square of a[i] (plus carry) into sqrs[2i], sqrs[2i+1]; |
- * a leftover carry is then rippled through the following words until it clears (no length check). |
- * Dispatch: is_sse == 0 -> x86 path, > 0 -> SSE2 path, < 0 -> call s_mpi_is_sse2 and cache it first. |
- * x86-path frame (the SSE2 path reserves no locals; there edi is at ebp - 4, esi at ebp - 8, ebx at ebp - 12): |
- * ebp - 24: caller's ebx |
- * ebp - 20: caller's esi |
- * ebp - 16: caller's edi |
- * ebp - 12 .. ebp - 4: reserved by "sub esp,12"; never referenced (carry and a_len stay in registers) |
- * ebp + 0: caller's ebp |
- * ebp + 4: return address |
- * ebp + 8: a argument (pa) |
- * ebp + 12: a_len argument |
- * ebp + 16: sqrs argument (ps) |
- * registers (x86 path; the SSE2 path uses mm0 = square, mm2 = running sum / carry, mm3 = word of sqrs): |
- * eax: current digit of a, then the word being stored (the notes below say "si"/"di"/"ax" for esi/edi/eax) |
- * ebx: carry; also scratch for words loaded from sqrs |
- * ecx: a_len (loop counter) |
- * edx: high half of the square; esi: a ptr; edi: sqrs ptr |
- */ |
-__declspec(naked) void |
-s_mpv_sqr_add_prop(const mp_digit *a, mp_size a_len, mp_digit *sqrs) |
-{ |
- __asm { |
- mov eax, is_sse |
- cmp eax, 0 |
- je s_mpv_sqr_add_prop_x86 |
- jg s_mpv_sqr_add_prop_sse2 |
- call s_mpi_is_sse2 |
- mov is_sse, eax |
- cmp eax, 0 |
- jg s_mpv_sqr_add_prop_sse2 |
-s_mpv_sqr_add_prop_x86: |
- push ebp |
- mov ebp,esp |
- sub esp,12 |
- push edi |
- push esi |
- push ebx |
- mov ebx,0 ; carry = 0 |
- mov ecx,[ebp+12] ; a_len |
- mov edi,[ebp+16] ; edi = ps |
- cmp ecx,0 |
- je L_31 ; jump if a_len == 0 |
- cld |
- mov esi,[ebp+8] ; esi = pa |
-L_30: |
- lodsd ; eax = [ds:si]; si += 4; |
- mul eax |
- |
- add eax,ebx ; add "carry" |
- adc edx,0 |
- mov ebx,[edi] |
- add eax,ebx ; add low word from result |
- mov ebx,[edi+4] |
- stosd ; [es:di] = eax; di += 4; |
- adc edx,ebx ; add high word from result |
- mov ebx,0 |
- mov eax,edx |
- adc ebx,0 |
- stosd ; [es:di] = eax; di += 4; |
- dec ecx ; --a_len |
- jnz L_30 ; jmp if a_len != 0 |
-L_31: |
- cmp ebx,0 ; is carry zero? |
- jz L_34 |
- mov eax,[edi] ; add in current word from *c |
- add eax,ebx |
- stosd ; [es:edi] = ax; edi += 4; |
- jnc L_34 |
-L_32: |
- mov eax,[edi] ; add in current word from *c |
- adc eax,0 |
- stosd ; [es:edi] = ax; edi += 4; |
- jc L_32 |
-L_34: |
- pop ebx |
- pop esi |
- pop edi |
- leave |
- ret |
- nop |
-s_mpv_sqr_add_prop_sse2: |
- push ebp |
- mov ebp, esp |
- push edi |
- push esi |
- push ebx |
- psubq mm2, mm2 ; carry = 0 |
- mov ecx, [ebp+12] ; ecx = a_len |
- mov edi, [ebp+16] |
- cmp ecx, 0 |
- je L_36 ; jmp if a_len == 0 |
- mov esi, [ebp+8] ; esi = a |
- cld |
-L_35: |
- movd mm0, [esi] ; mm0 = *a |
- movd mm3, [edi] ; fetch the sum |
- add esi, 4 |
- pmuludq mm0, mm0 ; mm0 = sqr(a) |
- paddq mm2, mm0 ; add the carry |
- paddq mm2, mm3 ; add the low word |
- movd mm3, [edi+4] |
- movd [edi], mm2 ; store the 32bit result |
- psrlq mm2, 32 |
- paddq mm2, mm3 ; add the high word |
- movd [edi+4], mm2 ; store the 32bit result |
- psrlq mm2, 32 ; save the carry. |
- add edi, 8 |
- dec ecx ; --a_len |
- jnz L_35 ; jmp if a_len != 0 |
-L_36: |
- movd ebx, mm2 |
- cmp ebx, 0 ; is carry zero? |
- jz L_38 |
- mov eax, [edi] |
- add eax, ebx |
- stosd |
- jnc L_38 |
-L_37: |
- mov eax, [edi] ; add in current word from *c |
- adc eax, 0 |
- stosd ; [es:edi] = ax; edi += 4; |
- jc L_37 |
-L_38: |
- emms |
- pop ebx |
- pop esi |
- pop edi |
- leave |
- ret |
- nop |
- } |
-} |
- |
-/* |
- * Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized |
- * so its high bit is 1. This code is from NSPR. |
- * |
- * Stores the quotient in *qp and the remainder in *rp, then returns 0 in eax. |
- * NOTE(review): nothing here checks that the quotient fits in 32 bits; the x86 "div" instruction |
- * raises a divide error when Nhi >= divisor or divisor == 0, so callers must rule that out. |
- * Stack layout after "push ebx": |
- * esp + 0: Caller's ebx |
- * esp + 4: return address |
- * esp + 8: Nhi argument |
- * esp + 12: Nlo argument |
- * esp + 16: divisor argument |
- * esp + 20: qp argument |
- * esp + 24: rp argument |
- * registers: |
- * eax: Nlo, then quotient, then the return value 0 |
- * ebx: divisor, then qp, then rp (caller's value saved and restored) |
- * edx: Nhi, then remainder |
- * ecx, esi, edi: not used |
- */ |
-__declspec(naked) mp_err |
-s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor, |
- mp_digit *qp, mp_digit *rp) |
-{ |
- __asm { |
- push ebx |
- mov edx,[esp+8] |
- mov eax,[esp+12] |
- mov ebx,[esp+16] |
- div ebx |
- mov ebx,[esp+20] |
- mov [ebx],eax |
- mov ebx,[esp+24] |
- mov [ebx],edx |
- xor eax,eax ; return zero |
- pop ebx |
- ret |
- nop |
- } |
-} |