| Index: third_party/boringssl/win-x86/crypto/bn/x86-mont.asm
|
| diff --git a/third_party/boringssl/win-x86/crypto/bn/x86-mont.asm b/third_party/boringssl/win-x86/crypto/bn/x86-mont.asm
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..de7b949927258f9d725c73fa60bb278232f2dcd1
|
| --- /dev/null
|
| +++ b/third_party/boringssl/win-x86/crypto/bn/x86-mont.asm
|
| @@ -0,0 +1,469 @@
|
| +%ifidn __OUTPUT_FORMAT__,obj
|
| +section code use32 class=code align=64 ; obj output format: 32-bit code segment, 64-byte aligned
|
| +%elifidn __OUTPUT_FORMAT__,win32
|
| +%ifdef __YASM_VERSION_ID__
|
| +%if __YASM_VERSION_ID__ < 01010000h
|
| +%error yasm version 1.1.0 or later needed.
|
| +%endif
|
| +; Yasm automatically includes @feat.00 and complains about redefining it.
|
| +; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html
|
| +%else
|
| +$@feat.00 equ 1 ; assemblers other than Yasm: define @feat.00 by hand (Yasm emits it itself, see the note above)
|
| +%endif
|
| +section .text code align=64 ; win32 output format: code section, 64-byte aligned
|
| +%else
|
| +section .text code ; any other output format: plain code section
|
| +%endif
|
| +;extern _OPENSSL_ia32cap_P ; left disabled: the symbol is declared with "common" in the .bss segment at the end of this file
|
| +global _bn_mul_mont ; export the routine under its underscore-prefixed name
|
| +align 16
|
| +_bn_mul_mont: ; entry point (six dword stack arguments are read); returns eax = 0 without doing any work when argument 6 is < 4, otherwise eax = 1
|
| +L$_bn_mul_mont_begin:
|
| + push ebp ; save ebp/ebx/esi/edi; they are popped again at L$000just_leave
|
| + push ebx
|
| + push esi
|
| + push edi
|
| + xor eax,eax ; eax = 0: return value of the early-exit path
|
| + mov edi,DWORD [40+esp] ; edi = argument 6 (4 saved registers + return address put argument 1 at [20+esp]); used below as the word count
|
| + cmp edi,4
|
| + jl NEAR L$000just_leave ; fewer than 4 words (signed compare): not handled here, return 0
|
| + lea esi,[20+esp] ; esi -> argument 1 on the caller's stack
|
| + lea edx,[24+esp] ; edx -> argument 2; used below only as a reference address when placing the frame
|
| + mov ebp,esp ; ebp = stack pointer to restore before the final pops
|
| + add edi,2
|
| + neg edi
|
| + lea esp,[edi*4+esp-32] ; reserve (count+2)*4 + 32 bytes below the current stack pointer
|
| + neg edi ; edi = count+2 again
|
| + mov eax,esp
|
| + sub eax,edx
|
| + and eax,2047
|
| + sub esp,eax ; lower esp so that (esp - edx) becomes a multiple of 2048
|
| + xor edx,esp
|
| + and edx,2048
|
| + xor edx,2048
|
| + sub esp,edx ; lower esp by a further 2048 when bit 11 of esp and of the reference address are equal
|
| + and esp,-64 ; round the frame base down to a 64-byte boundary
|
| + mov eax,DWORD [esi] ; eax, ebx, ecx, edx = arguments 1..4
|
| + mov ebx,DWORD [4+esi]
|
| + mov ecx,DWORD [8+esi]
|
| + mov edx,DWORD [12+esi]
|
| + mov esi,DWORD [16+esi] ; esi = argument 5, treated as a pointer ...
|
| + mov esi,DWORD [esi] ; ... and replaced by the first dword it points to
|
| + mov DWORD [4+esp],eax ; frame slot [4] = argument 1 (written to as the result vector in L$005common_tail)
|
| + mov DWORD [8+esp],ebx ; frame slot [8] = argument 2
|
| + mov DWORD [12+esp],ecx ; frame slot [12] = argument 3
|
| + mov DWORD [16+esp],edx ; frame slot [16] = argument 4
|
| + mov DWORD [20+esp],esi ; frame slot [20] = dword loaded through argument 5 (presumably the Montgomery n0 constant - confirm against the C prototype)
|
| + lea ebx,[edi-3] ; ebx = count-1, the index of the last word
|
| + mov DWORD [24+esp],ebp ; frame slot [24] = stack pointer saved above, reloaded before returning
|
| + lea eax,[_OPENSSL_ia32cap_P]
|
| + bt DWORD [eax],26 ; CF = bit 26 of the first capability dword
|
| + jnc NEAR L$001non_sse2 ; bit clear: take the integer-only path; otherwise fall through to the MMX-register/pmuludq path
|
| + mov eax,-1
|
| + movd mm7,eax ; mm7 = 0x00000000FFFFFFFF, mask that keeps the low dword of a 64-bit sum
|
| + mov esi,DWORD [8+esp] ; esi = argument 2 pointer (called "a" in the comments below)
|
| + mov edi,DWORD [12+esp] ; edi = argument 3 pointer ("b")
|
| + mov ebp,DWORD [16+esp] ; ebp = argument 4 pointer ("n")
|
| + xor edx,edx ; edx = i, index into b (outer loop)
|
| + xor ecx,ecx ; ecx = j, index into a and n (inner loops)
|
| + movd mm4,DWORD [edi] ; mm4 = b[0]
|
| + movd mm5,DWORD [esi] ; mm5 = a[0]
|
| + movd mm3,DWORD [ebp] ; mm3 = n[0]
|
| + pmuludq mm5,mm4 ; mm5 = a[0]*b[0] as a 64-bit product
|
| + movq mm2,mm5 ; mm2 carries the running a*b column sum
|
| + movq mm0,mm5
|
| + pand mm0,mm7 ; mm0 = low dword of a[0]*b[0]
|
| + pmuludq mm5,[20+esp] ; mm5 = m: low dword of the product times the constant in frame slot [20] (later pmuludq uses only its low dword)
|
| + pmuludq mm3,mm5 ; mm3 = n[0]*m
|
| + paddq mm3,mm0 ; add the low dword of a[0]*b[0]; only the carry (high dword) is kept below
|
| + movd mm1,DWORD [4+ebp] ; preload n[1]
|
| + movd mm0,DWORD [4+esi] ; preload a[1]
|
| + psrlq mm2,32 ; keep only the carry of each running sum
|
| + psrlq mm3,32
|
| + inc ecx ; j = 1
|
| +align 16
|
| +L$0021st: ; first pass (i = 0): accumulate a*b[0] + n*m, storing each column one word lower in the scratch vector t at [32+esp]
|
| + pmuludq mm0,mm4 ; a[j]*b[0]
|
| + pmuludq mm1,mm5 ; n[j]*m
|
| + paddq mm2,mm0
|
| + paddq mm3,mm1
|
| + movq mm0,mm2
|
| + pand mm0,mm7 ; low dword of the a*b column
|
| + movd mm1,DWORD [4+ecx*4+ebp] ; preload n[j+1]
|
| + paddq mm3,mm0
|
| + movd mm0,DWORD [4+ecx*4+esi] ; preload a[j+1]
|
| + psrlq mm2,32
|
| + movd DWORD [28+ecx*4+esp],mm3 ; t[j-1] = low dword of the combined column
|
| + psrlq mm3,32
|
| + lea ecx,[1+ecx] ; j++
|
| + cmp ecx,ebx
|
| + jl NEAR L$0021st ; loop while j < ebx (ebx = count-1 on entry to this path)
|
| + pmuludq mm0,mm4 ; last column, j = ebx
|
| + pmuludq mm1,mm5
|
| + paddq mm2,mm0
|
| + paddq mm3,mm1
|
| + movq mm0,mm2
|
| + pand mm0,mm7
|
| + paddq mm3,mm0
|
| + movd DWORD [28+ecx*4+esp],mm3 ; t[j-1]
|
| + psrlq mm2,32
|
| + psrlq mm3,32
|
| + paddq mm3,mm2 ; sum of the two final carries
|
| + movq [32+ebx*4+esp],mm3 ; qword store: fills t[ebx] and t[ebx+1]
|
| + inc edx ; i = 1
|
| +L$003outer: ; one iteration per remaining word b[i]: t = (t + a*b[i] + n*m) moved down one word
|
| + xor ecx,ecx ; j = 0
|
| + movd mm4,DWORD [edx*4+edi] ; mm4 = b[i]
|
| + movd mm5,DWORD [esi] ; a[0]
|
| + movd mm6,DWORD [32+esp] ; t[0]
|
| + movd mm3,DWORD [ebp] ; n[0]
|
| + pmuludq mm5,mm4
|
| + paddq mm5,mm6 ; a[0]*b[i] + t[0]
|
| + movq mm0,mm5
|
| + movq mm2,mm5
|
| + pand mm0,mm7
|
| + pmuludq mm5,[20+esp] ; m for this iteration, derived from the low dword as in the first pass
|
| + pmuludq mm3,mm5
|
| + paddq mm3,mm0
|
| + movd mm6,DWORD [36+esp] ; t[1]
|
| + movd mm1,DWORD [4+ebp] ; preload n[1]
|
| + movd mm0,DWORD [4+esi] ; preload a[1]
|
| + psrlq mm2,32
|
| + psrlq mm3,32
|
| + paddq mm2,mm6 ; fold t[1] into the a*b carry
|
| + inc ecx ; j = 1
|
| + dec ebx ; ebx now serves as a down-counter for the inner loop
|
| +L$004inner:
|
| + pmuludq mm0,mm4 ; a[j]*b[i]
|
| + pmuludq mm1,mm5 ; n[j]*m
|
| + paddq mm2,mm0
|
| + paddq mm3,mm1
|
| + movq mm0,mm2
|
| + movd mm6,DWORD [36+ecx*4+esp] ; t[j+1]
|
| + pand mm0,mm7
|
| + movd mm1,DWORD [4+ecx*4+ebp] ; preload n[j+1]
|
| + paddq mm3,mm0
|
| + movd mm0,DWORD [4+ecx*4+esi] ; preload a[j+1]
|
| + psrlq mm2,32
|
| + movd DWORD [28+ecx*4+esp],mm3 ; t[j-1]
|
| + psrlq mm3,32
|
| + paddq mm2,mm6
|
| + dec ebx
|
| + lea ecx,[1+ecx] ; j++ via lea so the flags from dec ebx reach the jnz
|
| + jnz NEAR L$004inner ; loop until the down-counter reaches zero
|
| + mov ebx,ecx ; restore ebx to the last-word index (j stopped there)
|
| + pmuludq mm0,mm4 ; last column
|
| + pmuludq mm1,mm5
|
| + paddq mm2,mm0
|
| + paddq mm3,mm1
|
| + movq mm0,mm2
|
| + pand mm0,mm7
|
| + paddq mm3,mm0
|
| + movd DWORD [28+ecx*4+esp],mm3 ; t[j-1]
|
| + psrlq mm2,32
|
| + psrlq mm3,32
|
| + movd mm6,DWORD [36+ebx*4+esp] ; t[ebx+1], the top word left by the previous pass
|
| + paddq mm3,mm2
|
| + paddq mm3,mm6
|
| + movq [32+ebx*4+esp],mm3 ; new t[ebx] and t[ebx+1]
|
| + lea edx,[1+edx] ; i++
|
| + cmp edx,ebx
|
| + jle NEAR L$003outer ; continue while i <= last-word index
|
| + emms ; mm registers are no longer needed: reset the MMX/x87 tag state
|
| + jmp NEAR L$005common_tail
|
| +align 16
|
| +L$001non_sse2: ; integer-only path, reached when capability bit 26 is clear; ebx = count-1 on entry
|
| + mov esi,DWORD [8+esp] ; esi = argument 2 pointer (called "a" in the comments below)
|
| + lea ebp,[1+ebx] ; ebp = word count
|
| + mov edi,DWORD [12+esp] ; edi = argument 3 pointer ("b")
|
| + xor ecx,ecx ; j = 0
|
| + mov edx,esi
|
| + and ebp,1 ; ebp = count & 1
|
| + sub edx,edi ; edx = a - b, zero when both pointers are the same
|
| + lea eax,[4+ebx*4+edi] ; eax = &b[count], one past the last word of b
|
| + or ebp,edx ; ZF set only if count is even and a == b
|
| + mov edi,DWORD [edi] ; edi = b[0] (mov leaves the flags untouched)
|
| + jz NEAR L$006bn_sqr_mont ; even-length squaring takes the dedicated path
|
| + mov DWORD [28+esp],eax ; frame slot [28] = end-of-b pointer, the outer loop bound
|
| + mov eax,DWORD [esi] ; eax = a[0]
|
| + xor edx,edx ; carry = 0
|
| +align 16
|
| +L$007mull: ; first pass: scratch vector t (at [32+esp]) = a * b[0]
|
| + mov ebp,edx ; ebp = carry from the previous word
|
| + mul edi ; edx:eax = a[j] * b[0]
|
| + add ebp,eax
|
| + lea ecx,[1+ecx] ; j++ (lea keeps CF intact for the adc)
|
| + adc edx,0
|
| + mov eax,DWORD [ecx*4+esi] ; preload the next a word
|
| + cmp ecx,ebx
|
| + mov DWORD [28+ecx*4+esp],ebp ; t[j-1] with j already incremented, i.e. the word just computed
|
| + jl NEAR L$007mull
|
| + mov ebp,edx
|
| + mul edi ; last word: a[count-1] * b[0]
|
| + mov edi,DWORD [20+esp] ; edi = constant from frame slot [20]
|
| + add eax,ebp
|
| + mov esi,DWORD [16+esp] ; esi = argument 4 pointer ("n")
|
| + adc edx,0
|
| + imul edi,DWORD [32+esp] ; edi = m = t[0] * constant (low 32 bits)
|
| + mov DWORD [32+ebx*4+esp],eax ; t[count-1]
|
| + xor ecx,ecx
|
| + mov DWORD [36+ebx*4+esp],edx ; t[count]
|
| + mov DWORD [40+ebx*4+esp],ecx ; t[count+1] = 0
|
| + mov eax,DWORD [esi] ; n[0]
|
| + mul edi ; n[0] * m
|
| + add eax,DWORD [32+esp] ; the low-word sum is discarded (eax is reloaded next); only the carry is kept
|
| + mov eax,DWORD [4+esi] ; n[1]
|
| + adc edx,0
|
| + inc ecx ; j = 1
|
| + jmp NEAR L$0082ndmadd
|
| +align 16
|
| +L$0091stmadd: ; t += a * b[i] for the current word of b held in edi
|
| + mov ebp,edx
|
| + mul edi ; a[j] * b[i]
|
| + add ebp,DWORD [32+ecx*4+esp] ; + t[j]
|
| + lea ecx,[1+ecx] ; j++ without disturbing CF
|
| + adc edx,0
|
| + add ebp,eax
|
| + mov eax,DWORD [ecx*4+esi] ; preload the next a word
|
| + adc edx,0
|
| + cmp ecx,ebx
|
| + mov DWORD [28+ecx*4+esp],ebp ; written back to the slot just read (j already incremented)
|
| + jl NEAR L$0091stmadd
|
| + mov ebp,edx
|
| + mul edi ; last word
|
| + add eax,DWORD [32+ebx*4+esp]
|
| + mov edi,DWORD [20+esp] ; edi = constant from frame slot [20]
|
| + adc edx,0
|
| + mov esi,DWORD [16+esp] ; esi = n
|
| + add ebp,eax
|
| + adc edx,0
|
| + imul edi,DWORD [32+esp] ; m for this iteration = t[0] * constant
|
| + xor ecx,ecx
|
| + add edx,DWORD [36+ebx*4+esp] ; propagate the carry into t[count]
|
| + mov DWORD [32+ebx*4+esp],ebp ; t[count-1]
|
| + adc ecx,0
|
| + mov eax,DWORD [esi] ; n[0]
|
| + mov DWORD [36+ebx*4+esp],edx ; t[count]
|
| + mov DWORD [40+ebx*4+esp],ecx ; t[count+1] = carry out
|
| + mul edi ; n[0] * m
|
| + add eax,DWORD [32+esp] ; only the carry of this sum is used
|
| + mov eax,DWORD [4+esi] ; n[1]
|
| + adc edx,0
|
| + mov ecx,1 ; j = 1
|
| +align 16
|
| +L$0082ndmadd: ; t = t + n * m, with every result word stored one slot lower
|
| + mov ebp,edx
|
| + mul edi ; n[j] * m
|
| + add ebp,DWORD [32+ecx*4+esp] ; + t[j]
|
| + lea ecx,[1+ecx]
|
| + adc edx,0
|
| + add ebp,eax
|
| + mov eax,DWORD [ecx*4+esi] ; preload the next n word
|
| + adc edx,0
|
| + cmp ecx,ebx
|
| + mov DWORD [24+ecx*4+esp],ebp ; stored one slot below the word just read: this is the one-word shift
|
| + jl NEAR L$0082ndmadd
|
| + mov ebp,edx
|
| + mul edi ; last word
|
| + add ebp,DWORD [32+ebx*4+esp]
|
| + adc edx,0
|
| + add ebp,eax
|
| + adc edx,0
|
| + mov DWORD [28+ebx*4+esp],ebp ; t[count-2]
|
| + xor eax,eax
|
| + mov ecx,DWORD [12+esp] ; ecx = current position in b
|
| + add edx,DWORD [36+ebx*4+esp]
|
| + adc eax,DWORD [40+ebx*4+esp]
|
| + lea ecx,[4+ecx] ; advance to the next b word
|
| + mov DWORD [32+ebx*4+esp],edx ; t[count-1]
|
| + cmp ecx,DWORD [28+esp] ; reached &b[count]?
|
| + mov DWORD [36+ebx*4+esp],eax ; t[count]
|
| + je NEAR L$005common_tail ; every word of b has been consumed
|
| + mov edi,DWORD [ecx] ; edi = next b word
|
| + mov esi,DWORD [8+esp] ; esi = a again
|
| + mov DWORD [12+esp],ecx ; remember the position in b
|
| + xor ecx,ecx ; j = 0
|
| + xor edx,edx ; carry = 0
|
| + mov eax,DWORD [esi] ; a[0]
|
| + jmp NEAR L$0091stmadd
|
| +align 16
|
| +L$006bn_sqr_mont: ; squaring path, entered with a == b and an even count; esi = a, edi = a[0], ebx = count-1, ecx = 0
|
| + mov DWORD [esp],ebx ; frame slot [0] = last-word index (count-1)
|
| + mov DWORD [12+esp],ecx ; frame slot [12] is reused as the outer index i, starting at 0
|
| + mov eax,edi
|
| + mul edi ; edx:eax = a[0]^2
|
| + mov DWORD [32+esp],eax ; t[0] = low word (scratch vector t starts at [32+esp])
|
| + mov ebx,edx
|
| + shr edx,1 ; split the high word: edx = hi >> 1 feeds the sum that gets doubled ...
|
| + and ebx,1 ; ... and ebx = hi & 1 is re-inserted after the doubling
|
| + inc ecx ; j = 1
|
| +align 16
|
| +L$010sqr: ; first pass: t = a[0]^2 + 2 * a[0] * a[1..], doubling each cross product on the fly
|
| + mov eax,DWORD [ecx*4+esi] ; a[j]
|
| + mov ebp,edx
|
| + mul edi ; a[j] * a[0]
|
| + add eax,ebp
|
| + lea ecx,[1+ecx] ; j++ (CF preserved for the adc)
|
| + adc edx,0
|
| + lea ebp,[eax*2+ebx] ; double the word and insert the bit carried over from the previous word
|
| + shr eax,31 ; bit shifted out by the doubling
|
| + cmp ecx,DWORD [esp]
|
| + mov ebx,eax ; becomes the carried bit for the next word
|
| + mov DWORD [28+ecx*4+esp],ebp ; t[j-1] with j already incremented, i.e. the word just computed
|
| + jl NEAR L$010sqr
|
| + mov eax,DWORD [ecx*4+esi] ; last word a[count-1]
|
| + mov ebp,edx
|
| + mul edi
|
| + add eax,ebp
|
| + mov edi,DWORD [20+esp] ; edi = constant from frame slot [20]
|
| + adc edx,0
|
| + mov esi,DWORD [16+esp] ; esi = argument 4 pointer ("n")
|
| + lea ebp,[eax*2+ebx]
|
| + imul edi,DWORD [32+esp] ; edi = m = t[0] * constant (low 32 bits)
|
| + shr eax,31
|
| + mov DWORD [32+ecx*4+esp],ebp ; t[count-1]
|
| + lea ebp,[edx*2+eax] ; doubled high word plus the carried bit
|
| + mov eax,DWORD [esi] ; n[0]
|
| + shr edx,31
|
| + mov DWORD [36+ecx*4+esp],ebp ; t[count]
|
| + mov DWORD [40+ecx*4+esp],edx ; t[count+1]
|
| + mul edi ; n[0] * m
|
| + add eax,DWORD [32+esp] ; only the carry of this sum is used (eax is reloaded below)
|
| + mov ebx,ecx ; ebx = count-1 again
|
| + adc edx,0
|
| + mov eax,DWORD [4+esi] ; n[1]
|
| + mov ecx,1 ; j = 1
|
| +align 16
|
| +L$0113rdmadd: ; t = t + n * m stored one slot lower; handles two words per iteration
|
| + mov ebp,edx
|
| + mul edi ; n[j] * m
|
| + add ebp,DWORD [32+ecx*4+esp] ; + t[j]
|
| + adc edx,0
|
| + add ebp,eax
|
| + mov eax,DWORD [4+ecx*4+esi] ; n[j+1]
|
| + adc edx,0
|
| + mov DWORD [28+ecx*4+esp],ebp ; t[j-1]
|
| + mov ebp,edx
|
| + mul edi ; n[j+1] * m
|
| + add ebp,DWORD [36+ecx*4+esp] ; + t[j+1]
|
| + lea ecx,[2+ecx] ; j += 2 without disturbing CF
|
| + adc edx,0
|
| + add ebp,eax
|
| + mov eax,DWORD [ecx*4+esi] ; preload the next n word
|
| + adc edx,0
|
| + cmp ecx,ebx
|
| + mov DWORD [24+ecx*4+esp],ebp ; second word of the pair, also stored one slot below where it was read
|
| + jl NEAR L$0113rdmadd
|
| + mov ebp,edx
|
| + mul edi ; last word
|
| + add ebp,DWORD [32+ebx*4+esp]
|
| + adc edx,0
|
| + add ebp,eax
|
| + adc edx,0
|
| + mov DWORD [28+ebx*4+esp],ebp ; t[count-2]
|
| + mov ecx,DWORD [12+esp] ; ecx = i
|
| + xor eax,eax
|
| + mov esi,DWORD [8+esp] ; esi = a again
|
| + add edx,DWORD [36+ebx*4+esp]
|
| + adc eax,DWORD [40+ebx*4+esp]
|
| + mov DWORD [32+ebx*4+esp],edx ; t[count-1]
|
| + cmp ecx,ebx
|
| + mov DWORD [36+ebx*4+esp],eax ; t[count]
|
| + je NEAR L$005common_tail ; i reached the last word: all passes done
|
| + mov edi,DWORD [4+ecx*4+esi] ; edi = a[i+1]
|
| + lea ecx,[1+ecx] ; i++
|
| + mov eax,edi
|
| + mov DWORD [12+esp],ecx ; store the new i
|
| + mul edi ; a[i]^2
|
| + add eax,DWORD [32+ecx*4+esp] ; + t[i]
|
| + adc edx,0
|
| + mov DWORD [32+ecx*4+esp],eax
|
| + xor ebp,ebp
|
| + cmp ecx,ebx
|
| + lea ecx,[1+ecx] ; j = i+1 (flags from the cmp are kept)
|
| + je NEAR L$012sqrlast ; i is the last word: no cross products remain
|
| + mov ebx,edx
|
| + shr edx,1 ; same high-word split as in the first pass
|
| + and ebx,1
|
| +align 16
|
| +L$013sqradd: ; t[j] += 2 * a[j] * a[i] for j > i
|
| + mov eax,DWORD [ecx*4+esi] ; a[j]
|
| + mov ebp,edx
|
| + mul edi ; a[j] * a[i]
|
| + add eax,ebp
|
| + lea ebp,[eax*1+eax] ; ebp = 2 * eax (lea keeps CF for the adc)
|
| + adc edx,0
|
| + shr eax,31 ; bit shifted out by the doubling
|
| + add ebp,DWORD [32+ecx*4+esp] ; + t[j]
|
| + lea ecx,[1+ecx]
|
| + adc eax,0
|
| + add ebp,ebx ; + carried bits from the previous word
|
| + adc eax,0
|
| + cmp ecx,DWORD [esp]
|
| + mov DWORD [28+ecx*4+esp],ebp ; written back to the slot just read (j already incremented)
|
| + mov ebx,eax
|
| + jle NEAR L$013sqradd
|
| + mov ebp,edx
|
| + add edx,edx ; double the final high word
|
| + shr ebp,31
|
| + add edx,ebx
|
| + adc ebp,0
|
| +L$012sqrlast: ; fold edx:ebp into the top of t, then start the reduction pass for this i
|
| + mov edi,DWORD [20+esp] ; edi = constant from frame slot [20]
|
| + mov esi,DWORD [16+esp] ; esi = n
|
| + imul edi,DWORD [32+esp] ; m = t[0] * constant
|
| + add edx,DWORD [32+ecx*4+esp]
|
| + mov eax,DWORD [esi] ; n[0]
|
| + adc ebp,0
|
| + mov DWORD [32+ecx*4+esp],edx
|
| + mov DWORD [36+ecx*4+esp],ebp
|
| + mul edi ; n[0] * m
|
| + add eax,DWORD [32+esp] ; only the carry of this sum is used
|
| + lea ebx,[ecx-1] ; ebx = ecx-1, the loop bound for L$0113rdmadd (lea keeps CF for the adc)
|
| + adc edx,0
|
| + mov ecx,1 ; j = 1
|
| + mov eax,DWORD [4+esi] ; n[1]
|
| + jmp NEAR L$0113rdmadd
|
| +align 16
|
| +L$005common_tail: ; shared final step: result = t - n, then keep either t or t - n per word using a mask (no branch on the data); ebx = count-1
|
| + mov ebp,DWORD [16+esp] ; ebp = argument 4 pointer ("n")
|
| + mov edi,DWORD [4+esp] ; edi = argument 1 pointer (result vector)
|
| + lea esi,[32+esp] ; esi -> t[0], the scratch vector
|
| + mov eax,DWORD [esi] ; eax = t[0]
|
| + mov ecx,ebx ; ecx = count-1, loop counter
|
| + xor edx,edx ; k = 0; also clears CF so the first sbb starts without a borrow
|
| +align 16
|
| +L$014sub:
|
| + sbb eax,DWORD [edx*4+ebp] ; t[k] - n[k] - borrow
|
| + mov DWORD [edx*4+edi],eax ; result[k] = difference
|
| + dec ecx ; dec leaves CF alone, so the borrow survives into the next sbb
|
| + mov eax,DWORD [4+edx*4+esi] ; next t word (t[count] once the loop ends)
|
| + lea edx,[1+edx] ; k++ without touching the flags
|
| + jge NEAR L$014sub ; loop while the counter from dec is still >= 0
|
| + sbb eax,0 ; eax = t[count] - final borrow; used as the selection mask below (expected to be 0 or all ones)
|
| +align 16
|
| +L$015copy: ; walk k from count-1 down to 0
|
| + mov edx,DWORD [ebx*4+esi] ; t[k]
|
| + mov ebp,DWORD [ebx*4+edi] ; result[k], currently t[k] - n[k]
|
| + xor edx,ebp
|
| + and edx,eax
|
| + xor edx,ebp ; edx = (mask & t[k]) | (~mask & result[k])
|
| + mov DWORD [ebx*4+esi],ecx ; overwrite the scratch word t[k] (ecx is -1 after the subtraction loop)
|
| + mov DWORD [ebx*4+edi],edx ; final result word
|
| + dec ebx
|
| + jge NEAR L$015copy
|
| + mov esp,DWORD [24+esp] ; back to the stack pointer saved in frame slot [24] at entry
|
| + mov eax,1 ; return 1: a result was produced
|
| +L$000just_leave: ; also reached directly from the size check, in which case eax is still 0
|
| + pop edi
|
| + pop esi
|
| + pop ebx
|
| + pop ebp
|
| + ret
|
| +db 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105 ; ASCII "Montgomery Multi"
|
| +db 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56 ; ASCII "plication for x8"
|
| +db 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121 ; ASCII "6, CRYPTOGAMS by"
|
| +db 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46 ; ASCII " <appro@openssl."
|
| +db 111,114,103,62,0 ; ASCII "org>" followed by a terminating NUL
|
| +segment .bss
|
| +common _OPENSSL_ia32cap_P 16 ; 16-byte common variable; bit 26 of its first dword is tested in _bn_mul_mont
|
|
|