Index: third_party/boringssl/linux-x86/crypto/bn/x86-mont.S
diff --git a/third_party/boringssl/linux-x86/crypto/bn/x86-mont.S b/third_party/boringssl/linux-x86/crypto/bn/x86-mont.S
new file mode 100644
index 0000000000000000000000000000000000000000..1569b2cff1738b661f73f9aa5808299406c18caa
--- /dev/null
+++ b/third_party/boringssl/linux-x86/crypto/bn/x86-mont.S
@@ -0,0 +1,483 @@
+#if defined(__i386__)
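+/* Montgomery multiplication for x86: bn_mul_mont(rp, ap, bp, np, n0, num)
+ * computes rp = ap*bp/R mod np, with R = 2^(32*num), and returns 1.
+ * When num < 4 it instead returns 0 so the caller can take a generic path. */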
+.file "src/crypto/bn/asm/x86-mont.S"
+.text
+.globl bn_mul_mont
+.hidden bn_mul_mont
+.type bn_mul_mont,@function
+.align 16
+bn_mul_mont:
+.L_bn_mul_mont_begin:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ xorl %eax,%eax
+ movl 40(%esp),%edi
+ cmpl $4,%edi
+ jl .L000just_leave
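+ /* Carve out a scratch area of num+2 words below %esp, align the frame
+ * to 64 bytes, and displace it by 2KB depending on the argument
+ * address, presumably to spread frames across cache banks. */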
+ leal 20(%esp),%esi
+ leal 24(%esp),%edx
+ movl %esp,%ebp
+ addl $2,%edi
+ negl %edi
+ leal -32(%esp,%edi,4),%esp
+ negl %edi
+ movl %esp,%eax
+ subl %edx,%eax
+ andl $2047,%eax
+ subl %eax,%esp
+ xorl %esp,%edx
+ andl $2048,%edx
+ xorl $2048,%edx
+ subl %edx,%esp
+ andl $-64,%esp
+ movl (%esi),%eax
+ movl 4(%esi),%ebx
+ movl 8(%esi),%ecx
+ movl 12(%esi),%edx
+ movl 16(%esi),%esi
+ movl (%esi),%esi
+ movl %eax,4(%esp)
+ movl %ebx,8(%esp)
+ movl %ecx,12(%esp)
+ movl %edx,16(%esp)
+ movl %esi,20(%esp)
+ leal -3(%edi),%ebx
+ movl %ebp,24(%esp)
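+ /* Saved state: 4(%esp)=rp, 8(%esp)=ap, 12(%esp)=bp, 16(%esp)=np,
+ * 20(%esp)=n0[0], 24(%esp)=original %esp; %ebx = num-1. */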
+ call .L001PIC_me_up
+.L001PIC_me_up:
+ popl %eax
+ leal OPENSSL_ia32cap_P-.L001PIC_me_up(%eax),%eax
+ btl $26,(%eax)
+ jnc .L002non_sse2
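+ /* Bit 26 of OPENSSL_ia32cap_P is the SSE2 flag.  The SSE2 path forms
+ * 32x32->64-bit products with pmuludq, carrying in MMX registers. */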
+ movl $-1,%eax
+ movd %eax,%mm7
+ movl 8(%esp),%esi
+ movl 12(%esp),%edi
+ movl 16(%esp),%ebp
+ xorl %edx,%edx
+ xorl %ecx,%ecx
+ movd (%edi),%mm4
+ movd (%esi),%mm5
+ movd (%ebp),%mm3
+ pmuludq %mm4,%mm5
+ movq %mm5,%mm2
+ movq %mm5,%mm0
+ pand %mm7,%mm0
+ pmuludq 20(%esp),%mm5
+ pmuludq %mm5,%mm3
+ paddq %mm0,%mm3
+ movd 4(%ebp),%mm1
+ movd 4(%esi),%mm0
+ psrlq $32,%mm2
+ psrlq $32,%mm3
+ incl %ecx
+.align 16
+.L0031st:
+ pmuludq %mm4,%mm0
+ pmuludq %mm5,%mm1
+ paddq %mm0,%mm2
+ paddq %mm1,%mm3
+ movq %mm2,%mm0
+ pand %mm7,%mm0
+ movd 4(%ebp,%ecx,4),%mm1
+ paddq %mm0,%mm3
+ movd 4(%esi,%ecx,4),%mm0
+ psrlq $32,%mm2
+ movd %mm3,28(%esp,%ecx,4)
+ psrlq $32,%mm3
+ leal 1(%ecx),%ecx
+ cmpl %ebx,%ecx
+ jl .L0031st
+ pmuludq %mm4,%mm0
+ pmuludq %mm5,%mm1
+ paddq %mm0,%mm2
+ paddq %mm1,%mm3
+ movq %mm2,%mm0
+ pand %mm7,%mm0
+ paddq %mm0,%mm3
+ movd %mm3,28(%esp,%ecx,4)
+ psrlq $32,%mm2
+ psrlq $32,%mm3
+ paddq %mm2,%mm3
+ movq %mm3,32(%esp,%ebx,4)
+ incl %edx
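+ /* Outer loop: add ap[]*bp[i] into tp[], then fold in m*np[] with
+ * m = tp[0]*n0 mod 2^32 so that the low word cancels. */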
+.L004outer:
+ xorl %ecx,%ecx
+ movd (%edi,%edx,4),%mm4
+ movd (%esi),%mm5
+ movd 32(%esp),%mm6
+ movd (%ebp),%mm3
+ pmuludq %mm4,%mm5
+ paddq %mm6,%mm5
+ movq %mm5,%mm0
+ movq %mm5,%mm2
+ pand %mm7,%mm0
+ pmuludq 20(%esp),%mm5
+ pmuludq %mm5,%mm3
+ paddq %mm0,%mm3
+ movd 36(%esp),%mm6
+ movd 4(%ebp),%mm1
+ movd 4(%esi),%mm0
+ psrlq $32,%mm2
+ psrlq $32,%mm3
+ paddq %mm6,%mm2
+ incl %ecx
+ decl %ebx
+.L005inner:
+ pmuludq %mm4,%mm0
+ pmuludq %mm5,%mm1
+ paddq %mm0,%mm2
+ paddq %mm1,%mm3
+ movq %mm2,%mm0
+ movd 36(%esp,%ecx,4),%mm6
+ pand %mm7,%mm0
+ movd 4(%ebp,%ecx,4),%mm1
+ paddq %mm0,%mm3
+ movd 4(%esi,%ecx,4),%mm0
+ psrlq $32,%mm2
+ movd %mm3,28(%esp,%ecx,4)
+ psrlq $32,%mm3
+ paddq %mm6,%mm2
+ decl %ebx
+ leal 1(%ecx),%ecx
+ jnz .L005inner
+ movl %ecx,%ebx
+ pmuludq %mm4,%mm0
+ pmuludq %mm5,%mm1
+ paddq %mm0,%mm2
+ paddq %mm1,%mm3
+ movq %mm2,%mm0
+ pand %mm7,%mm0
+ paddq %mm0,%mm3
+ movd %mm3,28(%esp,%ecx,4)
+ psrlq $32,%mm2
+ psrlq $32,%mm3
+ movd 36(%esp,%ebx,4),%mm6
+ paddq %mm2,%mm3
+ paddq %mm6,%mm3
+ movq %mm3,32(%esp,%ebx,4)
+ leal 1(%edx),%edx
+ cmpl %ebx,%edx
+ jle .L004outer
+ emms
+ jmp .L006common_tail
+.align 16
+.L002non_sse2:
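+ /* Integer-only path.  When ap == bp (and num is even), branch to the
+ * dedicated squaring code at .L007bn_sqr_mont. */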
+ movl 8(%esp),%esi
+ leal 1(%ebx),%ebp
+ movl 12(%esp),%edi
+ xorl %ecx,%ecx
+ movl %esi,%edx
+ andl $1,%ebp
+ subl %edi,%edx
+ leal 4(%edi,%ebx,4),%eax
+ orl %edx,%ebp
+ movl (%edi),%edi
+ jz .L007bn_sqr_mont
+ movl %eax,28(%esp)
+ movl (%esi),%eax
+ xorl %edx,%edx
+.align 16
+.L008mull:
+ movl %edx,%ebp
+ mull %edi
+ addl %eax,%ebp
+ leal 1(%ecx),%ecx
+ adcl $0,%edx
+ movl (%esi,%ecx,4),%eax
+ cmpl %ebx,%ecx
+ movl %ebp,28(%esp,%ecx,4)
+ jl .L008mull
+ movl %edx,%ebp
+ mull %edi
+ movl 20(%esp),%edi
+ addl %ebp,%eax
+ movl 16(%esp),%esi
+ adcl $0,%edx
+ imull 32(%esp),%edi
+ movl %eax,32(%esp,%ebx,4)
+ xorl %ecx,%ecx
+ movl %edx,36(%esp,%ebx,4)
+ movl %ecx,40(%esp,%ebx,4)
+ movl (%esi),%eax
+ mull %edi
+ addl 32(%esp),%eax
+ movl 4(%esi),%eax
+ adcl $0,%edx
+ incl %ecx
+ jmp .L0092ndmadd
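+ /* .L0101stmadd adds ap[]*bp[i] into tp[]; .L0092ndmadd finishes the
+ * reduction by adding np[]*m and shifting tp[] down one word. */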
+.align 16
+.L0101stmadd:
+ movl %edx,%ebp
+ mull %edi
+ addl 32(%esp,%ecx,4),%ebp
+ leal 1(%ecx),%ecx
+ adcl $0,%edx
+ addl %eax,%ebp
+ movl (%esi,%ecx,4),%eax
+ adcl $0,%edx
+ cmpl %ebx,%ecx
+ movl %ebp,28(%esp,%ecx,4)
+ jl .L0101stmadd
+ movl %edx,%ebp
+ mull %edi
+ addl 32(%esp,%ebx,4),%eax
+ movl 20(%esp),%edi
+ adcl $0,%edx
+ movl 16(%esp),%esi
+ addl %eax,%ebp
+ adcl $0,%edx
+ imull 32(%esp),%edi
+ xorl %ecx,%ecx
+ addl 36(%esp,%ebx,4),%edx
+ movl %ebp,32(%esp,%ebx,4)
+ adcl $0,%ecx
+ movl (%esi),%eax
+ movl %edx,36(%esp,%ebx,4)
+ movl %ecx,40(%esp,%ebx,4)
+ mull %edi
+ addl 32(%esp),%eax
+ movl 4(%esi),%eax
+ adcl $0,%edx
+ movl $1,%ecx
+.align 16
+.L0092ndmadd:
+ movl %edx,%ebp
+ mull %edi
+ addl 32(%esp,%ecx,4),%ebp
+ leal 1(%ecx),%ecx
+ adcl $0,%edx
+ addl %eax,%ebp
+ movl (%esi,%ecx,4),%eax
+ adcl $0,%edx
+ cmpl %ebx,%ecx
+ movl %ebp,24(%esp,%ecx,4)
+ jl .L0092ndmadd
+ movl %edx,%ebp
+ mull %edi
+ addl 32(%esp,%ebx,4),%ebp
+ adcl $0,%edx
+ addl %eax,%ebp
+ adcl $0,%edx
+ movl %ebp,28(%esp,%ebx,4)
+ xorl %eax,%eax
+ movl 12(%esp),%ecx
+ addl 36(%esp,%ebx,4),%edx
+ adcl 40(%esp,%ebx,4),%eax
+ leal 4(%ecx),%ecx
+ movl %edx,32(%esp,%ebx,4)
+ cmpl 28(%esp),%ecx
+ movl %eax,36(%esp,%ebx,4)
+ je .L006common_tail
+ movl (%ecx),%edi
+ movl 8(%esp),%esi
+ movl %ecx,12(%esp)
+ xorl %ecx,%ecx
+ xorl %edx,%edx
+ movl (%esi),%eax
+ jmp .L0101stmadd
+.align 16
+.L007bn_sqr_mont:
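+ /* Squaring: each cross product ap[i]*ap[j] is computed once and
+ * doubled, halving the number of multiplications. */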
+ movl %ebx,(%esp)
+ movl %ecx,12(%esp)
+ movl %edi,%eax
+ mull %edi
+ movl %eax,32(%esp)
+ movl %edx,%ebx
+ shrl $1,%edx
+ andl $1,%ebx
+ incl %ecx
+.align 16
+.L011sqr:
+ movl (%esi,%ecx,4),%eax
+ movl %edx,%ebp
+ mull %edi
+ addl %ebp,%eax
+ leal 1(%ecx),%ecx
+ adcl $0,%edx
+ leal (%ebx,%eax,2),%ebp
+ shrl $31,%eax
+ cmpl (%esp),%ecx
+ movl %eax,%ebx
+ movl %ebp,28(%esp,%ecx,4)
+ jl .L011sqr
+ movl (%esi,%ecx,4),%eax
+ movl %edx,%ebp
+ mull %edi
+ addl %ebp,%eax
+ movl 20(%esp),%edi
+ adcl $0,%edx
+ movl 16(%esp),%esi
+ leal (%ebx,%eax,2),%ebp
+ imull 32(%esp),%edi
+ shrl $31,%eax
+ movl %ebp,32(%esp,%ecx,4)
+ leal (%eax,%edx,2),%ebp
+ movl (%esi),%eax
+ shrl $31,%edx
+ movl %ebp,36(%esp,%ecx,4)
+ movl %edx,40(%esp,%ecx,4)
+ mull %edi
+ addl 32(%esp),%eax
+ movl %ecx,%ebx
+ adcl $0,%edx
+ movl 4(%esi),%eax
+ movl $1,%ecx
+.align 16
+.L0123rdmadd:
+ movl %edx,%ebp
+ mull %edi
+ addl 32(%esp,%ecx,4),%ebp
+ adcl $0,%edx
+ addl %eax,%ebp
+ movl 4(%esi,%ecx,4),%eax
+ adcl $0,%edx
+ movl %ebp,28(%esp,%ecx,4)
+ movl %edx,%ebp
+ mull %edi
+ addl 36(%esp,%ecx,4),%ebp
+ leal 2(%ecx),%ecx
+ adcl $0,%edx
+ addl %eax,%ebp
+ movl (%esi,%ecx,4),%eax
+ adcl $0,%edx
+ cmpl %ebx,%ecx
+ movl %ebp,24(%esp,%ecx,4)
+ jl .L0123rdmadd
+ movl %edx,%ebp
+ mull %edi
+ addl 32(%esp,%ebx,4),%ebp
+ adcl $0,%edx
+ addl %eax,%ebp
+ adcl $0,%edx
+ movl %ebp,28(%esp,%ebx,4)
+ movl 12(%esp),%ecx
+ xorl %eax,%eax
+ movl 8(%esp),%esi
+ addl 36(%esp,%ebx,4),%edx
+ adcl 40(%esp,%ebx,4),%eax
+ movl %edx,32(%esp,%ebx,4)
+ cmpl %ebx,%ecx
+ movl %eax,36(%esp,%ebx,4)
+ je .L006common_tail
+ movl 4(%esi,%ecx,4),%edi
+ leal 1(%ecx),%ecx
+ movl %edi,%eax
+ movl %ecx,12(%esp)
+ mull %edi
+ addl 32(%esp,%ecx,4),%eax
+ adcl $0,%edx
+ movl %eax,32(%esp,%ecx,4)
+ xorl %ebp,%ebp
+ cmpl %ebx,%ecx
+ leal 1(%ecx),%ecx
+ je .L013sqrlast
+ movl %edx,%ebx
+ shrl $1,%edx
+ andl $1,%ebx
+.align 16
+.L014sqradd:
+ movl (%esi,%ecx,4),%eax
+ movl %edx,%ebp
+ mull %edi
+ addl %ebp,%eax
+ leal (%eax,%eax,1),%ebp
+ adcl $0,%edx
+ shrl $31,%eax
+ addl 32(%esp,%ecx,4),%ebp
+ leal 1(%ecx),%ecx
+ adcl $0,%eax
+ addl %ebx,%ebp
+ adcl $0,%eax
+ cmpl (%esp),%ecx
+ movl %ebp,28(%esp,%ecx,4)
+ movl %eax,%ebx
+ jle .L014sqradd
+ movl %edx,%ebp
+ addl %edx,%edx
+ shrl $31,%ebp
+ addl %ebx,%edx
+ adcl $0,%ebp
+.L013sqrlast:
+ movl 20(%esp),%edi
+ movl 16(%esp),%esi
+ imull 32(%esp),%edi
+ addl 32(%esp,%ecx,4),%edx
+ movl (%esi),%eax
+ adcl $0,%ebp
+ movl %edx,32(%esp,%ecx,4)
+ movl %ebp,36(%esp,%ecx,4)
+ mull %edi
+ addl 32(%esp),%eax
+ leal -1(%ecx),%ebx
+ adcl $0,%edx
+ movl $1,%ecx
+ movl 4(%esi),%eax
+ jmp .L0123rdmadd
+.align 16
+.L006common_tail:
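+ /* Final reduction: conditionally subtract np from tp, selecting the
+ * result without branching on the outcome. */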
+ movl 16(%esp),%ebp
+ movl 4(%esp),%edi
+ leal 32(%esp),%esi
+ movl (%esi),%eax
+ movl %ebx,%ecx
+ xorl %edx,%edx
+.align 16
+.L015sub:
+ sbbl (%ebp,%edx,4),%eax
+ movl %eax,(%edi,%edx,4)
+ decl %ecx
+ movl 4(%esi,%edx,4),%eax
+ leal 1(%edx),%edx
+ jge .L015sub
+ sbbl $0,%eax
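+ /* %eax is now 0 or -1, the borrow mask for the masked select below. */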
+.align 16
+.L016copy:
+ movl (%esi,%ebx,4),%edx
+ movl (%edi,%ebx,4),%ebp
+ xorl %ebp,%edx
+ andl %eax,%edx
+ xorl %ebp,%edx
+ movl %ecx,(%esi,%ebx,4)
+ movl %edx,(%edi,%ebx,4)
+ decl %ebx
+ jge .L016copy
+ movl 24(%esp),%esp
+ movl $1,%eax
+.L000just_leave:
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.size bn_mul_mont,.-.L_bn_mul_mont_begin
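+/* The .byte strings below spell "Montgomery Multiplication for x86,
+ * CRYPTOGAMS by <appro@openssl.org>". */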
+.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
+.byte 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
+.byte 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
+.byte 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
+.byte 111,114,103,62,0
+#endif