Index: third_party/boringssl/mac-x86/crypto/bn/x86-mont.S |
diff --git a/third_party/boringssl/mac-x86/crypto/bn/x86-mont.S b/third_party/boringssl/mac-x86/crypto/bn/x86-mont.S |
new file mode 100644 |
index 0000000000000000000000000000000000000000..234034b0a0805dcc71521788516e317da955be7a |
--- /dev/null |
+++ b/third_party/boringssl/mac-x86/crypto/bn/x86-mont.S |
@@ -0,0 +1,462 @@ |
+#if defined(__i386__) |
+.file "src/crypto/bn/asm/x86-mont.S" | # NOTE(review): presumably perlasm-generated (crypto/bn/asm/x86-mont.pl) -- edit the generator, not this file
+.text |
+.globl _bn_mul_mont |
+.private_extern _bn_mul_mont | # exported to the linker but not outside this image
+.align 4 |
+_bn_mul_mont: | # int bn_mul_mont(rp, ap, bp, np, n0, num) -- NOTE(review): arg roles inferred from the loads at 20..40(%esp) below
+L_bn_mul_mont_begin: |
+	pushl	%ebp | # save callee-saved registers
+	pushl	%ebx |
+	pushl	%esi |
+	pushl	%edi |
+	xorl	%eax,%eax | # eax = 0: "did nothing" return value if we bail out
+	movl	40(%esp),%edi | # edi = num (6th arg: 4 pushes + retaddr = 20-byte bias)
+	cmpl	$4,%edi |
+	jl	L000just_leave | # require num >= 4, else return 0
+	leal	20(%esp),%esi | # esi = &args (first slot = rp)
+	leal	24(%esp),%edx | # edx = &args[1], used below to bias frame placement
+	movl	%esp,%ebp | # ebp = original esp, restored in the epilogue
+	addl	$2,%edi | # edi = num+2 (tp[] needs num+2 words)
+	negl	%edi |
+	leal	-32(%esp,%edi,4),%esp | # esp -= 4*(num+2)+32: frame header + tp[]
+	negl	%edi |
+	movl	%esp,%eax | # align/bias the frame relative to the argument block:
+	subl	%edx,%eax |
+	andl	$2047,%eax |
+	subl	%eax,%esp | # make esp 2KB-congruent to &args...
+	xorl	%esp,%edx |
+	andl	$2048,%edx |
+	xorl	$2048,%edx |
+	subl	%edx,%esp | # ...then force the opposite 2KB half (aliasing avoidance)
+	andl	$-64,%esp | # finally 64-byte align the frame
+	movl	(%esi),%eax | # eax = rp
+	movl	4(%esi),%ebx | # ebx = ap
+	movl	8(%esi),%ecx | # ecx = bp
+	movl	12(%esi),%edx | # edx = np
+	movl	16(%esi),%esi | # esi = n0 pointer
+	movl	(%esi),%esi | # esi = *n0 (Montgomery n0 word)
+	movl	%eax,4(%esp) | # frame: 4=rp 8=ap 12=bp 16=np 20=n0 24=saved esp, tp[] at 32
+	movl	%ebx,8(%esp) |
+	movl	%ecx,12(%esp) |
+	movl	%edx,16(%esp) |
+	movl	%esi,20(%esp) |
+	leal	-3(%edi),%ebx | # ebx = num-1 (edi was num+2)
+	movl	%ebp,24(%esp) | # save original esp for the epilogue
+	call	L001PIC_me_up | # PIC trick: load own address to reach the non-lazy pointer
+L001PIC_me_up: |
+	popl	%eax |
+	movl	L_OPENSSL_ia32cap_P$non_lazy_ptr-L001PIC_me_up(%eax),%eax | # eax = &OPENSSL_ia32cap_P
+	btl	$26,(%eax) | # test the SSE2 capability bit
+	jnc	L002non_sse2 | # no SSE2 -> scalar mull/adcl path
+	movl	$-1,%eax |
+	movd	%eax,%mm7 | # mm7 = 0x00000000ffffffff low-32 mask
+	movl	8(%esp),%esi | # esi = ap
+	movl	12(%esp),%edi | # edi = bp
+	movl	16(%esp),%ebp | # ebp = np
+	xorl	%edx,%edx | # edx = i (outer index into bp)
+	xorl	%ecx,%ecx | # ecx = j (inner index)
+	movd	(%edi),%mm4 | # mm4 = bp[0]
+	movd	(%esi),%mm5 | # mm5 = ap[0]
+	movd	(%ebp),%mm3 | # mm3 = np[0]
+	pmuludq	%mm4,%mm5 | # mm5 = ap[0]*bp[0]
+	movq	%mm5,%mm2 | # mm2 = multiplication carry chain
+	movq	%mm5,%mm0 |
+	pand	%mm7,%mm0 | # mm0 = low 32 bits of t
+	pmuludq	20(%esp),%mm5 | # mm5 = m = low(t)*n0 (only low 32 bits used)
+	pmuludq	%mm5,%mm3 | # mm3 = m*np[0]
+	paddq	%mm0,%mm3 | # + low(t): low word becomes 0 by choice of m
+	movd	4(%ebp),%mm1 | # preload np[1]
+	movd	4(%esi),%mm0 | # preload ap[1]
+	psrlq	$32,%mm2 | # carry of t
+	psrlq	$32,%mm3 | # carry of the reduction
+	incl	%ecx | # j = 1
+.align	4,0x90 |
+L0031st: | # first pass: tp[] = ap[]*bp[0] interleaved with m*np[] reduction
+	pmuludq	%mm4,%mm0 | # ap[j]*bp[0]
+	pmuludq	%mm5,%mm1 | # m*np[j]
+	paddq	%mm0,%mm2 |
+	paddq	%mm1,%mm3 |
+	movq	%mm2,%mm0 |
+	pand	%mm7,%mm0 | # low 32 bits of t
+	movd	4(%ebp,%ecx,4),%mm1 | # preload np[j+1]
+	paddq	%mm0,%mm3 |
+	movd	4(%esi,%ecx,4),%mm0 | # preload ap[j+1]
+	psrlq	$32,%mm2 |
+	movd	%mm3,28(%esp,%ecx,4) | # tp[j-1]
+	psrlq	$32,%mm3 |
+	leal	1(%ecx),%ecx | # j++
+	cmpl	%ebx,%ecx |
+	jl	L0031st | # while j < num-1
+	pmuludq	%mm4,%mm0 | # last limb, j = num-1
+	pmuludq	%mm5,%mm1 |
+	paddq	%mm0,%mm2 |
+	paddq	%mm1,%mm3 |
+	movq	%mm2,%mm0 |
+	pand	%mm7,%mm0 |
+	paddq	%mm0,%mm3 |
+	movd	%mm3,28(%esp,%ecx,4) | # tp[num-2]
+	psrlq	$32,%mm2 |
+	psrlq	$32,%mm3 |
+	paddq	%mm2,%mm3 |
+	movq	%mm3,32(%esp,%ebx,4) | # 64-bit store: tp[num-1] and tp[num]
+	incl	%edx | # i = 1
+L004outer: | # outer loop over the remaining words of bp
+	xorl	%ecx,%ecx | # j = 0
+	movd	(%edi,%edx,4),%mm4 | # mm4 = bp[i]
+	movd	(%esi),%mm5 | # mm5 = ap[0]
+	movd	32(%esp),%mm6 | # mm6 = tp[0]
+	movd	(%ebp),%mm3 | # mm3 = np[0]
+	pmuludq	%mm4,%mm5 | # ap[0]*bp[i]
+	paddq	%mm6,%mm5 | # + tp[0]
+	movq	%mm5,%mm0 |
+	movq	%mm5,%mm2 |
+	pand	%mm7,%mm0 |
+	pmuludq	20(%esp),%mm5 | # m = low(t)*n0
+	pmuludq	%mm5,%mm3 | # m*np[0]
+	paddq	%mm0,%mm3 |
+	movd	36(%esp),%mm6 | # preload tp[1]
+	movd	4(%ebp),%mm1 | # preload np[1]
+	movd	4(%esi),%mm0 | # preload ap[1]
+	psrlq	$32,%mm2 |
+	psrlq	$32,%mm3 |
+	paddq	%mm6,%mm2 | # carry += tp[1]
+	incl	%ecx | # j = 1
+	decl	%ebx | # ebx doubles as the inner-loop down-counter here
+L005inner: |
+	pmuludq	%mm4,%mm0 | # ap[j]*bp[i]
+	pmuludq	%mm5,%mm1 | # m*np[j]
+	paddq	%mm0,%mm2 |
+	paddq	%mm1,%mm3 |
+	movq	%mm2,%mm0 |
+	movd	36(%esp,%ecx,4),%mm6 | # preload tp[j+1]
+	pand	%mm7,%mm0 |
+	movd	4(%ebp,%ecx,4),%mm1 | # preload np[j+1]
+	paddq	%mm0,%mm3 |
+	movd	4(%esi,%ecx,4),%mm0 | # preload ap[j+1]
+	psrlq	$32,%mm2 |
+	movd	%mm3,28(%esp,%ecx,4) | # tp[j-1]
+	psrlq	$32,%mm3 |
+	paddq	%mm6,%mm2 | # carry += tp[j+1]
+	decl	%ebx |
+	leal	1(%ecx),%ecx | # j++
+	jnz	L005inner |
+	movl	%ecx,%ebx | # restore ebx = num-1
+	pmuludq	%mm4,%mm0 | # last limb of this row
+	pmuludq	%mm5,%mm1 |
+	paddq	%mm0,%mm2 |
+	paddq	%mm1,%mm3 |
+	movq	%mm2,%mm0 |
+	pand	%mm7,%mm0 |
+	paddq	%mm0,%mm3 |
+	movd	%mm3,28(%esp,%ecx,4) | # tp[num-2]
+	psrlq	$32,%mm2 |
+	psrlq	$32,%mm3 |
+	movd	36(%esp,%ebx,4),%mm6 | # old tp[num]
+	paddq	%mm2,%mm3 |
+	paddq	%mm6,%mm3 |
+	movq	%mm3,32(%esp,%ebx,4) | # 64-bit store: tp[num-1] and tp[num]
+	leal	1(%edx),%edx | # i++
+	cmpl	%ebx,%edx |
+	jle	L004outer | # for all num words of bp (i runs 1..num-1)
+	emms | # clear MMX state before returning to FPU/scalar code
+	jmp	L006common_tail |
+.align	4,0x90 |
+L002non_sse2: | # scalar (pre-SSE2) path using mull/adcl
+	movl	8(%esp),%esi | # esi = ap
+	leal	1(%ebx),%ebp | # ebp = num
+	movl	12(%esp),%edi | # edi = bp
+	xorl	%ecx,%ecx |
+	movl	%esi,%edx |
+	andl	$1,%ebp | # ebp = num & 1
+	subl	%edi,%edx | # edx = ap - bp
+	leal	4(%edi,%ebx,4),%eax | # eax = &bp[num] (loop sentinel)
+	orl	%edx,%ebp | # zero iff ap == bp and num is even
+	movl	(%edi),%edi | # edi = bp[0]
+	jz	L007bn_sqr_mont | # squaring-optimized path
+	movl	%eax,28(%esp) | # 28(%esp) = &bp[num]
+	movl	(%esi),%eax | # eax = ap[0]
+	xorl	%edx,%edx | # carry = 0
+.align	4,0x90 |
+L008mull: | # tp[] = ap[] * bp[0]
+	movl	%edx,%ebp | # ebp = carry
+	mull	%edi | # edx:eax = ap[j]*bp[0]
+	addl	%eax,%ebp |
+	leal	1(%ecx),%ecx | # j++
+	adcl	$0,%edx |
+	movl	(%esi,%ecx,4),%eax | # preload ap[j]
+	cmpl	%ebx,%ecx |
+	movl	%ebp,28(%esp,%ecx,4) | # tp[j-1]
+	jl	L008mull |
+	movl	%edx,%ebp |
+	mull	%edi | # last limb of ap*bp[0]
+	movl	20(%esp),%edi | # edi = n0
+	addl	%ebp,%eax |
+	movl	16(%esp),%esi | # esi = np
+	adcl	$0,%edx |
+	imull	32(%esp),%edi | # edi = m = tp[0]*n0 mod 2^32
+	movl	%eax,32(%esp,%ebx,4) | # tp[num-1]
+	xorl	%ecx,%ecx |
+	movl	%edx,36(%esp,%ebx,4) | # tp[num] = carry
+	movl	%ecx,40(%esp,%ebx,4) | # tp[num+1] = 0
+	movl	(%esi),%eax | # np[0]
+	mull	%edi | # m*np[0]
+	addl	32(%esp),%eax | # + tp[0] (low word cancels by choice of m)
+	movl	4(%esi),%eax | # preload np[1]
+	adcl	$0,%edx |
+	incl	%ecx | # j = 1
+	jmp	L0092ndmadd |
+.align	4,0x90 |
+L0101stmadd: | # tp[] += ap[]*bp[i]
+	movl	%edx,%ebp | # ebp = carry
+	mull	%edi | # ap[j]*bp[i]
+	addl	32(%esp,%ecx,4),%ebp | # + tp[j]
+	leal	1(%ecx),%ecx | # j++
+	adcl	$0,%edx |
+	addl	%eax,%ebp |
+	movl	(%esi,%ecx,4),%eax | # preload ap[j]
+	adcl	$0,%edx |
+	cmpl	%ebx,%ecx |
+	movl	%ebp,28(%esp,%ecx,4) | # tp[j-1]
+	jl	L0101stmadd |
+	movl	%edx,%ebp |
+	mull	%edi | # last limb of this row
+	addl	32(%esp,%ebx,4),%eax | # + tp[num-1]
+	movl	20(%esp),%edi | # edi = n0
+	adcl	$0,%edx |
+	movl	16(%esp),%esi | # esi = np
+	addl	%eax,%ebp |
+	adcl	$0,%edx |
+	imull	32(%esp),%edi | # m = tp[0]*n0 mod 2^32
+	xorl	%ecx,%ecx |
+	addl	36(%esp,%ebx,4),%edx | # fold in old tp[num]
+	movl	%ebp,32(%esp,%ebx,4) | # tp[num-1]
+	adcl	$0,%ecx |
+	movl	(%esi),%eax | # np[0]
+	movl	%edx,36(%esp,%ebx,4) | # tp[num]
+	movl	%ecx,40(%esp,%ebx,4) | # tp[num+1] = carry
+	mull	%edi | # m*np[0]
+	addl	32(%esp),%eax | # + tp[0] (low word cancels)
+	movl	4(%esi),%eax | # preload np[1]
+	adcl	$0,%edx |
+	movl	$1,%ecx | # j = 1
+.align	4,0x90 |
+L0092ndmadd: | # Montgomery reduction: tp[] = (tp[] + m*np[]) >> 32
+	movl	%edx,%ebp | # ebp = carry
+	mull	%edi | # m*np[j]
+	addl	32(%esp,%ecx,4),%ebp | # + tp[j]
+	leal	1(%ecx),%ecx | # j++
+	adcl	$0,%edx |
+	addl	%eax,%ebp |
+	movl	(%esi,%ecx,4),%eax | # preload np[j]
+	adcl	$0,%edx |
+	cmpl	%ebx,%ecx |
+	movl	%ebp,24(%esp,%ecx,4) | # tp[j-2]: result lands one word down (the >>32)
+	jl	L0092ndmadd |
+	movl	%edx,%ebp |
+	mull	%edi | # last reduction limb
+	addl	32(%esp,%ebx,4),%ebp | # + tp[num-1]
+	adcl	$0,%edx |
+	addl	%eax,%ebp |
+	adcl	$0,%edx |
+	movl	%ebp,28(%esp,%ebx,4) | # tp[num-2]
+	xorl	%eax,%eax |
+	movl	12(%esp),%ecx | # ecx = current position within bp
+	addl	36(%esp,%ebx,4),%edx | # fold in old tp[num]
+	adcl	40(%esp,%ebx,4),%eax | # and old tp[num+1]
+	leal	4(%ecx),%ecx | # advance bp pointer
+	movl	%edx,32(%esp,%ebx,4) | # new tp[num-1]
+	cmpl	28(%esp),%ecx | # reached &bp[num]?
+	movl	%eax,36(%esp,%ebx,4) | # new tp[num]
+	je	L006common_tail |
+	movl	(%ecx),%edi | # edi = next bp word
+	movl	8(%esp),%esi | # esi = ap
+	movl	%ecx,12(%esp) | # remember bp position
+	xorl	%ecx,%ecx | # j = 0
+	xorl	%edx,%edx | # carry = 0
+	movl	(%esi),%eax | # ap[0]
+	jmp	L0101stmadd |
+.align	4,0x90 |
+L007bn_sqr_mont: | # squaring path (only taken when ap == bp and num is even)
+	movl	%ebx,(%esp) | # (%esp) = num-1
+	movl	%ecx,12(%esp) | # i = 0
+	movl	%edi,%eax | # eax = a[0]
+	mull	%edi | # a[0]^2
+	movl	%eax,32(%esp) | # tp[0]
+	movl	%edx,%ebx |
+	shrl	$1,%edx | # high word pre-shifted for doubling
+	andl	$1,%ebx | # ebx = saved low bit (doubling carry)
+	incl	%ecx | # j = 1
+.align	4,0x90 |
+L011sqr: | # tp[j] = 2*a[j]*a[0], doubling carry threaded through ebx
+	movl	(%esi,%ecx,4),%eax | # a[j]
+	movl	%edx,%ebp |
+	mull	%edi | # a[j]*a[0]
+	addl	%ebp,%eax |
+	leal	1(%ecx),%ecx | # j++
+	adcl	$0,%edx |
+	leal	(%ebx,%eax,2),%ebp | # doubled low word + previous doubling carry
+	shrl	$31,%eax | # next doubling carry
+	cmpl	(%esp),%ecx |
+	movl	%eax,%ebx |
+	movl	%ebp,28(%esp,%ecx,4) | # tp[j-1]
+	jl	L011sqr |
+	movl	(%esi,%ecx,4),%eax | # a[num-1]
+	movl	%edx,%ebp |
+	mull	%edi | # last limb of row 0
+	addl	%ebp,%eax |
+	movl	20(%esp),%edi | # edi = n0
+	adcl	$0,%edx |
+	movl	16(%esp),%esi | # esi = np
+	leal	(%ebx,%eax,2),%ebp |
+	imull	32(%esp),%edi | # m = tp[0]*n0 mod 2^32
+	shrl	$31,%eax |
+	movl	%ebp,32(%esp,%ecx,4) | # tp[num-1]
+	leal	(%eax,%edx,2),%ebp |
+	movl	(%esi),%eax | # np[0]
+	shrl	$31,%edx |
+	movl	%ebp,36(%esp,%ecx,4) | # tp[num]
+	movl	%edx,40(%esp,%ecx,4) | # tp[num+1]
+	mull	%edi | # m*np[0]
+	addl	32(%esp),%eax | # + tp[0] (low word cancels)
+	movl	%ecx,%ebx | # ebx = num-1
+	adcl	$0,%edx |
+	movl	4(%esi),%eax | # preload np[1]
+	movl	$1,%ecx | # j = 1
+.align	4,0x90 |
+L0123rdmadd: | # reduction tp[] = (tp[] + m*np[]) >> 32, two limbs per iteration
+	movl	%edx,%ebp |
+	mull	%edi | # m*np[j]
+	addl	32(%esp,%ecx,4),%ebp | # + tp[j]
+	adcl	$0,%edx |
+	addl	%eax,%ebp |
+	movl	4(%esi,%ecx,4),%eax | # preload np[j+1]
+	adcl	$0,%edx |
+	movl	%ebp,28(%esp,%ecx,4) | # tp[j-1]
+	movl	%edx,%ebp |
+	mull	%edi | # m*np[j+1]
+	addl	36(%esp,%ecx,4),%ebp | # + tp[j+1]
+	leal	2(%ecx),%ecx | # j += 2
+	adcl	$0,%edx |
+	addl	%eax,%ebp |
+	movl	(%esi,%ecx,4),%eax | # preload np[j]
+	adcl	$0,%edx |
+	cmpl	%ebx,%ecx |
+	movl	%ebp,24(%esp,%ecx,4) | # tp[j-2]
+	jl	L0123rdmadd |
+	movl	%edx,%ebp |
+	mull	%edi | # last reduction limb
+	addl	32(%esp,%ebx,4),%ebp | # + tp[num-1]
+	adcl	$0,%edx |
+	addl	%eax,%ebp |
+	adcl	$0,%edx |
+	movl	%ebp,28(%esp,%ebx,4) | # tp[num-2]
+	movl	12(%esp),%ecx | # ecx = i
+	xorl	%eax,%eax |
+	movl	8(%esp),%esi | # esi = ap
+	addl	36(%esp,%ebx,4),%edx | # fold in old tp[num]
+	adcl	40(%esp,%ebx,4),%eax | # and old tp[num+1]
+	movl	%edx,32(%esp,%ebx,4) | # new tp[num-1]
+	cmpl	%ebx,%ecx | # all rows done?
+	movl	%eax,36(%esp,%ebx,4) | # new tp[num]
+	je	L006common_tail |
+	movl	4(%esi,%ecx,4),%edi | # edi = a[i+1]
+	leal	1(%ecx),%ecx | # i++
+	movl	%edi,%eax |
+	movl	%ecx,12(%esp) |
+	mull	%edi | # a[i]^2 (diagonal term)
+	addl	32(%esp,%ecx,4),%eax | # + tp[i]
+	adcl	$0,%edx |
+	movl	%eax,32(%esp,%ecx,4) | # tp[i]
+	xorl	%ebp,%ebp |
+	cmpl	%ebx,%ecx |
+	leal	1(%ecx),%ecx | # j = i+1
+	je	L013sqrlast | # no off-diagonal terms left for the last row
+	movl	%edx,%ebx |
+	shrl	$1,%edx | # split carry for doubling, as in L011sqr
+	andl	$1,%ebx |
+.align	4,0x90 |
+L014sqradd: | # tp[j] += 2*a[j]*a[i] with carries
+	movl	(%esi,%ecx,4),%eax | # a[j]
+	movl	%edx,%ebp |
+	mull	%edi | # a[j]*a[i]
+	addl	%ebp,%eax |
+	leal	(%eax,%eax,1),%ebp | # doubled low word
+	adcl	$0,%edx |
+	shrl	$31,%eax | # doubling carry
+	addl	32(%esp,%ecx,4),%ebp | # + tp[j]
+	leal	1(%ecx),%ecx | # j++
+	adcl	$0,%eax |
+	addl	%ebx,%ebp | # + previous doubling carry
+	adcl	$0,%eax |
+	cmpl	(%esp),%ecx | # j <= num-1?
+	movl	%ebp,28(%esp,%ecx,4) | # tp[j-1]
+	movl	%eax,%ebx |
+	jle	L014sqradd |
+	movl	%edx,%ebp |
+	addl	%edx,%edx | # double the final high word
+	shrl	$31,%ebp |
+	addl	%ebx,%edx |
+	adcl	$0,%ebp |
+L013sqrlast: | # finish the row, then fall through to the shared reduction
+	movl	20(%esp),%edi | # edi = n0
+	movl	16(%esp),%esi | # esi = np
+	imull	32(%esp),%edi | # m = tp[0]*n0 mod 2^32
+	addl	32(%esp,%ecx,4),%edx | # accumulate into tp top words
+	movl	(%esi),%eax | # np[0]
+	adcl	$0,%ebp |
+	movl	%edx,32(%esp,%ecx,4) |
+	movl	%ebp,36(%esp,%ecx,4) |
+	mull	%edi | # m*np[0]
+	addl	32(%esp),%eax | # + tp[0] (low word cancels)
+	leal	-1(%ecx),%ebx | # ebx = num-1
+	adcl	$0,%edx |
+	movl	$1,%ecx | # j = 1
+	movl	4(%esi),%eax | # preload np[1]
+	jmp	L0123rdmadd |
+.align	4,0x90 |
+L006common_tail: | # final conditional subtraction of np, then copy out to rp
+	movl	16(%esp),%ebp | # ebp = np
+	movl	4(%esp),%edi | # edi = rp
+	leal	32(%esp),%esi | # esi = tp
+	movl	(%esi),%eax |
+	movl	%ebx,%ecx | # ecx = num-1 (loop counter)
+	xorl	%edx,%edx | # edx = word index; also clears CF for the sbb chain
+L015sub: | # rp[] = tp[] - np[], borrow propagated in CF
+	sbbl	(%ebp,%edx,4),%eax |
+	movl	%eax,(%edi,%edx,4) |
+	decl	%ecx | # dec leaves CF intact
+	movl	4(%esi,%edx,4),%eax |
+	leal	1(%edx),%edx | # lea leaves CF intact
+	jge	L015sub |
+	sbbl	$0,%eax | # mask from tp[num]-borrow: 0 keeps the subtracted value, nonzero keeps tp -- NOTE(review): relies on tp < 2*np
+.align	4,0x90 |
+L016copy: | # branchless select rp[i] = mask ? tp[i] : (tp-np)[i]; wipe tp
+	movl	(%esi,%ebx,4),%edx | # edx = tp[i]
+	movl	(%edi,%ebx,4),%ebp | # ebp = (tp-np)[i] already stored at rp[i]
+	xorl	%ebp,%edx |
+	andl	%eax,%edx |
+	xorl	%ebp,%edx | # edx = eax ? tp[i] : rp[i]
+	movl	%ecx,(%esi,%ebx,4) | # zap tp[i] (ecx = -1 after the sub loop)
+	movl	%edx,(%edi,%ebx,4) |
+	decl	%ebx |
+	jge	L016copy |
+	movl	24(%esp),%esp | # restore the caller's stack pointer
+	movl	$1,%eax | # return 1: multiplication performed
+L000just_leave: |
+	popl	%edi | # restore callee-saved registers
+	popl	%esi |
+	popl	%ebx |
+	popl	%ebp |
+	ret |
+.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105 | # ASCII: "Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>"
+.byte	112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56 |
+.byte	54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121 |
+.byte	32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46 |
+.byte	111,114,103,62,0 |
+.section __IMPORT,__pointers,non_lazy_symbol_pointers | # Mach-O non-lazy pointer used by the PIC load above
+L_OPENSSL_ia32cap_P$non_lazy_ptr: |
+.indirect_symbol	_OPENSSL_ia32cap_P |
+.long	0 |
+#endif |