Index: third_party/boringssl/mac-x86/crypto/bn/x86-mont.S
diff --git a/third_party/boringssl/mac-x86/crypto/bn/x86-mont.S b/third_party/boringssl/mac-x86/crypto/bn/x86-mont.S
index 234034b0a0805dcc71521788516e317da955be7a..5c13ca4d6307408d75fe887c9269ddb01c04f5f7 100644
--- a/third_party/boringssl/mac-x86/crypto/bn/x86-mont.S
+++ b/third_party/boringssl/mac-x86/crypto/bn/x86-mont.S
@@ -16,39 +16,54 @@ L_bn_mul_mont_begin:
jl L000just_leave
leal 20(%esp),%esi
leal 24(%esp),%edx
- movl %esp,%ebp
addl $2,%edi
negl %edi
- leal -32(%esp,%edi,4),%esp
+ leal -32(%esp,%edi,4),%ebp
negl %edi
- movl %esp,%eax
+ movl %ebp,%eax
subl %edx,%eax
andl $2047,%eax
- subl %eax,%esp
- xorl %esp,%edx
+ subl %eax,%ebp
+ xorl %ebp,%edx
andl $2048,%edx
xorl $2048,%edx
- subl %edx,%esp
- andl $-64,%esp
+ subl %edx,%ebp
+ andl $-64,%ebp
+ movl %esp,%eax
+ subl %ebp,%eax
+ andl $-4096,%eax
+ movl %esp,%edx
+ leal (%ebp,%eax,1),%esp
+ movl (%esp),%eax
+ cmpl %ebp,%esp
+ ja L001page_walk
+ jmp L002page_walk_done
+.align 4,0x90
+L001page_walk:
+ leal -4096(%esp),%esp
+ movl (%esp),%eax
+ cmpl %ebp,%esp
+ ja L001page_walk
+L002page_walk_done:
movl (%esi),%eax
movl 4(%esi),%ebx
movl 8(%esi),%ecx
- movl 12(%esi),%edx
+ movl 12(%esi),%ebp
movl 16(%esi),%esi
movl (%esi),%esi
movl %eax,4(%esp)
movl %ebx,8(%esp)
movl %ecx,12(%esp)
- movl %edx,16(%esp)
+ movl %ebp,16(%esp)
movl %esi,20(%esp)
leal -3(%edi),%ebx
- movl %ebp,24(%esp)
- call L001PIC_me_up
-L001PIC_me_up:
+ movl %edx,24(%esp)
+ call L003PIC_me_up
+L003PIC_me_up:
popl %eax
- movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L001PIC_me_up(%eax),%eax
+ movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L003PIC_me_up(%eax),%eax
btl $26,(%eax)
- jnc L002non_sse2
+ jnc L004non_sse2
movl $-1,%eax
movd %eax,%mm7
movl 8(%esp),%esi
@@ -72,7 +87,7 @@ L001PIC_me_up:
psrlq $32,%mm3
incl %ecx
.align 4,0x90
-L0031st:
+L0051st:
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
@@ -87,7 +102,7 @@ L0031st:
psrlq $32,%mm3
leal 1(%ecx),%ecx
cmpl %ebx,%ecx
- jl L0031st
+ jl L0051st
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
@@ -101,7 +116,7 @@ L0031st:
paddq %mm2,%mm3
movq %mm3,32(%esp,%ebx,4)
incl %edx
-L004outer:
+L006outer:
xorl %ecx,%ecx
movd (%edi,%edx,4),%mm4
movd (%esi),%mm5
@@ -123,7 +138,7 @@ L004outer:
paddq %mm6,%mm2
incl %ecx
decl %ebx
-L005inner:
+L007inner:
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
@@ -140,7 +155,7 @@ L005inner:
paddq %mm6,%mm2
decl %ebx
leal 1(%ecx),%ecx
- jnz L005inner
+ jnz L007inner
movl %ecx,%ebx
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
@@ -158,11 +173,11 @@ L005inner:
movq %mm3,32(%esp,%ebx,4)
leal 1(%edx),%edx
cmpl %ebx,%edx
- jle L004outer
+ jle L006outer
emms
- jmp L006common_tail
+ jmp L008common_tail
.align 4,0x90
-L002non_sse2:
+L004non_sse2:
movl 8(%esp),%esi
leal 1(%ebx),%ebp
movl 12(%esp),%edi
@@ -173,12 +188,12 @@ L002non_sse2:
leal 4(%edi,%ebx,4),%eax
orl %edx,%ebp
movl (%edi),%edi
- jz L007bn_sqr_mont
+ jz L009bn_sqr_mont
movl %eax,28(%esp)
movl (%esi),%eax
xorl %edx,%edx
.align 4,0x90
-L008mull:
+L010mull:
movl %edx,%ebp
mull %edi
addl %eax,%ebp
@@ -187,7 +202,7 @@ L008mull:
movl (%esi,%ecx,4),%eax
cmpl %ebx,%ecx
movl %ebp,28(%esp,%ecx,4)
- jl L008mull
+ jl L010mull
movl %edx,%ebp
mull %edi
movl 20(%esp),%edi
@@ -205,9 +220,9 @@ L008mull:
movl 4(%esi),%eax
adcl $0,%edx
incl %ecx
- jmp L0092ndmadd
+ jmp L0112ndmadd
.align 4,0x90
-L0101stmadd:
+L0121stmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
@@ -218,7 +233,7 @@ L0101stmadd:
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,28(%esp,%ecx,4)
- jl L0101stmadd
+ jl L0121stmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%eax
@@ -241,7 +256,7 @@ L0101stmadd:
adcl $0,%edx
movl $1,%ecx
.align 4,0x90
-L0092ndmadd:
+L0112ndmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
@@ -252,7 +267,7 @@ L0092ndmadd:
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,24(%esp,%ecx,4)
- jl L0092ndmadd
+ jl L0112ndmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%ebp
@@ -268,16 +283,16 @@ L0092ndmadd:
movl %edx,32(%esp,%ebx,4)
cmpl 28(%esp),%ecx
movl %eax,36(%esp,%ebx,4)
- je L006common_tail
+ je L008common_tail
movl (%ecx),%edi
movl 8(%esp),%esi
movl %ecx,12(%esp)
xorl %ecx,%ecx
xorl %edx,%edx
movl (%esi),%eax
- jmp L0101stmadd
+ jmp L0121stmadd
.align 4,0x90
-L007bn_sqr_mont:
+L009bn_sqr_mont:
movl %ebx,(%esp)
movl %ecx,12(%esp)
movl %edi,%eax
@@ -288,7 +303,7 @@ L007bn_sqr_mont:
andl $1,%ebx
incl %ecx
.align 4,0x90
-L011sqr:
+L013sqr:
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
@@ -300,7 +315,7 @@ L011sqr:
cmpl (%esp),%ecx
movl %eax,%ebx
movl %ebp,28(%esp,%ecx,4)
- jl L011sqr
+ jl L013sqr
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
@@ -324,7 +339,7 @@ L011sqr:
movl 4(%esi),%eax
movl $1,%ecx
.align 4,0x90
-L0123rdmadd:
+L0143rdmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
@@ -343,7 +358,7 @@ L0123rdmadd:
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,24(%esp,%ecx,4)
- jl L0123rdmadd
+ jl L0143rdmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%ebp
@@ -359,7 +374,7 @@ L0123rdmadd:
movl %edx,32(%esp,%ebx,4)
cmpl %ebx,%ecx
movl %eax,36(%esp,%ebx,4)
- je L006common_tail
+ je L008common_tail
movl 4(%esi,%ecx,4),%edi
leal 1(%ecx),%ecx
movl %edi,%eax
@@ -371,12 +386,12 @@ L0123rdmadd:
xorl %ebp,%ebp
cmpl %ebx,%ecx
leal 1(%ecx),%ecx
- je L013sqrlast
+ je L015sqrlast
movl %edx,%ebx
shrl $1,%edx
andl $1,%ebx
.align 4,0x90
-L014sqradd:
+L016sqradd:
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
@@ -392,13 +407,13 @@ L014sqradd:
cmpl (%esp),%ecx
movl %ebp,28(%esp,%ecx,4)
movl %eax,%ebx
- jle L014sqradd
+ jle L016sqradd
movl %edx,%ebp
addl %edx,%edx
shrl $31,%ebp
addl %ebx,%edx
adcl $0,%ebp
-L013sqrlast:
+L015sqrlast:
movl 20(%esp),%edi
movl 16(%esp),%esi
imull 32(%esp),%edi
@@ -413,9 +428,9 @@ L013sqrlast:
adcl $0,%edx
movl $1,%ecx
movl 4(%esi),%eax
- jmp L0123rdmadd
+ jmp L0143rdmadd
.align 4,0x90
-L006common_tail:
+L008common_tail:
movl 16(%esp),%ebp
movl 4(%esp),%edi
leal 32(%esp),%esi
@@ -423,25 +438,26 @@ L006common_tail:
movl %ebx,%ecx
xorl %edx,%edx
.align 4,0x90
-L015sub:
+L017sub:
sbbl (%ebp,%edx,4),%eax
movl %eax,(%edi,%edx,4)
decl %ecx
movl 4(%esi,%edx,4),%eax
leal 1(%edx),%edx
- jge L015sub
+ jge L017sub
sbbl $0,%eax
+ andl %eax,%esi
+ notl %eax
+ movl %edi,%ebp
+ andl %eax,%ebp
+ orl %ebp,%esi
.align 4,0x90
-L016copy:
- movl (%esi,%ebx,4),%edx
- movl (%edi,%ebx,4),%ebp
- xorl %ebp,%edx
- andl %eax,%edx
- xorl %ebp,%edx
- movl %ecx,(%esi,%ebx,4)
- movl %edx,(%edi,%ebx,4)
+L018copy:
+ movl (%esi,%ebx,4),%eax
+ movl %eax,(%edi,%ebx,4)
+ movl %ecx,32(%esp,%ebx,4)
decl %ebx
- jge L016copy
+ jge L018copy
movl 24(%esp),%esp
movl $1,%eax
L000just_leave: