Index: third_party/boringssl/mac-x86_64/crypto/bn/x86_64-mont5.S
diff --git a/third_party/boringssl/mac-x86_64/crypto/bn/x86_64-mont5.S b/third_party/boringssl/mac-x86_64/crypto/bn/x86_64-mont5.S
index a154cc8dfd550c58da1614d78e171bd08ca8800a..4646c3d9e2b24cfe08a4c28585610a26fbc46174 100644
--- a/third_party/boringssl/mac-x86_64/crypto/bn/x86_64-mont5.S
+++ b/third_party/boringssl/mac-x86_64/crypto/bn/x86_64-mont5.S
@@ -8,16 +8,15 @@
.p2align 6
_bn_mul_mont_gather5:
+ movl %r9d,%r9d
+ movq %rsp,%rax
testl $7,%r9d
jnz L$mul_enter
jmp L$mul4x_enter
.p2align 4
L$mul_enter:
- movl %r9d,%r9d
- movq %rsp,%rax
movd 8(%rsp),%xmm5
- leaq L$inc(%rip),%r10
pushq %rbx
pushq %rbp
pushq %r12
@@ -25,13 +24,39 @@ L$mul_enter:
pushq %r14
pushq %r15
- leaq 2(%r9),%r11
- negq %r11
- leaq -264(%rsp,%r11,8),%rsp
- andq $-1024,%rsp
+ negq %r9
+ movq %rsp,%r11
+ leaq -280(%rsp,%r9,8),%r10
+ negq %r9
+ andq $-1024,%r10
+
+
+
+
+
+
+
+
+ subq %r10,%r11
+ andq $-4096,%r11
+ leaq (%r10,%r11,1),%rsp
+ movq (%rsp),%r11
+ cmpq %r10,%rsp
+ ja L$mul_page_walk
+ jmp L$mul_page_walk_done
+
+L$mul_page_walk:
+ leaq -4096(%rsp),%rsp
+ movq (%rsp),%r11
+ cmpq %r10,%rsp
+ ja L$mul_page_walk
+L$mul_page_walk_done:
+
+ leaq L$inc(%rip),%r10
movq %rax,8(%rsp,%r9,8)
L$mul_body:
+
leaq 128(%rdx),%r12
movdqa 0(%r10),%xmm0
movdqa 16(%r10),%xmm1
@@ -370,16 +395,17 @@ L$sub: sbbq (%rcx,%r14,8),%rax
sbbq $0,%rax
xorq %r14,%r14
+ andq %rax,%rsi
+ notq %rax
+ movq %rdi,%rcx
+ andq %rax,%rcx
movq %r9,%r15
+ orq %rcx,%rsi
.p2align 4
L$copy:
- movq (%rsp,%r14,8),%rsi
- movq (%rdi,%r14,8),%rcx
- xorq %rcx,%rsi
- andq %rax,%rsi
- xorq %rcx,%rsi
+ movq (%rsi,%r14,8),%rax
movq %r14,(%rsp,%r14,8)
- movq %rsi,(%rdi,%r14,8)
+ movq %rax,(%rdi,%r14,8)
leaq 1(%r14),%r14
subq $1,%r15
jnz L$copy
@@ -400,15 +426,16 @@ L$mul_epilogue:
.p2align 5
bn_mul4x_mont_gather5:
-L$mul4x_enter:
.byte 0x67
movq %rsp,%rax
+L$mul4x_enter:
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
+L$mul4x_prologue:
.byte 0x67
shll $3,%r9d
@@ -425,24 +452,41 @@ L$mul4x_enter:
leaq -320(%rsp,%r9,2),%r11
+ movq %rsp,%rbp
subq %rdi,%r11
andq $4095,%r11
cmpq %r11,%r10
jb L$mul4xsp_alt
- subq %r11,%rsp
- leaq -320(%rsp,%r9,2),%rsp
+ subq %r11,%rbp
+ leaq -320(%rbp,%r9,2),%rbp
jmp L$mul4xsp_done
.p2align 5
L$mul4xsp_alt:
leaq 4096-320(,%r9,2),%r10
- leaq -320(%rsp,%r9,2),%rsp
+ leaq -320(%rbp,%r9,2),%rbp
subq %r10,%r11
movq $0,%r10
cmovcq %r10,%r11
- subq %r11,%rsp
+ subq %r11,%rbp
L$mul4xsp_done:
- andq $-64,%rsp
+ andq $-64,%rbp
+ movq %rsp,%r11
+ subq %rbp,%r11
+ andq $-4096,%r11
+ leaq (%r11,%rbp,1),%rsp
+ movq (%rsp),%r10
+ cmpq %rbp,%rsp
+ ja L$mul4x_page_walk
+ jmp L$mul4x_page_walk_done
+
+L$mul4x_page_walk:
+ leaq -4096(%rsp),%rsp
+ movq (%rsp),%r10
+ cmpq %rbp,%rsp
+ ja L$mul4x_page_walk
+L$mul4x_page_walk_done:
+
negq %r9
movq %rax,40(%rsp)
@@ -1001,6 +1045,7 @@ _bn_power5:
pushq %r13
pushq %r14
pushq %r15
+L$power5_prologue:
shll $3,%r9d
leal (%r9,%r9,2),%r10d
@@ -1015,24 +1060,41 @@ _bn_power5:
leaq -320(%rsp,%r9,2),%r11
+ movq %rsp,%rbp
subq %rdi,%r11
andq $4095,%r11
cmpq %r11,%r10
jb L$pwr_sp_alt
- subq %r11,%rsp
- leaq -320(%rsp,%r9,2),%rsp
+ subq %r11,%rbp
+ leaq -320(%rbp,%r9,2),%rbp
jmp L$pwr_sp_done
.p2align 5
L$pwr_sp_alt:
leaq 4096-320(,%r9,2),%r10
- leaq -320(%rsp,%r9,2),%rsp
+ leaq -320(%rbp,%r9,2),%rbp
subq %r10,%r11
movq $0,%r10
cmovcq %r10,%r11
- subq %r11,%rsp
+ subq %r11,%rbp
L$pwr_sp_done:
- andq $-64,%rsp
+ andq $-64,%rbp
+ movq %rsp,%r11
+ subq %rbp,%r11
+ andq $-4096,%r11
+ leaq (%r11,%rbp,1),%rsp
+ movq (%rsp),%r10
+ cmpq %rbp,%rsp
+ ja L$pwr_page_walk
+ jmp L$pwr_page_walk_done
+
+L$pwr_page_walk:
+ leaq -4096(%rsp),%rsp
+ movq (%rsp),%r10
+ cmpq %rbp,%rsp
+ ja L$pwr_page_walk
+L$pwr_page_walk_done:
+
movq %r9,%r10
negq %r9
@@ -1943,6 +2005,7 @@ bn_from_mont8x:
pushq %r13
pushq %r14
pushq %r15
+L$from_prologue:
shll $3,%r9d
leaq (%r9,%r9,2),%r10
@@ -1957,24 +2020,41 @@ bn_from_mont8x:
leaq -320(%rsp,%r9,2),%r11
+ movq %rsp,%rbp
subq %rdi,%r11
andq $4095,%r11
cmpq %r11,%r10
jb L$from_sp_alt
- subq %r11,%rsp
- leaq -320(%rsp,%r9,2),%rsp
+ subq %r11,%rbp
+ leaq -320(%rbp,%r9,2),%rbp
jmp L$from_sp_done
.p2align 5
L$from_sp_alt:
leaq 4096-320(,%r9,2),%r10
- leaq -320(%rsp,%r9,2),%rsp
+ leaq -320(%rbp,%r9,2),%rbp
subq %r10,%r11
movq $0,%r10
cmovcq %r10,%r11
- subq %r11,%rsp
+ subq %r11,%rbp
L$from_sp_done:
- andq $-64,%rsp
+ andq $-64,%rbp
+ movq %rsp,%r11
+ subq %rbp,%r11
+ andq $-4096,%r11
+ leaq (%r11,%rbp,1),%rsp
+ movq (%rsp),%r10
+ cmpq %rbp,%rsp
+ ja L$from_page_walk
+ jmp L$from_page_walk_done
+
+L$from_page_walk:
+ leaq -4096(%rsp),%rsp
+ movq (%rsp),%r10
+ cmpq %rbp,%rsp
+ ja L$from_page_walk
+L$from_page_walk_done:
+
movq %r9,%r10
negq %r9