Index: third_party/boringssl/linux-x86_64/crypto/bn/x86_64-mont5.S |
diff --git a/third_party/boringssl/linux-x86_64/crypto/bn/x86_64-mont5.S b/third_party/boringssl/linux-x86_64/crypto/bn/x86_64-mont5.S |
index 5d7502c3fd609cf68bee72f93dfca101b8a5f245..b891f353c64fe510ec6c2d3964b693649a951982 100644 |
--- a/third_party/boringssl/linux-x86_64/crypto/bn/x86_64-mont5.S |
+++ b/third_party/boringssl/linux-x86_64/crypto/bn/x86_64-mont5.S |
@@ -9,16 +9,15 @@ |
.type bn_mul_mont_gather5,@function |
.align 64 |
bn_mul_mont_gather5: |
+ movl %r9d,%r9d |
+ movq %rsp,%rax |
testl $7,%r9d |
jnz .Lmul_enter |
jmp .Lmul4x_enter |
.align 16 |
.Lmul_enter: |
- movl %r9d,%r9d |
- movq %rsp,%rax |
movd 8(%rsp),%xmm5 |
- leaq .Linc(%rip),%r10 |
pushq %rbx |
pushq %rbp |
pushq %r12 |
@@ -26,13 +25,39 @@ bn_mul_mont_gather5: |
pushq %r14 |
pushq %r15 |
- leaq 2(%r9),%r11 |
- negq %r11 |
- leaq -264(%rsp,%r11,8),%rsp |
- andq $-1024,%rsp |
+ negq %r9 |
+ movq %rsp,%r11 |
+ leaq -280(%rsp,%r9,8),%r10 |
+ negq %r9 |
+ andq $-1024,%r10 |
+ |
+ |
+ |
+ |
+ |
+ |
+ |
+ |
+ subq %r10,%r11 |
+ andq $-4096,%r11 |
+ leaq (%r10,%r11,1),%rsp |
+ movq (%rsp),%r11 |
+ cmpq %r10,%rsp |
+ ja .Lmul_page_walk |
+ jmp .Lmul_page_walk_done |
+ |
+.Lmul_page_walk: |
+ leaq -4096(%rsp),%rsp |
+ movq (%rsp),%r11 |
+ cmpq %r10,%rsp |
+ ja .Lmul_page_walk |
+.Lmul_page_walk_done: |
+ |
+ leaq .Linc(%rip),%r10 |
movq %rax,8(%rsp,%r9,8) |
.Lmul_body: |
+ |
leaq 128(%rdx),%r12 |
movdqa 0(%r10),%xmm0 |
movdqa 16(%r10),%xmm1 |
@@ -371,16 +396,17 @@ bn_mul_mont_gather5: |
sbbq $0,%rax |
xorq %r14,%r14 |
+ andq %rax,%rsi |
+ notq %rax |
+ movq %rdi,%rcx |
+ andq %rax,%rcx |
movq %r9,%r15 |
+ orq %rcx,%rsi |
.align 16 |
.Lcopy: |
- movq (%rsp,%r14,8),%rsi |
- movq (%rdi,%r14,8),%rcx |
- xorq %rcx,%rsi |
- andq %rax,%rsi |
- xorq %rcx,%rsi |
+ movq (%rsi,%r14,8),%rax |
movq %r14,(%rsp,%r14,8) |
- movq %rsi,(%rdi,%r14,8) |
+ movq %rax,(%rdi,%r14,8) |
leaq 1(%r14),%r14 |
subq $1,%r15 |
jnz .Lcopy |
@@ -401,15 +427,16 @@ bn_mul_mont_gather5: |
.type bn_mul4x_mont_gather5,@function |
.align 32 |
bn_mul4x_mont_gather5: |
-.Lmul4x_enter: |
.byte 0x67 |
movq %rsp,%rax |
+.Lmul4x_enter: |
pushq %rbx |
pushq %rbp |
pushq %r12 |
pushq %r13 |
pushq %r14 |
pushq %r15 |
+.Lmul4x_prologue: |
.byte 0x67 |
shll $3,%r9d |
@@ -426,24 +453,41 @@ bn_mul4x_mont_gather5: |
leaq -320(%rsp,%r9,2),%r11 |
+ movq %rsp,%rbp |
subq %rdi,%r11 |
andq $4095,%r11 |
cmpq %r11,%r10 |
jb .Lmul4xsp_alt |
- subq %r11,%rsp |
- leaq -320(%rsp,%r9,2),%rsp |
+ subq %r11,%rbp |
+ leaq -320(%rbp,%r9,2),%rbp |
jmp .Lmul4xsp_done |
.align 32 |
.Lmul4xsp_alt: |
leaq 4096-320(,%r9,2),%r10 |
- leaq -320(%rsp,%r9,2),%rsp |
+ leaq -320(%rbp,%r9,2),%rbp |
subq %r10,%r11 |
movq $0,%r10 |
cmovcq %r10,%r11 |
- subq %r11,%rsp |
+ subq %r11,%rbp |
.Lmul4xsp_done: |
- andq $-64,%rsp |
+ andq $-64,%rbp |
+ movq %rsp,%r11 |
+ subq %rbp,%r11 |
+ andq $-4096,%r11 |
+ leaq (%r11,%rbp,1),%rsp |
+ movq (%rsp),%r10 |
+ cmpq %rbp,%rsp |
+ ja .Lmul4x_page_walk |
+ jmp .Lmul4x_page_walk_done |
+ |
+.Lmul4x_page_walk: |
+ leaq -4096(%rsp),%rsp |
+ movq (%rsp),%r10 |
+ cmpq %rbp,%rsp |
+ ja .Lmul4x_page_walk |
+.Lmul4x_page_walk_done: |
+ |
negq %r9 |
movq %rax,40(%rsp) |
@@ -1002,6 +1046,7 @@ bn_power5: |
pushq %r13 |
pushq %r14 |
pushq %r15 |
+.Lpower5_prologue: |
shll $3,%r9d |
leal (%r9,%r9,2),%r10d |
@@ -1016,24 +1061,41 @@ bn_power5: |
leaq -320(%rsp,%r9,2),%r11 |
+ movq %rsp,%rbp |
subq %rdi,%r11 |
andq $4095,%r11 |
cmpq %r11,%r10 |
jb .Lpwr_sp_alt |
- subq %r11,%rsp |
- leaq -320(%rsp,%r9,2),%rsp |
+ subq %r11,%rbp |
+ leaq -320(%rbp,%r9,2),%rbp |
jmp .Lpwr_sp_done |
.align 32 |
.Lpwr_sp_alt: |
leaq 4096-320(,%r9,2),%r10 |
- leaq -320(%rsp,%r9,2),%rsp |
+ leaq -320(%rbp,%r9,2),%rbp |
subq %r10,%r11 |
movq $0,%r10 |
cmovcq %r10,%r11 |
- subq %r11,%rsp |
+ subq %r11,%rbp |
.Lpwr_sp_done: |
- andq $-64,%rsp |
+ andq $-64,%rbp |
+ movq %rsp,%r11 |
+ subq %rbp,%r11 |
+ andq $-4096,%r11 |
+ leaq (%r11,%rbp,1),%rsp |
+ movq (%rsp),%r10 |
+ cmpq %rbp,%rsp |
+ ja .Lpwr_page_walk |
+ jmp .Lpwr_page_walk_done |
+ |
+.Lpwr_page_walk: |
+ leaq -4096(%rsp),%rsp |
+ movq (%rsp),%r10 |
+ cmpq %rbp,%rsp |
+ ja .Lpwr_page_walk |
+.Lpwr_page_walk_done: |
+ |
movq %r9,%r10 |
negq %r9 |
@@ -1944,6 +2006,7 @@ bn_from_mont8x: |
pushq %r13 |
pushq %r14 |
pushq %r15 |
+.Lfrom_prologue: |
shll $3,%r9d |
leaq (%r9,%r9,2),%r10 |
@@ -1958,24 +2021,41 @@ bn_from_mont8x: |
leaq -320(%rsp,%r9,2),%r11 |
+ movq %rsp,%rbp |
subq %rdi,%r11 |
andq $4095,%r11 |
cmpq %r11,%r10 |
jb .Lfrom_sp_alt |
- subq %r11,%rsp |
- leaq -320(%rsp,%r9,2),%rsp |
+ subq %r11,%rbp |
+ leaq -320(%rbp,%r9,2),%rbp |
jmp .Lfrom_sp_done |
.align 32 |
.Lfrom_sp_alt: |
leaq 4096-320(,%r9,2),%r10 |
- leaq -320(%rsp,%r9,2),%rsp |
+ leaq -320(%rbp,%r9,2),%rbp |
subq %r10,%r11 |
movq $0,%r10 |
cmovcq %r10,%r11 |
- subq %r11,%rsp |
+ subq %r11,%rbp |
.Lfrom_sp_done: |
- andq $-64,%rsp |
+ andq $-64,%rbp |
+ movq %rsp,%r11 |
+ subq %rbp,%r11 |
+ andq $-4096,%r11 |
+ leaq (%r11,%rbp,1),%rsp |
+ movq (%rsp),%r10 |
+ cmpq %rbp,%rsp |
+ ja .Lfrom_page_walk |
+ jmp .Lfrom_page_walk_done |
+ |
+.Lfrom_page_walk: |
+ leaq -4096(%rsp),%rsp |
+ movq (%rsp),%r10 |
+ cmpq %rbp,%rsp |
+ ja .Lfrom_page_walk |
+.Lfrom_page_walk_done: |
+ |
movq %r9,%r10 |
negq %r9 |