Index: third_party/boringssl/linux-x86_64/crypto/bn/x86_64-mont.S
diff --git a/third_party/boringssl/linux-x86_64/crypto/bn/x86_64-mont.S b/third_party/boringssl/linux-x86_64/crypto/bn/x86_64-mont.S
index 83926ad789b0a25102fb778b1842f32270aa47f3..5152de5d57e00ce784fb062300bb7b291740f837 100644
--- a/third_party/boringssl/linux-x86_64/crypto/bn/x86_64-mont.S
+++ b/third_party/boringssl/linux-x86_64/crypto/bn/x86_64-mont.S
@@ -9,6 +9,8 @@
 .type bn_mul_mont,@function
 .align 16
 bn_mul_mont:
+ movl %r9d,%r9d
+ movq %rsp,%rax
 testl $3,%r9d
 jnz .Lmul_enter
 cmpl $8,%r9d
@@ -28,14 +30,37 @@ bn_mul_mont:
 pushq %r14
 pushq %r15

- movl %r9d,%r9d
- leaq 2(%r9),%r10
+ negq %r9
 movq %rsp,%r11
- negq %r10
- leaq (%rsp,%r10,8),%rsp
- andq $-1024,%rsp
+ leaq -16(%rsp,%r9,8),%r10
+ negq %r9
+ andq $-1024,%r10
+
+

- movq %r11,8(%rsp,%r9,8)
+
+
+
+
+
+
+ subq %r10,%r11
+ andq $-4096,%r11
+ leaq (%r10,%r11,1),%rsp
+ movq (%rsp),%r11
+ cmpq %r10,%rsp
+ ja .Lmul_page_walk
+ jmp .Lmul_page_walk_done
+
+.align 16
+.Lmul_page_walk:
+ leaq -4096(%rsp),%rsp
+ movq (%rsp),%r11
+ cmpq %r10,%rsp
+ ja .Lmul_page_walk
+.Lmul_page_walk_done:
+
+ movq %rax,8(%rsp,%r9,8)
 .Lmul_body:
 movq %rdx,%r12
 movq (%r8),%r8
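The two hunks above replace the old one-shot allocation (drop %rsp by the whole scratch size, then andq $-1024) with an explicit page walk: the target is computed in %r10, %rsp first moves only to the page-aligned top of the descent, and each 4096-byte step loads from the new page before going further. Because every page is touched in order, a huge num argument can no longer move %rsp past an unmapped guard page in a single step. The movq %rsp,%rax added at entry snapshots the original stack pointer, which is what the reworked epilogue below restores. A minimal C sketch of the walk, with hypothetical names and an assumed 4096-byte page:

    #include <stdint.h>

    #define PAGE 4096 /* page size the patch assumes for linux-x86_64 */

    /* Minimal sketch of the page walk; the names are mine, not BoringSSL's.
     * `sp` starts at the old stack pointer rounded down to share `target`'s
     * offset within a page (the subq/andq $-4096/leaq arithmetic above).
     * Every page is loaded from before the descent continues, so an
     * unmapped guard page faults in order instead of being jumped over. */
    static void page_walk(volatile uint8_t *sp, const uint8_t *target) {
      (void)*sp;      /* movq (%rsp),%r11: probe the first page */
      while ((uintptr_t)sp > (uintptr_t)target) {
        sp -= PAGE;   /* leaq -4096(%rsp),%rsp */
        (void)*sp;    /* probe the next page down */
      }
    }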
@@ -187,35 +212,38 @@ bn_mul_mont:

 sbbq $0,%rax
 xorq %r14,%r14
+ andq %rax,%rsi
+ notq %rax
+ movq %rdi,%rcx
+ andq %rax,%rcx
 movq %r9,%r15
+ orq %rcx,%rsi
 .align 16
 .Lcopy:
- movq (%rsp,%r14,8),%rsi
- movq (%rdi,%r14,8),%rcx
- xorq %rcx,%rsi
- andq %rax,%rsi
- xorq %rcx,%rsi
+ movq (%rsi,%r14,8),%rax
 movq %r14,(%rsp,%r14,8)
- movq %rsi,(%rdi,%r14,8)
+ movq %rax,(%rdi,%r14,8)
 leaq 1(%r14),%r14
 subq $1,%r15
 jnz .Lcopy

 movq 8(%rsp,%r9,8),%rsi
 movq $1,%rax
- movq (%rsi),%r15
- movq 8(%rsi),%r14
- movq 16(%rsi),%r13
- movq 24(%rsi),%r12
- movq 32(%rsi),%rbp
- movq 40(%rsi),%rbx
- leaq 48(%rsi),%rsp
+ movq -48(%rsi),%r15
+ movq -40(%rsi),%r14
+ movq -32(%rsi),%r13
+ movq -24(%rsi),%r12
+ movq -16(%rsi),%rbp
+ movq -8(%rsi),%rbx
+ leaq (%rsi),%rsp
 .Lmul_epilogue:
 .byte 0xf3,0xc3
 .size bn_mul_mont,.-bn_mul_mont
 .type bn_mul4x_mont,@function
 .align 16
 bn_mul4x_mont:
+ movl %r9d,%r9d
+ movq %rsp,%rax
 .Lmul4x_enter:
 pushq %rbx
 pushq %rbp
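This hunk reworks the constant-time copy-out and the epilogue. The old .Lcopy loop merged the two candidate results word by word with an xor/and/xor masking dance; the new code picks the source pointer once, still without a branch: %rax, forced to all-ones or all-zeros by sbbq $0,%rax, selects between the unreduced copy on the stack and the subtracted value already at %rdi using and/not/or, and the loop then degenerates into a plain copy that wipes the scratch by storing the loop index over it. The epilogue now loads the callee-saved registers from negative offsets because the saved pointer is the pre-push %rsp kept in %rax. A C sketch of the select, with names of my own choosing:

    #include <stddef.h>
    #include <stdint.h>

    /* Sketch of the branch-free source select done once before .Lcopy;
     * all names are mine. `mask` is the all-ones/all-zeros value from
     * sbbq $0,%rax: all-ones keeps the unreduced copy in `tmp`,
     * all-zeros keeps the already-subtracted value sitting at `rp`. */
    static void copy_out(uint64_t *rp, uint64_t *tmp, size_t num,
                         uintptr_t mask) {
      /* src = mask ? tmp : rp, computed with and/not/or as in the patch */
      const uint64_t *src = (const uint64_t *)(((uintptr_t)tmp & mask) |
                                               ((uintptr_t)rp & ~mask));
      for (size_t i = 0; i < num; i++) {
        uint64_t w = src[i];
        tmp[i] = (uint64_t)i; /* movq %r14,(%rsp,%r14,8): wipe scratch */
        rp[i] = w;
      }
    }

Both arrays are still touched on every iteration either way, so the set of cache lines accessed does not depend on the mask.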
@@ -224,14 +252,28 @@ bn_mul4x_mont:
 pushq %r14
 pushq %r15

- movl %r9d,%r9d
- leaq 4(%r9),%r10
+ negq %r9
 movq %rsp,%r11
- negq %r10
- leaq (%rsp,%r10,8),%rsp
- andq $-1024,%rsp
+ leaq -32(%rsp,%r9,8),%r10
+ negq %r9
+ andq $-1024,%r10

- movq %r11,8(%rsp,%r9,8)
+ subq %r10,%r11
+ andq $-4096,%r11
+ leaq (%r10,%r11,1),%rsp
+ movq (%rsp),%r11
+ cmpq %r10,%rsp
+ ja .Lmul4x_page_walk
+ jmp .Lmul4x_page_walk_done
+
+.Lmul4x_page_walk:
+ leaq -4096(%rsp),%rsp
+ movq (%rsp),%r11
+ cmpq %r10,%rsp
+ ja .Lmul4x_page_walk
+.Lmul4x_page_walk_done:
+
+ movq %rax,8(%rsp,%r9,8)
 .Lmul4x_body:
 movq %rdi,16(%rsp,%r9,8)
 movq %rdx,%r12
@@ -532,6 +574,7 @@ bn_mul4x_mont:
 jb .Louter4x
 movq 16(%rsp,%r9,8),%rdi
 movq 0(%rsp),%rax
+ pxor %xmm0,%xmm0
 movq 8(%rsp),%rdx
 shrq $2,%r9
 leaq (%rsp),%rsi
@@ -569,45 +612,44 @@
 movq %rbx,16(%rdi,%r14,8)

 sbbq $0,%rax
- movq %rax,%xmm0
- punpcklqdq %xmm0,%xmm0
 movq %rbp,24(%rdi,%r14,8)
 xorq %r14,%r14
+ andq %rax,%rsi
+ notq %rax
+ movq %rdi,%rcx
+ andq %rax,%rcx
+ leaq -1(%r9),%r15
+ orq %rcx,%rsi

- movq %r9,%r15
- pxor %xmm5,%xmm5
+ movdqu (%rsi),%xmm1
+ movdqa %xmm0,(%rsp)
+ movdqu %xmm1,(%rdi)
 jmp .Lcopy4x
 .align 16
 .Lcopy4x:
- movdqu (%rsp,%r14,1),%xmm2
- movdqu 16(%rsp,%r14,1),%xmm4
- movdqu (%rdi,%r14,1),%xmm1
- movdqu 16(%rdi,%r14,1),%xmm3
- pxor %xmm1,%xmm2
- pxor %xmm3,%xmm4
- pand %xmm0,%xmm2
- pand %xmm0,%xmm4
- pxor %xmm1,%xmm2
- pxor %xmm3,%xmm4
- movdqu %xmm2,(%rdi,%r14,1)
- movdqu %xmm4,16(%rdi,%r14,1)
- movdqa %xmm5,(%rsp,%r14,1)
- movdqa %xmm5,16(%rsp,%r14,1)
-
+ movdqu 16(%rsi,%r14,1),%xmm2
+ movdqu 32(%rsi,%r14,1),%xmm1
+ movdqa %xmm0,16(%rsp,%r14,1)
+ movdqu %xmm2,16(%rdi,%r14,1)
+ movdqa %xmm0,32(%rsp,%r14,1)
+ movdqu %xmm1,32(%rdi,%r14,1)
 leaq 32(%r14),%r14
 decq %r15
 jnz .Lcopy4x

 shlq $2,%r9
+ movdqu 16(%rsi,%r14,1),%xmm2
+ movdqa %xmm0,16(%rsp,%r14,1)
+ movdqu %xmm2,16(%rdi,%r14,1)
 movq 8(%rsp,%r9,8),%rsi
 movq $1,%rax
- movq (%rsi),%r15
- movq 8(%rsi),%r14
- movq 16(%rsi),%r13
- movq 24(%rsi),%r12
- movq 32(%rsi),%rbp
- movq 40(%rsi),%rbx
- leaq 48(%rsi),%rsp
+ movq -48(%rsi),%r15
+ movq -40(%rsi),%r14
+ movq -32(%rsi),%r13
+ movq -24(%rsi),%r12
+ movq -16(%rsi),%rbp
+ movq -8(%rsi),%rbx
+ leaq (%rsi),%rsp
 .Lmul4x_epilogue:
 .byte 0xf3,0xc3
 .size bn_mul4x_mont,.-bn_mul4x_mont
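bn_mul4x_mont gets the same treatment in SSE form. Instead of broadcasting the borrow mask into %xmm0 and pand/pxor-merging every chunk, the code reuses the scalar pointer select, zeroes %xmm0 once (the pxor hoisted in the earlier hunk), and streams the selected result to %rdi with movdqu loads and stores while clearing the stack scratch with movdqa %xmm0 stores. The first and last 16-byte chunks are peeled, and the loop covers 32 bytes per iteration for num/4 - 1 iterations. Roughly, in C (names mine; num is a multiple of 4, so bytes = num*8 is a multiple of 32):

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    /* Rough shape of the new .Lcopy4x (names mine): copy the selected
     * result to rp while zeroing the on-stack scratch, 16 bytes at a
     * time; the asm peels the first and last chunks and handles two
     * chunks per loop iteration. */
    static void copy4x(uint8_t *rp, uint8_t *scratch, const uint8_t *src,
                       size_t bytes) {
      memcpy(rp, src, 16);           /* peeled head: movdqu/movdqu */
      memset(scratch, 0, 16);        /* movdqa %xmm0,(%rsp) */
      for (size_t off = 16; off + 16 < bytes; off += 16) {
        memcpy(rp + off, src + off, 16);
        memset(scratch + off, 0, 16);
      }
      memcpy(rp + bytes - 16, src + bytes - 16, 16); /* peeled tail */
      memset(scratch + bytes - 16, 0, 16);
    }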
@@ -617,14 +659,15 @@ bn_mul4x_mont:
 .type bn_sqr8x_mont,@function
 .align 32
 bn_sqr8x_mont:
-.Lsqr8x_enter:
 movq %rsp,%rax
+.Lsqr8x_enter:
 pushq %rbx
 pushq %rbp
 pushq %r12
 pushq %r13
 pushq %r14
 pushq %r15
+.Lsqr8x_prologue:

 movl %r9d,%r10d
 shll $3,%r9d
@@ -637,25 +680,43 @@ bn_sqr8x_mont:


 leaq -64(%rsp,%r9,2),%r11
+ movq %rsp,%rbp
 movq (%r8),%r8
 subq %rsi,%r11
 andq $4095,%r11
 cmpq %r11,%r10
 jb .Lsqr8x_sp_alt
- subq %r11,%rsp
- leaq -64(%rsp,%r9,2),%rsp
+ subq %r11,%rbp
+ leaq -64(%rbp,%r9,2),%rbp
 jmp .Lsqr8x_sp_done

 .align 32
 .Lsqr8x_sp_alt:
 leaq 4096-64(,%r9,2),%r10
- leaq -64(%rsp,%r9,2),%rsp
+ leaq -64(%rbp,%r9,2),%rbp
 subq %r10,%r11
 movq $0,%r10
 cmovcq %r10,%r11
- subq %r11,%rsp
+ subq %r11,%rbp
 .Lsqr8x_sp_done:
- andq $-64,%rsp
+ andq $-64,%rbp
+ movq %rsp,%r11
+ subq %rbp,%r11
+ andq $-4096,%r11
+ leaq (%r11,%rbp,1),%rsp
+ movq (%rsp),%r10
+ cmpq %rbp,%rsp
+ ja .Lsqr8x_page_walk
+ jmp .Lsqr8x_page_walk_done
+
+.align 16
+.Lsqr8x_page_walk:
+ leaq -4096(%rsp),%rsp
+ movq (%rsp),%r10
+ cmpq %rbp,%rsp
+ ja .Lsqr8x_page_walk
+.Lsqr8x_page_walk_done:
+
 movq %r9,%r10
 negq %r9

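bn_sqr8x_mont follows the same pattern with one twist: the size and alignment arithmetic that used to lower %rsp directly is now carried out on %rbp, so the real stack pointer does not move until the final target is known, and the page walk then takes it down. Relocating .Lsqr8x_enter below the movq %rsp,%rax save (with the new .Lsqr8x_prologue marker) makes the snapshot happen on the direct-call path as well. The walk's starting point is the old stack pointer rounded down to %rbp's page phase, as in this sketch (names mine):

    #include <stdint.h>

    /* Where the sqr8x descent starts (sketch): the old stack pointer
     * rounded down so it shares rbp's offset within a 4096-byte page;
     * the walk then steps down one page at a time until it reaches rbp. */
    static uintptr_t walk_start(uintptr_t old_rsp, uintptr_t rbp) {
      return rbp + ((old_rsp - rbp) & ~(uintptr_t)4095);
    }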