| Index: third_party/boringssl/win-x86_64/crypto/bn/x86_64-mont.asm
|
| diff --git a/third_party/boringssl/win-x86_64/crypto/bn/x86_64-mont.asm b/third_party/boringssl/win-x86_64/crypto/bn/x86_64-mont.asm
|
| index db0d1b976fcefd6370083cf0ce4456905dc3463d..4d8e1cb72a736b29f70158190e48bc3e774ca9b4 100644
|
| --- a/third_party/boringssl/win-x86_64/crypto/bn/x86_64-mont.asm
|
| +++ b/third_party/boringssl/win-x86_64/crypto/bn/x86_64-mont.asm
|
| @@ -677,20 +677,20 @@ $L$sqr8x_enter:
|
|
|
|
|
|
|
| - lea r11,[((-64))+r9*4+rsp]
|
| + lea r11,[((-64))+r9*2+rsp]
|
| mov r8,QWORD[r8]
|
| sub r11,rsi
|
| and r11,4095
|
| cmp r10,r11
|
| jb NEAR $L$sqr8x_sp_alt
|
| sub rsp,r11
|
| - lea rsp,[((-64))+r9*4+rsp]
|
| + lea rsp,[((-64))+r9*2+rsp]
|
| jmp NEAR $L$sqr8x_sp_done
|
|
|
| ALIGN 32
|
| $L$sqr8x_sp_alt:
|
| - lea r10,[((4096-64))+r9*4]
|
| - lea rsp,[((-64))+r9*4+rsp]
|
| + lea r10,[((4096-64))+r9*2]
|
| + lea rsp,[((-64))+r9*2+rsp]
|
| sub r11,r10
|
| mov r10,0
|
| cmovc r11,r10
|
| @@ -700,58 +700,80 @@ $L$sqr8x_sp_done:
|
| mov r10,r9
|
| neg r9
|
|
|
| - lea r11,[64+r9*2+rsp]
|
| mov QWORD[32+rsp],r8
|
| mov QWORD[40+rsp],rax
|
| $L$sqr8x_body:
|
|
|
| - mov rbp,r9
|
| -DB 102,73,15,110,211
|
| - shr rbp,3+2
|
| - mov eax,DWORD[((OPENSSL_ia32cap_P+8))]
|
| - jmp NEAR $L$sqr8x_copy_n
|
| -
|
| -ALIGN 32
|
| -$L$sqr8x_copy_n:
|
| - movq xmm0,QWORD[rcx]
|
| - movq xmm1,QWORD[8+rcx]
|
| - movq xmm3,QWORD[16+rcx]
|
| - movq xmm4,QWORD[24+rcx]
|
| - lea rcx,[32+rcx]
|
| - movdqa XMMWORD[r11],xmm0
|
| - movdqa XMMWORD[16+r11],xmm1
|
| - movdqa XMMWORD[32+r11],xmm3
|
| - movdqa XMMWORD[48+r11],xmm4
|
| - lea r11,[64+r11]
|
| - dec rbp
|
| - jnz NEAR $L$sqr8x_copy_n
|
| -
|
| +DB 102,72,15,110,209 ; bytes 66 48 0F 6E D1 encode movq xmm2,rcx: rcx is stashed in xmm2 ahead of the call
|
| pxor xmm0,xmm0
|
| DB 102,72,15,110,207
|
| DB 102,73,15,110,218
|
| call bn_sqr8x_internal
|
|
|
| +
|
| +
|
| +
|
| + lea rbx,[r9*1+rdi] ; rbx = rdi + r9, with rdi/r9 as left by bn_sqr8x_internal; NOTE(review): presumably the start of the value to be reduced - confirm
|
| + mov rcx,r9 ; rcx becomes the pass counter for $L$sqr8x_sub (scaled by the sar below)
|
| + mov rdx,r9 ; rdx = r9, used later as the index in the [rdx+rbx] zeroing stores of $L$sqr8x_cond_copy
|
| +DB 102,72,15,126,207 ; bytes 66 48 0F 7E CF encode movq rdi,xmm1: reload the rdi value stashed in xmm1 before the call
|
| + sar rcx,3+2 ; rcx = r9 >> 5 (arithmetic); the counter runs up to zero, so r9 is expected to be negative here. CF = bit 4 of r9, i.e. 0 when r9 is a multiple of 32 - NOTE(review): confirm, the sbb chain below starts from this CF
|
| + jmp NEAR $L$sqr8x_sub ; jmp leaves the flags untouched
|
| +
|
| +ALIGN 32
|
| +$L$sqr8x_sub: ; each pass: [rdi] = [rbx] - [rbp] - borrow over 4 qwords (32 bytes)
|
| + mov r12,QWORD[rbx] ; load four qwords of the minuend; mov does not alter CF
|
| + mov r13,QWORD[8+rbx]
|
| + mov r14,QWORD[16+rbx]
|
| + mov r15,QWORD[24+rbx]
|
| + lea rbx,[32+rbx] ; advance the minuend pointer; lea is used so CF is preserved
|
| + sbb r12,QWORD[rbp] ; consumes the borrow from the previous pass (or the CF set up before the loop); NOTE(review): rbp is set outside this view, presumably the modulus - confirm
|
| + sbb r13,QWORD[8+rbp]
|
| + sbb r14,QWORD[16+rbp]
|
| + sbb r15,QWORD[24+rbp]
|
| + lea rbp,[32+rbp] ; advance the subtrahend pointer without touching CF
|
| + mov QWORD[rdi],r12 ; store the four difference qwords
|
| + mov QWORD[8+rdi],r13
|
| + mov QWORD[16+rdi],r14
|
| + mov QWORD[24+rdi],r15
|
| + lea rdi,[32+rdi] ; advance the destination pointer without touching CF
|
| + inc rcx ; inc does not modify CF, so the borrow survives into the next pass; ZF drives the loop
|
| + jnz NEAR $L$sqr8x_sub ; repeat until the counter reaches zero
|
| +
|
| + sbb rax,0 ; rax -= final borrow of the subtraction loop; NOTE(review): rax is set outside this view, presumably the top carry from bn_sqr8x_internal - confirm
|
| + lea rbx,[r9*1+rbx] ; rbx += r9: rewinds rbx to where the loop started when r9 == -(bytes processed)
|
| + lea rdi,[r9*1+rdi] ; rdi += r9: same rewind for the destination pointer
|
| +
|
| +DB 102,72,15,110,200 ; bytes 66 48 0F 6E C8 encode movq xmm1,rax: move the select value into xmm1
|
| pxor xmm0,xmm0
|
| - lea rax,[48+rsp]
|
| - lea rdx,[64+r9*2+rsp]
|
| - shr r9,3+2
|
| + pshufd xmm1,xmm1,0 ; replicate the low dword of xmm1 into all four lanes; used as the select mask in $L$sqr8x_cond_copy
|
| mov rsi,QWORD[40+rsp]
|
| - jmp NEAR $L$sqr8x_zero
|
| + jmp NEAR $L$sqr8x_cond_copy ; enter the 32-byte-aligned select-and-wipe loop
|
|
|
| ALIGN 32
|
| -$L$sqr8x_zero:
|
| - movdqa XMMWORD[rax],xmm0
|
| - movdqa XMMWORD[16+rax],xmm0
|
| - movdqa XMMWORD[32+rax],xmm0
|
| - movdqa XMMWORD[48+rax],xmm0
|
| - lea rax,[64+rax]
|
| - movdqa XMMWORD[rdx],xmm0
|
| - movdqa XMMWORD[16+rdx],xmm0
|
| - movdqa XMMWORD[32+rdx],xmm0
|
| - movdqa XMMWORD[48+rdx],xmm0
|
| - lea rdx,[64+rdx]
|
| - dec r9
|
| - jnz NEAR $L$sqr8x_zero
|
| +$L$sqr8x_cond_copy: ; each pass: branch-free select of 32 bytes between [rbx] and [rdi] into [rdi], zeroing the [rbx] source as it goes
|
| + movdqa xmm2,XMMWORD[rbx] ; aligned loads: rbx must be 16-byte aligned here
|
| + movdqa xmm3,XMMWORD[16+rbx]
|
| + lea rbx,[32+rbx] ; advance; the stores below address the bytes just read via -32/-16 offsets
|
| + movdqu xmm4,XMMWORD[rdi] ; unaligned loads of the current [rdi] contents
|
| + movdqu xmm5,XMMWORD[16+rdi]
|
| + lea rdi,[32+rdi]
|
| + movdqa XMMWORD[(-32)+rbx],xmm0 ; xmm0 is zero here (pxor before the loop and at the end of each pass): wipe the 32 bytes just loaded from rbx
|
| + movdqa XMMWORD[(-16)+rbx],xmm0
|
| + movdqa XMMWORD[(-32)+rdx*1+rbx],xmm0 ; also wipe the 32 bytes at offset rdx from there; rdx was copied from r9 before $L$sqr8x_sub
|
| + movdqa XMMWORD[(-16)+rdx*1+rbx],xmm0
|
| + pcmpeqd xmm0,xmm1 ; xmm0 (zero) becomes all-ones in lanes where xmm1 is zero: the inverse mask. NOTE(review): assumes the xmm1 dword is 0 or all-ones - confirm
|
| + pand xmm2,xmm1 ; keep the [rbx] data where the mask is set
|
| + pand xmm3,xmm1
|
| + pand xmm4,xmm0 ; keep the [rdi] data where the mask is clear
|
| + pand xmm5,xmm0
|
| + pxor xmm0,xmm0 ; restore xmm0 to zero for the next pass's wiping stores
|
| + por xmm4,xmm2 ; merge the two masked halves
|
| + por xmm5,xmm3
|
| + movdqu XMMWORD[(-32)+rdi],xmm4 ; write the selected 32 bytes back to the destination
|
| + movdqu XMMWORD[(-16)+rdi],xmm5
|
| + add r9,32 ; r9 counts up by 32 bytes per pass toward zero
|
| + jnz NEAR $L$sqr8x_cond_copy
|
|
|
| mov rax,1
|
| mov r15,QWORD[((-48))+rsi]
|
|
|