Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1608)

Unified Diff: third_party/boringssl/linux-x86_64/crypto/bn/x86_64-mont.S

Issue 2219933002: Land BoringSSL roll on master (Closed) Base URL: git@github.com:dart-lang/sdk.git@master
Patch Set: Created 4 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: third_party/boringssl/linux-x86_64/crypto/bn/x86_64-mont.S
diff --git a/third_party/boringssl/linux-x86_64/crypto/bn/x86_64-mont.S b/third_party/boringssl/linux-x86_64/crypto/bn/x86_64-mont.S
index 4d401c6743f84448e533db45a600b0fdc5e52a21..83926ad789b0a25102fb778b1842f32270aa47f3 100644
--- a/third_party/boringssl/linux-x86_64/crypto/bn/x86_64-mont.S
+++ b/third_party/boringssl/linux-x86_64/crypto/bn/x86_64-mont.S
@@ -636,20 +636,20 @@ bn_sqr8x_mont:
- leaq -64(%rsp,%r9,4),%r11
+ leaq -64(%rsp,%r9,2),%r11
movq (%r8),%r8
subq %rsi,%r11
andq $4095,%r11
cmpq %r11,%r10
jb .Lsqr8x_sp_alt
subq %r11,%rsp
- leaq -64(%rsp,%r9,4),%rsp
+ leaq -64(%rsp,%r9,2),%rsp
jmp .Lsqr8x_sp_done
.align 32
.Lsqr8x_sp_alt:
- leaq 4096-64(,%r9,4),%r10
- leaq -64(%rsp,%r9,4),%rsp
+ leaq 4096-64(,%r9,2),%r10
+ leaq -64(%rsp,%r9,2),%rsp
subq %r10,%r11
movq $0,%r10
cmovcq %r10,%r11
@@ -659,58 +659,80 @@ bn_sqr8x_mont:
movq %r9,%r10
negq %r9
- leaq 64(%rsp,%r9,2),%r11
movq %r8,32(%rsp)
movq %rax,40(%rsp)
.Lsqr8x_body:
- movq %r9,%rbp
-.byte 102,73,15,110,211
- shrq $3+2,%rbp
- movl OPENSSL_ia32cap_P+8(%rip),%eax
- jmp .Lsqr8x_copy_n
-
-.align 32
-.Lsqr8x_copy_n:
- movq 0(%rcx),%xmm0
- movq 8(%rcx),%xmm1
- movq 16(%rcx),%xmm3
- movq 24(%rcx),%xmm4
- leaq 32(%rcx),%rcx
- movdqa %xmm0,0(%r11)
- movdqa %xmm1,16(%r11)
- movdqa %xmm3,32(%r11)
- movdqa %xmm4,48(%r11)
- leaq 64(%r11),%r11
- decq %rbp
- jnz .Lsqr8x_copy_n
-
+.byte 102,72,15,110,209
pxor %xmm0,%xmm0
.byte 102,72,15,110,207
.byte 102,73,15,110,218
call bn_sqr8x_internal
+
+
+
+ leaq (%rdi,%r9,1),%rbx
+ movq %r9,%rcx
+ movq %r9,%rdx
+.byte 102,72,15,126,207
+ sarq $3+2,%rcx
+ jmp .Lsqr8x_sub
+
+.align 32
+.Lsqr8x_sub:
+ movq 0(%rbx),%r12
+ movq 8(%rbx),%r13
+ movq 16(%rbx),%r14
+ movq 24(%rbx),%r15
+ leaq 32(%rbx),%rbx
+ sbbq 0(%rbp),%r12
+ sbbq 8(%rbp),%r13
+ sbbq 16(%rbp),%r14
+ sbbq 24(%rbp),%r15
+ leaq 32(%rbp),%rbp
+ movq %r12,0(%rdi)
+ movq %r13,8(%rdi)
+ movq %r14,16(%rdi)
+ movq %r15,24(%rdi)
+ leaq 32(%rdi),%rdi
+ incq %rcx
+ jnz .Lsqr8x_sub
+
+ sbbq $0,%rax
+ leaq (%rbx,%r9,1),%rbx
+ leaq (%rdi,%r9,1),%rdi
+
+.byte 102,72,15,110,200
pxor %xmm0,%xmm0
- leaq 48(%rsp),%rax
- leaq 64(%rsp,%r9,2),%rdx
- shrq $3+2,%r9
+ pshufd $0,%xmm1,%xmm1
movq 40(%rsp),%rsi
- jmp .Lsqr8x_zero
+ jmp .Lsqr8x_cond_copy
.align 32
-.Lsqr8x_zero:
- movdqa %xmm0,0(%rax)
- movdqa %xmm0,16(%rax)
- movdqa %xmm0,32(%rax)
- movdqa %xmm0,48(%rax)
- leaq 64(%rax),%rax
- movdqa %xmm0,0(%rdx)
- movdqa %xmm0,16(%rdx)
- movdqa %xmm0,32(%rdx)
- movdqa %xmm0,48(%rdx)
- leaq 64(%rdx),%rdx
- decq %r9
- jnz .Lsqr8x_zero
+.Lsqr8x_cond_copy:
+ movdqa 0(%rbx),%xmm2
+ movdqa 16(%rbx),%xmm3
+ leaq 32(%rbx),%rbx
+ movdqu 0(%rdi),%xmm4
+ movdqu 16(%rdi),%xmm5
+ leaq 32(%rdi),%rdi
+ movdqa %xmm0,-32(%rbx)
+ movdqa %xmm0,-16(%rbx)
+ movdqa %xmm0,-32(%rbx,%rdx,1)
+ movdqa %xmm0,-16(%rbx,%rdx,1)
+ pcmpeqd %xmm1,%xmm0
+ pand %xmm1,%xmm2
+ pand %xmm1,%xmm3
+ pand %xmm0,%xmm4
+ pand %xmm0,%xmm5
+ pxor %xmm0,%xmm0
+ por %xmm2,%xmm4
+ por %xmm3,%xmm5
+ movdqu %xmm4,-32(%rdi)
+ movdqu %xmm5,-16(%rdi)
+ addq $32,%r9
+ jnz .Lsqr8x_cond_copy
movq $1,%rax
movq -48(%rsi),%r15

Powered by Google App Engine
This is Rietveld 408576698