Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(50)

Unified Diff: third_party/boringssl/mac-x86_64/crypto/bn/rsaz-x86_64.S

Issue 2219933002: Land BoringSSL roll on master (Closed) Base URL: git@github.com:dart-lang/sdk.git@master
Patch Set: Created 4 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: third_party/boringssl/mac-x86_64/crypto/bn/rsaz-x86_64.S
diff --git a/third_party/boringssl/mac-x86_64/crypto/bn/rsaz-x86_64.S b/third_party/boringssl/mac-x86_64/crypto/bn/rsaz-x86_64.S
index 5e9e82feb025bf07215153bf39e6ed73d4d83ce6..337276f9cbf34f99ee8ee578c567ed6ba5e93838 100644
--- a/third_party/boringssl/mac-x86_64/crypto/bn/rsaz-x86_64.S
+++ b/third_party/boringssl/mac-x86_64/crypto/bn/rsaz-x86_64.S
@@ -465,48 +465,94 @@ _rsaz_512_mul_gather4:
pushq %r14
pushq %r15
- movl %r9d,%r9d
- subq $128+24,%rsp
+ subq $152,%rsp
L$mul_gather4_body:
- movl 64(%rdx,%r9,4),%eax
-.byte 102,72,15,110,199
- movl (%rdx,%r9,4),%ebx
-.byte 102,72,15,110,201
+ movd %r9d,%xmm8
+ movdqa L$inc+16(%rip),%xmm1
+ movdqa L$inc(%rip),%xmm0
+
+ pshufd $0,%xmm8,%xmm8
+ movdqa %xmm1,%xmm7
+ movdqa %xmm1,%xmm2
+ paddd %xmm0,%xmm1
+ pcmpeqd %xmm8,%xmm0
+ movdqa %xmm7,%xmm3
+ paddd %xmm1,%xmm2
+ pcmpeqd %xmm8,%xmm1
+ movdqa %xmm7,%xmm4
+ paddd %xmm2,%xmm3
+ pcmpeqd %xmm8,%xmm2
+ movdqa %xmm7,%xmm5
+ paddd %xmm3,%xmm4
+ pcmpeqd %xmm8,%xmm3
+ movdqa %xmm7,%xmm6
+ paddd %xmm4,%xmm5
+ pcmpeqd %xmm8,%xmm4
+ paddd %xmm5,%xmm6
+ pcmpeqd %xmm8,%xmm5
+ paddd %xmm6,%xmm7
+ pcmpeqd %xmm8,%xmm6
+ pcmpeqd %xmm8,%xmm7
+
+ movdqa 0(%rdx),%xmm8
+ movdqa 16(%rdx),%xmm9
+ movdqa 32(%rdx),%xmm10
+ movdqa 48(%rdx),%xmm11
+ pand %xmm0,%xmm8
+ movdqa 64(%rdx),%xmm12
+ pand %xmm1,%xmm9
+ movdqa 80(%rdx),%xmm13
+ pand %xmm2,%xmm10
+ movdqa 96(%rdx),%xmm14
+ pand %xmm3,%xmm11
+ movdqa 112(%rdx),%xmm15
+ leaq 128(%rdx),%rbp
+ pand %xmm4,%xmm12
+ pand %xmm5,%xmm13
+ pand %xmm6,%xmm14
+ pand %xmm7,%xmm15
+ por %xmm10,%xmm8
+ por %xmm11,%xmm9
+ por %xmm12,%xmm8
+ por %xmm13,%xmm9
+ por %xmm14,%xmm8
+ por %xmm15,%xmm9
+
+ por %xmm9,%xmm8
+ pshufd $0x4e,%xmm8,%xmm9
+ por %xmm9,%xmm8
+.byte 102,76,15,126,195
+
movq %r8,128(%rsp)
+ movq %rdi,128+8(%rsp)
+ movq %rcx,128+16(%rsp)
- shlq $32,%rax
- orq %rax,%rbx
movq (%rsi),%rax
movq 8(%rsi),%rcx
- leaq 128(%rdx,%r9,4),%rbp
mulq %rbx
movq %rax,(%rsp)
movq %rcx,%rax
movq %rdx,%r8
mulq %rbx
- movd (%rbp),%xmm4
addq %rax,%r8
movq 16(%rsi),%rax
movq %rdx,%r9
adcq $0,%r9
mulq %rbx
- movd 64(%rbp),%xmm5
addq %rax,%r9
movq 24(%rsi),%rax
movq %rdx,%r10
adcq $0,%r10
mulq %rbx
- pslldq $4,%xmm5
addq %rax,%r10
movq 32(%rsi),%rax
movq %rdx,%r11
adcq $0,%r11
mulq %rbx
- por %xmm5,%xmm4
addq %rax,%r11
movq 40(%rsi),%rax
movq %rdx,%r12
@@ -519,14 +565,12 @@ L$mul_gather4_body:
adcq $0,%r13
mulq %rbx
- leaq 128(%rbp),%rbp
addq %rax,%r13
movq 56(%rsi),%rax
movq %rdx,%r14
adcq $0,%r14
mulq %rbx
-.byte 102,72,15,126,227
addq %rax,%r14
movq (%rsi),%rax
movq %rdx,%r15
@@ -538,6 +582,35 @@ L$mul_gather4_body:
.p2align 5
L$oop_mul_gather:
+ movdqa 0(%rbp),%xmm8
+ movdqa 16(%rbp),%xmm9
+ movdqa 32(%rbp),%xmm10
+ movdqa 48(%rbp),%xmm11
+ pand %xmm0,%xmm8
+ movdqa 64(%rbp),%xmm12
+ pand %xmm1,%xmm9
+ movdqa 80(%rbp),%xmm13
+ pand %xmm2,%xmm10
+ movdqa 96(%rbp),%xmm14
+ pand %xmm3,%xmm11
+ movdqa 112(%rbp),%xmm15
+ leaq 128(%rbp),%rbp
+ pand %xmm4,%xmm12
+ pand %xmm5,%xmm13
+ pand %xmm6,%xmm14
+ pand %xmm7,%xmm15
+ por %xmm10,%xmm8
+ por %xmm11,%xmm9
+ por %xmm12,%xmm8
+ por %xmm13,%xmm9
+ por %xmm14,%xmm8
+ por %xmm15,%xmm9
+
+ por %xmm9,%xmm8
+ pshufd $0x4e,%xmm8,%xmm9
+ por %xmm9,%xmm8
+.byte 102,76,15,126,195
+
mulq %rbx
addq %rax,%r8
movq 8(%rsi),%rax
@@ -546,7 +619,6 @@ L$oop_mul_gather:
adcq $0,%r8
mulq %rbx
- movd (%rbp),%xmm4
addq %rax,%r9
movq 16(%rsi),%rax
adcq $0,%rdx
@@ -555,7 +627,6 @@ L$oop_mul_gather:
adcq $0,%r9
mulq %rbx
- movd 64(%rbp),%xmm5
addq %rax,%r10
movq 24(%rsi),%rax
adcq $0,%rdx
@@ -564,7 +635,6 @@ L$oop_mul_gather:
adcq $0,%r10
mulq %rbx
- pslldq $4,%xmm5
addq %rax,%r11
movq 32(%rsi),%rax
adcq $0,%rdx
@@ -573,7 +643,6 @@ L$oop_mul_gather:
adcq $0,%r11
mulq %rbx
- por %xmm5,%xmm4
addq %rax,%r12
movq 40(%rsi),%rax
adcq $0,%rdx
@@ -598,7 +667,6 @@ L$oop_mul_gather:
adcq $0,%r14
mulq %rbx
-.byte 102,72,15,126,227
addq %rax,%r15
movq (%rsi),%rax
adcq $0,%rdx
@@ -606,7 +674,6 @@ L$oop_mul_gather:
movq %rdx,%r15
adcq $0,%r15
- leaq 128(%rbp),%rbp
leaq 8(%rdi),%rdi
decl %ecx
@@ -621,8 +688,8 @@ L$oop_mul_gather:
movq %r14,48(%rdi)
movq %r15,56(%rdi)
-.byte 102,72,15,126,199
-.byte 102,72,15,126,205
+ movq 128+8(%rsp),%rdi
+ movq 128+16(%rsp),%rbp
movq (%rsp),%r8
movq 8(%rsp),%r9
@@ -672,7 +739,7 @@ _rsaz_512_mul_scatter4:
movl %r9d,%r9d
subq $128+24,%rsp
L$mul_scatter4_body:
- leaq (%r8,%r9,4),%r8
+ leaq (%r8,%r9,8),%r8
.byte 102,72,15,110,199
.byte 102,72,15,110,202
.byte 102,73,15,110,208
@@ -708,30 +775,14 @@ L$mul_scatter4_body:
call __rsaz_512_subtract
- movl %r8d,0(%rsi)
- shrq $32,%r8
- movl %r9d,128(%rsi)
- shrq $32,%r9
- movl %r10d,256(%rsi)
- shrq $32,%r10
- movl %r11d,384(%rsi)
- shrq $32,%r11
- movl %r12d,512(%rsi)
- shrq $32,%r12
- movl %r13d,640(%rsi)
- shrq $32,%r13
- movl %r14d,768(%rsi)
- shrq $32,%r14
- movl %r15d,896(%rsi)
- shrq $32,%r15
- movl %r8d,64(%rsi)
- movl %r9d,192(%rsi)
- movl %r10d,320(%rsi)
- movl %r11d,448(%rsi)
- movl %r12d,576(%rsi)
- movl %r13d,704(%rsi)
- movl %r14d,832(%rsi)
- movl %r15d,960(%rsi)
+ movq %r8,0(%rsi)
+ movq %r9,128(%rsi)
+ movq %r10,256(%rsi)
+ movq %r11,384(%rsi)
+ movq %r12,512(%rsi)
+ movq %r13,640(%rsi)
+ movq %r14,768(%rsi)
+ movq %r15,896(%rsi)
leaq 128+24+48(%rsp),%rax
movq -48(%rax),%r15
@@ -1086,16 +1137,14 @@ L$oop_mul:
.p2align 4
_rsaz_512_scatter4:
- leaq (%rdi,%rdx,4),%rdi
+ leaq (%rdi,%rdx,8),%rdi
movl $8,%r9d
jmp L$oop_scatter
.p2align 4
L$oop_scatter:
movq (%rsi),%rax
leaq 8(%rsi),%rsi
- movl %eax,(%rdi)
- shrq $32,%rax
- movl %eax,64(%rdi)
+ movq %rax,(%rdi)
leaq 128(%rdi),%rdi
decl %r9d
jnz L$oop_scatter
@@ -1107,20 +1156,73 @@ L$oop_scatter:
.p2align 4
_rsaz_512_gather4:
- leaq (%rsi,%rdx,4),%rsi
+ movd %edx,%xmm8
+ movdqa L$inc+16(%rip),%xmm1
+ movdqa L$inc(%rip),%xmm0
+
+ pshufd $0,%xmm8,%xmm8
+ movdqa %xmm1,%xmm7
+ movdqa %xmm1,%xmm2
+ paddd %xmm0,%xmm1
+ pcmpeqd %xmm8,%xmm0
+ movdqa %xmm7,%xmm3
+ paddd %xmm1,%xmm2
+ pcmpeqd %xmm8,%xmm1
+ movdqa %xmm7,%xmm4
+ paddd %xmm2,%xmm3
+ pcmpeqd %xmm8,%xmm2
+ movdqa %xmm7,%xmm5
+ paddd %xmm3,%xmm4
+ pcmpeqd %xmm8,%xmm3
+ movdqa %xmm7,%xmm6
+ paddd %xmm4,%xmm5
+ pcmpeqd %xmm8,%xmm4
+ paddd %xmm5,%xmm6
+ pcmpeqd %xmm8,%xmm5
+ paddd %xmm6,%xmm7
+ pcmpeqd %xmm8,%xmm6
+ pcmpeqd %xmm8,%xmm7
movl $8,%r9d
jmp L$oop_gather
.p2align 4
L$oop_gather:
- movl (%rsi),%eax
- movl 64(%rsi),%r8d
+ movdqa 0(%rsi),%xmm8
+ movdqa 16(%rsi),%xmm9
+ movdqa 32(%rsi),%xmm10
+ movdqa 48(%rsi),%xmm11
+ pand %xmm0,%xmm8
+ movdqa 64(%rsi),%xmm12
+ pand %xmm1,%xmm9
+ movdqa 80(%rsi),%xmm13
+ pand %xmm2,%xmm10
+ movdqa 96(%rsi),%xmm14
+ pand %xmm3,%xmm11
+ movdqa 112(%rsi),%xmm15
leaq 128(%rsi),%rsi
- shlq $32,%r8
- orq %r8,%rax
- movq %rax,(%rdi)
+ pand %xmm4,%xmm12
+ pand %xmm5,%xmm13
+ pand %xmm6,%xmm14
+ pand %xmm7,%xmm15
+ por %xmm10,%xmm8
+ por %xmm11,%xmm9
+ por %xmm12,%xmm8
+ por %xmm13,%xmm9
+ por %xmm14,%xmm8
+ por %xmm15,%xmm9
+
+ por %xmm9,%xmm8
+ pshufd $0x4e,%xmm8,%xmm9
+ por %xmm9,%xmm8
+ movq %xmm8,(%rdi)
leaq 8(%rdi),%rdi
decl %r9d
jnz L$oop_gather
.byte 0xf3,0xc3
+L$SEH_end_rsaz_512_gather4:
+
+.p2align 6
+L$inc:
+.long 0,0, 1,1
+.long 2,2, 2,2
#endif
« no previous file with comments | « third_party/boringssl/mac-x86_64/crypto/aes/vpaes-x86_64.S ('k') | third_party/boringssl/mac-x86_64/crypto/bn/x86_64-mont.S » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698