Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(70)

Unified Diff: third_party/boringssl/win-x86_64/crypto/bn/rsaz-x86_64.asm

Issue 2219933002: Land BoringSSL roll on master (Closed) Base URL: git@github.com:dart-lang/sdk.git@master
Patch Set: Created 4 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: third_party/boringssl/win-x86_64/crypto/bn/rsaz-x86_64.asm
diff --git a/third_party/boringssl/win-x86_64/crypto/bn/rsaz-x86_64.asm b/third_party/boringssl/win-x86_64/crypto/bn/rsaz-x86_64.asm
index 04d5e3915afb406c19996105d6e315417f4f2ca5..72ec5052898e19154dc85edfb67ff291bc877385 100644
--- a/third_party/boringssl/win-x86_64/crypto/bn/rsaz-x86_64.asm
+++ b/third_party/boringssl/win-x86_64/crypto/bn/rsaz-x86_64.asm
@@ -504,48 +504,104 @@ $L$SEH_begin_rsaz_512_mul_gather4:
push r14
push r15
- mov r9d,r9d
- sub rsp,128+24
+ sub rsp,328
+ movaps XMMWORD[160+rsp],xmm6
+ movaps XMMWORD[176+rsp],xmm7
+ movaps XMMWORD[192+rsp],xmm8
+ movaps XMMWORD[208+rsp],xmm9
+ movaps XMMWORD[224+rsp],xmm10
+ movaps XMMWORD[240+rsp],xmm11
+ movaps XMMWORD[256+rsp],xmm12
+ movaps XMMWORD[272+rsp],xmm13
+ movaps XMMWORD[288+rsp],xmm14
+ movaps XMMWORD[304+rsp],xmm15
$L$mul_gather4_body:
- mov eax,DWORD[64+r9*4+rdx]
-DB 102,72,15,110,199
- mov ebx,DWORD[r9*4+rdx]
-DB 102,72,15,110,201
+ movd xmm8,r9d
+ movdqa xmm1,XMMWORD[(($L$inc+16))]
+ movdqa xmm0,XMMWORD[$L$inc]
+
+ pshufd xmm8,xmm8,0
+ movdqa xmm7,xmm1
+ movdqa xmm2,xmm1
+ paddd xmm1,xmm0
+ pcmpeqd xmm0,xmm8
+ movdqa xmm3,xmm7
+ paddd xmm2,xmm1
+ pcmpeqd xmm1,xmm8
+ movdqa xmm4,xmm7
+ paddd xmm3,xmm2
+ pcmpeqd xmm2,xmm8
+ movdqa xmm5,xmm7
+ paddd xmm4,xmm3
+ pcmpeqd xmm3,xmm8
+ movdqa xmm6,xmm7
+ paddd xmm5,xmm4
+ pcmpeqd xmm4,xmm8
+ paddd xmm6,xmm5
+ pcmpeqd xmm5,xmm8
+ paddd xmm7,xmm6
+ pcmpeqd xmm6,xmm8
+ pcmpeqd xmm7,xmm8
+
+ movdqa xmm8,XMMWORD[rdx]
+ movdqa xmm9,XMMWORD[16+rdx]
+ movdqa xmm10,XMMWORD[32+rdx]
+ movdqa xmm11,XMMWORD[48+rdx]
+ pand xmm8,xmm0
+ movdqa xmm12,XMMWORD[64+rdx]
+ pand xmm9,xmm1
+ movdqa xmm13,XMMWORD[80+rdx]
+ pand xmm10,xmm2
+ movdqa xmm14,XMMWORD[96+rdx]
+ pand xmm11,xmm3
+ movdqa xmm15,XMMWORD[112+rdx]
+ lea rbp,[128+rdx]
+ pand xmm12,xmm4
+ pand xmm13,xmm5
+ pand xmm14,xmm6
+ pand xmm15,xmm7
+ por xmm8,xmm10
+ por xmm9,xmm11
+ por xmm8,xmm12
+ por xmm9,xmm13
+ por xmm8,xmm14
+ por xmm9,xmm15
+
+ por xmm8,xmm9
+ pshufd xmm9,xmm8,0x4e
+ por xmm8,xmm9
+DB 102,76,15,126,195
+
mov QWORD[128+rsp],r8
+ mov QWORD[((128+8))+rsp],rdi
+ mov QWORD[((128+16))+rsp],rcx
- shl rax,32
- or rbx,rax
mov rax,QWORD[rsi]
mov rcx,QWORD[8+rsi]
- lea rbp,[128+r9*4+rdx]
mul rbx
mov QWORD[rsp],rax
mov rax,rcx
mov r8,rdx
mul rbx
- movd xmm4,DWORD[rbp]
add r8,rax
mov rax,QWORD[16+rsi]
mov r9,rdx
adc r9,0
mul rbx
- movd xmm5,DWORD[64+rbp]
add r9,rax
mov rax,QWORD[24+rsi]
mov r10,rdx
adc r10,0
mul rbx
- pslldq xmm5,4
add r10,rax
mov rax,QWORD[32+rsi]
mov r11,rdx
adc r11,0
mul rbx
- por xmm4,xmm5
add r11,rax
mov rax,QWORD[40+rsi]
mov r12,rdx
@@ -558,14 +614,12 @@ DB 102,72,15,110,201
adc r13,0
mul rbx
- lea rbp,[128+rbp]
add r13,rax
mov rax,QWORD[56+rsi]
mov r14,rdx
adc r14,0
mul rbx
-DB 102,72,15,126,227
add r14,rax
mov rax,QWORD[rsi]
mov r15,rdx
@@ -577,6 +631,35 @@ DB 102,72,15,126,227
ALIGN 32
$L$oop_mul_gather:
+ movdqa xmm8,XMMWORD[rbp]
+ movdqa xmm9,XMMWORD[16+rbp]
+ movdqa xmm10,XMMWORD[32+rbp]
+ movdqa xmm11,XMMWORD[48+rbp]
+ pand xmm8,xmm0
+ movdqa xmm12,XMMWORD[64+rbp]
+ pand xmm9,xmm1
+ movdqa xmm13,XMMWORD[80+rbp]
+ pand xmm10,xmm2
+ movdqa xmm14,XMMWORD[96+rbp]
+ pand xmm11,xmm3
+ movdqa xmm15,XMMWORD[112+rbp]
+ lea rbp,[128+rbp]
+ pand xmm12,xmm4
+ pand xmm13,xmm5
+ pand xmm14,xmm6
+ pand xmm15,xmm7
+ por xmm8,xmm10
+ por xmm9,xmm11
+ por xmm8,xmm12
+ por xmm9,xmm13
+ por xmm8,xmm14
+ por xmm9,xmm15
+
+ por xmm8,xmm9
+ pshufd xmm9,xmm8,0x4e
+ por xmm8,xmm9
+DB 102,76,15,126,195
+
mul rbx
add r8,rax
mov rax,QWORD[8+rsi]
@@ -585,7 +668,6 @@ $L$oop_mul_gather:
adc r8,0
mul rbx
- movd xmm4,DWORD[rbp]
add r9,rax
mov rax,QWORD[16+rsi]
adc rdx,0
@@ -594,7 +676,6 @@ $L$oop_mul_gather:
adc r9,0
mul rbx
- movd xmm5,DWORD[64+rbp]
add r10,rax
mov rax,QWORD[24+rsi]
adc rdx,0
@@ -603,7 +684,6 @@ $L$oop_mul_gather:
adc r10,0
mul rbx
- pslldq xmm5,4
add r11,rax
mov rax,QWORD[32+rsi]
adc rdx,0
@@ -612,7 +692,6 @@ $L$oop_mul_gather:
adc r11,0
mul rbx
- por xmm4,xmm5
add r12,rax
mov rax,QWORD[40+rsi]
adc rdx,0
@@ -637,7 +716,6 @@ $L$oop_mul_gather:
adc r14,0
mul rbx
-DB 102,72,15,126,227
add r15,rax
mov rax,QWORD[rsi]
adc rdx,0
@@ -645,7 +723,6 @@ DB 102,72,15,126,227
mov r15,rdx
adc r15,0
- lea rbp,[128+rbp]
lea rdi,[8+rdi]
dec ecx
@@ -660,8 +737,8 @@ DB 102,72,15,126,227
mov QWORD[48+rdi],r14
mov QWORD[56+rdi],r15
-DB 102,72,15,126,199
-DB 102,72,15,126,205
+ mov rdi,QWORD[((128+8))+rsp]
+ mov rbp,QWORD[((128+16))+rsp]
mov r8,QWORD[rsp]
mov r9,QWORD[8+rsp]
@@ -686,6 +763,17 @@ DB 102,72,15,126,205
call __rsaz_512_subtract
lea rax,[((128+24+48))+rsp]
+ movaps xmm6,XMMWORD[((160-200))+rax]
+ movaps xmm7,XMMWORD[((176-200))+rax]
+ movaps xmm8,XMMWORD[((192-200))+rax]
+ movaps xmm9,XMMWORD[((208-200))+rax]
+ movaps xmm10,XMMWORD[((224-200))+rax]
+ movaps xmm11,XMMWORD[((240-200))+rax]
+ movaps xmm12,XMMWORD[((256-200))+rax]
+ movaps xmm13,XMMWORD[((272-200))+rax]
+ movaps xmm14,XMMWORD[((288-200))+rax]
+ movaps xmm15,XMMWORD[((304-200))+rax]
+ lea rax,[176+rax]
mov r15,QWORD[((-48))+rax]
mov r14,QWORD[((-40))+rax]
mov r13,QWORD[((-32))+rax]
@@ -724,7 +812,7 @@ $L$SEH_begin_rsaz_512_mul_scatter4:
mov r9d,r9d
sub rsp,128+24
$L$mul_scatter4_body:
- lea r8,[r9*4+r8]
+ lea r8,[r9*8+r8]
DB 102,72,15,110,199
DB 102,72,15,110,202
DB 102,73,15,110,208
@@ -760,30 +848,14 @@ DB 102,72,15,126,214
call __rsaz_512_subtract
- mov DWORD[rsi],r8d
- shr r8,32
- mov DWORD[128+rsi],r9d
- shr r9,32
- mov DWORD[256+rsi],r10d
- shr r10,32
- mov DWORD[384+rsi],r11d
- shr r11,32
- mov DWORD[512+rsi],r12d
- shr r12,32
- mov DWORD[640+rsi],r13d
- shr r13,32
- mov DWORD[768+rsi],r14d
- shr r14,32
- mov DWORD[896+rsi],r15d
- shr r15,32
- mov DWORD[64+rsi],r8d
- mov DWORD[192+rsi],r9d
- mov DWORD[320+rsi],r10d
- mov DWORD[448+rsi],r11d
- mov DWORD[576+rsi],r12d
- mov DWORD[704+rsi],r13d
- mov DWORD[832+rsi],r14d
- mov DWORD[960+rsi],r15d
+ mov QWORD[rsi],r8
+ mov QWORD[128+rsi],r9
+ mov QWORD[256+rsi],r10
+ mov QWORD[384+rsi],r11
+ mov QWORD[512+rsi],r12
+ mov QWORD[640+rsi],r13
+ mov QWORD[768+rsi],r14
+ mov QWORD[896+rsi],r15
lea rax,[((128+24+48))+rsp]
mov r15,QWORD[((-48))+rax]
@@ -1150,16 +1222,14 @@ global rsaz_512_scatter4
ALIGN 16
rsaz_512_scatter4:
- lea rcx,[r8*4+rcx]
+ lea rcx,[r8*8+rcx]
mov r9d,8
jmp NEAR $L$oop_scatter
ALIGN 16
$L$oop_scatter:
mov rax,QWORD[rdx]
lea rdx,[8+rdx]
- mov DWORD[rcx],eax
- shr rax,32
- mov DWORD[64+rcx],eax
+ mov QWORD[rcx],rax
lea rcx,[128+rcx]
dec r9d
jnz NEAR $L$oop_scatter
@@ -1170,22 +1240,98 @@ global rsaz_512_gather4
ALIGN 16
rsaz_512_gather4:
- lea rdx,[r8*4+rdx]
+$L$SEH_begin_rsaz_512_gather4:
+DB 0x48,0x81,0xec,0xa8,0x00,0x00,0x00
+DB 0x0f,0x29,0x34,0x24
+DB 0x0f,0x29,0x7c,0x24,0x10
+DB 0x44,0x0f,0x29,0x44,0x24,0x20
+DB 0x44,0x0f,0x29,0x4c,0x24,0x30
+DB 0x44,0x0f,0x29,0x54,0x24,0x40
+DB 0x44,0x0f,0x29,0x5c,0x24,0x50
+DB 0x44,0x0f,0x29,0x64,0x24,0x60
+DB 0x44,0x0f,0x29,0x6c,0x24,0x70
+DB 0x44,0x0f,0x29,0xb4,0x24,0x80,0,0,0
+DB 0x44,0x0f,0x29,0xbc,0x24,0x90,0,0,0
+ movd xmm8,r8d
+ movdqa xmm1,XMMWORD[(($L$inc+16))]
+ movdqa xmm0,XMMWORD[$L$inc]
+
+ pshufd xmm8,xmm8,0
+ movdqa xmm7,xmm1
+ movdqa xmm2,xmm1
+ paddd xmm1,xmm0
+ pcmpeqd xmm0,xmm8
+ movdqa xmm3,xmm7
+ paddd xmm2,xmm1
+ pcmpeqd xmm1,xmm8
+ movdqa xmm4,xmm7
+ paddd xmm3,xmm2
+ pcmpeqd xmm2,xmm8
+ movdqa xmm5,xmm7
+ paddd xmm4,xmm3
+ pcmpeqd xmm3,xmm8
+ movdqa xmm6,xmm7
+ paddd xmm5,xmm4
+ pcmpeqd xmm4,xmm8
+ paddd xmm6,xmm5
+ pcmpeqd xmm5,xmm8
+ paddd xmm7,xmm6
+ pcmpeqd xmm6,xmm8
+ pcmpeqd xmm7,xmm8
mov r9d,8
jmp NEAR $L$oop_gather
ALIGN 16
$L$oop_gather:
- mov eax,DWORD[rdx]
- mov r8d,DWORD[64+rdx]
+ movdqa xmm8,XMMWORD[rdx]
+ movdqa xmm9,XMMWORD[16+rdx]
+ movdqa xmm10,XMMWORD[32+rdx]
+ movdqa xmm11,XMMWORD[48+rdx]
+ pand xmm8,xmm0
+ movdqa xmm12,XMMWORD[64+rdx]
+ pand xmm9,xmm1
+ movdqa xmm13,XMMWORD[80+rdx]
+ pand xmm10,xmm2
+ movdqa xmm14,XMMWORD[96+rdx]
+ pand xmm11,xmm3
+ movdqa xmm15,XMMWORD[112+rdx]
lea rdx,[128+rdx]
- shl r8,32
- or rax,r8
- mov QWORD[rcx],rax
+ pand xmm12,xmm4
+ pand xmm13,xmm5
+ pand xmm14,xmm6
+ pand xmm15,xmm7
+ por xmm8,xmm10
+ por xmm9,xmm11
+ por xmm8,xmm12
+ por xmm9,xmm13
+ por xmm8,xmm14
+ por xmm9,xmm15
+
+ por xmm8,xmm9
+ pshufd xmm9,xmm8,0x4e
+ por xmm8,xmm9
+ movq QWORD[rcx],xmm8
lea rcx,[8+rcx]
dec r9d
jnz NEAR $L$oop_gather
+ movaps xmm6,XMMWORD[rsp]
+ movaps xmm7,XMMWORD[16+rsp]
+ movaps xmm8,XMMWORD[32+rsp]
+ movaps xmm9,XMMWORD[48+rsp]
+ movaps xmm10,XMMWORD[64+rsp]
+ movaps xmm11,XMMWORD[80+rsp]
+ movaps xmm12,XMMWORD[96+rsp]
+ movaps xmm13,XMMWORD[112+rsp]
+ movaps xmm14,XMMWORD[128+rsp]
+ movaps xmm15,XMMWORD[144+rsp]
+ add rsp,0xa8
DB 0F3h,0C3h ;repret
+$L$SEH_end_rsaz_512_gather4:
+
+ALIGN 64
+$L$inc:
+ DD 0,0,1,1
+ DD 2,2,2,2
EXTERN __imp_RtlVirtualUnwind
ALIGN 16
@@ -1221,6 +1367,18 @@ se_handler:
lea rax,[((128+24+48))+rax]
+ lea rbx,[$L$mul_gather4_epilogue]
+ cmp rbx,r10
+ jne NEAR $L$se_not_in_mul_gather4
+
+ lea rax,[176+rax]
+
+ lea rsi,[((-48-168))+rax]
+ lea rdi,[512+r8]
+ mov ecx,20
+ DD 0xa548f3fc
+
+$L$se_not_in_mul_gather4:
mov rbx,QWORD[((-8))+rax]
mov rbp,QWORD[((-16))+rax]
mov r12,QWORD[((-24))+rax]
@@ -1296,6 +1454,10 @@ ALIGN 4
DD $L$SEH_end_rsaz_512_mul_by_one wrt ..imagebase
DD $L$SEH_info_rsaz_512_mul_by_one wrt ..imagebase
+ DD $L$SEH_begin_rsaz_512_gather4 wrt ..imagebase
+ DD $L$SEH_end_rsaz_512_gather4 wrt ..imagebase
+ DD $L$SEH_info_rsaz_512_gather4 wrt ..imagebase
+
section .xdata rdata align=8
ALIGN 8
$L$SEH_info_rsaz_512_sqr:
@@ -1318,3 +1480,16 @@ $L$SEH_info_rsaz_512_mul_by_one:
DB 9,0,0,0
DD se_handler wrt ..imagebase
DD $L$mul_by_one_body wrt ..imagebase,$L$mul_by_one_epilogue wrt ..imagebase
+$L$SEH_info_rsaz_512_gather4:
+DB 0x01,0x46,0x16,0x00
+DB 0x46,0xf8,0x09,0x00
+DB 0x3d,0xe8,0x08,0x00
+DB 0x34,0xd8,0x07,0x00
+DB 0x2e,0xc8,0x06,0x00
+DB 0x28,0xb8,0x05,0x00
+DB 0x22,0xa8,0x04,0x00
+DB 0x1c,0x98,0x03,0x00
+DB 0x16,0x88,0x02,0x00
+DB 0x10,0x78,0x01,0x00
+DB 0x0b,0x68,0x00,0x00
+DB 0x07,0x01,0x15,0x00
« no previous file with comments | « third_party/boringssl/win-x86/crypto/sha/sha256-586.asm ('k') | third_party/boringssl/win-x86_64/crypto/bn/x86_64-mont.asm » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698