Index: third_party/boringssl/win-x86_64/crypto/bn/x86_64-mont5.asm |
diff --git a/third_party/boringssl/win-x86_64/crypto/bn/x86_64-mont5.asm b/third_party/boringssl/win-x86_64/crypto/bn/x86_64-mont5.asm |
index 58f19ac26ce216dd4ec3833bd384023fc581b9bf..5033f027c2cc394322f92d7d571ac9c04c0078d8 100644 |
--- a/third_party/boringssl/win-x86_64/crypto/bn/x86_64-mont5.asm |
+++ b/third_party/boringssl/win-x86_64/crypto/bn/x86_64-mont5.asm |
@@ -23,16 +23,15 @@ $L$SEH_begin_bn_mul_mont_gather5: |
mov r9,QWORD[48+rsp] |
+ mov r9d,r9d |
+ mov rax,rsp |
test r9d,7 |
jnz NEAR $L$mul_enter |
jmp NEAR $L$mul4x_enter |
ALIGN 16 |
$L$mul_enter: |
- mov r9d,r9d |
- mov rax,rsp |
movd xmm5,DWORD[56+rsp] |
- lea r10,[$L$inc] |
push rbx |
push rbp |
push r12 |
@@ -40,13 +39,39 @@ $L$mul_enter: |
push r14 |
push r15 |
- lea r11,[2+r9] |
- neg r11 |
- lea rsp,[((-264))+r11*8+rsp] |
- and rsp,-1024 |
+ neg r9 |
+ mov r11,rsp |
+ lea r10,[((-280))+r9*8+rsp] |
+ neg r9 |
+ and r10,-1024 |
+ |
+ |
+ |
+ |
+ |
+ |
+ |
+ |
+ sub r11,r10 |
+ and r11,-4096 |
+ lea rsp,[r11*1+r10] |
+ mov r11,QWORD[rsp] |
+ cmp rsp,r10 |
+ ja NEAR $L$mul_page_walk |
+ jmp NEAR $L$mul_page_walk_done |
+ |
+$L$mul_page_walk: |
+ lea rsp,[((-4096))+rsp] |
+ mov r11,QWORD[rsp] |
+ cmp rsp,r10 |
+ ja NEAR $L$mul_page_walk |
+$L$mul_page_walk_done: |
+ |
+ lea r10,[$L$inc] |
mov QWORD[8+r9*8+rsp],rax |
$L$mul_body: |
+ |
lea r12,[128+rdx] |
movdqa xmm0,XMMWORD[r10] |
movdqa xmm1,XMMWORD[16+r10] |
@@ -385,16 +410,17 @@ $L$sub: sbb rax,QWORD[r14*8+rcx] |
sbb rax,0 |
xor r14,r14 |
+ and rsi,rax |
+ not rax |
+ mov rcx,rdi |
+ and rcx,rax |
mov r15,r9 |
+ or rsi,rcx |
ALIGN 16 |
$L$copy: |
- mov rsi,QWORD[r14*8+rsp] |
- mov rcx,QWORD[r14*8+rdi] |
- xor rsi,rcx |
- and rsi,rax |
- xor rsi,rcx |
+ mov rax,QWORD[r14*8+rsi] |
mov QWORD[r14*8+rsp],r14 |
- mov QWORD[r14*8+rdi],rsi |
+ mov QWORD[r14*8+rdi],rax |
lea r14,[1+r14] |
sub r15,1 |
jnz NEAR $L$copy |
@@ -429,15 +455,16 @@ $L$SEH_begin_bn_mul4x_mont_gather5: |
mov r9,QWORD[48+rsp] |
-$L$mul4x_enter: |
DB 0x67 |
mov rax,rsp |
+$L$mul4x_enter: |
push rbx |
push rbp |
push r12 |
push r13 |
push r14 |
push r15 |
+$L$mul4x_prologue: |
DB 0x67 |
shl r9d,3 |
@@ -454,24 +481,41 @@ DB 0x67 |
lea r11,[((-320))+r9*2+rsp] |
+ mov rbp,rsp |
sub r11,rdi |
and r11,4095 |
cmp r10,r11 |
jb NEAR $L$mul4xsp_alt |
- sub rsp,r11 |
- lea rsp,[((-320))+r9*2+rsp] |
+ sub rbp,r11 |
+ lea rbp,[((-320))+r9*2+rbp] |
jmp NEAR $L$mul4xsp_done |
ALIGN 32 |
$L$mul4xsp_alt: |
lea r10,[((4096-320))+r9*2] |
- lea rsp,[((-320))+r9*2+rsp] |
+ lea rbp,[((-320))+r9*2+rbp] |
sub r11,r10 |
mov r10,0 |
cmovc r11,r10 |
- sub rsp,r11 |
+ sub rbp,r11 |
$L$mul4xsp_done: |
- and rsp,-64 |
+ and rbp,-64 |
+ mov r11,rsp |
+ sub r11,rbp |
+ and r11,-4096 |
+ lea rsp,[rbp*1+r11] |
+ mov r10,QWORD[rsp] |
+ cmp rsp,rbp |
+ ja NEAR $L$mul4x_page_walk |
+ jmp NEAR $L$mul4x_page_walk_done |
+ |
+$L$mul4x_page_walk: |
+ lea rsp,[((-4096))+rsp] |
+ mov r10,QWORD[rsp] |
+ cmp rsp,rbp |
+ ja NEAR $L$mul4x_page_walk |
+$L$mul4x_page_walk_done: |
+ |
neg r9 |
mov QWORD[40+rsp],rax |
@@ -1043,6 +1087,7 @@ $L$SEH_begin_bn_power5: |
push r13 |
push r14 |
push r15 |
+$L$power5_prologue: |
shl r9d,3 |
lea r10d,[r9*2+r9] |
@@ -1057,24 +1102,41 @@ $L$SEH_begin_bn_power5: |
lea r11,[((-320))+r9*2+rsp] |
+ mov rbp,rsp |
sub r11,rdi |
and r11,4095 |
cmp r10,r11 |
jb NEAR $L$pwr_sp_alt |
- sub rsp,r11 |
- lea rsp,[((-320))+r9*2+rsp] |
+ sub rbp,r11 |
+ lea rbp,[((-320))+r9*2+rbp] |
jmp NEAR $L$pwr_sp_done |
ALIGN 32 |
$L$pwr_sp_alt: |
lea r10,[((4096-320))+r9*2] |
- lea rsp,[((-320))+r9*2+rsp] |
+ lea rbp,[((-320))+r9*2+rbp] |
sub r11,r10 |
mov r10,0 |
cmovc r11,r10 |
- sub rsp,r11 |
+ sub rbp,r11 |
$L$pwr_sp_done: |
- and rsp,-64 |
+ and rbp,-64 |
+ mov r11,rsp |
+ sub r11,rbp |
+ and r11,-4096 |
+ lea rsp,[rbp*1+r11] |
+ mov r10,QWORD[rsp] |
+ cmp rsp,rbp |
+ ja NEAR $L$pwr_page_walk |
+ jmp NEAR $L$pwr_page_walk_done |
+ |
+$L$pwr_page_walk: |
+ lea rsp,[((-4096))+rsp] |
+ mov r10,QWORD[rsp] |
+ cmp rsp,rbp |
+ ja NEAR $L$pwr_page_walk |
+$L$pwr_page_walk_done: |
+ |
mov r10,r9 |
neg r9 |
@@ -1997,6 +2059,7 @@ DB 0x67 |
push r13 |
push r14 |
push r15 |
+$L$from_prologue: |
shl r9d,3 |
lea r10,[r9*2+r9] |
@@ -2011,24 +2074,41 @@ DB 0x67 |
lea r11,[((-320))+r9*2+rsp] |
+ mov rbp,rsp |
sub r11,rdi |
and r11,4095 |
cmp r10,r11 |
jb NEAR $L$from_sp_alt |
- sub rsp,r11 |
- lea rsp,[((-320))+r9*2+rsp] |
+ sub rbp,r11 |
+ lea rbp,[((-320))+r9*2+rbp] |
jmp NEAR $L$from_sp_done |
ALIGN 32 |
$L$from_sp_alt: |
lea r10,[((4096-320))+r9*2] |
- lea rsp,[((-320))+r9*2+rsp] |
+ lea rbp,[((-320))+r9*2+rbp] |
sub r11,r10 |
mov r10,0 |
cmovc r11,r10 |
- sub rsp,r11 |
+ sub rbp,r11 |
$L$from_sp_done: |
- and rsp,-64 |
+ and rbp,-64 |
+ mov r11,rsp |
+ sub r11,rbp |
+ and r11,-4096 |
+ lea rsp,[rbp*1+r11] |
+ mov r10,QWORD[rsp] |
+ cmp rsp,rbp |
+ ja NEAR $L$from_page_walk |
+ jmp NEAR $L$from_page_walk_done |
+ |
+$L$from_page_walk: |
+ lea rsp,[((-4096))+rsp] |
+ mov r10,QWORD[rsp] |
+ cmp rsp,rbp |
+ ja NEAR $L$from_page_walk |
+$L$from_page_walk_done: |
+ |
mov r10,r9 |
neg r9 |
@@ -2321,9 +2401,14 @@ mul_handler: |
cmp rbx,r10 |
jb NEAR $L$common_seh_tail |
+ mov r10d,DWORD[4+r11] |
+ lea r10,[r10*1+rsi] |
+ cmp rbx,r10 |
+ jb NEAR $L$common_pop_regs |
+ |
mov rax,QWORD[152+r8] |
- mov r10d,DWORD[4+r11] |
+ mov r10d,DWORD[8+r11] |
lea r10,[r10*1+rsi] |
cmp rbx,r10 |
jae NEAR $L$common_seh_tail |
@@ -2335,11 +2420,11 @@ mul_handler: |
mov r10,QWORD[192+r8] |
mov rax,QWORD[8+r10*8+rax] |
- jmp NEAR $L$body_proceed |
+ jmp NEAR $L$common_pop_regs |
$L$body_40: |
mov rax,QWORD[40+rax] |
-$L$body_proceed: |
+$L$common_pop_regs: |
mov rbx,QWORD[((-8))+rax] |
mov rbp,QWORD[((-16))+rax] |
mov r12,QWORD[((-24))+rax] |
@@ -2419,22 +2504,22 @@ ALIGN 8 |
$L$SEH_info_bn_mul_mont_gather5: |
DB 9,0,0,0 |
DD mul_handler wrt ..imagebase |
- DD $L$mul_body wrt ..imagebase,$L$mul_epilogue wrt ..imagebase |
+ DD $L$mul_body wrt ..imagebase,$L$mul_body wrt ..imagebase,$L$mul_epilogue wrt ..imagebase |
ALIGN 8 |
$L$SEH_info_bn_mul4x_mont_gather5: |
DB 9,0,0,0 |
DD mul_handler wrt ..imagebase |
- DD $L$mul4x_body wrt ..imagebase,$L$mul4x_epilogue wrt ..imagebase |
+ DD $L$mul4x_prologue wrt ..imagebase,$L$mul4x_body wrt ..imagebase,$L$mul4x_epilogue wrt ..imagebase |
ALIGN 8 |
$L$SEH_info_bn_power5: |
DB 9,0,0,0 |
DD mul_handler wrt ..imagebase |
- DD $L$power5_body wrt ..imagebase,$L$power5_epilogue wrt ..imagebase |
+ DD $L$power5_prologue wrt ..imagebase,$L$power5_body wrt ..imagebase,$L$power5_epilogue wrt ..imagebase |
ALIGN 8 |
$L$SEH_info_bn_from_mont8x: |
DB 9,0,0,0 |
DD mul_handler wrt ..imagebase |
- DD $L$from_body wrt ..imagebase,$L$from_epilogue wrt ..imagebase |
+ DD $L$from_prologue wrt ..imagebase,$L$from_body wrt ..imagebase,$L$from_epilogue wrt ..imagebase |
ALIGN 8 |
$L$SEH_info_bn_gather5: |
DB 0x01,0x0b,0x03,0x0a |