Index: win-x86_64/crypto/ec/p256-x86_64-asm.asm |
diff --git a/win-x86_64/crypto/ec/p256-x86_64-asm.asm b/win-x86_64/crypto/ec/p256-x86_64-asm.asm |
index a2e40758194f84bc2b8df35fd20dc611155f170b..cbcf8835a6baa780884db1e8c9d4f643b782bd05 100644 |
--- a/win-x86_64/crypto/ec/p256-x86_64-asm.asm |
+++ b/win-x86_64/crypto/ec/p256-x86_64-asm.asm |
@@ -35,6 +35,7 @@ $L$SEH_begin_ecp_nistz256_mul_by_2: |
push r13 |
mov r8,QWORD[rsi] |
+ xor r13,r13 |
mov r9,QWORD[8+rsi] |
add r8,r8 |
mov r10,QWORD[16+rsi] |
@@ -45,7 +46,7 @@ $L$SEH_begin_ecp_nistz256_mul_by_2: |
adc r10,r10 |
adc r11,r11 |
mov rdx,r9 |
- sbb r13,r13 |
+ adc r13,0 |
sub r8,QWORD[rsi] |
mov rcx,r10 |
@@ -53,14 +54,14 @@ $L$SEH_begin_ecp_nistz256_mul_by_2: |
sbb r10,QWORD[16+rsi] |
mov r12,r11 |
sbb r11,QWORD[24+rsi] |
- test r13,r13 |
+ sbb r13,0 |
- cmovz r8,rax |
- cmovz r9,rdx |
+ cmovc r8,rax |
+ cmovc r9,rdx |
mov QWORD[rdi],r8 |
- cmovz r10,rcx |
+ cmovc r10,rcx |
mov QWORD[8+rdi],r9 |
- cmovz r11,r12 |
+ cmovc r11,r12 |
mov QWORD[16+rdi],r10 |
mov QWORD[24+rdi],r11 |
@@ -673,6 +674,8 @@ $L$SEH_begin_ecp_nistz256_from_mont: |
mov rsi,r9 |
adc rdx,0 |
+ |
+ |
sub r8,-1 |
mov rax,r10 |
sbb r9,r12 |
@@ -873,13 +876,14 @@ $L$SEH_end_ecp_nistz256_avx2_select_w7: |
ALIGN 32 |
__ecp_nistz256_add_toq: |
+ xor r11,r11 |
add r12,QWORD[rbx] |
adc r13,QWORD[8+rbx] |
mov rax,r12 |
adc r8,QWORD[16+rbx] |
adc r9,QWORD[24+rbx] |
mov rbp,r13 |
- sbb r11,r11 |
+ adc r11,0 |
sub r12,-1 |
mov rcx,r8 |
@@ -887,14 +891,14 @@ __ecp_nistz256_add_toq: |
sbb r8,0 |
mov r10,r9 |
sbb r9,r15 |
- test r11,r11 |
+ sbb r11,0 |
- cmovz r12,rax |
- cmovz r13,rbp |
+ cmovc r12,rax |
+ cmovc r13,rbp |
mov QWORD[rdi],r12 |
- cmovz r8,rcx |
+ cmovc r8,rcx |
mov QWORD[8+rdi],r13 |
- cmovz r9,r10 |
+ cmovc r9,r10 |
mov QWORD[16+rdi],r8 |
mov QWORD[24+rdi],r9 |
@@ -962,13 +966,14 @@ __ecp_nistz256_subq: |
ALIGN 32 |
__ecp_nistz256_mul_by_2q: |
+ xor r11,r11 |
add r12,r12 |
adc r13,r13 |
mov rax,r12 |
adc r8,r8 |
adc r9,r9 |
mov rbp,r13 |
- sbb r11,r11 |
+ adc r11,0 |
sub r12,-1 |
mov rcx,r8 |
@@ -976,14 +981,14 @@ __ecp_nistz256_mul_by_2q: |
sbb r8,0 |
mov r10,r9 |
sbb r9,r15 |
- test r11,r11 |
+ sbb r11,0 |
- cmovz r12,rax |
- cmovz r13,rbp |
+ cmovc r12,rax |
+ cmovc r13,rbp |
mov QWORD[rdi],r12 |
- cmovz r8,rcx |
+ cmovc r8,rcx |
mov QWORD[8+rdi],r13 |
- cmovz r9,r10 |
+ cmovc r9,r10 |
mov QWORD[16+rdi],r8 |
mov QWORD[24+rdi],r9 |
@@ -1232,16 +1237,14 @@ $L$SEH_begin_ecp_nistz256_point_add: |
mov rsi,rdx |
movdqa XMMWORD[384+rsp],xmm0 |
movdqa XMMWORD[(384+16)+rsp],xmm1 |
- por xmm1,xmm0 |
movdqa XMMWORD[416+rsp],xmm2 |
movdqa XMMWORD[(416+16)+rsp],xmm3 |
- por xmm3,xmm2 |
movdqa XMMWORD[448+rsp],xmm4 |
movdqa XMMWORD[(448+16)+rsp],xmm5 |
- por xmm3,xmm1 |
+ por xmm5,xmm4 |
movdqu xmm0,XMMWORD[rsi] |
- pshufd xmm5,xmm3,0xb1 |
+ pshufd xmm3,xmm5,0xb1 |
movdqu xmm1,XMMWORD[16+rsi] |
movdqu xmm2,XMMWORD[32+rsi] |
por xmm5,xmm3 |
@@ -1253,14 +1256,14 @@ $L$SEH_begin_ecp_nistz256_point_add: |
movdqa XMMWORD[480+rsp],xmm0 |
pshufd xmm4,xmm5,0x1e |
movdqa XMMWORD[(480+16)+rsp],xmm1 |
- por xmm1,xmm0 |
-DB 102,72,15,110,199 |
+ movdqu xmm0,XMMWORD[64+rsi] |
+ movdqu xmm1,XMMWORD[80+rsi] |
movdqa XMMWORD[512+rsp],xmm2 |
movdqa XMMWORD[(512+16)+rsp],xmm3 |
- por xmm3,xmm2 |
por xmm5,xmm4 |
pxor xmm4,xmm4 |
- por xmm3,xmm1 |
+ por xmm1,xmm0 |
+DB 102,72,15,110,199 |
lea rsi,[((64-0))+rsi] |
mov QWORD[((544+0))+rsp],rax |
@@ -1271,8 +1274,8 @@ DB 102,72,15,110,199 |
call __ecp_nistz256_sqr_montq |
pcmpeqd xmm5,xmm4 |
- pshufd xmm4,xmm3,0xb1 |
- por xmm4,xmm3 |
+ pshufd xmm4,xmm1,0xb1 |
+ por xmm4,xmm1 |
pshufd xmm5,xmm5,0 |
pshufd xmm3,xmm4,0x1e |
por xmm4,xmm3 |
@@ -1455,6 +1458,7 @@ $L$add_proceedq: |
+ xor r11,r11 |
add r12,r12 |
lea rsi,[96+rsp] |
adc r13,r13 |
@@ -1462,7 +1466,7 @@ $L$add_proceedq: |
adc r8,r8 |
adc r9,r9 |
mov rbp,r13 |
- sbb r11,r11 |
+ adc r11,0 |
sub r12,-1 |
mov rcx,r8 |
@@ -1470,15 +1474,15 @@ $L$add_proceedq: |
sbb r8,0 |
mov r10,r9 |
sbb r9,r15 |
- test r11,r11 |
+ sbb r11,0 |
- cmovz r12,rax |
+ cmovc r12,rax |
mov rax,QWORD[rsi] |
- cmovz r13,rbp |
+ cmovc r13,rbp |
mov rbp,QWORD[8+rsi] |
- cmovz r8,rcx |
+ cmovc r8,rcx |
mov rcx,QWORD[16+rsi] |
- cmovz r9,r10 |
+ cmovc r9,r10 |
mov r10,QWORD[24+rsi] |
call __ecp_nistz256_subq |
@@ -1643,16 +1647,14 @@ $L$SEH_begin_ecp_nistz256_point_add_affine: |
mov r8,QWORD[((64+24))+rsi] |
movdqa XMMWORD[320+rsp],xmm0 |
movdqa XMMWORD[(320+16)+rsp],xmm1 |
- por xmm1,xmm0 |
movdqa XMMWORD[352+rsp],xmm2 |
movdqa XMMWORD[(352+16)+rsp],xmm3 |
- por xmm3,xmm2 |
movdqa XMMWORD[384+rsp],xmm4 |
movdqa XMMWORD[(384+16)+rsp],xmm5 |
- por xmm3,xmm1 |
+ por xmm5,xmm4 |
movdqu xmm0,XMMWORD[rbx] |
- pshufd xmm5,xmm3,0xb1 |
+ pshufd xmm3,xmm5,0xb1 |
movdqu xmm1,XMMWORD[16+rbx] |
movdqu xmm2,XMMWORD[32+rbx] |
por xmm5,xmm3 |
@@ -1770,6 +1772,7 @@ DB 102,72,15,110,199 |
+ xor r11,r11 |
add r12,r12 |
lea rsi,[192+rsp] |
adc r13,r13 |
@@ -1777,7 +1780,7 @@ DB 102,72,15,110,199 |
adc r8,r8 |
adc r9,r9 |
mov rbp,r13 |
- sbb r11,r11 |
+ adc r11,0 |
sub r12,-1 |
mov rcx,r8 |
@@ -1785,15 +1788,15 @@ DB 102,72,15,110,199 |
sbb r8,0 |
mov r10,r9 |
sbb r9,r15 |
- test r11,r11 |
+ sbb r11,0 |
- cmovz r12,rax |
+ cmovc r12,rax |
mov rax,QWORD[rsi] |
- cmovz r13,rbp |
+ cmovc r13,rbp |
mov rbp,QWORD[8+rsi] |
- cmovz r8,rcx |
+ cmovc r8,rcx |
mov rcx,QWORD[16+rsi] |
- cmovz r9,r10 |
+ cmovc r9,r10 |
mov r10,QWORD[24+rsi] |
call __ecp_nistz256_subq |