| Index: win-x86_64/crypto/ec/p256-x86_64-asm.asm
|
| diff --git a/win-x86_64/crypto/ec/p256-x86_64-asm.asm b/win-x86_64/crypto/ec/p256-x86_64-asm.asm
|
| index a2e40758194f84bc2b8df35fd20dc611155f170b..cbcf8835a6baa780884db1e8c9d4f643b782bd05 100644
|
| --- a/win-x86_64/crypto/ec/p256-x86_64-asm.asm
|
| +++ b/win-x86_64/crypto/ec/p256-x86_64-asm.asm
|
| @@ -35,6 +35,7 @@ $L$SEH_begin_ecp_nistz256_mul_by_2:
|
| push r13
|
|
|
| mov r8,QWORD[rsi]
|
| + xor r13,r13
|
| mov r9,QWORD[8+rsi]
|
| add r8,r8
|
| mov r10,QWORD[16+rsi]
|
| @@ -45,7 +46,7 @@ $L$SEH_begin_ecp_nistz256_mul_by_2:
|
| adc r10,r10
|
| adc r11,r11
|
| mov rdx,r9
|
| - sbb r13,r13
|
| + adc r13,0
|
|
|
| sub r8,QWORD[rsi]
|
| mov rcx,r10
|
| @@ -53,14 +54,14 @@ $L$SEH_begin_ecp_nistz256_mul_by_2:
|
| sbb r10,QWORD[16+rsi]
|
| mov r12,r11
|
| sbb r11,QWORD[24+rsi]
|
| - test r13,r13
|
| + sbb r13,0
|
|
|
| - cmovz r8,rax
|
| - cmovz r9,rdx
|
| + cmovc r8,rax
|
| + cmovc r9,rdx
|
| mov QWORD[rdi],r8
|
| - cmovz r10,rcx
|
| + cmovc r10,rcx
|
| mov QWORD[8+rdi],r9
|
| - cmovz r11,r12
|
| + cmovc r11,r12
|
| mov QWORD[16+rdi],r10
|
| mov QWORD[24+rdi],r11
|
|
|
| @@ -673,6 +674,8 @@ $L$SEH_begin_ecp_nistz256_from_mont:
|
| mov rsi,r9
|
| adc rdx,0
|
|
|
| +
|
| +
|
| sub r8,-1
|
| mov rax,r10
|
| sbb r9,r12
|
| @@ -873,13 +876,14 @@ $L$SEH_end_ecp_nistz256_avx2_select_w7:
|
|
|
| ALIGN 32
|
| __ecp_nistz256_add_toq:
|
| + xor r11,r11
|
| add r12,QWORD[rbx]
|
| adc r13,QWORD[8+rbx]
|
| mov rax,r12
|
| adc r8,QWORD[16+rbx]
|
| adc r9,QWORD[24+rbx]
|
| mov rbp,r13
|
| - sbb r11,r11
|
| + adc r11,0
|
|
|
| sub r12,-1
|
| mov rcx,r8
|
| @@ -887,14 +891,14 @@ __ecp_nistz256_add_toq:
|
| sbb r8,0
|
| mov r10,r9
|
| sbb r9,r15
|
| - test r11,r11
|
| + sbb r11,0
|
|
|
| - cmovz r12,rax
|
| - cmovz r13,rbp
|
| + cmovc r12,rax
|
| + cmovc r13,rbp
|
| mov QWORD[rdi],r12
|
| - cmovz r8,rcx
|
| + cmovc r8,rcx
|
| mov QWORD[8+rdi],r13
|
| - cmovz r9,r10
|
| + cmovc r9,r10
|
| mov QWORD[16+rdi],r8
|
| mov QWORD[24+rdi],r9
|
|
|
| @@ -962,13 +966,14 @@ __ecp_nistz256_subq:
|
|
|
| ALIGN 32
|
| __ecp_nistz256_mul_by_2q:
|
| + xor r11,r11
|
| add r12,r12
|
| adc r13,r13
|
| mov rax,r12
|
| adc r8,r8
|
| adc r9,r9
|
| mov rbp,r13
|
| - sbb r11,r11
|
| + adc r11,0
|
|
|
| sub r12,-1
|
| mov rcx,r8
|
| @@ -976,14 +981,14 @@ __ecp_nistz256_mul_by_2q:
|
| sbb r8,0
|
| mov r10,r9
|
| sbb r9,r15
|
| - test r11,r11
|
| + sbb r11,0
|
|
|
| - cmovz r12,rax
|
| - cmovz r13,rbp
|
| + cmovc r12,rax
|
| + cmovc r13,rbp
|
| mov QWORD[rdi],r12
|
| - cmovz r8,rcx
|
| + cmovc r8,rcx
|
| mov QWORD[8+rdi],r13
|
| - cmovz r9,r10
|
| + cmovc r9,r10
|
| mov QWORD[16+rdi],r8
|
| mov QWORD[24+rdi],r9
|
|
|
| @@ -1232,16 +1237,14 @@ $L$SEH_begin_ecp_nistz256_point_add:
|
| mov rsi,rdx
|
| movdqa XMMWORD[384+rsp],xmm0
|
| movdqa XMMWORD[(384+16)+rsp],xmm1
|
| - por xmm1,xmm0
|
| movdqa XMMWORD[416+rsp],xmm2
|
| movdqa XMMWORD[(416+16)+rsp],xmm3
|
| - por xmm3,xmm2
|
| movdqa XMMWORD[448+rsp],xmm4
|
| movdqa XMMWORD[(448+16)+rsp],xmm5
|
| - por xmm3,xmm1
|
| + por xmm5,xmm4
|
|
|
| movdqu xmm0,XMMWORD[rsi]
|
| - pshufd xmm5,xmm3,0xb1
|
| + pshufd xmm3,xmm5,0xb1
|
| movdqu xmm1,XMMWORD[16+rsi]
|
| movdqu xmm2,XMMWORD[32+rsi]
|
| por xmm5,xmm3
|
| @@ -1253,14 +1256,14 @@ $L$SEH_begin_ecp_nistz256_point_add:
|
| movdqa XMMWORD[480+rsp],xmm0
|
| pshufd xmm4,xmm5,0x1e
|
| movdqa XMMWORD[(480+16)+rsp],xmm1
|
| - por xmm1,xmm0
|
| -DB 102,72,15,110,199
|
| + movdqu xmm0,XMMWORD[64+rsi]
|
| + movdqu xmm1,XMMWORD[80+rsi]
|
| movdqa XMMWORD[512+rsp],xmm2
|
| movdqa XMMWORD[(512+16)+rsp],xmm3
|
| - por xmm3,xmm2
|
| por xmm5,xmm4
|
| pxor xmm4,xmm4
|
| - por xmm3,xmm1
|
| + por xmm1,xmm0
|
| +DB 102,72,15,110,199
|
|
|
| lea rsi,[((64-0))+rsi]
|
| mov QWORD[((544+0))+rsp],rax
|
| @@ -1271,8 +1274,8 @@ DB 102,72,15,110,199
|
| call __ecp_nistz256_sqr_montq
|
|
|
| pcmpeqd xmm5,xmm4
|
| - pshufd xmm4,xmm3,0xb1
|
| - por xmm4,xmm3
|
| + pshufd xmm4,xmm1,0xb1
|
| + por xmm4,xmm1
|
| pshufd xmm5,xmm5,0
|
| pshufd xmm3,xmm4,0x1e
|
| por xmm4,xmm3
|
| @@ -1455,6 +1458,7 @@ $L$add_proceedq:
|
|
|
|
|
|
|
| + xor r11,r11
|
| add r12,r12
|
| lea rsi,[96+rsp]
|
| adc r13,r13
|
| @@ -1462,7 +1466,7 @@ $L$add_proceedq:
|
| adc r8,r8
|
| adc r9,r9
|
| mov rbp,r13
|
| - sbb r11,r11
|
| + adc r11,0
|
|
|
| sub r12,-1
|
| mov rcx,r8
|
| @@ -1470,15 +1474,15 @@ $L$add_proceedq:
|
| sbb r8,0
|
| mov r10,r9
|
| sbb r9,r15
|
| - test r11,r11
|
| + sbb r11,0
|
|
|
| - cmovz r12,rax
|
| + cmovc r12,rax
|
| mov rax,QWORD[rsi]
|
| - cmovz r13,rbp
|
| + cmovc r13,rbp
|
| mov rbp,QWORD[8+rsi]
|
| - cmovz r8,rcx
|
| + cmovc r8,rcx
|
| mov rcx,QWORD[16+rsi]
|
| - cmovz r9,r10
|
| + cmovc r9,r10
|
| mov r10,QWORD[24+rsi]
|
|
|
| call __ecp_nistz256_subq
|
| @@ -1643,16 +1647,14 @@ $L$SEH_begin_ecp_nistz256_point_add_affine:
|
| mov r8,QWORD[((64+24))+rsi]
|
| movdqa XMMWORD[320+rsp],xmm0
|
| movdqa XMMWORD[(320+16)+rsp],xmm1
|
| - por xmm1,xmm0
|
| movdqa XMMWORD[352+rsp],xmm2
|
| movdqa XMMWORD[(352+16)+rsp],xmm3
|
| - por xmm3,xmm2
|
| movdqa XMMWORD[384+rsp],xmm4
|
| movdqa XMMWORD[(384+16)+rsp],xmm5
|
| - por xmm3,xmm1
|
| + por xmm5,xmm4
|
|
|
| movdqu xmm0,XMMWORD[rbx]
|
| - pshufd xmm5,xmm3,0xb1
|
| + pshufd xmm3,xmm5,0xb1
|
| movdqu xmm1,XMMWORD[16+rbx]
|
| movdqu xmm2,XMMWORD[32+rbx]
|
| por xmm5,xmm3
|
| @@ -1770,6 +1772,7 @@ DB 102,72,15,110,199
|
|
|
|
|
|
|
| + xor r11,r11
|
| add r12,r12
|
| lea rsi,[192+rsp]
|
| adc r13,r13
|
| @@ -1777,7 +1780,7 @@ DB 102,72,15,110,199
|
| adc r8,r8
|
| adc r9,r9
|
| mov rbp,r13
|
| - sbb r11,r11
|
| + adc r11,0
|
|
|
| sub r12,-1
|
| mov rcx,r8
|
| @@ -1785,15 +1788,15 @@ DB 102,72,15,110,199
|
| sbb r8,0
|
| mov r10,r9
|
| sbb r9,r15
|
| - test r11,r11
|
| + sbb r11,0
|
|
|
| - cmovz r12,rax
|
| + cmovc r12,rax
|
| mov rax,QWORD[rsi]
|
| - cmovz r13,rbp
|
| + cmovc r13,rbp
|
| mov rbp,QWORD[8+rsi]
|
| - cmovz r8,rcx
|
| + cmovc r8,rcx
|
| mov rcx,QWORD[16+rsi]
|
| - cmovz r9,r10
|
| + cmovc r9,r10
|
| mov r10,QWORD[24+rsi]
|
|
|
| call __ecp_nistz256_subq
|
|
|