Index: linux-x86_64/crypto/ec/p256-x86_64-asm.S |
diff --git a/linux-x86_64/crypto/ec/p256-x86_64-asm.S b/linux-x86_64/crypto/ec/p256-x86_64-asm.S |
index 4abce6f91ee68ae72670d1af5aad2f739e43b48b..e059dd6081938a36d6775cb71d7d63d4db86d885 100644 |
--- a/linux-x86_64/crypto/ec/p256-x86_64-asm.S |
+++ b/linux-x86_64/crypto/ec/p256-x86_64-asm.S |
@@ -24,6 +24,7 @@ ecp_nistz256_mul_by_2: |
pushq %r13 |
movq 0(%rsi),%r8 |
+ xorq %r13,%r13 |
movq 8(%rsi),%r9 |
addq %r8,%r8 |
movq 16(%rsi),%r10 |
@@ -34,7 +35,7 @@ ecp_nistz256_mul_by_2: |
adcq %r10,%r10 |
adcq %r11,%r11 |
movq %r9,%rdx |
- sbbq %r13,%r13 |
+ adcq $0,%r13 |
subq 0(%rsi),%r8 |
movq %r10,%rcx |
@@ -42,14 +43,14 @@ ecp_nistz256_mul_by_2: |
sbbq 16(%rsi),%r10 |
movq %r11,%r12 |
sbbq 24(%rsi),%r11 |
- testq %r13,%r13 |
+ sbbq $0,%r13 |
- cmovzq %rax,%r8 |
- cmovzq %rdx,%r9 |
+ cmovcq %rax,%r8 |
+ cmovcq %rdx,%r9 |
movq %r8,0(%rdi) |
- cmovzq %rcx,%r10 |
+ cmovcq %rcx,%r10 |
movq %r9,8(%rdi) |
- cmovzq %r12,%r11 |
+ cmovcq %r12,%r11 |
movq %r10,16(%rdi) |
movq %r11,24(%rdi) |
@@ -625,6 +626,8 @@ ecp_nistz256_from_mont: |
movq %r9,%rsi |
adcq $0,%rdx |
+ |
+ |
subq $-1,%r8 |
movq %r10,%rax |
sbbq %r12,%r9 |
@@ -765,13 +768,14 @@ ecp_nistz256_avx2_select_w7: |
.type __ecp_nistz256_add_toq,@function |
.align 32 |
__ecp_nistz256_add_toq: |
+ xorq %r11,%r11 |
addq 0(%rbx),%r12 |
adcq 8(%rbx),%r13 |
movq %r12,%rax |
adcq 16(%rbx),%r8 |
adcq 24(%rbx),%r9 |
movq %r13,%rbp |
- sbbq %r11,%r11 |
+ adcq $0,%r11 |
subq $-1,%r12 |
movq %r8,%rcx |
@@ -779,14 +783,14 @@ __ecp_nistz256_add_toq: |
sbbq $0,%r8 |
movq %r9,%r10 |
sbbq %r15,%r9 |
- testq %r11,%r11 |
+ sbbq $0,%r11 |
- cmovzq %rax,%r12 |
- cmovzq %rbp,%r13 |
+ cmovcq %rax,%r12 |
+ cmovcq %rbp,%r13 |
movq %r12,0(%rdi) |
- cmovzq %rcx,%r8 |
+ cmovcq %rcx,%r8 |
movq %r13,8(%rdi) |
- cmovzq %r10,%r9 |
+ cmovcq %r10,%r9 |
movq %r8,16(%rdi) |
movq %r9,24(%rdi) |
@@ -854,13 +858,14 @@ __ecp_nistz256_subq: |
.type __ecp_nistz256_mul_by_2q,@function |
.align 32 |
__ecp_nistz256_mul_by_2q: |
+ xorq %r11,%r11 |
addq %r12,%r12 |
adcq %r13,%r13 |
movq %r12,%rax |
adcq %r8,%r8 |
adcq %r9,%r9 |
movq %r13,%rbp |
- sbbq %r11,%r11 |
+ adcq $0,%r11 |
subq $-1,%r12 |
movq %r8,%rcx |
@@ -868,14 +873,14 @@ __ecp_nistz256_mul_by_2q: |
sbbq $0,%r8 |
movq %r9,%r10 |
sbbq %r15,%r9 |
- testq %r11,%r11 |
+ sbbq $0,%r11 |
- cmovzq %rax,%r12 |
- cmovzq %rbp,%r13 |
+ cmovcq %rax,%r12 |
+ cmovcq %rbp,%r13 |
movq %r12,0(%rdi) |
- cmovzq %rcx,%r8 |
+ cmovcq %rcx,%r8 |
movq %r13,8(%rdi) |
- cmovzq %r10,%r9 |
+ cmovcq %r10,%r9 |
movq %r8,16(%rdi) |
movq %r9,24(%rdi) |
@@ -1107,16 +1112,14 @@ ecp_nistz256_point_add: |
movq %rdx,%rsi |
movdqa %xmm0,384(%rsp) |
movdqa %xmm1,384+16(%rsp) |
- por %xmm0,%xmm1 |
movdqa %xmm2,416(%rsp) |
movdqa %xmm3,416+16(%rsp) |
- por %xmm2,%xmm3 |
movdqa %xmm4,448(%rsp) |
movdqa %xmm5,448+16(%rsp) |
- por %xmm1,%xmm3 |
+ por %xmm4,%xmm5 |
movdqu 0(%rsi),%xmm0 |
- pshufd $0xb1,%xmm3,%xmm5 |
+ pshufd $0xb1,%xmm5,%xmm3 |
movdqu 16(%rsi),%xmm1 |
movdqu 32(%rsi),%xmm2 |
por %xmm3,%xmm5 |
@@ -1128,14 +1131,14 @@ ecp_nistz256_point_add: |
movdqa %xmm0,480(%rsp) |
pshufd $0x1e,%xmm5,%xmm4 |
movdqa %xmm1,480+16(%rsp) |
- por %xmm0,%xmm1 |
-.byte 102,72,15,110,199 |
+ movdqu 64(%rsi),%xmm0 |
+ movdqu 80(%rsi),%xmm1 |
movdqa %xmm2,512(%rsp) |
movdqa %xmm3,512+16(%rsp) |
- por %xmm2,%xmm3 |
por %xmm4,%xmm5 |
pxor %xmm4,%xmm4 |
- por %xmm1,%xmm3 |
+ por %xmm0,%xmm1 |
+.byte 102,72,15,110,199 |
leaq 64-0(%rsi),%rsi |
movq %rax,544+0(%rsp) |
@@ -1146,8 +1149,8 @@ ecp_nistz256_point_add: |
call __ecp_nistz256_sqr_montq |
pcmpeqd %xmm4,%xmm5 |
- pshufd $0xb1,%xmm3,%xmm4 |
- por %xmm3,%xmm4 |
+ pshufd $0xb1,%xmm1,%xmm4 |
+ por %xmm1,%xmm4 |
pshufd $0,%xmm5,%xmm5 |
pshufd $0x1e,%xmm4,%xmm3 |
por %xmm3,%xmm4 |
@@ -1330,6 +1333,7 @@ ecp_nistz256_point_add: |
+ xorq %r11,%r11 |
addq %r12,%r12 |
leaq 96(%rsp),%rsi |
adcq %r13,%r13 |
@@ -1337,7 +1341,7 @@ ecp_nistz256_point_add: |
adcq %r8,%r8 |
adcq %r9,%r9 |
movq %r13,%rbp |
- sbbq %r11,%r11 |
+ adcq $0,%r11 |
subq $-1,%r12 |
movq %r8,%rcx |
@@ -1345,15 +1349,15 @@ ecp_nistz256_point_add: |
sbbq $0,%r8 |
movq %r9,%r10 |
sbbq %r15,%r9 |
- testq %r11,%r11 |
+ sbbq $0,%r11 |
- cmovzq %rax,%r12 |
+ cmovcq %rax,%r12 |
movq 0(%rsi),%rax |
- cmovzq %rbp,%r13 |
+ cmovcq %rbp,%r13 |
movq 8(%rsi),%rbp |
- cmovzq %rcx,%r8 |
+ cmovcq %rcx,%r8 |
movq 16(%rsi),%rcx |
- cmovzq %r10,%r9 |
+ cmovcq %r10,%r9 |
movq 24(%rsi),%r10 |
call __ecp_nistz256_subq |
@@ -1508,16 +1512,14 @@ ecp_nistz256_point_add_affine: |
movq 64+24(%rsi),%r8 |
movdqa %xmm0,320(%rsp) |
movdqa %xmm1,320+16(%rsp) |
- por %xmm0,%xmm1 |
movdqa %xmm2,352(%rsp) |
movdqa %xmm3,352+16(%rsp) |
- por %xmm2,%xmm3 |
movdqa %xmm4,384(%rsp) |
movdqa %xmm5,384+16(%rsp) |
- por %xmm1,%xmm3 |
+ por %xmm4,%xmm5 |
movdqu 0(%rbx),%xmm0 |
- pshufd $0xb1,%xmm3,%xmm5 |
+ pshufd $0xb1,%xmm5,%xmm3 |
movdqu 16(%rbx),%xmm1 |
movdqu 32(%rbx),%xmm2 |
por %xmm3,%xmm5 |
@@ -1635,6 +1637,7 @@ ecp_nistz256_point_add_affine: |
+ xorq %r11,%r11 |
addq %r12,%r12 |
leaq 192(%rsp),%rsi |
adcq %r13,%r13 |
@@ -1642,7 +1645,7 @@ ecp_nistz256_point_add_affine: |
adcq %r8,%r8 |
adcq %r9,%r9 |
movq %r13,%rbp |
- sbbq %r11,%r11 |
+ adcq $0,%r11 |
subq $-1,%r12 |
movq %r8,%rcx |
@@ -1650,15 +1653,15 @@ ecp_nistz256_point_add_affine: |
sbbq $0,%r8 |
movq %r9,%r10 |
sbbq %r15,%r9 |
- testq %r11,%r11 |
+ sbbq $0,%r11 |
- cmovzq %rax,%r12 |
+ cmovcq %rax,%r12 |
movq 0(%rsi),%rax |
- cmovzq %rbp,%r13 |
+ cmovcq %rbp,%r13 |
movq 8(%rsi),%rbp |
- cmovzq %rcx,%r8 |
+ cmovcq %rcx,%r8 |
movq 16(%rsi),%rcx |
- cmovzq %r10,%r9 |
+ cmovcq %r10,%r9 |
movq 24(%rsi),%r10 |
call __ecp_nistz256_subq |