| Index: linux-x86_64/crypto/ec/p256-x86_64-asm.S
|
| diff --git a/linux-x86_64/crypto/ec/p256-x86_64-asm.S b/linux-x86_64/crypto/ec/p256-x86_64-asm.S
|
| index 4abce6f91ee68ae72670d1af5aad2f739e43b48b..e059dd6081938a36d6775cb71d7d63d4db86d885 100644
|
| --- a/linux-x86_64/crypto/ec/p256-x86_64-asm.S
|
| +++ b/linux-x86_64/crypto/ec/p256-x86_64-asm.S
|
| @@ -24,6 +24,7 @@ ecp_nistz256_mul_by_2:
|
| pushq %r13
|
|
|
| movq 0(%rsi),%r8
|
| + xorq %r13,%r13
|
| movq 8(%rsi),%r9
|
| addq %r8,%r8
|
| movq 16(%rsi),%r10
|
| @@ -34,7 +35,7 @@ ecp_nistz256_mul_by_2:
|
| adcq %r10,%r10
|
| adcq %r11,%r11
|
| movq %r9,%rdx
|
| - sbbq %r13,%r13
|
| + adcq $0,%r13
|
|
|
| subq 0(%rsi),%r8
|
| movq %r10,%rcx
|
| @@ -42,14 +43,14 @@ ecp_nistz256_mul_by_2:
|
| sbbq 16(%rsi),%r10
|
| movq %r11,%r12
|
| sbbq 24(%rsi),%r11
|
| - testq %r13,%r13
|
| + sbbq $0,%r13
|
|
|
| - cmovzq %rax,%r8
|
| - cmovzq %rdx,%r9
|
| + cmovcq %rax,%r8
|
| + cmovcq %rdx,%r9
|
| movq %r8,0(%rdi)
|
| - cmovzq %rcx,%r10
|
| + cmovcq %rcx,%r10
|
| movq %r9,8(%rdi)
|
| - cmovzq %r12,%r11
|
| + cmovcq %r12,%r11
|
| movq %r10,16(%rdi)
|
| movq %r11,24(%rdi)
|
|
|
| @@ -625,6 +626,8 @@ ecp_nistz256_from_mont:
|
| movq %r9,%rsi
|
| adcq $0,%rdx
|
|
|
| +
|
| +
|
| subq $-1,%r8
|
| movq %r10,%rax
|
| sbbq %r12,%r9
|
| @@ -765,13 +768,14 @@ ecp_nistz256_avx2_select_w7:
|
| .type __ecp_nistz256_add_toq,@function
|
| .align 32
|
| __ecp_nistz256_add_toq:
|
| + xorq %r11,%r11
|
| addq 0(%rbx),%r12
|
| adcq 8(%rbx),%r13
|
| movq %r12,%rax
|
| adcq 16(%rbx),%r8
|
| adcq 24(%rbx),%r9
|
| movq %r13,%rbp
|
| - sbbq %r11,%r11
|
| + adcq $0,%r11
|
|
|
| subq $-1,%r12
|
| movq %r8,%rcx
|
| @@ -779,14 +783,14 @@ __ecp_nistz256_add_toq:
|
| sbbq $0,%r8
|
| movq %r9,%r10
|
| sbbq %r15,%r9
|
| - testq %r11,%r11
|
| + sbbq $0,%r11
|
|
|
| - cmovzq %rax,%r12
|
| - cmovzq %rbp,%r13
|
| + cmovcq %rax,%r12
|
| + cmovcq %rbp,%r13
|
| movq %r12,0(%rdi)
|
| - cmovzq %rcx,%r8
|
| + cmovcq %rcx,%r8
|
| movq %r13,8(%rdi)
|
| - cmovzq %r10,%r9
|
| + cmovcq %r10,%r9
|
| movq %r8,16(%rdi)
|
| movq %r9,24(%rdi)
|
|
|
| @@ -854,13 +858,14 @@ __ecp_nistz256_subq:
|
| .type __ecp_nistz256_mul_by_2q,@function
|
| .align 32
|
| __ecp_nistz256_mul_by_2q:
|
| + xorq %r11,%r11
|
| addq %r12,%r12
|
| adcq %r13,%r13
|
| movq %r12,%rax
|
| adcq %r8,%r8
|
| adcq %r9,%r9
|
| movq %r13,%rbp
|
| - sbbq %r11,%r11
|
| + adcq $0,%r11
|
|
|
| subq $-1,%r12
|
| movq %r8,%rcx
|
| @@ -868,14 +873,14 @@ __ecp_nistz256_mul_by_2q:
|
| sbbq $0,%r8
|
| movq %r9,%r10
|
| sbbq %r15,%r9
|
| - testq %r11,%r11
|
| + sbbq $0,%r11
|
|
|
| - cmovzq %rax,%r12
|
| - cmovzq %rbp,%r13
|
| + cmovcq %rax,%r12
|
| + cmovcq %rbp,%r13
|
| movq %r12,0(%rdi)
|
| - cmovzq %rcx,%r8
|
| + cmovcq %rcx,%r8
|
| movq %r13,8(%rdi)
|
| - cmovzq %r10,%r9
|
| + cmovcq %r10,%r9
|
| movq %r8,16(%rdi)
|
| movq %r9,24(%rdi)
|
|
|
| @@ -1107,16 +1112,14 @@ ecp_nistz256_point_add:
|
| movq %rdx,%rsi
|
| movdqa %xmm0,384(%rsp)
|
| movdqa %xmm1,384+16(%rsp)
|
| - por %xmm0,%xmm1
|
| movdqa %xmm2,416(%rsp)
|
| movdqa %xmm3,416+16(%rsp)
|
| - por %xmm2,%xmm3
|
| movdqa %xmm4,448(%rsp)
|
| movdqa %xmm5,448+16(%rsp)
|
| - por %xmm1,%xmm3
|
| + por %xmm4,%xmm5
|
|
|
| movdqu 0(%rsi),%xmm0
|
| - pshufd $0xb1,%xmm3,%xmm5
|
| + pshufd $0xb1,%xmm5,%xmm3
|
| movdqu 16(%rsi),%xmm1
|
| movdqu 32(%rsi),%xmm2
|
| por %xmm3,%xmm5
|
| @@ -1128,14 +1131,14 @@ ecp_nistz256_point_add:
|
| movdqa %xmm0,480(%rsp)
|
| pshufd $0x1e,%xmm5,%xmm4
|
| movdqa %xmm1,480+16(%rsp)
|
| - por %xmm0,%xmm1
|
| -.byte 102,72,15,110,199
|
| + movdqu 64(%rsi),%xmm0
|
| + movdqu 80(%rsi),%xmm1
|
| movdqa %xmm2,512(%rsp)
|
| movdqa %xmm3,512+16(%rsp)
|
| - por %xmm2,%xmm3
|
| por %xmm4,%xmm5
|
| pxor %xmm4,%xmm4
|
| - por %xmm1,%xmm3
|
| + por %xmm0,%xmm1
|
| +.byte 102,72,15,110,199
|
|
|
| leaq 64-0(%rsi),%rsi
|
| movq %rax,544+0(%rsp)
|
| @@ -1146,8 +1149,8 @@ ecp_nistz256_point_add:
|
| call __ecp_nistz256_sqr_montq
|
|
|
| pcmpeqd %xmm4,%xmm5
|
| - pshufd $0xb1,%xmm3,%xmm4
|
| - por %xmm3,%xmm4
|
| + pshufd $0xb1,%xmm1,%xmm4
|
| + por %xmm1,%xmm4
|
| pshufd $0,%xmm5,%xmm5
|
| pshufd $0x1e,%xmm4,%xmm3
|
| por %xmm3,%xmm4
|
| @@ -1330,6 +1333,7 @@ ecp_nistz256_point_add:
|
|
|
|
|
|
|
| + xorq %r11,%r11
|
| addq %r12,%r12
|
| leaq 96(%rsp),%rsi
|
| adcq %r13,%r13
|
| @@ -1337,7 +1341,7 @@ ecp_nistz256_point_add:
|
| adcq %r8,%r8
|
| adcq %r9,%r9
|
| movq %r13,%rbp
|
| - sbbq %r11,%r11
|
| + adcq $0,%r11
|
|
|
| subq $-1,%r12
|
| movq %r8,%rcx
|
| @@ -1345,15 +1349,15 @@ ecp_nistz256_point_add:
|
| sbbq $0,%r8
|
| movq %r9,%r10
|
| sbbq %r15,%r9
|
| - testq %r11,%r11
|
| + sbbq $0,%r11
|
|
|
| - cmovzq %rax,%r12
|
| + cmovcq %rax,%r12
|
| movq 0(%rsi),%rax
|
| - cmovzq %rbp,%r13
|
| + cmovcq %rbp,%r13
|
| movq 8(%rsi),%rbp
|
| - cmovzq %rcx,%r8
|
| + cmovcq %rcx,%r8
|
| movq 16(%rsi),%rcx
|
| - cmovzq %r10,%r9
|
| + cmovcq %r10,%r9
|
| movq 24(%rsi),%r10
|
|
|
| call __ecp_nistz256_subq
|
| @@ -1508,16 +1512,14 @@ ecp_nistz256_point_add_affine:
|
| movq 64+24(%rsi),%r8
|
| movdqa %xmm0,320(%rsp)
|
| movdqa %xmm1,320+16(%rsp)
|
| - por %xmm0,%xmm1
|
| movdqa %xmm2,352(%rsp)
|
| movdqa %xmm3,352+16(%rsp)
|
| - por %xmm2,%xmm3
|
| movdqa %xmm4,384(%rsp)
|
| movdqa %xmm5,384+16(%rsp)
|
| - por %xmm1,%xmm3
|
| + por %xmm4,%xmm5
|
|
|
| movdqu 0(%rbx),%xmm0
|
| - pshufd $0xb1,%xmm3,%xmm5
|
| + pshufd $0xb1,%xmm5,%xmm3
|
| movdqu 16(%rbx),%xmm1
|
| movdqu 32(%rbx),%xmm2
|
| por %xmm3,%xmm5
|
| @@ -1635,6 +1637,7 @@ ecp_nistz256_point_add_affine:
|
|
|
|
|
|
|
| + xorq %r11,%r11
|
| addq %r12,%r12
|
| leaq 192(%rsp),%rsi
|
| adcq %r13,%r13
|
| @@ -1642,7 +1645,7 @@ ecp_nistz256_point_add_affine:
|
| adcq %r8,%r8
|
| adcq %r9,%r9
|
| movq %r13,%rbp
|
| - sbbq %r11,%r11
|
| + adcq $0,%r11
|
|
|
| subq $-1,%r12
|
| movq %r8,%rcx
|
| @@ -1650,15 +1653,15 @@ ecp_nistz256_point_add_affine:
|
| sbbq $0,%r8
|
| movq %r9,%r10
|
| sbbq %r15,%r9
|
| - testq %r11,%r11
|
| + sbbq $0,%r11
|
|
|
| - cmovzq %rax,%r12
|
| + cmovcq %rax,%r12
|
| movq 0(%rsi),%rax
|
| - cmovzq %rbp,%r13
|
| + cmovcq %rbp,%r13
|
| movq 8(%rsi),%rbp
|
| - cmovzq %rcx,%r8
|
| + cmovcq %rcx,%r8
|
| movq 16(%rsi),%rcx
|
| - cmovzq %r10,%r9
|
| + cmovcq %r10,%r9
|
| movq 24(%rsi),%r10
|
|
|
| call __ecp_nistz256_subq
|
|
|