| Index: third_party/boringssl/mac-x86_64/crypto/ec/p256-x86_64-asm.S
|
| diff --git a/third_party/boringssl/mac-x86_64/crypto/ec/p256-x86_64-asm.S b/third_party/boringssl/mac-x86_64/crypto/ec/p256-x86_64-asm.S
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..1cd0cc3f5c68fe14b3f0d587eca132931bd52cc5
|
| --- /dev/null
|
| +++ b/third_party/boringssl/mac-x86_64/crypto/ec/p256-x86_64-asm.S
|
| @@ -0,0 +1,1788 @@
|
| +#if defined(__x86_64__)
|
| +.text
|
| +
|
| +
|
| +
|
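| +/* Constant pool: L$poly is the NIST P-256 prime p = 2^256-2^224+2^192+2^96-1
|
| +   stored as four little-endian 64-bit limbs, L$One/L$Two/L$Three are broadcast
|
| +   dword constants used by the SIMD table-select routines, and L$ONE_mont is
|
| +   2^256 mod p, i.e. the value 1 in Montgomery form. */
|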
| +.p2align 6
|
| +L$poly:
|
| +.quad 0xffffffffffffffff, 0x00000000ffffffff, 0x0000000000000000, 0xffffffff00000001
|
| +
|
| +L$One:
|
| +.long 1,1,1,1,1,1,1,1
|
| +L$Two:
|
| +.long 2,2,2,2,2,2,2,2
|
| +L$Three:
|
| +.long 3,3,3,3,3,3,3,3
|
| +L$ONE_mont:
|
| +.quad 0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe
|
| +
|
| +
|
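| +/* Local helper: doubles a field element by adding it to itself and applying a
|
| +   cmov-based conditional subtraction of p rather than a branch.  The
|
| +   .byte 0xf3,0xc3 epilogues used throughout this file encode a two-byte
|
| +   "rep ret". */
|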
| +.p2align 6
|
| +ecp_nistz256_mul_by_2:
|
| + pushq %r12
|
| + pushq %r13
|
| +
|
| + movq 0(%rsi),%r8
|
| + movq 8(%rsi),%r9
|
| + addq %r8,%r8
|
| + movq 16(%rsi),%r10
|
| + adcq %r9,%r9
|
| + movq 24(%rsi),%r11
|
| + leaq L$poly(%rip),%rsi
|
| + movq %r8,%rax
|
| + adcq %r10,%r10
|
| + adcq %r11,%r11
|
| + movq %r9,%rdx
|
| + sbbq %r13,%r13
|
| +
|
| + subq 0(%rsi),%r8
|
| + movq %r10,%rcx
|
| + sbbq 8(%rsi),%r9
|
| + sbbq 16(%rsi),%r10
|
| + movq %r11,%r12
|
| + sbbq 24(%rsi),%r11
|
| + testq %r13,%r13
|
| +
|
| + cmovzq %rax,%r8
|
| + cmovzq %rdx,%r9
|
| + movq %r8,0(%rdi)
|
| + cmovzq %rcx,%r10
|
| + movq %r9,8(%rdi)
|
| + cmovzq %r12,%r11
|
| + movq %r10,16(%rdi)
|
| + movq %r11,24(%rdi)
|
| +
|
| + popq %r13
|
| + popq %r12
|
| + .byte 0xf3,0xc3
|
| +
|
| +
|
| +
|
| +
|
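| +/* Modular negation: subtracts the input from zero, adds p back, and uses the
|
| +   borrow mask with cmov so that a zero input yields zero rather than p. */
|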
| +.globl _ecp_nistz256_neg
|
| +.private_extern _ecp_nistz256_neg
|
| +
|
| +.p2align 5
|
| +_ecp_nistz256_neg:
|
| + pushq %r12
|
| + pushq %r13
|
| +
|
| + xorq %r8,%r8
|
| + xorq %r9,%r9
|
| + xorq %r10,%r10
|
| + xorq %r11,%r11
|
| + xorq %r13,%r13
|
| +
|
| + subq 0(%rsi),%r8
|
| + sbbq 8(%rsi),%r9
|
| + sbbq 16(%rsi),%r10
|
| + movq %r8,%rax
|
| + sbbq 24(%rsi),%r11
|
| + leaq L$poly(%rip),%rsi
|
| + movq %r9,%rdx
|
| + sbbq $0,%r13
|
| +
|
| + addq 0(%rsi),%r8
|
| + movq %r10,%rcx
|
| + adcq 8(%rsi),%r9
|
| + adcq 16(%rsi),%r10
|
| + movq %r11,%r12
|
| + adcq 24(%rsi),%r11
|
| + testq %r13,%r13
|
| +
|
| + cmovzq %rax,%r8
|
| + cmovzq %rdx,%r9
|
| + movq %r8,0(%rdi)
|
| + cmovzq %rcx,%r10
|
| + movq %r9,8(%rdi)
|
| + cmovzq %r12,%r11
|
| + movq %r10,16(%rdi)
|
| + movq %r11,24(%rdi)
|
| +
|
| + popq %r13
|
| + popq %r12
|
| + .byte 0xf3,0xc3
|
| +
|
| +
|
| +
|
| +
|
| +
|
| +
|
| +
|
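| +/* Montgomery multiplication entry point: %rdi = result, %rsi = a, %rdx = b,
|
| +   each four 64-bit limbs in the Montgomery domain.  This build always calls the
|
| +   mulq-based routine below; no ADX/BMI2 variant is present in this file. */
|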
| +.globl _ecp_nistz256_mul_mont
|
| +.private_extern _ecp_nistz256_mul_mont
|
| +
|
| +.p2align 5
|
| +_ecp_nistz256_mul_mont:
|
| +L$mul_mont:
|
| + pushq %rbp
|
| + pushq %rbx
|
| + pushq %r12
|
| + pushq %r13
|
| + pushq %r14
|
| + pushq %r15
|
| + movq %rdx,%rbx
|
| + movq 0(%rdx),%rax
|
| + movq 0(%rsi),%r9
|
| + movq 8(%rsi),%r10
|
| + movq 16(%rsi),%r11
|
| + movq 24(%rsi),%r12
|
| +
|
| + call __ecp_nistz256_mul_montq
|
| +L$mul_mont_done:
|
| + popq %r15
|
| + popq %r14
|
| + popq %r13
|
| + popq %r12
|
| + popq %rbx
|
| + popq %rbp
|
| + .byte 0xf3,0xc3
|
| +
|
| +
|
| +
|
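| +/* Word-by-word Montgomery multiplication.  The low limb of p is all ones, so
|
| +   -p^-1 mod 2^64 is 1 and each reduction round uses the running low limb
|
| +   directly as the quotient; its multiples of p are built from 32-bit shifts
|
| +   plus a single mulq by the top limb of p (L$poly+24). */
|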
| +.p2align 5
|
| +__ecp_nistz256_mul_montq:
|
| +
|
| +
|
| + movq %rax,%rbp
|
| + mulq %r9
|
| + movq L$poly+8(%rip),%r14
|
| + movq %rax,%r8
|
| + movq %rbp,%rax
|
| + movq %rdx,%r9
|
| +
|
| + mulq %r10
|
| + movq L$poly+24(%rip),%r15
|
| + addq %rax,%r9
|
| + movq %rbp,%rax
|
| + adcq $0,%rdx
|
| + movq %rdx,%r10
|
| +
|
| + mulq %r11
|
| + addq %rax,%r10
|
| + movq %rbp,%rax
|
| + adcq $0,%rdx
|
| + movq %rdx,%r11
|
| +
|
| + mulq %r12
|
| + addq %rax,%r11
|
| + movq %r8,%rax
|
| + adcq $0,%rdx
|
| + xorq %r13,%r13
|
| + movq %rdx,%r12
|
| +
|
| +
|
| +
|
| +
|
| +
|
| +
|
| +
|
| +
|
| +
|
| +
|
| + movq %r8,%rbp
|
| + shlq $32,%r8
|
| + mulq %r15
|
| + shrq $32,%rbp
|
| + addq %r8,%r9
|
| + adcq %rbp,%r10
|
| + adcq %rax,%r11
|
| + movq 8(%rbx),%rax
|
| + adcq %rdx,%r12
|
| + adcq $0,%r13
|
| + xorq %r8,%r8
|
| +
|
| +
|
| +
|
| + movq %rax,%rbp
|
| + mulq 0(%rsi)
|
| + addq %rax,%r9
|
| + movq %rbp,%rax
|
| + adcq $0,%rdx
|
| + movq %rdx,%rcx
|
| +
|
| + mulq 8(%rsi)
|
| + addq %rcx,%r10
|
| + adcq $0,%rdx
|
| + addq %rax,%r10
|
| + movq %rbp,%rax
|
| + adcq $0,%rdx
|
| + movq %rdx,%rcx
|
| +
|
| + mulq 16(%rsi)
|
| + addq %rcx,%r11
|
| + adcq $0,%rdx
|
| + addq %rax,%r11
|
| + movq %rbp,%rax
|
| + adcq $0,%rdx
|
| + movq %rdx,%rcx
|
| +
|
| + mulq 24(%rsi)
|
| + addq %rcx,%r12
|
| + adcq $0,%rdx
|
| + addq %rax,%r12
|
| + movq %r9,%rax
|
| + adcq %rdx,%r13
|
| + adcq $0,%r8
|
| +
|
| +
|
| +
|
| + movq %r9,%rbp
|
| + shlq $32,%r9
|
| + mulq %r15
|
| + shrq $32,%rbp
|
| + addq %r9,%r10
|
| + adcq %rbp,%r11
|
| + adcq %rax,%r12
|
| + movq 16(%rbx),%rax
|
| + adcq %rdx,%r13
|
| + adcq $0,%r8
|
| + xorq %r9,%r9
|
| +
|
| +
|
| +
|
| + movq %rax,%rbp
|
| + mulq 0(%rsi)
|
| + addq %rax,%r10
|
| + movq %rbp,%rax
|
| + adcq $0,%rdx
|
| + movq %rdx,%rcx
|
| +
|
| + mulq 8(%rsi)
|
| + addq %rcx,%r11
|
| + adcq $0,%rdx
|
| + addq %rax,%r11
|
| + movq %rbp,%rax
|
| + adcq $0,%rdx
|
| + movq %rdx,%rcx
|
| +
|
| + mulq 16(%rsi)
|
| + addq %rcx,%r12
|
| + adcq $0,%rdx
|
| + addq %rax,%r12
|
| + movq %rbp,%rax
|
| + adcq $0,%rdx
|
| + movq %rdx,%rcx
|
| +
|
| + mulq 24(%rsi)
|
| + addq %rcx,%r13
|
| + adcq $0,%rdx
|
| + addq %rax,%r13
|
| + movq %r10,%rax
|
| + adcq %rdx,%r8
|
| + adcq $0,%r9
|
| +
|
| +
|
| +
|
| + movq %r10,%rbp
|
| + shlq $32,%r10
|
| + mulq %r15
|
| + shrq $32,%rbp
|
| + addq %r10,%r11
|
| + adcq %rbp,%r12
|
| + adcq %rax,%r13
|
| + movq 24(%rbx),%rax
|
| + adcq %rdx,%r8
|
| + adcq $0,%r9
|
| + xorq %r10,%r10
|
| +
|
| +
|
| +
|
| + movq %rax,%rbp
|
| + mulq 0(%rsi)
|
| + addq %rax,%r11
|
| + movq %rbp,%rax
|
| + adcq $0,%rdx
|
| + movq %rdx,%rcx
|
| +
|
| + mulq 8(%rsi)
|
| + addq %rcx,%r12
|
| + adcq $0,%rdx
|
| + addq %rax,%r12
|
| + movq %rbp,%rax
|
| + adcq $0,%rdx
|
| + movq %rdx,%rcx
|
| +
|
| + mulq 16(%rsi)
|
| + addq %rcx,%r13
|
| + adcq $0,%rdx
|
| + addq %rax,%r13
|
| + movq %rbp,%rax
|
| + adcq $0,%rdx
|
| + movq %rdx,%rcx
|
| +
|
| + mulq 24(%rsi)
|
| + addq %rcx,%r8
|
| + adcq $0,%rdx
|
| + addq %rax,%r8
|
| + movq %r11,%rax
|
| + adcq %rdx,%r9
|
| + adcq $0,%r10
|
| +
|
| +
|
| +
|
| + movq %r11,%rbp
|
| + shlq $32,%r11
|
| + mulq %r15
|
| + shrq $32,%rbp
|
| + addq %r11,%r12
|
| + adcq %rbp,%r13
|
| + movq %r12,%rcx
|
| + adcq %rax,%r8
|
| + adcq %rdx,%r9
|
| + movq %r13,%rbp
|
| + adcq $0,%r10
|
| +
|
| +
|
| +
|
| + subq $-1,%r12
|
| + movq %r8,%rbx
|
| + sbbq %r14,%r13
|
| + sbbq $0,%r8
|
| + movq %r9,%rdx
|
| + sbbq %r15,%r9
|
| + sbbq $0,%r10
|
| +
|
| + cmovcq %rcx,%r12
|
| + cmovcq %rbp,%r13
|
| + movq %r12,0(%rdi)
|
| + cmovcq %rbx,%r8
|
| + movq %r13,8(%rdi)
|
| + cmovcq %rdx,%r9
|
| + movq %r8,16(%rdi)
|
| + movq %r9,24(%rdi)
|
| +
|
| + .byte 0xf3,0xc3
|
| +
|
| +
|
| +
|
| +
|
| +
|
| +
|
| +
|
| +
|
| +
|
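| +/* Montgomery squaring entry point: %rdi = result, %rsi = a.  Loads the limbs
|
| +   and calls the dedicated squaring routine below. */
|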
| +.globl _ecp_nistz256_sqr_mont
|
| +.private_extern _ecp_nistz256_sqr_mont
|
| +
|
| +.p2align 5
|
| +_ecp_nistz256_sqr_mont:
|
| + pushq %rbp
|
| + pushq %rbx
|
| + pushq %r12
|
| + pushq %r13
|
| + pushq %r14
|
| + pushq %r15
|
| + movq 0(%rsi),%rax
|
| + movq 8(%rsi),%r14
|
| + movq 16(%rsi),%r15
|
| + movq 24(%rsi),%r8
|
| +
|
| + call __ecp_nistz256_sqr_montq
|
| +L$sqr_mont_done:
|
| + popq %r15
|
| + popq %r14
|
| + popq %r13
|
| + popq %r12
|
| + popq %rbx
|
| + popq %rbp
|
| + .byte 0xf3,0xc3
|
| +
|
| +
|
| +
|
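| +/* Montgomery squaring: the off-diagonal limb products are computed once and
|
| +   doubled, the diagonal squares are added in, then four reduction rounds (the
|
| +   same shift trick as in the multiply) and a final conditional subtraction of p
|
| +   bring the result back to four limbs. */
|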
| +.p2align 5
|
| +__ecp_nistz256_sqr_montq:
|
| + movq %rax,%r13
|
| + mulq %r14
|
| + movq %rax,%r9
|
| + movq %r15,%rax
|
| + movq %rdx,%r10
|
| +
|
| + mulq %r13
|
| + addq %rax,%r10
|
| + movq %r8,%rax
|
| + adcq $0,%rdx
|
| + movq %rdx,%r11
|
| +
|
| + mulq %r13
|
| + addq %rax,%r11
|
| + movq %r15,%rax
|
| + adcq $0,%rdx
|
| + movq %rdx,%r12
|
| +
|
| +
|
| + mulq %r14
|
| + addq %rax,%r11
|
| + movq %r8,%rax
|
| + adcq $0,%rdx
|
| + movq %rdx,%rbp
|
| +
|
| + mulq %r14
|
| + addq %rax,%r12
|
| + movq %r8,%rax
|
| + adcq $0,%rdx
|
| + addq %rbp,%r12
|
| + movq %rdx,%r13
|
| + adcq $0,%r13
|
| +
|
| +
|
| + mulq %r15
|
| + xorq %r15,%r15
|
| + addq %rax,%r13
|
| + movq 0(%rsi),%rax
|
| + movq %rdx,%r14
|
| + adcq $0,%r14
|
| +
|
| + addq %r9,%r9
|
| + adcq %r10,%r10
|
| + adcq %r11,%r11
|
| + adcq %r12,%r12
|
| + adcq %r13,%r13
|
| + adcq %r14,%r14
|
| + adcq $0,%r15
|
| +
|
| + mulq %rax
|
| + movq %rax,%r8
|
| + movq 8(%rsi),%rax
|
| + movq %rdx,%rcx
|
| +
|
| + mulq %rax
|
| + addq %rcx,%r9
|
| + adcq %rax,%r10
|
| + movq 16(%rsi),%rax
|
| + adcq $0,%rdx
|
| + movq %rdx,%rcx
|
| +
|
| + mulq %rax
|
| + addq %rcx,%r11
|
| + adcq %rax,%r12
|
| + movq 24(%rsi),%rax
|
| + adcq $0,%rdx
|
| + movq %rdx,%rcx
|
| +
|
| + mulq %rax
|
| + addq %rcx,%r13
|
| + adcq %rax,%r14
|
| + movq %r8,%rax
|
| + adcq %rdx,%r15
|
| +
|
| + movq L$poly+8(%rip),%rsi
|
| + movq L$poly+24(%rip),%rbp
|
| +
|
| +
|
| +
|
| +
|
| + movq %r8,%rcx
|
| + shlq $32,%r8
|
| + mulq %rbp
|
| + shrq $32,%rcx
|
| + addq %r8,%r9
|
| + adcq %rcx,%r10
|
| + adcq %rax,%r11
|
| + movq %r9,%rax
|
| + adcq $0,%rdx
|
| +
|
| +
|
| +
|
| + movq %r9,%rcx
|
| + shlq $32,%r9
|
| + movq %rdx,%r8
|
| + mulq %rbp
|
| + shrq $32,%rcx
|
| + addq %r9,%r10
|
| + adcq %rcx,%r11
|
| + adcq %rax,%r8
|
| + movq %r10,%rax
|
| + adcq $0,%rdx
|
| +
|
| +
|
| +
|
| + movq %r10,%rcx
|
| + shlq $32,%r10
|
| + movq %rdx,%r9
|
| + mulq %rbp
|
| + shrq $32,%rcx
|
| + addq %r10,%r11
|
| + adcq %rcx,%r8
|
| + adcq %rax,%r9
|
| + movq %r11,%rax
|
| + adcq $0,%rdx
|
| +
|
| +
|
| +
|
| + movq %r11,%rcx
|
| + shlq $32,%r11
|
| + movq %rdx,%r10
|
| + mulq %rbp
|
| + shrq $32,%rcx
|
| + addq %r11,%r8
|
| + adcq %rcx,%r9
|
| + adcq %rax,%r10
|
| + adcq $0,%rdx
|
| + xorq %r11,%r11
|
| +
|
| +
|
| +
|
| + addq %r8,%r12
|
| + adcq %r9,%r13
|
| + movq %r12,%r8
|
| + adcq %r10,%r14
|
| + adcq %rdx,%r15
|
| + movq %r13,%r9
|
| + adcq $0,%r11
|
| +
|
| + subq $-1,%r12
|
| + movq %r14,%r10
|
| + sbbq %rsi,%r13
|
| + sbbq $0,%r14
|
| + movq %r15,%rcx
|
| + sbbq %rbp,%r15
|
| + sbbq $0,%r11
|
| +
|
| + cmovcq %r8,%r12
|
| + cmovcq %r9,%r13
|
| + movq %r12,0(%rdi)
|
| + cmovcq %r10,%r14
|
| + movq %r13,8(%rdi)
|
| + cmovcq %rcx,%r15
|
| + movq %r14,16(%rdi)
|
| + movq %r15,24(%rdi)
|
| +
|
| + .byte 0xf3,0xc3
|
| +
|
| +
|
| +
|
| +
|
| +
|
| +
|
| +
|
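| +/* Converts out of the Montgomery domain (multiplies by R^-1 mod p, R = 2^256)
|
| +   by running four reduction rounds on the bare input followed by one
|
| +   conditional subtraction of p. */
|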
| +.globl _ecp_nistz256_from_mont
|
| +.private_extern _ecp_nistz256_from_mont
|
| +
|
| +.p2align 5
|
| +_ecp_nistz256_from_mont:
|
| + pushq %r12
|
| + pushq %r13
|
| +
|
| + movq 0(%rsi),%rax
|
| + movq L$poly+24(%rip),%r13
|
| + movq 8(%rsi),%r9
|
| + movq 16(%rsi),%r10
|
| + movq 24(%rsi),%r11
|
| + movq %rax,%r8
|
| + movq L$poly+8(%rip),%r12
|
| +
|
| +
|
| +
|
| + movq %rax,%rcx
|
| + shlq $32,%r8
|
| + mulq %r13
|
| + shrq $32,%rcx
|
| + addq %r8,%r9
|
| + adcq %rcx,%r10
|
| + adcq %rax,%r11
|
| + movq %r9,%rax
|
| + adcq $0,%rdx
|
| +
|
| +
|
| +
|
| + movq %r9,%rcx
|
| + shlq $32,%r9
|
| + movq %rdx,%r8
|
| + mulq %r13
|
| + shrq $32,%rcx
|
| + addq %r9,%r10
|
| + adcq %rcx,%r11
|
| + adcq %rax,%r8
|
| + movq %r10,%rax
|
| + adcq $0,%rdx
|
| +
|
| +
|
| +
|
| + movq %r10,%rcx
|
| + shlq $32,%r10
|
| + movq %rdx,%r9
|
| + mulq %r13
|
| + shrq $32,%rcx
|
| + addq %r10,%r11
|
| + adcq %rcx,%r8
|
| + adcq %rax,%r9
|
| + movq %r11,%rax
|
| + adcq $0,%rdx
|
| +
|
| +
|
| +
|
| + movq %r11,%rcx
|
| + shlq $32,%r11
|
| + movq %rdx,%r10
|
| + mulq %r13
|
| + shrq $32,%rcx
|
| + addq %r11,%r8
|
| + adcq %rcx,%r9
|
| + movq %r8,%rcx
|
| + adcq %rax,%r10
|
| + movq %r9,%rsi
|
| + adcq $0,%rdx
|
| +
|
| + subq $-1,%r8
|
| + movq %r10,%rax
|
| + sbbq %r12,%r9
|
| + sbbq $0,%r10
|
| + movq %rdx,%r11
|
| + sbbq %r13,%rdx
|
| + sbbq %r13,%r13
|
| +
|
| + cmovnzq %rcx,%r8
|
| + cmovnzq %rsi,%r9
|
| + movq %r8,0(%rdi)
|
| + cmovnzq %rax,%r10
|
| + movq %r9,8(%rdi)
|
| + cmovzq %rdx,%r11
|
| + movq %r10,16(%rdi)
|
| + movq %r11,24(%rdi)
|
| +
|
| + popq %r13
|
| + popq %r12
|
| + .byte 0xf3,0xc3
|
| +
|
| +
|
| +
|
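| +/* Constant-time table lookup: scans all 16 entries of a width-5 window table
|
| +   (96 bytes per Jacobian point), accumulating the entry whose position matches
|
| +   the index in %edx via pcmpeqd masks, so no address depends on the secret. */
|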
| +.globl _ecp_nistz256_select_w5
|
| +.private_extern _ecp_nistz256_select_w5
|
| +
|
| +.p2align 5
|
| +_ecp_nistz256_select_w5:
|
| + movdqa L$One(%rip),%xmm0
|
| + movd %edx,%xmm1
|
| +
|
| + pxor %xmm2,%xmm2
|
| + pxor %xmm3,%xmm3
|
| + pxor %xmm4,%xmm4
|
| + pxor %xmm5,%xmm5
|
| + pxor %xmm6,%xmm6
|
| + pxor %xmm7,%xmm7
|
| +
|
| + movdqa %xmm0,%xmm8
|
| + pshufd $0,%xmm1,%xmm1
|
| +
|
| + movq $16,%rax
|
| +L$select_loop_sse_w5:
|
| +
|
| + movdqa %xmm8,%xmm15
|
| + paddd %xmm0,%xmm8
|
| + pcmpeqd %xmm1,%xmm15
|
| +
|
| + movdqa 0(%rsi),%xmm9
|
| + movdqa 16(%rsi),%xmm10
|
| + movdqa 32(%rsi),%xmm11
|
| + movdqa 48(%rsi),%xmm12
|
| + movdqa 64(%rsi),%xmm13
|
| + movdqa 80(%rsi),%xmm14
|
| + leaq 96(%rsi),%rsi
|
| +
|
| + pand %xmm15,%xmm9
|
| + pand %xmm15,%xmm10
|
| + por %xmm9,%xmm2
|
| + pand %xmm15,%xmm11
|
| + por %xmm10,%xmm3
|
| + pand %xmm15,%xmm12
|
| + por %xmm11,%xmm4
|
| + pand %xmm15,%xmm13
|
| + por %xmm12,%xmm5
|
| + pand %xmm15,%xmm14
|
| + por %xmm13,%xmm6
|
| + por %xmm14,%xmm7
|
| +
|
| + decq %rax
|
| + jnz L$select_loop_sse_w5
|
| +
|
| + movdqu %xmm2,0(%rdi)
|
| + movdqu %xmm3,16(%rdi)
|
| + movdqu %xmm4,32(%rdi)
|
| + movdqu %xmm5,48(%rdi)
|
| + movdqu %xmm6,64(%rdi)
|
| + movdqu %xmm7,80(%rdi)
|
| + .byte 0xf3,0xc3
|
| +
|
| +
|
| +
|
| +
|
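| +/* Same constant-time scan for the width-7 window table: 64 entries of 64 bytes
|
| +   each (affine x,y pairs), again selected purely with SIMD compares and masks. */
|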
| +.globl _ecp_nistz256_select_w7
|
| +.private_extern _ecp_nistz256_select_w7
|
| +
|
| +.p2align 5
|
| +_ecp_nistz256_select_w7:
|
| + movdqa L$One(%rip),%xmm8
|
| + movd %edx,%xmm1
|
| +
|
| + pxor %xmm2,%xmm2
|
| + pxor %xmm3,%xmm3
|
| + pxor %xmm4,%xmm4
|
| + pxor %xmm5,%xmm5
|
| +
|
| + movdqa %xmm8,%xmm0
|
| + pshufd $0,%xmm1,%xmm1
|
| + movq $64,%rax
|
| +
|
| +L$select_loop_sse_w7:
|
| + movdqa %xmm8,%xmm15
|
| + paddd %xmm0,%xmm8
|
| + movdqa 0(%rsi),%xmm9
|
| + movdqa 16(%rsi),%xmm10
|
| + pcmpeqd %xmm1,%xmm15
|
| + movdqa 32(%rsi),%xmm11
|
| + movdqa 48(%rsi),%xmm12
|
| + leaq 64(%rsi),%rsi
|
| +
|
| + pand %xmm15,%xmm9
|
| + pand %xmm15,%xmm10
|
| + por %xmm9,%xmm2
|
| + pand %xmm15,%xmm11
|
| + por %xmm10,%xmm3
|
| + pand %xmm15,%xmm12
|
| + por %xmm11,%xmm4
|
| + prefetcht0 255(%rsi)
|
| + por %xmm12,%xmm5
|
| +
|
| + decq %rax
|
| + jnz L$select_loop_sse_w7
|
| +
|
| + movdqu %xmm2,0(%rdi)
|
| + movdqu %xmm3,16(%rdi)
|
| + movdqu %xmm4,32(%rdi)
|
| + movdqu %xmm5,48(%rdi)
|
| + .byte 0xf3,0xc3
|
| +
|
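| +/* The AVX2 variant is not provided in this build: .byte 0x0f,0x0b encodes ud2,
|
| +   so any caller that reaches this stub faults immediately. */
|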
| +.globl _ecp_nistz256_avx2_select_w7
|
| +.private_extern _ecp_nistz256_avx2_select_w7
|
| +
|
| +.p2align 5
|
| +_ecp_nistz256_avx2_select_w7:
|
| +.byte 0x0f,0x0b
|
| + .byte 0xf3,0xc3
|
| +
|
| +
|
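| +/* The next four local helpers (__ecp_nistz256_add_toq, _sub_fromq, _subq and
|
| +   _mul_by_2q) work on operands kept in registers (and, for the first two, the
|
| +   block at %rbx), applying a cmov-based correction by p; their callers keep
|
| +   L$poly+8 and L$poly+24 cached in %r14 and %r15 for them. */
|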
| +.p2align 5
|
| +__ecp_nistz256_add_toq:
|
| + addq 0(%rbx),%r12
|
| + adcq 8(%rbx),%r13
|
| + movq %r12,%rax
|
| + adcq 16(%rbx),%r8
|
| + adcq 24(%rbx),%r9
|
| + movq %r13,%rbp
|
| + sbbq %r11,%r11
|
| +
|
| + subq $-1,%r12
|
| + movq %r8,%rcx
|
| + sbbq %r14,%r13
|
| + sbbq $0,%r8
|
| + movq %r9,%r10
|
| + sbbq %r15,%r9
|
| + testq %r11,%r11
|
| +
|
| + cmovzq %rax,%r12
|
| + cmovzq %rbp,%r13
|
| + movq %r12,0(%rdi)
|
| + cmovzq %rcx,%r8
|
| + movq %r13,8(%rdi)
|
| + cmovzq %r10,%r9
|
| + movq %r8,16(%rdi)
|
| + movq %r9,24(%rdi)
|
| +
|
| + .byte 0xf3,0xc3
|
| +
|
| +
|
| +
|
| +.p2align 5
|
| +__ecp_nistz256_sub_fromq:
|
| + subq 0(%rbx),%r12
|
| + sbbq 8(%rbx),%r13
|
| + movq %r12,%rax
|
| + sbbq 16(%rbx),%r8
|
| + sbbq 24(%rbx),%r9
|
| + movq %r13,%rbp
|
| + sbbq %r11,%r11
|
| +
|
| + addq $-1,%r12
|
| + movq %r8,%rcx
|
| + adcq %r14,%r13
|
| + adcq $0,%r8
|
| + movq %r9,%r10
|
| + adcq %r15,%r9
|
| + testq %r11,%r11
|
| +
|
| + cmovzq %rax,%r12
|
| + cmovzq %rbp,%r13
|
| + movq %r12,0(%rdi)
|
| + cmovzq %rcx,%r8
|
| + movq %r13,8(%rdi)
|
| + cmovzq %r10,%r9
|
| + movq %r8,16(%rdi)
|
| + movq %r9,24(%rdi)
|
| +
|
| + .byte 0xf3,0xc3
|
| +
|
| +
|
| +
|
| +.p2align 5
|
| +__ecp_nistz256_subq:
|
| + subq %r12,%rax
|
| + sbbq %r13,%rbp
|
| + movq %rax,%r12
|
| + sbbq %r8,%rcx
|
| + sbbq %r9,%r10
|
| + movq %rbp,%r13
|
| + sbbq %r11,%r11
|
| +
|
| + addq $-1,%rax
|
| + movq %rcx,%r8
|
| + adcq %r14,%rbp
|
| + adcq $0,%rcx
|
| + movq %r10,%r9
|
| + adcq %r15,%r10
|
| + testq %r11,%r11
|
| +
|
| + cmovnzq %rax,%r12
|
| + cmovnzq %rbp,%r13
|
| + cmovnzq %rcx,%r8
|
| + cmovnzq %r10,%r9
|
| +
|
| + .byte 0xf3,0xc3
|
| +
|
| +
|
| +
|
| +.p2align 5
|
| +__ecp_nistz256_mul_by_2q:
|
| + addq %r12,%r12
|
| + adcq %r13,%r13
|
| + movq %r12,%rax
|
| + adcq %r8,%r8
|
| + adcq %r9,%r9
|
| + movq %r13,%rbp
|
| + sbbq %r11,%r11
|
| +
|
| + subq $-1,%r12
|
| + movq %r8,%rcx
|
| + sbbq %r14,%r13
|
| + sbbq $0,%r8
|
| + movq %r9,%r10
|
| + sbbq %r15,%r9
|
| + testq %r11,%r11
|
| +
|
| + cmovzq %rax,%r12
|
| + cmovzq %rbp,%r13
|
| + movq %r12,0(%rdi)
|
| + cmovzq %rcx,%r8
|
| + movq %r13,8(%rdi)
|
| + cmovzq %r10,%r9
|
| + movq %r8,16(%rdi)
|
| + movq %r9,24(%rdi)
|
| +
|
| + .byte 0xf3,0xc3
|
| +
|
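| +/* Point doubling on Jacobian (X,Y,Z) coordinates using formulas specialized
|
| +   for a = -3, built from the modular helpers above.  The shift/cmov sequence
|
| +   mid-function halves a value mod p (adding p first when it is odd), and the
|
| +   raw .byte 102,... sequences are hand-encoded movq transfers between
|
| +   general-purpose and xmm registers, kept as bytes for assembler compatibility.
|
| +   L$point_double_shortcutq is re-entered from point addition when the two
|
| +   inputs turn out to be equal. */
|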
| +.globl _ecp_nistz256_point_double
|
| +.private_extern _ecp_nistz256_point_double
|
| +
|
| +.p2align 5
|
| +_ecp_nistz256_point_double:
|
| + pushq %rbp
|
| + pushq %rbx
|
| + pushq %r12
|
| + pushq %r13
|
| + pushq %r14
|
| + pushq %r15
|
| + subq $160+8,%rsp
|
| +
|
| +L$point_double_shortcutq:
|
| + movdqu 0(%rsi),%xmm0
|
| + movq %rsi,%rbx
|
| + movdqu 16(%rsi),%xmm1
|
| + movq 32+0(%rsi),%r12
|
| + movq 32+8(%rsi),%r13
|
| + movq 32+16(%rsi),%r8
|
| + movq 32+24(%rsi),%r9
|
| + movq L$poly+8(%rip),%r14
|
| + movq L$poly+24(%rip),%r15
|
| + movdqa %xmm0,96(%rsp)
|
| + movdqa %xmm1,96+16(%rsp)
|
| + leaq 32(%rdi),%r10
|
| + leaq 64(%rdi),%r11
|
| +.byte 102,72,15,110,199
|
| +.byte 102,73,15,110,202
|
| +.byte 102,73,15,110,211
|
| +
|
| + leaq 0(%rsp),%rdi
|
| + call __ecp_nistz256_mul_by_2q
|
| +
|
| + movq 64+0(%rsi),%rax
|
| + movq 64+8(%rsi),%r14
|
| + movq 64+16(%rsi),%r15
|
| + movq 64+24(%rsi),%r8
|
| + leaq 64-0(%rsi),%rsi
|
| + leaq 64(%rsp),%rdi
|
| + call __ecp_nistz256_sqr_montq
|
| +
|
| + movq 0+0(%rsp),%rax
|
| + movq 8+0(%rsp),%r14
|
| + leaq 0+0(%rsp),%rsi
|
| + movq 16+0(%rsp),%r15
|
| + movq 24+0(%rsp),%r8
|
| + leaq 0(%rsp),%rdi
|
| + call __ecp_nistz256_sqr_montq
|
| +
|
| + movq 32(%rbx),%rax
|
| + movq 64+0(%rbx),%r9
|
| + movq 64+8(%rbx),%r10
|
| + movq 64+16(%rbx),%r11
|
| + movq 64+24(%rbx),%r12
|
| + leaq 64-0(%rbx),%rsi
|
| + leaq 32(%rbx),%rbx
|
| +.byte 102,72,15,126,215
|
| + call __ecp_nistz256_mul_montq
|
| + call __ecp_nistz256_mul_by_2q
|
| +
|
| + movq 96+0(%rsp),%r12
|
| + movq 96+8(%rsp),%r13
|
| + leaq 64(%rsp),%rbx
|
| + movq 96+16(%rsp),%r8
|
| + movq 96+24(%rsp),%r9
|
| + leaq 32(%rsp),%rdi
|
| + call __ecp_nistz256_add_toq
|
| +
|
| + movq 96+0(%rsp),%r12
|
| + movq 96+8(%rsp),%r13
|
| + leaq 64(%rsp),%rbx
|
| + movq 96+16(%rsp),%r8
|
| + movq 96+24(%rsp),%r9
|
| + leaq 64(%rsp),%rdi
|
| + call __ecp_nistz256_sub_fromq
|
| +
|
| + movq 0+0(%rsp),%rax
|
| + movq 8+0(%rsp),%r14
|
| + leaq 0+0(%rsp),%rsi
|
| + movq 16+0(%rsp),%r15
|
| + movq 24+0(%rsp),%r8
|
| +.byte 102,72,15,126,207
|
| + call __ecp_nistz256_sqr_montq
|
| + xorq %r9,%r9
|
| + movq %r12,%rax
|
| + addq $-1,%r12
|
| + movq %r13,%r10
|
| + adcq %rsi,%r13
|
| + movq %r14,%rcx
|
| + adcq $0,%r14
|
| + movq %r15,%r8
|
| + adcq %rbp,%r15
|
| + adcq $0,%r9
|
| + xorq %rsi,%rsi
|
| + testq $1,%rax
|
| +
|
| + cmovzq %rax,%r12
|
| + cmovzq %r10,%r13
|
| + cmovzq %rcx,%r14
|
| + cmovzq %r8,%r15
|
| + cmovzq %rsi,%r9
|
| +
|
| + movq %r13,%rax
|
| + shrq $1,%r12
|
| + shlq $63,%rax
|
| + movq %r14,%r10
|
| + shrq $1,%r13
|
| + orq %rax,%r12
|
| + shlq $63,%r10
|
| + movq %r15,%rcx
|
| + shrq $1,%r14
|
| + orq %r10,%r13
|
| + shlq $63,%rcx
|
| + movq %r12,0(%rdi)
|
| + shrq $1,%r15
|
| + movq %r13,8(%rdi)
|
| + shlq $63,%r9
|
| + orq %rcx,%r14
|
| + orq %r9,%r15
|
| + movq %r14,16(%rdi)
|
| + movq %r15,24(%rdi)
|
| + movq 64(%rsp),%rax
|
| + leaq 64(%rsp),%rbx
|
| + movq 0+32(%rsp),%r9
|
| + movq 8+32(%rsp),%r10
|
| + leaq 0+32(%rsp),%rsi
|
| + movq 16+32(%rsp),%r11
|
| + movq 24+32(%rsp),%r12
|
| + leaq 32(%rsp),%rdi
|
| + call __ecp_nistz256_mul_montq
|
| +
|
| + leaq 128(%rsp),%rdi
|
| + call __ecp_nistz256_mul_by_2q
|
| +
|
| + leaq 32(%rsp),%rbx
|
| + leaq 32(%rsp),%rdi
|
| + call __ecp_nistz256_add_toq
|
| +
|
| + movq 96(%rsp),%rax
|
| + leaq 96(%rsp),%rbx
|
| + movq 0+0(%rsp),%r9
|
| + movq 8+0(%rsp),%r10
|
| + leaq 0+0(%rsp),%rsi
|
| + movq 16+0(%rsp),%r11
|
| + movq 24+0(%rsp),%r12
|
| + leaq 0(%rsp),%rdi
|
| + call __ecp_nistz256_mul_montq
|
| +
|
| + leaq 128(%rsp),%rdi
|
| + call __ecp_nistz256_mul_by_2q
|
| +
|
| + movq 0+32(%rsp),%rax
|
| + movq 8+32(%rsp),%r14
|
| + leaq 0+32(%rsp),%rsi
|
| + movq 16+32(%rsp),%r15
|
| + movq 24+32(%rsp),%r8
|
| +.byte 102,72,15,126,199
|
| + call __ecp_nistz256_sqr_montq
|
| +
|
| + leaq 128(%rsp),%rbx
|
| + movq %r14,%r8
|
| + movq %r15,%r9
|
| + movq %rsi,%r14
|
| + movq %rbp,%r15
|
| + call __ecp_nistz256_sub_fromq
|
| +
|
| + movq 0+0(%rsp),%rax
|
| + movq 0+8(%rsp),%rbp
|
| + movq 0+16(%rsp),%rcx
|
| + movq 0+24(%rsp),%r10
|
| + leaq 0(%rsp),%rdi
|
| + call __ecp_nistz256_subq
|
| +
|
| + movq 32(%rsp),%rax
|
| + leaq 32(%rsp),%rbx
|
| + movq %r12,%r14
|
| + xorl %ecx,%ecx
|
| + movq %r12,0+0(%rsp)
|
| + movq %r13,%r10
|
| + movq %r13,0+8(%rsp)
|
| + cmovzq %r8,%r11
|
| + movq %r8,0+16(%rsp)
|
| + leaq 0-0(%rsp),%rsi
|
| + cmovzq %r9,%r12
|
| + movq %r9,0+24(%rsp)
|
| + movq %r14,%r9
|
| + leaq 0(%rsp),%rdi
|
| + call __ecp_nistz256_mul_montq
|
| +
|
| +.byte 102,72,15,126,203
|
| +.byte 102,72,15,126,207
|
| + call __ecp_nistz256_sub_fromq
|
| +
|
| + addq $160+8,%rsp
|
| + popq %r15
|
| + popq %r14
|
| + popq %r13
|
| + popq %r12
|
| + popq %rbx
|
| + popq %rbp
|
| + .byte 0xf3,0xc3
|
| +
|
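| +/* Full Jacobian point addition.  The SIMD prologue builds all-ones/all-zero
|
| +   masks recording whether either input has zero x and y words (this
|
| +   implementation's encoding of the point at infinity); the masked stores at the
|
| +   end pick between the computed sum and either untouched input without
|
| +   branching.  If the inputs are equal the code jumps to the doubling shortcut
|
| +   via L$add_doubleq; the .byte 0x3e is a legacy branch-taken hint prefix on the
|
| +   following conditional jump. */
|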
| +.globl _ecp_nistz256_point_add
|
| +.private_extern _ecp_nistz256_point_add
|
| +
|
| +.p2align 5
|
| +_ecp_nistz256_point_add:
|
| + pushq %rbp
|
| + pushq %rbx
|
| + pushq %r12
|
| + pushq %r13
|
| + pushq %r14
|
| + pushq %r15
|
| + subq $576+8,%rsp
|
| +
|
| + movdqu 0(%rsi),%xmm0
|
| + movdqu 16(%rsi),%xmm1
|
| + movdqu 32(%rsi),%xmm2
|
| + movdqu 48(%rsi),%xmm3
|
| + movdqu 64(%rsi),%xmm4
|
| + movdqu 80(%rsi),%xmm5
|
| + movq %rsi,%rbx
|
| + movq %rdx,%rsi
|
| + movdqa %xmm0,384(%rsp)
|
| + movdqa %xmm1,384+16(%rsp)
|
| + por %xmm0,%xmm1
|
| + movdqa %xmm2,416(%rsp)
|
| + movdqa %xmm3,416+16(%rsp)
|
| + por %xmm2,%xmm3
|
| + movdqa %xmm4,448(%rsp)
|
| + movdqa %xmm5,448+16(%rsp)
|
| + por %xmm1,%xmm3
|
| +
|
| + movdqu 0(%rsi),%xmm0
|
| + pshufd $0xb1,%xmm3,%xmm5
|
| + movdqu 16(%rsi),%xmm1
|
| + movdqu 32(%rsi),%xmm2
|
| + por %xmm3,%xmm5
|
| + movdqu 48(%rsi),%xmm3
|
| + movq 64+0(%rsi),%rax
|
| + movq 64+8(%rsi),%r14
|
| + movq 64+16(%rsi),%r15
|
| + movq 64+24(%rsi),%r8
|
| + movdqa %xmm0,480(%rsp)
|
| + pshufd $0x1e,%xmm5,%xmm4
|
| + movdqa %xmm1,480+16(%rsp)
|
| + por %xmm0,%xmm1
|
| +.byte 102,72,15,110,199
|
| + movdqa %xmm2,512(%rsp)
|
| + movdqa %xmm3,512+16(%rsp)
|
| + por %xmm2,%xmm3
|
| + por %xmm4,%xmm5
|
| + pxor %xmm4,%xmm4
|
| + por %xmm1,%xmm3
|
| +
|
| + leaq 64-0(%rsi),%rsi
|
| + movq %rax,544+0(%rsp)
|
| + movq %r14,544+8(%rsp)
|
| + movq %r15,544+16(%rsp)
|
| + movq %r8,544+24(%rsp)
|
| + leaq 96(%rsp),%rdi
|
| + call __ecp_nistz256_sqr_montq
|
| +
|
| + pcmpeqd %xmm4,%xmm5
|
| + pshufd $0xb1,%xmm3,%xmm4
|
| + por %xmm3,%xmm4
|
| + pshufd $0,%xmm5,%xmm5
|
| + pshufd $0x1e,%xmm4,%xmm3
|
| + por %xmm3,%xmm4
|
| + pxor %xmm3,%xmm3
|
| + pcmpeqd %xmm3,%xmm4
|
| + pshufd $0,%xmm4,%xmm4
|
| + movq 64+0(%rbx),%rax
|
| + movq 64+8(%rbx),%r14
|
| + movq 64+16(%rbx),%r15
|
| + movq 64+24(%rbx),%r8
|
| +.byte 102,72,15,110,203
|
| +
|
| + leaq 64-0(%rbx),%rsi
|
| + leaq 32(%rsp),%rdi
|
| + call __ecp_nistz256_sqr_montq
|
| +
|
| + movq 544(%rsp),%rax
|
| + leaq 544(%rsp),%rbx
|
| + movq 0+96(%rsp),%r9
|
| + movq 8+96(%rsp),%r10
|
| + leaq 0+96(%rsp),%rsi
|
| + movq 16+96(%rsp),%r11
|
| + movq 24+96(%rsp),%r12
|
| + leaq 224(%rsp),%rdi
|
| + call __ecp_nistz256_mul_montq
|
| +
|
| + movq 448(%rsp),%rax
|
| + leaq 448(%rsp),%rbx
|
| + movq 0+32(%rsp),%r9
|
| + movq 8+32(%rsp),%r10
|
| + leaq 0+32(%rsp),%rsi
|
| + movq 16+32(%rsp),%r11
|
| + movq 24+32(%rsp),%r12
|
| + leaq 256(%rsp),%rdi
|
| + call __ecp_nistz256_mul_montq
|
| +
|
| + movq 416(%rsp),%rax
|
| + leaq 416(%rsp),%rbx
|
| + movq 0+224(%rsp),%r9
|
| + movq 8+224(%rsp),%r10
|
| + leaq 0+224(%rsp),%rsi
|
| + movq 16+224(%rsp),%r11
|
| + movq 24+224(%rsp),%r12
|
| + leaq 224(%rsp),%rdi
|
| + call __ecp_nistz256_mul_montq
|
| +
|
| + movq 512(%rsp),%rax
|
| + leaq 512(%rsp),%rbx
|
| + movq 0+256(%rsp),%r9
|
| + movq 8+256(%rsp),%r10
|
| + leaq 0+256(%rsp),%rsi
|
| + movq 16+256(%rsp),%r11
|
| + movq 24+256(%rsp),%r12
|
| + leaq 256(%rsp),%rdi
|
| + call __ecp_nistz256_mul_montq
|
| +
|
| + leaq 224(%rsp),%rbx
|
| + leaq 64(%rsp),%rdi
|
| + call __ecp_nistz256_sub_fromq
|
| +
|
| + orq %r13,%r12
|
| + movdqa %xmm4,%xmm2
|
| + orq %r8,%r12
|
| + orq %r9,%r12
|
| + por %xmm5,%xmm2
|
| +.byte 102,73,15,110,220
|
| +
|
| + movq 384(%rsp),%rax
|
| + leaq 384(%rsp),%rbx
|
| + movq 0+96(%rsp),%r9
|
| + movq 8+96(%rsp),%r10
|
| + leaq 0+96(%rsp),%rsi
|
| + movq 16+96(%rsp),%r11
|
| + movq 24+96(%rsp),%r12
|
| + leaq 160(%rsp),%rdi
|
| + call __ecp_nistz256_mul_montq
|
| +
|
| + movq 480(%rsp),%rax
|
| + leaq 480(%rsp),%rbx
|
| + movq 0+32(%rsp),%r9
|
| + movq 8+32(%rsp),%r10
|
| + leaq 0+32(%rsp),%rsi
|
| + movq 16+32(%rsp),%r11
|
| + movq 24+32(%rsp),%r12
|
| + leaq 192(%rsp),%rdi
|
| + call __ecp_nistz256_mul_montq
|
| +
|
| + leaq 160(%rsp),%rbx
|
| + leaq 0(%rsp),%rdi
|
| + call __ecp_nistz256_sub_fromq
|
| +
|
| + orq %r13,%r12
|
| + orq %r8,%r12
|
| + orq %r9,%r12
|
| +
|
| +.byte 0x3e
|
| + jnz L$add_proceedq
|
| +.byte 102,73,15,126,208
|
| +.byte 102,73,15,126,217
|
| + testq %r8,%r8
|
| + jnz L$add_proceedq
|
| + testq %r9,%r9
|
| + jz L$add_doubleq
|
| +
|
| +.byte 102,72,15,126,199
|
| + pxor %xmm0,%xmm0
|
| + movdqu %xmm0,0(%rdi)
|
| + movdqu %xmm0,16(%rdi)
|
| + movdqu %xmm0,32(%rdi)
|
| + movdqu %xmm0,48(%rdi)
|
| + movdqu %xmm0,64(%rdi)
|
| + movdqu %xmm0,80(%rdi)
|
| + jmp L$add_doneq
|
| +
|
| +.p2align 5
|
| +L$add_doubleq:
|
| +.byte 102,72,15,126,206
|
| +.byte 102,72,15,126,199
|
| + addq $416,%rsp
|
| + jmp L$point_double_shortcutq
|
| +
|
| +.p2align 5
|
| +L$add_proceedq:
|
| + movq 0+64(%rsp),%rax
|
| + movq 8+64(%rsp),%r14
|
| + leaq 0+64(%rsp),%rsi
|
| + movq 16+64(%rsp),%r15
|
| + movq 24+64(%rsp),%r8
|
| + leaq 96(%rsp),%rdi
|
| + call __ecp_nistz256_sqr_montq
|
| +
|
| + movq 448(%rsp),%rax
|
| + leaq 448(%rsp),%rbx
|
| + movq 0+0(%rsp),%r9
|
| + movq 8+0(%rsp),%r10
|
| + leaq 0+0(%rsp),%rsi
|
| + movq 16+0(%rsp),%r11
|
| + movq 24+0(%rsp),%r12
|
| + leaq 352(%rsp),%rdi
|
| + call __ecp_nistz256_mul_montq
|
| +
|
| + movq 0+0(%rsp),%rax
|
| + movq 8+0(%rsp),%r14
|
| + leaq 0+0(%rsp),%rsi
|
| + movq 16+0(%rsp),%r15
|
| + movq 24+0(%rsp),%r8
|
| + leaq 32(%rsp),%rdi
|
| + call __ecp_nistz256_sqr_montq
|
| +
|
| + movq 544(%rsp),%rax
|
| + leaq 544(%rsp),%rbx
|
| + movq 0+352(%rsp),%r9
|
| + movq 8+352(%rsp),%r10
|
| + leaq 0+352(%rsp),%rsi
|
| + movq 16+352(%rsp),%r11
|
| + movq 24+352(%rsp),%r12
|
| + leaq 352(%rsp),%rdi
|
| + call __ecp_nistz256_mul_montq
|
| +
|
| + movq 0(%rsp),%rax
|
| + leaq 0(%rsp),%rbx
|
| + movq 0+32(%rsp),%r9
|
| + movq 8+32(%rsp),%r10
|
| + leaq 0+32(%rsp),%rsi
|
| + movq 16+32(%rsp),%r11
|
| + movq 24+32(%rsp),%r12
|
| + leaq 128(%rsp),%rdi
|
| + call __ecp_nistz256_mul_montq
|
| +
|
| + movq 160(%rsp),%rax
|
| + leaq 160(%rsp),%rbx
|
| + movq 0+32(%rsp),%r9
|
| + movq 8+32(%rsp),%r10
|
| + leaq 0+32(%rsp),%rsi
|
| + movq 16+32(%rsp),%r11
|
| + movq 24+32(%rsp),%r12
|
| + leaq 192(%rsp),%rdi
|
| + call __ecp_nistz256_mul_montq
|
| +
|
| +
|
| +
|
| +
|
| + addq %r12,%r12
|
| + leaq 96(%rsp),%rsi
|
| + adcq %r13,%r13
|
| + movq %r12,%rax
|
| + adcq %r8,%r8
|
| + adcq %r9,%r9
|
| + movq %r13,%rbp
|
| + sbbq %r11,%r11
|
| +
|
| + subq $-1,%r12
|
| + movq %r8,%rcx
|
| + sbbq %r14,%r13
|
| + sbbq $0,%r8
|
| + movq %r9,%r10
|
| + sbbq %r15,%r9
|
| + testq %r11,%r11
|
| +
|
| + cmovzq %rax,%r12
|
| + movq 0(%rsi),%rax
|
| + cmovzq %rbp,%r13
|
| + movq 8(%rsi),%rbp
|
| + cmovzq %rcx,%r8
|
| + movq 16(%rsi),%rcx
|
| + cmovzq %r10,%r9
|
| + movq 24(%rsi),%r10
|
| +
|
| + call __ecp_nistz256_subq
|
| +
|
| + leaq 128(%rsp),%rbx
|
| + leaq 288(%rsp),%rdi
|
| + call __ecp_nistz256_sub_fromq
|
| +
|
| + movq 192+0(%rsp),%rax
|
| + movq 192+8(%rsp),%rbp
|
| + movq 192+16(%rsp),%rcx
|
| + movq 192+24(%rsp),%r10
|
| + leaq 320(%rsp),%rdi
|
| +
|
| + call __ecp_nistz256_subq
|
| +
|
| + movq %r12,0(%rdi)
|
| + movq %r13,8(%rdi)
|
| + movq %r8,16(%rdi)
|
| + movq %r9,24(%rdi)
|
| + movq 128(%rsp),%rax
|
| + leaq 128(%rsp),%rbx
|
| + movq 0+224(%rsp),%r9
|
| + movq 8+224(%rsp),%r10
|
| + leaq 0+224(%rsp),%rsi
|
| + movq 16+224(%rsp),%r11
|
| + movq 24+224(%rsp),%r12
|
| + leaq 256(%rsp),%rdi
|
| + call __ecp_nistz256_mul_montq
|
| +
|
| + movq 320(%rsp),%rax
|
| + leaq 320(%rsp),%rbx
|
| + movq 0+64(%rsp),%r9
|
| + movq 8+64(%rsp),%r10
|
| + leaq 0+64(%rsp),%rsi
|
| + movq 16+64(%rsp),%r11
|
| + movq 24+64(%rsp),%r12
|
| + leaq 320(%rsp),%rdi
|
| + call __ecp_nistz256_mul_montq
|
| +
|
| + leaq 256(%rsp),%rbx
|
| + leaq 320(%rsp),%rdi
|
| + call __ecp_nistz256_sub_fromq
|
| +
|
| +.byte 102,72,15,126,199
|
| +
|
| + movdqa %xmm5,%xmm0
|
| + movdqa %xmm5,%xmm1
|
| + pandn 352(%rsp),%xmm0
|
| + movdqa %xmm5,%xmm2
|
| + pandn 352+16(%rsp),%xmm1
|
| + movdqa %xmm5,%xmm3
|
| + pand 544(%rsp),%xmm2
|
| + pand 544+16(%rsp),%xmm3
|
| + por %xmm0,%xmm2
|
| + por %xmm1,%xmm3
|
| +
|
| + movdqa %xmm4,%xmm0
|
| + movdqa %xmm4,%xmm1
|
| + pandn %xmm2,%xmm0
|
| + movdqa %xmm4,%xmm2
|
| + pandn %xmm3,%xmm1
|
| + movdqa %xmm4,%xmm3
|
| + pand 448(%rsp),%xmm2
|
| + pand 448+16(%rsp),%xmm3
|
| + por %xmm0,%xmm2
|
| + por %xmm1,%xmm3
|
| + movdqu %xmm2,64(%rdi)
|
| + movdqu %xmm3,80(%rdi)
|
| +
|
| + movdqa %xmm5,%xmm0
|
| + movdqa %xmm5,%xmm1
|
| + pandn 288(%rsp),%xmm0
|
| + movdqa %xmm5,%xmm2
|
| + pandn 288+16(%rsp),%xmm1
|
| + movdqa %xmm5,%xmm3
|
| + pand 480(%rsp),%xmm2
|
| + pand 480+16(%rsp),%xmm3
|
| + por %xmm0,%xmm2
|
| + por %xmm1,%xmm3
|
| +
|
| + movdqa %xmm4,%xmm0
|
| + movdqa %xmm4,%xmm1
|
| + pandn %xmm2,%xmm0
|
| + movdqa %xmm4,%xmm2
|
| + pandn %xmm3,%xmm1
|
| + movdqa %xmm4,%xmm3
|
| + pand 384(%rsp),%xmm2
|
| + pand 384+16(%rsp),%xmm3
|
| + por %xmm0,%xmm2
|
| + por %xmm1,%xmm3
|
| + movdqu %xmm2,0(%rdi)
|
| + movdqu %xmm3,16(%rdi)
|
| +
|
| + movdqa %xmm5,%xmm0
|
| + movdqa %xmm5,%xmm1
|
| + pandn 320(%rsp),%xmm0
|
| + movdqa %xmm5,%xmm2
|
| + pandn 320+16(%rsp),%xmm1
|
| + movdqa %xmm5,%xmm3
|
| + pand 512(%rsp),%xmm2
|
| + pand 512+16(%rsp),%xmm3
|
| + por %xmm0,%xmm2
|
| + por %xmm1,%xmm3
|
| +
|
| + movdqa %xmm4,%xmm0
|
| + movdqa %xmm4,%xmm1
|
| + pandn %xmm2,%xmm0
|
| + movdqa %xmm4,%xmm2
|
| + pandn %xmm3,%xmm1
|
| + movdqa %xmm4,%xmm3
|
| + pand 416(%rsp),%xmm2
|
| + pand 416+16(%rsp),%xmm3
|
| + por %xmm0,%xmm2
|
| + por %xmm1,%xmm3
|
| + movdqu %xmm2,32(%rdi)
|
| + movdqu %xmm3,48(%rdi)
|
| +
|
| +L$add_doneq:
|
| + addq $576+8,%rsp
|
| + popq %r15
|
| + popq %r14
|
| + popq %r13
|
| + popq %r12
|
| + popq %rbx
|
| + popq %rbp
|
| + .byte 0xf3,0xc3
|
| +
|
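| +/* Mixed addition: the second operand is an affine point with implicit Z2 = 1,
|
| +   so the Z2^2/Z2^3 multiplications drop out, and L$ONE_mont stands in for the
|
| +   second point's Z coordinate when the masked result is assembled at the end. */
|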
| +.globl _ecp_nistz256_point_add_affine
|
| +.private_extern _ecp_nistz256_point_add_affine
|
| +
|
| +.p2align 5
|
| +_ecp_nistz256_point_add_affine:
|
| + pushq %rbp
|
| + pushq %rbx
|
| + pushq %r12
|
| + pushq %r13
|
| + pushq %r14
|
| + pushq %r15
|
| + subq $480+8,%rsp
|
| +
|
| + movdqu 0(%rsi),%xmm0
|
| + movq %rdx,%rbx
|
| + movdqu 16(%rsi),%xmm1
|
| + movdqu 32(%rsi),%xmm2
|
| + movdqu 48(%rsi),%xmm3
|
| + movdqu 64(%rsi),%xmm4
|
| + movdqu 80(%rsi),%xmm5
|
| + movq 64+0(%rsi),%rax
|
| + movq 64+8(%rsi),%r14
|
| + movq 64+16(%rsi),%r15
|
| + movq 64+24(%rsi),%r8
|
| + movdqa %xmm0,320(%rsp)
|
| + movdqa %xmm1,320+16(%rsp)
|
| + por %xmm0,%xmm1
|
| + movdqa %xmm2,352(%rsp)
|
| + movdqa %xmm3,352+16(%rsp)
|
| + por %xmm2,%xmm3
|
| + movdqa %xmm4,384(%rsp)
|
| + movdqa %xmm5,384+16(%rsp)
|
| + por %xmm1,%xmm3
|
| +
|
| + movdqu 0(%rbx),%xmm0
|
| + pshufd $0xb1,%xmm3,%xmm5
|
| + movdqu 16(%rbx),%xmm1
|
| + movdqu 32(%rbx),%xmm2
|
| + por %xmm3,%xmm5
|
| + movdqu 48(%rbx),%xmm3
|
| + movdqa %xmm0,416(%rsp)
|
| + pshufd $0x1e,%xmm5,%xmm4
|
| + movdqa %xmm1,416+16(%rsp)
|
| + por %xmm0,%xmm1
|
| +.byte 102,72,15,110,199
|
| + movdqa %xmm2,448(%rsp)
|
| + movdqa %xmm3,448+16(%rsp)
|
| + por %xmm2,%xmm3
|
| + por %xmm4,%xmm5
|
| + pxor %xmm4,%xmm4
|
| + por %xmm1,%xmm3
|
| +
|
| + leaq 64-0(%rsi),%rsi
|
| + leaq 32(%rsp),%rdi
|
| + call __ecp_nistz256_sqr_montq
|
| +
|
| + pcmpeqd %xmm4,%xmm5
|
| + pshufd $0xb1,%xmm3,%xmm4
|
| + movq 0(%rbx),%rax
|
| +
|
| + movq %r12,%r9
|
| + por %xmm3,%xmm4
|
| + pshufd $0,%xmm5,%xmm5
|
| + pshufd $0x1e,%xmm4,%xmm3
|
| + movq %r13,%r10
|
| + por %xmm3,%xmm4
|
| + pxor %xmm3,%xmm3
|
| + movq %r14,%r11
|
| + pcmpeqd %xmm3,%xmm4
|
| + pshufd $0,%xmm4,%xmm4
|
| +
|
| + leaq 32-0(%rsp),%rsi
|
| + movq %r15,%r12
|
| + leaq 0(%rsp),%rdi
|
| + call __ecp_nistz256_mul_montq
|
| +
|
| + leaq 320(%rsp),%rbx
|
| + leaq 64(%rsp),%rdi
|
| + call __ecp_nistz256_sub_fromq
|
| +
|
| + movq 384(%rsp),%rax
|
| + leaq 384(%rsp),%rbx
|
| + movq 0+32(%rsp),%r9
|
| + movq 8+32(%rsp),%r10
|
| + leaq 0+32(%rsp),%rsi
|
| + movq 16+32(%rsp),%r11
|
| + movq 24+32(%rsp),%r12
|
| + leaq 32(%rsp),%rdi
|
| + call __ecp_nistz256_mul_montq
|
| +
|
| + movq 384(%rsp),%rax
|
| + leaq 384(%rsp),%rbx
|
| + movq 0+64(%rsp),%r9
|
| + movq 8+64(%rsp),%r10
|
| + leaq 0+64(%rsp),%rsi
|
| + movq 16+64(%rsp),%r11
|
| + movq 24+64(%rsp),%r12
|
| + leaq 288(%rsp),%rdi
|
| + call __ecp_nistz256_mul_montq
|
| +
|
| + movq 448(%rsp),%rax
|
| + leaq 448(%rsp),%rbx
|
| + movq 0+32(%rsp),%r9
|
| + movq 8+32(%rsp),%r10
|
| + leaq 0+32(%rsp),%rsi
|
| + movq 16+32(%rsp),%r11
|
| + movq 24+32(%rsp),%r12
|
| + leaq 32(%rsp),%rdi
|
| + call __ecp_nistz256_mul_montq
|
| +
|
| + leaq 352(%rsp),%rbx
|
| + leaq 96(%rsp),%rdi
|
| + call __ecp_nistz256_sub_fromq
|
| +
|
| + movq 0+64(%rsp),%rax
|
| + movq 8+64(%rsp),%r14
|
| + leaq 0+64(%rsp),%rsi
|
| + movq 16+64(%rsp),%r15
|
| + movq 24+64(%rsp),%r8
|
| + leaq 128(%rsp),%rdi
|
| + call __ecp_nistz256_sqr_montq
|
| +
|
| + movq 0+96(%rsp),%rax
|
| + movq 8+96(%rsp),%r14
|
| + leaq 0+96(%rsp),%rsi
|
| + movq 16+96(%rsp),%r15
|
| + movq 24+96(%rsp),%r8
|
| + leaq 192(%rsp),%rdi
|
| + call __ecp_nistz256_sqr_montq
|
| +
|
| + movq 128(%rsp),%rax
|
| + leaq 128(%rsp),%rbx
|
| + movq 0+64(%rsp),%r9
|
| + movq 8+64(%rsp),%r10
|
| + leaq 0+64(%rsp),%rsi
|
| + movq 16+64(%rsp),%r11
|
| + movq 24+64(%rsp),%r12
|
| + leaq 160(%rsp),%rdi
|
| + call __ecp_nistz256_mul_montq
|
| +
|
| + movq 320(%rsp),%rax
|
| + leaq 320(%rsp),%rbx
|
| + movq 0+128(%rsp),%r9
|
| + movq 8+128(%rsp),%r10
|
| + leaq 0+128(%rsp),%rsi
|
| + movq 16+128(%rsp),%r11
|
| + movq 24+128(%rsp),%r12
|
| + leaq 0(%rsp),%rdi
|
| + call __ecp_nistz256_mul_montq
|
| +
|
| +
|
| +
|
| +
|
| + addq %r12,%r12
|
| + leaq 192(%rsp),%rsi
|
| + adcq %r13,%r13
|
| + movq %r12,%rax
|
| + adcq %r8,%r8
|
| + adcq %r9,%r9
|
| + movq %r13,%rbp
|
| + sbbq %r11,%r11
|
| +
|
| + subq $-1,%r12
|
| + movq %r8,%rcx
|
| + sbbq %r14,%r13
|
| + sbbq $0,%r8
|
| + movq %r9,%r10
|
| + sbbq %r15,%r9
|
| + testq %r11,%r11
|
| +
|
| + cmovzq %rax,%r12
|
| + movq 0(%rsi),%rax
|
| + cmovzq %rbp,%r13
|
| + movq 8(%rsi),%rbp
|
| + cmovzq %rcx,%r8
|
| + movq 16(%rsi),%rcx
|
| + cmovzq %r10,%r9
|
| + movq 24(%rsi),%r10
|
| +
|
| + call __ecp_nistz256_subq
|
| +
|
| + leaq 160(%rsp),%rbx
|
| + leaq 224(%rsp),%rdi
|
| + call __ecp_nistz256_sub_fromq
|
| +
|
| + movq 0+0(%rsp),%rax
|
| + movq 0+8(%rsp),%rbp
|
| + movq 0+16(%rsp),%rcx
|
| + movq 0+24(%rsp),%r10
|
| + leaq 64(%rsp),%rdi
|
| +
|
| + call __ecp_nistz256_subq
|
| +
|
| + movq %r12,0(%rdi)
|
| + movq %r13,8(%rdi)
|
| + movq %r8,16(%rdi)
|
| + movq %r9,24(%rdi)
|
| + movq 352(%rsp),%rax
|
| + leaq 352(%rsp),%rbx
|
| + movq 0+160(%rsp),%r9
|
| + movq 8+160(%rsp),%r10
|
| + leaq 0+160(%rsp),%rsi
|
| + movq 16+160(%rsp),%r11
|
| + movq 24+160(%rsp),%r12
|
| + leaq 32(%rsp),%rdi
|
| + call __ecp_nistz256_mul_montq
|
| +
|
| + movq 96(%rsp),%rax
|
| + leaq 96(%rsp),%rbx
|
| + movq 0+64(%rsp),%r9
|
| + movq 8+64(%rsp),%r10
|
| + leaq 0+64(%rsp),%rsi
|
| + movq 16+64(%rsp),%r11
|
| + movq 24+64(%rsp),%r12
|
| + leaq 64(%rsp),%rdi
|
| + call __ecp_nistz256_mul_montq
|
| +
|
| + leaq 32(%rsp),%rbx
|
| + leaq 256(%rsp),%rdi
|
| + call __ecp_nistz256_sub_fromq
|
| +
|
| +.byte 102,72,15,126,199
|
| +
|
| + movdqa %xmm5,%xmm0
|
| + movdqa %xmm5,%xmm1
|
| + pandn 288(%rsp),%xmm0
|
| + movdqa %xmm5,%xmm2
|
| + pandn 288+16(%rsp),%xmm1
|
| + movdqa %xmm5,%xmm3
|
| + pand L$ONE_mont(%rip),%xmm2
|
| + pand L$ONE_mont+16(%rip),%xmm3
|
| + por %xmm0,%xmm2
|
| + por %xmm1,%xmm3
|
| +
|
| + movdqa %xmm4,%xmm0
|
| + movdqa %xmm4,%xmm1
|
| + pandn %xmm2,%xmm0
|
| + movdqa %xmm4,%xmm2
|
| + pandn %xmm3,%xmm1
|
| + movdqa %xmm4,%xmm3
|
| + pand 384(%rsp),%xmm2
|
| + pand 384+16(%rsp),%xmm3
|
| + por %xmm0,%xmm2
|
| + por %xmm1,%xmm3
|
| + movdqu %xmm2,64(%rdi)
|
| + movdqu %xmm3,80(%rdi)
|
| +
|
| + movdqa %xmm5,%xmm0
|
| + movdqa %xmm5,%xmm1
|
| + pandn 224(%rsp),%xmm0
|
| + movdqa %xmm5,%xmm2
|
| + pandn 224+16(%rsp),%xmm1
|
| + movdqa %xmm5,%xmm3
|
| + pand 416(%rsp),%xmm2
|
| + pand 416+16(%rsp),%xmm3
|
| + por %xmm0,%xmm2
|
| + por %xmm1,%xmm3
|
| +
|
| + movdqa %xmm4,%xmm0
|
| + movdqa %xmm4,%xmm1
|
| + pandn %xmm2,%xmm0
|
| + movdqa %xmm4,%xmm2
|
| + pandn %xmm3,%xmm1
|
| + movdqa %xmm4,%xmm3
|
| + pand 320(%rsp),%xmm2
|
| + pand 320+16(%rsp),%xmm3
|
| + por %xmm0,%xmm2
|
| + por %xmm1,%xmm3
|
| + movdqu %xmm2,0(%rdi)
|
| + movdqu %xmm3,16(%rdi)
|
| +
|
| + movdqa %xmm5,%xmm0
|
| + movdqa %xmm5,%xmm1
|
| + pandn 256(%rsp),%xmm0
|
| + movdqa %xmm5,%xmm2
|
| + pandn 256+16(%rsp),%xmm1
|
| + movdqa %xmm5,%xmm3
|
| + pand 448(%rsp),%xmm2
|
| + pand 448+16(%rsp),%xmm3
|
| + por %xmm0,%xmm2
|
| + por %xmm1,%xmm3
|
| +
|
| + movdqa %xmm4,%xmm0
|
| + movdqa %xmm4,%xmm1
|
| + pandn %xmm2,%xmm0
|
| + movdqa %xmm4,%xmm2
|
| + pandn %xmm3,%xmm1
|
| + movdqa %xmm4,%xmm3
|
| + pand 352(%rsp),%xmm2
|
| + pand 352+16(%rsp),%xmm3
|
| + por %xmm0,%xmm2
|
| + por %xmm1,%xmm3
|
| + movdqu %xmm2,32(%rdi)
|
| + movdqu %xmm3,48(%rdi)
|
| +
|
| + addq $480+8,%rsp
|
| + popq %r15
|
| + popq %r14
|
| + popq %r13
|
| + popq %r12
|
| + popq %rbx
|
| + popq %rbp
|
| + .byte 0xf3,0xc3
|
| +
|
| +#endif
|