| Index: third_party/boringssl/linux-x86_64/crypto/cipher/chacha20_poly1305_x86_64.S
|
| diff --git a/third_party/boringssl/linux-x86_64/crypto/cipher/chacha20_poly1305_x86_64.S b/third_party/boringssl/linux-x86_64/crypto/cipher/chacha20_poly1305_x86_64.S
|
| deleted file mode 100644
|
| index d149d0f77f1357f4fc241b12ae7dcb4bfc3c7757..0000000000000000000000000000000000000000
|
| --- a/third_party/boringssl/linux-x86_64/crypto/cipher/chacha20_poly1305_x86_64.S
|
| +++ /dev/null
|
| @@ -1,8788 +0,0 @@
|
| -#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
|
| -.text
|
| -.extern OPENSSL_ia32cap_P
|
| -.hidden OPENSSL_ia32cap_P
|
| -
|
| -chacha20_poly1305_constants:
|
| -
|
| -.align 64
|
| -.chacha20_consts:
|
| -.byte 'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k'
|
| -.byte 'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k'
|
| -.rol8:
|
| -.byte 3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14
|
| -.byte 3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14
|
| -.rol16:
|
| -.byte 2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13
|
| -.byte 2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13
|
| -.avx2_init:
|
| -.long 0,0,0,0
|
| -.sse_inc:
|
| -.long 1,0,0,0
|
| -.avx2_inc:
|
| -.long 2,0,0,0,2,0,0,0
|
| -.clamp:
|
| -.quad 0x0FFFFFFC0FFFFFFF, 0x0FFFFFFC0FFFFFFC
|
| -.quad 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF
|
| -.align 16
|
| -.and_masks:
|
| -.byte 0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
|
| -.byte 0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
|
| -.byte 0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
|
| -.byte 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
|
| -.byte 0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
|
| -.byte 0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
|
| -.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
|
| -.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
|
| -.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00
|
| -.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00
|
| -.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00
|
| -.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00
|
| -.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00
|
| -.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00
|
| -.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00
|
| -
|
| -.type poly_hash_ad_internal,@function
|
| -.align 64
|
| -poly_hash_ad_internal:
|
| -.cfi_startproc
|
| - xorq %r10,%r10
|
| - xorq %r11,%r11
|
| - xorq %r12,%r12
|
| - cmpq $13,%r8
|
| - jne hash_ad_loop
|
| -poly_fast_tls_ad:
|
| -
|
| - movq (%rcx),%r10
|
| - movq 5(%rcx),%r11
|
| - shrq $24,%r11
|
| - movq $1,%r12
|
| - movq 0+0(%rbp),%rax
|
| - movq %rax,%r15
|
| - mulq %r10
|
| - movq %rax,%r13
|
| - movq %rdx,%r14
|
| - movq 0+0(%rbp),%rax
|
| - mulq %r11
|
| - imulq %r12,%r15
|
| - addq %rax,%r14
|
| - adcq %rdx,%r15
|
| - movq 8+0(%rbp),%rax
|
| - movq %rax,%r9
|
| - mulq %r10
|
| - addq %rax,%r14
|
| - adcq $0,%rdx
|
| - movq %rdx,%r10
|
| - movq 8+0(%rbp),%rax
|
| - mulq %r11
|
| - addq %rax,%r15
|
| - adcq $0,%rdx
|
| - imulq %r12,%r9
|
| - addq %r10,%r15
|
| - adcq %rdx,%r9
|
| - movq %r13,%r10
|
| - movq %r14,%r11
|
| - movq %r15,%r12
|
| - andq $3,%r12
|
| - movq %r15,%r13
|
| - andq $-4,%r13
|
| - movq %r9,%r14
|
| - shrdq $2,%r9,%r15
|
| - shrq $2,%r9
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $0,%r12
|
| - addq %r15,%r10
|
| - adcq %r9,%r11
|
| - adcq $0,%r12
|
| -
|
| - .byte 0xf3,0xc3
|
| -hash_ad_loop:
|
| -
|
| - cmpq $16,%r8
|
| - jb hash_ad_tail
|
| - addq 0(%rcx),%r10
|
| - adcq 8+0(%rcx),%r11
|
| - adcq $1,%r12
|
| - movq 0+0(%rbp),%rax
|
| - movq %rax,%r15
|
| - mulq %r10
|
| - movq %rax,%r13
|
| - movq %rdx,%r14
|
| - movq 0+0(%rbp),%rax
|
| - mulq %r11
|
| - imulq %r12,%r15
|
| - addq %rax,%r14
|
| - adcq %rdx,%r15
|
| - movq 8+0(%rbp),%rax
|
| - movq %rax,%r9
|
| - mulq %r10
|
| - addq %rax,%r14
|
| - adcq $0,%rdx
|
| - movq %rdx,%r10
|
| - movq 8+0(%rbp),%rax
|
| - mulq %r11
|
| - addq %rax,%r15
|
| - adcq $0,%rdx
|
| - imulq %r12,%r9
|
| - addq %r10,%r15
|
| - adcq %rdx,%r9
|
| - movq %r13,%r10
|
| - movq %r14,%r11
|
| - movq %r15,%r12
|
| - andq $3,%r12
|
| - movq %r15,%r13
|
| - andq $-4,%r13
|
| - movq %r9,%r14
|
| - shrdq $2,%r9,%r15
|
| - shrq $2,%r9
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $0,%r12
|
| - addq %r15,%r10
|
| - adcq %r9,%r11
|
| - adcq $0,%r12
|
| -
|
| - leaq 16(%rcx),%rcx
|
| - subq $16,%r8
|
| - jmp hash_ad_loop
|
| -hash_ad_tail:
|
| - cmpq $0,%r8
|
| - je 1f
|
| -
|
| - xorq %r13,%r13
|
| - xorq %r14,%r14
|
| - xorq %r15,%r15
|
| - addq %r8,%rcx
|
| -hash_ad_tail_loop:
|
| - shldq $8,%r13,%r14
|
| - shlq $8,%r13
|
| - movzbq -1(%rcx),%r15
|
| - xorq %r15,%r13
|
| - decq %rcx
|
| - decq %r8
|
| - jne hash_ad_tail_loop
|
| -
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $1,%r12
|
| - movq 0+0(%rbp),%rax
|
| - movq %rax,%r15
|
| - mulq %r10
|
| - movq %rax,%r13
|
| - movq %rdx,%r14
|
| - movq 0+0(%rbp),%rax
|
| - mulq %r11
|
| - imulq %r12,%r15
|
| - addq %rax,%r14
|
| - adcq %rdx,%r15
|
| - movq 8+0(%rbp),%rax
|
| - movq %rax,%r9
|
| - mulq %r10
|
| - addq %rax,%r14
|
| - adcq $0,%rdx
|
| - movq %rdx,%r10
|
| - movq 8+0(%rbp),%rax
|
| - mulq %r11
|
| - addq %rax,%r15
|
| - adcq $0,%rdx
|
| - imulq %r12,%r9
|
| - addq %r10,%r15
|
| - adcq %rdx,%r9
|
| - movq %r13,%r10
|
| - movq %r14,%r11
|
| - movq %r15,%r12
|
| - andq $3,%r12
|
| - movq %r15,%r13
|
| - andq $-4,%r13
|
| - movq %r9,%r14
|
| - shrdq $2,%r9,%r15
|
| - shrq $2,%r9
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $0,%r12
|
| - addq %r15,%r10
|
| - adcq %r9,%r11
|
| - adcq $0,%r12
|
| -
|
| -
|
| -1:
|
| - .byte 0xf3,0xc3
|
| -.cfi_endproc
|
| -.size poly_hash_ad_internal, .-poly_hash_ad_internal
|
| -
|
| -.globl chacha20_poly1305_open
|
| -.hidden chacha20_poly1305_open
|
| -.type chacha20_poly1305_open,@function
|
| -.align 64
|
| -chacha20_poly1305_open:
|
| -.cfi_startproc
|
| - pushq %rbp
|
| -.cfi_adjust_cfa_offset 8
|
| - pushq %rbx
|
| -.cfi_adjust_cfa_offset 8
|
| - pushq %r12
|
| -.cfi_adjust_cfa_offset 8
|
| - pushq %r13
|
| -.cfi_adjust_cfa_offset 8
|
| - pushq %r14
|
| -.cfi_adjust_cfa_offset 8
|
| - pushq %r15
|
| -.cfi_adjust_cfa_offset 8
|
| -
|
| -
|
| - pushq %r9
|
| -.cfi_adjust_cfa_offset 8
|
| - subq $288 + 32,%rsp
|
| -.cfi_adjust_cfa_offset 288 + 32
|
| -.cfi_offset rbp, -16
|
| -.cfi_offset rbx, -24
|
| -.cfi_offset r12, -32
|
| -.cfi_offset r13, -40
|
| -.cfi_offset r14, -48
|
| -.cfi_offset r15, -56
|
| - leaq 32(%rsp),%rbp
|
| - andq $-32,%rbp
|
| - movq %rdx,8+32(%rbp)
|
| - movq %r8,0+32(%rbp)
|
| - movq %rdx,%rbx
|
| -
|
| - movl OPENSSL_ia32cap_P+8(%rip),%eax
|
| - andl $288,%eax
|
| - xorl $288,%eax
|
| - jz chacha20_poly1305_open_avx2
|
| -
|
| -1:
|
| - cmpq $128,%rbx
|
| - jbe open_sse_128
|
| -
|
| - movdqa .chacha20_consts(%rip),%xmm0
|
| - movdqu 0(%r9),%xmm4
|
| - movdqu 16(%r9),%xmm8
|
| - movdqu 32(%r9),%xmm12
|
| - movdqa %xmm12,%xmm7
|
| -
|
| - movdqa %xmm4,48(%rbp)
|
| - movdqa %xmm8,64(%rbp)
|
| - movdqa %xmm12,96(%rbp)
|
| - movq $10,%r10
|
| -1:
|
| - paddd %xmm4,%xmm0
|
| - pxor %xmm0,%xmm12
|
| - pshufb .rol16(%rip),%xmm12
|
| - paddd %xmm12,%xmm8
|
| - pxor %xmm8,%xmm4
|
| - movdqa %xmm4,%xmm3
|
| - pslld $12,%xmm3
|
| - psrld $20,%xmm4
|
| - pxor %xmm3,%xmm4
|
| - paddd %xmm4,%xmm0
|
| - pxor %xmm0,%xmm12
|
| - pshufb .rol8(%rip),%xmm12
|
| - paddd %xmm12,%xmm8
|
| - pxor %xmm8,%xmm4
|
| - movdqa %xmm4,%xmm3
|
| - pslld $7,%xmm3
|
| - psrld $25,%xmm4
|
| - pxor %xmm3,%xmm4
|
| -.byte 102,15,58,15,228,4
|
| -.byte 102,69,15,58,15,192,8
|
| -.byte 102,69,15,58,15,228,12
|
| - paddd %xmm4,%xmm0
|
| - pxor %xmm0,%xmm12
|
| - pshufb .rol16(%rip),%xmm12
|
| - paddd %xmm12,%xmm8
|
| - pxor %xmm8,%xmm4
|
| - movdqa %xmm4,%xmm3
|
| - pslld $12,%xmm3
|
| - psrld $20,%xmm4
|
| - pxor %xmm3,%xmm4
|
| - paddd %xmm4,%xmm0
|
| - pxor %xmm0,%xmm12
|
| - pshufb .rol8(%rip),%xmm12
|
| - paddd %xmm12,%xmm8
|
| - pxor %xmm8,%xmm4
|
| - movdqa %xmm4,%xmm3
|
| - pslld $7,%xmm3
|
| - psrld $25,%xmm4
|
| - pxor %xmm3,%xmm4
|
| -.byte 102,15,58,15,228,12
|
| -.byte 102,69,15,58,15,192,8
|
| -.byte 102,69,15,58,15,228,4
|
| -
|
| - decq %r10
|
| - jne 1b
|
| -
|
| - paddd .chacha20_consts(%rip),%xmm0
|
| - paddd 48(%rbp),%xmm4
|
| -
|
| - pand .clamp(%rip),%xmm0
|
| - movdqa %xmm0,0(%rbp)
|
| - movdqa %xmm4,16(%rbp)
|
| -
|
| - movq %r8,%r8
|
| - call poly_hash_ad_internal
|
| -open_sse_main_loop:
|
| - cmpq $256,%rbx
|
| - jb 2f
|
| -
|
| - movdqa .chacha20_consts(%rip),%xmm0
|
| - movdqa 48(%rbp),%xmm4
|
| - movdqa 64(%rbp),%xmm8
|
| - movdqa %xmm0,%xmm1
|
| - movdqa %xmm4,%xmm5
|
| - movdqa %xmm8,%xmm9
|
| - movdqa %xmm0,%xmm2
|
| - movdqa %xmm4,%xmm6
|
| - movdqa %xmm8,%xmm10
|
| - movdqa %xmm0,%xmm3
|
| - movdqa %xmm4,%xmm7
|
| - movdqa %xmm8,%xmm11
|
| - movdqa 96(%rbp),%xmm15
|
| - paddd .sse_inc(%rip),%xmm15
|
| - movdqa %xmm15,%xmm14
|
| - paddd .sse_inc(%rip),%xmm14
|
| - movdqa %xmm14,%xmm13
|
| - paddd .sse_inc(%rip),%xmm13
|
| - movdqa %xmm13,%xmm12
|
| - paddd .sse_inc(%rip),%xmm12
|
| - movdqa %xmm12,96(%rbp)
|
| - movdqa %xmm13,112(%rbp)
|
| - movdqa %xmm14,128(%rbp)
|
| - movdqa %xmm15,144(%rbp)
|
| -
|
| -
|
| -
|
| - movq $4,%rcx
|
| - movq %rsi,%r8
|
| -1:
|
| - movdqa %xmm8,80(%rbp)
|
| - movdqa .rol16(%rip),%xmm8
|
| - paddd %xmm7,%xmm3
|
| - paddd %xmm6,%xmm2
|
| - paddd %xmm5,%xmm1
|
| - paddd %xmm4,%xmm0
|
| - pxor %xmm3,%xmm15
|
| - pxor %xmm2,%xmm14
|
| - pxor %xmm1,%xmm13
|
| - pxor %xmm0,%xmm12
|
| -.byte 102,69,15,56,0,248
|
| -.byte 102,69,15,56,0,240
|
| -.byte 102,69,15,56,0,232
|
| -.byte 102,69,15,56,0,224
|
| - movdqa 80(%rbp),%xmm8
|
| - paddd %xmm15,%xmm11
|
| - paddd %xmm14,%xmm10
|
| - paddd %xmm13,%xmm9
|
| - paddd %xmm12,%xmm8
|
| - pxor %xmm11,%xmm7
|
| - addq 0(%r8),%r10
|
| - adcq 8+0(%r8),%r11
|
| - adcq $1,%r12
|
| -
|
| - leaq 16(%r8),%r8
|
| - pxor %xmm10,%xmm6
|
| - pxor %xmm9,%xmm5
|
| - pxor %xmm8,%xmm4
|
| - movdqa %xmm8,80(%rbp)
|
| - movdqa %xmm7,%xmm8
|
| - psrld $20,%xmm8
|
| - pslld $32-20,%xmm7
|
| - pxor %xmm8,%xmm7
|
| - movdqa %xmm6,%xmm8
|
| - psrld $20,%xmm8
|
| - pslld $32-20,%xmm6
|
| - pxor %xmm8,%xmm6
|
| - movdqa %xmm5,%xmm8
|
| - psrld $20,%xmm8
|
| - pslld $32-20,%xmm5
|
| - pxor %xmm8,%xmm5
|
| - movdqa %xmm4,%xmm8
|
| - psrld $20,%xmm8
|
| - pslld $32-20,%xmm4
|
| - pxor %xmm8,%xmm4
|
| - movq 0+0(%rbp),%rax
|
| - movq %rax,%r15
|
| - mulq %r10
|
| - movq %rax,%r13
|
| - movq %rdx,%r14
|
| - movq 0+0(%rbp),%rax
|
| - mulq %r11
|
| - imulq %r12,%r15
|
| - addq %rax,%r14
|
| - adcq %rdx,%r15
|
| - movdqa .rol8(%rip),%xmm8
|
| - paddd %xmm7,%xmm3
|
| - paddd %xmm6,%xmm2
|
| - paddd %xmm5,%xmm1
|
| - paddd %xmm4,%xmm0
|
| - pxor %xmm3,%xmm15
|
| - pxor %xmm2,%xmm14
|
| - pxor %xmm1,%xmm13
|
| - pxor %xmm0,%xmm12
|
| -.byte 102,69,15,56,0,248
|
| -.byte 102,69,15,56,0,240
|
| -.byte 102,69,15,56,0,232
|
| -.byte 102,69,15,56,0,224
|
| - movdqa 80(%rbp),%xmm8
|
| - paddd %xmm15,%xmm11
|
| - paddd %xmm14,%xmm10
|
| - paddd %xmm13,%xmm9
|
| - paddd %xmm12,%xmm8
|
| - pxor %xmm11,%xmm7
|
| - pxor %xmm10,%xmm6
|
| - movq 8+0(%rbp),%rax
|
| - movq %rax,%r9
|
| - mulq %r10
|
| - addq %rax,%r14
|
| - adcq $0,%rdx
|
| - movq %rdx,%r10
|
| - movq 8+0(%rbp),%rax
|
| - mulq %r11
|
| - addq %rax,%r15
|
| - adcq $0,%rdx
|
| - pxor %xmm9,%xmm5
|
| - pxor %xmm8,%xmm4
|
| - movdqa %xmm8,80(%rbp)
|
| - movdqa %xmm7,%xmm8
|
| - psrld $25,%xmm8
|
| - pslld $32-25,%xmm7
|
| - pxor %xmm8,%xmm7
|
| - movdqa %xmm6,%xmm8
|
| - psrld $25,%xmm8
|
| - pslld $32-25,%xmm6
|
| - pxor %xmm8,%xmm6
|
| - movdqa %xmm5,%xmm8
|
| - psrld $25,%xmm8
|
| - pslld $32-25,%xmm5
|
| - pxor %xmm8,%xmm5
|
| - movdqa %xmm4,%xmm8
|
| - psrld $25,%xmm8
|
| - pslld $32-25,%xmm4
|
| - pxor %xmm8,%xmm4
|
| - movdqa 80(%rbp),%xmm8
|
| - imulq %r12,%r9
|
| - addq %r10,%r15
|
| - adcq %rdx,%r9
|
| -.byte 102,15,58,15,255,4
|
| -.byte 102,69,15,58,15,219,8
|
| -.byte 102,69,15,58,15,255,12
|
| -.byte 102,15,58,15,246,4
|
| -.byte 102,69,15,58,15,210,8
|
| -.byte 102,69,15,58,15,246,12
|
| -.byte 102,15,58,15,237,4
|
| -.byte 102,69,15,58,15,201,8
|
| -.byte 102,69,15,58,15,237,12
|
| -.byte 102,15,58,15,228,4
|
| -.byte 102,69,15,58,15,192,8
|
| -.byte 102,69,15,58,15,228,12
|
| - movdqa %xmm8,80(%rbp)
|
| - movdqa .rol16(%rip),%xmm8
|
| - paddd %xmm7,%xmm3
|
| - paddd %xmm6,%xmm2
|
| - paddd %xmm5,%xmm1
|
| - paddd %xmm4,%xmm0
|
| - pxor %xmm3,%xmm15
|
| - pxor %xmm2,%xmm14
|
| - movq %r13,%r10
|
| - movq %r14,%r11
|
| - movq %r15,%r12
|
| - andq $3,%r12
|
| - movq %r15,%r13
|
| - andq $-4,%r13
|
| - movq %r9,%r14
|
| - shrdq $2,%r9,%r15
|
| - shrq $2,%r9
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $0,%r12
|
| - addq %r15,%r10
|
| - adcq %r9,%r11
|
| - adcq $0,%r12
|
| - pxor %xmm1,%xmm13
|
| - pxor %xmm0,%xmm12
|
| -.byte 102,69,15,56,0,248
|
| -.byte 102,69,15,56,0,240
|
| -.byte 102,69,15,56,0,232
|
| -.byte 102,69,15,56,0,224
|
| - movdqa 80(%rbp),%xmm8
|
| - paddd %xmm15,%xmm11
|
| - paddd %xmm14,%xmm10
|
| - paddd %xmm13,%xmm9
|
| - paddd %xmm12,%xmm8
|
| - pxor %xmm11,%xmm7
|
| - pxor %xmm10,%xmm6
|
| - pxor %xmm9,%xmm5
|
| - pxor %xmm8,%xmm4
|
| - movdqa %xmm8,80(%rbp)
|
| - movdqa %xmm7,%xmm8
|
| - psrld $20,%xmm8
|
| - pslld $32-20,%xmm7
|
| - pxor %xmm8,%xmm7
|
| - movdqa %xmm6,%xmm8
|
| - psrld $20,%xmm8
|
| - pslld $32-20,%xmm6
|
| - pxor %xmm8,%xmm6
|
| - movdqa %xmm5,%xmm8
|
| - psrld $20,%xmm8
|
| - pslld $32-20,%xmm5
|
| - pxor %xmm8,%xmm5
|
| - movdqa %xmm4,%xmm8
|
| - psrld $20,%xmm8
|
| - pslld $32-20,%xmm4
|
| - pxor %xmm8,%xmm4
|
| - movdqa .rol8(%rip),%xmm8
|
| - paddd %xmm7,%xmm3
|
| - paddd %xmm6,%xmm2
|
| - paddd %xmm5,%xmm1
|
| - paddd %xmm4,%xmm0
|
| - pxor %xmm3,%xmm15
|
| - pxor %xmm2,%xmm14
|
| - pxor %xmm1,%xmm13
|
| - pxor %xmm0,%xmm12
|
| -.byte 102,69,15,56,0,248
|
| -.byte 102,69,15,56,0,240
|
| -.byte 102,69,15,56,0,232
|
| -.byte 102,69,15,56,0,224
|
| - movdqa 80(%rbp),%xmm8
|
| - paddd %xmm15,%xmm11
|
| - paddd %xmm14,%xmm10
|
| - paddd %xmm13,%xmm9
|
| - paddd %xmm12,%xmm8
|
| - pxor %xmm11,%xmm7
|
| - pxor %xmm10,%xmm6
|
| - pxor %xmm9,%xmm5
|
| - pxor %xmm8,%xmm4
|
| - movdqa %xmm8,80(%rbp)
|
| - movdqa %xmm7,%xmm8
|
| - psrld $25,%xmm8
|
| - pslld $32-25,%xmm7
|
| - pxor %xmm8,%xmm7
|
| - movdqa %xmm6,%xmm8
|
| - psrld $25,%xmm8
|
| - pslld $32-25,%xmm6
|
| - pxor %xmm8,%xmm6
|
| - movdqa %xmm5,%xmm8
|
| - psrld $25,%xmm8
|
| - pslld $32-25,%xmm5
|
| - pxor %xmm8,%xmm5
|
| - movdqa %xmm4,%xmm8
|
| - psrld $25,%xmm8
|
| - pslld $32-25,%xmm4
|
| - pxor %xmm8,%xmm4
|
| - movdqa 80(%rbp),%xmm8
|
| -.byte 102,15,58,15,255,12
|
| -.byte 102,69,15,58,15,219,8
|
| -.byte 102,69,15,58,15,255,4
|
| -.byte 102,15,58,15,246,12
|
| -.byte 102,69,15,58,15,210,8
|
| -.byte 102,69,15,58,15,246,4
|
| -.byte 102,15,58,15,237,12
|
| -.byte 102,69,15,58,15,201,8
|
| -.byte 102,69,15,58,15,237,4
|
| -.byte 102,15,58,15,228,12
|
| -.byte 102,69,15,58,15,192,8
|
| -.byte 102,69,15,58,15,228,4
|
| -
|
| - decq %rcx
|
| - jge 1b
|
| - addq 0(%r8),%r10
|
| - adcq 8+0(%r8),%r11
|
| - adcq $1,%r12
|
| - movq 0+0(%rbp),%rax
|
| - movq %rax,%r15
|
| - mulq %r10
|
| - movq %rax,%r13
|
| - movq %rdx,%r14
|
| - movq 0+0(%rbp),%rax
|
| - mulq %r11
|
| - imulq %r12,%r15
|
| - addq %rax,%r14
|
| - adcq %rdx,%r15
|
| - movq 8+0(%rbp),%rax
|
| - movq %rax,%r9
|
| - mulq %r10
|
| - addq %rax,%r14
|
| - adcq $0,%rdx
|
| - movq %rdx,%r10
|
| - movq 8+0(%rbp),%rax
|
| - mulq %r11
|
| - addq %rax,%r15
|
| - adcq $0,%rdx
|
| - imulq %r12,%r9
|
| - addq %r10,%r15
|
| - adcq %rdx,%r9
|
| - movq %r13,%r10
|
| - movq %r14,%r11
|
| - movq %r15,%r12
|
| - andq $3,%r12
|
| - movq %r15,%r13
|
| - andq $-4,%r13
|
| - movq %r9,%r14
|
| - shrdq $2,%r9,%r15
|
| - shrq $2,%r9
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $0,%r12
|
| - addq %r15,%r10
|
| - adcq %r9,%r11
|
| - adcq $0,%r12
|
| -
|
| - leaq 16(%r8),%r8
|
| - cmpq $-6,%rcx
|
| - jg 1b
|
| - paddd .chacha20_consts(%rip),%xmm3
|
| - paddd 48(%rbp),%xmm7
|
| - paddd 64(%rbp),%xmm11
|
| - paddd 144(%rbp),%xmm15
|
| - paddd .chacha20_consts(%rip),%xmm2
|
| - paddd 48(%rbp),%xmm6
|
| - paddd 64(%rbp),%xmm10
|
| - paddd 128(%rbp),%xmm14
|
| - paddd .chacha20_consts(%rip),%xmm1
|
| - paddd 48(%rbp),%xmm5
|
| - paddd 64(%rbp),%xmm9
|
| - paddd 112(%rbp),%xmm13
|
| - paddd .chacha20_consts(%rip),%xmm0
|
| - paddd 48(%rbp),%xmm4
|
| - paddd 64(%rbp),%xmm8
|
| - paddd 96(%rbp),%xmm12
|
| - movdqa %xmm12,80(%rbp)
|
| - movdqu 0 + 0(%rsi),%xmm12
|
| - pxor %xmm3,%xmm12
|
| - movdqu %xmm12,0 + 0(%rdi)
|
| - movdqu 16 + 0(%rsi),%xmm12
|
| - pxor %xmm7,%xmm12
|
| - movdqu %xmm12,16 + 0(%rdi)
|
| - movdqu 32 + 0(%rsi),%xmm12
|
| - pxor %xmm11,%xmm12
|
| - movdqu %xmm12,32 + 0(%rdi)
|
| - movdqu 48 + 0(%rsi),%xmm12
|
| - pxor %xmm15,%xmm12
|
| - movdqu %xmm12,48 + 0(%rdi)
|
| - movdqu 0 + 64(%rsi),%xmm3
|
| - movdqu 16 + 64(%rsi),%xmm7
|
| - movdqu 32 + 64(%rsi),%xmm11
|
| - movdqu 48 + 64(%rsi),%xmm15
|
| - pxor %xmm3,%xmm2
|
| - pxor %xmm7,%xmm6
|
| - pxor %xmm11,%xmm10
|
| - pxor %xmm14,%xmm15
|
| - movdqu %xmm2,0 + 64(%rdi)
|
| - movdqu %xmm6,16 + 64(%rdi)
|
| - movdqu %xmm10,32 + 64(%rdi)
|
| - movdqu %xmm15,48 + 64(%rdi)
|
| - movdqu 0 + 128(%rsi),%xmm3
|
| - movdqu 16 + 128(%rsi),%xmm7
|
| - movdqu 32 + 128(%rsi),%xmm11
|
| - movdqu 48 + 128(%rsi),%xmm15
|
| - pxor %xmm3,%xmm1
|
| - pxor %xmm7,%xmm5
|
| - pxor %xmm11,%xmm9
|
| - pxor %xmm13,%xmm15
|
| - movdqu %xmm1,0 + 128(%rdi)
|
| - movdqu %xmm5,16 + 128(%rdi)
|
| - movdqu %xmm9,32 + 128(%rdi)
|
| - movdqu %xmm15,48 + 128(%rdi)
|
| - movdqu 0 + 192(%rsi),%xmm3
|
| - movdqu 16 + 192(%rsi),%xmm7
|
| - movdqu 32 + 192(%rsi),%xmm11
|
| - movdqu 48 + 192(%rsi),%xmm15
|
| - pxor %xmm3,%xmm0
|
| - pxor %xmm7,%xmm4
|
| - pxor %xmm11,%xmm8
|
| - pxor 80(%rbp),%xmm15
|
| - movdqu %xmm0,0 + 192(%rdi)
|
| - movdqu %xmm4,16 + 192(%rdi)
|
| - movdqu %xmm8,32 + 192(%rdi)
|
| - movdqu %xmm15,48 + 192(%rdi)
|
| -
|
| - leaq 256(%rsi),%rsi
|
| - leaq 256(%rdi),%rdi
|
| - subq $256,%rbx
|
| - jmp open_sse_main_loop
|
| -2:
|
| -
|
| - testq %rbx,%rbx
|
| - jz open_sse_finalize
|
| - cmpq $64,%rbx
|
| - ja 3f
|
| - movdqa .chacha20_consts(%rip),%xmm0
|
| - movdqa 48(%rbp),%xmm4
|
| - movdqa 64(%rbp),%xmm8
|
| - movdqa 96(%rbp),%xmm12
|
| - paddd .sse_inc(%rip),%xmm12
|
| - movdqa %xmm12,96(%rbp)
|
| -
|
| - xorq %r8,%r8
|
| - movq %rbx,%rcx
|
| - cmpq $16,%rcx
|
| - jb 2f
|
| -1:
|
| - addq 0(%rsi,%r8), %r10
|
| - adcq 8+0(%rsi,%r8), %r11
|
| - adcq $1,%r12
|
| - movq 0+0(%rbp),%rax
|
| - movq %rax,%r15
|
| - mulq %r10
|
| - movq %rax,%r13
|
| - movq %rdx,%r14
|
| - movq 0+0(%rbp),%rax
|
| - mulq %r11
|
| - imulq %r12,%r15
|
| - addq %rax,%r14
|
| - adcq %rdx,%r15
|
| - movq 8+0(%rbp),%rax
|
| - movq %rax,%r9
|
| - mulq %r10
|
| - addq %rax,%r14
|
| - adcq $0,%rdx
|
| - movq %rdx,%r10
|
| - movq 8+0(%rbp),%rax
|
| - mulq %r11
|
| - addq %rax,%r15
|
| - adcq $0,%rdx
|
| - imulq %r12,%r9
|
| - addq %r10,%r15
|
| - adcq %rdx,%r9
|
| - movq %r13,%r10
|
| - movq %r14,%r11
|
| - movq %r15,%r12
|
| - andq $3,%r12
|
| - movq %r15,%r13
|
| - andq $-4,%r13
|
| - movq %r9,%r14
|
| - shrdq $2,%r9,%r15
|
| - shrq $2,%r9
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $0,%r12
|
| - addq %r15,%r10
|
| - adcq %r9,%r11
|
| - adcq $0,%r12
|
| -
|
| - subq $16,%rcx
|
| -2:
|
| - addq $16,%r8
|
| - paddd %xmm4,%xmm0
|
| - pxor %xmm0,%xmm12
|
| - pshufb .rol16(%rip),%xmm12
|
| - paddd %xmm12,%xmm8
|
| - pxor %xmm8,%xmm4
|
| - movdqa %xmm4,%xmm3
|
| - pslld $12,%xmm3
|
| - psrld $20,%xmm4
|
| - pxor %xmm3,%xmm4
|
| - paddd %xmm4,%xmm0
|
| - pxor %xmm0,%xmm12
|
| - pshufb .rol8(%rip),%xmm12
|
| - paddd %xmm12,%xmm8
|
| - pxor %xmm8,%xmm4
|
| - movdqa %xmm4,%xmm3
|
| - pslld $7,%xmm3
|
| - psrld $25,%xmm4
|
| - pxor %xmm3,%xmm4
|
| -.byte 102,15,58,15,228,4
|
| -.byte 102,69,15,58,15,192,8
|
| -.byte 102,69,15,58,15,228,12
|
| - paddd %xmm4,%xmm0
|
| - pxor %xmm0,%xmm12
|
| - pshufb .rol16(%rip),%xmm12
|
| - paddd %xmm12,%xmm8
|
| - pxor %xmm8,%xmm4
|
| - movdqa %xmm4,%xmm3
|
| - pslld $12,%xmm3
|
| - psrld $20,%xmm4
|
| - pxor %xmm3,%xmm4
|
| - paddd %xmm4,%xmm0
|
| - pxor %xmm0,%xmm12
|
| - pshufb .rol8(%rip),%xmm12
|
| - paddd %xmm12,%xmm8
|
| - pxor %xmm8,%xmm4
|
| - movdqa %xmm4,%xmm3
|
| - pslld $7,%xmm3
|
| - psrld $25,%xmm4
|
| - pxor %xmm3,%xmm4
|
| -.byte 102,15,58,15,228,12
|
| -.byte 102,69,15,58,15,192,8
|
| -.byte 102,69,15,58,15,228,4
|
| -
|
| - cmpq $16,%rcx
|
| - jae 1b
|
| - cmpq $160,%r8
|
| - jne 2b
|
| - paddd .chacha20_consts(%rip),%xmm0
|
| - paddd 48(%rbp),%xmm4
|
| - paddd 64(%rbp),%xmm8
|
| - paddd 96(%rbp),%xmm12
|
| -
|
| - jmp open_sse_tail_64_dec_loop
|
| -3:
|
| - cmpq $128,%rbx
|
| - ja 3f
|
| - movdqa .chacha20_consts(%rip),%xmm0
|
| - movdqa 48(%rbp),%xmm4
|
| - movdqa 64(%rbp),%xmm8
|
| - movdqa %xmm0,%xmm1
|
| - movdqa %xmm4,%xmm5
|
| - movdqa %xmm8,%xmm9
|
| - movdqa 96(%rbp),%xmm13
|
| - paddd .sse_inc(%rip),%xmm13
|
| - movdqa %xmm13,%xmm12
|
| - paddd .sse_inc(%rip),%xmm12
|
| - movdqa %xmm12,96(%rbp)
|
| - movdqa %xmm13,112(%rbp)
|
| -
|
| - movq %rbx,%rcx
|
| - andq $-16,%rcx
|
| - xorq %r8,%r8
|
| -1:
|
| - addq 0(%rsi,%r8), %r10
|
| - adcq 8+0(%rsi,%r8), %r11
|
| - adcq $1,%r12
|
| - movq 0+0(%rbp),%rax
|
| - movq %rax,%r15
|
| - mulq %r10
|
| - movq %rax,%r13
|
| - movq %rdx,%r14
|
| - movq 0+0(%rbp),%rax
|
| - mulq %r11
|
| - imulq %r12,%r15
|
| - addq %rax,%r14
|
| - adcq %rdx,%r15
|
| - movq 8+0(%rbp),%rax
|
| - movq %rax,%r9
|
| - mulq %r10
|
| - addq %rax,%r14
|
| - adcq $0,%rdx
|
| - movq %rdx,%r10
|
| - movq 8+0(%rbp),%rax
|
| - mulq %r11
|
| - addq %rax,%r15
|
| - adcq $0,%rdx
|
| - imulq %r12,%r9
|
| - addq %r10,%r15
|
| - adcq %rdx,%r9
|
| - movq %r13,%r10
|
| - movq %r14,%r11
|
| - movq %r15,%r12
|
| - andq $3,%r12
|
| - movq %r15,%r13
|
| - andq $-4,%r13
|
| - movq %r9,%r14
|
| - shrdq $2,%r9,%r15
|
| - shrq $2,%r9
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $0,%r12
|
| - addq %r15,%r10
|
| - adcq %r9,%r11
|
| - adcq $0,%r12
|
| -
|
| -2:
|
| - addq $16,%r8
|
| - paddd %xmm4,%xmm0
|
| - pxor %xmm0,%xmm12
|
| - pshufb .rol16(%rip),%xmm12
|
| - paddd %xmm12,%xmm8
|
| - pxor %xmm8,%xmm4
|
| - movdqa %xmm4,%xmm3
|
| - pslld $12,%xmm3
|
| - psrld $20,%xmm4
|
| - pxor %xmm3,%xmm4
|
| - paddd %xmm4,%xmm0
|
| - pxor %xmm0,%xmm12
|
| - pshufb .rol8(%rip),%xmm12
|
| - paddd %xmm12,%xmm8
|
| - pxor %xmm8,%xmm4
|
| - movdqa %xmm4,%xmm3
|
| - pslld $7,%xmm3
|
| - psrld $25,%xmm4
|
| - pxor %xmm3,%xmm4
|
| -.byte 102,15,58,15,228,4
|
| -.byte 102,69,15,58,15,192,8
|
| -.byte 102,69,15,58,15,228,12
|
| - paddd %xmm5,%xmm1
|
| - pxor %xmm1,%xmm13
|
| - pshufb .rol16(%rip),%xmm13
|
| - paddd %xmm13,%xmm9
|
| - pxor %xmm9,%xmm5
|
| - movdqa %xmm5,%xmm3
|
| - pslld $12,%xmm3
|
| - psrld $20,%xmm5
|
| - pxor %xmm3,%xmm5
|
| - paddd %xmm5,%xmm1
|
| - pxor %xmm1,%xmm13
|
| - pshufb .rol8(%rip),%xmm13
|
| - paddd %xmm13,%xmm9
|
| - pxor %xmm9,%xmm5
|
| - movdqa %xmm5,%xmm3
|
| - pslld $7,%xmm3
|
| - psrld $25,%xmm5
|
| - pxor %xmm3,%xmm5
|
| -.byte 102,15,58,15,237,4
|
| -.byte 102,69,15,58,15,201,8
|
| -.byte 102,69,15,58,15,237,12
|
| - paddd %xmm4,%xmm0
|
| - pxor %xmm0,%xmm12
|
| - pshufb .rol16(%rip),%xmm12
|
| - paddd %xmm12,%xmm8
|
| - pxor %xmm8,%xmm4
|
| - movdqa %xmm4,%xmm3
|
| - pslld $12,%xmm3
|
| - psrld $20,%xmm4
|
| - pxor %xmm3,%xmm4
|
| - paddd %xmm4,%xmm0
|
| - pxor %xmm0,%xmm12
|
| - pshufb .rol8(%rip),%xmm12
|
| - paddd %xmm12,%xmm8
|
| - pxor %xmm8,%xmm4
|
| - movdqa %xmm4,%xmm3
|
| - pslld $7,%xmm3
|
| - psrld $25,%xmm4
|
| - pxor %xmm3,%xmm4
|
| -.byte 102,15,58,15,228,12
|
| -.byte 102,69,15,58,15,192,8
|
| -.byte 102,69,15,58,15,228,4
|
| - paddd %xmm5,%xmm1
|
| - pxor %xmm1,%xmm13
|
| - pshufb .rol16(%rip),%xmm13
|
| - paddd %xmm13,%xmm9
|
| - pxor %xmm9,%xmm5
|
| - movdqa %xmm5,%xmm3
|
| - pslld $12,%xmm3
|
| - psrld $20,%xmm5
|
| - pxor %xmm3,%xmm5
|
| - paddd %xmm5,%xmm1
|
| - pxor %xmm1,%xmm13
|
| - pshufb .rol8(%rip),%xmm13
|
| - paddd %xmm13,%xmm9
|
| - pxor %xmm9,%xmm5
|
| - movdqa %xmm5,%xmm3
|
| - pslld $7,%xmm3
|
| - psrld $25,%xmm5
|
| - pxor %xmm3,%xmm5
|
| -.byte 102,15,58,15,237,12
|
| -.byte 102,69,15,58,15,201,8
|
| -.byte 102,69,15,58,15,237,4
|
| -
|
| - cmpq %rcx,%r8
|
| - jb 1b
|
| - cmpq $160,%r8
|
| - jne 2b
|
| - paddd .chacha20_consts(%rip),%xmm1
|
| - paddd 48(%rbp),%xmm5
|
| - paddd 64(%rbp),%xmm9
|
| - paddd 112(%rbp),%xmm13
|
| - paddd .chacha20_consts(%rip),%xmm0
|
| - paddd 48(%rbp),%xmm4
|
| - paddd 64(%rbp),%xmm8
|
| - paddd 96(%rbp),%xmm12
|
| - movdqu 0 + 0(%rsi),%xmm3
|
| - movdqu 16 + 0(%rsi),%xmm7
|
| - movdqu 32 + 0(%rsi),%xmm11
|
| - movdqu 48 + 0(%rsi),%xmm15
|
| - pxor %xmm3,%xmm1
|
| - pxor %xmm7,%xmm5
|
| - pxor %xmm11,%xmm9
|
| - pxor %xmm13,%xmm15
|
| - movdqu %xmm1,0 + 0(%rdi)
|
| - movdqu %xmm5,16 + 0(%rdi)
|
| - movdqu %xmm9,32 + 0(%rdi)
|
| - movdqu %xmm15,48 + 0(%rdi)
|
| -
|
| - subq $64,%rbx
|
| - leaq 64(%rsi),%rsi
|
| - leaq 64(%rdi),%rdi
|
| - jmp open_sse_tail_64_dec_loop
|
| -3:
|
| - cmpq $192,%rbx
|
| - ja 3f
|
| - movdqa .chacha20_consts(%rip),%xmm0
|
| - movdqa 48(%rbp),%xmm4
|
| - movdqa 64(%rbp),%xmm8
|
| - movdqa %xmm0,%xmm1
|
| - movdqa %xmm4,%xmm5
|
| - movdqa %xmm8,%xmm9
|
| - movdqa %xmm0,%xmm2
|
| - movdqa %xmm4,%xmm6
|
| - movdqa %xmm8,%xmm10
|
| - movdqa 96(%rbp),%xmm14
|
| - paddd .sse_inc(%rip),%xmm14
|
| - movdqa %xmm14,%xmm13
|
| - paddd .sse_inc(%rip),%xmm13
|
| - movdqa %xmm13,%xmm12
|
| - paddd .sse_inc(%rip),%xmm12
|
| - movdqa %xmm12,96(%rbp)
|
| - movdqa %xmm13,112(%rbp)
|
| - movdqa %xmm14,128(%rbp)
|
| -
|
| - movq %rbx,%rcx
|
| - movq $160,%r8
|
| - cmpq $160,%rcx
|
| - cmovgq %r8,%rcx
|
| - andq $-16,%rcx
|
| - xorq %r8,%r8
|
| -1:
|
| - addq 0(%rsi,%r8), %r10
|
| - adcq 8+0(%rsi,%r8), %r11
|
| - adcq $1,%r12
|
| - movq 0+0(%rbp),%rax
|
| - movq %rax,%r15
|
| - mulq %r10
|
| - movq %rax,%r13
|
| - movq %rdx,%r14
|
| - movq 0+0(%rbp),%rax
|
| - mulq %r11
|
| - imulq %r12,%r15
|
| - addq %rax,%r14
|
| - adcq %rdx,%r15
|
| - movq 8+0(%rbp),%rax
|
| - movq %rax,%r9
|
| - mulq %r10
|
| - addq %rax,%r14
|
| - adcq $0,%rdx
|
| - movq %rdx,%r10
|
| - movq 8+0(%rbp),%rax
|
| - mulq %r11
|
| - addq %rax,%r15
|
| - adcq $0,%rdx
|
| - imulq %r12,%r9
|
| - addq %r10,%r15
|
| - adcq %rdx,%r9
|
| - movq %r13,%r10
|
| - movq %r14,%r11
|
| - movq %r15,%r12
|
| - andq $3,%r12
|
| - movq %r15,%r13
|
| - andq $-4,%r13
|
| - movq %r9,%r14
|
| - shrdq $2,%r9,%r15
|
| - shrq $2,%r9
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $0,%r12
|
| - addq %r15,%r10
|
| - adcq %r9,%r11
|
| - adcq $0,%r12
|
| -
|
| -2:
|
| - addq $16,%r8
|
| - paddd %xmm4,%xmm0
|
| - pxor %xmm0,%xmm12
|
| - pshufb .rol16(%rip),%xmm12
|
| - paddd %xmm12,%xmm8
|
| - pxor %xmm8,%xmm4
|
| - movdqa %xmm4,%xmm3
|
| - pslld $12,%xmm3
|
| - psrld $20,%xmm4
|
| - pxor %xmm3,%xmm4
|
| - paddd %xmm4,%xmm0
|
| - pxor %xmm0,%xmm12
|
| - pshufb .rol8(%rip),%xmm12
|
| - paddd %xmm12,%xmm8
|
| - pxor %xmm8,%xmm4
|
| - movdqa %xmm4,%xmm3
|
| - pslld $7,%xmm3
|
| - psrld $25,%xmm4
|
| - pxor %xmm3,%xmm4
|
| -.byte 102,15,58,15,228,4
|
| -.byte 102,69,15,58,15,192,8
|
| -.byte 102,69,15,58,15,228,12
|
| - paddd %xmm5,%xmm1
|
| - pxor %xmm1,%xmm13
|
| - pshufb .rol16(%rip),%xmm13
|
| - paddd %xmm13,%xmm9
|
| - pxor %xmm9,%xmm5
|
| - movdqa %xmm5,%xmm3
|
| - pslld $12,%xmm3
|
| - psrld $20,%xmm5
|
| - pxor %xmm3,%xmm5
|
| - paddd %xmm5,%xmm1
|
| - pxor %xmm1,%xmm13
|
| - pshufb .rol8(%rip),%xmm13
|
| - paddd %xmm13,%xmm9
|
| - pxor %xmm9,%xmm5
|
| - movdqa %xmm5,%xmm3
|
| - pslld $7,%xmm3
|
| - psrld $25,%xmm5
|
| - pxor %xmm3,%xmm5
|
| -.byte 102,15,58,15,237,4
|
| -.byte 102,69,15,58,15,201,8
|
| -.byte 102,69,15,58,15,237,12
|
| - paddd %xmm6,%xmm2
|
| - pxor %xmm2,%xmm14
|
| - pshufb .rol16(%rip),%xmm14
|
| - paddd %xmm14,%xmm10
|
| - pxor %xmm10,%xmm6
|
| - movdqa %xmm6,%xmm3
|
| - pslld $12,%xmm3
|
| - psrld $20,%xmm6
|
| - pxor %xmm3,%xmm6
|
| - paddd %xmm6,%xmm2
|
| - pxor %xmm2,%xmm14
|
| - pshufb .rol8(%rip),%xmm14
|
| - paddd %xmm14,%xmm10
|
| - pxor %xmm10,%xmm6
|
| - movdqa %xmm6,%xmm3
|
| - pslld $7,%xmm3
|
| - psrld $25,%xmm6
|
| - pxor %xmm3,%xmm6
|
| -.byte 102,15,58,15,246,4
|
| -.byte 102,69,15,58,15,210,8
|
| -.byte 102,69,15,58,15,246,12
|
| - paddd %xmm4,%xmm0
|
| - pxor %xmm0,%xmm12
|
| - pshufb .rol16(%rip),%xmm12
|
| - paddd %xmm12,%xmm8
|
| - pxor %xmm8,%xmm4
|
| - movdqa %xmm4,%xmm3
|
| - pslld $12,%xmm3
|
| - psrld $20,%xmm4
|
| - pxor %xmm3,%xmm4
|
| - paddd %xmm4,%xmm0
|
| - pxor %xmm0,%xmm12
|
| - pshufb .rol8(%rip),%xmm12
|
| - paddd %xmm12,%xmm8
|
| - pxor %xmm8,%xmm4
|
| - movdqa %xmm4,%xmm3
|
| - pslld $7,%xmm3
|
| - psrld $25,%xmm4
|
| - pxor %xmm3,%xmm4
|
| -.byte 102,15,58,15,228,12
|
| -.byte 102,69,15,58,15,192,8
|
| -.byte 102,69,15,58,15,228,4
|
| - paddd %xmm5,%xmm1
|
| - pxor %xmm1,%xmm13
|
| - pshufb .rol16(%rip),%xmm13
|
| - paddd %xmm13,%xmm9
|
| - pxor %xmm9,%xmm5
|
| - movdqa %xmm5,%xmm3
|
| - pslld $12,%xmm3
|
| - psrld $20,%xmm5
|
| - pxor %xmm3,%xmm5
|
| - paddd %xmm5,%xmm1
|
| - pxor %xmm1,%xmm13
|
| - pshufb .rol8(%rip),%xmm13
|
| - paddd %xmm13,%xmm9
|
| - pxor %xmm9,%xmm5
|
| - movdqa %xmm5,%xmm3
|
| - pslld $7,%xmm3
|
| - psrld $25,%xmm5
|
| - pxor %xmm3,%xmm5
|
| -.byte 102,15,58,15,237,12
|
| -.byte 102,69,15,58,15,201,8
|
| -.byte 102,69,15,58,15,237,4
|
| - paddd %xmm6,%xmm2
|
| - pxor %xmm2,%xmm14
|
| - pshufb .rol16(%rip),%xmm14
|
| - paddd %xmm14,%xmm10
|
| - pxor %xmm10,%xmm6
|
| - movdqa %xmm6,%xmm3
|
| - pslld $12,%xmm3
|
| - psrld $20,%xmm6
|
| - pxor %xmm3,%xmm6
|
| - paddd %xmm6,%xmm2
|
| - pxor %xmm2,%xmm14
|
| - pshufb .rol8(%rip),%xmm14
|
| - paddd %xmm14,%xmm10
|
| - pxor %xmm10,%xmm6
|
| - movdqa %xmm6,%xmm3
|
| - pslld $7,%xmm3
|
| - psrld $25,%xmm6
|
| - pxor %xmm3,%xmm6
|
| -.byte 102,15,58,15,246,12
|
| -.byte 102,69,15,58,15,210,8
|
| -.byte 102,69,15,58,15,246,4
|
| -
|
| - cmpq %rcx,%r8
|
| - jb 1b
|
| - cmpq $160,%r8
|
| - jne 2b
|
| - cmpq $176,%rbx
|
| - jb 1f
|
| - addq 160(%rsi),%r10
|
| - adcq 8+160(%rsi),%r11
|
| - adcq $1,%r12
|
| - movq 0+0(%rbp),%rax
|
| - movq %rax,%r15
|
| - mulq %r10
|
| - movq %rax,%r13
|
| - movq %rdx,%r14
|
| - movq 0+0(%rbp),%rax
|
| - mulq %r11
|
| - imulq %r12,%r15
|
| - addq %rax,%r14
|
| - adcq %rdx,%r15
|
| - movq 8+0(%rbp),%rax
|
| - movq %rax,%r9
|
| - mulq %r10
|
| - addq %rax,%r14
|
| - adcq $0,%rdx
|
| - movq %rdx,%r10
|
| - movq 8+0(%rbp),%rax
|
| - mulq %r11
|
| - addq %rax,%r15
|
| - adcq $0,%rdx
|
| - imulq %r12,%r9
|
| - addq %r10,%r15
|
| - adcq %rdx,%r9
|
| - movq %r13,%r10
|
| - movq %r14,%r11
|
| - movq %r15,%r12
|
| - andq $3,%r12
|
| - movq %r15,%r13
|
| - andq $-4,%r13
|
| - movq %r9,%r14
|
| - shrdq $2,%r9,%r15
|
| - shrq $2,%r9
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $0,%r12
|
| - addq %r15,%r10
|
| - adcq %r9,%r11
|
| - adcq $0,%r12
|
| -
|
| - cmpq $192,%rbx
|
| - jb 1f
|
| - addq 176(%rsi),%r10
|
| - adcq 8+176(%rsi),%r11
|
| - adcq $1,%r12
|
| - movq 0+0(%rbp),%rax
|
| - movq %rax,%r15
|
| - mulq %r10
|
| - movq %rax,%r13
|
| - movq %rdx,%r14
|
| - movq 0+0(%rbp),%rax
|
| - mulq %r11
|
| - imulq %r12,%r15
|
| - addq %rax,%r14
|
| - adcq %rdx,%r15
|
| - movq 8+0(%rbp),%rax
|
| - movq %rax,%r9
|
| - mulq %r10
|
| - addq %rax,%r14
|
| - adcq $0,%rdx
|
| - movq %rdx,%r10
|
| - movq 8+0(%rbp),%rax
|
| - mulq %r11
|
| - addq %rax,%r15
|
| - adcq $0,%rdx
|
| - imulq %r12,%r9
|
| - addq %r10,%r15
|
| - adcq %rdx,%r9
|
| - movq %r13,%r10
|
| - movq %r14,%r11
|
| - movq %r15,%r12
|
| - andq $3,%r12
|
| - movq %r15,%r13
|
| - andq $-4,%r13
|
| - movq %r9,%r14
|
| - shrdq $2,%r9,%r15
|
| - shrq $2,%r9
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $0,%r12
|
| - addq %r15,%r10
|
| - adcq %r9,%r11
|
| - adcq $0,%r12
|
| -
|
| -1:
|
| - paddd .chacha20_consts(%rip),%xmm2
|
| - paddd 48(%rbp),%xmm6
|
| - paddd 64(%rbp),%xmm10
|
| - paddd 128(%rbp),%xmm14
|
| - paddd .chacha20_consts(%rip),%xmm1
|
| - paddd 48(%rbp),%xmm5
|
| - paddd 64(%rbp),%xmm9
|
| - paddd 112(%rbp),%xmm13
|
| - paddd .chacha20_consts(%rip),%xmm0
|
| - paddd 48(%rbp),%xmm4
|
| - paddd 64(%rbp),%xmm8
|
| - paddd 96(%rbp),%xmm12
|
| - movdqu 0 + 0(%rsi),%xmm3
|
| - movdqu 16 + 0(%rsi),%xmm7
|
| - movdqu 32 + 0(%rsi),%xmm11
|
| - movdqu 48 + 0(%rsi),%xmm15
|
| - pxor %xmm3,%xmm2
|
| - pxor %xmm7,%xmm6
|
| - pxor %xmm11,%xmm10
|
| - pxor %xmm14,%xmm15
|
| - movdqu %xmm2,0 + 0(%rdi)
|
| - movdqu %xmm6,16 + 0(%rdi)
|
| - movdqu %xmm10,32 + 0(%rdi)
|
| - movdqu %xmm15,48 + 0(%rdi)
|
| - movdqu 0 + 64(%rsi),%xmm3
|
| - movdqu 16 + 64(%rsi),%xmm7
|
| - movdqu 32 + 64(%rsi),%xmm11
|
| - movdqu 48 + 64(%rsi),%xmm15
|
| - pxor %xmm3,%xmm1
|
| - pxor %xmm7,%xmm5
|
| - pxor %xmm11,%xmm9
|
| - pxor %xmm13,%xmm15
|
| - movdqu %xmm1,0 + 64(%rdi)
|
| - movdqu %xmm5,16 + 64(%rdi)
|
| - movdqu %xmm9,32 + 64(%rdi)
|
| - movdqu %xmm15,48 + 64(%rdi)
|
| -
|
| - subq $128,%rbx
|
| - leaq 128(%rsi),%rsi
|
| - leaq 128(%rdi),%rdi
|
| - jmp open_sse_tail_64_dec_loop
|
| -3:
|
| -
|
| - movdqa .chacha20_consts(%rip),%xmm0
|
| - movdqa 48(%rbp),%xmm4
|
| - movdqa 64(%rbp),%xmm8
|
| - movdqa %xmm0,%xmm1
|
| - movdqa %xmm4,%xmm5
|
| - movdqa %xmm8,%xmm9
|
| - movdqa %xmm0,%xmm2
|
| - movdqa %xmm4,%xmm6
|
| - movdqa %xmm8,%xmm10
|
| - movdqa %xmm0,%xmm3
|
| - movdqa %xmm4,%xmm7
|
| - movdqa %xmm8,%xmm11
|
| - movdqa 96(%rbp),%xmm15
|
| - paddd .sse_inc(%rip),%xmm15
|
| - movdqa %xmm15,%xmm14
|
| - paddd .sse_inc(%rip),%xmm14
|
| - movdqa %xmm14,%xmm13
|
| - paddd .sse_inc(%rip),%xmm13
|
| - movdqa %xmm13,%xmm12
|
| - paddd .sse_inc(%rip),%xmm12
|
| - movdqa %xmm12,96(%rbp)
|
| - movdqa %xmm13,112(%rbp)
|
| - movdqa %xmm14,128(%rbp)
|
| - movdqa %xmm15,144(%rbp)
|
| -
|
| - xorq %r8,%r8
|
| -1:
|
| - addq 0(%rsi,%r8), %r10
|
| - adcq 8+0(%rsi,%r8), %r11
|
| - adcq $1,%r12
|
| - movdqa %xmm11,80(%rbp)
|
| - paddd %xmm4,%xmm0
|
| - pxor %xmm0,%xmm12
|
| - pshufb .rol16(%rip),%xmm12
|
| - paddd %xmm12,%xmm8
|
| - pxor %xmm8,%xmm4
|
| - movdqa %xmm4,%xmm11
|
| - pslld $12,%xmm11
|
| - psrld $20,%xmm4
|
| - pxor %xmm11,%xmm4
|
| - paddd %xmm4,%xmm0
|
| - pxor %xmm0,%xmm12
|
| - pshufb .rol8(%rip),%xmm12
|
| - paddd %xmm12,%xmm8
|
| - pxor %xmm8,%xmm4
|
| - movdqa %xmm4,%xmm11
|
| - pslld $7,%xmm11
|
| - psrld $25,%xmm4
|
| - pxor %xmm11,%xmm4
|
| -.byte 102,15,58,15,228,4
|
| -.byte 102,69,15,58,15,192,8
|
| -.byte 102,69,15,58,15,228,12
|
| - paddd %xmm5,%xmm1
|
| - pxor %xmm1,%xmm13
|
| - pshufb .rol16(%rip),%xmm13
|
| - paddd %xmm13,%xmm9
|
| - pxor %xmm9,%xmm5
|
| - movdqa %xmm5,%xmm11
|
| - pslld $12,%xmm11
|
| - psrld $20,%xmm5
|
| - pxor %xmm11,%xmm5
|
| - paddd %xmm5,%xmm1
|
| - pxor %xmm1,%xmm13
|
| - pshufb .rol8(%rip),%xmm13
|
| - paddd %xmm13,%xmm9
|
| - pxor %xmm9,%xmm5
|
| - movdqa %xmm5,%xmm11
|
| - pslld $7,%xmm11
|
| - psrld $25,%xmm5
|
| - pxor %xmm11,%xmm5
|
| -.byte 102,15,58,15,237,4
|
| -.byte 102,69,15,58,15,201,8
|
| -.byte 102,69,15,58,15,237,12
|
| - paddd %xmm6,%xmm2
|
| - pxor %xmm2,%xmm14
|
| - pshufb .rol16(%rip),%xmm14
|
| - paddd %xmm14,%xmm10
|
| - pxor %xmm10,%xmm6
|
| - movdqa %xmm6,%xmm11
|
| - pslld $12,%xmm11
|
| - psrld $20,%xmm6
|
| - pxor %xmm11,%xmm6
|
| - paddd %xmm6,%xmm2
|
| - pxor %xmm2,%xmm14
|
| - pshufb .rol8(%rip),%xmm14
|
| - paddd %xmm14,%xmm10
|
| - pxor %xmm10,%xmm6
|
| - movdqa %xmm6,%xmm11
|
| - pslld $7,%xmm11
|
| - psrld $25,%xmm6
|
| - pxor %xmm11,%xmm6
|
| -.byte 102,15,58,15,246,4
|
| -.byte 102,69,15,58,15,210,8
|
| -.byte 102,69,15,58,15,246,12
|
| - movdqa 80(%rbp),%xmm11
|
| - movq 0+0(%rbp),%rax
|
| - movq %rax,%r15
|
| - mulq %r10
|
| - movq %rax,%r13
|
| - movq %rdx,%r14
|
| - movq 0+0(%rbp),%rax
|
| - mulq %r11
|
| - imulq %r12,%r15
|
| - addq %rax,%r14
|
| - adcq %rdx,%r15
|
| - movdqa %xmm9,80(%rbp)
|
| - paddd %xmm7,%xmm3
|
| - pxor %xmm3,%xmm15
|
| - pshufb .rol16(%rip),%xmm15
|
| - paddd %xmm15,%xmm11
|
| - pxor %xmm11,%xmm7
|
| - movdqa %xmm7,%xmm9
|
| - pslld $12,%xmm9
|
| - psrld $20,%xmm7
|
| - pxor %xmm9,%xmm7
|
| - paddd %xmm7,%xmm3
|
| - pxor %xmm3,%xmm15
|
| - pshufb .rol8(%rip),%xmm15
|
| - paddd %xmm15,%xmm11
|
| - pxor %xmm11,%xmm7
|
| - movdqa %xmm7,%xmm9
|
| - pslld $7,%xmm9
|
| - psrld $25,%xmm7
|
| - pxor %xmm9,%xmm7
|
| -.byte 102,15,58,15,255,4
|
| -.byte 102,69,15,58,15,219,8
|
| -.byte 102,69,15,58,15,255,12
|
| - movdqa 80(%rbp),%xmm9
|
| - movq 8+0(%rbp),%rax
|
| - movq %rax,%r9
|
| - mulq %r10
|
| - addq %rax,%r14
|
| - adcq $0,%rdx
|
| - movq %rdx,%r10
|
| - movq 8+0(%rbp),%rax
|
| - mulq %r11
|
| - addq %rax,%r15
|
| - adcq $0,%rdx
|
| - movdqa %xmm11,80(%rbp)
|
| - paddd %xmm4,%xmm0
|
| - pxor %xmm0,%xmm12
|
| - pshufb .rol16(%rip),%xmm12
|
| - paddd %xmm12,%xmm8
|
| - pxor %xmm8,%xmm4
|
| - movdqa %xmm4,%xmm11
|
| - pslld $12,%xmm11
|
| - psrld $20,%xmm4
|
| - pxor %xmm11,%xmm4
|
| - paddd %xmm4,%xmm0
|
| - pxor %xmm0,%xmm12
|
| - pshufb .rol8(%rip),%xmm12
|
| - paddd %xmm12,%xmm8
|
| - pxor %xmm8,%xmm4
|
| - movdqa %xmm4,%xmm11
|
| - pslld $7,%xmm11
|
| - psrld $25,%xmm4
|
| - pxor %xmm11,%xmm4
|
| -.byte 102,15,58,15,228,12
|
| -.byte 102,69,15,58,15,192,8
|
| -.byte 102,69,15,58,15,228,4
|
| - paddd %xmm5,%xmm1
|
| - pxor %xmm1,%xmm13
|
| - pshufb .rol16(%rip),%xmm13
|
| - paddd %xmm13,%xmm9
|
| - pxor %xmm9,%xmm5
|
| - movdqa %xmm5,%xmm11
|
| - pslld $12,%xmm11
|
| - psrld $20,%xmm5
|
| - pxor %xmm11,%xmm5
|
| - paddd %xmm5,%xmm1
|
| - pxor %xmm1,%xmm13
|
| - pshufb .rol8(%rip),%xmm13
|
| - paddd %xmm13,%xmm9
|
| - pxor %xmm9,%xmm5
|
| - movdqa %xmm5,%xmm11
|
| - pslld $7,%xmm11
|
| - psrld $25,%xmm5
|
| - pxor %xmm11,%xmm5
|
| -.byte 102,15,58,15,237,12
|
| -.byte 102,69,15,58,15,201,8
|
| -.byte 102,69,15,58,15,237,4
|
| - imulq %r12,%r9
|
| - addq %r10,%r15
|
| - adcq %rdx,%r9
|
| - paddd %xmm6,%xmm2
|
| - pxor %xmm2,%xmm14
|
| - pshufb .rol16(%rip),%xmm14
|
| - paddd %xmm14,%xmm10
|
| - pxor %xmm10,%xmm6
|
| - movdqa %xmm6,%xmm11
|
| - pslld $12,%xmm11
|
| - psrld $20,%xmm6
|
| - pxor %xmm11,%xmm6
|
| - paddd %xmm6,%xmm2
|
| - pxor %xmm2,%xmm14
|
| - pshufb .rol8(%rip),%xmm14
|
| - paddd %xmm14,%xmm10
|
| - pxor %xmm10,%xmm6
|
| - movdqa %xmm6,%xmm11
|
| - pslld $7,%xmm11
|
| - psrld $25,%xmm6
|
| - pxor %xmm11,%xmm6
|
| -.byte 102,15,58,15,246,12
|
| -.byte 102,69,15,58,15,210,8
|
| -.byte 102,69,15,58,15,246,4
|
| - movdqa 80(%rbp),%xmm11
|
| - movq %r13,%r10
|
| - movq %r14,%r11
|
| - movq %r15,%r12
|
| - andq $3,%r12
|
| - movq %r15,%r13
|
| - andq $-4,%r13
|
| - movq %r9,%r14
|
| - shrdq $2,%r9,%r15
|
| - shrq $2,%r9
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $0,%r12
|
| - addq %r15,%r10
|
| - adcq %r9,%r11
|
| - adcq $0,%r12
|
| - movdqa %xmm9,80(%rbp)
|
| - paddd %xmm7,%xmm3
|
| - pxor %xmm3,%xmm15
|
| - pshufb .rol16(%rip),%xmm15
|
| - paddd %xmm15,%xmm11
|
| - pxor %xmm11,%xmm7
|
| - movdqa %xmm7,%xmm9
|
| - pslld $12,%xmm9
|
| - psrld $20,%xmm7
|
| - pxor %xmm9,%xmm7
|
| - paddd %xmm7,%xmm3
|
| - pxor %xmm3,%xmm15
|
| - pshufb .rol8(%rip),%xmm15
|
| - paddd %xmm15,%xmm11
|
| - pxor %xmm11,%xmm7
|
| - movdqa %xmm7,%xmm9
|
| - pslld $7,%xmm9
|
| - psrld $25,%xmm7
|
| - pxor %xmm9,%xmm7
|
| -.byte 102,15,58,15,255,12
|
| -.byte 102,69,15,58,15,219,8
|
| -.byte 102,69,15,58,15,255,4
|
| - movdqa 80(%rbp),%xmm9
|
| -
|
| - addq $16,%r8
|
| - cmpq $160,%r8
|
| - jb 1b
|
| - movq %rbx,%rcx
|
| - andq $-16,%rcx
|
| -1:
|
| - addq 0(%rsi,%r8), %r10
|
| - adcq 8+0(%rsi,%r8), %r11
|
| - adcq $1,%r12
|
| - movq 0+0(%rbp),%rax
|
| - movq %rax,%r15
|
| - mulq %r10
|
| - movq %rax,%r13
|
| - movq %rdx,%r14
|
| - movq 0+0(%rbp),%rax
|
| - mulq %r11
|
| - imulq %r12,%r15
|
| - addq %rax,%r14
|
| - adcq %rdx,%r15
|
| - movq 8+0(%rbp),%rax
|
| - movq %rax,%r9
|
| - mulq %r10
|
| - addq %rax,%r14
|
| - adcq $0,%rdx
|
| - movq %rdx,%r10
|
| - movq 8+0(%rbp),%rax
|
| - mulq %r11
|
| - addq %rax,%r15
|
| - adcq $0,%rdx
|
| - imulq %r12,%r9
|
| - addq %r10,%r15
|
| - adcq %rdx,%r9
|
| - movq %r13,%r10
|
| - movq %r14,%r11
|
| - movq %r15,%r12
|
| - andq $3,%r12
|
| - movq %r15,%r13
|
| - andq $-4,%r13
|
| - movq %r9,%r14
|
| - shrdq $2,%r9,%r15
|
| - shrq $2,%r9
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $0,%r12
|
| - addq %r15,%r10
|
| - adcq %r9,%r11
|
| - adcq $0,%r12
|
| -
|
| - addq $16,%r8
|
| - cmpq %rcx,%r8
|
| - jb 1b
|
| - paddd .chacha20_consts(%rip),%xmm3
|
| - paddd 48(%rbp),%xmm7
|
| - paddd 64(%rbp),%xmm11
|
| - paddd 144(%rbp),%xmm15
|
| - paddd .chacha20_consts(%rip),%xmm2
|
| - paddd 48(%rbp),%xmm6
|
| - paddd 64(%rbp),%xmm10
|
| - paddd 128(%rbp),%xmm14
|
| - paddd .chacha20_consts(%rip),%xmm1
|
| - paddd 48(%rbp),%xmm5
|
| - paddd 64(%rbp),%xmm9
|
| - paddd 112(%rbp),%xmm13
|
| - paddd .chacha20_consts(%rip),%xmm0
|
| - paddd 48(%rbp),%xmm4
|
| - paddd 64(%rbp),%xmm8
|
| - paddd 96(%rbp),%xmm12
|
| - movdqa %xmm12,80(%rbp)
|
| - movdqu 0 + 0(%rsi),%xmm12
|
| - pxor %xmm3,%xmm12
|
| - movdqu %xmm12,0 + 0(%rdi)
|
| - movdqu 16 + 0(%rsi),%xmm12
|
| - pxor %xmm7,%xmm12
|
| - movdqu %xmm12,16 + 0(%rdi)
|
| - movdqu 32 + 0(%rsi),%xmm12
|
| - pxor %xmm11,%xmm12
|
| - movdqu %xmm12,32 + 0(%rdi)
|
| - movdqu 48 + 0(%rsi),%xmm12
|
| - pxor %xmm15,%xmm12
|
| - movdqu %xmm12,48 + 0(%rdi)
|
| - movdqu 0 + 64(%rsi),%xmm3
|
| - movdqu 16 + 64(%rsi),%xmm7
|
| - movdqu 32 + 64(%rsi),%xmm11
|
| - movdqu 48 + 64(%rsi),%xmm15
|
| - pxor %xmm3,%xmm2
|
| - pxor %xmm7,%xmm6
|
| - pxor %xmm11,%xmm10
|
| - pxor %xmm14,%xmm15
|
| - movdqu %xmm2,0 + 64(%rdi)
|
| - movdqu %xmm6,16 + 64(%rdi)
|
| - movdqu %xmm10,32 + 64(%rdi)
|
| - movdqu %xmm15,48 + 64(%rdi)
|
| - movdqu 0 + 128(%rsi),%xmm3
|
| - movdqu 16 + 128(%rsi),%xmm7
|
| - movdqu 32 + 128(%rsi),%xmm11
|
| - movdqu 48 + 128(%rsi),%xmm15
|
| - pxor %xmm3,%xmm1
|
| - pxor %xmm7,%xmm5
|
| - pxor %xmm11,%xmm9
|
| - pxor %xmm13,%xmm15
|
| - movdqu %xmm1,0 + 128(%rdi)
|
| - movdqu %xmm5,16 + 128(%rdi)
|
| - movdqu %xmm9,32 + 128(%rdi)
|
| - movdqu %xmm15,48 + 128(%rdi)
|
| -
|
| - movdqa 80(%rbp),%xmm12
|
| - subq $192,%rbx
|
| - leaq 192(%rsi),%rsi
|
| - leaq 192(%rdi),%rdi
|
| -
|
| -
|
| -open_sse_tail_64_dec_loop:
|
| - cmpq $16,%rbx
|
| - jb 1f
|
| - subq $16,%rbx
|
| - movdqu (%rsi),%xmm3
|
| - pxor %xmm3,%xmm0
|
| - movdqu %xmm0,(%rdi)
|
| - leaq 16(%rsi),%rsi
|
| - leaq 16(%rdi),%rdi
|
| - movdqa %xmm4,%xmm0
|
| - movdqa %xmm8,%xmm4
|
| - movdqa %xmm12,%xmm8
|
| - jmp open_sse_tail_64_dec_loop
|
| -1:
|
| - movdqa %xmm0,%xmm1
|
| -
|
| -
|
| -open_sse_tail_16:
|
| - testq %rbx,%rbx
|
| - jz open_sse_finalize
|
| -
|
| -
|
| -
|
| - pxor %xmm3,%xmm3
|
| - leaq -1(%rsi,%rbx), %rsi
|
| - movq %rbx,%r8
|
| -2:
|
| - pslldq $1,%xmm3
|
| - pinsrb $0,(%rsi),%xmm3
|
| - subq $1,%rsi
|
| - subq $1,%r8
|
| - jnz 2b
|
| -
|
| -3:
|
| -.byte 102,73,15,126,221
|
| - pextrq $1,%xmm3,%r14
|
| -
|
| - pxor %xmm1,%xmm3
|
| -
|
| -
|
| -2:
|
| - pextrb $0,%xmm3,(%rdi)
|
| - psrldq $1,%xmm3
|
| - addq $1,%rdi
|
| - subq $1,%rbx
|
| - jne 2b
|
| -
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $1,%r12
|
| - movq 0+0(%rbp),%rax
|
| - movq %rax,%r15
|
| - mulq %r10
|
| - movq %rax,%r13
|
| - movq %rdx,%r14
|
| - movq 0+0(%rbp),%rax
|
| - mulq %r11
|
| - imulq %r12,%r15
|
| - addq %rax,%r14
|
| - adcq %rdx,%r15
|
| - movq 8+0(%rbp),%rax
|
| - movq %rax,%r9
|
| - mulq %r10
|
| - addq %rax,%r14
|
| - adcq $0,%rdx
|
| - movq %rdx,%r10
|
| - movq 8+0(%rbp),%rax
|
| - mulq %r11
|
| - addq %rax,%r15
|
| - adcq $0,%rdx
|
| - imulq %r12,%r9
|
| - addq %r10,%r15
|
| - adcq %rdx,%r9
|
| - movq %r13,%r10
|
| - movq %r14,%r11
|
| - movq %r15,%r12
|
| - andq $3,%r12
|
| - movq %r15,%r13
|
| - andq $-4,%r13
|
| - movq %r9,%r14
|
| - shrdq $2,%r9,%r15
|
| - shrq $2,%r9
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $0,%r12
|
| - addq %r15,%r10
|
| - adcq %r9,%r11
|
| - adcq $0,%r12
|
| -
|
| -
|
| -open_sse_finalize:
|
| - addq 32(%rbp),%r10
|
| - adcq 8+32(%rbp),%r11
|
| - adcq $1,%r12
|
| - movq 0+0(%rbp),%rax
|
| - movq %rax,%r15
|
| - mulq %r10
|
| - movq %rax,%r13
|
| - movq %rdx,%r14
|
| - movq 0+0(%rbp),%rax
|
| - mulq %r11
|
| - imulq %r12,%r15
|
| - addq %rax,%r14
|
| - adcq %rdx,%r15
|
| - movq 8+0(%rbp),%rax
|
| - movq %rax,%r9
|
| - mulq %r10
|
| - addq %rax,%r14
|
| - adcq $0,%rdx
|
| - movq %rdx,%r10
|
| - movq 8+0(%rbp),%rax
|
| - mulq %r11
|
| - addq %rax,%r15
|
| - adcq $0,%rdx
|
| - imulq %r12,%r9
|
| - addq %r10,%r15
|
| - adcq %rdx,%r9
|
| - movq %r13,%r10
|
| - movq %r14,%r11
|
| - movq %r15,%r12
|
| - andq $3,%r12
|
| - movq %r15,%r13
|
| - andq $-4,%r13
|
| - movq %r9,%r14
|
| - shrdq $2,%r9,%r15
|
| - shrq $2,%r9
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $0,%r12
|
| - addq %r15,%r10
|
| - adcq %r9,%r11
|
| - adcq $0,%r12
|
| -
|
| -
|
| - movq %r10,%r13
|
| - movq %r11,%r14
|
| - movq %r12,%r15
|
| - subq $-5,%r10
|
| - sbbq $-1,%r11
|
| - sbbq $3,%r12
|
| - cmovcq %r13,%r10
|
| - cmovcq %r14,%r11
|
| - cmovcq %r15,%r12
|
| -
|
| - addq 0+16(%rbp),%r10
|
| - adcq 8+16(%rbp),%r11
|
| -
|
| - addq $288 + 32,%rsp
|
| -.cfi_adjust_cfa_offset -(288 + 32)
|
| - popq %r9
|
| -.cfi_adjust_cfa_offset -8
|
| - movq %r10,(%r9)
|
| - movq %r11,8(%r9)
|
| -
|
| - popq %r15
|
| -.cfi_adjust_cfa_offset -8
|
| - popq %r14
|
| -.cfi_adjust_cfa_offset -8
|
| - popq %r13
|
| -.cfi_adjust_cfa_offset -8
|
| - popq %r12
|
| -.cfi_adjust_cfa_offset -8
|
| - popq %rbx
|
| -.cfi_adjust_cfa_offset -8
|
| - popq %rbp
|
| -.cfi_adjust_cfa_offset -8
|
| - .byte 0xf3,0xc3
|
| -.cfi_adjust_cfa_offset (8 * 6) + 288 + 32
|
| -
|
| -open_sse_128:
|
| - movdqu .chacha20_consts(%rip),%xmm0
|
| - movdqa %xmm0,%xmm1
|
| - movdqa %xmm0,%xmm2
|
| - movdqu 0(%r9),%xmm4
|
| - movdqa %xmm4,%xmm5
|
| - movdqa %xmm4,%xmm6
|
| - movdqu 16(%r9),%xmm8
|
| - movdqa %xmm8,%xmm9
|
| - movdqa %xmm8,%xmm10
|
| - movdqu 32(%r9),%xmm12
|
| - movdqa %xmm12,%xmm13
|
| - paddd .sse_inc(%rip),%xmm13
|
| - movdqa %xmm13,%xmm14
|
| - paddd .sse_inc(%rip),%xmm14
|
| - movdqa %xmm4,%xmm7
|
| - movdqa %xmm8,%xmm11
|
| - movdqa %xmm13,%xmm15
|
| - movq $10,%r10
|
| -1:
|
| - paddd %xmm4,%xmm0
|
| - pxor %xmm0,%xmm12
|
| - pshufb .rol16(%rip),%xmm12
|
| - paddd %xmm12,%xmm8
|
| - pxor %xmm8,%xmm4
|
| - movdqa %xmm4,%xmm3
|
| - pslld $12,%xmm3
|
| - psrld $20,%xmm4
|
| - pxor %xmm3,%xmm4
|
| - paddd %xmm4,%xmm0
|
| - pxor %xmm0,%xmm12
|
| - pshufb .rol8(%rip),%xmm12
|
| - paddd %xmm12,%xmm8
|
| - pxor %xmm8,%xmm4
|
| - movdqa %xmm4,%xmm3
|
| - pslld $7,%xmm3
|
| - psrld $25,%xmm4
|
| - pxor %xmm3,%xmm4
|
| -.byte 102,15,58,15,228,4
|
| -.byte 102,69,15,58,15,192,8
|
| -.byte 102,69,15,58,15,228,12
|
| - paddd %xmm5,%xmm1
|
| - pxor %xmm1,%xmm13
|
| - pshufb .rol16(%rip),%xmm13
|
| - paddd %xmm13,%xmm9
|
| - pxor %xmm9,%xmm5
|
| - movdqa %xmm5,%xmm3
|
| - pslld $12,%xmm3
|
| - psrld $20,%xmm5
|
| - pxor %xmm3,%xmm5
|
| - paddd %xmm5,%xmm1
|
| - pxor %xmm1,%xmm13
|
| - pshufb .rol8(%rip),%xmm13
|
| - paddd %xmm13,%xmm9
|
| - pxor %xmm9,%xmm5
|
| - movdqa %xmm5,%xmm3
|
| - pslld $7,%xmm3
|
| - psrld $25,%xmm5
|
| - pxor %xmm3,%xmm5
|
| -.byte 102,15,58,15,237,4
|
| -.byte 102,69,15,58,15,201,8
|
| -.byte 102,69,15,58,15,237,12
|
| - paddd %xmm6,%xmm2
|
| - pxor %xmm2,%xmm14
|
| - pshufb .rol16(%rip),%xmm14
|
| - paddd %xmm14,%xmm10
|
| - pxor %xmm10,%xmm6
|
| - movdqa %xmm6,%xmm3
|
| - pslld $12,%xmm3
|
| - psrld $20,%xmm6
|
| - pxor %xmm3,%xmm6
|
| - paddd %xmm6,%xmm2
|
| - pxor %xmm2,%xmm14
|
| - pshufb .rol8(%rip),%xmm14
|
| - paddd %xmm14,%xmm10
|
| - pxor %xmm10,%xmm6
|
| - movdqa %xmm6,%xmm3
|
| - pslld $7,%xmm3
|
| - psrld $25,%xmm6
|
| - pxor %xmm3,%xmm6
|
| -.byte 102,15,58,15,246,4
|
| -.byte 102,69,15,58,15,210,8
|
| -.byte 102,69,15,58,15,246,12
|
| - paddd %xmm4,%xmm0
|
| - pxor %xmm0,%xmm12
|
| - pshufb .rol16(%rip),%xmm12
|
| - paddd %xmm12,%xmm8
|
| - pxor %xmm8,%xmm4
|
| - movdqa %xmm4,%xmm3
|
| - pslld $12,%xmm3
|
| - psrld $20,%xmm4
|
| - pxor %xmm3,%xmm4
|
| - paddd %xmm4,%xmm0
|
| - pxor %xmm0,%xmm12
|
| - pshufb .rol8(%rip),%xmm12
|
| - paddd %xmm12,%xmm8
|
| - pxor %xmm8,%xmm4
|
| - movdqa %xmm4,%xmm3
|
| - pslld $7,%xmm3
|
| - psrld $25,%xmm4
|
| - pxor %xmm3,%xmm4
|
| -.byte 102,15,58,15,228,12
|
| -.byte 102,69,15,58,15,192,8
|
| -.byte 102,69,15,58,15,228,4
|
| - paddd %xmm5,%xmm1
|
| - pxor %xmm1,%xmm13
|
| - pshufb .rol16(%rip),%xmm13
|
| - paddd %xmm13,%xmm9
|
| - pxor %xmm9,%xmm5
|
| - movdqa %xmm5,%xmm3
|
| - pslld $12,%xmm3
|
| - psrld $20,%xmm5
|
| - pxor %xmm3,%xmm5
|
| - paddd %xmm5,%xmm1
|
| - pxor %xmm1,%xmm13
|
| - pshufb .rol8(%rip),%xmm13
|
| - paddd %xmm13,%xmm9
|
| - pxor %xmm9,%xmm5
|
| - movdqa %xmm5,%xmm3
|
| - pslld $7,%xmm3
|
| - psrld $25,%xmm5
|
| - pxor %xmm3,%xmm5
|
| -.byte 102,15,58,15,237,12
|
| -.byte 102,69,15,58,15,201,8
|
| -.byte 102,69,15,58,15,237,4
|
| - paddd %xmm6,%xmm2
|
| - pxor %xmm2,%xmm14
|
| - pshufb .rol16(%rip),%xmm14
|
| - paddd %xmm14,%xmm10
|
| - pxor %xmm10,%xmm6
|
| - movdqa %xmm6,%xmm3
|
| - pslld $12,%xmm3
|
| - psrld $20,%xmm6
|
| - pxor %xmm3,%xmm6
|
| - paddd %xmm6,%xmm2
|
| - pxor %xmm2,%xmm14
|
| - pshufb .rol8(%rip),%xmm14
|
| - paddd %xmm14,%xmm10
|
| - pxor %xmm10,%xmm6
|
| - movdqa %xmm6,%xmm3
|
| - pslld $7,%xmm3
|
| - psrld $25,%xmm6
|
| - pxor %xmm3,%xmm6
|
| -.byte 102,15,58,15,246,12
|
| -.byte 102,69,15,58,15,210,8
|
| -.byte 102,69,15,58,15,246,4
|
| -
|
| - decq %r10
|
| - jnz 1b
|
| - paddd .chacha20_consts(%rip),%xmm0
|
| - paddd .chacha20_consts(%rip),%xmm1
|
| - paddd .chacha20_consts(%rip),%xmm2
|
| - paddd %xmm7,%xmm4
|
| - paddd %xmm7,%xmm5
|
| - paddd %xmm7,%xmm6
|
| - paddd %xmm11,%xmm9
|
| - paddd %xmm11,%xmm10
|
| - paddd %xmm15,%xmm13
|
| - paddd .sse_inc(%rip),%xmm15
|
| - paddd %xmm15,%xmm14
|
| -
|
| - pand .clamp(%rip),%xmm0
|
| - movdqa %xmm0,0(%rbp)
|
| - movdqa %xmm4,16(%rbp)
|
| -
|
| - movq %r8,%r8
|
| - call poly_hash_ad_internal
|
| -1:
|
| - cmpq $16,%rbx
|
| - jb open_sse_tail_16
|
| - subq $16,%rbx
|
| - addq 0(%rsi),%r10
|
| - adcq 8+0(%rsi),%r11
|
| - adcq $1,%r12
|
| -
|
| -
|
| - movdqu 0(%rsi),%xmm3
|
| - pxor %xmm3,%xmm1
|
| - movdqu %xmm1,0(%rdi)
|
| - leaq 16(%rsi),%rsi
|
| - leaq 16(%rdi),%rdi
|
| - movq 0+0(%rbp),%rax
|
| - movq %rax,%r15
|
| - mulq %r10
|
| - movq %rax,%r13
|
| - movq %rdx,%r14
|
| - movq 0+0(%rbp),%rax
|
| - mulq %r11
|
| - imulq %r12,%r15
|
| - addq %rax,%r14
|
| - adcq %rdx,%r15
|
| - movq 8+0(%rbp),%rax
|
| - movq %rax,%r9
|
| - mulq %r10
|
| - addq %rax,%r14
|
| - adcq $0,%rdx
|
| - movq %rdx,%r10
|
| - movq 8+0(%rbp),%rax
|
| - mulq %r11
|
| - addq %rax,%r15
|
| - adcq $0,%rdx
|
| - imulq %r12,%r9
|
| - addq %r10,%r15
|
| - adcq %rdx,%r9
|
| - movq %r13,%r10
|
| - movq %r14,%r11
|
| - movq %r15,%r12
|
| - andq $3,%r12
|
| - movq %r15,%r13
|
| - andq $-4,%r13
|
| - movq %r9,%r14
|
| - shrdq $2,%r9,%r15
|
| - shrq $2,%r9
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $0,%r12
|
| - addq %r15,%r10
|
| - adcq %r9,%r11
|
| - adcq $0,%r12
|
| -
|
| -
|
| - movdqa %xmm5,%xmm1
|
| - movdqa %xmm9,%xmm5
|
| - movdqa %xmm13,%xmm9
|
| - movdqa %xmm2,%xmm13
|
| - movdqa %xmm6,%xmm2
|
| - movdqa %xmm10,%xmm6
|
| - movdqa %xmm14,%xmm10
|
| - jmp 1b
|
| - jmp open_sse_tail_16
|
| -.size chacha20_poly1305_open, .-chacha20_poly1305_open
|
| -.cfi_endproc
|
| -
|
| -
|
| -
|
| -
|
| -.globl chacha20_poly1305_seal
|
| -.hidden chacha20_poly1305_seal
|
| -.type chacha20_poly1305_seal,@function
|
| -.align 64
|
| -chacha20_poly1305_seal:
|
| -.cfi_startproc
|
| - pushq %rbp
|
| -.cfi_adjust_cfa_offset 8
|
| - pushq %rbx
|
| -.cfi_adjust_cfa_offset 8
|
| - pushq %r12
|
| -.cfi_adjust_cfa_offset 8
|
| - pushq %r13
|
| -.cfi_adjust_cfa_offset 8
|
| - pushq %r14
|
| -.cfi_adjust_cfa_offset 8
|
| - pushq %r15
|
| -.cfi_adjust_cfa_offset 8
|
| -
|
| -
|
| - pushq %r9
|
| -.cfi_adjust_cfa_offset 8
|
| - subq $288 + 32,%rsp
|
| -.cfi_adjust_cfa_offset 288 + 32
|
| -.cfi_offset rbp, -16
|
| -.cfi_offset rbx, -24
|
| -.cfi_offset r12, -32
|
| -.cfi_offset r13, -40
|
| -.cfi_offset r14, -48
|
| -.cfi_offset r15, -56
|
| - leaq 32(%rsp),%rbp
|
| - andq $-32,%rbp
|
| - movq %rdx,8+32(%rbp)
|
| - movq %r8,0+32(%rbp)
|
| - movq %rdx,%rbx
|
| -
|
| - movl OPENSSL_ia32cap_P+8(%rip),%eax
|
| - andl $288,%eax
|
| - xorl $288,%eax
|
| - jz chacha20_poly1305_seal_avx2
|
| -
|
| - cmpq $128,%rbx
|
| - jbe seal_sse_128
|
| -
|
| - movdqa .chacha20_consts(%rip),%xmm0
|
| - movdqu 0(%r9),%xmm4
|
| - movdqu 16(%r9),%xmm8
|
| - movdqu 32(%r9),%xmm12
|
| - movdqa %xmm0,%xmm1
|
| - movdqa %xmm0,%xmm2
|
| - movdqa %xmm0,%xmm3
|
| - movdqa %xmm4,%xmm5
|
| - movdqa %xmm4,%xmm6
|
| - movdqa %xmm4,%xmm7
|
| - movdqa %xmm8,%xmm9
|
| - movdqa %xmm8,%xmm10
|
| - movdqa %xmm8,%xmm11
|
| - movdqa %xmm12,%xmm15
|
| - paddd .sse_inc(%rip),%xmm12
|
| - movdqa %xmm12,%xmm14
|
| - paddd .sse_inc(%rip),%xmm12
|
| - movdqa %xmm12,%xmm13
|
| - paddd .sse_inc(%rip),%xmm12
|
| -
|
| - movdqa %xmm4,48(%rbp)
|
| - movdqa %xmm8,64(%rbp)
|
| - movdqa %xmm12,96(%rbp)
|
| - movdqa %xmm13,112(%rbp)
|
| - movdqa %xmm14,128(%rbp)
|
| - movdqa %xmm15,144(%rbp)
|
| - movq $10,%r10
|
| -1:
|
| - movdqa %xmm8,80(%rbp)
|
| - movdqa .rol16(%rip),%xmm8
|
| - paddd %xmm7,%xmm3
|
| - paddd %xmm6,%xmm2
|
| - paddd %xmm5,%xmm1
|
| - paddd %xmm4,%xmm0
|
| - pxor %xmm3,%xmm15
|
| - pxor %xmm2,%xmm14
|
| - pxor %xmm1,%xmm13
|
| - pxor %xmm0,%xmm12
|
| -.byte 102,69,15,56,0,248
|
| -.byte 102,69,15,56,0,240
|
| -.byte 102,69,15,56,0,232
|
| -.byte 102,69,15,56,0,224
|
| - movdqa 80(%rbp),%xmm8
|
| - paddd %xmm15,%xmm11
|
| - paddd %xmm14,%xmm10
|
| - paddd %xmm13,%xmm9
|
| - paddd %xmm12,%xmm8
|
| - pxor %xmm11,%xmm7
|
| - pxor %xmm10,%xmm6
|
| - pxor %xmm9,%xmm5
|
| - pxor %xmm8,%xmm4
|
| - movdqa %xmm8,80(%rbp)
|
| - movdqa %xmm7,%xmm8
|
| - psrld $20,%xmm8
|
| - pslld $32-20,%xmm7
|
| - pxor %xmm8,%xmm7
|
| - movdqa %xmm6,%xmm8
|
| - psrld $20,%xmm8
|
| - pslld $32-20,%xmm6
|
| - pxor %xmm8,%xmm6
|
| - movdqa %xmm5,%xmm8
|
| - psrld $20,%xmm8
|
| - pslld $32-20,%xmm5
|
| - pxor %xmm8,%xmm5
|
| - movdqa %xmm4,%xmm8
|
| - psrld $20,%xmm8
|
| - pslld $32-20,%xmm4
|
| - pxor %xmm8,%xmm4
|
| - movdqa .rol8(%rip),%xmm8
|
| - paddd %xmm7,%xmm3
|
| - paddd %xmm6,%xmm2
|
| - paddd %xmm5,%xmm1
|
| - paddd %xmm4,%xmm0
|
| - pxor %xmm3,%xmm15
|
| - pxor %xmm2,%xmm14
|
| - pxor %xmm1,%xmm13
|
| - pxor %xmm0,%xmm12
|
| -.byte 102,69,15,56,0,248
|
| -.byte 102,69,15,56,0,240
|
| -.byte 102,69,15,56,0,232
|
| -.byte 102,69,15,56,0,224
|
| - movdqa 80(%rbp),%xmm8
|
| - paddd %xmm15,%xmm11
|
| - paddd %xmm14,%xmm10
|
| - paddd %xmm13,%xmm9
|
| - paddd %xmm12,%xmm8
|
| - pxor %xmm11,%xmm7
|
| - pxor %xmm10,%xmm6
|
| - pxor %xmm9,%xmm5
|
| - pxor %xmm8,%xmm4
|
| - movdqa %xmm8,80(%rbp)
|
| - movdqa %xmm7,%xmm8
|
| - psrld $25,%xmm8
|
| - pslld $32-25,%xmm7
|
| - pxor %xmm8,%xmm7
|
| - movdqa %xmm6,%xmm8
|
| - psrld $25,%xmm8
|
| - pslld $32-25,%xmm6
|
| - pxor %xmm8,%xmm6
|
| - movdqa %xmm5,%xmm8
|
| - psrld $25,%xmm8
|
| - pslld $32-25,%xmm5
|
| - pxor %xmm8,%xmm5
|
| - movdqa %xmm4,%xmm8
|
| - psrld $25,%xmm8
|
| - pslld $32-25,%xmm4
|
| - pxor %xmm8,%xmm4
|
| - movdqa 80(%rbp),%xmm8
|
| -.byte 102,15,58,15,255,4
|
| -.byte 102,69,15,58,15,219,8
|
| -.byte 102,69,15,58,15,255,12
|
| -.byte 102,15,58,15,246,4
|
| -.byte 102,69,15,58,15,210,8
|
| -.byte 102,69,15,58,15,246,12
|
| -.byte 102,15,58,15,237,4
|
| -.byte 102,69,15,58,15,201,8
|
| -.byte 102,69,15,58,15,237,12
|
| -.byte 102,15,58,15,228,4
|
| -.byte 102,69,15,58,15,192,8
|
| -.byte 102,69,15,58,15,228,12
|
| - movdqa %xmm8,80(%rbp)
|
| - movdqa .rol16(%rip),%xmm8
|
| - paddd %xmm7,%xmm3
|
| - paddd %xmm6,%xmm2
|
| - paddd %xmm5,%xmm1
|
| - paddd %xmm4,%xmm0
|
| - pxor %xmm3,%xmm15
|
| - pxor %xmm2,%xmm14
|
| - pxor %xmm1,%xmm13
|
| - pxor %xmm0,%xmm12
|
| -.byte 102,69,15,56,0,248
|
| -.byte 102,69,15,56,0,240
|
| -.byte 102,69,15,56,0,232
|
| -.byte 102,69,15,56,0,224
|
| - movdqa 80(%rbp),%xmm8
|
| - paddd %xmm15,%xmm11
|
| - paddd %xmm14,%xmm10
|
| - paddd %xmm13,%xmm9
|
| - paddd %xmm12,%xmm8
|
| - pxor %xmm11,%xmm7
|
| - pxor %xmm10,%xmm6
|
| - pxor %xmm9,%xmm5
|
| - pxor %xmm8,%xmm4
|
| - movdqa %xmm8,80(%rbp)
|
| - movdqa %xmm7,%xmm8
|
| - psrld $20,%xmm8
|
| - pslld $32-20,%xmm7
|
| - pxor %xmm8,%xmm7
|
| - movdqa %xmm6,%xmm8
|
| - psrld $20,%xmm8
|
| - pslld $32-20,%xmm6
|
| - pxor %xmm8,%xmm6
|
| - movdqa %xmm5,%xmm8
|
| - psrld $20,%xmm8
|
| - pslld $32-20,%xmm5
|
| - pxor %xmm8,%xmm5
|
| - movdqa %xmm4,%xmm8
|
| - psrld $20,%xmm8
|
| - pslld $32-20,%xmm4
|
| - pxor %xmm8,%xmm4
|
| - movdqa .rol8(%rip),%xmm8
|
| - paddd %xmm7,%xmm3
|
| - paddd %xmm6,%xmm2
|
| - paddd %xmm5,%xmm1
|
| - paddd %xmm4,%xmm0
|
| - pxor %xmm3,%xmm15
|
| - pxor %xmm2,%xmm14
|
| - pxor %xmm1,%xmm13
|
| - pxor %xmm0,%xmm12
|
| -.byte 102,69,15,56,0,248
|
| -.byte 102,69,15,56,0,240
|
| -.byte 102,69,15,56,0,232
|
| -.byte 102,69,15,56,0,224
|
| - movdqa 80(%rbp),%xmm8
|
| - paddd %xmm15,%xmm11
|
| - paddd %xmm14,%xmm10
|
| - paddd %xmm13,%xmm9
|
| - paddd %xmm12,%xmm8
|
| - pxor %xmm11,%xmm7
|
| - pxor %xmm10,%xmm6
|
| - pxor %xmm9,%xmm5
|
| - pxor %xmm8,%xmm4
|
| - movdqa %xmm8,80(%rbp)
|
| - movdqa %xmm7,%xmm8
|
| - psrld $25,%xmm8
|
| - pslld $32-25,%xmm7
|
| - pxor %xmm8,%xmm7
|
| - movdqa %xmm6,%xmm8
|
| - psrld $25,%xmm8
|
| - pslld $32-25,%xmm6
|
| - pxor %xmm8,%xmm6
|
| - movdqa %xmm5,%xmm8
|
| - psrld $25,%xmm8
|
| - pslld $32-25,%xmm5
|
| - pxor %xmm8,%xmm5
|
| - movdqa %xmm4,%xmm8
|
| - psrld $25,%xmm8
|
| - pslld $32-25,%xmm4
|
| - pxor %xmm8,%xmm4
|
| - movdqa 80(%rbp),%xmm8
|
| -.byte 102,15,58,15,255,12
|
| -.byte 102,69,15,58,15,219,8
|
| -.byte 102,69,15,58,15,255,4
|
| -.byte 102,15,58,15,246,12
|
| -.byte 102,69,15,58,15,210,8
|
| -.byte 102,69,15,58,15,246,4
|
| -.byte 102,15,58,15,237,12
|
| -.byte 102,69,15,58,15,201,8
|
| -.byte 102,69,15,58,15,237,4
|
| -.byte 102,15,58,15,228,12
|
| -.byte 102,69,15,58,15,192,8
|
| -.byte 102,69,15,58,15,228,4
|
| -
|
| - decq %r10
|
| - jnz 1b
|
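The loop above runs the ten ChaCha20 double rounds for four blocks held in SSE registers: the pshufb shuffles against the rol16/rol8 tables perform the 16- and 8-bit rotations, each psrld/pslld pair performs the 12- and 7-bit rotations, and the palignr byte-rotations re-align the b/c/d rows so the same code then operates on the diagonals. A minimal scalar C sketch of the quarter round being vectorized (the names rotl32 and quarter_round are illustrative, not taken from this file):

    #include <stdint.h>

    static inline uint32_t rotl32(uint32_t x, int n) {
      return (x << n) | (x >> (32 - n));
    }

    /* One ChaCha20 quarter round; each 128-bit register above applies this
     * to all four columns of one block's state at once, and four blocks are
     * interleaved across the four register sets. */
    static void quarter_round(uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d) {
      *a += *b; *d ^= *a; *d = rotl32(*d, 16);
      *c += *d; *b ^= *c; *b = rotl32(*b, 12);
      *a += *b; *d ^= *a; *d = rotl32(*d, 8);
      *c += *d; *b ^= *c; *b = rotl32(*b, 7);
    }

The 16- and 8-bit rotations are byte-granular, which is why a single pshufb covers them, while the 12- and 7-bit rotations need the shift-and-xor pair.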
| - paddd .chacha20_consts(%rip),%xmm3
|
| - paddd 48(%rbp),%xmm7
|
| - paddd 64(%rbp),%xmm11
|
| - paddd 144(%rbp),%xmm15
|
| - paddd .chacha20_consts(%rip),%xmm2
|
| - paddd 48(%rbp),%xmm6
|
| - paddd 64(%rbp),%xmm10
|
| - paddd 128(%rbp),%xmm14
|
| - paddd .chacha20_consts(%rip),%xmm1
|
| - paddd 48(%rbp),%xmm5
|
| - paddd 64(%rbp),%xmm9
|
| - paddd 112(%rbp),%xmm13
|
| - paddd .chacha20_consts(%rip),%xmm0
|
| - paddd 48(%rbp),%xmm4
|
| - paddd 64(%rbp),%xmm8
|
| - paddd 96(%rbp),%xmm12
|
| -
|
| -
|
| - pand .clamp(%rip),%xmm3
|
| - movdqa %xmm3,0(%rbp)
|
| - movdqa %xmm7,16(%rbp)
|
| -
|
| - movq %r8,%r8
|
| - call poly_hash_ad_internal
|
| - movdqu 0 + 0(%rsi),%xmm3
|
| - movdqu 16 + 0(%rsi),%xmm7
|
| - movdqu 32 + 0(%rsi),%xmm11
|
| - movdqu 48 + 0(%rsi),%xmm15
|
| - pxor %xmm3,%xmm2
|
| - pxor %xmm7,%xmm6
|
| - pxor %xmm11,%xmm10
|
| - pxor %xmm14,%xmm15
|
| - movdqu %xmm2,0 + 0(%rdi)
|
| - movdqu %xmm6,16 + 0(%rdi)
|
| - movdqu %xmm10,32 + 0(%rdi)
|
| - movdqu %xmm15,48 + 0(%rdi)
|
| - movdqu 0 + 64(%rsi),%xmm3
|
| - movdqu 16 + 64(%rsi),%xmm7
|
| - movdqu 32 + 64(%rsi),%xmm11
|
| - movdqu 48 + 64(%rsi),%xmm15
|
| - pxor %xmm3,%xmm1
|
| - pxor %xmm7,%xmm5
|
| - pxor %xmm11,%xmm9
|
| - pxor %xmm13,%xmm15
|
| - movdqu %xmm1,0 + 64(%rdi)
|
| - movdqu %xmm5,16 + 64(%rdi)
|
| - movdqu %xmm9,32 + 64(%rdi)
|
| - movdqu %xmm15,48 + 64(%rdi)
|
| -
|
| - cmpq $192,%rbx
|
| - ja 1f
|
| - movq $128,%rcx
|
| - subq $128,%rbx
|
| - leaq 128(%rsi),%rsi
|
| - jmp seal_sse_128_seal_hash
|
| -1:
|
| - movdqu 0 + 128(%rsi),%xmm3
|
| - movdqu 16 + 128(%rsi),%xmm7
|
| - movdqu 32 + 128(%rsi),%xmm11
|
| - movdqu 48 + 128(%rsi),%xmm15
|
| - pxor %xmm3,%xmm0
|
| - pxor %xmm7,%xmm4
|
| - pxor %xmm11,%xmm8
|
| - pxor %xmm12,%xmm15
|
| - movdqu %xmm0,0 + 128(%rdi)
|
| - movdqu %xmm4,16 + 128(%rdi)
|
| - movdqu %xmm8,32 + 128(%rdi)
|
| - movdqu %xmm15,48 + 128(%rdi)
|
| -
|
| - movq $192,%rcx
|
| - subq $192,%rbx
|
| - leaq 192(%rsi),%rsi
|
| - movq $2,%rcx
|
| - movq $8,%r8
|
| - cmpq $64,%rbx
|
| - jbe seal_sse_tail_64
|
| - cmpq $128,%rbx
|
| - jbe seal_sse_tail_128
|
| - cmpq $192,%rbx
|
| - jbe seal_sse_tail_192
|
| -
|
| -1:
|
| - movdqa .chacha20_consts(%rip),%xmm0
|
| - movdqa 48(%rbp),%xmm4
|
| - movdqa 64(%rbp),%xmm8
|
| - movdqa %xmm0,%xmm1
|
| - movdqa %xmm4,%xmm5
|
| - movdqa %xmm8,%xmm9
|
| - movdqa %xmm0,%xmm2
|
| - movdqa %xmm4,%xmm6
|
| - movdqa %xmm8,%xmm10
|
| - movdqa %xmm0,%xmm3
|
| - movdqa %xmm4,%xmm7
|
| - movdqa %xmm8,%xmm11
|
| - movdqa 96(%rbp),%xmm15
|
| - paddd .sse_inc(%rip),%xmm15
|
| - movdqa %xmm15,%xmm14
|
| - paddd .sse_inc(%rip),%xmm14
|
| - movdqa %xmm14,%xmm13
|
| - paddd .sse_inc(%rip),%xmm13
|
| - movdqa %xmm13,%xmm12
|
| - paddd .sse_inc(%rip),%xmm12
|
| - movdqa %xmm12,96(%rbp)
|
| - movdqa %xmm13,112(%rbp)
|
| - movdqa %xmm14,128(%rbp)
|
| - movdqa %xmm15,144(%rbp)
|
| -
|
| -2:
|
| - movdqa %xmm8,80(%rbp)
|
| - movdqa .rol16(%rip),%xmm8
|
| - paddd %xmm7,%xmm3
|
| - paddd %xmm6,%xmm2
|
| - paddd %xmm5,%xmm1
|
| - paddd %xmm4,%xmm0
|
| - pxor %xmm3,%xmm15
|
| - pxor %xmm2,%xmm14
|
| - pxor %xmm1,%xmm13
|
| - pxor %xmm0,%xmm12
|
| -.byte 102,69,15,56,0,248
|
| -.byte 102,69,15,56,0,240
|
| -.byte 102,69,15,56,0,232
|
| -.byte 102,69,15,56,0,224
|
| - movdqa 80(%rbp),%xmm8
|
| - paddd %xmm15,%xmm11
|
| - paddd %xmm14,%xmm10
|
| - paddd %xmm13,%xmm9
|
| - paddd %xmm12,%xmm8
|
| - pxor %xmm11,%xmm7
|
| - addq 0(%rdi),%r10
|
| - adcq 8+0(%rdi),%r11
|
| - adcq $1,%r12
|
| - pxor %xmm10,%xmm6
|
| - pxor %xmm9,%xmm5
|
| - pxor %xmm8,%xmm4
|
| - movdqa %xmm8,80(%rbp)
|
| - movdqa %xmm7,%xmm8
|
| - psrld $20,%xmm8
|
| - pslld $32-20,%xmm7
|
| - pxor %xmm8,%xmm7
|
| - movdqa %xmm6,%xmm8
|
| - psrld $20,%xmm8
|
| - pslld $32-20,%xmm6
|
| - pxor %xmm8,%xmm6
|
| - movdqa %xmm5,%xmm8
|
| - psrld $20,%xmm8
|
| - pslld $32-20,%xmm5
|
| - pxor %xmm8,%xmm5
|
| - movdqa %xmm4,%xmm8
|
| - psrld $20,%xmm8
|
| - pslld $32-20,%xmm4
|
| - pxor %xmm8,%xmm4
|
| - movq 0+0(%rbp),%rax
|
| - movq %rax,%r15
|
| - mulq %r10
|
| - movq %rax,%r13
|
| - movq %rdx,%r14
|
| - movq 0+0(%rbp),%rax
|
| - mulq %r11
|
| - imulq %r12,%r15
|
| - addq %rax,%r14
|
| - adcq %rdx,%r15
|
| - movdqa .rol8(%rip),%xmm8
|
| - paddd %xmm7,%xmm3
|
| - paddd %xmm6,%xmm2
|
| - paddd %xmm5,%xmm1
|
| - paddd %xmm4,%xmm0
|
| - pxor %xmm3,%xmm15
|
| - pxor %xmm2,%xmm14
|
| - pxor %xmm1,%xmm13
|
| - pxor %xmm0,%xmm12
|
| -.byte 102,69,15,56,0,248
|
| -.byte 102,69,15,56,0,240
|
| -.byte 102,69,15,56,0,232
|
| -.byte 102,69,15,56,0,224
|
| - movdqa 80(%rbp),%xmm8
|
| - paddd %xmm15,%xmm11
|
| - paddd %xmm14,%xmm10
|
| - paddd %xmm13,%xmm9
|
| - paddd %xmm12,%xmm8
|
| - pxor %xmm11,%xmm7
|
| - pxor %xmm10,%xmm6
|
| - movq 8+0(%rbp),%rax
|
| - movq %rax,%r9
|
| - mulq %r10
|
| - addq %rax,%r14
|
| - adcq $0,%rdx
|
| - movq %rdx,%r10
|
| - movq 8+0(%rbp),%rax
|
| - mulq %r11
|
| - addq %rax,%r15
|
| - adcq $0,%rdx
|
| - pxor %xmm9,%xmm5
|
| - pxor %xmm8,%xmm4
|
| - movdqa %xmm8,80(%rbp)
|
| - movdqa %xmm7,%xmm8
|
| - psrld $25,%xmm8
|
| - pslld $32-25,%xmm7
|
| - pxor %xmm8,%xmm7
|
| - movdqa %xmm6,%xmm8
|
| - psrld $25,%xmm8
|
| - pslld $32-25,%xmm6
|
| - pxor %xmm8,%xmm6
|
| - movdqa %xmm5,%xmm8
|
| - psrld $25,%xmm8
|
| - pslld $32-25,%xmm5
|
| - pxor %xmm8,%xmm5
|
| - movdqa %xmm4,%xmm8
|
| - psrld $25,%xmm8
|
| - pslld $32-25,%xmm4
|
| - pxor %xmm8,%xmm4
|
| - movdqa 80(%rbp),%xmm8
|
| - imulq %r12,%r9
|
| - addq %r10,%r15
|
| - adcq %rdx,%r9
|
| -.byte 102,15,58,15,255,4
|
| -.byte 102,69,15,58,15,219,8
|
| -.byte 102,69,15,58,15,255,12
|
| -.byte 102,15,58,15,246,4
|
| -.byte 102,69,15,58,15,210,8
|
| -.byte 102,69,15,58,15,246,12
|
| -.byte 102,15,58,15,237,4
|
| -.byte 102,69,15,58,15,201,8
|
| -.byte 102,69,15,58,15,237,12
|
| -.byte 102,15,58,15,228,4
|
| -.byte 102,69,15,58,15,192,8
|
| -.byte 102,69,15,58,15,228,12
|
| - movdqa %xmm8,80(%rbp)
|
| - movdqa .rol16(%rip),%xmm8
|
| - paddd %xmm7,%xmm3
|
| - paddd %xmm6,%xmm2
|
| - paddd %xmm5,%xmm1
|
| - paddd %xmm4,%xmm0
|
| - pxor %xmm3,%xmm15
|
| - pxor %xmm2,%xmm14
|
| - movq %r13,%r10
|
| - movq %r14,%r11
|
| - movq %r15,%r12
|
| - andq $3,%r12
|
| - movq %r15,%r13
|
| - andq $-4,%r13
|
| - movq %r9,%r14
|
| - shrdq $2,%r9,%r15
|
| - shrq $2,%r9
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $0,%r12
|
| - addq %r15,%r10
|
| - adcq %r9,%r11
|
| - adcq $0,%r12
|
| - pxor %xmm1,%xmm13
|
| - pxor %xmm0,%xmm12
|
| -.byte 102,69,15,56,0,248
|
| -.byte 102,69,15,56,0,240
|
| -.byte 102,69,15,56,0,232
|
| -.byte 102,69,15,56,0,224
|
| - movdqa 80(%rbp),%xmm8
|
| - paddd %xmm15,%xmm11
|
| - paddd %xmm14,%xmm10
|
| - paddd %xmm13,%xmm9
|
| - paddd %xmm12,%xmm8
|
| - pxor %xmm11,%xmm7
|
| - pxor %xmm10,%xmm6
|
| - pxor %xmm9,%xmm5
|
| - pxor %xmm8,%xmm4
|
| - movdqa %xmm8,80(%rbp)
|
| - movdqa %xmm7,%xmm8
|
| - psrld $20,%xmm8
|
| - pslld $32-20,%xmm7
|
| - pxor %xmm8,%xmm7
|
| - movdqa %xmm6,%xmm8
|
| - psrld $20,%xmm8
|
| - pslld $32-20,%xmm6
|
| - pxor %xmm8,%xmm6
|
| - movdqa %xmm5,%xmm8
|
| - psrld $20,%xmm8
|
| - pslld $32-20,%xmm5
|
| - pxor %xmm8,%xmm5
|
| - movdqa %xmm4,%xmm8
|
| - psrld $20,%xmm8
|
| - pslld $32-20,%xmm4
|
| - pxor %xmm8,%xmm4
|
| - movdqa .rol8(%rip),%xmm8
|
| - paddd %xmm7,%xmm3
|
| - paddd %xmm6,%xmm2
|
| - paddd %xmm5,%xmm1
|
| - paddd %xmm4,%xmm0
|
| - pxor %xmm3,%xmm15
|
| - pxor %xmm2,%xmm14
|
| - pxor %xmm1,%xmm13
|
| - pxor %xmm0,%xmm12
|
| -.byte 102,69,15,56,0,248
|
| -.byte 102,69,15,56,0,240
|
| -.byte 102,69,15,56,0,232
|
| -.byte 102,69,15,56,0,224
|
| - movdqa 80(%rbp),%xmm8
|
| - paddd %xmm15,%xmm11
|
| - paddd %xmm14,%xmm10
|
| - paddd %xmm13,%xmm9
|
| - paddd %xmm12,%xmm8
|
| - pxor %xmm11,%xmm7
|
| - pxor %xmm10,%xmm6
|
| - pxor %xmm9,%xmm5
|
| - pxor %xmm8,%xmm4
|
| - movdqa %xmm8,80(%rbp)
|
| - movdqa %xmm7,%xmm8
|
| - psrld $25,%xmm8
|
| - pslld $32-25,%xmm7
|
| - pxor %xmm8,%xmm7
|
| - movdqa %xmm6,%xmm8
|
| - psrld $25,%xmm8
|
| - pslld $32-25,%xmm6
|
| - pxor %xmm8,%xmm6
|
| - movdqa %xmm5,%xmm8
|
| - psrld $25,%xmm8
|
| - pslld $32-25,%xmm5
|
| - pxor %xmm8,%xmm5
|
| - movdqa %xmm4,%xmm8
|
| - psrld $25,%xmm8
|
| - pslld $32-25,%xmm4
|
| - pxor %xmm8,%xmm4
|
| - movdqa 80(%rbp),%xmm8
|
| -.byte 102,15,58,15,255,12
|
| -.byte 102,69,15,58,15,219,8
|
| -.byte 102,69,15,58,15,255,4
|
| -.byte 102,15,58,15,246,12
|
| -.byte 102,69,15,58,15,210,8
|
| -.byte 102,69,15,58,15,246,4
|
| -.byte 102,15,58,15,237,12
|
| -.byte 102,69,15,58,15,201,8
|
| -.byte 102,69,15,58,15,237,4
|
| -.byte 102,15,58,15,228,12
|
| -.byte 102,69,15,58,15,192,8
|
| -.byte 102,69,15,58,15,228,4
|
| -
|
| - leaq 16(%rdi),%rdi
|
| - decq %r8
|
| - jge 2b
|
| - addq 0(%rdi),%r10
|
| - adcq 8+0(%rdi),%r11
|
| - adcq $1,%r12
|
| - movq 0+0(%rbp),%rax
|
| - movq %rax,%r15
|
| - mulq %r10
|
| - movq %rax,%r13
|
| - movq %rdx,%r14
|
| - movq 0+0(%rbp),%rax
|
| - mulq %r11
|
| - imulq %r12,%r15
|
| - addq %rax,%r14
|
| - adcq %rdx,%r15
|
| - movq 8+0(%rbp),%rax
|
| - movq %rax,%r9
|
| - mulq %r10
|
| - addq %rax,%r14
|
| - adcq $0,%rdx
|
| - movq %rdx,%r10
|
| - movq 8+0(%rbp),%rax
|
| - mulq %r11
|
| - addq %rax,%r15
|
| - adcq $0,%rdx
|
| - imulq %r12,%r9
|
| - addq %r10,%r15
|
| - adcq %rdx,%r9
|
| - movq %r13,%r10
|
| - movq %r14,%r11
|
| - movq %r15,%r12
|
| - andq $3,%r12
|
| - movq %r15,%r13
|
| - andq $-4,%r13
|
| - movq %r9,%r14
|
| - shrdq $2,%r9,%r15
|
| - shrq $2,%r9
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $0,%r12
|
| - addq %r15,%r10
|
| - adcq %r9,%r11
|
| - adcq $0,%r12
|
| -
|
| - leaq 16(%rdi),%rdi
|
| - decq %rcx
|
| - jg 2b
|
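Interleaved with the vector instructions above is one scalar Poly1305 block per 16 bytes of ciphertext: load 16 bytes from the output buffer, add them (plus the 2^128 pad bit) into the 130-bit accumulator held in %r10/%r11/%r12, multiply by the clamped key half r with mulq/imulq, and fold the bits above 2^130 back in through the andq/shrdq block, using the identity 2^130 = 5 (mod 2^130 - 5). A C sketch of that block step, assuming a compiler with unsigned __int128 and a little-endian host; the function and variable names are illustrative, not part of the generated code:

    #include <stdint.h>
    #include <string.h>

    typedef unsigned __int128 u128;

    /* h = (h + m + hibit*2^128) * r, partially reduced mod 2^130 - 5.
     * h is three 64-bit limbs; r is the clamped key half; hibit is 1 for
     * every 16-byte block in this AEAD. */
    static void poly1305_block(uint64_t h[3], const uint64_t r[2],
                               const uint8_t m[16], uint64_t hibit) {
      uint64_t m0, m1;
      memcpy(&m0, m, 8);                      /* little-endian loads */
      memcpy(&m1, m + 8, 8);

      /* h += m + hibit*2^128 (the addq/adcq/adcq $1 at the top of a block). */
      u128 t = (u128)h[0] + m0;
      h[0] = (uint64_t)t;
      t = (u128)h[1] + m1 + (t >> 64);
      h[1] = (uint64_t)t;
      h[2] += hibit + (uint64_t)(t >> 64);

      /* d = h * r as a 256-bit product d3:d2:d1:d0 (r is clamped, so the
       * products involving h[2] fit in 64 bits, as the imulq assumes). */
      u128 d0 = (u128)r[0] * h[0];
      u128 d1 = (u128)r[0] * h[1] + (u128)r[1] * h[0] + (uint64_t)(d0 >> 64);
      u128 d2 = (u128)r[1] * h[1] + r[0] * h[2] + (uint64_t)(d1 >> 64);
      uint64_t d3 = r[1] * h[2] + (uint64_t)(d2 >> 64);

      /* Lazy reduction: with T = d >> 130, fold 5*T = 4*T + T back into the
       * low 130 bits (the andq $3 / andq $-4 / shrdq $2 block). */
      uint64_t g2 = (uint64_t)d2;
      h[0] = (uint64_t)d0;
      h[1] = (uint64_t)d1;
      h[2] = g2 & 3;
      t = (u128)h[0] + (g2 & ~(uint64_t)3);
      h[0] = (uint64_t)t;
      t = (u128)h[1] + d3 + (t >> 64);
      h[1] = (uint64_t)t;
      h[2] += (uint64_t)(t >> 64);
      t = (u128)h[0] + ((g2 >> 2) | (d3 << 62));
      h[0] = (uint64_t)t;
      t = (u128)h[1] + (d3 >> 2) + (t >> 64);
      h[1] = (uint64_t)t;
      h[2] += (uint64_t)(t >> 64);
    }

Fusing the two primitives into one loop lets the scalar multiply/adc chain and the SIMD rounds issue side by side instead of running back to back.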
| - paddd .chacha20_consts(%rip),%xmm3
|
| - paddd 48(%rbp),%xmm7
|
| - paddd 64(%rbp),%xmm11
|
| - paddd 144(%rbp),%xmm15
|
| - paddd .chacha20_consts(%rip),%xmm2
|
| - paddd 48(%rbp),%xmm6
|
| - paddd 64(%rbp),%xmm10
|
| - paddd 128(%rbp),%xmm14
|
| - paddd .chacha20_consts(%rip),%xmm1
|
| - paddd 48(%rbp),%xmm5
|
| - paddd 64(%rbp),%xmm9
|
| - paddd 112(%rbp),%xmm13
|
| - paddd .chacha20_consts(%rip),%xmm0
|
| - paddd 48(%rbp),%xmm4
|
| - paddd 64(%rbp),%xmm8
|
| - paddd 96(%rbp),%xmm12
|
| -
|
| - movdqa %xmm14,80(%rbp)
|
| - movdqa %xmm14,80(%rbp)
|
| - movdqu 0 + 0(%rsi),%xmm14
|
| - pxor %xmm3,%xmm14
|
| - movdqu %xmm14,0 + 0(%rdi)
|
| - movdqu 16 + 0(%rsi),%xmm14
|
| - pxor %xmm7,%xmm14
|
| - movdqu %xmm14,16 + 0(%rdi)
|
| - movdqu 32 + 0(%rsi),%xmm14
|
| - pxor %xmm11,%xmm14
|
| - movdqu %xmm14,32 + 0(%rdi)
|
| - movdqu 48 + 0(%rsi),%xmm14
|
| - pxor %xmm15,%xmm14
|
| - movdqu %xmm14,48 + 0(%rdi)
|
| -
|
| - movdqa 80(%rbp),%xmm14
|
| - movdqu 0 + 64(%rsi),%xmm3
|
| - movdqu 16 + 64(%rsi),%xmm7
|
| - movdqu 32 + 64(%rsi),%xmm11
|
| - movdqu 48 + 64(%rsi),%xmm15
|
| - pxor %xmm3,%xmm2
|
| - pxor %xmm7,%xmm6
|
| - pxor %xmm11,%xmm10
|
| - pxor %xmm14,%xmm15
|
| - movdqu %xmm2,0 + 64(%rdi)
|
| - movdqu %xmm6,16 + 64(%rdi)
|
| - movdqu %xmm10,32 + 64(%rdi)
|
| - movdqu %xmm15,48 + 64(%rdi)
|
| - movdqu 0 + 128(%rsi),%xmm3
|
| - movdqu 16 + 128(%rsi),%xmm7
|
| - movdqu 32 + 128(%rsi),%xmm11
|
| - movdqu 48 + 128(%rsi),%xmm15
|
| - pxor %xmm3,%xmm1
|
| - pxor %xmm7,%xmm5
|
| - pxor %xmm11,%xmm9
|
| - pxor %xmm13,%xmm15
|
| - movdqu %xmm1,0 + 128(%rdi)
|
| - movdqu %xmm5,16 + 128(%rdi)
|
| - movdqu %xmm9,32 + 128(%rdi)
|
| - movdqu %xmm15,48 + 128(%rdi)
|
| -
|
| - cmpq $256,%rbx
|
| - ja 3f
|
| -
|
| - movq $192,%rcx
|
| - subq $192,%rbx
|
| - leaq 192(%rsi),%rsi
|
| - jmp seal_sse_128_seal_hash
|
| -3:
|
| - movdqu 0 + 192(%rsi),%xmm3
|
| - movdqu 16 + 192(%rsi),%xmm7
|
| - movdqu 32 + 192(%rsi),%xmm11
|
| - movdqu 48 + 192(%rsi),%xmm15
|
| - pxor %xmm3,%xmm0
|
| - pxor %xmm7,%xmm4
|
| - pxor %xmm11,%xmm8
|
| - pxor %xmm12,%xmm15
|
| - movdqu %xmm0,0 + 192(%rdi)
|
| - movdqu %xmm4,16 + 192(%rdi)
|
| - movdqu %xmm8,32 + 192(%rdi)
|
| - movdqu %xmm15,48 + 192(%rdi)
|
| -
|
| - leaq 256(%rsi),%rsi
|
| - subq $256,%rbx
|
| - movq $6,%rcx
|
| - movq $4,%r8
|
| - cmpq $192,%rbx
|
| - jg 1b
|
| - movq %rbx,%rcx
|
| - testq %rbx,%rbx
|
| - je seal_sse_128_seal_hash
|
| - movq $6,%rcx
|
| - cmpq $64,%rbx
|
| - jg 3f
|
| -
|
| -seal_sse_tail_64:
|
| - movdqa .chacha20_consts(%rip),%xmm0
|
| - movdqa 48(%rbp),%xmm4
|
| - movdqa 64(%rbp),%xmm8
|
| - movdqa 96(%rbp),%xmm12
|
| - paddd .sse_inc(%rip),%xmm12
|
| - movdqa %xmm12,96(%rbp)
|
| -
|
| -1:
|
| - addq 0(%rdi),%r10
|
| - adcq 8+0(%rdi),%r11
|
| - adcq $1,%r12
|
| - movq 0+0(%rbp),%rax
|
| - movq %rax,%r15
|
| - mulq %r10
|
| - movq %rax,%r13
|
| - movq %rdx,%r14
|
| - movq 0+0(%rbp),%rax
|
| - mulq %r11
|
| - imulq %r12,%r15
|
| - addq %rax,%r14
|
| - adcq %rdx,%r15
|
| - movq 8+0(%rbp),%rax
|
| - movq %rax,%r9
|
| - mulq %r10
|
| - addq %rax,%r14
|
| - adcq $0,%rdx
|
| - movq %rdx,%r10
|
| - movq 8+0(%rbp),%rax
|
| - mulq %r11
|
| - addq %rax,%r15
|
| - adcq $0,%rdx
|
| - imulq %r12,%r9
|
| - addq %r10,%r15
|
| - adcq %rdx,%r9
|
| - movq %r13,%r10
|
| - movq %r14,%r11
|
| - movq %r15,%r12
|
| - andq $3,%r12
|
| - movq %r15,%r13
|
| - andq $-4,%r13
|
| - movq %r9,%r14
|
| - shrdq $2,%r9,%r15
|
| - shrq $2,%r9
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $0,%r12
|
| - addq %r15,%r10
|
| - adcq %r9,%r11
|
| - adcq $0,%r12
|
| -
|
| - leaq 16(%rdi),%rdi
|
| -2:
|
| - paddd %xmm4,%xmm0
|
| - pxor %xmm0,%xmm12
|
| - pshufb .rol16(%rip),%xmm12
|
| - paddd %xmm12,%xmm8
|
| - pxor %xmm8,%xmm4
|
| - movdqa %xmm4,%xmm3
|
| - pslld $12,%xmm3
|
| - psrld $20,%xmm4
|
| - pxor %xmm3,%xmm4
|
| - paddd %xmm4,%xmm0
|
| - pxor %xmm0,%xmm12
|
| - pshufb .rol8(%rip),%xmm12
|
| - paddd %xmm12,%xmm8
|
| - pxor %xmm8,%xmm4
|
| - movdqa %xmm4,%xmm3
|
| - pslld $7,%xmm3
|
| - psrld $25,%xmm4
|
| - pxor %xmm3,%xmm4
|
| -.byte 102,15,58,15,228,4
|
| -.byte 102,69,15,58,15,192,8
|
| -.byte 102,69,15,58,15,228,12
|
| - paddd %xmm4,%xmm0
|
| - pxor %xmm0,%xmm12
|
| - pshufb .rol16(%rip),%xmm12
|
| - paddd %xmm12,%xmm8
|
| - pxor %xmm8,%xmm4
|
| - movdqa %xmm4,%xmm3
|
| - pslld $12,%xmm3
|
| - psrld $20,%xmm4
|
| - pxor %xmm3,%xmm4
|
| - paddd %xmm4,%xmm0
|
| - pxor %xmm0,%xmm12
|
| - pshufb .rol8(%rip),%xmm12
|
| - paddd %xmm12,%xmm8
|
| - pxor %xmm8,%xmm4
|
| - movdqa %xmm4,%xmm3
|
| - pslld $7,%xmm3
|
| - psrld $25,%xmm4
|
| - pxor %xmm3,%xmm4
|
| -.byte 102,15,58,15,228,12
|
| -.byte 102,69,15,58,15,192,8
|
| -.byte 102,69,15,58,15,228,4
|
| - addq 0(%rdi),%r10
|
| - adcq 8+0(%rdi),%r11
|
| - adcq $1,%r12
|
| - movq 0+0(%rbp),%rax
|
| - movq %rax,%r15
|
| - mulq %r10
|
| - movq %rax,%r13
|
| - movq %rdx,%r14
|
| - movq 0+0(%rbp),%rax
|
| - mulq %r11
|
| - imulq %r12,%r15
|
| - addq %rax,%r14
|
| - adcq %rdx,%r15
|
| - movq 8+0(%rbp),%rax
|
| - movq %rax,%r9
|
| - mulq %r10
|
| - addq %rax,%r14
|
| - adcq $0,%rdx
|
| - movq %rdx,%r10
|
| - movq 8+0(%rbp),%rax
|
| - mulq %r11
|
| - addq %rax,%r15
|
| - adcq $0,%rdx
|
| - imulq %r12,%r9
|
| - addq %r10,%r15
|
| - adcq %rdx,%r9
|
| - movq %r13,%r10
|
| - movq %r14,%r11
|
| - movq %r15,%r12
|
| - andq $3,%r12
|
| - movq %r15,%r13
|
| - andq $-4,%r13
|
| - movq %r9,%r14
|
| - shrdq $2,%r9,%r15
|
| - shrq $2,%r9
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $0,%r12
|
| - addq %r15,%r10
|
| - adcq %r9,%r11
|
| - adcq $0,%r12
|
| -
|
| - leaq 16(%rdi),%rdi
|
| - decq %rcx
|
| - jg 1b
|
| - decq %r8
|
| - jge 2b
|
| - paddd .chacha20_consts(%rip),%xmm0
|
| - paddd 48(%rbp),%xmm4
|
| - paddd 64(%rbp),%xmm8
|
| - paddd 96(%rbp),%xmm12
|
| -
|
| - jmp seal_sse_128_seal
|
| -3:
|
| - cmpq $128,%rbx
|
| - jg 3f
|
| -
|
| -seal_sse_tail_128:
|
| - movdqa .chacha20_consts(%rip),%xmm0
|
| - movdqa 48(%rbp),%xmm4
|
| - movdqa 64(%rbp),%xmm8
|
| - movdqa %xmm0,%xmm1
|
| - movdqa %xmm4,%xmm5
|
| - movdqa %xmm8,%xmm9
|
| - movdqa 96(%rbp),%xmm13
|
| - paddd .sse_inc(%rip),%xmm13
|
| - movdqa %xmm13,%xmm12
|
| - paddd .sse_inc(%rip),%xmm12
|
| - movdqa %xmm12,96(%rbp)
|
| - movdqa %xmm13,112(%rbp)
|
| -
|
| -1:
|
| - addq 0(%rdi),%r10
|
| - adcq 8+0(%rdi),%r11
|
| - adcq $1,%r12
|
| - movq 0+0(%rbp),%rax
|
| - movq %rax,%r15
|
| - mulq %r10
|
| - movq %rax,%r13
|
| - movq %rdx,%r14
|
| - movq 0+0(%rbp),%rax
|
| - mulq %r11
|
| - imulq %r12,%r15
|
| - addq %rax,%r14
|
| - adcq %rdx,%r15
|
| - movq 8+0(%rbp),%rax
|
| - movq %rax,%r9
|
| - mulq %r10
|
| - addq %rax,%r14
|
| - adcq $0,%rdx
|
| - movq %rdx,%r10
|
| - movq 8+0(%rbp),%rax
|
| - mulq %r11
|
| - addq %rax,%r15
|
| - adcq $0,%rdx
|
| - imulq %r12,%r9
|
| - addq %r10,%r15
|
| - adcq %rdx,%r9
|
| - movq %r13,%r10
|
| - movq %r14,%r11
|
| - movq %r15,%r12
|
| - andq $3,%r12
|
| - movq %r15,%r13
|
| - andq $-4,%r13
|
| - movq %r9,%r14
|
| - shrdq $2,%r9,%r15
|
| - shrq $2,%r9
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $0,%r12
|
| - addq %r15,%r10
|
| - adcq %r9,%r11
|
| - adcq $0,%r12
|
| -
|
| - leaq 16(%rdi),%rdi
|
| -2:
|
| - paddd %xmm4,%xmm0
|
| - pxor %xmm0,%xmm12
|
| - pshufb .rol16(%rip),%xmm12
|
| - paddd %xmm12,%xmm8
|
| - pxor %xmm8,%xmm4
|
| - movdqa %xmm4,%xmm3
|
| - pslld $12,%xmm3
|
| - psrld $20,%xmm4
|
| - pxor %xmm3,%xmm4
|
| - paddd %xmm4,%xmm0
|
| - pxor %xmm0,%xmm12
|
| - pshufb .rol8(%rip),%xmm12
|
| - paddd %xmm12,%xmm8
|
| - pxor %xmm8,%xmm4
|
| - movdqa %xmm4,%xmm3
|
| - pslld $7,%xmm3
|
| - psrld $25,%xmm4
|
| - pxor %xmm3,%xmm4
|
| -.byte 102,15,58,15,228,4
|
| -.byte 102,69,15,58,15,192,8
|
| -.byte 102,69,15,58,15,228,12
|
| - paddd %xmm5,%xmm1
|
| - pxor %xmm1,%xmm13
|
| - pshufb .rol16(%rip),%xmm13
|
| - paddd %xmm13,%xmm9
|
| - pxor %xmm9,%xmm5
|
| - movdqa %xmm5,%xmm3
|
| - pslld $12,%xmm3
|
| - psrld $20,%xmm5
|
| - pxor %xmm3,%xmm5
|
| - paddd %xmm5,%xmm1
|
| - pxor %xmm1,%xmm13
|
| - pshufb .rol8(%rip),%xmm13
|
| - paddd %xmm13,%xmm9
|
| - pxor %xmm9,%xmm5
|
| - movdqa %xmm5,%xmm3
|
| - pslld $7,%xmm3
|
| - psrld $25,%xmm5
|
| - pxor %xmm3,%xmm5
|
| -.byte 102,15,58,15,237,4
|
| -.byte 102,69,15,58,15,201,8
|
| -.byte 102,69,15,58,15,237,12
|
| - addq 0(%rdi),%r10
|
| - adcq 8+0(%rdi),%r11
|
| - adcq $1,%r12
|
| - movq 0+0(%rbp),%rax
|
| - movq %rax,%r15
|
| - mulq %r10
|
| - movq %rax,%r13
|
| - movq %rdx,%r14
|
| - movq 0+0(%rbp),%rax
|
| - mulq %r11
|
| - imulq %r12,%r15
|
| - addq %rax,%r14
|
| - adcq %rdx,%r15
|
| - movq 8+0(%rbp),%rax
|
| - movq %rax,%r9
|
| - mulq %r10
|
| - addq %rax,%r14
|
| - adcq $0,%rdx
|
| - movq %rdx,%r10
|
| - movq 8+0(%rbp),%rax
|
| - mulq %r11
|
| - addq %rax,%r15
|
| - adcq $0,%rdx
|
| - imulq %r12,%r9
|
| - addq %r10,%r15
|
| - adcq %rdx,%r9
|
| - movq %r13,%r10
|
| - movq %r14,%r11
|
| - movq %r15,%r12
|
| - andq $3,%r12
|
| - movq %r15,%r13
|
| - andq $-4,%r13
|
| - movq %r9,%r14
|
| - shrdq $2,%r9,%r15
|
| - shrq $2,%r9
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $0,%r12
|
| - addq %r15,%r10
|
| - adcq %r9,%r11
|
| - adcq $0,%r12
|
| - paddd %xmm4,%xmm0
|
| - pxor %xmm0,%xmm12
|
| - pshufb .rol16(%rip),%xmm12
|
| - paddd %xmm12,%xmm8
|
| - pxor %xmm8,%xmm4
|
| - movdqa %xmm4,%xmm3
|
| - pslld $12,%xmm3
|
| - psrld $20,%xmm4
|
| - pxor %xmm3,%xmm4
|
| - paddd %xmm4,%xmm0
|
| - pxor %xmm0,%xmm12
|
| - pshufb .rol8(%rip),%xmm12
|
| - paddd %xmm12,%xmm8
|
| - pxor %xmm8,%xmm4
|
| - movdqa %xmm4,%xmm3
|
| - pslld $7,%xmm3
|
| - psrld $25,%xmm4
|
| - pxor %xmm3,%xmm4
|
| -.byte 102,15,58,15,228,12
|
| -.byte 102,69,15,58,15,192,8
|
| -.byte 102,69,15,58,15,228,4
|
| - paddd %xmm5,%xmm1
|
| - pxor %xmm1,%xmm13
|
| - pshufb .rol16(%rip),%xmm13
|
| - paddd %xmm13,%xmm9
|
| - pxor %xmm9,%xmm5
|
| - movdqa %xmm5,%xmm3
|
| - pslld $12,%xmm3
|
| - psrld $20,%xmm5
|
| - pxor %xmm3,%xmm5
|
| - paddd %xmm5,%xmm1
|
| - pxor %xmm1,%xmm13
|
| - pshufb .rol8(%rip),%xmm13
|
| - paddd %xmm13,%xmm9
|
| - pxor %xmm9,%xmm5
|
| - movdqa %xmm5,%xmm3
|
| - pslld $7,%xmm3
|
| - psrld $25,%xmm5
|
| - pxor %xmm3,%xmm5
|
| -.byte 102,15,58,15,237,12
|
| -.byte 102,69,15,58,15,201,8
|
| -.byte 102,69,15,58,15,237,4
|
| -
|
| - leaq 16(%rdi),%rdi
|
| - decq %rcx
|
| - jg 1b
|
| - decq %r8
|
| - jge 2b
|
| - paddd .chacha20_consts(%rip),%xmm1
|
| - paddd 48(%rbp),%xmm5
|
| - paddd 64(%rbp),%xmm9
|
| - paddd 112(%rbp),%xmm13
|
| - paddd .chacha20_consts(%rip),%xmm0
|
| - paddd 48(%rbp),%xmm4
|
| - paddd 64(%rbp),%xmm8
|
| - paddd 96(%rbp),%xmm12
|
| - movdqu 0 + 0(%rsi),%xmm3
|
| - movdqu 16 + 0(%rsi),%xmm7
|
| - movdqu 32 + 0(%rsi),%xmm11
|
| - movdqu 48 + 0(%rsi),%xmm15
|
| - pxor %xmm3,%xmm1
|
| - pxor %xmm7,%xmm5
|
| - pxor %xmm11,%xmm9
|
| - pxor %xmm13,%xmm15
|
| - movdqu %xmm1,0 + 0(%rdi)
|
| - movdqu %xmm5,16 + 0(%rdi)
|
| - movdqu %xmm9,32 + 0(%rdi)
|
| - movdqu %xmm15,48 + 0(%rdi)
|
| -
|
| - movq $64,%rcx
|
| - subq $64,%rbx
|
| - leaq 64(%rsi),%rsi
|
| - jmp seal_sse_128_seal_hash
|
| -3:
|
| -
|
| -seal_sse_tail_192:
|
| - movdqa .chacha20_consts(%rip),%xmm0
|
| - movdqa 48(%rbp),%xmm4
|
| - movdqa 64(%rbp),%xmm8
|
| - movdqa %xmm0,%xmm1
|
| - movdqa %xmm4,%xmm5
|
| - movdqa %xmm8,%xmm9
|
| - movdqa %xmm0,%xmm2
|
| - movdqa %xmm4,%xmm6
|
| - movdqa %xmm8,%xmm10
|
| - movdqa 96(%rbp),%xmm14
|
| - paddd .sse_inc(%rip),%xmm14
|
| - movdqa %xmm14,%xmm13
|
| - paddd .sse_inc(%rip),%xmm13
|
| - movdqa %xmm13,%xmm12
|
| - paddd .sse_inc(%rip),%xmm12
|
| - movdqa %xmm12,96(%rbp)
|
| - movdqa %xmm13,112(%rbp)
|
| - movdqa %xmm14,128(%rbp)
|
| -
|
| -1:
|
| - addq 0(%rdi),%r10
|
| - adcq 8+0(%rdi),%r11
|
| - adcq $1,%r12
|
| - movq 0+0(%rbp),%rax
|
| - movq %rax,%r15
|
| - mulq %r10
|
| - movq %rax,%r13
|
| - movq %rdx,%r14
|
| - movq 0+0(%rbp),%rax
|
| - mulq %r11
|
| - imulq %r12,%r15
|
| - addq %rax,%r14
|
| - adcq %rdx,%r15
|
| - movq 8+0(%rbp),%rax
|
| - movq %rax,%r9
|
| - mulq %r10
|
| - addq %rax,%r14
|
| - adcq $0,%rdx
|
| - movq %rdx,%r10
|
| - movq 8+0(%rbp),%rax
|
| - mulq %r11
|
| - addq %rax,%r15
|
| - adcq $0,%rdx
|
| - imulq %r12,%r9
|
| - addq %r10,%r15
|
| - adcq %rdx,%r9
|
| - movq %r13,%r10
|
| - movq %r14,%r11
|
| - movq %r15,%r12
|
| - andq $3,%r12
|
| - movq %r15,%r13
|
| - andq $-4,%r13
|
| - movq %r9,%r14
|
| - shrdq $2,%r9,%r15
|
| - shrq $2,%r9
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $0,%r12
|
| - addq %r15,%r10
|
| - adcq %r9,%r11
|
| - adcq $0,%r12
|
| -
|
| - leaq 16(%rdi),%rdi
|
| -2:
|
| - paddd %xmm4,%xmm0
|
| - pxor %xmm0,%xmm12
|
| - pshufb .rol16(%rip),%xmm12
|
| - paddd %xmm12,%xmm8
|
| - pxor %xmm8,%xmm4
|
| - movdqa %xmm4,%xmm3
|
| - pslld $12,%xmm3
|
| - psrld $20,%xmm4
|
| - pxor %xmm3,%xmm4
|
| - paddd %xmm4,%xmm0
|
| - pxor %xmm0,%xmm12
|
| - pshufb .rol8(%rip),%xmm12
|
| - paddd %xmm12,%xmm8
|
| - pxor %xmm8,%xmm4
|
| - movdqa %xmm4,%xmm3
|
| - pslld $7,%xmm3
|
| - psrld $25,%xmm4
|
| - pxor %xmm3,%xmm4
|
| -.byte 102,15,58,15,228,4
|
| -.byte 102,69,15,58,15,192,8
|
| -.byte 102,69,15,58,15,228,12
|
| - paddd %xmm5,%xmm1
|
| - pxor %xmm1,%xmm13
|
| - pshufb .rol16(%rip),%xmm13
|
| - paddd %xmm13,%xmm9
|
| - pxor %xmm9,%xmm5
|
| - movdqa %xmm5,%xmm3
|
| - pslld $12,%xmm3
|
| - psrld $20,%xmm5
|
| - pxor %xmm3,%xmm5
|
| - paddd %xmm5,%xmm1
|
| - pxor %xmm1,%xmm13
|
| - pshufb .rol8(%rip),%xmm13
|
| - paddd %xmm13,%xmm9
|
| - pxor %xmm9,%xmm5
|
| - movdqa %xmm5,%xmm3
|
| - pslld $7,%xmm3
|
| - psrld $25,%xmm5
|
| - pxor %xmm3,%xmm5
|
| -.byte 102,15,58,15,237,4
|
| -.byte 102,69,15,58,15,201,8
|
| -.byte 102,69,15,58,15,237,12
|
| - paddd %xmm6,%xmm2
|
| - pxor %xmm2,%xmm14
|
| - pshufb .rol16(%rip),%xmm14
|
| - paddd %xmm14,%xmm10
|
| - pxor %xmm10,%xmm6
|
| - movdqa %xmm6,%xmm3
|
| - pslld $12,%xmm3
|
| - psrld $20,%xmm6
|
| - pxor %xmm3,%xmm6
|
| - paddd %xmm6,%xmm2
|
| - pxor %xmm2,%xmm14
|
| - pshufb .rol8(%rip),%xmm14
|
| - paddd %xmm14,%xmm10
|
| - pxor %xmm10,%xmm6
|
| - movdqa %xmm6,%xmm3
|
| - pslld $7,%xmm3
|
| - psrld $25,%xmm6
|
| - pxor %xmm3,%xmm6
|
| -.byte 102,15,58,15,246,4
|
| -.byte 102,69,15,58,15,210,8
|
| -.byte 102,69,15,58,15,246,12
|
| - addq 0(%rdi),%r10
|
| - adcq 8+0(%rdi),%r11
|
| - adcq $1,%r12
|
| - movq 0+0(%rbp),%rax
|
| - movq %rax,%r15
|
| - mulq %r10
|
| - movq %rax,%r13
|
| - movq %rdx,%r14
|
| - movq 0+0(%rbp),%rax
|
| - mulq %r11
|
| - imulq %r12,%r15
|
| - addq %rax,%r14
|
| - adcq %rdx,%r15
|
| - movq 8+0(%rbp),%rax
|
| - movq %rax,%r9
|
| - mulq %r10
|
| - addq %rax,%r14
|
| - adcq $0,%rdx
|
| - movq %rdx,%r10
|
| - movq 8+0(%rbp),%rax
|
| - mulq %r11
|
| - addq %rax,%r15
|
| - adcq $0,%rdx
|
| - imulq %r12,%r9
|
| - addq %r10,%r15
|
| - adcq %rdx,%r9
|
| - movq %r13,%r10
|
| - movq %r14,%r11
|
| - movq %r15,%r12
|
| - andq $3,%r12
|
| - movq %r15,%r13
|
| - andq $-4,%r13
|
| - movq %r9,%r14
|
| - shrdq $2,%r9,%r15
|
| - shrq $2,%r9
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $0,%r12
|
| - addq %r15,%r10
|
| - adcq %r9,%r11
|
| - adcq $0,%r12
|
| - paddd %xmm4,%xmm0
|
| - pxor %xmm0,%xmm12
|
| - pshufb .rol16(%rip),%xmm12
|
| - paddd %xmm12,%xmm8
|
| - pxor %xmm8,%xmm4
|
| - movdqa %xmm4,%xmm3
|
| - pslld $12,%xmm3
|
| - psrld $20,%xmm4
|
| - pxor %xmm3,%xmm4
|
| - paddd %xmm4,%xmm0
|
| - pxor %xmm0,%xmm12
|
| - pshufb .rol8(%rip),%xmm12
|
| - paddd %xmm12,%xmm8
|
| - pxor %xmm8,%xmm4
|
| - movdqa %xmm4,%xmm3
|
| - pslld $7,%xmm3
|
| - psrld $25,%xmm4
|
| - pxor %xmm3,%xmm4
|
| -.byte 102,15,58,15,228,12
|
| -.byte 102,69,15,58,15,192,8
|
| -.byte 102,69,15,58,15,228,4
|
| - paddd %xmm5,%xmm1
|
| - pxor %xmm1,%xmm13
|
| - pshufb .rol16(%rip),%xmm13
|
| - paddd %xmm13,%xmm9
|
| - pxor %xmm9,%xmm5
|
| - movdqa %xmm5,%xmm3
|
| - pslld $12,%xmm3
|
| - psrld $20,%xmm5
|
| - pxor %xmm3,%xmm5
|
| - paddd %xmm5,%xmm1
|
| - pxor %xmm1,%xmm13
|
| - pshufb .rol8(%rip),%xmm13
|
| - paddd %xmm13,%xmm9
|
| - pxor %xmm9,%xmm5
|
| - movdqa %xmm5,%xmm3
|
| - pslld $7,%xmm3
|
| - psrld $25,%xmm5
|
| - pxor %xmm3,%xmm5
|
| -.byte 102,15,58,15,237,12
|
| -.byte 102,69,15,58,15,201,8
|
| -.byte 102,69,15,58,15,237,4
|
| - paddd %xmm6,%xmm2
|
| - pxor %xmm2,%xmm14
|
| - pshufb .rol16(%rip),%xmm14
|
| - paddd %xmm14,%xmm10
|
| - pxor %xmm10,%xmm6
|
| - movdqa %xmm6,%xmm3
|
| - pslld $12,%xmm3
|
| - psrld $20,%xmm6
|
| - pxor %xmm3,%xmm6
|
| - paddd %xmm6,%xmm2
|
| - pxor %xmm2,%xmm14
|
| - pshufb .rol8(%rip),%xmm14
|
| - paddd %xmm14,%xmm10
|
| - pxor %xmm10,%xmm6
|
| - movdqa %xmm6,%xmm3
|
| - pslld $7,%xmm3
|
| - psrld $25,%xmm6
|
| - pxor %xmm3,%xmm6
|
| -.byte 102,15,58,15,246,12
|
| -.byte 102,69,15,58,15,210,8
|
| -.byte 102,69,15,58,15,246,4
|
| -
|
| - leaq 16(%rdi),%rdi
|
| - decq %rcx
|
| - jg 1b
|
| - decq %r8
|
| - jge 2b
|
| - paddd .chacha20_consts(%rip),%xmm2
|
| - paddd 48(%rbp),%xmm6
|
| - paddd 64(%rbp),%xmm10
|
| - paddd 128(%rbp),%xmm14
|
| - paddd .chacha20_consts(%rip),%xmm1
|
| - paddd 48(%rbp),%xmm5
|
| - paddd 64(%rbp),%xmm9
|
| - paddd 112(%rbp),%xmm13
|
| - paddd .chacha20_consts(%rip),%xmm0
|
| - paddd 48(%rbp),%xmm4
|
| - paddd 64(%rbp),%xmm8
|
| - paddd 96(%rbp),%xmm12
|
| - movdqu 0 + 0(%rsi),%xmm3
|
| - movdqu 16 + 0(%rsi),%xmm7
|
| - movdqu 32 + 0(%rsi),%xmm11
|
| - movdqu 48 + 0(%rsi),%xmm15
|
| - pxor %xmm3,%xmm2
|
| - pxor %xmm7,%xmm6
|
| - pxor %xmm11,%xmm10
|
| - pxor %xmm14,%xmm15
|
| - movdqu %xmm2,0 + 0(%rdi)
|
| - movdqu %xmm6,16 + 0(%rdi)
|
| - movdqu %xmm10,32 + 0(%rdi)
|
| - movdqu %xmm15,48 + 0(%rdi)
|
| - movdqu 0 + 64(%rsi),%xmm3
|
| - movdqu 16 + 64(%rsi),%xmm7
|
| - movdqu 32 + 64(%rsi),%xmm11
|
| - movdqu 48 + 64(%rsi),%xmm15
|
| - pxor %xmm3,%xmm1
|
| - pxor %xmm7,%xmm5
|
| - pxor %xmm11,%xmm9
|
| - pxor %xmm13,%xmm15
|
| - movdqu %xmm1,0 + 64(%rdi)
|
| - movdqu %xmm5,16 + 64(%rdi)
|
| - movdqu %xmm9,32 + 64(%rdi)
|
| - movdqu %xmm15,48 + 64(%rdi)
|
| -
|
| - movq $128,%rcx
|
| - subq $128,%rbx
|
| - leaq 128(%rsi),%rsi
|
| -
|
| -seal_sse_128_seal_hash:
|
| - cmpq $16,%rcx
|
| - jb seal_sse_128_seal
|
| - addq 0(%rdi),%r10
|
| - adcq 8+0(%rdi),%r11
|
| - adcq $1,%r12
|
| - movq 0+0(%rbp),%rax
|
| - movq %rax,%r15
|
| - mulq %r10
|
| - movq %rax,%r13
|
| - movq %rdx,%r14
|
| - movq 0+0(%rbp),%rax
|
| - mulq %r11
|
| - imulq %r12,%r15
|
| - addq %rax,%r14
|
| - adcq %rdx,%r15
|
| - movq 8+0(%rbp),%rax
|
| - movq %rax,%r9
|
| - mulq %r10
|
| - addq %rax,%r14
|
| - adcq $0,%rdx
|
| - movq %rdx,%r10
|
| - movq 8+0(%rbp),%rax
|
| - mulq %r11
|
| - addq %rax,%r15
|
| - adcq $0,%rdx
|
| - imulq %r12,%r9
|
| - addq %r10,%r15
|
| - adcq %rdx,%r9
|
| - movq %r13,%r10
|
| - movq %r14,%r11
|
| - movq %r15,%r12
|
| - andq $3,%r12
|
| - movq %r15,%r13
|
| - andq $-4,%r13
|
| - movq %r9,%r14
|
| - shrdq $2,%r9,%r15
|
| - shrq $2,%r9
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $0,%r12
|
| - addq %r15,%r10
|
| - adcq %r9,%r11
|
| - adcq $0,%r12
|
| -
|
| - subq $16,%rcx
|
| - leaq 16(%rdi),%rdi
|
| - jmp seal_sse_128_seal_hash
|
| -
|
| -seal_sse_128_seal:
|
| - cmpq $16,%rbx
|
| - jb seal_sse_tail_16
|
| - subq $16,%rbx
|
| -
|
| - movdqu 0(%rsi),%xmm3
|
| - pxor %xmm3,%xmm0
|
| - movdqu %xmm0,0(%rdi)
|
| -
|
| - addq 0(%rdi),%r10
|
| - adcq 8(%rdi),%r11
|
| - adcq $1,%r12
|
| - leaq 16(%rsi),%rsi
|
| - leaq 16(%rdi),%rdi
|
| - movq 0+0(%rbp),%rax
|
| - movq %rax,%r15
|
| - mulq %r10
|
| - movq %rax,%r13
|
| - movq %rdx,%r14
|
| - movq 0+0(%rbp),%rax
|
| - mulq %r11
|
| - imulq %r12,%r15
|
| - addq %rax,%r14
|
| - adcq %rdx,%r15
|
| - movq 8+0(%rbp),%rax
|
| - movq %rax,%r9
|
| - mulq %r10
|
| - addq %rax,%r14
|
| - adcq $0,%rdx
|
| - movq %rdx,%r10
|
| - movq 8+0(%rbp),%rax
|
| - mulq %r11
|
| - addq %rax,%r15
|
| - adcq $0,%rdx
|
| - imulq %r12,%r9
|
| - addq %r10,%r15
|
| - adcq %rdx,%r9
|
| - movq %r13,%r10
|
| - movq %r14,%r11
|
| - movq %r15,%r12
|
| - andq $3,%r12
|
| - movq %r15,%r13
|
| - andq $-4,%r13
|
| - movq %r9,%r14
|
| - shrdq $2,%r9,%r15
|
| - shrq $2,%r9
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $0,%r12
|
| - addq %r15,%r10
|
| - adcq %r9,%r11
|
| - adcq $0,%r12
|
| -
|
| -
|
| - movdqa %xmm4,%xmm0
|
| - movdqa %xmm8,%xmm4
|
| - movdqa %xmm12,%xmm8
|
| - movdqa %xmm1,%xmm12
|
| - movdqa %xmm5,%xmm1
|
| - movdqa %xmm9,%xmm5
|
| - movdqa %xmm13,%xmm9
|
| - jmp seal_sse_128_seal
|
| -
|
| -seal_sse_tail_16:
|
| - testq %rbx,%rbx
|
| - jz seal_sse_finalize
|
| -
|
| - movq %rbx,%r8
|
| - shlq $4,%r8
|
| - leaq .and_masks(%rip),%r13
|
| - movq %rbx,%rcx
|
| - leaq -1(%rsi,%rbx), %rsi
|
| - pxor %xmm15,%xmm15
|
| -1:
|
| - pslldq $1,%xmm15
|
| - pinsrb $0,(%rsi),%xmm15
|
| - leaq -1(%rsi),%rsi
|
| - decq %rcx
|
| - jne 1b
|
| -
|
| -
|
| - pxor %xmm0,%xmm15
|
| -
|
| -
|
| - movq %rbx,%rcx
|
| - movdqu %xmm15,%xmm0
|
| -2:
|
| - pextrb $0,%xmm0,(%rdi)
|
| - psrldq $1,%xmm0
|
| - addq $1,%rdi
|
| - subq $1,%rcx
|
| - jnz 2b
|
| -
|
| - pand -16(%r13,%r8), %xmm15
|
| -.byte 102,77,15,126,253
|
| - pextrq $1,%xmm15,%r14
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $1,%r12
|
| - movq 0+0(%rbp),%rax
|
| - movq %rax,%r15
|
| - mulq %r10
|
| - movq %rax,%r13
|
| - movq %rdx,%r14
|
| - movq 0+0(%rbp),%rax
|
| - mulq %r11
|
| - imulq %r12,%r15
|
| - addq %rax,%r14
|
| - adcq %rdx,%r15
|
| - movq 8+0(%rbp),%rax
|
| - movq %rax,%r9
|
| - mulq %r10
|
| - addq %rax,%r14
|
| - adcq $0,%rdx
|
| - movq %rdx,%r10
|
| - movq 8+0(%rbp),%rax
|
| - mulq %r11
|
| - addq %rax,%r15
|
| - adcq $0,%rdx
|
| - imulq %r12,%r9
|
| - addq %r10,%r15
|
| - adcq %rdx,%r9
|
| - movq %r13,%r10
|
| - movq %r14,%r11
|
| - movq %r15,%r12
|
| - andq $3,%r12
|
| - movq %r15,%r13
|
| - andq $-4,%r13
|
| - movq %r9,%r14
|
| - shrdq $2,%r9,%r15
|
| - shrq $2,%r9
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $0,%r12
|
| - addq %r15,%r10
|
| - adcq %r9,%r11
|
| - adcq $0,%r12
|
| -
|
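seal_sse_tail_16 above handles a final partial block of 1 to 15 bytes: it gathers the remaining plaintext bytes into %xmm15 one at a time, XORs them with the leftover keystream, writes the ciphertext bytes out, and then hashes the partial ciphertext block after masking it to its real length (the mask is selected as .and_masks + 16*(len-1) via the shlq $4 and -16(%r13,%r8) addressing). The unused bytes are therefore zero, matching the AEAD construction's rule of zero-padding the ciphertext to a 16-byte boundary before MACing, while the 2^128 pad bit is still added as for a full block. A short C sketch of the same step, reusing the poly1305_block sketch shown earlier (names are illustrative):

    #include <stdint.h>
    #include <string.h>

    /* Absorb a final partial ciphertext block of len (1..15) bytes,
     * zero-padded to 16 bytes as the AEAD construction requires. */
    static void poly1305_partial(uint64_t h[3], const uint64_t r[2],
                                 const uint8_t *ct, size_t len) {
      uint8_t block[16] = {0};
      memcpy(block, ct, len);        /* same effect as the pand with .and_masks */
      poly1305_block(h, r, block, 1);  /* pad bit at 2^128, as for full blocks */
    }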
| -seal_sse_finalize:
|
| - addq 32(%rbp),%r10
|
| - adcq 8+32(%rbp),%r11
|
| - adcq $1,%r12
|
| - movq 0+0(%rbp),%rax
|
| - movq %rax,%r15
|
| - mulq %r10
|
| - movq %rax,%r13
|
| - movq %rdx,%r14
|
| - movq 0+0(%rbp),%rax
|
| - mulq %r11
|
| - imulq %r12,%r15
|
| - addq %rax,%r14
|
| - adcq %rdx,%r15
|
| - movq 8+0(%rbp),%rax
|
| - movq %rax,%r9
|
| - mulq %r10
|
| - addq %rax,%r14
|
| - adcq $0,%rdx
|
| - movq %rdx,%r10
|
| - movq 8+0(%rbp),%rax
|
| - mulq %r11
|
| - addq %rax,%r15
|
| - adcq $0,%rdx
|
| - imulq %r12,%r9
|
| - addq %r10,%r15
|
| - adcq %rdx,%r9
|
| - movq %r13,%r10
|
| - movq %r14,%r11
|
| - movq %r15,%r12
|
| - andq $3,%r12
|
| - movq %r15,%r13
|
| - andq $-4,%r13
|
| - movq %r9,%r14
|
| - shrdq $2,%r9,%r15
|
| - shrq $2,%r9
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $0,%r12
|
| - addq %r15,%r10
|
| - adcq %r9,%r11
|
| - adcq $0,%r12
|
| -
|
| -
|
| - movq %r10,%r13
|
| - movq %r11,%r14
|
| - movq %r12,%r15
|
| - subq $-5,%r10
|
| - sbbq $-1,%r11
|
| - sbbq $3,%r12
|
| - cmovcq %r13,%r10
|
| - cmovcq %r14,%r11
|
| - cmovcq %r15,%r12
|
| -
|
| - addq 0+16(%rbp),%r10
|
| - adcq 8+16(%rbp),%r11
|
| -
|
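seal_sse_finalize above first absorbs one last Poly1305 block read back from 32(%rbp) (the encoded AD and ciphertext lengths saved earlier in the function), then performs the final reduction: the subq/sbbq/cmovcq triple computes h - (2^130 - 5) and keeps it only if the subtraction did not borrow, and the closing addq/adcq pair adds the second half of the one-time key (s, at 16(%rbp)) to form the 128-bit tag written through %r9. A branch-free C sketch of that finalization, assuming the same three-limb accumulator and a little-endian host (names are illustrative):

    #include <stdint.h>
    #include <string.h>

    typedef unsigned __int128 u128;

    /* Conditionally subtract p = 2^130 - 5 from h, then add s and keep the
     * low 128 bits as the tag. */
    static void poly1305_finish(uint64_t h[3], const uint64_t s[2],
                                uint8_t tag[16]) {
      /* g = h + 5; h >= p exactly when g reaches 2^130, i.e. g2 >= 4
       * (h[2] is only a few bits after partial reduction, so g2 >> 2 is 0 or 1). */
      u128 t = (u128)h[0] + 5;
      uint64_t g0 = (uint64_t)t;
      t = (u128)h[1] + (t >> 64);
      uint64_t g1 = (uint64_t)t;
      uint64_t g2 = h[2] + (uint64_t)(t >> 64);

      /* Constant-time select, standing in for the cmovcq instructions. */
      uint64_t mask = 0 - (g2 >> 2);
      uint64_t h0 = (g0 & mask) | (h[0] & ~mask);
      uint64_t h1 = (g1 & mask) | (h[1] & ~mask);

      /* tag = (h mod p) + s, truncated to 128 bits. */
      t = (u128)h0 + s[0];
      uint64_t t0 = (uint64_t)t;
      uint64_t t1 = h1 + s[1] + (uint64_t)(t >> 64);
      memcpy(tag, &t0, 8);
      memcpy(tag + 8, &t1, 8);
    }

Using cmovcq (here, the mask) keeps the comparison against p branch-free, so the time taken does not depend on the accumulator's value.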
| - addq $288 + 32,%rsp
|
| -.cfi_adjust_cfa_offset -(288 + 32)
|
| - popq %r9
|
| -.cfi_adjust_cfa_offset -8
|
| - movq %r10,0(%r9)
|
| - movq %r11,8(%r9)
|
| -
|
| - popq %r15
|
| -.cfi_adjust_cfa_offset -8
|
| - popq %r14
|
| -.cfi_adjust_cfa_offset -8
|
| - popq %r13
|
| -.cfi_adjust_cfa_offset -8
|
| - popq %r12
|
| -.cfi_adjust_cfa_offset -8
|
| - popq %rbx
|
| -.cfi_adjust_cfa_offset -8
|
| - popq %rbp
|
| -.cfi_adjust_cfa_offset -8
|
| - .byte 0xf3,0xc3
|
| -.cfi_adjust_cfa_offset (8 * 6) + 288 + 32
|
| -
|
| -seal_sse_128:
|
| - movdqu .chacha20_consts(%rip),%xmm0
|
| - movdqa %xmm0,%xmm1
|
| - movdqa %xmm0,%xmm2
|
| - movdqu 0(%r9),%xmm4
|
| - movdqa %xmm4,%xmm5
|
| - movdqa %xmm4,%xmm6
|
| - movdqu 16(%r9),%xmm8
|
| - movdqa %xmm8,%xmm9
|
| - movdqa %xmm8,%xmm10
|
| - movdqu 32(%r9),%xmm14
|
| - movdqa %xmm14,%xmm12
|
| - paddd .sse_inc(%rip),%xmm12
|
| - movdqa %xmm12,%xmm13
|
| - paddd .sse_inc(%rip),%xmm13
|
| - movdqa %xmm4,%xmm7
|
| - movdqa %xmm8,%xmm11
|
| - movdqa %xmm12,%xmm15
|
| - movq $10,%r10
|
| -1:
|
| - paddd %xmm4,%xmm0
|
| - pxor %xmm0,%xmm12
|
| - pshufb .rol16(%rip),%xmm12
|
| - paddd %xmm12,%xmm8
|
| - pxor %xmm8,%xmm4
|
| - movdqa %xmm4,%xmm3
|
| - pslld $12,%xmm3
|
| - psrld $20,%xmm4
|
| - pxor %xmm3,%xmm4
|
| - paddd %xmm4,%xmm0
|
| - pxor %xmm0,%xmm12
|
| - pshufb .rol8(%rip),%xmm12
|
| - paddd %xmm12,%xmm8
|
| - pxor %xmm8,%xmm4
|
| - movdqa %xmm4,%xmm3
|
| - pslld $7,%xmm3
|
| - psrld $25,%xmm4
|
| - pxor %xmm3,%xmm4
|
| -.byte 102,15,58,15,228,4
|
| -.byte 102,69,15,58,15,192,8
|
| -.byte 102,69,15,58,15,228,12
|
| - paddd %xmm5,%xmm1
|
| - pxor %xmm1,%xmm13
|
| - pshufb .rol16(%rip),%xmm13
|
| - paddd %xmm13,%xmm9
|
| - pxor %xmm9,%xmm5
|
| - movdqa %xmm5,%xmm3
|
| - pslld $12,%xmm3
|
| - psrld $20,%xmm5
|
| - pxor %xmm3,%xmm5
|
| - paddd %xmm5,%xmm1
|
| - pxor %xmm1,%xmm13
|
| - pshufb .rol8(%rip),%xmm13
|
| - paddd %xmm13,%xmm9
|
| - pxor %xmm9,%xmm5
|
| - movdqa %xmm5,%xmm3
|
| - pslld $7,%xmm3
|
| - psrld $25,%xmm5
|
| - pxor %xmm3,%xmm5
|
| -.byte 102,15,58,15,237,4
|
| -.byte 102,69,15,58,15,201,8
|
| -.byte 102,69,15,58,15,237,12
|
| - paddd %xmm6,%xmm2
|
| - pxor %xmm2,%xmm14
|
| - pshufb .rol16(%rip),%xmm14
|
| - paddd %xmm14,%xmm10
|
| - pxor %xmm10,%xmm6
|
| - movdqa %xmm6,%xmm3
|
| - pslld $12,%xmm3
|
| - psrld $20,%xmm6
|
| - pxor %xmm3,%xmm6
|
| - paddd %xmm6,%xmm2
|
| - pxor %xmm2,%xmm14
|
| - pshufb .rol8(%rip),%xmm14
|
| - paddd %xmm14,%xmm10
|
| - pxor %xmm10,%xmm6
|
| - movdqa %xmm6,%xmm3
|
| - pslld $7,%xmm3
|
| - psrld $25,%xmm6
|
| - pxor %xmm3,%xmm6
|
| -.byte 102,15,58,15,246,4
|
| -.byte 102,69,15,58,15,210,8
|
| -.byte 102,69,15,58,15,246,12
|
| - paddd %xmm4,%xmm0
|
| - pxor %xmm0,%xmm12
|
| - pshufb .rol16(%rip),%xmm12
|
| - paddd %xmm12,%xmm8
|
| - pxor %xmm8,%xmm4
|
| - movdqa %xmm4,%xmm3
|
| - pslld $12,%xmm3
|
| - psrld $20,%xmm4
|
| - pxor %xmm3,%xmm4
|
| - paddd %xmm4,%xmm0
|
| - pxor %xmm0,%xmm12
|
| - pshufb .rol8(%rip),%xmm12
|
| - paddd %xmm12,%xmm8
|
| - pxor %xmm8,%xmm4
|
| - movdqa %xmm4,%xmm3
|
| - pslld $7,%xmm3
|
| - psrld $25,%xmm4
|
| - pxor %xmm3,%xmm4
|
| -.byte 102,15,58,15,228,12
|
| -.byte 102,69,15,58,15,192,8
|
| -.byte 102,69,15,58,15,228,4
|
| - paddd %xmm5,%xmm1
|
| - pxor %xmm1,%xmm13
|
| - pshufb .rol16(%rip),%xmm13
|
| - paddd %xmm13,%xmm9
|
| - pxor %xmm9,%xmm5
|
| - movdqa %xmm5,%xmm3
|
| - pslld $12,%xmm3
|
| - psrld $20,%xmm5
|
| - pxor %xmm3,%xmm5
|
| - paddd %xmm5,%xmm1
|
| - pxor %xmm1,%xmm13
|
| - pshufb .rol8(%rip),%xmm13
|
| - paddd %xmm13,%xmm9
|
| - pxor %xmm9,%xmm5
|
| - movdqa %xmm5,%xmm3
|
| - pslld $7,%xmm3
|
| - psrld $25,%xmm5
|
| - pxor %xmm3,%xmm5
|
| -.byte 102,15,58,15,237,12
|
| -.byte 102,69,15,58,15,201,8
|
| -.byte 102,69,15,58,15,237,4
|
| - paddd %xmm6,%xmm2
|
| - pxor %xmm2,%xmm14
|
| - pshufb .rol16(%rip),%xmm14
|
| - paddd %xmm14,%xmm10
|
| - pxor %xmm10,%xmm6
|
| - movdqa %xmm6,%xmm3
|
| - pslld $12,%xmm3
|
| - psrld $20,%xmm6
|
| - pxor %xmm3,%xmm6
|
| - paddd %xmm6,%xmm2
|
| - pxor %xmm2,%xmm14
|
| - pshufb .rol8(%rip),%xmm14
|
| - paddd %xmm14,%xmm10
|
| - pxor %xmm10,%xmm6
|
| - movdqa %xmm6,%xmm3
|
| - pslld $7,%xmm3
|
| - psrld $25,%xmm6
|
| - pxor %xmm3,%xmm6
|
| -.byte 102,15,58,15,246,12
|
| -.byte 102,69,15,58,15,210,8
|
| -.byte 102,69,15,58,15,246,4
|
| -
|
| - decq %r10
|
| - jnz 1b
|
| - paddd .chacha20_consts(%rip),%xmm0
|
| - paddd .chacha20_consts(%rip),%xmm1
|
| - paddd .chacha20_consts(%rip),%xmm2
|
| - paddd %xmm7,%xmm4
|
| - paddd %xmm7,%xmm5
|
| - paddd %xmm7,%xmm6
|
| - paddd %xmm11,%xmm8
|
| - paddd %xmm11,%xmm9
|
| - paddd %xmm15,%xmm12
|
| - paddd .sse_inc(%rip),%xmm15
|
| - paddd %xmm15,%xmm13
|
| -
|
| - pand .clamp(%rip),%xmm2
|
| - movdqa %xmm2,0(%rbp)
|
| - movdqa %xmm6,16(%rbp)
|
| -
|
| - movq %r8,%r8
|
| - call poly_hash_ad_internal
|
| - jmp seal_sse_128_seal
|
| -.size chacha20_poly1305_seal, .-chacha20_poly1305_seal
|
| -
|
| -
|
| -.type chacha20_poly1305_open_avx2,@function
|
| -.align 64
|
| -chacha20_poly1305_open_avx2:
|
| - vzeroupper
|
| - vmovdqa .chacha20_consts(%rip),%ymm0
|
| - vbroadcasti128 0(%r9),%ymm4
|
| - vbroadcasti128 16(%r9),%ymm8
|
| - vbroadcasti128 32(%r9),%ymm12
|
| - vpaddd .avx2_init(%rip),%ymm12,%ymm12
|
| - cmpq $192,%rbx
|
| - jbe open_avx2_192
|
| - cmpq $320,%rbx
|
| - jbe open_avx2_320
|
| -
|
| - vmovdqa %ymm4,64(%rbp)
|
| - vmovdqa %ymm8,96(%rbp)
|
| - vmovdqa %ymm12,160(%rbp)
|
| - movq $10,%r10
|
| -1:
|
| - vpaddd %ymm4,%ymm0,%ymm0
|
| - vpxor %ymm0,%ymm12,%ymm12
|
| - vpshufb .rol16(%rip),%ymm12,%ymm12
|
| - vpaddd %ymm12,%ymm8,%ymm8
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vpsrld $20,%ymm4,%ymm3
|
| - vpslld $12,%ymm4,%ymm4
|
| - vpxor %ymm3,%ymm4,%ymm4
|
| - vpaddd %ymm4,%ymm0,%ymm0
|
| - vpxor %ymm0,%ymm12,%ymm12
|
| - vpshufb .rol8(%rip),%ymm12,%ymm12
|
| - vpaddd %ymm12,%ymm8,%ymm8
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vpslld $7,%ymm4,%ymm3
|
| - vpsrld $25,%ymm4,%ymm4
|
| - vpxor %ymm3,%ymm4,%ymm4
|
| - vpalignr $12,%ymm12,%ymm12,%ymm12
|
| - vpalignr $8,%ymm8,%ymm8,%ymm8
|
| - vpalignr $4,%ymm4,%ymm4,%ymm4
|
| - vpaddd %ymm4,%ymm0,%ymm0
|
| - vpxor %ymm0,%ymm12,%ymm12
|
| - vpshufb .rol16(%rip),%ymm12,%ymm12
|
| - vpaddd %ymm12,%ymm8,%ymm8
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vpsrld $20,%ymm4,%ymm3
|
| - vpslld $12,%ymm4,%ymm4
|
| - vpxor %ymm3,%ymm4,%ymm4
|
| - vpaddd %ymm4,%ymm0,%ymm0
|
| - vpxor %ymm0,%ymm12,%ymm12
|
| - vpshufb .rol8(%rip),%ymm12,%ymm12
|
| - vpaddd %ymm12,%ymm8,%ymm8
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vpslld $7,%ymm4,%ymm3
|
| - vpsrld $25,%ymm4,%ymm4
|
| - vpxor %ymm3,%ymm4,%ymm4
|
| - vpalignr $4,%ymm12,%ymm12,%ymm12
|
| - vpalignr $8,%ymm8,%ymm8,%ymm8
|
| - vpalignr $12,%ymm4,%ymm4,%ymm4
|
| -
|
| - decq %r10
|
| - jne 1b
|
| - vpaddd .chacha20_consts(%rip),%ymm0,%ymm0
|
| - vpaddd 64(%rbp),%ymm4,%ymm4
|
| - vpaddd 96(%rbp),%ymm8,%ymm8
|
| - vpaddd 160(%rbp),%ymm12,%ymm12
|
| -
|
| - vperm2i128 $0x02,%ymm0,%ymm4,%ymm3
|
| -
|
| - vpand .clamp(%rip),%ymm3,%ymm3
|
| - vmovdqa %ymm3,0(%rbp)
|
| -
|
| - vperm2i128 $0x13,%ymm0,%ymm4,%ymm0
|
| - vperm2i128 $0x13,%ymm8,%ymm12,%ymm4
|
| -
|
| - movq %r8,%r8
|
| - call poly_hash_ad_internal
|
| - xorq %rcx,%rcx
|
| -
|
| -1:
|
| - addq 0(%rsi,%rcx), %r10
|
| - adcq 8+0(%rsi,%rcx), %r11
|
| - adcq $1,%r12
|
| - movq 0+0(%rbp),%rax
|
| - movq %rax,%r15
|
| - mulq %r10
|
| - movq %rax,%r13
|
| - movq %rdx,%r14
|
| - movq 0+0(%rbp),%rax
|
| - mulq %r11
|
| - imulq %r12,%r15
|
| - addq %rax,%r14
|
| - adcq %rdx,%r15
|
| - movq 8+0(%rbp),%rax
|
| - movq %rax,%r9
|
| - mulq %r10
|
| - addq %rax,%r14
|
| - adcq $0,%rdx
|
| - movq %rdx,%r10
|
| - movq 8+0(%rbp),%rax
|
| - mulq %r11
|
| - addq %rax,%r15
|
| - adcq $0,%rdx
|
| - imulq %r12,%r9
|
| - addq %r10,%r15
|
| - adcq %rdx,%r9
|
| - movq %r13,%r10
|
| - movq %r14,%r11
|
| - movq %r15,%r12
|
| - andq $3,%r12
|
| - movq %r15,%r13
|
| - andq $-4,%r13
|
| - movq %r9,%r14
|
| - shrdq $2,%r9,%r15
|
| - shrq $2,%r9
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $0,%r12
|
| - addq %r15,%r10
|
| - adcq %r9,%r11
|
| - adcq $0,%r12
|
| -
|
| - addq $16,%rcx
|
| - cmpq $64,%rcx
|
| - jne 1b
|
| -
|
| - vpxor 0(%rsi),%ymm0,%ymm0
|
| - vpxor 32(%rsi),%ymm4,%ymm4
|
| - vmovdqu %ymm0,0(%rdi)
|
| - vmovdqu %ymm4,32(%rdi)
|
| - leaq 64(%rsi),%rsi
|
| - leaq 64(%rdi),%rdi
|
| - subq $64,%rbx
|
| -1:
|
| -
|
| - cmpq $512,%rbx
|
| - jb 3f
|
| - vmovdqa .chacha20_consts(%rip),%ymm0
|
| - vmovdqa 64(%rbp),%ymm4
|
| - vmovdqa 96(%rbp),%ymm8
|
| - vmovdqa %ymm0,%ymm1
|
| - vmovdqa %ymm4,%ymm5
|
| - vmovdqa %ymm8,%ymm9
|
| - vmovdqa %ymm0,%ymm2
|
| - vmovdqa %ymm4,%ymm6
|
| - vmovdqa %ymm8,%ymm10
|
| - vmovdqa %ymm0,%ymm3
|
| - vmovdqa %ymm4,%ymm7
|
| - vmovdqa %ymm8,%ymm11
|
| - vmovdqa .avx2_inc(%rip),%ymm12
|
| - vpaddd 160(%rbp),%ymm12,%ymm15
|
| - vpaddd %ymm15,%ymm12,%ymm14
|
| - vpaddd %ymm14,%ymm12,%ymm13
|
| - vpaddd %ymm13,%ymm12,%ymm12
|
| - vmovdqa %ymm15,256(%rbp)
|
| - vmovdqa %ymm14,224(%rbp)
|
| - vmovdqa %ymm13,192(%rbp)
|
| - vmovdqa %ymm12,160(%rbp)
|
| -
|
| - xorq %rcx,%rcx
|
| -2:
|
| - addq 0*8(%rsi,%rcx), %r10
|
| - adcq 8+0*8(%rsi,%rcx), %r11
|
| - adcq $1,%r12
|
| - vmovdqa %ymm8,128(%rbp)
|
| - vmovdqa .rol16(%rip),%ymm8
|
| - vpaddd %ymm7,%ymm3,%ymm3
|
| - vpaddd %ymm6,%ymm2,%ymm2
|
| - vpaddd %ymm5,%ymm1,%ymm1
|
| - vpaddd %ymm4,%ymm0,%ymm0
|
| - vpxor %ymm3,%ymm15,%ymm15
|
| - vpxor %ymm2,%ymm14,%ymm14
|
| - vpxor %ymm1,%ymm13,%ymm13
|
| - vpxor %ymm0,%ymm12,%ymm12
|
| - movq 0+0(%rbp),%rdx
|
| - movq %rdx,%r15
|
| - mulxq %r10,%r13,%r14
|
| - mulxq %r11,%rax,%rdx
|
| - imulq %r12,%r15
|
| - addq %rax,%r14
|
| - adcq %rdx,%r15
|
| - vpshufb %ymm8,%ymm15,%ymm15
|
| - vpshufb %ymm8,%ymm14,%ymm14
|
| - vpshufb %ymm8,%ymm13,%ymm13
|
| - vpshufb %ymm8,%ymm12,%ymm12
|
| - vmovdqa 128(%rbp),%ymm8
|
| - vpaddd %ymm15,%ymm11,%ymm11
|
| - vpaddd %ymm14,%ymm10,%ymm10
|
| - vpaddd %ymm13,%ymm9,%ymm9
|
| - vpaddd %ymm12,%ymm8,%ymm8
|
| - movq 8+0(%rbp),%rdx
|
| - mulxq %r10,%r10,%rax
|
| - addq %r10,%r14
|
| - mulxq %r11,%r11,%r9
|
| - adcq %r11,%r15
|
| - adcq $0,%r9
|
| - imulq %r12,%rdx
|
| - vpxor %ymm11,%ymm7,%ymm7
|
| - vpxor %ymm10,%ymm6,%ymm6
|
| - vpxor %ymm9,%ymm5,%ymm5
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vmovdqa %ymm8,128(%rbp)
|
| - vpsrld $20,%ymm7,%ymm8
|
| - vpslld $32-20,%ymm7,%ymm7
|
| - vpxor %ymm8,%ymm7,%ymm7
|
| - vpsrld $20,%ymm6,%ymm8
|
| - vpslld $32-20,%ymm6,%ymm6
|
| - vpxor %ymm8,%ymm6,%ymm6
|
| - vpsrld $20,%ymm5,%ymm8
|
| - addq %rax,%r15
|
| - adcq %rdx,%r9
|
| - vpslld $32-20,%ymm5,%ymm5
|
| - vpxor %ymm8,%ymm5,%ymm5
|
| - vpsrld $20,%ymm4,%ymm8
|
| - vpslld $32-20,%ymm4,%ymm4
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vmovdqa .rol8(%rip),%ymm8
|
| - vpaddd %ymm7,%ymm3,%ymm3
|
| - vpaddd %ymm6,%ymm2,%ymm2
|
| - vpaddd %ymm5,%ymm1,%ymm1
|
| - vpaddd %ymm4,%ymm0,%ymm0
|
| - movq %r13,%r10
|
| - movq %r14,%r11
|
| - movq %r15,%r12
|
| - andq $3,%r12
|
| - movq %r15,%r13
|
| - andq $-4,%r13
|
| - movq %r9,%r14
|
| - shrdq $2,%r9,%r15
|
| - shrq $2,%r9
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $0,%r12
|
| - addq %r15,%r10
|
| - adcq %r9,%r11
|
| - adcq $0,%r12
|
| - vpxor %ymm3,%ymm15,%ymm15
|
| - vpxor %ymm2,%ymm14,%ymm14
|
| - vpxor %ymm1,%ymm13,%ymm13
|
| - vpxor %ymm0,%ymm12,%ymm12
|
| - vpshufb %ymm8,%ymm15,%ymm15
|
| - vpshufb %ymm8,%ymm14,%ymm14
|
| - vpshufb %ymm8,%ymm13,%ymm13
|
| - vpshufb %ymm8,%ymm12,%ymm12
|
| - vmovdqa 128(%rbp),%ymm8
|
| - addq 2*8(%rsi,%rcx), %r10
|
| - adcq 8+2*8(%rsi,%rcx), %r11
|
| - adcq $1,%r12
|
| - vpaddd %ymm15,%ymm11,%ymm11
|
| - vpaddd %ymm14,%ymm10,%ymm10
|
| - vpaddd %ymm13,%ymm9,%ymm9
|
| - vpaddd %ymm12,%ymm8,%ymm8
|
| - vpxor %ymm11,%ymm7,%ymm7
|
| - vpxor %ymm10,%ymm6,%ymm6
|
| - vpxor %ymm9,%ymm5,%ymm5
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - movq 0+0(%rbp),%rdx
|
| - movq %rdx,%r15
|
| - mulxq %r10,%r13,%r14
|
| - mulxq %r11,%rax,%rdx
|
| - imulq %r12,%r15
|
| - addq %rax,%r14
|
| - adcq %rdx,%r15
|
| - vmovdqa %ymm8,128(%rbp)
|
| - vpsrld $25,%ymm7,%ymm8
|
| - vpslld $32-25,%ymm7,%ymm7
|
| - vpxor %ymm8,%ymm7,%ymm7
|
| - vpsrld $25,%ymm6,%ymm8
|
| - vpslld $32-25,%ymm6,%ymm6
|
| - vpxor %ymm8,%ymm6,%ymm6
|
| - vpsrld $25,%ymm5,%ymm8
|
| - vpslld $32-25,%ymm5,%ymm5
|
| - vpxor %ymm8,%ymm5,%ymm5
|
| - vpsrld $25,%ymm4,%ymm8
|
| - vpslld $32-25,%ymm4,%ymm4
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vmovdqa 128(%rbp),%ymm8
|
| - vpalignr $4,%ymm7,%ymm7,%ymm7
|
| - vpalignr $8,%ymm11,%ymm11,%ymm11
|
| - vpalignr $12,%ymm15,%ymm15,%ymm15
|
| - vpalignr $4,%ymm6,%ymm6,%ymm6
|
| - movq 8+0(%rbp),%rdx
|
| - mulxq %r10,%r10,%rax
|
| - addq %r10,%r14
|
| - mulxq %r11,%r11,%r9
|
| - adcq %r11,%r15
|
| - adcq $0,%r9
|
| - imulq %r12,%rdx
|
| - vpalignr $8,%ymm10,%ymm10,%ymm10
|
| - vpalignr $12,%ymm14,%ymm14,%ymm14
|
| - vpalignr $4,%ymm5,%ymm5,%ymm5
|
| - vpalignr $8,%ymm9,%ymm9,%ymm9
|
| - vpalignr $12,%ymm13,%ymm13,%ymm13
|
| - vpalignr $4,%ymm4,%ymm4,%ymm4
|
| - vpalignr $8,%ymm8,%ymm8,%ymm8
|
| - vpalignr $12,%ymm12,%ymm12,%ymm12
|
| - vmovdqa %ymm8,128(%rbp)
|
| - vmovdqa .rol16(%rip),%ymm8
|
| - vpaddd %ymm7,%ymm3,%ymm3
|
| - vpaddd %ymm6,%ymm2,%ymm2
|
| - vpaddd %ymm5,%ymm1,%ymm1
|
| - vpaddd %ymm4,%ymm0,%ymm0
|
| - vpxor %ymm3,%ymm15,%ymm15
|
| - vpxor %ymm2,%ymm14,%ymm14
|
| - vpxor %ymm1,%ymm13,%ymm13
|
| - vpxor %ymm0,%ymm12,%ymm12
|
| - addq %rax,%r15
|
| - adcq %rdx,%r9
|
| - vpshufb %ymm8,%ymm15,%ymm15
|
| - vpshufb %ymm8,%ymm14,%ymm14
|
| - vpshufb %ymm8,%ymm13,%ymm13
|
| - vpshufb %ymm8,%ymm12,%ymm12
|
| - vmovdqa 128(%rbp),%ymm8
|
| - vpaddd %ymm15,%ymm11,%ymm11
|
| - vpaddd %ymm14,%ymm10,%ymm10
|
| - vpaddd %ymm13,%ymm9,%ymm9
|
| - vpaddd %ymm12,%ymm8,%ymm8
|
| - movq %r13,%r10
|
| - movq %r14,%r11
|
| - movq %r15,%r12
|
| - andq $3,%r12
|
| - movq %r15,%r13
|
| - andq $-4,%r13
|
| - movq %r9,%r14
|
| - shrdq $2,%r9,%r15
|
| - shrq $2,%r9
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $0,%r12
|
| - addq %r15,%r10
|
| - adcq %r9,%r11
|
| - adcq $0,%r12
|
| - vpxor %ymm11,%ymm7,%ymm7
|
| - vpxor %ymm10,%ymm6,%ymm6
|
| - vpxor %ymm9,%ymm5,%ymm5
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vmovdqa %ymm8,128(%rbp)
|
| - vpsrld $20,%ymm7,%ymm8
|
| - vpslld $32-20,%ymm7,%ymm7
|
| - vpxor %ymm8,%ymm7,%ymm7
|
| - addq 4*8(%rsi,%rcx), %r10
|
| - adcq 8+4*8(%rsi,%rcx), %r11
|
| - adcq $1,%r12
|
| -
|
| - leaq 48(%rcx),%rcx
|
| - vpsrld $20,%ymm6,%ymm8
|
| - vpslld $32-20,%ymm6,%ymm6
|
| - vpxor %ymm8,%ymm6,%ymm6
|
| - vpsrld $20,%ymm5,%ymm8
|
| - vpslld $32-20,%ymm5,%ymm5
|
| - vpxor %ymm8,%ymm5,%ymm5
|
| - vpsrld $20,%ymm4,%ymm8
|
| - vpslld $32-20,%ymm4,%ymm4
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vmovdqa .rol8(%rip),%ymm8
|
| - vpaddd %ymm7,%ymm3,%ymm3
|
| - vpaddd %ymm6,%ymm2,%ymm2
|
| - vpaddd %ymm5,%ymm1,%ymm1
|
| - vpaddd %ymm4,%ymm0,%ymm0
|
| - vpxor %ymm3,%ymm15,%ymm15
|
| - vpxor %ymm2,%ymm14,%ymm14
|
| - vpxor %ymm1,%ymm13,%ymm13
|
| - vpxor %ymm0,%ymm12,%ymm12
|
| - movq 0+0(%rbp),%rdx
|
| - movq %rdx,%r15
|
| - mulxq %r10,%r13,%r14
|
| - mulxq %r11,%rax,%rdx
|
| - imulq %r12,%r15
|
| - addq %rax,%r14
|
| - adcq %rdx,%r15
|
| - vpshufb %ymm8,%ymm15,%ymm15
|
| - vpshufb %ymm8,%ymm14,%ymm14
|
| - vpshufb %ymm8,%ymm13,%ymm13
|
| - vpshufb %ymm8,%ymm12,%ymm12
|
| - vmovdqa 128(%rbp),%ymm8
|
| - vpaddd %ymm15,%ymm11,%ymm11
|
| - vpaddd %ymm14,%ymm10,%ymm10
|
| - vpaddd %ymm13,%ymm9,%ymm9
|
| - movq 8+0(%rbp),%rdx
|
| - mulxq %r10,%r10,%rax
|
| - addq %r10,%r14
|
| - mulxq %r11,%r11,%r9
|
| - adcq %r11,%r15
|
| - adcq $0,%r9
|
| - imulq %r12,%rdx
|
| - vpaddd %ymm12,%ymm8,%ymm8
|
| - vpxor %ymm11,%ymm7,%ymm7
|
| - vpxor %ymm10,%ymm6,%ymm6
|
| - vpxor %ymm9,%ymm5,%ymm5
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vmovdqa %ymm8,128(%rbp)
|
| - vpsrld $25,%ymm7,%ymm8
|
| - vpslld $32-25,%ymm7,%ymm7
|
| - addq %rax,%r15
|
| - adcq %rdx,%r9
|
| - vpxor %ymm8,%ymm7,%ymm7
|
| - vpsrld $25,%ymm6,%ymm8
|
| - vpslld $32-25,%ymm6,%ymm6
|
| - vpxor %ymm8,%ymm6,%ymm6
|
| - vpsrld $25,%ymm5,%ymm8
|
| - vpslld $32-25,%ymm5,%ymm5
|
| - vpxor %ymm8,%ymm5,%ymm5
|
| - vpsrld $25,%ymm4,%ymm8
|
| - vpslld $32-25,%ymm4,%ymm4
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vmovdqa 128(%rbp),%ymm8
|
| - vpalignr $12,%ymm7,%ymm7,%ymm7
|
| - vpalignr $8,%ymm11,%ymm11,%ymm11
|
| - vpalignr $4,%ymm15,%ymm15,%ymm15
|
| - vpalignr $12,%ymm6,%ymm6,%ymm6
|
| - vpalignr $8,%ymm10,%ymm10,%ymm10
|
| - vpalignr $4,%ymm14,%ymm14,%ymm14
|
| - vpalignr $12,%ymm5,%ymm5,%ymm5
|
| - movq %r13,%r10
|
| - movq %r14,%r11
|
| - movq %r15,%r12
|
| - andq $3,%r12
|
| - movq %r15,%r13
|
| - andq $-4,%r13
|
| - movq %r9,%r14
|
| - shrdq $2,%r9,%r15
|
| - shrq $2,%r9
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $0,%r12
|
| - addq %r15,%r10
|
| - adcq %r9,%r11
|
| - adcq $0,%r12
|
| - vpalignr $8,%ymm9,%ymm9,%ymm9
|
| - vpalignr $4,%ymm13,%ymm13,%ymm13
|
| - vpalignr $12,%ymm4,%ymm4,%ymm4
|
| - vpalignr $8,%ymm8,%ymm8,%ymm8
|
| - vpalignr $4,%ymm12,%ymm12,%ymm12
|
| -
|
| - cmpq $60*8,%rcx
|
| - jne 2b
|
| - vpaddd .chacha20_consts(%rip),%ymm3,%ymm3
|
| - vpaddd 64(%rbp),%ymm7,%ymm7
|
| - vpaddd 96(%rbp),%ymm11,%ymm11
|
| - vpaddd 256(%rbp),%ymm15,%ymm15
|
| - vpaddd .chacha20_consts(%rip),%ymm2,%ymm2
|
| - vpaddd 64(%rbp),%ymm6,%ymm6
|
| - vpaddd 96(%rbp),%ymm10,%ymm10
|
| - vpaddd 224(%rbp),%ymm14,%ymm14
|
| - vpaddd .chacha20_consts(%rip),%ymm1,%ymm1
|
| - vpaddd 64(%rbp),%ymm5,%ymm5
|
| - vpaddd 96(%rbp),%ymm9,%ymm9
|
| - vpaddd 192(%rbp),%ymm13,%ymm13
|
| - vpaddd .chacha20_consts(%rip),%ymm0,%ymm0
|
| - vpaddd 64(%rbp),%ymm4,%ymm4
|
| - vpaddd 96(%rbp),%ymm8,%ymm8
|
| - vpaddd 160(%rbp),%ymm12,%ymm12
|
| -
|
| - vmovdqa %ymm0,128(%rbp)
|
| - addq 60*8(%rsi),%r10
|
| - adcq 8+60*8(%rsi),%r11
|
| - adcq $1,%r12
|
| - vperm2i128 $0x02,%ymm3,%ymm7,%ymm0
|
| - vperm2i128 $0x13,%ymm3,%ymm7,%ymm7
|
| - vperm2i128 $0x02,%ymm11,%ymm15,%ymm3
|
| - vperm2i128 $0x13,%ymm11,%ymm15,%ymm11
|
| - vpxor 0+0(%rsi),%ymm0,%ymm0
|
| - vpxor 32+0(%rsi),%ymm3,%ymm3
|
| - vpxor 64+0(%rsi),%ymm7,%ymm7
|
| - vpxor 96+0(%rsi),%ymm11,%ymm11
|
| - vmovdqu %ymm0,0+0(%rdi)
|
| - vmovdqu %ymm3,32+0(%rdi)
|
| - vmovdqu %ymm7,64+0(%rdi)
|
| - vmovdqu %ymm11,96+0(%rdi)
|
| -
|
| - vmovdqa 128(%rbp),%ymm0
|
| - movq 0+0(%rbp),%rax
|
| - movq %rax,%r15
|
| - mulq %r10
|
| - movq %rax,%r13
|
| - movq %rdx,%r14
|
| - movq 0+0(%rbp),%rax
|
| - mulq %r11
|
| - imulq %r12,%r15
|
| - addq %rax,%r14
|
| - adcq %rdx,%r15
|
| - movq 8+0(%rbp),%rax
|
| - movq %rax,%r9
|
| - mulq %r10
|
| - addq %rax,%r14
|
| - adcq $0,%rdx
|
| - movq %rdx,%r10
|
| - movq 8+0(%rbp),%rax
|
| - mulq %r11
|
| - addq %rax,%r15
|
| - adcq $0,%rdx
|
| - imulq %r12,%r9
|
| - addq %r10,%r15
|
| - adcq %rdx,%r9
|
| - movq %r13,%r10
|
| - movq %r14,%r11
|
| - movq %r15,%r12
|
| - andq $3,%r12
|
| - movq %r15,%r13
|
| - andq $-4,%r13
|
| - movq %r9,%r14
|
| - shrdq $2,%r9,%r15
|
| - shrq $2,%r9
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $0,%r12
|
| - addq %r15,%r10
|
| - adcq %r9,%r11
|
| - adcq $0,%r12
|
| - vperm2i128 $0x02,%ymm2,%ymm6,%ymm3
|
| - vperm2i128 $0x13,%ymm2,%ymm6,%ymm6
|
| - vperm2i128 $0x02,%ymm10,%ymm14,%ymm2
|
| - vperm2i128 $0x13,%ymm10,%ymm14,%ymm10
|
| - vpxor 0+128(%rsi),%ymm3,%ymm3
|
| - vpxor 32+128(%rsi),%ymm2,%ymm2
|
| - vpxor 64+128(%rsi),%ymm6,%ymm6
|
| - vpxor 96+128(%rsi),%ymm10,%ymm10
|
| - vmovdqu %ymm3,0+128(%rdi)
|
| - vmovdqu %ymm2,32+128(%rdi)
|
| - vmovdqu %ymm6,64+128(%rdi)
|
| - vmovdqu %ymm10,96+128(%rdi)
|
| - addq 60*8+16(%rsi),%r10
|
| - adcq 8+60*8+16(%rsi),%r11
|
| - adcq $1,%r12
|
| - vperm2i128 $0x02,%ymm1,%ymm5,%ymm3
|
| - vperm2i128 $0x13,%ymm1,%ymm5,%ymm5
|
| - vperm2i128 $0x02,%ymm9,%ymm13,%ymm1
|
| - vperm2i128 $0x13,%ymm9,%ymm13,%ymm9
|
| - vpxor 0+256(%rsi),%ymm3,%ymm3
|
| - vpxor 32+256(%rsi),%ymm1,%ymm1
|
| - vpxor 64+256(%rsi),%ymm5,%ymm5
|
| - vpxor 96+256(%rsi),%ymm9,%ymm9
|
| - vmovdqu %ymm3,0+256(%rdi)
|
| - vmovdqu %ymm1,32+256(%rdi)
|
| - vmovdqu %ymm5,64+256(%rdi)
|
| - vmovdqu %ymm9,96+256(%rdi)
|
| - movq 0+0(%rbp),%rax
|
| - movq %rax,%r15
|
| - mulq %r10
|
| - movq %rax,%r13
|
| - movq %rdx,%r14
|
| - movq 0+0(%rbp),%rax
|
| - mulq %r11
|
| - imulq %r12,%r15
|
| - addq %rax,%r14
|
| - adcq %rdx,%r15
|
| - movq 8+0(%rbp),%rax
|
| - movq %rax,%r9
|
| - mulq %r10
|
| - addq %rax,%r14
|
| - adcq $0,%rdx
|
| - movq %rdx,%r10
|
| - movq 8+0(%rbp),%rax
|
| - mulq %r11
|
| - addq %rax,%r15
|
| - adcq $0,%rdx
|
| - imulq %r12,%r9
|
| - addq %r10,%r15
|
| - adcq %rdx,%r9
|
| - movq %r13,%r10
|
| - movq %r14,%r11
|
| - movq %r15,%r12
|
| - andq $3,%r12
|
| - movq %r15,%r13
|
| - andq $-4,%r13
|
| - movq %r9,%r14
|
| - shrdq $2,%r9,%r15
|
| - shrq $2,%r9
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $0,%r12
|
| - addq %r15,%r10
|
| - adcq %r9,%r11
|
| - adcq $0,%r12
|
| - vperm2i128 $0x02,%ymm0,%ymm4,%ymm3
|
| - vperm2i128 $0x13,%ymm0,%ymm4,%ymm4
|
| - vperm2i128 $0x02,%ymm8,%ymm12,%ymm0
|
| - vperm2i128 $0x13,%ymm8,%ymm12,%ymm8
|
| - vpxor 0+384(%rsi),%ymm3,%ymm3
|
| - vpxor 32+384(%rsi),%ymm0,%ymm0
|
| - vpxor 64+384(%rsi),%ymm4,%ymm4
|
| - vpxor 96+384(%rsi),%ymm8,%ymm8
|
| - vmovdqu %ymm3,0+384(%rdi)
|
| - vmovdqu %ymm0,32+384(%rdi)
|
| - vmovdqu %ymm4,64+384(%rdi)
|
| - vmovdqu %ymm8,96+384(%rdi)
|
| -
|
| - leaq 512(%rsi),%rsi
|
| - leaq 512(%rdi),%rdi
|
| - subq $512,%rbx
|
| - jmp 1b
|
| -3:
|
| - testq %rbx,%rbx
|
| - vzeroupper
|
| - je open_sse_finalize
|
| -3:
|
| - cmpq $128,%rbx
|
| - ja 3f
|
| - vmovdqa .chacha20_consts(%rip),%ymm0
|
| - vmovdqa 64(%rbp),%ymm4
|
| - vmovdqa 96(%rbp),%ymm8
|
| - vmovdqa .avx2_inc(%rip),%ymm12
|
| - vpaddd 160(%rbp),%ymm12,%ymm12
|
| - vmovdqa %ymm12,160(%rbp)
|
| -
|
| - xorq %r8,%r8
|
| - movq %rbx,%rcx
|
| - andq $-16,%rcx
|
| - testq %rcx,%rcx
|
| - je 2f
|
| -1:
|
| - addq 0*8(%rsi,%r8), %r10
|
| - adcq 8+0*8(%rsi,%r8), %r11
|
| - adcq $1,%r12
|
| - movq 0+0(%rbp),%rax
|
| - movq %rax,%r15
|
| - mulq %r10
|
| - movq %rax,%r13
|
| - movq %rdx,%r14
|
| - movq 0+0(%rbp),%rax
|
| - mulq %r11
|
| - imulq %r12,%r15
|
| - addq %rax,%r14
|
| - adcq %rdx,%r15
|
| - movq 8+0(%rbp),%rax
|
| - movq %rax,%r9
|
| - mulq %r10
|
| - addq %rax,%r14
|
| - adcq $0,%rdx
|
| - movq %rdx,%r10
|
| - movq 8+0(%rbp),%rax
|
| - mulq %r11
|
| - addq %rax,%r15
|
| - adcq $0,%rdx
|
| - imulq %r12,%r9
|
| - addq %r10,%r15
|
| - adcq %rdx,%r9
|
| - movq %r13,%r10
|
| - movq %r14,%r11
|
| - movq %r15,%r12
|
| - andq $3,%r12
|
| - movq %r15,%r13
|
| - andq $-4,%r13
|
| - movq %r9,%r14
|
| - shrdq $2,%r9,%r15
|
| - shrq $2,%r9
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $0,%r12
|
| - addq %r15,%r10
|
| - adcq %r9,%r11
|
| - adcq $0,%r12
|
| -
|
| -2:
|
| - addq $16,%r8
|
| - vpaddd %ymm4,%ymm0,%ymm0
|
| - vpxor %ymm0,%ymm12,%ymm12
|
| - vpshufb .rol16(%rip),%ymm12,%ymm12
|
| - vpaddd %ymm12,%ymm8,%ymm8
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vpsrld $20,%ymm4,%ymm3
|
| - vpslld $12,%ymm4,%ymm4
|
| - vpxor %ymm3,%ymm4,%ymm4
|
| - vpaddd %ymm4,%ymm0,%ymm0
|
| - vpxor %ymm0,%ymm12,%ymm12
|
| - vpshufb .rol8(%rip),%ymm12,%ymm12
|
| - vpaddd %ymm12,%ymm8,%ymm8
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vpslld $7,%ymm4,%ymm3
|
| - vpsrld $25,%ymm4,%ymm4
|
| - vpxor %ymm3,%ymm4,%ymm4
|
| - vpalignr $12,%ymm12,%ymm12,%ymm12
|
| - vpalignr $8,%ymm8,%ymm8,%ymm8
|
| - vpalignr $4,%ymm4,%ymm4,%ymm4
|
| - vpaddd %ymm4,%ymm0,%ymm0
|
| - vpxor %ymm0,%ymm12,%ymm12
|
| - vpshufb .rol16(%rip),%ymm12,%ymm12
|
| - vpaddd %ymm12,%ymm8,%ymm8
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vpsrld $20,%ymm4,%ymm3
|
| - vpslld $12,%ymm4,%ymm4
|
| - vpxor %ymm3,%ymm4,%ymm4
|
| - vpaddd %ymm4,%ymm0,%ymm0
|
| - vpxor %ymm0,%ymm12,%ymm12
|
| - vpshufb .rol8(%rip),%ymm12,%ymm12
|
| - vpaddd %ymm12,%ymm8,%ymm8
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vpslld $7,%ymm4,%ymm3
|
| - vpsrld $25,%ymm4,%ymm4
|
| - vpxor %ymm3,%ymm4,%ymm4
|
| - vpalignr $4,%ymm12,%ymm12,%ymm12
|
| - vpalignr $8,%ymm8,%ymm8,%ymm8
|
| - vpalignr $12,%ymm4,%ymm4,%ymm4
|
| -
|
| - cmpq %rcx,%r8
|
| - jb 1b
|
| - cmpq $160,%r8
|
| - jne 2b
|
| - vpaddd .chacha20_consts(%rip),%ymm0,%ymm0
|
| - vpaddd 64(%rbp),%ymm4,%ymm4
|
| - vpaddd 96(%rbp),%ymm8,%ymm8
|
| - vpaddd 160(%rbp),%ymm12,%ymm12
|
| - vperm2i128 $0x13,%ymm0,%ymm4,%ymm3
|
| - vperm2i128 $0x02,%ymm0,%ymm4,%ymm0
|
| - vperm2i128 $0x02,%ymm8,%ymm12,%ymm4
|
| - vperm2i128 $0x13,%ymm8,%ymm12,%ymm12
|
| - vmovdqa %ymm3,%ymm8
|
| -
|
| - jmp open_avx2_tail_loop
|
| -3:
|
| - cmpq $256,%rbx
|
| - ja 3f
|
| - vmovdqa .chacha20_consts(%rip),%ymm0
|
| - vmovdqa 64(%rbp),%ymm4
|
| - vmovdqa 96(%rbp),%ymm8
|
| - vmovdqa %ymm0,%ymm1
|
| - vmovdqa %ymm4,%ymm5
|
| - vmovdqa %ymm8,%ymm9
|
| - vmovdqa .avx2_inc(%rip),%ymm12
|
| - vpaddd 160(%rbp),%ymm12,%ymm13
|
| - vpaddd %ymm13,%ymm12,%ymm12
|
| - vmovdqa %ymm12,160(%rbp)
|
| - vmovdqa %ymm13,192(%rbp)
|
| -
|
| - movq %rbx,128(%rbp)
|
| - movq %rbx,%rcx
|
| - subq $128,%rcx
|
| - shrq $4,%rcx
|
| - movq $10,%r8
|
| - cmpq $10,%rcx
|
| - cmovgq %r8,%rcx
|
| - movq %rsi,%rbx
|
| - xorq %r8,%r8
|
| -1:
|
| - addq 0(%rbx),%r10
|
| - adcq 8+0(%rbx),%r11
|
| - adcq $1,%r12
|
| - movq 0+0(%rbp),%rdx
|
| - movq %rdx,%r15
|
| - mulxq %r10,%r13,%r14
|
| - mulxq %r11,%rax,%rdx
|
| - imulq %r12,%r15
|
| - addq %rax,%r14
|
| - adcq %rdx,%r15
|
| - movq 8+0(%rbp),%rdx
|
| - mulxq %r10,%r10,%rax
|
| - addq %r10,%r14
|
| - mulxq %r11,%r11,%r9
|
| - adcq %r11,%r15
|
| - adcq $0,%r9
|
| - imulq %r12,%rdx
|
| - addq %rax,%r15
|
| - adcq %rdx,%r9
|
| - movq %r13,%r10
|
| - movq %r14,%r11
|
| - movq %r15,%r12
|
| - andq $3,%r12
|
| - movq %r15,%r13
|
| - andq $-4,%r13
|
| - movq %r9,%r14
|
| - shrdq $2,%r9,%r15
|
| - shrq $2,%r9
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $0,%r12
|
| - addq %r15,%r10
|
| - adcq %r9,%r11
|
| - adcq $0,%r12
|
| -
|
| - leaq 16(%rbx),%rbx
|
| -2:
|
| - vpaddd %ymm4,%ymm0,%ymm0
|
| - vpxor %ymm0,%ymm12,%ymm12
|
| - vpshufb .rol16(%rip),%ymm12,%ymm12
|
| - vpaddd %ymm12,%ymm8,%ymm8
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vpsrld $20,%ymm4,%ymm3
|
| - vpslld $12,%ymm4,%ymm4
|
| - vpxor %ymm3,%ymm4,%ymm4
|
| - vpaddd %ymm4,%ymm0,%ymm0
|
| - vpxor %ymm0,%ymm12,%ymm12
|
| - vpshufb .rol8(%rip),%ymm12,%ymm12
|
| - vpaddd %ymm12,%ymm8,%ymm8
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vpslld $7,%ymm4,%ymm3
|
| - vpsrld $25,%ymm4,%ymm4
|
| - vpxor %ymm3,%ymm4,%ymm4
|
| - vpalignr $12,%ymm12,%ymm12,%ymm12
|
| - vpalignr $8,%ymm8,%ymm8,%ymm8
|
| - vpalignr $4,%ymm4,%ymm4,%ymm4
|
| - vpaddd %ymm5,%ymm1,%ymm1
|
| - vpxor %ymm1,%ymm13,%ymm13
|
| - vpshufb .rol16(%rip),%ymm13,%ymm13
|
| - vpaddd %ymm13,%ymm9,%ymm9
|
| - vpxor %ymm9,%ymm5,%ymm5
|
| - vpsrld $20,%ymm5,%ymm3
|
| - vpslld $12,%ymm5,%ymm5
|
| - vpxor %ymm3,%ymm5,%ymm5
|
| - vpaddd %ymm5,%ymm1,%ymm1
|
| - vpxor %ymm1,%ymm13,%ymm13
|
| - vpshufb .rol8(%rip),%ymm13,%ymm13
|
| - vpaddd %ymm13,%ymm9,%ymm9
|
| - vpxor %ymm9,%ymm5,%ymm5
|
| - vpslld $7,%ymm5,%ymm3
|
| - vpsrld $25,%ymm5,%ymm5
|
| - vpxor %ymm3,%ymm5,%ymm5
|
| - vpalignr $12,%ymm13,%ymm13,%ymm13
|
| - vpalignr $8,%ymm9,%ymm9,%ymm9
|
| - vpalignr $4,%ymm5,%ymm5,%ymm5
|
| -
|
| - incq %r8
|
| - vpaddd %ymm4,%ymm0,%ymm0
|
| - vpxor %ymm0,%ymm12,%ymm12
|
| - vpshufb .rol16(%rip),%ymm12,%ymm12
|
| - vpaddd %ymm12,%ymm8,%ymm8
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vpsrld $20,%ymm4,%ymm3
|
| - vpslld $12,%ymm4,%ymm4
|
| - vpxor %ymm3,%ymm4,%ymm4
|
| - vpaddd %ymm4,%ymm0,%ymm0
|
| - vpxor %ymm0,%ymm12,%ymm12
|
| - vpshufb .rol8(%rip),%ymm12,%ymm12
|
| - vpaddd %ymm12,%ymm8,%ymm8
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vpslld $7,%ymm4,%ymm3
|
| - vpsrld $25,%ymm4,%ymm4
|
| - vpxor %ymm3,%ymm4,%ymm4
|
| - vpalignr $4,%ymm12,%ymm12,%ymm12
|
| - vpalignr $8,%ymm8,%ymm8,%ymm8
|
| - vpalignr $12,%ymm4,%ymm4,%ymm4
|
| - vpaddd %ymm5,%ymm1,%ymm1
|
| - vpxor %ymm1,%ymm13,%ymm13
|
| - vpshufb .rol16(%rip),%ymm13,%ymm13
|
| - vpaddd %ymm13,%ymm9,%ymm9
|
| - vpxor %ymm9,%ymm5,%ymm5
|
| - vpsrld $20,%ymm5,%ymm3
|
| - vpslld $12,%ymm5,%ymm5
|
| - vpxor %ymm3,%ymm5,%ymm5
|
| - vpaddd %ymm5,%ymm1,%ymm1
|
| - vpxor %ymm1,%ymm13,%ymm13
|
| - vpshufb .rol8(%rip),%ymm13,%ymm13
|
| - vpaddd %ymm13,%ymm9,%ymm9
|
| - vpxor %ymm9,%ymm5,%ymm5
|
| - vpslld $7,%ymm5,%ymm3
|
| - vpsrld $25,%ymm5,%ymm5
|
| - vpxor %ymm3,%ymm5,%ymm5
|
| - vpalignr $4,%ymm13,%ymm13,%ymm13
|
| - vpalignr $8,%ymm9,%ymm9,%ymm9
|
| - vpalignr $12,%ymm5,%ymm5,%ymm5
|
| - vpaddd %ymm6,%ymm2,%ymm2
|
| - vpxor %ymm2,%ymm14,%ymm14
|
| - vpshufb .rol16(%rip),%ymm14,%ymm14
|
| - vpaddd %ymm14,%ymm10,%ymm10
|
| - vpxor %ymm10,%ymm6,%ymm6
|
| - vpsrld $20,%ymm6,%ymm3
|
| - vpslld $12,%ymm6,%ymm6
|
| - vpxor %ymm3,%ymm6,%ymm6
|
| - vpaddd %ymm6,%ymm2,%ymm2
|
| - vpxor %ymm2,%ymm14,%ymm14
|
| - vpshufb .rol8(%rip),%ymm14,%ymm14
|
| - vpaddd %ymm14,%ymm10,%ymm10
|
| - vpxor %ymm10,%ymm6,%ymm6
|
| - vpslld $7,%ymm6,%ymm3
|
| - vpsrld $25,%ymm6,%ymm6
|
| - vpxor %ymm3,%ymm6,%ymm6
|
| - vpalignr $4,%ymm14,%ymm14,%ymm14
|
| - vpalignr $8,%ymm10,%ymm10,%ymm10
|
| - vpalignr $12,%ymm6,%ymm6,%ymm6
|
| -
|
| - cmpq %rcx,%r8
|
| - jb 1b
|
| - cmpq $10,%r8
|
| - jne 2b
|
| - movq %rbx,%r8
|
| - subq %rsi,%rbx
|
| - movq %rbx,%rcx
|
| - movq 128(%rbp),%rbx
|
| -1:
|
| - addq $16,%rcx
|
| - cmpq %rbx,%rcx
|
| - jg 1f
|
| - addq 0(%r8),%r10
|
| - adcq 8+0(%r8),%r11
|
| - adcq $1,%r12
|
| - movq 0+0(%rbp),%rdx
|
| - movq %rdx,%r15
|
| - mulxq %r10,%r13,%r14
|
| - mulxq %r11,%rax,%rdx
|
| - imulq %r12,%r15
|
| - addq %rax,%r14
|
| - adcq %rdx,%r15
|
| - movq 8+0(%rbp),%rdx
|
| - mulxq %r10,%r10,%rax
|
| - addq %r10,%r14
|
| - mulxq %r11,%r11,%r9
|
| - adcq %r11,%r15
|
| - adcq $0,%r9
|
| - imulq %r12,%rdx
|
| - addq %rax,%r15
|
| - adcq %rdx,%r9
|
| - movq %r13,%r10
|
| - movq %r14,%r11
|
| - movq %r15,%r12
|
| - andq $3,%r12
|
| - movq %r15,%r13
|
| - andq $-4,%r13
|
| - movq %r9,%r14
|
| - shrdq $2,%r9,%r15
|
| - shrq $2,%r9
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $0,%r12
|
| - addq %r15,%r10
|
| - adcq %r9,%r11
|
| - adcq $0,%r12
|
| -
|
| - leaq 16(%r8),%r8
|
| - jmp 1b
|
| -1:
|
| - vpaddd .chacha20_consts(%rip),%ymm1,%ymm1
|
| - vpaddd 64(%rbp),%ymm5,%ymm5
|
| - vpaddd 96(%rbp),%ymm9,%ymm9
|
| - vpaddd 192(%rbp),%ymm13,%ymm13
|
| - vpaddd .chacha20_consts(%rip),%ymm0,%ymm0
|
| - vpaddd 64(%rbp),%ymm4,%ymm4
|
| - vpaddd 96(%rbp),%ymm8,%ymm8
|
| - vpaddd 160(%rbp),%ymm12,%ymm12
|
| - vperm2i128 $0x02,%ymm1,%ymm5,%ymm3
|
| - vperm2i128 $0x13,%ymm1,%ymm5,%ymm5
|
| - vperm2i128 $0x02,%ymm9,%ymm13,%ymm1
|
| - vperm2i128 $0x13,%ymm9,%ymm13,%ymm9
|
| - vpxor 0+0(%rsi),%ymm3,%ymm3
|
| - vpxor 32+0(%rsi),%ymm1,%ymm1
|
| - vpxor 64+0(%rsi),%ymm5,%ymm5
|
| - vpxor 96+0(%rsi),%ymm9,%ymm9
|
| - vmovdqu %ymm3,0+0(%rdi)
|
| - vmovdqu %ymm1,32+0(%rdi)
|
| - vmovdqu %ymm5,64+0(%rdi)
|
| - vmovdqu %ymm9,96+0(%rdi)
|
| - vperm2i128 $0x13,%ymm0,%ymm4,%ymm3
|
| - vperm2i128 $0x02,%ymm0,%ymm4,%ymm0
|
| - vperm2i128 $0x02,%ymm8,%ymm12,%ymm4
|
| - vperm2i128 $0x13,%ymm8,%ymm12,%ymm12
|
| - vmovdqa %ymm3,%ymm8
|
| -
|
| - leaq 128(%rsi),%rsi
|
| - leaq 128(%rdi),%rdi
|
| - subq $128,%rbx
|
| - jmp open_avx2_tail_loop
|
| -3:
|
| - cmpq $384,%rbx
|
| - ja 3f
|
| - vmovdqa .chacha20_consts(%rip),%ymm0
|
| - vmovdqa 64(%rbp),%ymm4
|
| - vmovdqa 96(%rbp),%ymm8
|
| - vmovdqa %ymm0,%ymm1
|
| - vmovdqa %ymm4,%ymm5
|
| - vmovdqa %ymm8,%ymm9
|
| - vmovdqa %ymm0,%ymm2
|
| - vmovdqa %ymm4,%ymm6
|
| - vmovdqa %ymm8,%ymm10
|
| - vmovdqa .avx2_inc(%rip),%ymm12
|
| - vpaddd 160(%rbp),%ymm12,%ymm14
|
| - vpaddd %ymm14,%ymm12,%ymm13
|
| - vpaddd %ymm13,%ymm12,%ymm12
|
| - vmovdqa %ymm12,160(%rbp)
|
| - vmovdqa %ymm13,192(%rbp)
|
| - vmovdqa %ymm14,224(%rbp)
|
| -
|
| - movq %rbx,128(%rbp)
|
| - movq %rbx,%rcx
|
| - subq $256,%rcx
|
| - shrq $4,%rcx
|
| - addq $6,%rcx
|
| - movq $10,%r8
|
| - cmpq $10,%rcx
|
| - cmovgq %r8,%rcx
|
| - movq %rsi,%rbx
|
| - xorq %r8,%r8
|
| -1:
|
| - addq 0(%rbx),%r10
|
| - adcq 8+0(%rbx),%r11
|
| - adcq $1,%r12
|
| - movq 0+0(%rbp),%rdx
|
| - movq %rdx,%r15
|
| - mulxq %r10,%r13,%r14
|
| - mulxq %r11,%rax,%rdx
|
| - imulq %r12,%r15
|
| - addq %rax,%r14
|
| - adcq %rdx,%r15
|
| - movq 8+0(%rbp),%rdx
|
| - mulxq %r10,%r10,%rax
|
| - addq %r10,%r14
|
| - mulxq %r11,%r11,%r9
|
| - adcq %r11,%r15
|
| - adcq $0,%r9
|
| - imulq %r12,%rdx
|
| - addq %rax,%r15
|
| - adcq %rdx,%r9
|
| - movq %r13,%r10
|
| - movq %r14,%r11
|
| - movq %r15,%r12
|
| - andq $3,%r12
|
| - movq %r15,%r13
|
| - andq $-4,%r13
|
| - movq %r9,%r14
|
| - shrdq $2,%r9,%r15
|
| - shrq $2,%r9
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $0,%r12
|
| - addq %r15,%r10
|
| - adcq %r9,%r11
|
| - adcq $0,%r12
|
| -
|
| - leaq 16(%rbx),%rbx
|
| -2:
|
| - vpaddd %ymm6,%ymm2,%ymm2
|
| - vpxor %ymm2,%ymm14,%ymm14
|
| - vpshufb .rol16(%rip),%ymm14,%ymm14
|
| - vpaddd %ymm14,%ymm10,%ymm10
|
| - vpxor %ymm10,%ymm6,%ymm6
|
| - vpsrld $20,%ymm6,%ymm3
|
| - vpslld $12,%ymm6,%ymm6
|
| - vpxor %ymm3,%ymm6,%ymm6
|
| - vpaddd %ymm6,%ymm2,%ymm2
|
| - vpxor %ymm2,%ymm14,%ymm14
|
| - vpshufb .rol8(%rip),%ymm14,%ymm14
|
| - vpaddd %ymm14,%ymm10,%ymm10
|
| - vpxor %ymm10,%ymm6,%ymm6
|
| - vpslld $7,%ymm6,%ymm3
|
| - vpsrld $25,%ymm6,%ymm6
|
| - vpxor %ymm3,%ymm6,%ymm6
|
| - vpalignr $12,%ymm14,%ymm14,%ymm14
|
| - vpalignr $8,%ymm10,%ymm10,%ymm10
|
| - vpalignr $4,%ymm6,%ymm6,%ymm6
|
| - vpaddd %ymm5,%ymm1,%ymm1
|
| - vpxor %ymm1,%ymm13,%ymm13
|
| - vpshufb .rol16(%rip),%ymm13,%ymm13
|
| - vpaddd %ymm13,%ymm9,%ymm9
|
| - vpxor %ymm9,%ymm5,%ymm5
|
| - vpsrld $20,%ymm5,%ymm3
|
| - vpslld $12,%ymm5,%ymm5
|
| - vpxor %ymm3,%ymm5,%ymm5
|
| - vpaddd %ymm5,%ymm1,%ymm1
|
| - vpxor %ymm1,%ymm13,%ymm13
|
| - vpshufb .rol8(%rip),%ymm13,%ymm13
|
| - vpaddd %ymm13,%ymm9,%ymm9
|
| - vpxor %ymm9,%ymm5,%ymm5
|
| - vpslld $7,%ymm5,%ymm3
|
| - vpsrld $25,%ymm5,%ymm5
|
| - vpxor %ymm3,%ymm5,%ymm5
|
| - vpalignr $12,%ymm13,%ymm13,%ymm13
|
| - vpalignr $8,%ymm9,%ymm9,%ymm9
|
| - vpalignr $4,%ymm5,%ymm5,%ymm5
|
| - vpaddd %ymm4,%ymm0,%ymm0
|
| - vpxor %ymm0,%ymm12,%ymm12
|
| - vpshufb .rol16(%rip),%ymm12,%ymm12
|
| - vpaddd %ymm12,%ymm8,%ymm8
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vpsrld $20,%ymm4,%ymm3
|
| - vpslld $12,%ymm4,%ymm4
|
| - vpxor %ymm3,%ymm4,%ymm4
|
| - vpaddd %ymm4,%ymm0,%ymm0
|
| - vpxor %ymm0,%ymm12,%ymm12
|
| - vpshufb .rol8(%rip),%ymm12,%ymm12
|
| - vpaddd %ymm12,%ymm8,%ymm8
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vpslld $7,%ymm4,%ymm3
|
| - vpsrld $25,%ymm4,%ymm4
|
| - vpxor %ymm3,%ymm4,%ymm4
|
| - vpalignr $12,%ymm12,%ymm12,%ymm12
|
| - vpalignr $8,%ymm8,%ymm8,%ymm8
|
| - vpalignr $4,%ymm4,%ymm4,%ymm4
|
| - addq 0(%rbx),%r10
|
| - adcq 8+0(%rbx),%r11
|
| - adcq $1,%r12
|
| - movq 0+0(%rbp),%rax
|
| - movq %rax,%r15
|
| - mulq %r10
|
| - movq %rax,%r13
|
| - movq %rdx,%r14
|
| - movq 0+0(%rbp),%rax
|
| - mulq %r11
|
| - imulq %r12,%r15
|
| - addq %rax,%r14
|
| - adcq %rdx,%r15
|
| - movq 8+0(%rbp),%rax
|
| - movq %rax,%r9
|
| - mulq %r10
|
| - addq %rax,%r14
|
| - adcq $0,%rdx
|
| - movq %rdx,%r10
|
| - movq 8+0(%rbp),%rax
|
| - mulq %r11
|
| - addq %rax,%r15
|
| - adcq $0,%rdx
|
| - imulq %r12,%r9
|
| - addq %r10,%r15
|
| - adcq %rdx,%r9
|
| - movq %r13,%r10
|
| - movq %r14,%r11
|
| - movq %r15,%r12
|
| - andq $3,%r12
|
| - movq %r15,%r13
|
| - andq $-4,%r13
|
| - movq %r9,%r14
|
| - shrdq $2,%r9,%r15
|
| - shrq $2,%r9
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $0,%r12
|
| - addq %r15,%r10
|
| - adcq %r9,%r11
|
| - adcq $0,%r12
|
| -
|
| - leaq 16(%rbx),%rbx
|
| - incq %r8
|
| - vpaddd %ymm6,%ymm2,%ymm2
|
| - vpxor %ymm2,%ymm14,%ymm14
|
| - vpshufb .rol16(%rip),%ymm14,%ymm14
|
| - vpaddd %ymm14,%ymm10,%ymm10
|
| - vpxor %ymm10,%ymm6,%ymm6
|
| - vpsrld $20,%ymm6,%ymm3
|
| - vpslld $12,%ymm6,%ymm6
|
| - vpxor %ymm3,%ymm6,%ymm6
|
| - vpaddd %ymm6,%ymm2,%ymm2
|
| - vpxor %ymm2,%ymm14,%ymm14
|
| - vpshufb .rol8(%rip),%ymm14,%ymm14
|
| - vpaddd %ymm14,%ymm10,%ymm10
|
| - vpxor %ymm10,%ymm6,%ymm6
|
| - vpslld $7,%ymm6,%ymm3
|
| - vpsrld $25,%ymm6,%ymm6
|
| - vpxor %ymm3,%ymm6,%ymm6
|
| - vpalignr $4,%ymm14,%ymm14,%ymm14
|
| - vpalignr $8,%ymm10,%ymm10,%ymm10
|
| - vpalignr $12,%ymm6,%ymm6,%ymm6
|
| - vpaddd %ymm5,%ymm1,%ymm1
|
| - vpxor %ymm1,%ymm13,%ymm13
|
| - vpshufb .rol16(%rip),%ymm13,%ymm13
|
| - vpaddd %ymm13,%ymm9,%ymm9
|
| - vpxor %ymm9,%ymm5,%ymm5
|
| - vpsrld $20,%ymm5,%ymm3
|
| - vpslld $12,%ymm5,%ymm5
|
| - vpxor %ymm3,%ymm5,%ymm5
|
| - vpaddd %ymm5,%ymm1,%ymm1
|
| - vpxor %ymm1,%ymm13,%ymm13
|
| - vpshufb .rol8(%rip),%ymm13,%ymm13
|
| - vpaddd %ymm13,%ymm9,%ymm9
|
| - vpxor %ymm9,%ymm5,%ymm5
|
| - vpslld $7,%ymm5,%ymm3
|
| - vpsrld $25,%ymm5,%ymm5
|
| - vpxor %ymm3,%ymm5,%ymm5
|
| - vpalignr $4,%ymm13,%ymm13,%ymm13
|
| - vpalignr $8,%ymm9,%ymm9,%ymm9
|
| - vpalignr $12,%ymm5,%ymm5,%ymm5
|
| - vpaddd %ymm4,%ymm0,%ymm0
|
| - vpxor %ymm0,%ymm12,%ymm12
|
| - vpshufb .rol16(%rip),%ymm12,%ymm12
|
| - vpaddd %ymm12,%ymm8,%ymm8
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vpsrld $20,%ymm4,%ymm3
|
| - vpslld $12,%ymm4,%ymm4
|
| - vpxor %ymm3,%ymm4,%ymm4
|
| - vpaddd %ymm4,%ymm0,%ymm0
|
| - vpxor %ymm0,%ymm12,%ymm12
|
| - vpshufb .rol8(%rip),%ymm12,%ymm12
|
| - vpaddd %ymm12,%ymm8,%ymm8
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vpslld $7,%ymm4,%ymm3
|
| - vpsrld $25,%ymm4,%ymm4
|
| - vpxor %ymm3,%ymm4,%ymm4
|
| - vpalignr $4,%ymm12,%ymm12,%ymm12
|
| - vpalignr $8,%ymm8,%ymm8,%ymm8
|
| - vpalignr $12,%ymm4,%ymm4,%ymm4
|
| -
|
| - cmpq %rcx,%r8
|
| - jb 1b
|
| - cmpq $10,%r8
|
| - jne 2b
|
| - movq %rbx,%r8
|
| - subq %rsi,%rbx
|
| - movq %rbx,%rcx
|
| - movq 128(%rbp),%rbx
|
| -1:
|
| - addq $16,%rcx
|
| - cmpq %rbx,%rcx
|
| - jg 1f
|
| - addq 0(%r8),%r10
|
| - adcq 8+0(%r8),%r11
|
| - adcq $1,%r12
|
| - movq 0+0(%rbp),%rdx
|
| - movq %rdx,%r15
|
| - mulxq %r10,%r13,%r14
|
| - mulxq %r11,%rax,%rdx
|
| - imulq %r12,%r15
|
| - addq %rax,%r14
|
| - adcq %rdx,%r15
|
| - movq 8+0(%rbp),%rdx
|
| - mulxq %r10,%r10,%rax
|
| - addq %r10,%r14
|
| - mulxq %r11,%r11,%r9
|
| - adcq %r11,%r15
|
| - adcq $0,%r9
|
| - imulq %r12,%rdx
|
| - addq %rax,%r15
|
| - adcq %rdx,%r9
|
| - movq %r13,%r10
|
| - movq %r14,%r11
|
| - movq %r15,%r12
|
| - andq $3,%r12
|
| - movq %r15,%r13
|
| - andq $-4,%r13
|
| - movq %r9,%r14
|
| - shrdq $2,%r9,%r15
|
| - shrq $2,%r9
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $0,%r12
|
| - addq %r15,%r10
|
| - adcq %r9,%r11
|
| - adcq $0,%r12
|
| -
|
| - leaq 16(%r8),%r8
|
| - jmp 1b
|
| -1:
|
| - vpaddd .chacha20_consts(%rip),%ymm2,%ymm2
|
| - vpaddd 64(%rbp),%ymm6,%ymm6
|
| - vpaddd 96(%rbp),%ymm10,%ymm10
|
| - vpaddd 224(%rbp),%ymm14,%ymm14
|
| - vpaddd .chacha20_consts(%rip),%ymm1,%ymm1
|
| - vpaddd 64(%rbp),%ymm5,%ymm5
|
| - vpaddd 96(%rbp),%ymm9,%ymm9
|
| - vpaddd 192(%rbp),%ymm13,%ymm13
|
| - vpaddd .chacha20_consts(%rip),%ymm0,%ymm0
|
| - vpaddd 64(%rbp),%ymm4,%ymm4
|
| - vpaddd 96(%rbp),%ymm8,%ymm8
|
| - vpaddd 160(%rbp),%ymm12,%ymm12
|
| - vperm2i128 $0x02,%ymm2,%ymm6,%ymm3
|
| - vperm2i128 $0x13,%ymm2,%ymm6,%ymm6
|
| - vperm2i128 $0x02,%ymm10,%ymm14,%ymm2
|
| - vperm2i128 $0x13,%ymm10,%ymm14,%ymm10
|
| - vpxor 0+0(%rsi),%ymm3,%ymm3
|
| - vpxor 32+0(%rsi),%ymm2,%ymm2
|
| - vpxor 64+0(%rsi),%ymm6,%ymm6
|
| - vpxor 96+0(%rsi),%ymm10,%ymm10
|
| - vmovdqu %ymm3,0+0(%rdi)
|
| - vmovdqu %ymm2,32+0(%rdi)
|
| - vmovdqu %ymm6,64+0(%rdi)
|
| - vmovdqu %ymm10,96+0(%rdi)
|
| - vperm2i128 $0x02,%ymm1,%ymm5,%ymm3
|
| - vperm2i128 $0x13,%ymm1,%ymm5,%ymm5
|
| - vperm2i128 $0x02,%ymm9,%ymm13,%ymm1
|
| - vperm2i128 $0x13,%ymm9,%ymm13,%ymm9
|
| - vpxor 0+128(%rsi),%ymm3,%ymm3
|
| - vpxor 32+128(%rsi),%ymm1,%ymm1
|
| - vpxor 64+128(%rsi),%ymm5,%ymm5
|
| - vpxor 96+128(%rsi),%ymm9,%ymm9
|
| - vmovdqu %ymm3,0+128(%rdi)
|
| - vmovdqu %ymm1,32+128(%rdi)
|
| - vmovdqu %ymm5,64+128(%rdi)
|
| - vmovdqu %ymm9,96+128(%rdi)
|
| - vperm2i128 $0x13,%ymm0,%ymm4,%ymm3
|
| - vperm2i128 $0x02,%ymm0,%ymm4,%ymm0
|
| - vperm2i128 $0x02,%ymm8,%ymm12,%ymm4
|
| - vperm2i128 $0x13,%ymm8,%ymm12,%ymm12
|
| - vmovdqa %ymm3,%ymm8
|
| -
|
| - leaq 256(%rsi),%rsi
|
| - leaq 256(%rdi),%rdi
|
| - subq $256,%rbx
|
| - jmp open_avx2_tail_loop
|
| -3:
|
| - vmovdqa .chacha20_consts(%rip),%ymm0
|
| - vmovdqa 64(%rbp),%ymm4
|
| - vmovdqa 96(%rbp),%ymm8
|
| - vmovdqa %ymm0,%ymm1
|
| - vmovdqa %ymm4,%ymm5
|
| - vmovdqa %ymm8,%ymm9
|
| - vmovdqa %ymm0,%ymm2
|
| - vmovdqa %ymm4,%ymm6
|
| - vmovdqa %ymm8,%ymm10
|
| - vmovdqa %ymm0,%ymm3
|
| - vmovdqa %ymm4,%ymm7
|
| - vmovdqa %ymm8,%ymm11
|
| - vmovdqa .avx2_inc(%rip),%ymm12
|
| - vpaddd 160(%rbp),%ymm12,%ymm15
|
| - vpaddd %ymm15,%ymm12,%ymm14
|
| - vpaddd %ymm14,%ymm12,%ymm13
|
| - vpaddd %ymm13,%ymm12,%ymm12
|
| - vmovdqa %ymm15,256(%rbp)
|
| - vmovdqa %ymm14,224(%rbp)
|
| - vmovdqa %ymm13,192(%rbp)
|
| - vmovdqa %ymm12,160(%rbp)
|
| -
|
| - xorq %rcx,%rcx
|
| - movq %rsi,%r8
|
| -1:
|
| - addq 0(%r8),%r10
|
| - adcq 8+0(%r8),%r11
|
| - adcq $1,%r12
|
| - movq 0+0(%rbp),%rax
|
| - movq %rax,%r15
|
| - mulq %r10
|
| - movq %rax,%r13
|
| - movq %rdx,%r14
|
| - movq 0+0(%rbp),%rax
|
| - mulq %r11
|
| - imulq %r12,%r15
|
| - addq %rax,%r14
|
| - adcq %rdx,%r15
|
| - movq 8+0(%rbp),%rax
|
| - movq %rax,%r9
|
| - mulq %r10
|
| - addq %rax,%r14
|
| - adcq $0,%rdx
|
| - movq %rdx,%r10
|
| - movq 8+0(%rbp),%rax
|
| - mulq %r11
|
| - addq %rax,%r15
|
| - adcq $0,%rdx
|
| - imulq %r12,%r9
|
| - addq %r10,%r15
|
| - adcq %rdx,%r9
|
| - movq %r13,%r10
|
| - movq %r14,%r11
|
| - movq %r15,%r12
|
| - andq $3,%r12
|
| - movq %r15,%r13
|
| - andq $-4,%r13
|
| - movq %r9,%r14
|
| - shrdq $2,%r9,%r15
|
| - shrq $2,%r9
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $0,%r12
|
| - addq %r15,%r10
|
| - adcq %r9,%r11
|
| - adcq $0,%r12
|
| -
|
| - leaq 16(%r8),%r8
|
| -2:
|
| - vmovdqa %ymm8,128(%rbp)
|
| - vmovdqa .rol16(%rip),%ymm8
|
| - vpaddd %ymm7,%ymm3,%ymm3
|
| - vpaddd %ymm6,%ymm2,%ymm2
|
| - vpaddd %ymm5,%ymm1,%ymm1
|
| - vpaddd %ymm4,%ymm0,%ymm0
|
| - vpxor %ymm3,%ymm15,%ymm15
|
| - vpxor %ymm2,%ymm14,%ymm14
|
| - vpxor %ymm1,%ymm13,%ymm13
|
| - vpxor %ymm0,%ymm12,%ymm12
|
| - vpshufb %ymm8,%ymm15,%ymm15
|
| - vpshufb %ymm8,%ymm14,%ymm14
|
| - vpshufb %ymm8,%ymm13,%ymm13
|
| - vpshufb %ymm8,%ymm12,%ymm12
|
| - vmovdqa 128(%rbp),%ymm8
|
| - vpaddd %ymm15,%ymm11,%ymm11
|
| - vpaddd %ymm14,%ymm10,%ymm10
|
| - vpaddd %ymm13,%ymm9,%ymm9
|
| - vpaddd %ymm12,%ymm8,%ymm8
|
| - vpxor %ymm11,%ymm7,%ymm7
|
| - vpxor %ymm10,%ymm6,%ymm6
|
| - vpxor %ymm9,%ymm5,%ymm5
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vmovdqa %ymm8,128(%rbp)
|
| - vpsrld $20,%ymm7,%ymm8
|
| - vpslld $32-20,%ymm7,%ymm7
|
| - vpxor %ymm8,%ymm7,%ymm7
|
| - vpsrld $20,%ymm6,%ymm8
|
| - vpslld $32-20,%ymm6,%ymm6
|
| - vpxor %ymm8,%ymm6,%ymm6
|
| - vpsrld $20,%ymm5,%ymm8
|
| - vpslld $32-20,%ymm5,%ymm5
|
| - vpxor %ymm8,%ymm5,%ymm5
|
| - vpsrld $20,%ymm4,%ymm8
|
| - vpslld $32-20,%ymm4,%ymm4
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vmovdqa .rol8(%rip),%ymm8
|
| - addq 0(%r8),%r10
|
| - adcq 8+0(%r8),%r11
|
| - adcq $1,%r12
|
| - movq 0+0(%rbp),%rdx
|
| - movq %rdx,%r15
|
| - mulxq %r10,%r13,%r14
|
| - mulxq %r11,%rax,%rdx
|
| - imulq %r12,%r15
|
| - addq %rax,%r14
|
| - adcq %rdx,%r15
|
| - movq 8+0(%rbp),%rdx
|
| - mulxq %r10,%r10,%rax
|
| - addq %r10,%r14
|
| - mulxq %r11,%r11,%r9
|
| - adcq %r11,%r15
|
| - adcq $0,%r9
|
| - imulq %r12,%rdx
|
| - addq %rax,%r15
|
| - adcq %rdx,%r9
|
| - movq %r13,%r10
|
| - movq %r14,%r11
|
| - movq %r15,%r12
|
| - andq $3,%r12
|
| - movq %r15,%r13
|
| - andq $-4,%r13
|
| - movq %r9,%r14
|
| - shrdq $2,%r9,%r15
|
| - shrq $2,%r9
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $0,%r12
|
| - addq %r15,%r10
|
| - adcq %r9,%r11
|
| - adcq $0,%r12
|
| - vpaddd %ymm7,%ymm3,%ymm3
|
| - vpaddd %ymm6,%ymm2,%ymm2
|
| - vpaddd %ymm5,%ymm1,%ymm1
|
| - vpaddd %ymm4,%ymm0,%ymm0
|
| - vpxor %ymm3,%ymm15,%ymm15
|
| - vpxor %ymm2,%ymm14,%ymm14
|
| - vpxor %ymm1,%ymm13,%ymm13
|
| - vpxor %ymm0,%ymm12,%ymm12
|
| - vpshufb %ymm8,%ymm15,%ymm15
|
| - vpshufb %ymm8,%ymm14,%ymm14
|
| - vpshufb %ymm8,%ymm13,%ymm13
|
| - vpshufb %ymm8,%ymm12,%ymm12
|
| - vmovdqa 128(%rbp),%ymm8
|
| - vpaddd %ymm15,%ymm11,%ymm11
|
| - vpaddd %ymm14,%ymm10,%ymm10
|
| - vpaddd %ymm13,%ymm9,%ymm9
|
| - vpaddd %ymm12,%ymm8,%ymm8
|
| - vpxor %ymm11,%ymm7,%ymm7
|
| - vpxor %ymm10,%ymm6,%ymm6
|
| - vpxor %ymm9,%ymm5,%ymm5
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vmovdqa %ymm8,128(%rbp)
|
| - vpsrld $25,%ymm7,%ymm8
|
| - vpslld $32-25,%ymm7,%ymm7
|
| - vpxor %ymm8,%ymm7,%ymm7
|
| - vpsrld $25,%ymm6,%ymm8
|
| - vpslld $32-25,%ymm6,%ymm6
|
| - vpxor %ymm8,%ymm6,%ymm6
|
| - vpsrld $25,%ymm5,%ymm8
|
| - vpslld $32-25,%ymm5,%ymm5
|
| - vpxor %ymm8,%ymm5,%ymm5
|
| - vpsrld $25,%ymm4,%ymm8
|
| - vpslld $32-25,%ymm4,%ymm4
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vmovdqa 128(%rbp),%ymm8
|
| - vpalignr $4,%ymm7,%ymm7,%ymm7
|
| - vpalignr $8,%ymm11,%ymm11,%ymm11
|
| - vpalignr $12,%ymm15,%ymm15,%ymm15
|
| - vpalignr $4,%ymm6,%ymm6,%ymm6
|
| - vpalignr $8,%ymm10,%ymm10,%ymm10
|
| - vpalignr $12,%ymm14,%ymm14,%ymm14
|
| - vpalignr $4,%ymm5,%ymm5,%ymm5
|
| - vpalignr $8,%ymm9,%ymm9,%ymm9
|
| - vpalignr $12,%ymm13,%ymm13,%ymm13
|
| - vpalignr $4,%ymm4,%ymm4,%ymm4
|
| - vpalignr $8,%ymm8,%ymm8,%ymm8
|
| - vpalignr $12,%ymm12,%ymm12,%ymm12
|
| - vmovdqa %ymm8,128(%rbp)
|
| - addq 16(%r8),%r10
|
| - adcq 8+16(%r8),%r11
|
| - adcq $1,%r12
|
| - movq 0+0(%rbp),%rdx
|
| - movq %rdx,%r15
|
| - mulxq %r10,%r13,%r14
|
| - mulxq %r11,%rax,%rdx
|
| - imulq %r12,%r15
|
| - addq %rax,%r14
|
| - adcq %rdx,%r15
|
| - movq 8+0(%rbp),%rdx
|
| - mulxq %r10,%r10,%rax
|
| - addq %r10,%r14
|
| - mulxq %r11,%r11,%r9
|
| - adcq %r11,%r15
|
| - adcq $0,%r9
|
| - imulq %r12,%rdx
|
| - addq %rax,%r15
|
| - adcq %rdx,%r9
|
| - movq %r13,%r10
|
| - movq %r14,%r11
|
| - movq %r15,%r12
|
| - andq $3,%r12
|
| - movq %r15,%r13
|
| - andq $-4,%r13
|
| - movq %r9,%r14
|
| - shrdq $2,%r9,%r15
|
| - shrq $2,%r9
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $0,%r12
|
| - addq %r15,%r10
|
| - adcq %r9,%r11
|
| - adcq $0,%r12
|
| -
|
| - leaq 32(%r8),%r8
|
| - vmovdqa .rol16(%rip),%ymm8
|
| - vpaddd %ymm7,%ymm3,%ymm3
|
| - vpaddd %ymm6,%ymm2,%ymm2
|
| - vpaddd %ymm5,%ymm1,%ymm1
|
| - vpaddd %ymm4,%ymm0,%ymm0
|
| - vpxor %ymm3,%ymm15,%ymm15
|
| - vpxor %ymm2,%ymm14,%ymm14
|
| - vpxor %ymm1,%ymm13,%ymm13
|
| - vpxor %ymm0,%ymm12,%ymm12
|
| - vpshufb %ymm8,%ymm15,%ymm15
|
| - vpshufb %ymm8,%ymm14,%ymm14
|
| - vpshufb %ymm8,%ymm13,%ymm13
|
| - vpshufb %ymm8,%ymm12,%ymm12
|
| - vmovdqa 128(%rbp),%ymm8
|
| - vpaddd %ymm15,%ymm11,%ymm11
|
| - vpaddd %ymm14,%ymm10,%ymm10
|
| - vpaddd %ymm13,%ymm9,%ymm9
|
| - vpaddd %ymm12,%ymm8,%ymm8
|
| - vpxor %ymm11,%ymm7,%ymm7
|
| - vpxor %ymm10,%ymm6,%ymm6
|
| - vpxor %ymm9,%ymm5,%ymm5
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vmovdqa %ymm8,128(%rbp)
|
| - vpsrld $20,%ymm7,%ymm8
|
| - vpslld $32-20,%ymm7,%ymm7
|
| - vpxor %ymm8,%ymm7,%ymm7
|
| - vpsrld $20,%ymm6,%ymm8
|
| - vpslld $32-20,%ymm6,%ymm6
|
| - vpxor %ymm8,%ymm6,%ymm6
|
| - vpsrld $20,%ymm5,%ymm8
|
| - vpslld $32-20,%ymm5,%ymm5
|
| - vpxor %ymm8,%ymm5,%ymm5
|
| - vpsrld $20,%ymm4,%ymm8
|
| - vpslld $32-20,%ymm4,%ymm4
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vmovdqa .rol8(%rip),%ymm8
|
| - vpaddd %ymm7,%ymm3,%ymm3
|
| - vpaddd %ymm6,%ymm2,%ymm2
|
| - vpaddd %ymm5,%ymm1,%ymm1
|
| - vpaddd %ymm4,%ymm0,%ymm0
|
| - vpxor %ymm3,%ymm15,%ymm15
|
| - vpxor %ymm2,%ymm14,%ymm14
|
| - vpxor %ymm1,%ymm13,%ymm13
|
| - vpxor %ymm0,%ymm12,%ymm12
|
| - vpshufb %ymm8,%ymm15,%ymm15
|
| - vpshufb %ymm8,%ymm14,%ymm14
|
| - vpshufb %ymm8,%ymm13,%ymm13
|
| - vpshufb %ymm8,%ymm12,%ymm12
|
| - vmovdqa 128(%rbp),%ymm8
|
| - vpaddd %ymm15,%ymm11,%ymm11
|
| - vpaddd %ymm14,%ymm10,%ymm10
|
| - vpaddd %ymm13,%ymm9,%ymm9
|
| - vpaddd %ymm12,%ymm8,%ymm8
|
| - vpxor %ymm11,%ymm7,%ymm7
|
| - vpxor %ymm10,%ymm6,%ymm6
|
| - vpxor %ymm9,%ymm5,%ymm5
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vmovdqa %ymm8,128(%rbp)
|
| - vpsrld $25,%ymm7,%ymm8
|
| - vpslld $32-25,%ymm7,%ymm7
|
| - vpxor %ymm8,%ymm7,%ymm7
|
| - vpsrld $25,%ymm6,%ymm8
|
| - vpslld $32-25,%ymm6,%ymm6
|
| - vpxor %ymm8,%ymm6,%ymm6
|
| - vpsrld $25,%ymm5,%ymm8
|
| - vpslld $32-25,%ymm5,%ymm5
|
| - vpxor %ymm8,%ymm5,%ymm5
|
| - vpsrld $25,%ymm4,%ymm8
|
| - vpslld $32-25,%ymm4,%ymm4
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vmovdqa 128(%rbp),%ymm8
|
| - vpalignr $12,%ymm7,%ymm7,%ymm7
|
| - vpalignr $8,%ymm11,%ymm11,%ymm11
|
| - vpalignr $4,%ymm15,%ymm15,%ymm15
|
| - vpalignr $12,%ymm6,%ymm6,%ymm6
|
| - vpalignr $8,%ymm10,%ymm10,%ymm10
|
| - vpalignr $4,%ymm14,%ymm14,%ymm14
|
| - vpalignr $12,%ymm5,%ymm5,%ymm5
|
| - vpalignr $8,%ymm9,%ymm9,%ymm9
|
| - vpalignr $4,%ymm13,%ymm13,%ymm13
|
| - vpalignr $12,%ymm4,%ymm4,%ymm4
|
| - vpalignr $8,%ymm8,%ymm8,%ymm8
|
| - vpalignr $4,%ymm12,%ymm12,%ymm12
|
| -
|
| - incq %rcx
|
| - cmpq $4,%rcx
|
| - jl 1b
|
| - cmpq $10,%rcx
|
| - jne 2b
|
| - movq %rbx,%rcx
|
| - subq $384,%rcx
|
| - andq $-16,%rcx
|
| -1:
|
| - testq %rcx,%rcx
|
| - je 1f
|
| - addq 0(%r8),%r10
|
| - adcq 8+0(%r8),%r11
|
| - adcq $1,%r12
|
| - movq 0+0(%rbp),%rdx
|
| - movq %rdx,%r15
|
| - mulxq %r10,%r13,%r14
|
| - mulxq %r11,%rax,%rdx
|
| - imulq %r12,%r15
|
| - addq %rax,%r14
|
| - adcq %rdx,%r15
|
| - movq 8+0(%rbp),%rdx
|
| - mulxq %r10,%r10,%rax
|
| - addq %r10,%r14
|
| - mulxq %r11,%r11,%r9
|
| - adcq %r11,%r15
|
| - adcq $0,%r9
|
| - imulq %r12,%rdx
|
| - addq %rax,%r15
|
| - adcq %rdx,%r9
|
| - movq %r13,%r10
|
| - movq %r14,%r11
|
| - movq %r15,%r12
|
| - andq $3,%r12
|
| - movq %r15,%r13
|
| - andq $-4,%r13
|
| - movq %r9,%r14
|
| - shrdq $2,%r9,%r15
|
| - shrq $2,%r9
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $0,%r12
|
| - addq %r15,%r10
|
| - adcq %r9,%r11
|
| - adcq $0,%r12
|
| -
|
| - leaq 16(%r8),%r8
|
| - subq $16,%rcx
|
| - jmp 1b
|
| -1:
|
| - vpaddd .chacha20_consts(%rip),%ymm3,%ymm3
|
| - vpaddd 64(%rbp),%ymm7,%ymm7
|
| - vpaddd 96(%rbp),%ymm11,%ymm11
|
| - vpaddd 256(%rbp),%ymm15,%ymm15
|
| - vpaddd .chacha20_consts(%rip),%ymm2,%ymm2
|
| - vpaddd 64(%rbp),%ymm6,%ymm6
|
| - vpaddd 96(%rbp),%ymm10,%ymm10
|
| - vpaddd 224(%rbp),%ymm14,%ymm14
|
| - vpaddd .chacha20_consts(%rip),%ymm1,%ymm1
|
| - vpaddd 64(%rbp),%ymm5,%ymm5
|
| - vpaddd 96(%rbp),%ymm9,%ymm9
|
| - vpaddd 192(%rbp),%ymm13,%ymm13
|
| - vpaddd .chacha20_consts(%rip),%ymm0,%ymm0
|
| - vpaddd 64(%rbp),%ymm4,%ymm4
|
| - vpaddd 96(%rbp),%ymm8,%ymm8
|
| - vpaddd 160(%rbp),%ymm12,%ymm12
|
| -
|
| - vmovdqa %ymm0,128(%rbp)
|
| - vperm2i128 $0x02,%ymm3,%ymm7,%ymm0
|
| - vperm2i128 $0x13,%ymm3,%ymm7,%ymm7
|
| - vperm2i128 $0x02,%ymm11,%ymm15,%ymm3
|
| - vperm2i128 $0x13,%ymm11,%ymm15,%ymm11
|
| - vpxor 0+0(%rsi),%ymm0,%ymm0
|
| - vpxor 32+0(%rsi),%ymm3,%ymm3
|
| - vpxor 64+0(%rsi),%ymm7,%ymm7
|
| - vpxor 96+0(%rsi),%ymm11,%ymm11
|
| - vmovdqu %ymm0,0+0(%rdi)
|
| - vmovdqu %ymm3,32+0(%rdi)
|
| - vmovdqu %ymm7,64+0(%rdi)
|
| - vmovdqu %ymm11,96+0(%rdi)
|
| -
|
| - vmovdqa 128(%rbp),%ymm0
|
| - vperm2i128 $0x02,%ymm2,%ymm6,%ymm3
|
| - vperm2i128 $0x13,%ymm2,%ymm6,%ymm6
|
| - vperm2i128 $0x02,%ymm10,%ymm14,%ymm2
|
| - vperm2i128 $0x13,%ymm10,%ymm14,%ymm10
|
| - vpxor 0+128(%rsi),%ymm3,%ymm3
|
| - vpxor 32+128(%rsi),%ymm2,%ymm2
|
| - vpxor 64+128(%rsi),%ymm6,%ymm6
|
| - vpxor 96+128(%rsi),%ymm10,%ymm10
|
| - vmovdqu %ymm3,0+128(%rdi)
|
| - vmovdqu %ymm2,32+128(%rdi)
|
| - vmovdqu %ymm6,64+128(%rdi)
|
| - vmovdqu %ymm10,96+128(%rdi)
|
| - vperm2i128 $0x02,%ymm1,%ymm5,%ymm3
|
| - vperm2i128 $0x13,%ymm1,%ymm5,%ymm5
|
| - vperm2i128 $0x02,%ymm9,%ymm13,%ymm1
|
| - vperm2i128 $0x13,%ymm9,%ymm13,%ymm9
|
| - vpxor 0+256(%rsi),%ymm3,%ymm3
|
| - vpxor 32+256(%rsi),%ymm1,%ymm1
|
| - vpxor 64+256(%rsi),%ymm5,%ymm5
|
| - vpxor 96+256(%rsi),%ymm9,%ymm9
|
| - vmovdqu %ymm3,0+256(%rdi)
|
| - vmovdqu %ymm1,32+256(%rdi)
|
| - vmovdqu %ymm5,64+256(%rdi)
|
| - vmovdqu %ymm9,96+256(%rdi)
|
| - vperm2i128 $0x13,%ymm0,%ymm4,%ymm3
|
| - vperm2i128 $0x02,%ymm0,%ymm4,%ymm0
|
| - vperm2i128 $0x02,%ymm8,%ymm12,%ymm4
|
| - vperm2i128 $0x13,%ymm8,%ymm12,%ymm12
|
| - vmovdqa %ymm3,%ymm8
|
| -
|
| - leaq 384(%rsi),%rsi
|
| - leaq 384(%rdi),%rdi
|
| - subq $384,%rbx
|
| -open_avx2_tail_loop:
|
| - cmpq $32,%rbx
|
| - jb open_avx2_tail
|
| - subq $32,%rbx
|
| - vpxor (%rsi),%ymm0,%ymm0
|
| - vmovdqu %ymm0,(%rdi)
|
| - leaq 32(%rsi),%rsi
|
| - leaq 32(%rdi),%rdi
|
| - vmovdqa %ymm4,%ymm0
|
| - vmovdqa %ymm8,%ymm4
|
| - vmovdqa %ymm12,%ymm8
|
| - jmp open_avx2_tail_loop
|
| -open_avx2_tail:
|
| - cmpq $16,%rbx
|
| - vmovdqa %xmm0,%xmm1
|
| - jb 1f
|
| - subq $16,%rbx
|
| -
|
| - vpxor (%rsi),%xmm0,%xmm1
|
| - vmovdqu %xmm1,(%rdi)
|
| - leaq 16(%rsi),%rsi
|
| - leaq 16(%rdi),%rdi
|
| - vperm2i128 $0x11,%ymm0,%ymm0,%ymm0
|
| - vmovdqa %xmm0,%xmm1
|
| -1:
|
| - vzeroupper
|
| - jmp open_sse_tail_16
|
| -
|
| -open_avx2_192:
|
| - vmovdqa %ymm0,%ymm1
|
| - vmovdqa %ymm0,%ymm2
|
| - vmovdqa %ymm4,%ymm5
|
| - vmovdqa %ymm4,%ymm6
|
| - vmovdqa %ymm8,%ymm9
|
| - vmovdqa %ymm8,%ymm10
|
| - vpaddd .avx2_inc(%rip),%ymm12,%ymm13
|
| - vmovdqa %ymm12,%ymm11
|
| - vmovdqa %ymm13,%ymm15
|
| - movq $10,%r10
|
| -1:
|
| - vpaddd %ymm4,%ymm0,%ymm0
|
| - vpxor %ymm0,%ymm12,%ymm12
|
| - vpshufb .rol16(%rip),%ymm12,%ymm12
|
| - vpaddd %ymm12,%ymm8,%ymm8
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vpsrld $20,%ymm4,%ymm3
|
| - vpslld $12,%ymm4,%ymm4
|
| - vpxor %ymm3,%ymm4,%ymm4
|
| - vpaddd %ymm4,%ymm0,%ymm0
|
| - vpxor %ymm0,%ymm12,%ymm12
|
| - vpshufb .rol8(%rip),%ymm12,%ymm12
|
| - vpaddd %ymm12,%ymm8,%ymm8
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vpslld $7,%ymm4,%ymm3
|
| - vpsrld $25,%ymm4,%ymm4
|
| - vpxor %ymm3,%ymm4,%ymm4
|
| - vpalignr $12,%ymm12,%ymm12,%ymm12
|
| - vpalignr $8,%ymm8,%ymm8,%ymm8
|
| - vpalignr $4,%ymm4,%ymm4,%ymm4
|
| - vpaddd %ymm5,%ymm1,%ymm1
|
| - vpxor %ymm1,%ymm13,%ymm13
|
| - vpshufb .rol16(%rip),%ymm13,%ymm13
|
| - vpaddd %ymm13,%ymm9,%ymm9
|
| - vpxor %ymm9,%ymm5,%ymm5
|
| - vpsrld $20,%ymm5,%ymm3
|
| - vpslld $12,%ymm5,%ymm5
|
| - vpxor %ymm3,%ymm5,%ymm5
|
| - vpaddd %ymm5,%ymm1,%ymm1
|
| - vpxor %ymm1,%ymm13,%ymm13
|
| - vpshufb .rol8(%rip),%ymm13,%ymm13
|
| - vpaddd %ymm13,%ymm9,%ymm9
|
| - vpxor %ymm9,%ymm5,%ymm5
|
| - vpslld $7,%ymm5,%ymm3
|
| - vpsrld $25,%ymm5,%ymm5
|
| - vpxor %ymm3,%ymm5,%ymm5
|
| - vpalignr $12,%ymm13,%ymm13,%ymm13
|
| - vpalignr $8,%ymm9,%ymm9,%ymm9
|
| - vpalignr $4,%ymm5,%ymm5,%ymm5
|
| - vpaddd %ymm4,%ymm0,%ymm0
|
| - vpxor %ymm0,%ymm12,%ymm12
|
| - vpshufb .rol16(%rip),%ymm12,%ymm12
|
| - vpaddd %ymm12,%ymm8,%ymm8
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vpsrld $20,%ymm4,%ymm3
|
| - vpslld $12,%ymm4,%ymm4
|
| - vpxor %ymm3,%ymm4,%ymm4
|
| - vpaddd %ymm4,%ymm0,%ymm0
|
| - vpxor %ymm0,%ymm12,%ymm12
|
| - vpshufb .rol8(%rip),%ymm12,%ymm12
|
| - vpaddd %ymm12,%ymm8,%ymm8
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vpslld $7,%ymm4,%ymm3
|
| - vpsrld $25,%ymm4,%ymm4
|
| - vpxor %ymm3,%ymm4,%ymm4
|
| - vpalignr $4,%ymm12,%ymm12,%ymm12
|
| - vpalignr $8,%ymm8,%ymm8,%ymm8
|
| - vpalignr $12,%ymm4,%ymm4,%ymm4
|
| - vpaddd %ymm5,%ymm1,%ymm1
|
| - vpxor %ymm1,%ymm13,%ymm13
|
| - vpshufb .rol16(%rip),%ymm13,%ymm13
|
| - vpaddd %ymm13,%ymm9,%ymm9
|
| - vpxor %ymm9,%ymm5,%ymm5
|
| - vpsrld $20,%ymm5,%ymm3
|
| - vpslld $12,%ymm5,%ymm5
|
| - vpxor %ymm3,%ymm5,%ymm5
|
| - vpaddd %ymm5,%ymm1,%ymm1
|
| - vpxor %ymm1,%ymm13,%ymm13
|
| - vpshufb .rol8(%rip),%ymm13,%ymm13
|
| - vpaddd %ymm13,%ymm9,%ymm9
|
| - vpxor %ymm9,%ymm5,%ymm5
|
| - vpslld $7,%ymm5,%ymm3
|
| - vpsrld $25,%ymm5,%ymm5
|
| - vpxor %ymm3,%ymm5,%ymm5
|
| - vpalignr $4,%ymm13,%ymm13,%ymm13
|
| - vpalignr $8,%ymm9,%ymm9,%ymm9
|
| - vpalignr $12,%ymm5,%ymm5,%ymm5
|
| -
|
| - decq %r10
|
| - jne 1b
|
| - vpaddd %ymm2,%ymm0,%ymm0
|
| - vpaddd %ymm2,%ymm1,%ymm1
|
| - vpaddd %ymm6,%ymm4,%ymm4
|
| - vpaddd %ymm6,%ymm5,%ymm5
|
| - vpaddd %ymm10,%ymm8,%ymm8
|
| - vpaddd %ymm10,%ymm9,%ymm9
|
| - vpaddd %ymm11,%ymm12,%ymm12
|
| - vpaddd %ymm15,%ymm13,%ymm13
|
| - vperm2i128 $0x02,%ymm0,%ymm4,%ymm3
|
| -
|
| - vpand .clamp(%rip),%ymm3,%ymm3
|
| - vmovdqa %ymm3,0(%rbp)
|
| -
|
| - vperm2i128 $0x13,%ymm0,%ymm4,%ymm0
|
| - vperm2i128 $0x13,%ymm8,%ymm12,%ymm4
|
| - vperm2i128 $0x02,%ymm1,%ymm5,%ymm8
|
| - vperm2i128 $0x02,%ymm9,%ymm13,%ymm12
|
| - vperm2i128 $0x13,%ymm1,%ymm5,%ymm1
|
| - vperm2i128 $0x13,%ymm9,%ymm13,%ymm5
|
| -open_avx2_short:
|
| - movq %r8,%r8
|
| - call poly_hash_ad_internal
|
| -open_avx2_hash_and_xor_loop:
|
| - cmpq $32,%rbx
|
| - jb open_avx2_short_tail_32
|
| - subq $32,%rbx
|
| - addq 0(%rsi),%r10
|
| - adcq 8+0(%rsi),%r11
|
| - adcq $1,%r12
|
| - movq 0+0(%rbp),%rax
|
| - movq %rax,%r15
|
| - mulq %r10
|
| - movq %rax,%r13
|
| - movq %rdx,%r14
|
| - movq 0+0(%rbp),%rax
|
| - mulq %r11
|
| - imulq %r12,%r15
|
| - addq %rax,%r14
|
| - adcq %rdx,%r15
|
| - movq 8+0(%rbp),%rax
|
| - movq %rax,%r9
|
| - mulq %r10
|
| - addq %rax,%r14
|
| - adcq $0,%rdx
|
| - movq %rdx,%r10
|
| - movq 8+0(%rbp),%rax
|
| - mulq %r11
|
| - addq %rax,%r15
|
| - adcq $0,%rdx
|
| - imulq %r12,%r9
|
| - addq %r10,%r15
|
| - adcq %rdx,%r9
|
| - movq %r13,%r10
|
| - movq %r14,%r11
|
| - movq %r15,%r12
|
| - andq $3,%r12
|
| - movq %r15,%r13
|
| - andq $-4,%r13
|
| - movq %r9,%r14
|
| - shrdq $2,%r9,%r15
|
| - shrq $2,%r9
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $0,%r12
|
| - addq %r15,%r10
|
| - adcq %r9,%r11
|
| - adcq $0,%r12
|
| - addq 16(%rsi),%r10
|
| - adcq 8+16(%rsi),%r11
|
| - adcq $1,%r12
|
| - movq 0+0(%rbp),%rax
|
| - movq %rax,%r15
|
| - mulq %r10
|
| - movq %rax,%r13
|
| - movq %rdx,%r14
|
| - movq 0+0(%rbp),%rax
|
| - mulq %r11
|
| - imulq %r12,%r15
|
| - addq %rax,%r14
|
| - adcq %rdx,%r15
|
| - movq 8+0(%rbp),%rax
|
| - movq %rax,%r9
|
| - mulq %r10
|
| - addq %rax,%r14
|
| - adcq $0,%rdx
|
| - movq %rdx,%r10
|
| - movq 8+0(%rbp),%rax
|
| - mulq %r11
|
| - addq %rax,%r15
|
| - adcq $0,%rdx
|
| - imulq %r12,%r9
|
| - addq %r10,%r15
|
| - adcq %rdx,%r9
|
| - movq %r13,%r10
|
| - movq %r14,%r11
|
| - movq %r15,%r12
|
| - andq $3,%r12
|
| - movq %r15,%r13
|
| - andq $-4,%r13
|
| - movq %r9,%r14
|
| - shrdq $2,%r9,%r15
|
| - shrq $2,%r9
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $0,%r12
|
| - addq %r15,%r10
|
| - adcq %r9,%r11
|
| - adcq $0,%r12
|
| -
|
| -
|
| - vpxor (%rsi),%ymm0,%ymm0
|
| - vmovdqu %ymm0,(%rdi)
|
| - leaq 32(%rsi),%rsi
|
| - leaq 32(%rdi),%rdi
|
| -
|
| - vmovdqa %ymm4,%ymm0
|
| - vmovdqa %ymm8,%ymm4
|
| - vmovdqa %ymm12,%ymm8
|
| - vmovdqa %ymm1,%ymm12
|
| - vmovdqa %ymm5,%ymm1
|
| - vmovdqa %ymm9,%ymm5
|
| - vmovdqa %ymm13,%ymm9
|
| - vmovdqa %ymm2,%ymm13
|
| - vmovdqa %ymm6,%ymm2
|
| - jmp open_avx2_hash_and_xor_loop
|
| -open_avx2_short_tail_32:
|
| - cmpq $16,%rbx
|
| - vmovdqa %xmm0,%xmm1
|
| - jb 1f
|
| - subq $16,%rbx
|
| - addq 0(%rsi),%r10
|
| - adcq 8+0(%rsi),%r11
|
| - adcq $1,%r12
|
| - movq 0+0(%rbp),%rax
|
| - movq %rax,%r15
|
| - mulq %r10
|
| - movq %rax,%r13
|
| - movq %rdx,%r14
|
| - movq 0+0(%rbp),%rax
|
| - mulq %r11
|
| - imulq %r12,%r15
|
| - addq %rax,%r14
|
| - adcq %rdx,%r15
|
| - movq 8+0(%rbp),%rax
|
| - movq %rax,%r9
|
| - mulq %r10
|
| - addq %rax,%r14
|
| - adcq $0,%rdx
|
| - movq %rdx,%r10
|
| - movq 8+0(%rbp),%rax
|
| - mulq %r11
|
| - addq %rax,%r15
|
| - adcq $0,%rdx
|
| - imulq %r12,%r9
|
| - addq %r10,%r15
|
| - adcq %rdx,%r9
|
| - movq %r13,%r10
|
| - movq %r14,%r11
|
| - movq %r15,%r12
|
| - andq $3,%r12
|
| - movq %r15,%r13
|
| - andq $-4,%r13
|
| - movq %r9,%r14
|
| - shrdq $2,%r9,%r15
|
| - shrq $2,%r9
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $0,%r12
|
| - addq %r15,%r10
|
| - adcq %r9,%r11
|
| - adcq $0,%r12
|
| -
|
| - vpxor (%rsi),%xmm0,%xmm3
|
| - vmovdqu %xmm3,(%rdi)
|
| - leaq 16(%rsi),%rsi
|
| - leaq 16(%rdi),%rdi
|
| - vextracti128 $1,%ymm0,%xmm1
|
| -1:
|
| - vzeroupper
|
| - jmp open_sse_tail_16
|
| -
|
| -open_avx2_320:
|
| - vmovdqa %ymm0,%ymm1
|
| - vmovdqa %ymm0,%ymm2
|
| - vmovdqa %ymm4,%ymm5
|
| - vmovdqa %ymm4,%ymm6
|
| - vmovdqa %ymm8,%ymm9
|
| - vmovdqa %ymm8,%ymm10
|
| - vpaddd .avx2_inc(%rip),%ymm12,%ymm13
|
| - vpaddd .avx2_inc(%rip),%ymm13,%ymm14
|
| - vmovdqa %ymm4,%ymm7
|
| - vmovdqa %ymm8,%ymm11
|
| - vmovdqa %ymm12,160(%rbp)
|
| - vmovdqa %ymm13,192(%rbp)
|
| - vmovdqa %ymm14,224(%rbp)
|
| - movq $10,%r10
|
| -1:
|
| - vpaddd %ymm4,%ymm0,%ymm0
|
| - vpxor %ymm0,%ymm12,%ymm12
|
| - vpshufb .rol16(%rip),%ymm12,%ymm12
|
| - vpaddd %ymm12,%ymm8,%ymm8
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vpsrld $20,%ymm4,%ymm3
|
| - vpslld $12,%ymm4,%ymm4
|
| - vpxor %ymm3,%ymm4,%ymm4
|
| - vpaddd %ymm4,%ymm0,%ymm0
|
| - vpxor %ymm0,%ymm12,%ymm12
|
| - vpshufb .rol8(%rip),%ymm12,%ymm12
|
| - vpaddd %ymm12,%ymm8,%ymm8
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vpslld $7,%ymm4,%ymm3
|
| - vpsrld $25,%ymm4,%ymm4
|
| - vpxor %ymm3,%ymm4,%ymm4
|
| - vpalignr $12,%ymm12,%ymm12,%ymm12
|
| - vpalignr $8,%ymm8,%ymm8,%ymm8
|
| - vpalignr $4,%ymm4,%ymm4,%ymm4
|
| - vpaddd %ymm5,%ymm1,%ymm1
|
| - vpxor %ymm1,%ymm13,%ymm13
|
| - vpshufb .rol16(%rip),%ymm13,%ymm13
|
| - vpaddd %ymm13,%ymm9,%ymm9
|
| - vpxor %ymm9,%ymm5,%ymm5
|
| - vpsrld $20,%ymm5,%ymm3
|
| - vpslld $12,%ymm5,%ymm5
|
| - vpxor %ymm3,%ymm5,%ymm5
|
| - vpaddd %ymm5,%ymm1,%ymm1
|
| - vpxor %ymm1,%ymm13,%ymm13
|
| - vpshufb .rol8(%rip),%ymm13,%ymm13
|
| - vpaddd %ymm13,%ymm9,%ymm9
|
| - vpxor %ymm9,%ymm5,%ymm5
|
| - vpslld $7,%ymm5,%ymm3
|
| - vpsrld $25,%ymm5,%ymm5
|
| - vpxor %ymm3,%ymm5,%ymm5
|
| - vpalignr $12,%ymm13,%ymm13,%ymm13
|
| - vpalignr $8,%ymm9,%ymm9,%ymm9
|
| - vpalignr $4,%ymm5,%ymm5,%ymm5
|
| - vpaddd %ymm6,%ymm2,%ymm2
|
| - vpxor %ymm2,%ymm14,%ymm14
|
| - vpshufb .rol16(%rip),%ymm14,%ymm14
|
| - vpaddd %ymm14,%ymm10,%ymm10
|
| - vpxor %ymm10,%ymm6,%ymm6
|
| - vpsrld $20,%ymm6,%ymm3
|
| - vpslld $12,%ymm6,%ymm6
|
| - vpxor %ymm3,%ymm6,%ymm6
|
| - vpaddd %ymm6,%ymm2,%ymm2
|
| - vpxor %ymm2,%ymm14,%ymm14
|
| - vpshufb .rol8(%rip),%ymm14,%ymm14
|
| - vpaddd %ymm14,%ymm10,%ymm10
|
| - vpxor %ymm10,%ymm6,%ymm6
|
| - vpslld $7,%ymm6,%ymm3
|
| - vpsrld $25,%ymm6,%ymm6
|
| - vpxor %ymm3,%ymm6,%ymm6
|
| - vpalignr $12,%ymm14,%ymm14,%ymm14
|
| - vpalignr $8,%ymm10,%ymm10,%ymm10
|
| - vpalignr $4,%ymm6,%ymm6,%ymm6
|
| - vpaddd %ymm4,%ymm0,%ymm0
|
| - vpxor %ymm0,%ymm12,%ymm12
|
| - vpshufb .rol16(%rip),%ymm12,%ymm12
|
| - vpaddd %ymm12,%ymm8,%ymm8
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vpsrld $20,%ymm4,%ymm3
|
| - vpslld $12,%ymm4,%ymm4
|
| - vpxor %ymm3,%ymm4,%ymm4
|
| - vpaddd %ymm4,%ymm0,%ymm0
|
| - vpxor %ymm0,%ymm12,%ymm12
|
| - vpshufb .rol8(%rip),%ymm12,%ymm12
|
| - vpaddd %ymm12,%ymm8,%ymm8
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vpslld $7,%ymm4,%ymm3
|
| - vpsrld $25,%ymm4,%ymm4
|
| - vpxor %ymm3,%ymm4,%ymm4
|
| - vpalignr $4,%ymm12,%ymm12,%ymm12
|
| - vpalignr $8,%ymm8,%ymm8,%ymm8
|
| - vpalignr $12,%ymm4,%ymm4,%ymm4
|
| - vpaddd %ymm5,%ymm1,%ymm1
|
| - vpxor %ymm1,%ymm13,%ymm13
|
| - vpshufb .rol16(%rip),%ymm13,%ymm13
|
| - vpaddd %ymm13,%ymm9,%ymm9
|
| - vpxor %ymm9,%ymm5,%ymm5
|
| - vpsrld $20,%ymm5,%ymm3
|
| - vpslld $12,%ymm5,%ymm5
|
| - vpxor %ymm3,%ymm5,%ymm5
|
| - vpaddd %ymm5,%ymm1,%ymm1
|
| - vpxor %ymm1,%ymm13,%ymm13
|
| - vpshufb .rol8(%rip),%ymm13,%ymm13
|
| - vpaddd %ymm13,%ymm9,%ymm9
|
| - vpxor %ymm9,%ymm5,%ymm5
|
| - vpslld $7,%ymm5,%ymm3
|
| - vpsrld $25,%ymm5,%ymm5
|
| - vpxor %ymm3,%ymm5,%ymm5
|
| - vpalignr $4,%ymm13,%ymm13,%ymm13
|
| - vpalignr $8,%ymm9,%ymm9,%ymm9
|
| - vpalignr $12,%ymm5,%ymm5,%ymm5
|
| - vpaddd %ymm6,%ymm2,%ymm2
|
| - vpxor %ymm2,%ymm14,%ymm14
|
| - vpshufb .rol16(%rip),%ymm14,%ymm14
|
| - vpaddd %ymm14,%ymm10,%ymm10
|
| - vpxor %ymm10,%ymm6,%ymm6
|
| - vpsrld $20,%ymm6,%ymm3
|
| - vpslld $12,%ymm6,%ymm6
|
| - vpxor %ymm3,%ymm6,%ymm6
|
| - vpaddd %ymm6,%ymm2,%ymm2
|
| - vpxor %ymm2,%ymm14,%ymm14
|
| - vpshufb .rol8(%rip),%ymm14,%ymm14
|
| - vpaddd %ymm14,%ymm10,%ymm10
|
| - vpxor %ymm10,%ymm6,%ymm6
|
| - vpslld $7,%ymm6,%ymm3
|
| - vpsrld $25,%ymm6,%ymm6
|
| - vpxor %ymm3,%ymm6,%ymm6
|
| - vpalignr $4,%ymm14,%ymm14,%ymm14
|
| - vpalignr $8,%ymm10,%ymm10,%ymm10
|
| - vpalignr $12,%ymm6,%ymm6,%ymm6
|
| -
|
| - decq %r10
|
| - jne 1b
|
| - vpaddd .chacha20_consts(%rip),%ymm0,%ymm0
|
| - vpaddd .chacha20_consts(%rip),%ymm1,%ymm1
|
| - vpaddd .chacha20_consts(%rip),%ymm2,%ymm2
|
| - vpaddd %ymm7,%ymm4,%ymm4
|
| - vpaddd %ymm7,%ymm5,%ymm5
|
| - vpaddd %ymm7,%ymm6,%ymm6
|
| - vpaddd %ymm11,%ymm8,%ymm8
|
| - vpaddd %ymm11,%ymm9,%ymm9
|
| - vpaddd %ymm11,%ymm10,%ymm10
|
| - vpaddd 160(%rbp),%ymm12,%ymm12
|
| - vpaddd 192(%rbp),%ymm13,%ymm13
|
| - vpaddd 224(%rbp),%ymm14,%ymm14
|
| - vperm2i128 $0x02,%ymm0,%ymm4,%ymm3
|
| -
|
| - vpand .clamp(%rip),%ymm3,%ymm3
|
| - vmovdqa %ymm3,0(%rbp)
|
| -
|
| - vperm2i128 $0x13,%ymm0,%ymm4,%ymm0
|
| - vperm2i128 $0x13,%ymm8,%ymm12,%ymm4
|
| - vperm2i128 $0x02,%ymm1,%ymm5,%ymm8
|
| - vperm2i128 $0x02,%ymm9,%ymm13,%ymm12
|
| - vperm2i128 $0x13,%ymm1,%ymm5,%ymm1
|
| - vperm2i128 $0x13,%ymm9,%ymm13,%ymm5
|
| - vperm2i128 $0x02,%ymm2,%ymm6,%ymm9
|
| - vperm2i128 $0x02,%ymm10,%ymm14,%ymm13
|
| - vperm2i128 $0x13,%ymm2,%ymm6,%ymm2
|
| - vperm2i128 $0x13,%ymm10,%ymm14,%ymm6
|
| - jmp open_avx2_short
|
| -.size chacha20_poly1305_open_avx2, .-chacha20_poly1305_open_avx2
|
| -
|
| -
|
| -.type chacha20_poly1305_seal_avx2,@function
|
| -.align 64
|
| -chacha20_poly1305_seal_avx2:
|
| - vzeroupper
|
| - vmovdqa .chacha20_consts(%rip),%ymm0
|
| - vbroadcasti128 0(%r9),%ymm4
|
| - vbroadcasti128 16(%r9),%ymm8
|
| - vbroadcasti128 32(%r9),%ymm12
|
| - vpaddd .avx2_init(%rip),%ymm12,%ymm12
|
| - cmpq $192,%rbx
|
| - jbe seal_avx2_192
|
| - cmpq $320,%rbx
|
| - jbe seal_avx2_320
|
| - vmovdqa %ymm0,%ymm1
|
| - vmovdqa %ymm0,%ymm2
|
| - vmovdqa %ymm0,%ymm3
|
| - vmovdqa %ymm4,%ymm5
|
| - vmovdqa %ymm4,%ymm6
|
| - vmovdqa %ymm4,%ymm7
|
| - vmovdqa %ymm4,64(%rbp)
|
| - vmovdqa %ymm8,%ymm9
|
| - vmovdqa %ymm8,%ymm10
|
| - vmovdqa %ymm8,%ymm11
|
| - vmovdqa %ymm8,96(%rbp)
|
| - vmovdqa %ymm12,%ymm15
|
| - vpaddd .avx2_inc(%rip),%ymm15,%ymm14
|
| - vpaddd .avx2_inc(%rip),%ymm14,%ymm13
|
| - vpaddd .avx2_inc(%rip),%ymm13,%ymm12
|
| - vmovdqa %ymm12,160(%rbp)
|
| - vmovdqa %ymm13,192(%rbp)
|
| - vmovdqa %ymm14,224(%rbp)
|
| - vmovdqa %ymm15,256(%rbp)
|
| - movq $10,%r10
|
| -1:
|
| - vmovdqa %ymm8,128(%rbp)
|
| - vmovdqa .rol16(%rip),%ymm8
|
| - vpaddd %ymm7,%ymm3,%ymm3
|
| - vpaddd %ymm6,%ymm2,%ymm2
|
| - vpaddd %ymm5,%ymm1,%ymm1
|
| - vpaddd %ymm4,%ymm0,%ymm0
|
| - vpxor %ymm3,%ymm15,%ymm15
|
| - vpxor %ymm2,%ymm14,%ymm14
|
| - vpxor %ymm1,%ymm13,%ymm13
|
| - vpxor %ymm0,%ymm12,%ymm12
|
| - vpshufb %ymm8,%ymm15,%ymm15
|
| - vpshufb %ymm8,%ymm14,%ymm14
|
| - vpshufb %ymm8,%ymm13,%ymm13
|
| - vpshufb %ymm8,%ymm12,%ymm12
|
| - vmovdqa 128(%rbp),%ymm8
|
| - vpaddd %ymm15,%ymm11,%ymm11
|
| - vpaddd %ymm14,%ymm10,%ymm10
|
| - vpaddd %ymm13,%ymm9,%ymm9
|
| - vpaddd %ymm12,%ymm8,%ymm8
|
| - vpxor %ymm11,%ymm7,%ymm7
|
| - vpxor %ymm10,%ymm6,%ymm6
|
| - vpxor %ymm9,%ymm5,%ymm5
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vmovdqa %ymm8,128(%rbp)
|
| - vpsrld $20,%ymm7,%ymm8
|
| - vpslld $32-20,%ymm7,%ymm7
|
| - vpxor %ymm8,%ymm7,%ymm7
|
| - vpsrld $20,%ymm6,%ymm8
|
| - vpslld $32-20,%ymm6,%ymm6
|
| - vpxor %ymm8,%ymm6,%ymm6
|
| - vpsrld $20,%ymm5,%ymm8
|
| - vpslld $32-20,%ymm5,%ymm5
|
| - vpxor %ymm8,%ymm5,%ymm5
|
| - vpsrld $20,%ymm4,%ymm8
|
| - vpslld $32-20,%ymm4,%ymm4
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vmovdqa .rol8(%rip),%ymm8
|
| - vpaddd %ymm7,%ymm3,%ymm3
|
| - vpaddd %ymm6,%ymm2,%ymm2
|
| - vpaddd %ymm5,%ymm1,%ymm1
|
| - vpaddd %ymm4,%ymm0,%ymm0
|
| - vpxor %ymm3,%ymm15,%ymm15
|
| - vpxor %ymm2,%ymm14,%ymm14
|
| - vpxor %ymm1,%ymm13,%ymm13
|
| - vpxor %ymm0,%ymm12,%ymm12
|
| - vpshufb %ymm8,%ymm15,%ymm15
|
| - vpshufb %ymm8,%ymm14,%ymm14
|
| - vpshufb %ymm8,%ymm13,%ymm13
|
| - vpshufb %ymm8,%ymm12,%ymm12
|
| - vmovdqa 128(%rbp),%ymm8
|
| - vpaddd %ymm15,%ymm11,%ymm11
|
| - vpaddd %ymm14,%ymm10,%ymm10
|
| - vpaddd %ymm13,%ymm9,%ymm9
|
| - vpaddd %ymm12,%ymm8,%ymm8
|
| - vpxor %ymm11,%ymm7,%ymm7
|
| - vpxor %ymm10,%ymm6,%ymm6
|
| - vpxor %ymm9,%ymm5,%ymm5
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vmovdqa %ymm8,128(%rbp)
|
| - vpsrld $25,%ymm7,%ymm8
|
| - vpslld $32-25,%ymm7,%ymm7
|
| - vpxor %ymm8,%ymm7,%ymm7
|
| - vpsrld $25,%ymm6,%ymm8
|
| - vpslld $32-25,%ymm6,%ymm6
|
| - vpxor %ymm8,%ymm6,%ymm6
|
| - vpsrld $25,%ymm5,%ymm8
|
| - vpslld $32-25,%ymm5,%ymm5
|
| - vpxor %ymm8,%ymm5,%ymm5
|
| - vpsrld $25,%ymm4,%ymm8
|
| - vpslld $32-25,%ymm4,%ymm4
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vmovdqa 128(%rbp),%ymm8
|
| - vpalignr $4,%ymm7,%ymm7,%ymm7
|
| - vpalignr $8,%ymm11,%ymm11,%ymm11
|
| - vpalignr $12,%ymm15,%ymm15,%ymm15
|
| - vpalignr $4,%ymm6,%ymm6,%ymm6
|
| - vpalignr $8,%ymm10,%ymm10,%ymm10
|
| - vpalignr $12,%ymm14,%ymm14,%ymm14
|
| - vpalignr $4,%ymm5,%ymm5,%ymm5
|
| - vpalignr $8,%ymm9,%ymm9,%ymm9
|
| - vpalignr $12,%ymm13,%ymm13,%ymm13
|
| - vpalignr $4,%ymm4,%ymm4,%ymm4
|
| - vpalignr $8,%ymm8,%ymm8,%ymm8
|
| - vpalignr $12,%ymm12,%ymm12,%ymm12
|
| - vmovdqa %ymm8,128(%rbp)
|
| - vmovdqa .rol16(%rip),%ymm8
|
| - vpaddd %ymm7,%ymm3,%ymm3
|
| - vpaddd %ymm6,%ymm2,%ymm2
|
| - vpaddd %ymm5,%ymm1,%ymm1
|
| - vpaddd %ymm4,%ymm0,%ymm0
|
| - vpxor %ymm3,%ymm15,%ymm15
|
| - vpxor %ymm2,%ymm14,%ymm14
|
| - vpxor %ymm1,%ymm13,%ymm13
|
| - vpxor %ymm0,%ymm12,%ymm12
|
| - vpshufb %ymm8,%ymm15,%ymm15
|
| - vpshufb %ymm8,%ymm14,%ymm14
|
| - vpshufb %ymm8,%ymm13,%ymm13
|
| - vpshufb %ymm8,%ymm12,%ymm12
|
| - vmovdqa 128(%rbp),%ymm8
|
| - vpaddd %ymm15,%ymm11,%ymm11
|
| - vpaddd %ymm14,%ymm10,%ymm10
|
| - vpaddd %ymm13,%ymm9,%ymm9
|
| - vpaddd %ymm12,%ymm8,%ymm8
|
| - vpxor %ymm11,%ymm7,%ymm7
|
| - vpxor %ymm10,%ymm6,%ymm6
|
| - vpxor %ymm9,%ymm5,%ymm5
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vmovdqa %ymm8,128(%rbp)
|
| - vpsrld $20,%ymm7,%ymm8
|
| - vpslld $32-20,%ymm7,%ymm7
|
| - vpxor %ymm8,%ymm7,%ymm7
|
| - vpsrld $20,%ymm6,%ymm8
|
| - vpslld $32-20,%ymm6,%ymm6
|
| - vpxor %ymm8,%ymm6,%ymm6
|
| - vpsrld $20,%ymm5,%ymm8
|
| - vpslld $32-20,%ymm5,%ymm5
|
| - vpxor %ymm8,%ymm5,%ymm5
|
| - vpsrld $20,%ymm4,%ymm8
|
| - vpslld $32-20,%ymm4,%ymm4
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vmovdqa .rol8(%rip),%ymm8
|
| - vpaddd %ymm7,%ymm3,%ymm3
|
| - vpaddd %ymm6,%ymm2,%ymm2
|
| - vpaddd %ymm5,%ymm1,%ymm1
|
| - vpaddd %ymm4,%ymm0,%ymm0
|
| - vpxor %ymm3,%ymm15,%ymm15
|
| - vpxor %ymm2,%ymm14,%ymm14
|
| - vpxor %ymm1,%ymm13,%ymm13
|
| - vpxor %ymm0,%ymm12,%ymm12
|
| - vpshufb %ymm8,%ymm15,%ymm15
|
| - vpshufb %ymm8,%ymm14,%ymm14
|
| - vpshufb %ymm8,%ymm13,%ymm13
|
| - vpshufb %ymm8,%ymm12,%ymm12
|
| - vmovdqa 128(%rbp),%ymm8
|
| - vpaddd %ymm15,%ymm11,%ymm11
|
| - vpaddd %ymm14,%ymm10,%ymm10
|
| - vpaddd %ymm13,%ymm9,%ymm9
|
| - vpaddd %ymm12,%ymm8,%ymm8
|
| - vpxor %ymm11,%ymm7,%ymm7
|
| - vpxor %ymm10,%ymm6,%ymm6
|
| - vpxor %ymm9,%ymm5,%ymm5
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vmovdqa %ymm8,128(%rbp)
|
| - vpsrld $25,%ymm7,%ymm8
|
| - vpslld $32-25,%ymm7,%ymm7
|
| - vpxor %ymm8,%ymm7,%ymm7
|
| - vpsrld $25,%ymm6,%ymm8
|
| - vpslld $32-25,%ymm6,%ymm6
|
| - vpxor %ymm8,%ymm6,%ymm6
|
| - vpsrld $25,%ymm5,%ymm8
|
| - vpslld $32-25,%ymm5,%ymm5
|
| - vpxor %ymm8,%ymm5,%ymm5
|
| - vpsrld $25,%ymm4,%ymm8
|
| - vpslld $32-25,%ymm4,%ymm4
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vmovdqa 128(%rbp),%ymm8
|
| - vpalignr $12,%ymm7,%ymm7,%ymm7
|
| - vpalignr $8,%ymm11,%ymm11,%ymm11
|
| - vpalignr $4,%ymm15,%ymm15,%ymm15
|
| - vpalignr $12,%ymm6,%ymm6,%ymm6
|
| - vpalignr $8,%ymm10,%ymm10,%ymm10
|
| - vpalignr $4,%ymm14,%ymm14,%ymm14
|
| - vpalignr $12,%ymm5,%ymm5,%ymm5
|
| - vpalignr $8,%ymm9,%ymm9,%ymm9
|
| - vpalignr $4,%ymm13,%ymm13,%ymm13
|
| - vpalignr $12,%ymm4,%ymm4,%ymm4
|
| - vpalignr $8,%ymm8,%ymm8,%ymm8
|
| - vpalignr $4,%ymm12,%ymm12,%ymm12
|
| -
|
| - decq %r10
|
| - jnz 1b
|
| - vpaddd .chacha20_consts(%rip),%ymm3,%ymm3
|
| - vpaddd 64(%rbp),%ymm7,%ymm7
|
| - vpaddd 96(%rbp),%ymm11,%ymm11
|
| - vpaddd 256(%rbp),%ymm15,%ymm15
|
| - vpaddd .chacha20_consts(%rip),%ymm2,%ymm2
|
| - vpaddd 64(%rbp),%ymm6,%ymm6
|
| - vpaddd 96(%rbp),%ymm10,%ymm10
|
| - vpaddd 224(%rbp),%ymm14,%ymm14
|
| - vpaddd .chacha20_consts(%rip),%ymm1,%ymm1
|
| - vpaddd 64(%rbp),%ymm5,%ymm5
|
| - vpaddd 96(%rbp),%ymm9,%ymm9
|
| - vpaddd 192(%rbp),%ymm13,%ymm13
|
| - vpaddd .chacha20_consts(%rip),%ymm0,%ymm0
|
| - vpaddd 64(%rbp),%ymm4,%ymm4
|
| - vpaddd 96(%rbp),%ymm8,%ymm8
|
| - vpaddd 160(%rbp),%ymm12,%ymm12
|
| -
|
| - vperm2i128 $0x13,%ymm11,%ymm15,%ymm11
|
| - vperm2i128 $0x02,%ymm3,%ymm7,%ymm15
|
| - vperm2i128 $0x13,%ymm3,%ymm7,%ymm3
|
| - vpand .clamp(%rip),%ymm15,%ymm15
|
| - vmovdqa %ymm15,0(%rbp)
|
| - movq %r8,%r8
|
| - call poly_hash_ad_internal
|
| -
|
| - vpxor 0(%rsi),%ymm3,%ymm3
|
| - vpxor 32(%rsi),%ymm11,%ymm11
|
| - vmovdqu %ymm3,0(%rdi)
|
| - vmovdqu %ymm11,32(%rdi)
|
| - vperm2i128 $0x02,%ymm2,%ymm6,%ymm15
|
| - vperm2i128 $0x13,%ymm2,%ymm6,%ymm6
|
| - vperm2i128 $0x02,%ymm10,%ymm14,%ymm2
|
| - vperm2i128 $0x13,%ymm10,%ymm14,%ymm10
|
| - vpxor 0+64(%rsi),%ymm15,%ymm15
|
| - vpxor 32+64(%rsi),%ymm2,%ymm2
|
| - vpxor 64+64(%rsi),%ymm6,%ymm6
|
| - vpxor 96+64(%rsi),%ymm10,%ymm10
|
| - vmovdqu %ymm15,0+64(%rdi)
|
| - vmovdqu %ymm2,32+64(%rdi)
|
| - vmovdqu %ymm6,64+64(%rdi)
|
| - vmovdqu %ymm10,96+64(%rdi)
|
| - vperm2i128 $0x02,%ymm1,%ymm5,%ymm15
|
| - vperm2i128 $0x13,%ymm1,%ymm5,%ymm5
|
| - vperm2i128 $0x02,%ymm9,%ymm13,%ymm1
|
| - vperm2i128 $0x13,%ymm9,%ymm13,%ymm9
|
| - vpxor 0+192(%rsi),%ymm15,%ymm15
|
| - vpxor 32+192(%rsi),%ymm1,%ymm1
|
| - vpxor 64+192(%rsi),%ymm5,%ymm5
|
| - vpxor 96+192(%rsi),%ymm9,%ymm9
|
| - vmovdqu %ymm15,0+192(%rdi)
|
| - vmovdqu %ymm1,32+192(%rdi)
|
| - vmovdqu %ymm5,64+192(%rdi)
|
| - vmovdqu %ymm9,96+192(%rdi)
|
| - vperm2i128 $0x13,%ymm0,%ymm4,%ymm15
|
| - vperm2i128 $0x02,%ymm0,%ymm4,%ymm0
|
| - vperm2i128 $0x02,%ymm8,%ymm12,%ymm4
|
| - vperm2i128 $0x13,%ymm8,%ymm12,%ymm12
|
| - vmovdqa %ymm15,%ymm8
|
| -
|
| - leaq 320(%rsi),%rsi
|
| - subq $320,%rbx
|
| - movq $320,%rcx
|
| - cmpq $128,%rbx
|
| - jbe seal_avx2_hash
|
| - vpxor 0(%rsi),%ymm0,%ymm0
|
| - vpxor 32(%rsi),%ymm4,%ymm4
|
| - vpxor 64(%rsi),%ymm8,%ymm8
|
| - vpxor 96(%rsi),%ymm12,%ymm12
|
| - vmovdqu %ymm0,320(%rdi)
|
| - vmovdqu %ymm4,352(%rdi)
|
| - vmovdqu %ymm8,384(%rdi)
|
| - vmovdqu %ymm12,416(%rdi)
|
| - leaq 128(%rsi),%rsi
|
| - subq $128,%rbx
|
| - movq $8,%rcx
|
| - movq $2,%r8
|
| - cmpq $128,%rbx
|
| - jbe seal_avx2_tail_128
|
| - cmpq $256,%rbx
|
| - jbe seal_avx2_tail_256
|
| - cmpq $384,%rbx
|
| - jbe seal_avx2_tail_384
|
| - cmpq $512,%rbx
|
| - jbe seal_avx2_tail_512
|
| - vmovdqa .chacha20_consts(%rip),%ymm0
|
| - vmovdqa 64(%rbp),%ymm4
|
| - vmovdqa 96(%rbp),%ymm8
|
| - vmovdqa %ymm0,%ymm1
|
| - vmovdqa %ymm4,%ymm5
|
| - vmovdqa %ymm8,%ymm9
|
| - vmovdqa %ymm0,%ymm2
|
| - vmovdqa %ymm4,%ymm6
|
| - vmovdqa %ymm8,%ymm10
|
| - vmovdqa %ymm0,%ymm3
|
| - vmovdqa %ymm4,%ymm7
|
| - vmovdqa %ymm8,%ymm11
|
| - vmovdqa .avx2_inc(%rip),%ymm12
|
| - vpaddd 160(%rbp),%ymm12,%ymm15
|
| - vpaddd %ymm15,%ymm12,%ymm14
|
| - vpaddd %ymm14,%ymm12,%ymm13
|
| - vpaddd %ymm13,%ymm12,%ymm12
|
| - vmovdqa %ymm15,256(%rbp)
|
| - vmovdqa %ymm14,224(%rbp)
|
| - vmovdqa %ymm13,192(%rbp)
|
| - vmovdqa %ymm12,160(%rbp)
|
| - vmovdqa %ymm8,128(%rbp)
|
| - vmovdqa .rol16(%rip),%ymm8
|
| - vpaddd %ymm7,%ymm3,%ymm3
|
| - vpaddd %ymm6,%ymm2,%ymm2
|
| - vpaddd %ymm5,%ymm1,%ymm1
|
| - vpaddd %ymm4,%ymm0,%ymm0
|
| - vpxor %ymm3,%ymm15,%ymm15
|
| - vpxor %ymm2,%ymm14,%ymm14
|
| - vpxor %ymm1,%ymm13,%ymm13
|
| - vpxor %ymm0,%ymm12,%ymm12
|
| - vpshufb %ymm8,%ymm15,%ymm15
|
| - vpshufb %ymm8,%ymm14,%ymm14
|
| - vpshufb %ymm8,%ymm13,%ymm13
|
| - vpshufb %ymm8,%ymm12,%ymm12
|
| - vmovdqa 128(%rbp),%ymm8
|
| - vpaddd %ymm15,%ymm11,%ymm11
|
| - vpaddd %ymm14,%ymm10,%ymm10
|
| - vpaddd %ymm13,%ymm9,%ymm9
|
| - vpaddd %ymm12,%ymm8,%ymm8
|
| - vpxor %ymm11,%ymm7,%ymm7
|
| - vpxor %ymm10,%ymm6,%ymm6
|
| - vpxor %ymm9,%ymm5,%ymm5
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vmovdqa %ymm8,128(%rbp)
|
| - vpsrld $20,%ymm7,%ymm8
|
| - vpslld $32-20,%ymm7,%ymm7
|
| - vpxor %ymm8,%ymm7,%ymm7
|
| - vpsrld $20,%ymm6,%ymm8
|
| - vpslld $32-20,%ymm6,%ymm6
|
| - vpxor %ymm8,%ymm6,%ymm6
|
| - vpsrld $20,%ymm5,%ymm8
|
| - vpslld $32-20,%ymm5,%ymm5
|
| - vpxor %ymm8,%ymm5,%ymm5
|
| - vpsrld $20,%ymm4,%ymm8
|
| - vpslld $32-20,%ymm4,%ymm4
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vmovdqa .rol8(%rip),%ymm8
|
| - vpaddd %ymm7,%ymm3,%ymm3
|
| - vpaddd %ymm6,%ymm2,%ymm2
|
| - vpaddd %ymm5,%ymm1,%ymm1
|
| - vpaddd %ymm4,%ymm0,%ymm0
|
| - vpxor %ymm3,%ymm15,%ymm15
|
| - vpxor %ymm2,%ymm14,%ymm14
|
| - vpxor %ymm1,%ymm13,%ymm13
|
| - vpxor %ymm0,%ymm12,%ymm12
|
| - vpshufb %ymm8,%ymm15,%ymm15
|
| - vpshufb %ymm8,%ymm14,%ymm14
|
| - vpshufb %ymm8,%ymm13,%ymm13
|
| - vpshufb %ymm8,%ymm12,%ymm12
|
| - vmovdqa 128(%rbp),%ymm8
|
| - vpaddd %ymm15,%ymm11,%ymm11
|
| - vpaddd %ymm14,%ymm10,%ymm10
|
| - vpaddd %ymm13,%ymm9,%ymm9
|
| - vpaddd %ymm12,%ymm8,%ymm8
|
| - vpxor %ymm11,%ymm7,%ymm7
|
| - vpxor %ymm10,%ymm6,%ymm6
|
| - vpxor %ymm9,%ymm5,%ymm5
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vmovdqa %ymm8,128(%rbp)
|
| - vpsrld $25,%ymm7,%ymm8
|
| - vpslld $32-25,%ymm7,%ymm7
|
| - vpxor %ymm8,%ymm7,%ymm7
|
| - vpsrld $25,%ymm6,%ymm8
|
| - vpslld $32-25,%ymm6,%ymm6
|
| - vpxor %ymm8,%ymm6,%ymm6
|
| - vpsrld $25,%ymm5,%ymm8
|
| - vpslld $32-25,%ymm5,%ymm5
|
| - vpxor %ymm8,%ymm5,%ymm5
|
| - vpsrld $25,%ymm4,%ymm8
|
| - vpslld $32-25,%ymm4,%ymm4
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vmovdqa 128(%rbp),%ymm8
|
| - vpalignr $4,%ymm7,%ymm7,%ymm7
|
| - vpalignr $8,%ymm11,%ymm11,%ymm11
|
| - vpalignr $12,%ymm15,%ymm15,%ymm15
|
| - vpalignr $4,%ymm6,%ymm6,%ymm6
|
| - vpalignr $8,%ymm10,%ymm10,%ymm10
|
| - vpalignr $12,%ymm14,%ymm14,%ymm14
|
| - vpalignr $4,%ymm5,%ymm5,%ymm5
|
| - vpalignr $8,%ymm9,%ymm9,%ymm9
|
| - vpalignr $12,%ymm13,%ymm13,%ymm13
|
| - vpalignr $4,%ymm4,%ymm4,%ymm4
|
| - vpalignr $8,%ymm8,%ymm8,%ymm8
|
| - vpalignr $12,%ymm12,%ymm12,%ymm12
|
| - vmovdqa %ymm8,128(%rbp)
|
| - vmovdqa .rol16(%rip),%ymm8
|
| - vpaddd %ymm7,%ymm3,%ymm3
|
| - vpaddd %ymm6,%ymm2,%ymm2
|
| - vpaddd %ymm5,%ymm1,%ymm1
|
| - vpaddd %ymm4,%ymm0,%ymm0
|
| - vpxor %ymm3,%ymm15,%ymm15
|
| - vpxor %ymm2,%ymm14,%ymm14
|
| - vpxor %ymm1,%ymm13,%ymm13
|
| - vpxor %ymm0,%ymm12,%ymm12
|
| - vpshufb %ymm8,%ymm15,%ymm15
|
| - vpshufb %ymm8,%ymm14,%ymm14
|
| - vpshufb %ymm8,%ymm13,%ymm13
|
| - vpshufb %ymm8,%ymm12,%ymm12
|
| - vmovdqa 128(%rbp),%ymm8
|
| - vpaddd %ymm15,%ymm11,%ymm11
|
| - vpaddd %ymm14,%ymm10,%ymm10
|
| - vpaddd %ymm13,%ymm9,%ymm9
|
| - vpaddd %ymm12,%ymm8,%ymm8
|
| - vpxor %ymm11,%ymm7,%ymm7
|
| - vpxor %ymm10,%ymm6,%ymm6
|
| - vpxor %ymm9,%ymm5,%ymm5
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vmovdqa %ymm8,128(%rbp)
|
| - vpsrld $20,%ymm7,%ymm8
|
| - vpslld $32-20,%ymm7,%ymm7
|
| - vpxor %ymm8,%ymm7,%ymm7
|
| - vpsrld $20,%ymm6,%ymm8
|
| - vpslld $32-20,%ymm6,%ymm6
|
| - vpxor %ymm8,%ymm6,%ymm6
|
| - vpsrld $20,%ymm5,%ymm8
|
| - vpslld $32-20,%ymm5,%ymm5
|
| - vpxor %ymm8,%ymm5,%ymm5
|
| - vpsrld $20,%ymm4,%ymm8
|
| - vpslld $32-20,%ymm4,%ymm4
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vmovdqa .rol8(%rip),%ymm8
|
| - vpaddd %ymm7,%ymm3,%ymm3
|
| - vpaddd %ymm6,%ymm2,%ymm2
|
| - vpaddd %ymm5,%ymm1,%ymm1
|
| - vpaddd %ymm4,%ymm0,%ymm0
|
| - vpxor %ymm3,%ymm15,%ymm15
|
| - vpxor %ymm2,%ymm14,%ymm14
|
| - vpxor %ymm1,%ymm13,%ymm13
|
| - vpxor %ymm0,%ymm12,%ymm12
|
| - vpshufb %ymm8,%ymm15,%ymm15
|
| - vpshufb %ymm8,%ymm14,%ymm14
|
| - vpshufb %ymm8,%ymm13,%ymm13
|
| - vpshufb %ymm8,%ymm12,%ymm12
|
| - vmovdqa 128(%rbp),%ymm8
|
| - vpaddd %ymm15,%ymm11,%ymm11
|
| - vpaddd %ymm14,%ymm10,%ymm10
|
| - vpaddd %ymm13,%ymm9,%ymm9
|
| - vpaddd %ymm12,%ymm8,%ymm8
|
| - vpxor %ymm11,%ymm7,%ymm7
|
| - vpxor %ymm10,%ymm6,%ymm6
|
| - vpxor %ymm9,%ymm5,%ymm5
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vmovdqa %ymm8,128(%rbp)
|
| - vpsrld $25,%ymm7,%ymm8
|
| - vpslld $32-25,%ymm7,%ymm7
|
| - vpxor %ymm8,%ymm7,%ymm7
|
| - vpsrld $25,%ymm6,%ymm8
|
| - vpslld $32-25,%ymm6,%ymm6
|
| - vpxor %ymm8,%ymm6,%ymm6
|
| - vpsrld $25,%ymm5,%ymm8
|
| - vpslld $32-25,%ymm5,%ymm5
|
| - vpxor %ymm8,%ymm5,%ymm5
|
| - vpsrld $25,%ymm4,%ymm8
|
| - vpslld $32-25,%ymm4,%ymm4
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vmovdqa 128(%rbp),%ymm8
|
| - vpalignr $12,%ymm7,%ymm7,%ymm7
|
| - vpalignr $8,%ymm11,%ymm11,%ymm11
|
| - vpalignr $4,%ymm15,%ymm15,%ymm15
|
| - vpalignr $12,%ymm6,%ymm6,%ymm6
|
| - vpalignr $8,%ymm10,%ymm10,%ymm10
|
| - vpalignr $4,%ymm14,%ymm14,%ymm14
|
| - vpalignr $12,%ymm5,%ymm5,%ymm5
|
| - vpalignr $8,%ymm9,%ymm9,%ymm9
|
| - vpalignr $4,%ymm13,%ymm13,%ymm13
|
| - vpalignr $12,%ymm4,%ymm4,%ymm4
|
| - vpalignr $8,%ymm8,%ymm8,%ymm8
|
| - vpalignr $4,%ymm12,%ymm12,%ymm12
|
| - vmovdqa %ymm8,128(%rbp)
|
| - vmovdqa .rol16(%rip),%ymm8
|
| - vpaddd %ymm7,%ymm3,%ymm3
|
| - vpaddd %ymm6,%ymm2,%ymm2
|
| - vpaddd %ymm5,%ymm1,%ymm1
|
| - vpaddd %ymm4,%ymm0,%ymm0
|
| - vpxor %ymm3,%ymm15,%ymm15
|
| - vpxor %ymm2,%ymm14,%ymm14
|
| - vpxor %ymm1,%ymm13,%ymm13
|
| - vpxor %ymm0,%ymm12,%ymm12
|
| - vpshufb %ymm8,%ymm15,%ymm15
|
| - vpshufb %ymm8,%ymm14,%ymm14
|
| - vpshufb %ymm8,%ymm13,%ymm13
|
| - vpshufb %ymm8,%ymm12,%ymm12
|
| - vmovdqa 128(%rbp),%ymm8
|
| - vpaddd %ymm15,%ymm11,%ymm11
|
| - vpaddd %ymm14,%ymm10,%ymm10
|
| - vpaddd %ymm13,%ymm9,%ymm9
|
| - vpaddd %ymm12,%ymm8,%ymm8
|
| - vpxor %ymm11,%ymm7,%ymm7
|
| - vpxor %ymm10,%ymm6,%ymm6
|
| - vpxor %ymm9,%ymm5,%ymm5
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vmovdqa %ymm8,128(%rbp)
|
| - vpsrld $20,%ymm7,%ymm8
|
| - vpslld $32-20,%ymm7,%ymm7
|
| - vpxor %ymm8,%ymm7,%ymm7
|
| - vpsrld $20,%ymm6,%ymm8
|
| - vpslld $32-20,%ymm6,%ymm6
|
| - vpxor %ymm8,%ymm6,%ymm6
|
| - vpsrld $20,%ymm5,%ymm8
|
| - vpslld $32-20,%ymm5,%ymm5
|
| - vpxor %ymm8,%ymm5,%ymm5
|
| - vpsrld $20,%ymm4,%ymm8
|
| - vpslld $32-20,%ymm4,%ymm4
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vmovdqa .rol8(%rip),%ymm8
|
| - vpaddd %ymm7,%ymm3,%ymm3
|
| - vpaddd %ymm6,%ymm2,%ymm2
|
| - vpaddd %ymm5,%ymm1,%ymm1
|
| - vpaddd %ymm4,%ymm0,%ymm0
|
| -
|
| - subq $16,%rdi
|
| - movq $9,%rcx
|
| - jmp 4f
|
| -1:
|
| - vmovdqa .chacha20_consts(%rip),%ymm0
|
| - vmovdqa 64(%rbp),%ymm4
|
| - vmovdqa 96(%rbp),%ymm8
|
| - vmovdqa %ymm0,%ymm1
|
| - vmovdqa %ymm4,%ymm5
|
| - vmovdqa %ymm8,%ymm9
|
| - vmovdqa %ymm0,%ymm2
|
| - vmovdqa %ymm4,%ymm6
|
| - vmovdqa %ymm8,%ymm10
|
| - vmovdqa %ymm0,%ymm3
|
| - vmovdqa %ymm4,%ymm7
|
| - vmovdqa %ymm8,%ymm11
|
| - vmovdqa .avx2_inc(%rip),%ymm12
|
| - vpaddd 160(%rbp),%ymm12,%ymm15
|
| - vpaddd %ymm15,%ymm12,%ymm14
|
| - vpaddd %ymm14,%ymm12,%ymm13
|
| - vpaddd %ymm13,%ymm12,%ymm12
|
| - vmovdqa %ymm15,256(%rbp)
|
| - vmovdqa %ymm14,224(%rbp)
|
| - vmovdqa %ymm13,192(%rbp)
|
| - vmovdqa %ymm12,160(%rbp)
|
| -
|
| - movq $10,%rcx
|
| -2:
|
| - addq 0(%rdi),%r10
|
| - adcq 8+0(%rdi),%r11
|
| - adcq $1,%r12
|
| - vmovdqa %ymm8,128(%rbp)
|
| - vmovdqa .rol16(%rip),%ymm8
|
| - vpaddd %ymm7,%ymm3,%ymm3
|
| - vpaddd %ymm6,%ymm2,%ymm2
|
| - vpaddd %ymm5,%ymm1,%ymm1
|
| - vpaddd %ymm4,%ymm0,%ymm0
|
| - vpxor %ymm3,%ymm15,%ymm15
|
| - vpxor %ymm2,%ymm14,%ymm14
|
| - vpxor %ymm1,%ymm13,%ymm13
|
| - vpxor %ymm0,%ymm12,%ymm12
|
| - movq 0+0(%rbp),%rdx
|
| - movq %rdx,%r15
|
| - mulxq %r10,%r13,%r14
|
| - mulxq %r11,%rax,%rdx
|
| - imulq %r12,%r15
|
| - addq %rax,%r14
|
| - adcq %rdx,%r15
|
| - vpshufb %ymm8,%ymm15,%ymm15
|
| - vpshufb %ymm8,%ymm14,%ymm14
|
| - vpshufb %ymm8,%ymm13,%ymm13
|
| - vpshufb %ymm8,%ymm12,%ymm12
|
| - vmovdqa 128(%rbp),%ymm8
|
| - vpaddd %ymm15,%ymm11,%ymm11
|
| - vpaddd %ymm14,%ymm10,%ymm10
|
| - vpaddd %ymm13,%ymm9,%ymm9
|
| - vpaddd %ymm12,%ymm8,%ymm8
|
| - movq 8+0(%rbp),%rdx
|
| - mulxq %r10,%r10,%rax
|
| - addq %r10,%r14
|
| - mulxq %r11,%r11,%r9
|
| - adcq %r11,%r15
|
| - adcq $0,%r9
|
| - imulq %r12,%rdx
|
| - vpxor %ymm11,%ymm7,%ymm7
|
| - vpxor %ymm10,%ymm6,%ymm6
|
| - vpxor %ymm9,%ymm5,%ymm5
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vmovdqa %ymm8,128(%rbp)
|
| - vpsrld $20,%ymm7,%ymm8
|
| - vpslld $32-20,%ymm7,%ymm7
|
| - vpxor %ymm8,%ymm7,%ymm7
|
| - vpsrld $20,%ymm6,%ymm8
|
| - vpslld $32-20,%ymm6,%ymm6
|
| - vpxor %ymm8,%ymm6,%ymm6
|
| - vpsrld $20,%ymm5,%ymm8
|
| - addq %rax,%r15
|
| - adcq %rdx,%r9
|
| - vpslld $32-20,%ymm5,%ymm5
|
| - vpxor %ymm8,%ymm5,%ymm5
|
| - vpsrld $20,%ymm4,%ymm8
|
| - vpslld $32-20,%ymm4,%ymm4
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vmovdqa .rol8(%rip),%ymm8
|
| - vpaddd %ymm7,%ymm3,%ymm3
|
| - vpaddd %ymm6,%ymm2,%ymm2
|
| - vpaddd %ymm5,%ymm1,%ymm1
|
| - vpaddd %ymm4,%ymm0,%ymm0
|
| - movq %r13,%r10
|
| - movq %r14,%r11
|
| - movq %r15,%r12
|
| - andq $3,%r12
|
| - movq %r15,%r13
|
| - andq $-4,%r13
|
| - movq %r9,%r14
|
| - shrdq $2,%r9,%r15
|
| - shrq $2,%r9
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $0,%r12
|
| - addq %r15,%r10
|
| - adcq %r9,%r11
|
| - adcq $0,%r12
|
| -
|
| -4:
|
| - vpxor %ymm3,%ymm15,%ymm15
|
| - vpxor %ymm2,%ymm14,%ymm14
|
| - vpxor %ymm1,%ymm13,%ymm13
|
| - vpxor %ymm0,%ymm12,%ymm12
|
| - vpshufb %ymm8,%ymm15,%ymm15
|
| - vpshufb %ymm8,%ymm14,%ymm14
|
| - vpshufb %ymm8,%ymm13,%ymm13
|
| - vpshufb %ymm8,%ymm12,%ymm12
|
| - vmovdqa 128(%rbp),%ymm8
|
| - addq 16(%rdi),%r10
|
| - adcq 8+16(%rdi),%r11
|
| - adcq $1,%r12
|
| - vpaddd %ymm15,%ymm11,%ymm11
|
| - vpaddd %ymm14,%ymm10,%ymm10
|
| - vpaddd %ymm13,%ymm9,%ymm9
|
| - vpaddd %ymm12,%ymm8,%ymm8
|
| - vpxor %ymm11,%ymm7,%ymm7
|
| - vpxor %ymm10,%ymm6,%ymm6
|
| - vpxor %ymm9,%ymm5,%ymm5
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - movq 0+0(%rbp),%rdx
|
| - movq %rdx,%r15
|
| - mulxq %r10,%r13,%r14
|
| - mulxq %r11,%rax,%rdx
|
| - imulq %r12,%r15
|
| - addq %rax,%r14
|
| - adcq %rdx,%r15
|
| - vmovdqa %ymm8,128(%rbp)
|
| - vpsrld $25,%ymm7,%ymm8
|
| - vpslld $32-25,%ymm7,%ymm7
|
| - vpxor %ymm8,%ymm7,%ymm7
|
| - vpsrld $25,%ymm6,%ymm8
|
| - vpslld $32-25,%ymm6,%ymm6
|
| - vpxor %ymm8,%ymm6,%ymm6
|
| - vpsrld $25,%ymm5,%ymm8
|
| - vpslld $32-25,%ymm5,%ymm5
|
| - vpxor %ymm8,%ymm5,%ymm5
|
| - vpsrld $25,%ymm4,%ymm8
|
| - vpslld $32-25,%ymm4,%ymm4
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vmovdqa 128(%rbp),%ymm8
|
| - vpalignr $4,%ymm7,%ymm7,%ymm7
|
| - vpalignr $8,%ymm11,%ymm11,%ymm11
|
| - vpalignr $12,%ymm15,%ymm15,%ymm15
|
| - vpalignr $4,%ymm6,%ymm6,%ymm6
|
| - movq 8+0(%rbp),%rdx
|
| - mulxq %r10,%r10,%rax
|
| - addq %r10,%r14
|
| - mulxq %r11,%r11,%r9
|
| - adcq %r11,%r15
|
| - adcq $0,%r9
|
| - imulq %r12,%rdx
|
| - vpalignr $8,%ymm10,%ymm10,%ymm10
|
| - vpalignr $12,%ymm14,%ymm14,%ymm14
|
| - vpalignr $4,%ymm5,%ymm5,%ymm5
|
| - vpalignr $8,%ymm9,%ymm9,%ymm9
|
| - vpalignr $12,%ymm13,%ymm13,%ymm13
|
| - vpalignr $4,%ymm4,%ymm4,%ymm4
|
| - vpalignr $8,%ymm8,%ymm8,%ymm8
|
| - vpalignr $12,%ymm12,%ymm12,%ymm12
|
| - vmovdqa %ymm8,128(%rbp)
|
| - vmovdqa .rol16(%rip),%ymm8
|
| - vpaddd %ymm7,%ymm3,%ymm3
|
| - vpaddd %ymm6,%ymm2,%ymm2
|
| - vpaddd %ymm5,%ymm1,%ymm1
|
| - vpaddd %ymm4,%ymm0,%ymm0
|
| - vpxor %ymm3,%ymm15,%ymm15
|
| - vpxor %ymm2,%ymm14,%ymm14
|
| - vpxor %ymm1,%ymm13,%ymm13
|
| - vpxor %ymm0,%ymm12,%ymm12
|
| - addq %rax,%r15
|
| - adcq %rdx,%r9
|
| - vpshufb %ymm8,%ymm15,%ymm15
|
| - vpshufb %ymm8,%ymm14,%ymm14
|
| - vpshufb %ymm8,%ymm13,%ymm13
|
| - vpshufb %ymm8,%ymm12,%ymm12
|
| - vmovdqa 128(%rbp),%ymm8
|
| - vpaddd %ymm15,%ymm11,%ymm11
|
| - vpaddd %ymm14,%ymm10,%ymm10
|
| - vpaddd %ymm13,%ymm9,%ymm9
|
| - vpaddd %ymm12,%ymm8,%ymm8
|
| - movq %r13,%r10
|
| - movq %r14,%r11
|
| - movq %r15,%r12
|
| - andq $3,%r12
|
| - movq %r15,%r13
|
| - andq $-4,%r13
|
| - movq %r9,%r14
|
| - shrdq $2,%r9,%r15
|
| - shrq $2,%r9
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $0,%r12
|
| - addq %r15,%r10
|
| - adcq %r9,%r11
|
| - adcq $0,%r12
|
| - vpxor %ymm11,%ymm7,%ymm7
|
| - vpxor %ymm10,%ymm6,%ymm6
|
| - vpxor %ymm9,%ymm5,%ymm5
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vmovdqa %ymm8,128(%rbp)
|
| - vpsrld $20,%ymm7,%ymm8
|
| - vpslld $32-20,%ymm7,%ymm7
|
| - vpxor %ymm8,%ymm7,%ymm7
|
| - addq 32(%rdi),%r10
|
| - adcq 8+32(%rdi),%r11
|
| - adcq $1,%r12
|
| -
|
| - leaq 48(%rdi),%rdi
|
| - vpsrld $20,%ymm6,%ymm8
|
| - vpslld $32-20,%ymm6,%ymm6
|
| - vpxor %ymm8,%ymm6,%ymm6
|
| - vpsrld $20,%ymm5,%ymm8
|
| - vpslld $32-20,%ymm5,%ymm5
|
| - vpxor %ymm8,%ymm5,%ymm5
|
| - vpsrld $20,%ymm4,%ymm8
|
| - vpslld $32-20,%ymm4,%ymm4
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vmovdqa .rol8(%rip),%ymm8
|
| - vpaddd %ymm7,%ymm3,%ymm3
|
| - vpaddd %ymm6,%ymm2,%ymm2
|
| - vpaddd %ymm5,%ymm1,%ymm1
|
| - vpaddd %ymm4,%ymm0,%ymm0
|
| - vpxor %ymm3,%ymm15,%ymm15
|
| - vpxor %ymm2,%ymm14,%ymm14
|
| - vpxor %ymm1,%ymm13,%ymm13
|
| - vpxor %ymm0,%ymm12,%ymm12
|
| - movq 0+0(%rbp),%rdx
|
| - movq %rdx,%r15
|
| - mulxq %r10,%r13,%r14
|
| - mulxq %r11,%rax,%rdx
|
| - imulq %r12,%r15
|
| - addq %rax,%r14
|
| - adcq %rdx,%r15
|
| - vpshufb %ymm8,%ymm15,%ymm15
|
| - vpshufb %ymm8,%ymm14,%ymm14
|
| - vpshufb %ymm8,%ymm13,%ymm13
|
| - vpshufb %ymm8,%ymm12,%ymm12
|
| - vmovdqa 128(%rbp),%ymm8
|
| - vpaddd %ymm15,%ymm11,%ymm11
|
| - vpaddd %ymm14,%ymm10,%ymm10
|
| - vpaddd %ymm13,%ymm9,%ymm9
|
| - movq 8+0(%rbp),%rdx
|
| - mulxq %r10,%r10,%rax
|
| - addq %r10,%r14
|
| - mulxq %r11,%r11,%r9
|
| - adcq %r11,%r15
|
| - adcq $0,%r9
|
| - imulq %r12,%rdx
|
| - vpaddd %ymm12,%ymm8,%ymm8
|
| - vpxor %ymm11,%ymm7,%ymm7
|
| - vpxor %ymm10,%ymm6,%ymm6
|
| - vpxor %ymm9,%ymm5,%ymm5
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vmovdqa %ymm8,128(%rbp)
|
| - vpsrld $25,%ymm7,%ymm8
|
| - vpslld $32-25,%ymm7,%ymm7
|
| - addq %rax,%r15
|
| - adcq %rdx,%r9
|
| - vpxor %ymm8,%ymm7,%ymm7
|
| - vpsrld $25,%ymm6,%ymm8
|
| - vpslld $32-25,%ymm6,%ymm6
|
| - vpxor %ymm8,%ymm6,%ymm6
|
| - vpsrld $25,%ymm5,%ymm8
|
| - vpslld $32-25,%ymm5,%ymm5
|
| - vpxor %ymm8,%ymm5,%ymm5
|
| - vpsrld $25,%ymm4,%ymm8
|
| - vpslld $32-25,%ymm4,%ymm4
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vmovdqa 128(%rbp),%ymm8
|
| - vpalignr $12,%ymm7,%ymm7,%ymm7
|
| - vpalignr $8,%ymm11,%ymm11,%ymm11
|
| - vpalignr $4,%ymm15,%ymm15,%ymm15
|
| - vpalignr $12,%ymm6,%ymm6,%ymm6
|
| - vpalignr $8,%ymm10,%ymm10,%ymm10
|
| - vpalignr $4,%ymm14,%ymm14,%ymm14
|
| - vpalignr $12,%ymm5,%ymm5,%ymm5
|
| - movq %r13,%r10
|
| - movq %r14,%r11
|
| - movq %r15,%r12
|
| - andq $3,%r12
|
| - movq %r15,%r13
|
| - andq $-4,%r13
|
| - movq %r9,%r14
|
| - shrdq $2,%r9,%r15
|
| - shrq $2,%r9
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $0,%r12
|
| - addq %r15,%r10
|
| - adcq %r9,%r11
|
| - adcq $0,%r12
|
| - vpalignr $8,%ymm9,%ymm9,%ymm9
|
| - vpalignr $4,%ymm13,%ymm13,%ymm13
|
| - vpalignr $12,%ymm4,%ymm4,%ymm4
|
| - vpalignr $8,%ymm8,%ymm8,%ymm8
|
| - vpalignr $4,%ymm12,%ymm12,%ymm12
|
| -
|
| - decq %rcx
|
| - jne 2b
|
| - vpaddd .chacha20_consts(%rip),%ymm3,%ymm3
|
| - vpaddd 64(%rbp),%ymm7,%ymm7
|
| - vpaddd 96(%rbp),%ymm11,%ymm11
|
| - vpaddd 256(%rbp),%ymm15,%ymm15
|
| - vpaddd .chacha20_consts(%rip),%ymm2,%ymm2
|
| - vpaddd 64(%rbp),%ymm6,%ymm6
|
| - vpaddd 96(%rbp),%ymm10,%ymm10
|
| - vpaddd 224(%rbp),%ymm14,%ymm14
|
| - vpaddd .chacha20_consts(%rip),%ymm1,%ymm1
|
| - vpaddd 64(%rbp),%ymm5,%ymm5
|
| - vpaddd 96(%rbp),%ymm9,%ymm9
|
| - vpaddd 192(%rbp),%ymm13,%ymm13
|
| - vpaddd .chacha20_consts(%rip),%ymm0,%ymm0
|
| - vpaddd 64(%rbp),%ymm4,%ymm4
|
| - vpaddd 96(%rbp),%ymm8,%ymm8
|
| - vpaddd 160(%rbp),%ymm12,%ymm12
|
| -
|
| - leaq 32(%rdi),%rdi
|
| - vmovdqa %ymm0,128(%rbp)
|
| - addq -32(%rdi),%r10
|
| - adcq 8+-32(%rdi),%r11
|
| - adcq $1,%r12
|
| - vperm2i128 $0x02,%ymm3,%ymm7,%ymm0
|
| - vperm2i128 $0x13,%ymm3,%ymm7,%ymm7
|
| - vperm2i128 $0x02,%ymm11,%ymm15,%ymm3
|
| - vperm2i128 $0x13,%ymm11,%ymm15,%ymm11
|
| - vpxor 0+0(%rsi),%ymm0,%ymm0
|
| - vpxor 32+0(%rsi),%ymm3,%ymm3
|
| - vpxor 64+0(%rsi),%ymm7,%ymm7
|
| - vpxor 96+0(%rsi),%ymm11,%ymm11
|
| - vmovdqu %ymm0,0+0(%rdi)
|
| - vmovdqu %ymm3,32+0(%rdi)
|
| - vmovdqu %ymm7,64+0(%rdi)
|
| - vmovdqu %ymm11,96+0(%rdi)
|
| -
|
| - vmovdqa 128(%rbp),%ymm0
|
| - movq 0+0(%rbp),%rax
|
| - movq %rax,%r15
|
| - mulq %r10
|
| - movq %rax,%r13
|
| - movq %rdx,%r14
|
| - movq 0+0(%rbp),%rax
|
| - mulq %r11
|
| - imulq %r12,%r15
|
| - addq %rax,%r14
|
| - adcq %rdx,%r15
|
| - movq 8+0(%rbp),%rax
|
| - movq %rax,%r9
|
| - mulq %r10
|
| - addq %rax,%r14
|
| - adcq $0,%rdx
|
| - movq %rdx,%r10
|
| - movq 8+0(%rbp),%rax
|
| - mulq %r11
|
| - addq %rax,%r15
|
| - adcq $0,%rdx
|
| - imulq %r12,%r9
|
| - addq %r10,%r15
|
| - adcq %rdx,%r9
|
| - movq %r13,%r10
|
| - movq %r14,%r11
|
| - movq %r15,%r12
|
| - andq $3,%r12
|
| - movq %r15,%r13
|
| - andq $-4,%r13
|
| - movq %r9,%r14
|
| - shrdq $2,%r9,%r15
|
| - shrq $2,%r9
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $0,%r12
|
| - addq %r15,%r10
|
| - adcq %r9,%r11
|
| - adcq $0,%r12
|
| - vperm2i128 $0x02,%ymm2,%ymm6,%ymm3
|
| - vperm2i128 $0x13,%ymm2,%ymm6,%ymm6
|
| - vperm2i128 $0x02,%ymm10,%ymm14,%ymm2
|
| - vperm2i128 $0x13,%ymm10,%ymm14,%ymm10
|
| - vpxor 0+128(%rsi),%ymm3,%ymm3
|
| - vpxor 32+128(%rsi),%ymm2,%ymm2
|
| - vpxor 64+128(%rsi),%ymm6,%ymm6
|
| - vpxor 96+128(%rsi),%ymm10,%ymm10
|
| - vmovdqu %ymm3,0+128(%rdi)
|
| - vmovdqu %ymm2,32+128(%rdi)
|
| - vmovdqu %ymm6,64+128(%rdi)
|
| - vmovdqu %ymm10,96+128(%rdi)
|
| - addq -16(%rdi),%r10
|
| - adcq 8+-16(%rdi),%r11
|
| - adcq $1,%r12
|
| - vperm2i128 $0x02,%ymm1,%ymm5,%ymm3
|
| - vperm2i128 $0x13,%ymm1,%ymm5,%ymm5
|
| - vperm2i128 $0x02,%ymm9,%ymm13,%ymm1
|
| - vperm2i128 $0x13,%ymm9,%ymm13,%ymm9
|
| - vpxor 0+256(%rsi),%ymm3,%ymm3
|
| - vpxor 32+256(%rsi),%ymm1,%ymm1
|
| - vpxor 64+256(%rsi),%ymm5,%ymm5
|
| - vpxor 96+256(%rsi),%ymm9,%ymm9
|
| - vmovdqu %ymm3,0+256(%rdi)
|
| - vmovdqu %ymm1,32+256(%rdi)
|
| - vmovdqu %ymm5,64+256(%rdi)
|
| - vmovdqu %ymm9,96+256(%rdi)
|
| - movq 0+0(%rbp),%rax
|
| - movq %rax,%r15
|
| - mulq %r10
|
| - movq %rax,%r13
|
| - movq %rdx,%r14
|
| - movq 0+0(%rbp),%rax
|
| - mulq %r11
|
| - imulq %r12,%r15
|
| - addq %rax,%r14
|
| - adcq %rdx,%r15
|
| - movq 8+0(%rbp),%rax
|
| - movq %rax,%r9
|
| - mulq %r10
|
| - addq %rax,%r14
|
| - adcq $0,%rdx
|
| - movq %rdx,%r10
|
| - movq 8+0(%rbp),%rax
|
| - mulq %r11
|
| - addq %rax,%r15
|
| - adcq $0,%rdx
|
| - imulq %r12,%r9
|
| - addq %r10,%r15
|
| - adcq %rdx,%r9
|
| - movq %r13,%r10
|
| - movq %r14,%r11
|
| - movq %r15,%r12
|
| - andq $3,%r12
|
| - movq %r15,%r13
|
| - andq $-4,%r13
|
| - movq %r9,%r14
|
| - shrdq $2,%r9,%r15
|
| - shrq $2,%r9
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $0,%r12
|
| - addq %r15,%r10
|
| - adcq %r9,%r11
|
| - adcq $0,%r12
|
| - vperm2i128 $0x02,%ymm0,%ymm4,%ymm3
|
| - vperm2i128 $0x13,%ymm0,%ymm4,%ymm4
|
| - vperm2i128 $0x02,%ymm8,%ymm12,%ymm0
|
| - vperm2i128 $0x13,%ymm8,%ymm12,%ymm8
|
| - vpxor 0+384(%rsi),%ymm3,%ymm3
|
| - vpxor 32+384(%rsi),%ymm0,%ymm0
|
| - vpxor 64+384(%rsi),%ymm4,%ymm4
|
| - vpxor 96+384(%rsi),%ymm8,%ymm8
|
| - vmovdqu %ymm3,0+384(%rdi)
|
| - vmovdqu %ymm0,32+384(%rdi)
|
| - vmovdqu %ymm4,64+384(%rdi)
|
| - vmovdqu %ymm8,96+384(%rdi)
|
| -
|
| - leaq 512(%rsi),%rsi
|
| - subq $512,%rbx
|
| - cmpq $512,%rbx
|
| - jg 1b
|
| - addq 0(%rdi),%r10
|
| - adcq 8+0(%rdi),%r11
|
| - adcq $1,%r12
|
| - movq 0+0(%rbp),%rax
|
| - movq %rax,%r15
|
| - mulq %r10
|
| - movq %rax,%r13
|
| - movq %rdx,%r14
|
| - movq 0+0(%rbp),%rax
|
| - mulq %r11
|
| - imulq %r12,%r15
|
| - addq %rax,%r14
|
| - adcq %rdx,%r15
|
| - movq 8+0(%rbp),%rax
|
| - movq %rax,%r9
|
| - mulq %r10
|
| - addq %rax,%r14
|
| - adcq $0,%rdx
|
| - movq %rdx,%r10
|
| - movq 8+0(%rbp),%rax
|
| - mulq %r11
|
| - addq %rax,%r15
|
| - adcq $0,%rdx
|
| - imulq %r12,%r9
|
| - addq %r10,%r15
|
| - adcq %rdx,%r9
|
| - movq %r13,%r10
|
| - movq %r14,%r11
|
| - movq %r15,%r12
|
| - andq $3,%r12
|
| - movq %r15,%r13
|
| - andq $-4,%r13
|
| - movq %r9,%r14
|
| - shrdq $2,%r9,%r15
|
| - shrq $2,%r9
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $0,%r12
|
| - addq %r15,%r10
|
| - adcq %r9,%r11
|
| - adcq $0,%r12
|
| - addq 16(%rdi),%r10
|
| - adcq 8+16(%rdi),%r11
|
| - adcq $1,%r12
|
| - movq 0+0(%rbp),%rax
|
| - movq %rax,%r15
|
| - mulq %r10
|
| - movq %rax,%r13
|
| - movq %rdx,%r14
|
| - movq 0+0(%rbp),%rax
|
| - mulq %r11
|
| - imulq %r12,%r15
|
| - addq %rax,%r14
|
| - adcq %rdx,%r15
|
| - movq 8+0(%rbp),%rax
|
| - movq %rax,%r9
|
| - mulq %r10
|
| - addq %rax,%r14
|
| - adcq $0,%rdx
|
| - movq %rdx,%r10
|
| - movq 8+0(%rbp),%rax
|
| - mulq %r11
|
| - addq %rax,%r15
|
| - adcq $0,%rdx
|
| - imulq %r12,%r9
|
| - addq %r10,%r15
|
| - adcq %rdx,%r9
|
| - movq %r13,%r10
|
| - movq %r14,%r11
|
| - movq %r15,%r12
|
| - andq $3,%r12
|
| - movq %r15,%r13
|
| - andq $-4,%r13
|
| - movq %r9,%r14
|
| - shrdq $2,%r9,%r15
|
| - shrq $2,%r9
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $0,%r12
|
| - addq %r15,%r10
|
| - adcq %r9,%r11
|
| - adcq $0,%r12
|
| -
|
| - leaq 32(%rdi),%rdi
|
| - movq $10,%rcx
|
| - xorq %r8,%r8
|
| - cmpq $128,%rbx
|
| - ja 3f
|
| -
|
| -seal_avx2_tail_128:
|
| - vmovdqa .chacha20_consts(%rip),%ymm0
|
| - vmovdqa 64(%rbp),%ymm4
|
| - vmovdqa 96(%rbp),%ymm8
|
| - vmovdqa .avx2_inc(%rip),%ymm12
|
| - vpaddd 160(%rbp),%ymm12,%ymm12
|
| - vmovdqa %ymm12,160(%rbp)
|
| -
|
| -1:
|
| - addq 0(%rdi),%r10
|
| - adcq 8+0(%rdi),%r11
|
| - adcq $1,%r12
|
| - movq 0+0(%rbp),%rax
|
| - movq %rax,%r15
|
| - mulq %r10
|
| - movq %rax,%r13
|
| - movq %rdx,%r14
|
| - movq 0+0(%rbp),%rax
|
| - mulq %r11
|
| - imulq %r12,%r15
|
| - addq %rax,%r14
|
| - adcq %rdx,%r15
|
| - movq 8+0(%rbp),%rax
|
| - movq %rax,%r9
|
| - mulq %r10
|
| - addq %rax,%r14
|
| - adcq $0,%rdx
|
| - movq %rdx,%r10
|
| - movq 8+0(%rbp),%rax
|
| - mulq %r11
|
| - addq %rax,%r15
|
| - adcq $0,%rdx
|
| - imulq %r12,%r9
|
| - addq %r10,%r15
|
| - adcq %rdx,%r9
|
| - movq %r13,%r10
|
| - movq %r14,%r11
|
| - movq %r15,%r12
|
| - andq $3,%r12
|
| - movq %r15,%r13
|
| - andq $-4,%r13
|
| - movq %r9,%r14
|
| - shrdq $2,%r9,%r15
|
| - shrq $2,%r9
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $0,%r12
|
| - addq %r15,%r10
|
| - adcq %r9,%r11
|
| - adcq $0,%r12
|
| -
|
| - leaq 16(%rdi),%rdi
|
| -2:
|
| - vpaddd %ymm4,%ymm0,%ymm0
|
| - vpxor %ymm0,%ymm12,%ymm12
|
| - vpshufb .rol16(%rip),%ymm12,%ymm12
|
| - vpaddd %ymm12,%ymm8,%ymm8
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vpsrld $20,%ymm4,%ymm3
|
| - vpslld $12,%ymm4,%ymm4
|
| - vpxor %ymm3,%ymm4,%ymm4
|
| - vpaddd %ymm4,%ymm0,%ymm0
|
| - vpxor %ymm0,%ymm12,%ymm12
|
| - vpshufb .rol8(%rip),%ymm12,%ymm12
|
| - vpaddd %ymm12,%ymm8,%ymm8
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vpslld $7,%ymm4,%ymm3
|
| - vpsrld $25,%ymm4,%ymm4
|
| - vpxor %ymm3,%ymm4,%ymm4
|
| - vpalignr $12,%ymm12,%ymm12,%ymm12
|
| - vpalignr $8,%ymm8,%ymm8,%ymm8
|
| - vpalignr $4,%ymm4,%ymm4,%ymm4
|
| - addq 0(%rdi),%r10
|
| - adcq 8+0(%rdi),%r11
|
| - adcq $1,%r12
|
| - movq 0+0(%rbp),%rax
|
| - movq %rax,%r15
|
| - mulq %r10
|
| - movq %rax,%r13
|
| - movq %rdx,%r14
|
| - movq 0+0(%rbp),%rax
|
| - mulq %r11
|
| - imulq %r12,%r15
|
| - addq %rax,%r14
|
| - adcq %rdx,%r15
|
| - movq 8+0(%rbp),%rax
|
| - movq %rax,%r9
|
| - mulq %r10
|
| - addq %rax,%r14
|
| - adcq $0,%rdx
|
| - movq %rdx,%r10
|
| - movq 8+0(%rbp),%rax
|
| - mulq %r11
|
| - addq %rax,%r15
|
| - adcq $0,%rdx
|
| - imulq %r12,%r9
|
| - addq %r10,%r15
|
| - adcq %rdx,%r9
|
| - movq %r13,%r10
|
| - movq %r14,%r11
|
| - movq %r15,%r12
|
| - andq $3,%r12
|
| - movq %r15,%r13
|
| - andq $-4,%r13
|
| - movq %r9,%r14
|
| - shrdq $2,%r9,%r15
|
| - shrq $2,%r9
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $0,%r12
|
| - addq %r15,%r10
|
| - adcq %r9,%r11
|
| - adcq $0,%r12
|
| - vpaddd %ymm4,%ymm0,%ymm0
|
| - vpxor %ymm0,%ymm12,%ymm12
|
| - vpshufb .rol16(%rip),%ymm12,%ymm12
|
| - vpaddd %ymm12,%ymm8,%ymm8
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vpsrld $20,%ymm4,%ymm3
|
| - vpslld $12,%ymm4,%ymm4
|
| - vpxor %ymm3,%ymm4,%ymm4
|
| - vpaddd %ymm4,%ymm0,%ymm0
|
| - vpxor %ymm0,%ymm12,%ymm12
|
| - vpshufb .rol8(%rip),%ymm12,%ymm12
|
| - vpaddd %ymm12,%ymm8,%ymm8
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vpslld $7,%ymm4,%ymm3
|
| - vpsrld $25,%ymm4,%ymm4
|
| - vpxor %ymm3,%ymm4,%ymm4
|
| - vpalignr $4,%ymm12,%ymm12,%ymm12
|
| - vpalignr $8,%ymm8,%ymm8,%ymm8
|
| - vpalignr $12,%ymm4,%ymm4,%ymm4
|
| - addq 16(%rdi),%r10
|
| - adcq 8+16(%rdi),%r11
|
| - adcq $1,%r12
|
| - movq 0+0(%rbp),%rax
|
| - movq %rax,%r15
|
| - mulq %r10
|
| - movq %rax,%r13
|
| - movq %rdx,%r14
|
| - movq 0+0(%rbp),%rax
|
| - mulq %r11
|
| - imulq %r12,%r15
|
| - addq %rax,%r14
|
| - adcq %rdx,%r15
|
| - movq 8+0(%rbp),%rax
|
| - movq %rax,%r9
|
| - mulq %r10
|
| - addq %rax,%r14
|
| - adcq $0,%rdx
|
| - movq %rdx,%r10
|
| - movq 8+0(%rbp),%rax
|
| - mulq %r11
|
| - addq %rax,%r15
|
| - adcq $0,%rdx
|
| - imulq %r12,%r9
|
| - addq %r10,%r15
|
| - adcq %rdx,%r9
|
| - movq %r13,%r10
|
| - movq %r14,%r11
|
| - movq %r15,%r12
|
| - andq $3,%r12
|
| - movq %r15,%r13
|
| - andq $-4,%r13
|
| - movq %r9,%r14
|
| - shrdq $2,%r9,%r15
|
| - shrq $2,%r9
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $0,%r12
|
| - addq %r15,%r10
|
| - adcq %r9,%r11
|
| - adcq $0,%r12
|
| -
|
| - leaq 32(%rdi),%rdi
|
| - decq %rcx
|
| - jg 1b
|
| - decq %r8
|
| - jge 2b
|
| - vpaddd .chacha20_consts(%rip),%ymm0,%ymm0
|
| - vpaddd 64(%rbp),%ymm4,%ymm4
|
| - vpaddd 96(%rbp),%ymm8,%ymm8
|
| - vpaddd 160(%rbp),%ymm12,%ymm12
|
| - vperm2i128 $0x13,%ymm0,%ymm4,%ymm3
|
| - vperm2i128 $0x02,%ymm0,%ymm4,%ymm0
|
| - vperm2i128 $0x02,%ymm8,%ymm12,%ymm4
|
| - vperm2i128 $0x13,%ymm8,%ymm12,%ymm12
|
| - vmovdqa %ymm3,%ymm8
|
| -
|
| - jmp seal_avx2_short_loop
|
| -3:
|
| - cmpq $256,%rbx
|
| - ja 3f
|
| -
|
| -seal_avx2_tail_256:
|
| - vmovdqa .chacha20_consts(%rip),%ymm0
|
| - vmovdqa 64(%rbp),%ymm4
|
| - vmovdqa 96(%rbp),%ymm8
|
| - vmovdqa %ymm0,%ymm1
|
| - vmovdqa %ymm4,%ymm5
|
| - vmovdqa %ymm8,%ymm9
|
| - vmovdqa .avx2_inc(%rip),%ymm12
|
| - vpaddd 160(%rbp),%ymm12,%ymm13
|
| - vpaddd %ymm13,%ymm12,%ymm12
|
| - vmovdqa %ymm12,160(%rbp)
|
| - vmovdqa %ymm13,192(%rbp)
|
| -
|
| -1:
|
| - addq 0(%rdi),%r10
|
| - adcq 8+0(%rdi),%r11
|
| - adcq $1,%r12
|
| - movq 0+0(%rbp),%rax
|
| - movq %rax,%r15
|
| - mulq %r10
|
| - movq %rax,%r13
|
| - movq %rdx,%r14
|
| - movq 0+0(%rbp),%rax
|
| - mulq %r11
|
| - imulq %r12,%r15
|
| - addq %rax,%r14
|
| - adcq %rdx,%r15
|
| - movq 8+0(%rbp),%rax
|
| - movq %rax,%r9
|
| - mulq %r10
|
| - addq %rax,%r14
|
| - adcq $0,%rdx
|
| - movq %rdx,%r10
|
| - movq 8+0(%rbp),%rax
|
| - mulq %r11
|
| - addq %rax,%r15
|
| - adcq $0,%rdx
|
| - imulq %r12,%r9
|
| - addq %r10,%r15
|
| - adcq %rdx,%r9
|
| - movq %r13,%r10
|
| - movq %r14,%r11
|
| - movq %r15,%r12
|
| - andq $3,%r12
|
| - movq %r15,%r13
|
| - andq $-4,%r13
|
| - movq %r9,%r14
|
| - shrdq $2,%r9,%r15
|
| - shrq $2,%r9
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $0,%r12
|
| - addq %r15,%r10
|
| - adcq %r9,%r11
|
| - adcq $0,%r12
|
| -
|
| - leaq 16(%rdi),%rdi
|
| -2:
|
| - vpaddd %ymm4,%ymm0,%ymm0
|
| - vpxor %ymm0,%ymm12,%ymm12
|
| - vpshufb .rol16(%rip),%ymm12,%ymm12
|
| - vpaddd %ymm12,%ymm8,%ymm8
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vpsrld $20,%ymm4,%ymm3
|
| - vpslld $12,%ymm4,%ymm4
|
| - vpxor %ymm3,%ymm4,%ymm4
|
| - vpaddd %ymm4,%ymm0,%ymm0
|
| - vpxor %ymm0,%ymm12,%ymm12
|
| - vpshufb .rol8(%rip),%ymm12,%ymm12
|
| - vpaddd %ymm12,%ymm8,%ymm8
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vpslld $7,%ymm4,%ymm3
|
| - vpsrld $25,%ymm4,%ymm4
|
| - vpxor %ymm3,%ymm4,%ymm4
|
| - vpalignr $12,%ymm12,%ymm12,%ymm12
|
| - vpalignr $8,%ymm8,%ymm8,%ymm8
|
| - vpalignr $4,%ymm4,%ymm4,%ymm4
|
| - vpaddd %ymm5,%ymm1,%ymm1
|
| - vpxor %ymm1,%ymm13,%ymm13
|
| - vpshufb .rol16(%rip),%ymm13,%ymm13
|
| - vpaddd %ymm13,%ymm9,%ymm9
|
| - vpxor %ymm9,%ymm5,%ymm5
|
| - vpsrld $20,%ymm5,%ymm3
|
| - vpslld $12,%ymm5,%ymm5
|
| - vpxor %ymm3,%ymm5,%ymm5
|
| - vpaddd %ymm5,%ymm1,%ymm1
|
| - vpxor %ymm1,%ymm13,%ymm13
|
| - vpshufb .rol8(%rip),%ymm13,%ymm13
|
| - vpaddd %ymm13,%ymm9,%ymm9
|
| - vpxor %ymm9,%ymm5,%ymm5
|
| - vpslld $7,%ymm5,%ymm3
|
| - vpsrld $25,%ymm5,%ymm5
|
| - vpxor %ymm3,%ymm5,%ymm5
|
| - vpalignr $12,%ymm13,%ymm13,%ymm13
|
| - vpalignr $8,%ymm9,%ymm9,%ymm9
|
| - vpalignr $4,%ymm5,%ymm5,%ymm5
|
| - addq 0(%rdi),%r10
|
| - adcq 8+0(%rdi),%r11
|
| - adcq $1,%r12
|
| - movq 0+0(%rbp),%rax
|
| - movq %rax,%r15
|
| - mulq %r10
|
| - movq %rax,%r13
|
| - movq %rdx,%r14
|
| - movq 0+0(%rbp),%rax
|
| - mulq %r11
|
| - imulq %r12,%r15
|
| - addq %rax,%r14
|
| - adcq %rdx,%r15
|
| - movq 8+0(%rbp),%rax
|
| - movq %rax,%r9
|
| - mulq %r10
|
| - addq %rax,%r14
|
| - adcq $0,%rdx
|
| - movq %rdx,%r10
|
| - movq 8+0(%rbp),%rax
|
| - mulq %r11
|
| - addq %rax,%r15
|
| - adcq $0,%rdx
|
| - imulq %r12,%r9
|
| - addq %r10,%r15
|
| - adcq %rdx,%r9
|
| - movq %r13,%r10
|
| - movq %r14,%r11
|
| - movq %r15,%r12
|
| - andq $3,%r12
|
| - movq %r15,%r13
|
| - andq $-4,%r13
|
| - movq %r9,%r14
|
| - shrdq $2,%r9,%r15
|
| - shrq $2,%r9
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $0,%r12
|
| - addq %r15,%r10
|
| - adcq %r9,%r11
|
| - adcq $0,%r12
|
| - vpaddd %ymm4,%ymm0,%ymm0
|
| - vpxor %ymm0,%ymm12,%ymm12
|
| - vpshufb .rol16(%rip),%ymm12,%ymm12
|
| - vpaddd %ymm12,%ymm8,%ymm8
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vpsrld $20,%ymm4,%ymm3
|
| - vpslld $12,%ymm4,%ymm4
|
| - vpxor %ymm3,%ymm4,%ymm4
|
| - vpaddd %ymm4,%ymm0,%ymm0
|
| - vpxor %ymm0,%ymm12,%ymm12
|
| - vpshufb .rol8(%rip),%ymm12,%ymm12
|
| - vpaddd %ymm12,%ymm8,%ymm8
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vpslld $7,%ymm4,%ymm3
|
| - vpsrld $25,%ymm4,%ymm4
|
| - vpxor %ymm3,%ymm4,%ymm4
|
| - vpalignr $4,%ymm12,%ymm12,%ymm12
|
| - vpalignr $8,%ymm8,%ymm8,%ymm8
|
| - vpalignr $12,%ymm4,%ymm4,%ymm4
|
| - vpaddd %ymm5,%ymm1,%ymm1
|
| - vpxor %ymm1,%ymm13,%ymm13
|
| - vpshufb .rol16(%rip),%ymm13,%ymm13
|
| - vpaddd %ymm13,%ymm9,%ymm9
|
| - vpxor %ymm9,%ymm5,%ymm5
|
| - vpsrld $20,%ymm5,%ymm3
|
| - vpslld $12,%ymm5,%ymm5
|
| - vpxor %ymm3,%ymm5,%ymm5
|
| - vpaddd %ymm5,%ymm1,%ymm1
|
| - vpxor %ymm1,%ymm13,%ymm13
|
| - vpshufb .rol8(%rip),%ymm13,%ymm13
|
| - vpaddd %ymm13,%ymm9,%ymm9
|
| - vpxor %ymm9,%ymm5,%ymm5
|
| - vpslld $7,%ymm5,%ymm3
|
| - vpsrld $25,%ymm5,%ymm5
|
| - vpxor %ymm3,%ymm5,%ymm5
|
| - vpalignr $4,%ymm13,%ymm13,%ymm13
|
| - vpalignr $8,%ymm9,%ymm9,%ymm9
|
| - vpalignr $12,%ymm5,%ymm5,%ymm5
|
| - addq 16(%rdi),%r10
|
| - adcq 8+16(%rdi),%r11
|
| - adcq $1,%r12
|
| - movq 0+0(%rbp),%rax
|
| - movq %rax,%r15
|
| - mulq %r10
|
| - movq %rax,%r13
|
| - movq %rdx,%r14
|
| - movq 0+0(%rbp),%rax
|
| - mulq %r11
|
| - imulq %r12,%r15
|
| - addq %rax,%r14
|
| - adcq %rdx,%r15
|
| - movq 8+0(%rbp),%rax
|
| - movq %rax,%r9
|
| - mulq %r10
|
| - addq %rax,%r14
|
| - adcq $0,%rdx
|
| - movq %rdx,%r10
|
| - movq 8+0(%rbp),%rax
|
| - mulq %r11
|
| - addq %rax,%r15
|
| - adcq $0,%rdx
|
| - imulq %r12,%r9
|
| - addq %r10,%r15
|
| - adcq %rdx,%r9
|
| - movq %r13,%r10
|
| - movq %r14,%r11
|
| - movq %r15,%r12
|
| - andq $3,%r12
|
| - movq %r15,%r13
|
| - andq $-4,%r13
|
| - movq %r9,%r14
|
| - shrdq $2,%r9,%r15
|
| - shrq $2,%r9
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $0,%r12
|
| - addq %r15,%r10
|
| - adcq %r9,%r11
|
| - adcq $0,%r12
|
| -
|
| - leaq 32(%rdi),%rdi
|
| - decq %rcx
|
| - jg 1b
|
| - decq %r8
|
| - jge 2b
|
| - vpaddd .chacha20_consts(%rip),%ymm1,%ymm1
|
| - vpaddd 64(%rbp),%ymm5,%ymm5
|
| - vpaddd 96(%rbp),%ymm9,%ymm9
|
| - vpaddd 192(%rbp),%ymm13,%ymm13
|
| - vpaddd .chacha20_consts(%rip),%ymm0,%ymm0
|
| - vpaddd 64(%rbp),%ymm4,%ymm4
|
| - vpaddd 96(%rbp),%ymm8,%ymm8
|
| - vpaddd 160(%rbp),%ymm12,%ymm12
|
| - vperm2i128 $0x02,%ymm1,%ymm5,%ymm3
|
| - vperm2i128 $0x13,%ymm1,%ymm5,%ymm5
|
| - vperm2i128 $0x02,%ymm9,%ymm13,%ymm1
|
| - vperm2i128 $0x13,%ymm9,%ymm13,%ymm9
|
| - vpxor 0+0(%rsi),%ymm3,%ymm3
|
| - vpxor 32+0(%rsi),%ymm1,%ymm1
|
| - vpxor 64+0(%rsi),%ymm5,%ymm5
|
| - vpxor 96+0(%rsi),%ymm9,%ymm9
|
| - vmovdqu %ymm3,0+0(%rdi)
|
| - vmovdqu %ymm1,32+0(%rdi)
|
| - vmovdqu %ymm5,64+0(%rdi)
|
| - vmovdqu %ymm9,96+0(%rdi)
|
| - vperm2i128 $0x13,%ymm0,%ymm4,%ymm3
|
| - vperm2i128 $0x02,%ymm0,%ymm4,%ymm0
|
| - vperm2i128 $0x02,%ymm8,%ymm12,%ymm4
|
| - vperm2i128 $0x13,%ymm8,%ymm12,%ymm12
|
| - vmovdqa %ymm3,%ymm8
|
| -
|
| - movq $128,%rcx
|
| - leaq 128(%rsi),%rsi
|
| - subq $128,%rbx
|
| - jmp seal_avx2_hash
|
| -3:
|
| - cmpq $384,%rbx
|
| - ja seal_avx2_tail_512
|
| -
|
| -seal_avx2_tail_384:
|
| - vmovdqa .chacha20_consts(%rip),%ymm0
|
| - vmovdqa 64(%rbp),%ymm4
|
| - vmovdqa 96(%rbp),%ymm8
|
| - vmovdqa %ymm0,%ymm1
|
| - vmovdqa %ymm4,%ymm5
|
| - vmovdqa %ymm8,%ymm9
|
| - vmovdqa %ymm0,%ymm2
|
| - vmovdqa %ymm4,%ymm6
|
| - vmovdqa %ymm8,%ymm10
|
| - vmovdqa .avx2_inc(%rip),%ymm12
|
| - vpaddd 160(%rbp),%ymm12,%ymm14
|
| - vpaddd %ymm14,%ymm12,%ymm13
|
| - vpaddd %ymm13,%ymm12,%ymm12
|
| - vmovdqa %ymm12,160(%rbp)
|
| - vmovdqa %ymm13,192(%rbp)
|
| - vmovdqa %ymm14,224(%rbp)
|
| -
|
| -1:
|
| - addq 0(%rdi),%r10
|
| - adcq 8+0(%rdi),%r11
|
| - adcq $1,%r12
|
| - movq 0+0(%rbp),%rax
|
| - movq %rax,%r15
|
| - mulq %r10
|
| - movq %rax,%r13
|
| - movq %rdx,%r14
|
| - movq 0+0(%rbp),%rax
|
| - mulq %r11
|
| - imulq %r12,%r15
|
| - addq %rax,%r14
|
| - adcq %rdx,%r15
|
| - movq 8+0(%rbp),%rax
|
| - movq %rax,%r9
|
| - mulq %r10
|
| - addq %rax,%r14
|
| - adcq $0,%rdx
|
| - movq %rdx,%r10
|
| - movq 8+0(%rbp),%rax
|
| - mulq %r11
|
| - addq %rax,%r15
|
| - adcq $0,%rdx
|
| - imulq %r12,%r9
|
| - addq %r10,%r15
|
| - adcq %rdx,%r9
|
| - movq %r13,%r10
|
| - movq %r14,%r11
|
| - movq %r15,%r12
|
| - andq $3,%r12
|
| - movq %r15,%r13
|
| - andq $-4,%r13
|
| - movq %r9,%r14
|
| - shrdq $2,%r9,%r15
|
| - shrq $2,%r9
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $0,%r12
|
| - addq %r15,%r10
|
| - adcq %r9,%r11
|
| - adcq $0,%r12
|
| -
|
| - leaq 16(%rdi),%rdi
|
| -2:
|
| - vpaddd %ymm4,%ymm0,%ymm0
|
| - vpxor %ymm0,%ymm12,%ymm12
|
| - vpshufb .rol16(%rip),%ymm12,%ymm12
|
| - vpaddd %ymm12,%ymm8,%ymm8
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vpsrld $20,%ymm4,%ymm3
|
| - vpslld $12,%ymm4,%ymm4
|
| - vpxor %ymm3,%ymm4,%ymm4
|
| - vpaddd %ymm4,%ymm0,%ymm0
|
| - vpxor %ymm0,%ymm12,%ymm12
|
| - vpshufb .rol8(%rip),%ymm12,%ymm12
|
| - vpaddd %ymm12,%ymm8,%ymm8
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vpslld $7,%ymm4,%ymm3
|
| - vpsrld $25,%ymm4,%ymm4
|
| - vpxor %ymm3,%ymm4,%ymm4
|
| - vpalignr $12,%ymm12,%ymm12,%ymm12
|
| - vpalignr $8,%ymm8,%ymm8,%ymm8
|
| - vpalignr $4,%ymm4,%ymm4,%ymm4
|
| - vpaddd %ymm5,%ymm1,%ymm1
|
| - vpxor %ymm1,%ymm13,%ymm13
|
| - vpshufb .rol16(%rip),%ymm13,%ymm13
|
| - vpaddd %ymm13,%ymm9,%ymm9
|
| - vpxor %ymm9,%ymm5,%ymm5
|
| - vpsrld $20,%ymm5,%ymm3
|
| - vpslld $12,%ymm5,%ymm5
|
| - vpxor %ymm3,%ymm5,%ymm5
|
| - vpaddd %ymm5,%ymm1,%ymm1
|
| - vpxor %ymm1,%ymm13,%ymm13
|
| - vpshufb .rol8(%rip),%ymm13,%ymm13
|
| - vpaddd %ymm13,%ymm9,%ymm9
|
| - vpxor %ymm9,%ymm5,%ymm5
|
| - vpslld $7,%ymm5,%ymm3
|
| - vpsrld $25,%ymm5,%ymm5
|
| - vpxor %ymm3,%ymm5,%ymm5
|
| - vpalignr $12,%ymm13,%ymm13,%ymm13
|
| - vpalignr $8,%ymm9,%ymm9,%ymm9
|
| - vpalignr $4,%ymm5,%ymm5,%ymm5
|
| - addq 0(%rdi),%r10
|
| - adcq 8+0(%rdi),%r11
|
| - adcq $1,%r12
|
| - movq 0+0(%rbp),%rax
|
| - movq %rax,%r15
|
| - mulq %r10
|
| - movq %rax,%r13
|
| - movq %rdx,%r14
|
| - movq 0+0(%rbp),%rax
|
| - mulq %r11
|
| - imulq %r12,%r15
|
| - addq %rax,%r14
|
| - adcq %rdx,%r15
|
| - movq 8+0(%rbp),%rax
|
| - movq %rax,%r9
|
| - mulq %r10
|
| - addq %rax,%r14
|
| - adcq $0,%rdx
|
| - movq %rdx,%r10
|
| - movq 8+0(%rbp),%rax
|
| - mulq %r11
|
| - addq %rax,%r15
|
| - adcq $0,%rdx
|
| - imulq %r12,%r9
|
| - addq %r10,%r15
|
| - adcq %rdx,%r9
|
| - movq %r13,%r10
|
| - movq %r14,%r11
|
| - movq %r15,%r12
|
| - andq $3,%r12
|
| - movq %r15,%r13
|
| - andq $-4,%r13
|
| - movq %r9,%r14
|
| - shrdq $2,%r9,%r15
|
| - shrq $2,%r9
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $0,%r12
|
| - addq %r15,%r10
|
| - adcq %r9,%r11
|
| - adcq $0,%r12
|
| - vpaddd %ymm6,%ymm2,%ymm2
|
| - vpxor %ymm2,%ymm14,%ymm14
|
| - vpshufb .rol16(%rip),%ymm14,%ymm14
|
| - vpaddd %ymm14,%ymm10,%ymm10
|
| - vpxor %ymm10,%ymm6,%ymm6
|
| - vpsrld $20,%ymm6,%ymm3
|
| - vpslld $12,%ymm6,%ymm6
|
| - vpxor %ymm3,%ymm6,%ymm6
|
| - vpaddd %ymm6,%ymm2,%ymm2
|
| - vpxor %ymm2,%ymm14,%ymm14
|
| - vpshufb .rol8(%rip),%ymm14,%ymm14
|
| - vpaddd %ymm14,%ymm10,%ymm10
|
| - vpxor %ymm10,%ymm6,%ymm6
|
| - vpslld $7,%ymm6,%ymm3
|
| - vpsrld $25,%ymm6,%ymm6
|
| - vpxor %ymm3,%ymm6,%ymm6
|
| - vpalignr $12,%ymm14,%ymm14,%ymm14
|
| - vpalignr $8,%ymm10,%ymm10,%ymm10
|
| - vpalignr $4,%ymm6,%ymm6,%ymm6
|
| - vpaddd %ymm4,%ymm0,%ymm0
|
| - vpxor %ymm0,%ymm12,%ymm12
|
| - vpshufb .rol16(%rip),%ymm12,%ymm12
|
| - vpaddd %ymm12,%ymm8,%ymm8
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vpsrld $20,%ymm4,%ymm3
|
| - vpslld $12,%ymm4,%ymm4
|
| - vpxor %ymm3,%ymm4,%ymm4
|
| - vpaddd %ymm4,%ymm0,%ymm0
|
| - vpxor %ymm0,%ymm12,%ymm12
|
| - vpshufb .rol8(%rip),%ymm12,%ymm12
|
| - vpaddd %ymm12,%ymm8,%ymm8
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vpslld $7,%ymm4,%ymm3
|
| - vpsrld $25,%ymm4,%ymm4
|
| - vpxor %ymm3,%ymm4,%ymm4
|
| - vpalignr $4,%ymm12,%ymm12,%ymm12
|
| - vpalignr $8,%ymm8,%ymm8,%ymm8
|
| - vpalignr $12,%ymm4,%ymm4,%ymm4
|
| - addq 16(%rdi),%r10
|
| - adcq 8+16(%rdi),%r11
|
| - adcq $1,%r12
|
| - movq 0+0(%rbp),%rax
|
| - movq %rax,%r15
|
| - mulq %r10
|
| - movq %rax,%r13
|
| - movq %rdx,%r14
|
| - movq 0+0(%rbp),%rax
|
| - mulq %r11
|
| - imulq %r12,%r15
|
| - addq %rax,%r14
|
| - adcq %rdx,%r15
|
| - movq 8+0(%rbp),%rax
|
| - movq %rax,%r9
|
| - mulq %r10
|
| - addq %rax,%r14
|
| - adcq $0,%rdx
|
| - movq %rdx,%r10
|
| - movq 8+0(%rbp),%rax
|
| - mulq %r11
|
| - addq %rax,%r15
|
| - adcq $0,%rdx
|
| - imulq %r12,%r9
|
| - addq %r10,%r15
|
| - adcq %rdx,%r9
|
| - movq %r13,%r10
|
| - movq %r14,%r11
|
| - movq %r15,%r12
|
| - andq $3,%r12
|
| - movq %r15,%r13
|
| - andq $-4,%r13
|
| - movq %r9,%r14
|
| - shrdq $2,%r9,%r15
|
| - shrq $2,%r9
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $0,%r12
|
| - addq %r15,%r10
|
| - adcq %r9,%r11
|
| - adcq $0,%r12
|
| - vpaddd %ymm5,%ymm1,%ymm1
|
| - vpxor %ymm1,%ymm13,%ymm13
|
| - vpshufb .rol16(%rip),%ymm13,%ymm13
|
| - vpaddd %ymm13,%ymm9,%ymm9
|
| - vpxor %ymm9,%ymm5,%ymm5
|
| - vpsrld $20,%ymm5,%ymm3
|
| - vpslld $12,%ymm5,%ymm5
|
| - vpxor %ymm3,%ymm5,%ymm5
|
| - vpaddd %ymm5,%ymm1,%ymm1
|
| - vpxor %ymm1,%ymm13,%ymm13
|
| - vpshufb .rol8(%rip),%ymm13,%ymm13
|
| - vpaddd %ymm13,%ymm9,%ymm9
|
| - vpxor %ymm9,%ymm5,%ymm5
|
| - vpslld $7,%ymm5,%ymm3
|
| - vpsrld $25,%ymm5,%ymm5
|
| - vpxor %ymm3,%ymm5,%ymm5
|
| - vpalignr $4,%ymm13,%ymm13,%ymm13
|
| - vpalignr $8,%ymm9,%ymm9,%ymm9
|
| - vpalignr $12,%ymm5,%ymm5,%ymm5
|
| - vpaddd %ymm6,%ymm2,%ymm2
|
| - vpxor %ymm2,%ymm14,%ymm14
|
| - vpshufb .rol16(%rip),%ymm14,%ymm14
|
| - vpaddd %ymm14,%ymm10,%ymm10
|
| - vpxor %ymm10,%ymm6,%ymm6
|
| - vpsrld $20,%ymm6,%ymm3
|
| - vpslld $12,%ymm6,%ymm6
|
| - vpxor %ymm3,%ymm6,%ymm6
|
| - vpaddd %ymm6,%ymm2,%ymm2
|
| - vpxor %ymm2,%ymm14,%ymm14
|
| - vpshufb .rol8(%rip),%ymm14,%ymm14
|
| - vpaddd %ymm14,%ymm10,%ymm10
|
| - vpxor %ymm10,%ymm6,%ymm6
|
| - vpslld $7,%ymm6,%ymm3
|
| - vpsrld $25,%ymm6,%ymm6
|
| - vpxor %ymm3,%ymm6,%ymm6
|
| - vpalignr $4,%ymm14,%ymm14,%ymm14
|
| - vpalignr $8,%ymm10,%ymm10,%ymm10
|
| - vpalignr $12,%ymm6,%ymm6,%ymm6
|
| -
|
| - leaq 32(%rdi),%rdi
|
| - decq %rcx
|
| - jg 1b
|
| - decq %r8
|
| - jge 2b
|
| - vpaddd .chacha20_consts(%rip),%ymm2,%ymm2
|
| - vpaddd 64(%rbp),%ymm6,%ymm6
|
| - vpaddd 96(%rbp),%ymm10,%ymm10
|
| - vpaddd 224(%rbp),%ymm14,%ymm14
|
| - vpaddd .chacha20_consts(%rip),%ymm1,%ymm1
|
| - vpaddd 64(%rbp),%ymm5,%ymm5
|
| - vpaddd 96(%rbp),%ymm9,%ymm9
|
| - vpaddd 192(%rbp),%ymm13,%ymm13
|
| - vpaddd .chacha20_consts(%rip),%ymm0,%ymm0
|
| - vpaddd 64(%rbp),%ymm4,%ymm4
|
| - vpaddd 96(%rbp),%ymm8,%ymm8
|
| - vpaddd 160(%rbp),%ymm12,%ymm12
|
| - vperm2i128 $0x02,%ymm2,%ymm6,%ymm3
|
| - vperm2i128 $0x13,%ymm2,%ymm6,%ymm6
|
| - vperm2i128 $0x02,%ymm10,%ymm14,%ymm2
|
| - vperm2i128 $0x13,%ymm10,%ymm14,%ymm10
|
| - vpxor 0+0(%rsi),%ymm3,%ymm3
|
| - vpxor 32+0(%rsi),%ymm2,%ymm2
|
| - vpxor 64+0(%rsi),%ymm6,%ymm6
|
| - vpxor 96+0(%rsi),%ymm10,%ymm10
|
| - vmovdqu %ymm3,0+0(%rdi)
|
| - vmovdqu %ymm2,32+0(%rdi)
|
| - vmovdqu %ymm6,64+0(%rdi)
|
| - vmovdqu %ymm10,96+0(%rdi)
|
| - vperm2i128 $0x02,%ymm1,%ymm5,%ymm3
|
| - vperm2i128 $0x13,%ymm1,%ymm5,%ymm5
|
| - vperm2i128 $0x02,%ymm9,%ymm13,%ymm1
|
| - vperm2i128 $0x13,%ymm9,%ymm13,%ymm9
|
| - vpxor 0+128(%rsi),%ymm3,%ymm3
|
| - vpxor 32+128(%rsi),%ymm1,%ymm1
|
| - vpxor 64+128(%rsi),%ymm5,%ymm5
|
| - vpxor 96+128(%rsi),%ymm9,%ymm9
|
| - vmovdqu %ymm3,0+128(%rdi)
|
| - vmovdqu %ymm1,32+128(%rdi)
|
| - vmovdqu %ymm5,64+128(%rdi)
|
| - vmovdqu %ymm9,96+128(%rdi)
|
| - vperm2i128 $0x13,%ymm0,%ymm4,%ymm3
|
| - vperm2i128 $0x02,%ymm0,%ymm4,%ymm0
|
| - vperm2i128 $0x02,%ymm8,%ymm12,%ymm4
|
| - vperm2i128 $0x13,%ymm8,%ymm12,%ymm12
|
| - vmovdqa %ymm3,%ymm8
|
| -
|
| - movq $256,%rcx
|
| - leaq 256(%rsi),%rsi
|
| - subq $256,%rbx
|
| - jmp seal_avx2_hash
|
| -
|
| -seal_avx2_tail_512:
|
| - vmovdqa .chacha20_consts(%rip),%ymm0
|
| - vmovdqa 64(%rbp),%ymm4
|
| - vmovdqa 96(%rbp),%ymm8
|
| - vmovdqa %ymm0,%ymm1
|
| - vmovdqa %ymm4,%ymm5
|
| - vmovdqa %ymm8,%ymm9
|
| - vmovdqa %ymm0,%ymm2
|
| - vmovdqa %ymm4,%ymm6
|
| - vmovdqa %ymm8,%ymm10
|
| - vmovdqa %ymm0,%ymm3
|
| - vmovdqa %ymm4,%ymm7
|
| - vmovdqa %ymm8,%ymm11
|
| - vmovdqa .avx2_inc(%rip),%ymm12
|
| - vpaddd 160(%rbp),%ymm12,%ymm15
|
| - vpaddd %ymm15,%ymm12,%ymm14
|
| - vpaddd %ymm14,%ymm12,%ymm13
|
| - vpaddd %ymm13,%ymm12,%ymm12
|
| - vmovdqa %ymm15,256(%rbp)
|
| - vmovdqa %ymm14,224(%rbp)
|
| - vmovdqa %ymm13,192(%rbp)
|
| - vmovdqa %ymm12,160(%rbp)
|
| -
|
| -1:
|
| - addq 0(%rdi),%r10
|
| - adcq 8+0(%rdi),%r11
|
| - adcq $1,%r12
|
| - movq 0+0(%rbp),%rdx
|
| - movq %rdx,%r15
|
| - mulxq %r10,%r13,%r14
|
| - mulxq %r11,%rax,%rdx
|
| - imulq %r12,%r15
|
| - addq %rax,%r14
|
| - adcq %rdx,%r15
|
| - movq 8+0(%rbp),%rdx
|
| - mulxq %r10,%r10,%rax
|
| - addq %r10,%r14
|
| - mulxq %r11,%r11,%r9
|
| - adcq %r11,%r15
|
| - adcq $0,%r9
|
| - imulq %r12,%rdx
|
| - addq %rax,%r15
|
| - adcq %rdx,%r9
|
| - movq %r13,%r10
|
| - movq %r14,%r11
|
| - movq %r15,%r12
|
| - andq $3,%r12
|
| - movq %r15,%r13
|
| - andq $-4,%r13
|
| - movq %r9,%r14
|
| - shrdq $2,%r9,%r15
|
| - shrq $2,%r9
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $0,%r12
|
| - addq %r15,%r10
|
| - adcq %r9,%r11
|
| - adcq $0,%r12
|
| -
|
| - leaq 16(%rdi),%rdi
|
| -2:
|
| - vmovdqa %ymm8,128(%rbp)
|
| - vmovdqa .rol16(%rip),%ymm8
|
| - vpaddd %ymm7,%ymm3,%ymm3
|
| - vpaddd %ymm6,%ymm2,%ymm2
|
| - vpaddd %ymm5,%ymm1,%ymm1
|
| - vpaddd %ymm4,%ymm0,%ymm0
|
| - vpxor %ymm3,%ymm15,%ymm15
|
| - vpxor %ymm2,%ymm14,%ymm14
|
| - vpxor %ymm1,%ymm13,%ymm13
|
| - vpxor %ymm0,%ymm12,%ymm12
|
| - vpshufb %ymm8,%ymm15,%ymm15
|
| - vpshufb %ymm8,%ymm14,%ymm14
|
| - vpshufb %ymm8,%ymm13,%ymm13
|
| - vpshufb %ymm8,%ymm12,%ymm12
|
| - vmovdqa 128(%rbp),%ymm8
|
| - vpaddd %ymm15,%ymm11,%ymm11
|
| - vpaddd %ymm14,%ymm10,%ymm10
|
| - vpaddd %ymm13,%ymm9,%ymm9
|
| - vpaddd %ymm12,%ymm8,%ymm8
|
| - vpxor %ymm11,%ymm7,%ymm7
|
| - addq 0(%rdi),%r10
|
| - adcq 8+0(%rdi),%r11
|
| - adcq $1,%r12
|
| - vpxor %ymm10,%ymm6,%ymm6
|
| - vpxor %ymm9,%ymm5,%ymm5
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vmovdqa %ymm8,128(%rbp)
|
| - vpsrld $20,%ymm7,%ymm8
|
| - vpslld $32-20,%ymm7,%ymm7
|
| - vpxor %ymm8,%ymm7,%ymm7
|
| - vpsrld $20,%ymm6,%ymm8
|
| - vpslld $32-20,%ymm6,%ymm6
|
| - vpxor %ymm8,%ymm6,%ymm6
|
| - vpsrld $20,%ymm5,%ymm8
|
| - vpslld $32-20,%ymm5,%ymm5
|
| - vpxor %ymm8,%ymm5,%ymm5
|
| - vpsrld $20,%ymm4,%ymm8
|
| - vpslld $32-20,%ymm4,%ymm4
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vmovdqa .rol8(%rip),%ymm8
|
| - vpaddd %ymm7,%ymm3,%ymm3
|
| - vpaddd %ymm6,%ymm2,%ymm2
|
| - vpaddd %ymm5,%ymm1,%ymm1
|
| - movq 0+0(%rbp),%rdx
|
| - movq %rdx,%r15
|
| - mulxq %r10,%r13,%r14
|
| - mulxq %r11,%rax,%rdx
|
| - imulq %r12,%r15
|
| - addq %rax,%r14
|
| - adcq %rdx,%r15
|
| - vpaddd %ymm4,%ymm0,%ymm0
|
| - vpxor %ymm3,%ymm15,%ymm15
|
| - vpxor %ymm2,%ymm14,%ymm14
|
| - vpxor %ymm1,%ymm13,%ymm13
|
| - vpxor %ymm0,%ymm12,%ymm12
|
| - vpshufb %ymm8,%ymm15,%ymm15
|
| - vpshufb %ymm8,%ymm14,%ymm14
|
| - vpshufb %ymm8,%ymm13,%ymm13
|
| - vpshufb %ymm8,%ymm12,%ymm12
|
| - vmovdqa 128(%rbp),%ymm8
|
| - vpaddd %ymm15,%ymm11,%ymm11
|
| - vpaddd %ymm14,%ymm10,%ymm10
|
| - vpaddd %ymm13,%ymm9,%ymm9
|
| - vpaddd %ymm12,%ymm8,%ymm8
|
| - vpxor %ymm11,%ymm7,%ymm7
|
| - vpxor %ymm10,%ymm6,%ymm6
|
| - vpxor %ymm9,%ymm5,%ymm5
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vmovdqa %ymm8,128(%rbp)
|
| - vpsrld $25,%ymm7,%ymm8
|
| - movq 8+0(%rbp),%rdx
|
| - mulxq %r10,%r10,%rax
|
| - addq %r10,%r14
|
| - mulxq %r11,%r11,%r9
|
| - adcq %r11,%r15
|
| - adcq $0,%r9
|
| - imulq %r12,%rdx
|
| - vpslld $32-25,%ymm7,%ymm7
|
| - vpxor %ymm8,%ymm7,%ymm7
|
| - vpsrld $25,%ymm6,%ymm8
|
| - vpslld $32-25,%ymm6,%ymm6
|
| - vpxor %ymm8,%ymm6,%ymm6
|
| - vpsrld $25,%ymm5,%ymm8
|
| - vpslld $32-25,%ymm5,%ymm5
|
| - vpxor %ymm8,%ymm5,%ymm5
|
| - vpsrld $25,%ymm4,%ymm8
|
| - vpslld $32-25,%ymm4,%ymm4
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vmovdqa 128(%rbp),%ymm8
|
| - vpalignr $4,%ymm7,%ymm7,%ymm7
|
| - vpalignr $8,%ymm11,%ymm11,%ymm11
|
| - vpalignr $12,%ymm15,%ymm15,%ymm15
|
| - vpalignr $4,%ymm6,%ymm6,%ymm6
|
| - vpalignr $8,%ymm10,%ymm10,%ymm10
|
| - vpalignr $12,%ymm14,%ymm14,%ymm14
|
| - vpalignr $4,%ymm5,%ymm5,%ymm5
|
| - vpalignr $8,%ymm9,%ymm9,%ymm9
|
| - addq %rax,%r15
|
| - adcq %rdx,%r9
|
| - vpalignr $12,%ymm13,%ymm13,%ymm13
|
| - vpalignr $4,%ymm4,%ymm4,%ymm4
|
| - vpalignr $8,%ymm8,%ymm8,%ymm8
|
| - vpalignr $12,%ymm12,%ymm12,%ymm12
|
| - vmovdqa %ymm8,128(%rbp)
|
| - vmovdqa .rol16(%rip),%ymm8
|
| - vpaddd %ymm7,%ymm3,%ymm3
|
| - vpaddd %ymm6,%ymm2,%ymm2
|
| - vpaddd %ymm5,%ymm1,%ymm1
|
| - vpaddd %ymm4,%ymm0,%ymm0
|
| - vpxor %ymm3,%ymm15,%ymm15
|
| - vpxor %ymm2,%ymm14,%ymm14
|
| - vpxor %ymm1,%ymm13,%ymm13
|
| - vpxor %ymm0,%ymm12,%ymm12
|
| - vpshufb %ymm8,%ymm15,%ymm15
|
| - vpshufb %ymm8,%ymm14,%ymm14
|
| - vpshufb %ymm8,%ymm13,%ymm13
|
| - vpshufb %ymm8,%ymm12,%ymm12
|
| - vmovdqa 128(%rbp),%ymm8
|
| - vpaddd %ymm15,%ymm11,%ymm11
|
| - movq %r13,%r10
|
| - movq %r14,%r11
|
| - movq %r15,%r12
|
| - andq $3,%r12
|
| - movq %r15,%r13
|
| - andq $-4,%r13
|
| - movq %r9,%r14
|
| - shrdq $2,%r9,%r15
|
| - shrq $2,%r9
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $0,%r12
|
| - addq %r15,%r10
|
| - adcq %r9,%r11
|
| - adcq $0,%r12
|
| - vpaddd %ymm14,%ymm10,%ymm10
|
| - vpaddd %ymm13,%ymm9,%ymm9
|
| - vpaddd %ymm12,%ymm8,%ymm8
|
| - vpxor %ymm11,%ymm7,%ymm7
|
| - vpxor %ymm10,%ymm6,%ymm6
|
| - vpxor %ymm9,%ymm5,%ymm5
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vmovdqa %ymm8,128(%rbp)
|
| - vpsrld $20,%ymm7,%ymm8
|
| - vpslld $32-20,%ymm7,%ymm7
|
| - vpxor %ymm8,%ymm7,%ymm7
|
| - vpsrld $20,%ymm6,%ymm8
|
| - vpslld $32-20,%ymm6,%ymm6
|
| - vpxor %ymm8,%ymm6,%ymm6
|
| - vpsrld $20,%ymm5,%ymm8
|
| - vpslld $32-20,%ymm5,%ymm5
|
| - vpxor %ymm8,%ymm5,%ymm5
|
| - vpsrld $20,%ymm4,%ymm8
|
| - vpslld $32-20,%ymm4,%ymm4
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - addq 16(%rdi),%r10
|
| - adcq 8+16(%rdi),%r11
|
| - adcq $1,%r12
|
| - vmovdqa .rol8(%rip),%ymm8
|
| - vpaddd %ymm7,%ymm3,%ymm3
|
| - vpaddd %ymm6,%ymm2,%ymm2
|
| - vpaddd %ymm5,%ymm1,%ymm1
|
| - vpaddd %ymm4,%ymm0,%ymm0
|
| - vpxor %ymm3,%ymm15,%ymm15
|
| - vpxor %ymm2,%ymm14,%ymm14
|
| - vpxor %ymm1,%ymm13,%ymm13
|
| - vpxor %ymm0,%ymm12,%ymm12
|
| - vpshufb %ymm8,%ymm15,%ymm15
|
| - vpshufb %ymm8,%ymm14,%ymm14
|
| - vpshufb %ymm8,%ymm13,%ymm13
|
| - vpshufb %ymm8,%ymm12,%ymm12
|
| - vmovdqa 128(%rbp),%ymm8
|
| - vpaddd %ymm15,%ymm11,%ymm11
|
| - vpaddd %ymm14,%ymm10,%ymm10
|
| - vpaddd %ymm13,%ymm9,%ymm9
|
| - vpaddd %ymm12,%ymm8,%ymm8
|
| - vpxor %ymm11,%ymm7,%ymm7
|
| - vpxor %ymm10,%ymm6,%ymm6
|
| - movq 0+0(%rbp),%rdx
|
| - movq %rdx,%r15
|
| - mulxq %r10,%r13,%r14
|
| - mulxq %r11,%rax,%rdx
|
| - imulq %r12,%r15
|
| - addq %rax,%r14
|
| - adcq %rdx,%r15
|
| - vpxor %ymm9,%ymm5,%ymm5
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vmovdqa %ymm8,128(%rbp)
|
| - vpsrld $25,%ymm7,%ymm8
|
| - vpslld $32-25,%ymm7,%ymm7
|
| - vpxor %ymm8,%ymm7,%ymm7
|
| - vpsrld $25,%ymm6,%ymm8
|
| - vpslld $32-25,%ymm6,%ymm6
|
| - vpxor %ymm8,%ymm6,%ymm6
|
| - vpsrld $25,%ymm5,%ymm8
|
| - vpslld $32-25,%ymm5,%ymm5
|
| - vpxor %ymm8,%ymm5,%ymm5
|
| - vpsrld $25,%ymm4,%ymm8
|
| - vpslld $32-25,%ymm4,%ymm4
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vmovdqa 128(%rbp),%ymm8
|
| - vpalignr $12,%ymm7,%ymm7,%ymm7
|
| - vpalignr $8,%ymm11,%ymm11,%ymm11
|
| - vpalignr $4,%ymm15,%ymm15,%ymm15
|
| - vpalignr $12,%ymm6,%ymm6,%ymm6
|
| - movq 8+0(%rbp),%rdx
|
| - mulxq %r10,%r10,%rax
|
| - addq %r10,%r14
|
| - mulxq %r11,%r11,%r9
|
| - adcq %r11,%r15
|
| - adcq $0,%r9
|
| - imulq %r12,%rdx
|
| - vpalignr $8,%ymm10,%ymm10,%ymm10
|
| - vpalignr $4,%ymm14,%ymm14,%ymm14
|
| - vpalignr $12,%ymm5,%ymm5,%ymm5
|
| - vpalignr $8,%ymm9,%ymm9,%ymm9
|
| - vpalignr $4,%ymm13,%ymm13,%ymm13
|
| - vpalignr $12,%ymm4,%ymm4,%ymm4
|
| - vpalignr $8,%ymm8,%ymm8,%ymm8
|
| - vpalignr $4,%ymm12,%ymm12,%ymm12
|
| -
|
| -
|
| -
|
| -
|
| -
|
| -
|
| -
|
| -
|
| -
|
| -
|
| -
|
| -
|
| - addq %rax,%r15
|
| - adcq %rdx,%r9
|
| -
|
| -
|
| -
|
| -
|
| -
|
| -
|
| -
|
| -
|
| -
|
| -
|
| -
|
| -
|
| -
|
| -
|
| -
|
| -
|
| -
|
| -
|
| -
|
| -
|
| - movq %r13,%r10
|
| - movq %r14,%r11
|
| - movq %r15,%r12
|
| - andq $3,%r12
|
| - movq %r15,%r13
|
| - andq $-4,%r13
|
| - movq %r9,%r14
|
| - shrdq $2,%r9,%r15
|
| - shrq $2,%r9
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $0,%r12
|
| - addq %r15,%r10
|
| - adcq %r9,%r11
|
| - adcq $0,%r12
|
| -
|
| - leaq 32(%rdi),%rdi
|
| - decq %rcx
|
| - jg 1b
|
| - decq %r8
|
| - jge 2b
|
| - vpaddd .chacha20_consts(%rip),%ymm3,%ymm3
|
| - vpaddd 64(%rbp),%ymm7,%ymm7
|
| - vpaddd 96(%rbp),%ymm11,%ymm11
|
| - vpaddd 256(%rbp),%ymm15,%ymm15
|
| - vpaddd .chacha20_consts(%rip),%ymm2,%ymm2
|
| - vpaddd 64(%rbp),%ymm6,%ymm6
|
| - vpaddd 96(%rbp),%ymm10,%ymm10
|
| - vpaddd 224(%rbp),%ymm14,%ymm14
|
| - vpaddd .chacha20_consts(%rip),%ymm1,%ymm1
|
| - vpaddd 64(%rbp),%ymm5,%ymm5
|
| - vpaddd 96(%rbp),%ymm9,%ymm9
|
| - vpaddd 192(%rbp),%ymm13,%ymm13
|
| - vpaddd .chacha20_consts(%rip),%ymm0,%ymm0
|
| - vpaddd 64(%rbp),%ymm4,%ymm4
|
| - vpaddd 96(%rbp),%ymm8,%ymm8
|
| - vpaddd 160(%rbp),%ymm12,%ymm12
|
| -
|
| - vmovdqa %ymm0,128(%rbp)
|
| - vperm2i128 $0x02,%ymm3,%ymm7,%ymm0
|
| - vperm2i128 $0x13,%ymm3,%ymm7,%ymm7
|
| - vperm2i128 $0x02,%ymm11,%ymm15,%ymm3
|
| - vperm2i128 $0x13,%ymm11,%ymm15,%ymm11
|
| - vpxor 0+0(%rsi),%ymm0,%ymm0
|
| - vpxor 32+0(%rsi),%ymm3,%ymm3
|
| - vpxor 64+0(%rsi),%ymm7,%ymm7
|
| - vpxor 96+0(%rsi),%ymm11,%ymm11
|
| - vmovdqu %ymm0,0+0(%rdi)
|
| - vmovdqu %ymm3,32+0(%rdi)
|
| - vmovdqu %ymm7,64+0(%rdi)
|
| - vmovdqu %ymm11,96+0(%rdi)
|
| -
|
| - vmovdqa 128(%rbp),%ymm0
|
| - vperm2i128 $0x02,%ymm2,%ymm6,%ymm3
|
| - vperm2i128 $0x13,%ymm2,%ymm6,%ymm6
|
| - vperm2i128 $0x02,%ymm10,%ymm14,%ymm2
|
| - vperm2i128 $0x13,%ymm10,%ymm14,%ymm10
|
| - vpxor 0+128(%rsi),%ymm3,%ymm3
|
| - vpxor 32+128(%rsi),%ymm2,%ymm2
|
| - vpxor 64+128(%rsi),%ymm6,%ymm6
|
| - vpxor 96+128(%rsi),%ymm10,%ymm10
|
| - vmovdqu %ymm3,0+128(%rdi)
|
| - vmovdqu %ymm2,32+128(%rdi)
|
| - vmovdqu %ymm6,64+128(%rdi)
|
| - vmovdqu %ymm10,96+128(%rdi)
|
| - vperm2i128 $0x02,%ymm1,%ymm5,%ymm3
|
| - vperm2i128 $0x13,%ymm1,%ymm5,%ymm5
|
| - vperm2i128 $0x02,%ymm9,%ymm13,%ymm1
|
| - vperm2i128 $0x13,%ymm9,%ymm13,%ymm9
|
| - vpxor 0+256(%rsi),%ymm3,%ymm3
|
| - vpxor 32+256(%rsi),%ymm1,%ymm1
|
| - vpxor 64+256(%rsi),%ymm5,%ymm5
|
| - vpxor 96+256(%rsi),%ymm9,%ymm9
|
| - vmovdqu %ymm3,0+256(%rdi)
|
| - vmovdqu %ymm1,32+256(%rdi)
|
| - vmovdqu %ymm5,64+256(%rdi)
|
| - vmovdqu %ymm9,96+256(%rdi)
|
| - vperm2i128 $0x13,%ymm0,%ymm4,%ymm3
|
| - vperm2i128 $0x02,%ymm0,%ymm4,%ymm0
|
| - vperm2i128 $0x02,%ymm8,%ymm12,%ymm4
|
| - vperm2i128 $0x13,%ymm8,%ymm12,%ymm12
|
| - vmovdqa %ymm3,%ymm8
|
| -
|
| - movq $384,%rcx
|
| - leaq 384(%rsi),%rsi
|
| - subq $384,%rbx
|
| - jmp seal_avx2_hash
|
| -
|
| -seal_avx2_320:
|
| - vmovdqa %ymm0,%ymm1
|
| - vmovdqa %ymm0,%ymm2
|
| - vmovdqa %ymm4,%ymm5
|
| - vmovdqa %ymm4,%ymm6
|
| - vmovdqa %ymm8,%ymm9
|
| - vmovdqa %ymm8,%ymm10
|
| - vpaddd .avx2_inc(%rip),%ymm12,%ymm13
|
| - vpaddd .avx2_inc(%rip),%ymm13,%ymm14
|
| - vmovdqa %ymm4,%ymm7
|
| - vmovdqa %ymm8,%ymm11
|
| - vmovdqa %ymm12,160(%rbp)
|
| - vmovdqa %ymm13,192(%rbp)
|
| - vmovdqa %ymm14,224(%rbp)
|
| - movq $10,%r10
|
| -1:
|
| - vpaddd %ymm4,%ymm0,%ymm0
|
| - vpxor %ymm0,%ymm12,%ymm12
|
| - vpshufb .rol16(%rip),%ymm12,%ymm12
|
| - vpaddd %ymm12,%ymm8,%ymm8
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vpsrld $20,%ymm4,%ymm3
|
| - vpslld $12,%ymm4,%ymm4
|
| - vpxor %ymm3,%ymm4,%ymm4
|
| - vpaddd %ymm4,%ymm0,%ymm0
|
| - vpxor %ymm0,%ymm12,%ymm12
|
| - vpshufb .rol8(%rip),%ymm12,%ymm12
|
| - vpaddd %ymm12,%ymm8,%ymm8
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vpslld $7,%ymm4,%ymm3
|
| - vpsrld $25,%ymm4,%ymm4
|
| - vpxor %ymm3,%ymm4,%ymm4
|
| - vpalignr $12,%ymm12,%ymm12,%ymm12
|
| - vpalignr $8,%ymm8,%ymm8,%ymm8
|
| - vpalignr $4,%ymm4,%ymm4,%ymm4
|
| - vpaddd %ymm5,%ymm1,%ymm1
|
| - vpxor %ymm1,%ymm13,%ymm13
|
| - vpshufb .rol16(%rip),%ymm13,%ymm13
|
| - vpaddd %ymm13,%ymm9,%ymm9
|
| - vpxor %ymm9,%ymm5,%ymm5
|
| - vpsrld $20,%ymm5,%ymm3
|
| - vpslld $12,%ymm5,%ymm5
|
| - vpxor %ymm3,%ymm5,%ymm5
|
| - vpaddd %ymm5,%ymm1,%ymm1
|
| - vpxor %ymm1,%ymm13,%ymm13
|
| - vpshufb .rol8(%rip),%ymm13,%ymm13
|
| - vpaddd %ymm13,%ymm9,%ymm9
|
| - vpxor %ymm9,%ymm5,%ymm5
|
| - vpslld $7,%ymm5,%ymm3
|
| - vpsrld $25,%ymm5,%ymm5
|
| - vpxor %ymm3,%ymm5,%ymm5
|
| - vpalignr $12,%ymm13,%ymm13,%ymm13
|
| - vpalignr $8,%ymm9,%ymm9,%ymm9
|
| - vpalignr $4,%ymm5,%ymm5,%ymm5
|
| - vpaddd %ymm6,%ymm2,%ymm2
|
| - vpxor %ymm2,%ymm14,%ymm14
|
| - vpshufb .rol16(%rip),%ymm14,%ymm14
|
| - vpaddd %ymm14,%ymm10,%ymm10
|
| - vpxor %ymm10,%ymm6,%ymm6
|
| - vpsrld $20,%ymm6,%ymm3
|
| - vpslld $12,%ymm6,%ymm6
|
| - vpxor %ymm3,%ymm6,%ymm6
|
| - vpaddd %ymm6,%ymm2,%ymm2
|
| - vpxor %ymm2,%ymm14,%ymm14
|
| - vpshufb .rol8(%rip),%ymm14,%ymm14
|
| - vpaddd %ymm14,%ymm10,%ymm10
|
| - vpxor %ymm10,%ymm6,%ymm6
|
| - vpslld $7,%ymm6,%ymm3
|
| - vpsrld $25,%ymm6,%ymm6
|
| - vpxor %ymm3,%ymm6,%ymm6
|
| - vpalignr $12,%ymm14,%ymm14,%ymm14
|
| - vpalignr $8,%ymm10,%ymm10,%ymm10
|
| - vpalignr $4,%ymm6,%ymm6,%ymm6
|
| - vpaddd %ymm4,%ymm0,%ymm0
|
| - vpxor %ymm0,%ymm12,%ymm12
|
| - vpshufb .rol16(%rip),%ymm12,%ymm12
|
| - vpaddd %ymm12,%ymm8,%ymm8
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vpsrld $20,%ymm4,%ymm3
|
| - vpslld $12,%ymm4,%ymm4
|
| - vpxor %ymm3,%ymm4,%ymm4
|
| - vpaddd %ymm4,%ymm0,%ymm0
|
| - vpxor %ymm0,%ymm12,%ymm12
|
| - vpshufb .rol8(%rip),%ymm12,%ymm12
|
| - vpaddd %ymm12,%ymm8,%ymm8
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vpslld $7,%ymm4,%ymm3
|
| - vpsrld $25,%ymm4,%ymm4
|
| - vpxor %ymm3,%ymm4,%ymm4
|
| - vpalignr $4,%ymm12,%ymm12,%ymm12
|
| - vpalignr $8,%ymm8,%ymm8,%ymm8
|
| - vpalignr $12,%ymm4,%ymm4,%ymm4
|
| - vpaddd %ymm5,%ymm1,%ymm1
|
| - vpxor %ymm1,%ymm13,%ymm13
|
| - vpshufb .rol16(%rip),%ymm13,%ymm13
|
| - vpaddd %ymm13,%ymm9,%ymm9
|
| - vpxor %ymm9,%ymm5,%ymm5
|
| - vpsrld $20,%ymm5,%ymm3
|
| - vpslld $12,%ymm5,%ymm5
|
| - vpxor %ymm3,%ymm5,%ymm5
|
| - vpaddd %ymm5,%ymm1,%ymm1
|
| - vpxor %ymm1,%ymm13,%ymm13
|
| - vpshufb .rol8(%rip),%ymm13,%ymm13
|
| - vpaddd %ymm13,%ymm9,%ymm9
|
| - vpxor %ymm9,%ymm5,%ymm5
|
| - vpslld $7,%ymm5,%ymm3
|
| - vpsrld $25,%ymm5,%ymm5
|
| - vpxor %ymm3,%ymm5,%ymm5
|
| - vpalignr $4,%ymm13,%ymm13,%ymm13
|
| - vpalignr $8,%ymm9,%ymm9,%ymm9
|
| - vpalignr $12,%ymm5,%ymm5,%ymm5
|
| - vpaddd %ymm6,%ymm2,%ymm2
|
| - vpxor %ymm2,%ymm14,%ymm14
|
| - vpshufb .rol16(%rip),%ymm14,%ymm14
|
| - vpaddd %ymm14,%ymm10,%ymm10
|
| - vpxor %ymm10,%ymm6,%ymm6
|
| - vpsrld $20,%ymm6,%ymm3
|
| - vpslld $12,%ymm6,%ymm6
|
| - vpxor %ymm3,%ymm6,%ymm6
|
| - vpaddd %ymm6,%ymm2,%ymm2
|
| - vpxor %ymm2,%ymm14,%ymm14
|
| - vpshufb .rol8(%rip),%ymm14,%ymm14
|
| - vpaddd %ymm14,%ymm10,%ymm10
|
| - vpxor %ymm10,%ymm6,%ymm6
|
| - vpslld $7,%ymm6,%ymm3
|
| - vpsrld $25,%ymm6,%ymm6
|
| - vpxor %ymm3,%ymm6,%ymm6
|
| - vpalignr $4,%ymm14,%ymm14,%ymm14
|
| - vpalignr $8,%ymm10,%ymm10,%ymm10
|
| - vpalignr $12,%ymm6,%ymm6,%ymm6
|
| -
|
| - decq %r10
|
| - jne 1b
|
| - vpaddd .chacha20_consts(%rip),%ymm0,%ymm0
|
| - vpaddd .chacha20_consts(%rip),%ymm1,%ymm1
|
| - vpaddd .chacha20_consts(%rip),%ymm2,%ymm2
|
| - vpaddd %ymm7,%ymm4,%ymm4
|
| - vpaddd %ymm7,%ymm5,%ymm5
|
| - vpaddd %ymm7,%ymm6,%ymm6
|
| - vpaddd %ymm11,%ymm8,%ymm8
|
| - vpaddd %ymm11,%ymm9,%ymm9
|
| - vpaddd %ymm11,%ymm10,%ymm10
|
| - vpaddd 160(%rbp),%ymm12,%ymm12
|
| - vpaddd 192(%rbp),%ymm13,%ymm13
|
| - vpaddd 224(%rbp),%ymm14,%ymm14
|
| - vperm2i128 $0x02,%ymm0,%ymm4,%ymm3
|
| -
|
| - vpand .clamp(%rip),%ymm3,%ymm3
|
| - vmovdqa %ymm3,0(%rbp)
|
| -
|
| - vperm2i128 $0x13,%ymm0,%ymm4,%ymm0
|
| - vperm2i128 $0x13,%ymm8,%ymm12,%ymm4
|
| - vperm2i128 $0x02,%ymm1,%ymm5,%ymm8
|
| - vperm2i128 $0x02,%ymm9,%ymm13,%ymm12
|
| - vperm2i128 $0x13,%ymm1,%ymm5,%ymm1
|
| - vperm2i128 $0x13,%ymm9,%ymm13,%ymm5
|
| - vperm2i128 $0x02,%ymm2,%ymm6,%ymm9
|
| - vperm2i128 $0x02,%ymm10,%ymm14,%ymm13
|
| - vperm2i128 $0x13,%ymm2,%ymm6,%ymm2
|
| - vperm2i128 $0x13,%ymm10,%ymm14,%ymm6
|
| - jmp seal_avx2_short
|
| -
|
| -seal_avx2_192:
|
| - vmovdqa %ymm0,%ymm1
|
| - vmovdqa %ymm0,%ymm2
|
| - vmovdqa %ymm4,%ymm5
|
| - vmovdqa %ymm4,%ymm6
|
| - vmovdqa %ymm8,%ymm9
|
| - vmovdqa %ymm8,%ymm10
|
| - vpaddd .avx2_inc(%rip),%ymm12,%ymm13
|
| - vmovdqa %ymm12,%ymm11
|
| - vmovdqa %ymm13,%ymm15
|
| - movq $10,%r10
|
| -1:
|
| - vpaddd %ymm4,%ymm0,%ymm0
|
| - vpxor %ymm0,%ymm12,%ymm12
|
| - vpshufb .rol16(%rip),%ymm12,%ymm12
|
| - vpaddd %ymm12,%ymm8,%ymm8
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vpsrld $20,%ymm4,%ymm3
|
| - vpslld $12,%ymm4,%ymm4
|
| - vpxor %ymm3,%ymm4,%ymm4
|
| - vpaddd %ymm4,%ymm0,%ymm0
|
| - vpxor %ymm0,%ymm12,%ymm12
|
| - vpshufb .rol8(%rip),%ymm12,%ymm12
|
| - vpaddd %ymm12,%ymm8,%ymm8
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vpslld $7,%ymm4,%ymm3
|
| - vpsrld $25,%ymm4,%ymm4
|
| - vpxor %ymm3,%ymm4,%ymm4
|
| - vpalignr $12,%ymm12,%ymm12,%ymm12
|
| - vpalignr $8,%ymm8,%ymm8,%ymm8
|
| - vpalignr $4,%ymm4,%ymm4,%ymm4
|
| - vpaddd %ymm5,%ymm1,%ymm1
|
| - vpxor %ymm1,%ymm13,%ymm13
|
| - vpshufb .rol16(%rip),%ymm13,%ymm13
|
| - vpaddd %ymm13,%ymm9,%ymm9
|
| - vpxor %ymm9,%ymm5,%ymm5
|
| - vpsrld $20,%ymm5,%ymm3
|
| - vpslld $12,%ymm5,%ymm5
|
| - vpxor %ymm3,%ymm5,%ymm5
|
| - vpaddd %ymm5,%ymm1,%ymm1
|
| - vpxor %ymm1,%ymm13,%ymm13
|
| - vpshufb .rol8(%rip),%ymm13,%ymm13
|
| - vpaddd %ymm13,%ymm9,%ymm9
|
| - vpxor %ymm9,%ymm5,%ymm5
|
| - vpslld $7,%ymm5,%ymm3
|
| - vpsrld $25,%ymm5,%ymm5
|
| - vpxor %ymm3,%ymm5,%ymm5
|
| - vpalignr $12,%ymm13,%ymm13,%ymm13
|
| - vpalignr $8,%ymm9,%ymm9,%ymm9
|
| - vpalignr $4,%ymm5,%ymm5,%ymm5
|
| - vpaddd %ymm4,%ymm0,%ymm0
|
| - vpxor %ymm0,%ymm12,%ymm12
|
| - vpshufb .rol16(%rip),%ymm12,%ymm12
|
| - vpaddd %ymm12,%ymm8,%ymm8
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vpsrld $20,%ymm4,%ymm3
|
| - vpslld $12,%ymm4,%ymm4
|
| - vpxor %ymm3,%ymm4,%ymm4
|
| - vpaddd %ymm4,%ymm0,%ymm0
|
| - vpxor %ymm0,%ymm12,%ymm12
|
| - vpshufb .rol8(%rip),%ymm12,%ymm12
|
| - vpaddd %ymm12,%ymm8,%ymm8
|
| - vpxor %ymm8,%ymm4,%ymm4
|
| - vpslld $7,%ymm4,%ymm3
|
| - vpsrld $25,%ymm4,%ymm4
|
| - vpxor %ymm3,%ymm4,%ymm4
|
| - vpalignr $4,%ymm12,%ymm12,%ymm12
|
| - vpalignr $8,%ymm8,%ymm8,%ymm8
|
| - vpalignr $12,%ymm4,%ymm4,%ymm4
|
| - vpaddd %ymm5,%ymm1,%ymm1
|
| - vpxor %ymm1,%ymm13,%ymm13
|
| - vpshufb .rol16(%rip),%ymm13,%ymm13
|
| - vpaddd %ymm13,%ymm9,%ymm9
|
| - vpxor %ymm9,%ymm5,%ymm5
|
| - vpsrld $20,%ymm5,%ymm3
|
| - vpslld $12,%ymm5,%ymm5
|
| - vpxor %ymm3,%ymm5,%ymm5
|
| - vpaddd %ymm5,%ymm1,%ymm1
|
| - vpxor %ymm1,%ymm13,%ymm13
|
| - vpshufb .rol8(%rip),%ymm13,%ymm13
|
| - vpaddd %ymm13,%ymm9,%ymm9
|
| - vpxor %ymm9,%ymm5,%ymm5
|
| - vpslld $7,%ymm5,%ymm3
|
| - vpsrld $25,%ymm5,%ymm5
|
| - vpxor %ymm3,%ymm5,%ymm5
|
| - vpalignr $4,%ymm13,%ymm13,%ymm13
|
| - vpalignr $8,%ymm9,%ymm9,%ymm9
|
| - vpalignr $12,%ymm5,%ymm5,%ymm5
|
| -
|
| - decq %r10
|
| - jne 1b
|
| - vpaddd %ymm2,%ymm0,%ymm0
|
| - vpaddd %ymm2,%ymm1,%ymm1
|
| - vpaddd %ymm6,%ymm4,%ymm4
|
| - vpaddd %ymm6,%ymm5,%ymm5
|
| - vpaddd %ymm10,%ymm8,%ymm8
|
| - vpaddd %ymm10,%ymm9,%ymm9
|
| - vpaddd %ymm11,%ymm12,%ymm12
|
| - vpaddd %ymm15,%ymm13,%ymm13
|
| - vperm2i128 $0x02,%ymm0,%ymm4,%ymm3
|
| -
|
| - vpand .clamp(%rip),%ymm3,%ymm3
|
| - vmovdqa %ymm3,0(%rbp)
|
| -
|
| - vperm2i128 $0x13,%ymm0,%ymm4,%ymm0
|
| - vperm2i128 $0x13,%ymm8,%ymm12,%ymm4
|
| - vperm2i128 $0x02,%ymm1,%ymm5,%ymm8
|
| - vperm2i128 $0x02,%ymm9,%ymm13,%ymm12
|
| - vperm2i128 $0x13,%ymm1,%ymm5,%ymm1
|
| - vperm2i128 $0x13,%ymm9,%ymm13,%ymm5
|
| -seal_avx2_short:
|
| - movq %r8,%r8
|
| - call poly_hash_ad_internal
|
| - xorq %rcx,%rcx
|
| -seal_avx2_hash:
|
| - cmpq $16,%rcx
|
| - jb seal_avx2_short_loop
|
| - addq 0(%rdi),%r10
|
| - adcq 8+0(%rdi),%r11
|
| - adcq $1,%r12
|
| - movq 0+0(%rbp),%rax
|
| - movq %rax,%r15
|
| - mulq %r10
|
| - movq %rax,%r13
|
| - movq %rdx,%r14
|
| - movq 0+0(%rbp),%rax
|
| - mulq %r11
|
| - imulq %r12,%r15
|
| - addq %rax,%r14
|
| - adcq %rdx,%r15
|
| - movq 8+0(%rbp),%rax
|
| - movq %rax,%r9
|
| - mulq %r10
|
| - addq %rax,%r14
|
| - adcq $0,%rdx
|
| - movq %rdx,%r10
|
| - movq 8+0(%rbp),%rax
|
| - mulq %r11
|
| - addq %rax,%r15
|
| - adcq $0,%rdx
|
| - imulq %r12,%r9
|
| - addq %r10,%r15
|
| - adcq %rdx,%r9
|
| - movq %r13,%r10
|
| - movq %r14,%r11
|
| - movq %r15,%r12
|
| - andq $3,%r12
|
| - movq %r15,%r13
|
| - andq $-4,%r13
|
| - movq %r9,%r14
|
| - shrdq $2,%r9,%r15
|
| - shrq $2,%r9
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $0,%r12
|
| - addq %r15,%r10
|
| - adcq %r9,%r11
|
| - adcq $0,%r12
|
| -
|
| - subq $16,%rcx
|
| - addq $16,%rdi
|
| - jmp seal_avx2_hash
|
| -seal_avx2_short_loop:
|
| - cmpq $32,%rbx
|
| - jb seal_avx2_short_tail
|
| - subq $32,%rbx
|
| -
|
| - vpxor (%rsi),%ymm0,%ymm0
|
| - vmovdqu %ymm0,(%rdi)
|
| - leaq 32(%rsi),%rsi
|
| -
|
| - addq 0(%rdi),%r10
|
| - adcq 8+0(%rdi),%r11
|
| - adcq $1,%r12
|
| - movq 0+0(%rbp),%rax
|
| - movq %rax,%r15
|
| - mulq %r10
|
| - movq %rax,%r13
|
| - movq %rdx,%r14
|
| - movq 0+0(%rbp),%rax
|
| - mulq %r11
|
| - imulq %r12,%r15
|
| - addq %rax,%r14
|
| - adcq %rdx,%r15
|
| - movq 8+0(%rbp),%rax
|
| - movq %rax,%r9
|
| - mulq %r10
|
| - addq %rax,%r14
|
| - adcq $0,%rdx
|
| - movq %rdx,%r10
|
| - movq 8+0(%rbp),%rax
|
| - mulq %r11
|
| - addq %rax,%r15
|
| - adcq $0,%rdx
|
| - imulq %r12,%r9
|
| - addq %r10,%r15
|
| - adcq %rdx,%r9
|
| - movq %r13,%r10
|
| - movq %r14,%r11
|
| - movq %r15,%r12
|
| - andq $3,%r12
|
| - movq %r15,%r13
|
| - andq $-4,%r13
|
| - movq %r9,%r14
|
| - shrdq $2,%r9,%r15
|
| - shrq $2,%r9
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $0,%r12
|
| - addq %r15,%r10
|
| - adcq %r9,%r11
|
| - adcq $0,%r12
|
| - addq 16(%rdi),%r10
|
| - adcq 8+16(%rdi),%r11
|
| - adcq $1,%r12
|
| - movq 0+0(%rbp),%rax
|
| - movq %rax,%r15
|
| - mulq %r10
|
| - movq %rax,%r13
|
| - movq %rdx,%r14
|
| - movq 0+0(%rbp),%rax
|
| - mulq %r11
|
| - imulq %r12,%r15
|
| - addq %rax,%r14
|
| - adcq %rdx,%r15
|
| - movq 8+0(%rbp),%rax
|
| - movq %rax,%r9
|
| - mulq %r10
|
| - addq %rax,%r14
|
| - adcq $0,%rdx
|
| - movq %rdx,%r10
|
| - movq 8+0(%rbp),%rax
|
| - mulq %r11
|
| - addq %rax,%r15
|
| - adcq $0,%rdx
|
| - imulq %r12,%r9
|
| - addq %r10,%r15
|
| - adcq %rdx,%r9
|
| - movq %r13,%r10
|
| - movq %r14,%r11
|
| - movq %r15,%r12
|
| - andq $3,%r12
|
| - movq %r15,%r13
|
| - andq $-4,%r13
|
| - movq %r9,%r14
|
| - shrdq $2,%r9,%r15
|
| - shrq $2,%r9
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $0,%r12
|
| - addq %r15,%r10
|
| - adcq %r9,%r11
|
| - adcq $0,%r12
|
| -
|
| - leaq 32(%rdi),%rdi
|
| -
|
| - vmovdqa %ymm4,%ymm0
|
| - vmovdqa %ymm8,%ymm4
|
| - vmovdqa %ymm12,%ymm8
|
| - vmovdqa %ymm1,%ymm12
|
| - vmovdqa %ymm5,%ymm1
|
| - vmovdqa %ymm9,%ymm5
|
| - vmovdqa %ymm13,%ymm9
|
| - vmovdqa %ymm2,%ymm13
|
| - vmovdqa %ymm6,%ymm2
|
| - jmp seal_avx2_short_loop
|
| -seal_avx2_short_tail:
|
| - cmpq $16,%rbx
|
| - jb 1f
|
| - subq $16,%rbx
|
| - vpxor (%rsi),%xmm0,%xmm3
|
| - vmovdqu %xmm3,(%rdi)
|
| - leaq 16(%rsi),%rsi
|
| - addq 0(%rdi),%r10
|
| - adcq 8+0(%rdi),%r11
|
| - adcq $1,%r12
|
| - movq 0+0(%rbp),%rax
|
| - movq %rax,%r15
|
| - mulq %r10
|
| - movq %rax,%r13
|
| - movq %rdx,%r14
|
| - movq 0+0(%rbp),%rax
|
| - mulq %r11
|
| - imulq %r12,%r15
|
| - addq %rax,%r14
|
| - adcq %rdx,%r15
|
| - movq 8+0(%rbp),%rax
|
| - movq %rax,%r9
|
| - mulq %r10
|
| - addq %rax,%r14
|
| - adcq $0,%rdx
|
| - movq %rdx,%r10
|
| - movq 8+0(%rbp),%rax
|
| - mulq %r11
|
| - addq %rax,%r15
|
| - adcq $0,%rdx
|
| - imulq %r12,%r9
|
| - addq %r10,%r15
|
| - adcq %rdx,%r9
|
| - movq %r13,%r10
|
| - movq %r14,%r11
|
| - movq %r15,%r12
|
| - andq $3,%r12
|
| - movq %r15,%r13
|
| - andq $-4,%r13
|
| - movq %r9,%r14
|
| - shrdq $2,%r9,%r15
|
| - shrq $2,%r9
|
| - addq %r13,%r10
|
| - adcq %r14,%r11
|
| - adcq $0,%r12
|
| - addq %r15,%r10
|
| - adcq %r9,%r11
|
| - adcq $0,%r12
|
| -
|
| - leaq 16(%rdi),%rdi
|
| - vextracti128 $1,%ymm0,%xmm0
|
| -1:
|
| - vzeroupper
|
| - jmp seal_sse_tail_16
|
| -.cfi_endproc
|
| -#endif
|