Index: third_party/boringssl/linux-x86/crypto/chacha/chacha-x86.S
diff --git a/third_party/boringssl/linux-x86/crypto/chacha/chacha-x86.S b/third_party/boringssl/linux-x86/crypto/chacha/chacha-x86.S
new file mode 100644
index 0000000000000000000000000000000000000000..d3c39ace9b2aebf5dbfc913cfe7f213e38fb08c1
--- /dev/null
+++ b/third_party/boringssl/linux-x86/crypto/chacha/chacha-x86.S
@@ -0,0 +1,969 @@
+#if defined(__i386__)
+.file "chacha-x86.S"
+.text
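+# C prototype:
+#   void ChaCha20_ctr32(uint8_t *out, const uint8_t *in, size_t in_len,
+#                       const uint32_t key[8], const uint32_t counter[4]);
+# XORs in_len bytes at |in| with the ChaCha20 key stream and writes the
+# result to |out|.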
+.globl ChaCha20_ctr32
+.hidden ChaCha20_ctr32
+.type ChaCha20_ctr32,@function
+.align 16
+ChaCha20_ctr32:
+.L_ChaCha20_ctr32_begin:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ xorl %eax,%eax
+ cmpl 28(%esp),%eax
+ je .L000no_data
+ call .Lpic_point
+.Lpic_point:
+ popl %eax
+ leal OPENSSL_ia32cap_P-.Lpic_point(%eax),%ebp
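+# Take the SSSE3 path only when OPENSSL_ia32cap_P reports both FXSR
+# (word 0, bit 24) and SSSE3 (word 1, bit 9); otherwise fall through
+# to the integer-only code below.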
+ testl $16777216,(%ebp)
+ jz .L001x86
+ testl $512,4(%ebp)
+ jz .L001x86
+ jmp .Lssse3_shortcut
+.L001x86:
+ movl 32(%esp),%esi
+ movl 36(%esp),%edi
+ subl $132,%esp
+ movl (%esi),%eax
+ movl 4(%esi),%ebx
+ movl 8(%esi),%ecx
+ movl 12(%esi),%edx
+ movl %eax,80(%esp)
+ movl %ebx,84(%esp)
+ movl %ecx,88(%esp)
+ movl %edx,92(%esp)
+ movl 16(%esi),%eax
+ movl 20(%esi),%ebx
+ movl 24(%esi),%ecx
+ movl 28(%esi),%edx
+ movl %eax,96(%esp)
+ movl %ebx,100(%esp)
+ movl %ecx,104(%esp)
+ movl %edx,108(%esp)
+ movl (%edi),%eax
+ movl 4(%edi),%ebx
+ movl 8(%edi),%ecx
+ movl 12(%edi),%edx
+ subl $1,%eax
+ movl %eax,112(%esp)
+ movl %ebx,116(%esp)
+ movl %ecx,120(%esp)
+ movl %edx,124(%esp)
+ jmp .L002entry
+.align 16
+.L003outer_loop:
+ movl %ebx,156(%esp)
+ movl %eax,152(%esp)
+ movl %ecx,160(%esp)
+.L002entry:
+ movl $1634760805,%eax
+ movl $857760878,4(%esp)
+ movl $2036477234,8(%esp)
+ movl $1797285236,12(%esp)
+ movl 84(%esp),%ebx
+ movl 88(%esp),%ebp
+ movl 104(%esp),%ecx
+ movl 108(%esp),%esi
+ movl 116(%esp),%edx
+ movl 120(%esp),%edi
+ movl %ebx,20(%esp)
+ movl %ebp,24(%esp)
+ movl %ecx,40(%esp)
+ movl %esi,44(%esp)
+ movl %edx,52(%esp)
+ movl %edi,56(%esp)
+ movl 92(%esp),%ebx
+ movl 124(%esp),%edi
+ movl 112(%esp),%edx
+ movl 80(%esp),%ebp
+ movl 96(%esp),%ecx
+ movl 100(%esp),%esi
+ addl $1,%edx
+ movl %ebx,28(%esp)
+ movl %edi,60(%esp)
+ movl %edx,112(%esp)
+ movl $10,%ebx
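+# 20 rounds: each of the 10 iterations below performs the four column
+# quarter-rounds followed by the four diagonal quarter-rounds, with the
+# 16-word state held at 0(%esp)..60(%esp).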
+ jmp .L004loop
+.align 16
+.L004loop:
+ addl %ebp,%eax
+ movl %ebx,128(%esp)
+ movl %ebp,%ebx
+ xorl %eax,%edx
+ roll $16,%edx
+ addl %edx,%ecx
+ xorl %ecx,%ebx
+ movl 52(%esp),%edi
+ roll $12,%ebx
+ movl 20(%esp),%ebp
+ addl %ebx,%eax
+ xorl %eax,%edx
+ movl %eax,(%esp)
+ roll $8,%edx
+ movl 4(%esp),%eax
+ addl %edx,%ecx
+ movl %edx,48(%esp)
+ xorl %ecx,%ebx
+ addl %ebp,%eax
+ roll $7,%ebx
+ xorl %eax,%edi
+ movl %ecx,32(%esp)
+ roll $16,%edi
+ movl %ebx,16(%esp)
+ addl %edi,%esi
+ movl 40(%esp),%ecx
+ xorl %esi,%ebp
+ movl 56(%esp),%edx
+ roll $12,%ebp
+ movl 24(%esp),%ebx
+ addl %ebp,%eax
+ xorl %eax,%edi
+ movl %eax,4(%esp)
+ roll $8,%edi
+ movl 8(%esp),%eax
+ addl %edi,%esi
+ movl %edi,52(%esp)
+ xorl %esi,%ebp
+ addl %ebx,%eax
+ roll $7,%ebp
+ xorl %eax,%edx
+ movl %esi,36(%esp)
+ roll $16,%edx
+ movl %ebp,20(%esp)
+ addl %edx,%ecx
+ movl 44(%esp),%esi
+ xorl %ecx,%ebx
+ movl 60(%esp),%edi
+ roll $12,%ebx
+ movl 28(%esp),%ebp
+ addl %ebx,%eax
+ xorl %eax,%edx
+ movl %eax,8(%esp)
+ roll $8,%edx
+ movl 12(%esp),%eax
+ addl %edx,%ecx
+ movl %edx,56(%esp)
+ xorl %ecx,%ebx
+ addl %ebp,%eax
+ roll $7,%ebx
+ xorl %eax,%edi
+ roll $16,%edi
+ movl %ebx,24(%esp)
+ addl %edi,%esi
+ xorl %esi,%ebp
+ roll $12,%ebp
+ movl 20(%esp),%ebx
+ addl %ebp,%eax
+ xorl %eax,%edi
+ movl %eax,12(%esp)
+ roll $8,%edi
+ movl (%esp),%eax
+ addl %edi,%esi
+ movl %edi,%edx
+ xorl %esi,%ebp
+ addl %ebx,%eax
+ roll $7,%ebp
+ xorl %eax,%edx
+ roll $16,%edx
+ movl %ebp,28(%esp)
+ addl %edx,%ecx
+ xorl %ecx,%ebx
+ movl 48(%esp),%edi
+ roll $12,%ebx
+ movl 24(%esp),%ebp
+ addl %ebx,%eax
+ xorl %eax,%edx
+ movl %eax,(%esp)
+ roll $8,%edx
+ movl 4(%esp),%eax
+ addl %edx,%ecx
+ movl %edx,60(%esp)
+ xorl %ecx,%ebx
+ addl %ebp,%eax
+ roll $7,%ebx
+ xorl %eax,%edi
+ movl %ecx,40(%esp)
+ roll $16,%edi
+ movl %ebx,20(%esp)
+ addl %edi,%esi
+ movl 32(%esp),%ecx
+ xorl %esi,%ebp
+ movl 52(%esp),%edx
+ roll $12,%ebp
+ movl 28(%esp),%ebx
+ addl %ebp,%eax
+ xorl %eax,%edi
+ movl %eax,4(%esp)
+ roll $8,%edi
+ movl 8(%esp),%eax
+ addl %edi,%esi
+ movl %edi,48(%esp)
+ xorl %esi,%ebp
+ addl %ebx,%eax
+ roll $7,%ebp
+ xorl %eax,%edx
+ movl %esi,44(%esp)
+ roll $16,%edx
+ movl %ebp,24(%esp)
+ addl %edx,%ecx
+ movl 36(%esp),%esi
+ xorl %ecx,%ebx
+ movl 56(%esp),%edi
+ roll $12,%ebx
+ movl 16(%esp),%ebp
+ addl %ebx,%eax
+ xorl %eax,%edx
+ movl %eax,8(%esp)
+ roll $8,%edx
+ movl 12(%esp),%eax
+ addl %edx,%ecx
+ movl %edx,52(%esp)
+ xorl %ecx,%ebx
+ addl %ebp,%eax
+ roll $7,%ebx
+ xorl %eax,%edi
+ roll $16,%edi
+ movl %ebx,28(%esp)
+ addl %edi,%esi
+ xorl %esi,%ebp
+ movl 48(%esp),%edx
+ roll $12,%ebp
+ movl 128(%esp),%ebx
+ addl %ebp,%eax
+ xorl %eax,%edi
+ movl %eax,12(%esp)
+ roll $8,%edi
+ movl (%esp),%eax
+ addl %edi,%esi
+ movl %edi,56(%esp)
+ xorl %esi,%ebp
+ roll $7,%ebp
+ decl %ebx
+ jnz .L004loop
+ movl 160(%esp),%ebx
+ addl $1634760805,%eax
+ addl 80(%esp),%ebp
+ addl 96(%esp),%ecx
+ addl 100(%esp),%esi
+ cmpl $64,%ebx
+ jb .L005tail
+ movl 156(%esp),%ebx
+ addl 112(%esp),%edx
+ addl 120(%esp),%edi
+ xorl (%ebx),%eax
+ xorl 16(%ebx),%ebp
+ movl %eax,(%esp)
+ movl 152(%esp),%eax
+ xorl 32(%ebx),%ecx
+ xorl 36(%ebx),%esi
+ xorl 48(%ebx),%edx
+ xorl 56(%ebx),%edi
+ movl %ebp,16(%eax)
+ movl %ecx,32(%eax)
+ movl %esi,36(%eax)
+ movl %edx,48(%eax)
+ movl %edi,56(%eax)
+ movl 4(%esp),%ebp
+ movl 8(%esp),%ecx
+ movl 12(%esp),%esi
+ movl 20(%esp),%edx
+ movl 24(%esp),%edi
+ addl $857760878,%ebp
+ addl $2036477234,%ecx
+ addl $1797285236,%esi
+ addl 84(%esp),%edx
+ addl 88(%esp),%edi
+ xorl 4(%ebx),%ebp
+ xorl 8(%ebx),%ecx
+ xorl 12(%ebx),%esi
+ xorl 20(%ebx),%edx
+ xorl 24(%ebx),%edi
+ movl %ebp,4(%eax)
+ movl %ecx,8(%eax)
+ movl %esi,12(%eax)
+ movl %edx,20(%eax)
+ movl %edi,24(%eax)
+ movl 28(%esp),%ebp
+ movl 40(%esp),%ecx
+ movl 44(%esp),%esi
+ movl 52(%esp),%edx
+ movl 60(%esp),%edi
+ addl 92(%esp),%ebp
+ addl 104(%esp),%ecx
+ addl 108(%esp),%esi
+ addl 116(%esp),%edx
+ addl 124(%esp),%edi
+ xorl 28(%ebx),%ebp
+ xorl 40(%ebx),%ecx
+ xorl 44(%ebx),%esi
+ xorl 52(%ebx),%edx
+ xorl 60(%ebx),%edi
+ leal 64(%ebx),%ebx
+ movl %ebp,28(%eax)
+ movl (%esp),%ebp
+ movl %ecx,40(%eax)
+ movl 160(%esp),%ecx
+ movl %esi,44(%eax)
+ movl %edx,52(%eax)
+ movl %edi,60(%eax)
+ movl %ebp,(%eax)
+ leal 64(%eax),%eax
+ subl $64,%ecx
+ jnz .L003outer_loop
+ jmp .L006done
+.L005tail:
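+# Fewer than 64 bytes remain: assemble the last key-stream block on the
+# stack, then XOR it into the input one byte at a time.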
+ addl 112(%esp),%edx
+ addl 120(%esp),%edi
+ movl %eax,(%esp)
+ movl %ebp,16(%esp)
+ movl %ecx,32(%esp)
+ movl %esi,36(%esp)
+ movl %edx,48(%esp)
+ movl %edi,56(%esp)
+ movl 4(%esp),%ebp
+ movl 8(%esp),%ecx
+ movl 12(%esp),%esi
+ movl 20(%esp),%edx
+ movl 24(%esp),%edi
+ addl $857760878,%ebp
+ addl $2036477234,%ecx
+ addl $1797285236,%esi
+ addl 84(%esp),%edx
+ addl 88(%esp),%edi
+ movl %ebp,4(%esp)
+ movl %ecx,8(%esp)
+ movl %esi,12(%esp)
+ movl %edx,20(%esp)
+ movl %edi,24(%esp)
+ movl 28(%esp),%ebp
+ movl 40(%esp),%ecx
+ movl 44(%esp),%esi
+ movl 52(%esp),%edx
+ movl 60(%esp),%edi
+ addl 92(%esp),%ebp
+ addl 104(%esp),%ecx
+ addl 108(%esp),%esi
+ addl 116(%esp),%edx
+ addl 124(%esp),%edi
+ movl %ebp,28(%esp)
+ movl 156(%esp),%ebp
+ movl %ecx,40(%esp)
+ movl 152(%esp),%ecx
+ movl %esi,44(%esp)
+ xorl %esi,%esi
+ movl %edx,52(%esp)
+ movl %edi,60(%esp)
+ xorl %eax,%eax
+ xorl %edx,%edx
+.L007tail_loop:
+ movb (%esi,%ebp,1),%al
+ movb (%esp,%esi,1),%dl
+ leal 1(%esi),%esi
+ xorb %dl,%al
+ movb %al,-1(%ecx,%esi,1)
+ decl %ebx
+ jnz .L007tail_loop
+.L006done:
+ addl $132,%esp
+.L000no_data:
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.size ChaCha20_ctr32,.-.L_ChaCha20_ctr32_begin
+.globl ChaCha20_ssse3
+.hidden ChaCha20_ssse3
+.type ChaCha20_ssse3,@function
+.align 16
+ChaCha20_ssse3:
+.L_ChaCha20_ssse3_begin:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+.Lssse3_shortcut:
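+# SSSE3 path. Inputs of 256 bytes or more are processed four 64-byte
+# blocks at a time, with the same state word of all four blocks packed
+# into one xmm-sized stack slot; shorter inputs use the one-block code
+# at .L0081x.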
+ movl 20(%esp),%edi
+ movl 24(%esp),%esi
+ movl 28(%esp),%ecx
+ movl 32(%esp),%edx
+ movl 36(%esp),%ebx
+ movl %esp,%ebp
+ subl $524,%esp
+ andl $-64,%esp
+ movl %ebp,512(%esp)
+ leal .Lssse3_data-.Lpic_point(%eax),%eax
+ movdqu (%ebx),%xmm3
+ cmpl $256,%ecx
+ jb .L0081x
+ movl %edx,516(%esp)
+ movl %ebx,520(%esp)
+ subl $256,%ecx
+ leal 384(%esp),%ebp
+ movdqu (%edx),%xmm7
+ pshufd $0,%xmm3,%xmm0
+ pshufd $85,%xmm3,%xmm1
+ pshufd $170,%xmm3,%xmm2
+ pshufd $255,%xmm3,%xmm3
+ paddd 48(%eax),%xmm0
+ pshufd $0,%xmm7,%xmm4
+ pshufd $85,%xmm7,%xmm5
+ psubd 64(%eax),%xmm0
+ pshufd $170,%xmm7,%xmm6
+ pshufd $255,%xmm7,%xmm7
+ movdqa %xmm0,64(%ebp)
+ movdqa %xmm1,80(%ebp)
+ movdqa %xmm2,96(%ebp)
+ movdqa %xmm3,112(%ebp)
+ movdqu 16(%edx),%xmm3
+ movdqa %xmm4,-64(%ebp)
+ movdqa %xmm5,-48(%ebp)
+ movdqa %xmm6,-32(%ebp)
+ movdqa %xmm7,-16(%ebp)
+ movdqa 32(%eax),%xmm7
+ leal 128(%esp),%ebx
+ pshufd $0,%xmm3,%xmm0
+ pshufd $85,%xmm3,%xmm1
+ pshufd $170,%xmm3,%xmm2
+ pshufd $255,%xmm3,%xmm3
+ pshufd $0,%xmm7,%xmm4
+ pshufd $85,%xmm7,%xmm5
+ pshufd $170,%xmm7,%xmm6
+ pshufd $255,%xmm7,%xmm7
+ movdqa %xmm0,(%ebp)
+ movdqa %xmm1,16(%ebp)
+ movdqa %xmm2,32(%ebp)
+ movdqa %xmm3,48(%ebp)
+ movdqa %xmm4,-128(%ebp)
+ movdqa %xmm5,-112(%ebp)
+ movdqa %xmm6,-96(%ebp)
+ movdqa %xmm7,-80(%ebp)
+ leal 128(%esi),%esi
+ leal 128(%edi),%edi
+ jmp .L009outer_loop
+.align 16
+.L009outer_loop:
+ movdqa -112(%ebp),%xmm1
+ movdqa -96(%ebp),%xmm2
+ movdqa -80(%ebp),%xmm3
+ movdqa -48(%ebp),%xmm5
+ movdqa -32(%ebp),%xmm6
+ movdqa -16(%ebp),%xmm7
+ movdqa %xmm1,-112(%ebx)
+ movdqa %xmm2,-96(%ebx)
+ movdqa %xmm3,-80(%ebx)
+ movdqa %xmm5,-48(%ebx)
+ movdqa %xmm6,-32(%ebx)
+ movdqa %xmm7,-16(%ebx)
+ movdqa 32(%ebp),%xmm2
+ movdqa 48(%ebp),%xmm3
+ movdqa 64(%ebp),%xmm4
+ movdqa 80(%ebp),%xmm5
+ movdqa 96(%ebp),%xmm6
+ movdqa 112(%ebp),%xmm7
+ paddd 64(%eax),%xmm4
+ movdqa %xmm2,32(%ebx)
+ movdqa %xmm3,48(%ebx)
+ movdqa %xmm4,64(%ebx)
+ movdqa %xmm5,80(%ebx)
+ movdqa %xmm6,96(%ebx)
+ movdqa %xmm7,112(%ebx)
+ movdqa %xmm4,64(%ebp)
+ movdqa -128(%ebp),%xmm0
+ movdqa %xmm4,%xmm6
+ movdqa -64(%ebp),%xmm3
+ movdqa (%ebp),%xmm4
+ movdqa 16(%ebp),%xmm5
+ movl $10,%edx
+ nop
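+# Vectorized double round: rotates by 16 and 8 are byte shuffles
+# (pshufb with the masks at (%eax) and 16(%eax)); rotates by 12 and 7
+# use pslld/psrld/por pairs.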
+.align 16
+.L010loop:
+ paddd %xmm3,%xmm0
+ movdqa %xmm3,%xmm2
+ pxor %xmm0,%xmm6
+ pshufb (%eax),%xmm6
+ paddd %xmm6,%xmm4
+ pxor %xmm4,%xmm2
+ movdqa -48(%ebx),%xmm3
+ movdqa %xmm2,%xmm1
+ pslld $12,%xmm2
+ psrld $20,%xmm1
+ por %xmm1,%xmm2
+ movdqa -112(%ebx),%xmm1
+ paddd %xmm2,%xmm0
+ movdqa 80(%ebx),%xmm7
+ pxor %xmm0,%xmm6
+ movdqa %xmm0,-128(%ebx)
+ pshufb 16(%eax),%xmm6
+ paddd %xmm6,%xmm4
+ movdqa %xmm6,64(%ebx)
+ pxor %xmm4,%xmm2
+ paddd %xmm3,%xmm1
+ movdqa %xmm2,%xmm0
+ pslld $7,%xmm2
+ psrld $25,%xmm0
+ pxor %xmm1,%xmm7
+ por %xmm0,%xmm2
+ movdqa %xmm4,(%ebx)
+ pshufb (%eax),%xmm7
+ movdqa %xmm2,-64(%ebx)
+ paddd %xmm7,%xmm5
+ movdqa 32(%ebx),%xmm4
+ pxor %xmm5,%xmm3
+ movdqa -32(%ebx),%xmm2
+ movdqa %xmm3,%xmm0
+ pslld $12,%xmm3
+ psrld $20,%xmm0
+ por %xmm0,%xmm3
+ movdqa -96(%ebx),%xmm0
+ paddd %xmm3,%xmm1
+ movdqa 96(%ebx),%xmm6
+ pxor %xmm1,%xmm7
+ movdqa %xmm1,-112(%ebx)
+ pshufb 16(%eax),%xmm7
+ paddd %xmm7,%xmm5
+ movdqa %xmm7,80(%ebx)
+ pxor %xmm5,%xmm3
+ paddd %xmm2,%xmm0
+ movdqa %xmm3,%xmm1
+ pslld $7,%xmm3
+ psrld $25,%xmm1
+ pxor %xmm0,%xmm6
+ por %xmm1,%xmm3
+ movdqa %xmm5,16(%ebx)
+ pshufb (%eax),%xmm6
+ movdqa %xmm3,-48(%ebx)
+ paddd %xmm6,%xmm4
+ movdqa 48(%ebx),%xmm5
+ pxor %xmm4,%xmm2
+ movdqa -16(%ebx),%xmm3
+ movdqa %xmm2,%xmm1
+ pslld $12,%xmm2
+ psrld $20,%xmm1
+ por %xmm1,%xmm2
+ movdqa -80(%ebx),%xmm1
+ paddd %xmm2,%xmm0
+ movdqa 112(%ebx),%xmm7
+ pxor %xmm0,%xmm6
+ movdqa %xmm0,-96(%ebx)
+ pshufb 16(%eax),%xmm6
+ paddd %xmm6,%xmm4
+ movdqa %xmm6,96(%ebx)
+ pxor %xmm4,%xmm2
+ paddd %xmm3,%xmm1
+ movdqa %xmm2,%xmm0
+ pslld $7,%xmm2
+ psrld $25,%xmm0
+ pxor %xmm1,%xmm7
+ por %xmm0,%xmm2
+ pshufb (%eax),%xmm7
+ movdqa %xmm2,-32(%ebx)
+ paddd %xmm7,%xmm5
+ pxor %xmm5,%xmm3
+ movdqa -48(%ebx),%xmm2
+ movdqa %xmm3,%xmm0
+ pslld $12,%xmm3
+ psrld $20,%xmm0
+ por %xmm0,%xmm3
+ movdqa -128(%ebx),%xmm0
+ paddd %xmm3,%xmm1
+ pxor %xmm1,%xmm7
+ movdqa %xmm1,-80(%ebx)
+ pshufb 16(%eax),%xmm7
+ paddd %xmm7,%xmm5
+ movdqa %xmm7,%xmm6
+ pxor %xmm5,%xmm3
+ paddd %xmm2,%xmm0
+ movdqa %xmm3,%xmm1
+ pslld $7,%xmm3
+ psrld $25,%xmm1
+ pxor %xmm0,%xmm6
+ por %xmm1,%xmm3
+ pshufb (%eax),%xmm6
+ movdqa %xmm3,-16(%ebx)
+ paddd %xmm6,%xmm4
+ pxor %xmm4,%xmm2
+ movdqa -32(%ebx),%xmm3
+ movdqa %xmm2,%xmm1
+ pslld $12,%xmm2
+ psrld $20,%xmm1
+ por %xmm1,%xmm2
+ movdqa -112(%ebx),%xmm1
+ paddd %xmm2,%xmm0
+ movdqa 64(%ebx),%xmm7
+ pxor %xmm0,%xmm6
+ movdqa %xmm0,-128(%ebx)
+ pshufb 16(%eax),%xmm6
+ paddd %xmm6,%xmm4
+ movdqa %xmm6,112(%ebx)
+ pxor %xmm4,%xmm2
+ paddd %xmm3,%xmm1
+ movdqa %xmm2,%xmm0
+ pslld $7,%xmm2
+ psrld $25,%xmm0
+ pxor %xmm1,%xmm7
+ por %xmm0,%xmm2
+ movdqa %xmm4,32(%ebx)
+ pshufb (%eax),%xmm7
+ movdqa %xmm2,-48(%ebx)
+ paddd %xmm7,%xmm5
+ movdqa (%ebx),%xmm4
+ pxor %xmm5,%xmm3
+ movdqa -16(%ebx),%xmm2
+ movdqa %xmm3,%xmm0
+ pslld $12,%xmm3
+ psrld $20,%xmm0
+ por %xmm0,%xmm3
+ movdqa -96(%ebx),%xmm0
+ paddd %xmm3,%xmm1
+ movdqa 80(%ebx),%xmm6
+ pxor %xmm1,%xmm7
+ movdqa %xmm1,-112(%ebx)
+ pshufb 16(%eax),%xmm7
+ paddd %xmm7,%xmm5
+ movdqa %xmm7,64(%ebx)
+ pxor %xmm5,%xmm3
+ paddd %xmm2,%xmm0
+ movdqa %xmm3,%xmm1
+ pslld $7,%xmm3
+ psrld $25,%xmm1
+ pxor %xmm0,%xmm6
+ por %xmm1,%xmm3
+ movdqa %xmm5,48(%ebx)
+ pshufb (%eax),%xmm6
+ movdqa %xmm3,-32(%ebx)
+ paddd %xmm6,%xmm4
+ movdqa 16(%ebx),%xmm5
+ pxor %xmm4,%xmm2
+ movdqa -64(%ebx),%xmm3
+ movdqa %xmm2,%xmm1
+ pslld $12,%xmm2
+ psrld $20,%xmm1
+ por %xmm1,%xmm2
+ movdqa -80(%ebx),%xmm1
+ paddd %xmm2,%xmm0
+ movdqa 96(%ebx),%xmm7
+ pxor %xmm0,%xmm6
+ movdqa %xmm0,-96(%ebx)
+ pshufb 16(%eax),%xmm6
+ paddd %xmm6,%xmm4
+ movdqa %xmm6,80(%ebx)
+ pxor %xmm4,%xmm2
+ paddd %xmm3,%xmm1
+ movdqa %xmm2,%xmm0
+ pslld $7,%xmm2
+ psrld $25,%xmm0
+ pxor %xmm1,%xmm7
+ por %xmm0,%xmm2
+ pshufb (%eax),%xmm7
+ movdqa %xmm2,-16(%ebx)
+ paddd %xmm7,%xmm5
+ pxor %xmm5,%xmm3
+ movdqa %xmm3,%xmm0
+ pslld $12,%xmm3
+ psrld $20,%xmm0
+ por %xmm0,%xmm3
+ movdqa -128(%ebx),%xmm0
+ paddd %xmm3,%xmm1
+ movdqa 64(%ebx),%xmm6
+ pxor %xmm1,%xmm7
+ movdqa %xmm1,-80(%ebx)
+ pshufb 16(%eax),%xmm7
+ paddd %xmm7,%xmm5
+ movdqa %xmm7,96(%ebx)
+ pxor %xmm5,%xmm3
+ movdqa %xmm3,%xmm1
+ pslld $7,%xmm3
+ psrld $25,%xmm1
+ por %xmm1,%xmm3
+ decl %edx
+ jnz .L010loop
+ movdqa %xmm3,-64(%ebx)
+ movdqa %xmm4,(%ebx)
+ movdqa %xmm5,16(%ebx)
+ movdqa %xmm6,64(%ebx)
+ movdqa %xmm7,96(%ebx)
+ movdqa -112(%ebx),%xmm1
+ movdqa -96(%ebx),%xmm2
+ movdqa -80(%ebx),%xmm3
+ paddd -128(%ebp),%xmm0
+ paddd -112(%ebp),%xmm1
+ paddd -96(%ebp),%xmm2
+ paddd -80(%ebp),%xmm3
+ movdqa %xmm0,%xmm6
+ punpckldq %xmm1,%xmm0
+ movdqa %xmm2,%xmm7
+ punpckldq %xmm3,%xmm2
+ punpckhdq %xmm1,%xmm6
+ punpckhdq %xmm3,%xmm7
+ movdqa %xmm0,%xmm1
+ punpcklqdq %xmm2,%xmm0
+ movdqa %xmm6,%xmm3
+ punpcklqdq %xmm7,%xmm6
+ punpckhqdq %xmm2,%xmm1
+ punpckhqdq %xmm7,%xmm3
+ movdqu -128(%esi),%xmm4
+ movdqu -64(%esi),%xmm5
+ movdqu (%esi),%xmm2
+ movdqu 64(%esi),%xmm7
+ leal 16(%esi),%esi
+ pxor %xmm0,%xmm4
+ movdqa -64(%ebx),%xmm0
+ pxor %xmm1,%xmm5
+ movdqa -48(%ebx),%xmm1
+ pxor %xmm2,%xmm6
+ movdqa -32(%ebx),%xmm2
+ pxor %xmm3,%xmm7
+ movdqa -16(%ebx),%xmm3
+ movdqu %xmm4,-128(%edi)
+ movdqu %xmm5,-64(%edi)
+ movdqu %xmm6,(%edi)
+ movdqu %xmm7,64(%edi)
+ leal 16(%edi),%edi
+ paddd -64(%ebp),%xmm0
+ paddd -48(%ebp),%xmm1
+ paddd -32(%ebp),%xmm2
+ paddd -16(%ebp),%xmm3
+ movdqa %xmm0,%xmm6
+ punpckldq %xmm1,%xmm0
+ movdqa %xmm2,%xmm7
+ punpckldq %xmm3,%xmm2
+ punpckhdq %xmm1,%xmm6
+ punpckhdq %xmm3,%xmm7
+ movdqa %xmm0,%xmm1
+ punpcklqdq %xmm2,%xmm0
+ movdqa %xmm6,%xmm3
+ punpcklqdq %xmm7,%xmm6
+ punpckhqdq %xmm2,%xmm1
+ punpckhqdq %xmm7,%xmm3
+ movdqu -128(%esi),%xmm4
+ movdqu -64(%esi),%xmm5
+ movdqu (%esi),%xmm2
+ movdqu 64(%esi),%xmm7
+ leal 16(%esi),%esi
+ pxor %xmm0,%xmm4
+ movdqa (%ebx),%xmm0
+ pxor %xmm1,%xmm5
+ movdqa 16(%ebx),%xmm1
+ pxor %xmm2,%xmm6
+ movdqa 32(%ebx),%xmm2
+ pxor %xmm3,%xmm7
+ movdqa 48(%ebx),%xmm3
+ movdqu %xmm4,-128(%edi)
+ movdqu %xmm5,-64(%edi)
+ movdqu %xmm6,(%edi)
+ movdqu %xmm7,64(%edi)
+ leal 16(%edi),%edi
+ paddd (%ebp),%xmm0
+ paddd 16(%ebp),%xmm1
+ paddd 32(%ebp),%xmm2
+ paddd 48(%ebp),%xmm3
+ movdqa %xmm0,%xmm6
+ punpckldq %xmm1,%xmm0
+ movdqa %xmm2,%xmm7
+ punpckldq %xmm3,%xmm2
+ punpckhdq %xmm1,%xmm6
+ punpckhdq %xmm3,%xmm7
+ movdqa %xmm0,%xmm1
+ punpcklqdq %xmm2,%xmm0
+ movdqa %xmm6,%xmm3
+ punpcklqdq %xmm7,%xmm6
+ punpckhqdq %xmm2,%xmm1
+ punpckhqdq %xmm7,%xmm3
+ movdqu -128(%esi),%xmm4
+ movdqu -64(%esi),%xmm5
+ movdqu (%esi),%xmm2
+ movdqu 64(%esi),%xmm7
+ leal 16(%esi),%esi
+ pxor %xmm0,%xmm4
+ movdqa 64(%ebx),%xmm0
+ pxor %xmm1,%xmm5
+ movdqa 80(%ebx),%xmm1
+ pxor %xmm2,%xmm6
+ movdqa 96(%ebx),%xmm2
+ pxor %xmm3,%xmm7
+ movdqa 112(%ebx),%xmm3
+ movdqu %xmm4,-128(%edi)
+ movdqu %xmm5,-64(%edi)
+ movdqu %xmm6,(%edi)
+ movdqu %xmm7,64(%edi)
+ leal 16(%edi),%edi
+ paddd 64(%ebp),%xmm0
+ paddd 80(%ebp),%xmm1
+ paddd 96(%ebp),%xmm2
+ paddd 112(%ebp),%xmm3
+ movdqa %xmm0,%xmm6
+ punpckldq %xmm1,%xmm0
+ movdqa %xmm2,%xmm7
+ punpckldq %xmm3,%xmm2
+ punpckhdq %xmm1,%xmm6
+ punpckhdq %xmm3,%xmm7
+ movdqa %xmm0,%xmm1
+ punpcklqdq %xmm2,%xmm0
+ movdqa %xmm6,%xmm3
+ punpcklqdq %xmm7,%xmm6
+ punpckhqdq %xmm2,%xmm1
+ punpckhqdq %xmm7,%xmm3
+ movdqu -128(%esi),%xmm4
+ movdqu -64(%esi),%xmm5
+ movdqu (%esi),%xmm2
+ movdqu 64(%esi),%xmm7
+ leal 208(%esi),%esi
+ pxor %xmm0,%xmm4
+ pxor %xmm1,%xmm5
+ pxor %xmm2,%xmm6
+ pxor %xmm3,%xmm7
+ movdqu %xmm4,-128(%edi)
+ movdqu %xmm5,-64(%edi)
+ movdqu %xmm6,(%edi)
+ movdqu %xmm7,64(%edi)
+ leal 208(%edi),%edi
+ subl $256,%ecx
+ jnc .L009outer_loop
+ addl $256,%ecx
+ jz .L011done
+ movl 520(%esp),%ebx
+ leal -128(%esi),%esi
+ movl 516(%esp),%edx
+ leal -128(%edi),%edi
+ movd 64(%ebp),%xmm2
+ movdqu (%ebx),%xmm3
+ paddd 96(%eax),%xmm2
+ pand 112(%eax),%xmm3
+ por %xmm2,%xmm3
+.L0081x:
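+# One-block path: the four rows of the ChaCha state live in
+# %xmm0..%xmm3, and pshufd realigns the diagonals between half-rounds.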
+ movdqa 32(%eax),%xmm0
+ movdqu (%edx),%xmm1
+ movdqu 16(%edx),%xmm2
+ movdqa (%eax),%xmm6
+ movdqa 16(%eax),%xmm7
+ movl %ebp,48(%esp)
+ movdqa %xmm0,(%esp)
+ movdqa %xmm1,16(%esp)
+ movdqa %xmm2,32(%esp)
+ movdqa %xmm3,48(%esp)
+ movl $10,%edx
+ jmp .L012loop1x
+.align 16
+.L013outer1x:
+ movdqa 80(%eax),%xmm3
+ movdqa (%esp),%xmm0
+ movdqa 16(%esp),%xmm1
+ movdqa 32(%esp),%xmm2
+ paddd 48(%esp),%xmm3
+ movl $10,%edx
+ movdqa %xmm3,48(%esp)
+ jmp .L012loop1x
+.align 16
+.L012loop1x:
+ paddd %xmm1,%xmm0
+ pxor %xmm0,%xmm3
+.byte 102,15,56,0,222 # pshufb %xmm6,%xmm3 (rotate dwords left 16)
+ paddd %xmm3,%xmm2
+ pxor %xmm2,%xmm1
+ movdqa %xmm1,%xmm4
+ psrld $20,%xmm1
+ pslld $12,%xmm4
+ por %xmm4,%xmm1
+ paddd %xmm1,%xmm0
+ pxor %xmm0,%xmm3
+.byte 102,15,56,0,223 # pshufb %xmm7,%xmm3 (rotate dwords left 8)
+ paddd %xmm3,%xmm2
+ pxor %xmm2,%xmm1
+ movdqa %xmm1,%xmm4
+ psrld $25,%xmm1
+ pslld $7,%xmm4
+ por %xmm4,%xmm1
+ pshufd $78,%xmm2,%xmm2
+ pshufd $57,%xmm1,%xmm1
+ pshufd $147,%xmm3,%xmm3
+ nop
+ paddd %xmm1,%xmm0
+ pxor %xmm0,%xmm3
+.byte 102,15,56,0,222 # pshufb %xmm6,%xmm3 (rotate dwords left 16)
+ paddd %xmm3,%xmm2
+ pxor %xmm2,%xmm1
+ movdqa %xmm1,%xmm4
+ psrld $20,%xmm1
+ pslld $12,%xmm4
+ por %xmm4,%xmm1
+ paddd %xmm1,%xmm0
+ pxor %xmm0,%xmm3
+.byte 102,15,56,0,223 # pshufb %xmm7,%xmm3 (rotate dwords left 8)
+ paddd %xmm3,%xmm2
+ pxor %xmm2,%xmm1
+ movdqa %xmm1,%xmm4
+ psrld $25,%xmm1
+ pslld $7,%xmm4
+ por %xmm4,%xmm1
+ pshufd $78,%xmm2,%xmm2
+ pshufd $147,%xmm1,%xmm1
+ pshufd $57,%xmm3,%xmm3
+ decl %edx
+ jnz .L012loop1x
+ paddd (%esp),%xmm0
+ paddd 16(%esp),%xmm1
+ paddd 32(%esp),%xmm2
+ paddd 48(%esp),%xmm3
+ cmpl $64,%ecx
+ jb .L014tail
+ movdqu (%esi),%xmm4
+ movdqu 16(%esi),%xmm5
+ pxor %xmm4,%xmm0
+ movdqu 32(%esi),%xmm4
+ pxor %xmm5,%xmm1
+ movdqu 48(%esi),%xmm5
+ pxor %xmm4,%xmm2
+ pxor %xmm5,%xmm3
+ leal 64(%esi),%esi
+ movdqu %xmm0,(%edi)
+ movdqu %xmm1,16(%edi)
+ movdqu %xmm2,32(%edi)
+ movdqu %xmm3,48(%edi)
+ leal 64(%edi),%edi
+ subl $64,%ecx
+ jnz .L013outer1x
+ jmp .L011done
+.L014tail:
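+# Partial final block: spill the key-stream block to the stack and XOR
+# the remaining bytes one at a time.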
+ movdqa %xmm0,(%esp)
+ movdqa %xmm1,16(%esp)
+ movdqa %xmm2,32(%esp)
+ movdqa %xmm3,48(%esp)
+ xorl %eax,%eax
+ xorl %edx,%edx
+ xorl %ebp,%ebp
+.L015tail_loop:
+ movb (%esp,%ebp,1),%al
+ movb (%esi,%ebp,1),%dl
+ leal 1(%ebp),%ebp
+ xorb %dl,%al
+ movb %al,-1(%edi,%ebp,1)
+ decl %ecx
+ jnz .L015tail_loop
+.L011done:
+ movl 512(%esp),%esp
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.size ChaCha20_ssse3,.-.L_ChaCha20_ssse3_begin
+.align 64
+.Lssse3_data:
+.byte 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13 # pshufb mask: rotate each dword left 16
+.byte 3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14 # pshufb mask: rotate each dword left 8
+.long 1634760805,857760878,2036477234,1797285236 # "expand 32-byte k" sigma constants
+.long 0,1,2,3 # per-block counter offsets for the 4x path
+.long 4,4,4,4 # counter step between 4-block batches
+.long 1,0,0,0 # counter step for the one-block path
+.long 4,0,0,0 # counter advance applied when leaving the 4x loop
+.long 0,-1,-1,-1 # mask that keeps the nonce and clears the counter word
+.align 64
+.byte 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54 # "ChaCha20 for x86"
+.byte 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32 # ", CRYPTOGAMS by "
+.byte 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111 # "<appro@openssl.o"
+.byte 114,103,62,0 # "rg>"
+#endif