Chromium Code Reviews

Unified Diff: third_party/boringssl/mac-x86/crypto/chacha/chacha-x86.S

Issue 2219933002: Land BoringSSL roll on master (Closed)
Base URL: git@github.com:dart-lang/sdk.git@master
Patch Set: Created 4 years, 4 months ago
Index: third_party/boringssl/mac-x86/crypto/chacha/chacha-x86.S
diff --git a/third_party/boringssl/mac-x86/crypto/chacha/chacha-x86.S b/third_party/boringssl/mac-x86/crypto/chacha/chacha-x86.S
new file mode 100644
index 0000000000000000000000000000000000000000..5de98a3d61589cd32843673c4f2d0497f7a36863
--- /dev/null
+++ b/third_party/boringssl/mac-x86/crypto/chacha/chacha-x86.S
@@ -0,0 +1,969 @@
+#if defined(__i386__)
+.file "chacha-x86.S"
+.text
+.globl _ChaCha20_ctr32
+.private_extern _ChaCha20_ctr32
+.align 4
+_ChaCha20_ctr32:
+L_ChaCha20_ctr32_begin:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ xorl %eax,%eax
+ cmpl 28(%esp),%eax
+ je L000no_data
+ call Lpic_point
+Lpic_point:
+ popl %eax
+ movl L_OPENSSL_ia32cap_P$non_lazy_ptr-Lpic_point(%eax),%ebp
+ testl $16777216,(%ebp)
+ jz L001x86
+ testl $512,4(%ebp)
+ jz L001x86
+ jmp Lssse3_shortcut
+L001x86:
+ movl 32(%esp),%esi
+ movl 36(%esp),%edi
+ subl $132,%esp
+ movl (%esi),%eax
+ movl 4(%esi),%ebx
+ movl 8(%esi),%ecx
+ movl 12(%esi),%edx
+ movl %eax,80(%esp)
+ movl %ebx,84(%esp)
+ movl %ecx,88(%esp)
+ movl %edx,92(%esp)
+ movl 16(%esi),%eax
+ movl 20(%esi),%ebx
+ movl 24(%esi),%ecx
+ movl 28(%esi),%edx
+ movl %eax,96(%esp)
+ movl %ebx,100(%esp)
+ movl %ecx,104(%esp)
+ movl %edx,108(%esp)
+ movl (%edi),%eax
+ movl 4(%edi),%ebx
+ movl 8(%edi),%ecx
+ movl 12(%edi),%edx
+ subl $1,%eax
+ movl %eax,112(%esp)
+ movl %ebx,116(%esp)
+ movl %ecx,120(%esp)
+ movl %edx,124(%esp)
+ jmp L002entry
+.align 4,0x90
+L003outer_loop:
+ movl %ebx,156(%esp)
+ movl %eax,152(%esp)
+ movl %ecx,160(%esp)
+L002entry:
+ movl $1634760805,%eax
+ movl $857760878,4(%esp)
+ movl $2036477234,8(%esp)
+ movl $1797285236,12(%esp)
+ movl 84(%esp),%ebx
+ movl 88(%esp),%ebp
+ movl 104(%esp),%ecx
+ movl 108(%esp),%esi
+ movl 116(%esp),%edx
+ movl 120(%esp),%edi
+ movl %ebx,20(%esp)
+ movl %ebp,24(%esp)
+ movl %ecx,40(%esp)
+ movl %esi,44(%esp)
+ movl %edx,52(%esp)
+ movl %edi,56(%esp)
+ movl 92(%esp),%ebx
+ movl 124(%esp),%edi
+ movl 112(%esp),%edx
+ movl 80(%esp),%ebp
+ movl 96(%esp),%ecx
+ movl 100(%esp),%esi
+ addl $1,%edx
+ movl %ebx,28(%esp)
+ movl %edi,60(%esp)
+ movl %edx,112(%esp)
+ movl $10,%ebx
+ jmp L004loop
+.align 4,0x90
+L004loop:
+ addl %ebp,%eax
+ movl %ebx,128(%esp)
+ movl %ebp,%ebx
+ xorl %eax,%edx
+ roll $16,%edx
+ addl %edx,%ecx
+ xorl %ecx,%ebx
+ movl 52(%esp),%edi
+ roll $12,%ebx
+ movl 20(%esp),%ebp
+ addl %ebx,%eax
+ xorl %eax,%edx
+ movl %eax,(%esp)
+ roll $8,%edx
+ movl 4(%esp),%eax
+ addl %edx,%ecx
+ movl %edx,48(%esp)
+ xorl %ecx,%ebx
+ addl %ebp,%eax
+ roll $7,%ebx
+ xorl %eax,%edi
+ movl %ecx,32(%esp)
+ roll $16,%edi
+ movl %ebx,16(%esp)
+ addl %edi,%esi
+ movl 40(%esp),%ecx
+ xorl %esi,%ebp
+ movl 56(%esp),%edx
+ roll $12,%ebp
+ movl 24(%esp),%ebx
+ addl %ebp,%eax
+ xorl %eax,%edi
+ movl %eax,4(%esp)
+ roll $8,%edi
+ movl 8(%esp),%eax
+ addl %edi,%esi
+ movl %edi,52(%esp)
+ xorl %esi,%ebp
+ addl %ebx,%eax
+ roll $7,%ebp
+ xorl %eax,%edx
+ movl %esi,36(%esp)
+ roll $16,%edx
+ movl %ebp,20(%esp)
+ addl %edx,%ecx
+ movl 44(%esp),%esi
+ xorl %ecx,%ebx
+ movl 60(%esp),%edi
+ roll $12,%ebx
+ movl 28(%esp),%ebp
+ addl %ebx,%eax
+ xorl %eax,%edx
+ movl %eax,8(%esp)
+ roll $8,%edx
+ movl 12(%esp),%eax
+ addl %edx,%ecx
+ movl %edx,56(%esp)
+ xorl %ecx,%ebx
+ addl %ebp,%eax
+ roll $7,%ebx
+ xorl %eax,%edi
+ roll $16,%edi
+ movl %ebx,24(%esp)
+ addl %edi,%esi
+ xorl %esi,%ebp
+ roll $12,%ebp
+ movl 20(%esp),%ebx
+ addl %ebp,%eax
+ xorl %eax,%edi
+ movl %eax,12(%esp)
+ roll $8,%edi
+ movl (%esp),%eax
+ addl %edi,%esi
+ movl %edi,%edx
+ xorl %esi,%ebp
+ addl %ebx,%eax
+ roll $7,%ebp
+ xorl %eax,%edx
+ roll $16,%edx
+ movl %ebp,28(%esp)
+ addl %edx,%ecx
+ xorl %ecx,%ebx
+ movl 48(%esp),%edi
+ roll $12,%ebx
+ movl 24(%esp),%ebp
+ addl %ebx,%eax
+ xorl %eax,%edx
+ movl %eax,(%esp)
+ roll $8,%edx
+ movl 4(%esp),%eax
+ addl %edx,%ecx
+ movl %edx,60(%esp)
+ xorl %ecx,%ebx
+ addl %ebp,%eax
+ roll $7,%ebx
+ xorl %eax,%edi
+ movl %ecx,40(%esp)
+ roll $16,%edi
+ movl %ebx,20(%esp)
+ addl %edi,%esi
+ movl 32(%esp),%ecx
+ xorl %esi,%ebp
+ movl 52(%esp),%edx
+ roll $12,%ebp
+ movl 28(%esp),%ebx
+ addl %ebp,%eax
+ xorl %eax,%edi
+ movl %eax,4(%esp)
+ roll $8,%edi
+ movl 8(%esp),%eax
+ addl %edi,%esi
+ movl %edi,48(%esp)
+ xorl %esi,%ebp
+ addl %ebx,%eax
+ roll $7,%ebp
+ xorl %eax,%edx
+ movl %esi,44(%esp)
+ roll $16,%edx
+ movl %ebp,24(%esp)
+ addl %edx,%ecx
+ movl 36(%esp),%esi
+ xorl %ecx,%ebx
+ movl 56(%esp),%edi
+ roll $12,%ebx
+ movl 16(%esp),%ebp
+ addl %ebx,%eax
+ xorl %eax,%edx
+ movl %eax,8(%esp)
+ roll $8,%edx
+ movl 12(%esp),%eax
+ addl %edx,%ecx
+ movl %edx,52(%esp)
+ xorl %ecx,%ebx
+ addl %ebp,%eax
+ roll $7,%ebx
+ xorl %eax,%edi
+ roll $16,%edi
+ movl %ebx,28(%esp)
+ addl %edi,%esi
+ xorl %esi,%ebp
+ movl 48(%esp),%edx
+ roll $12,%ebp
+ movl 128(%esp),%ebx
+ addl %ebp,%eax
+ xorl %eax,%edi
+ movl %eax,12(%esp)
+ roll $8,%edi
+ movl (%esp),%eax
+ addl %edi,%esi
+ movl %edi,56(%esp)
+ xorl %esi,%ebp
+ roll $7,%ebp
+ decl %ebx
+ jnz L004loop
+ movl 160(%esp),%ebx
+ addl $1634760805,%eax
+ addl 80(%esp),%ebp
+ addl 96(%esp),%ecx
+ addl 100(%esp),%esi
+ cmpl $64,%ebx
+ jb L005tail
+ movl 156(%esp),%ebx
+ addl 112(%esp),%edx
+ addl 120(%esp),%edi
+ xorl (%ebx),%eax
+ xorl 16(%ebx),%ebp
+ movl %eax,(%esp)
+ movl 152(%esp),%eax
+ xorl 32(%ebx),%ecx
+ xorl 36(%ebx),%esi
+ xorl 48(%ebx),%edx
+ xorl 56(%ebx),%edi
+ movl %ebp,16(%eax)
+ movl %ecx,32(%eax)
+ movl %esi,36(%eax)
+ movl %edx,48(%eax)
+ movl %edi,56(%eax)
+ movl 4(%esp),%ebp
+ movl 8(%esp),%ecx
+ movl 12(%esp),%esi
+ movl 20(%esp),%edx
+ movl 24(%esp),%edi
+ addl $857760878,%ebp
+ addl $2036477234,%ecx
+ addl $1797285236,%esi
+ addl 84(%esp),%edx
+ addl 88(%esp),%edi
+ xorl 4(%ebx),%ebp
+ xorl 8(%ebx),%ecx
+ xorl 12(%ebx),%esi
+ xorl 20(%ebx),%edx
+ xorl 24(%ebx),%edi
+ movl %ebp,4(%eax)
+ movl %ecx,8(%eax)
+ movl %esi,12(%eax)
+ movl %edx,20(%eax)
+ movl %edi,24(%eax)
+ movl 28(%esp),%ebp
+ movl 40(%esp),%ecx
+ movl 44(%esp),%esi
+ movl 52(%esp),%edx
+ movl 60(%esp),%edi
+ addl 92(%esp),%ebp
+ addl 104(%esp),%ecx
+ addl 108(%esp),%esi
+ addl 116(%esp),%edx
+ addl 124(%esp),%edi
+ xorl 28(%ebx),%ebp
+ xorl 40(%ebx),%ecx
+ xorl 44(%ebx),%esi
+ xorl 52(%ebx),%edx
+ xorl 60(%ebx),%edi
+ leal 64(%ebx),%ebx
+ movl %ebp,28(%eax)
+ movl (%esp),%ebp
+ movl %ecx,40(%eax)
+ movl 160(%esp),%ecx
+ movl %esi,44(%eax)
+ movl %edx,52(%eax)
+ movl %edi,60(%eax)
+ movl %ebp,(%eax)
+ leal 64(%eax),%eax
+ subl $64,%ecx
+ jnz L003outer_loop
+ jmp L006done
+L005tail:
+ addl 112(%esp),%edx
+ addl 120(%esp),%edi
+ movl %eax,(%esp)
+ movl %ebp,16(%esp)
+ movl %ecx,32(%esp)
+ movl %esi,36(%esp)
+ movl %edx,48(%esp)
+ movl %edi,56(%esp)
+ movl 4(%esp),%ebp
+ movl 8(%esp),%ecx
+ movl 12(%esp),%esi
+ movl 20(%esp),%edx
+ movl 24(%esp),%edi
+ addl $857760878,%ebp
+ addl $2036477234,%ecx
+ addl $1797285236,%esi
+ addl 84(%esp),%edx
+ addl 88(%esp),%edi
+ movl %ebp,4(%esp)
+ movl %ecx,8(%esp)
+ movl %esi,12(%esp)
+ movl %edx,20(%esp)
+ movl %edi,24(%esp)
+ movl 28(%esp),%ebp
+ movl 40(%esp),%ecx
+ movl 44(%esp),%esi
+ movl 52(%esp),%edx
+ movl 60(%esp),%edi
+ addl 92(%esp),%ebp
+ addl 104(%esp),%ecx
+ addl 108(%esp),%esi
+ addl 116(%esp),%edx
+ addl 124(%esp),%edi
+ movl %ebp,28(%esp)
+ movl 156(%esp),%ebp
+ movl %ecx,40(%esp)
+ movl 152(%esp),%ecx
+ movl %esi,44(%esp)
+ xorl %esi,%esi
+ movl %edx,52(%esp)
+ movl %edi,60(%esp)
+ xorl %eax,%eax
+ xorl %edx,%edx
+L007tail_loop:
+ movb (%esi,%ebp,1),%al
+ movb (%esp,%esi,1),%dl
+ leal 1(%esi),%esi
+ xorb %dl,%al
+ movb %al,-1(%ecx,%esi,1)
+ decl %ebx
+ jnz L007tail_loop
+L006done:
+ addl $132,%esp
+L000no_data:
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.globl _ChaCha20_ssse3
+.private_extern _ChaCha20_ssse3
+.align 4
+_ChaCha20_ssse3:
+L_ChaCha20_ssse3_begin:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+Lssse3_shortcut:
+ movl 20(%esp),%edi
+ movl 24(%esp),%esi
+ movl 28(%esp),%ecx
+ movl 32(%esp),%edx
+ movl 36(%esp),%ebx
+ movl %esp,%ebp
+ subl $524,%esp
+ andl $-64,%esp
+ movl %ebp,512(%esp)
+ leal Lssse3_data-Lpic_point(%eax),%eax
+ movdqu (%ebx),%xmm3
+ cmpl $256,%ecx
+ jb L0081x
+ movl %edx,516(%esp)
+ movl %ebx,520(%esp)
+ subl $256,%ecx
+ leal 384(%esp),%ebp
+ movdqu (%edx),%xmm7
+ pshufd $0,%xmm3,%xmm0
+ pshufd $85,%xmm3,%xmm1
+ pshufd $170,%xmm3,%xmm2
+ pshufd $255,%xmm3,%xmm3
+ paddd 48(%eax),%xmm0
+ pshufd $0,%xmm7,%xmm4
+ pshufd $85,%xmm7,%xmm5
+ psubd 64(%eax),%xmm0
+ pshufd $170,%xmm7,%xmm6
+ pshufd $255,%xmm7,%xmm7
+ movdqa %xmm0,64(%ebp)
+ movdqa %xmm1,80(%ebp)
+ movdqa %xmm2,96(%ebp)
+ movdqa %xmm3,112(%ebp)
+ movdqu 16(%edx),%xmm3
+ movdqa %xmm4,-64(%ebp)
+ movdqa %xmm5,-48(%ebp)
+ movdqa %xmm6,-32(%ebp)
+ movdqa %xmm7,-16(%ebp)
+ movdqa 32(%eax),%xmm7
+ leal 128(%esp),%ebx
+ pshufd $0,%xmm3,%xmm0
+ pshufd $85,%xmm3,%xmm1
+ pshufd $170,%xmm3,%xmm2
+ pshufd $255,%xmm3,%xmm3
+ pshufd $0,%xmm7,%xmm4
+ pshufd $85,%xmm7,%xmm5
+ pshufd $170,%xmm7,%xmm6
+ pshufd $255,%xmm7,%xmm7
+ movdqa %xmm0,(%ebp)
+ movdqa %xmm1,16(%ebp)
+ movdqa %xmm2,32(%ebp)
+ movdqa %xmm3,48(%ebp)
+ movdqa %xmm4,-128(%ebp)
+ movdqa %xmm5,-112(%ebp)
+ movdqa %xmm6,-96(%ebp)
+ movdqa %xmm7,-80(%ebp)
+ leal 128(%esi),%esi
+ leal 128(%edi),%edi
+ jmp L009outer_loop
+.align 4,0x90
+L009outer_loop:
+ movdqa -112(%ebp),%xmm1
+ movdqa -96(%ebp),%xmm2
+ movdqa -80(%ebp),%xmm3
+ movdqa -48(%ebp),%xmm5
+ movdqa -32(%ebp),%xmm6
+ movdqa -16(%ebp),%xmm7
+ movdqa %xmm1,-112(%ebx)
+ movdqa %xmm2,-96(%ebx)
+ movdqa %xmm3,-80(%ebx)
+ movdqa %xmm5,-48(%ebx)
+ movdqa %xmm6,-32(%ebx)
+ movdqa %xmm7,-16(%ebx)
+ movdqa 32(%ebp),%xmm2
+ movdqa 48(%ebp),%xmm3
+ movdqa 64(%ebp),%xmm4
+ movdqa 80(%ebp),%xmm5
+ movdqa 96(%ebp),%xmm6
+ movdqa 112(%ebp),%xmm7
+ paddd 64(%eax),%xmm4
+ movdqa %xmm2,32(%ebx)
+ movdqa %xmm3,48(%ebx)
+ movdqa %xmm4,64(%ebx)
+ movdqa %xmm5,80(%ebx)
+ movdqa %xmm6,96(%ebx)
+ movdqa %xmm7,112(%ebx)
+ movdqa %xmm4,64(%ebp)
+ movdqa -128(%ebp),%xmm0
+ movdqa %xmm4,%xmm6
+ movdqa -64(%ebp),%xmm3
+ movdqa (%ebp),%xmm4
+ movdqa 16(%ebp),%xmm5
+ movl $10,%edx
+ nop
+.align 4,0x90
+L010loop:
+ paddd %xmm3,%xmm0
+ movdqa %xmm3,%xmm2
+ pxor %xmm0,%xmm6
+ pshufb (%eax),%xmm6
+ paddd %xmm6,%xmm4
+ pxor %xmm4,%xmm2
+ movdqa -48(%ebx),%xmm3
+ movdqa %xmm2,%xmm1
+ pslld $12,%xmm2
+ psrld $20,%xmm1
+ por %xmm1,%xmm2
+ movdqa -112(%ebx),%xmm1
+ paddd %xmm2,%xmm0
+ movdqa 80(%ebx),%xmm7
+ pxor %xmm0,%xmm6
+ movdqa %xmm0,-128(%ebx)
+ pshufb 16(%eax),%xmm6
+ paddd %xmm6,%xmm4
+ movdqa %xmm6,64(%ebx)
+ pxor %xmm4,%xmm2
+ paddd %xmm3,%xmm1
+ movdqa %xmm2,%xmm0
+ pslld $7,%xmm2
+ psrld $25,%xmm0
+ pxor %xmm1,%xmm7
+ por %xmm0,%xmm2
+ movdqa %xmm4,(%ebx)
+ pshufb (%eax),%xmm7
+ movdqa %xmm2,-64(%ebx)
+ paddd %xmm7,%xmm5
+ movdqa 32(%ebx),%xmm4
+ pxor %xmm5,%xmm3
+ movdqa -32(%ebx),%xmm2
+ movdqa %xmm3,%xmm0
+ pslld $12,%xmm3
+ psrld $20,%xmm0
+ por %xmm0,%xmm3
+ movdqa -96(%ebx),%xmm0
+ paddd %xmm3,%xmm1
+ movdqa 96(%ebx),%xmm6
+ pxor %xmm1,%xmm7
+ movdqa %xmm1,-112(%ebx)
+ pshufb 16(%eax),%xmm7
+ paddd %xmm7,%xmm5
+ movdqa %xmm7,80(%ebx)
+ pxor %xmm5,%xmm3
+ paddd %xmm2,%xmm0
+ movdqa %xmm3,%xmm1
+ pslld $7,%xmm3
+ psrld $25,%xmm1
+ pxor %xmm0,%xmm6
+ por %xmm1,%xmm3
+ movdqa %xmm5,16(%ebx)
+ pshufb (%eax),%xmm6
+ movdqa %xmm3,-48(%ebx)
+ paddd %xmm6,%xmm4
+ movdqa 48(%ebx),%xmm5
+ pxor %xmm4,%xmm2
+ movdqa -16(%ebx),%xmm3
+ movdqa %xmm2,%xmm1
+ pslld $12,%xmm2
+ psrld $20,%xmm1
+ por %xmm1,%xmm2
+ movdqa -80(%ebx),%xmm1
+ paddd %xmm2,%xmm0
+ movdqa 112(%ebx),%xmm7
+ pxor %xmm0,%xmm6
+ movdqa %xmm0,-96(%ebx)
+ pshufb 16(%eax),%xmm6
+ paddd %xmm6,%xmm4
+ movdqa %xmm6,96(%ebx)
+ pxor %xmm4,%xmm2
+ paddd %xmm3,%xmm1
+ movdqa %xmm2,%xmm0
+ pslld $7,%xmm2
+ psrld $25,%xmm0
+ pxor %xmm1,%xmm7
+ por %xmm0,%xmm2
+ pshufb (%eax),%xmm7
+ movdqa %xmm2,-32(%ebx)
+ paddd %xmm7,%xmm5
+ pxor %xmm5,%xmm3
+ movdqa -48(%ebx),%xmm2
+ movdqa %xmm3,%xmm0
+ pslld $12,%xmm3
+ psrld $20,%xmm0
+ por %xmm0,%xmm3
+ movdqa -128(%ebx),%xmm0
+ paddd %xmm3,%xmm1
+ pxor %xmm1,%xmm7
+ movdqa %xmm1,-80(%ebx)
+ pshufb 16(%eax),%xmm7
+ paddd %xmm7,%xmm5
+ movdqa %xmm7,%xmm6
+ pxor %xmm5,%xmm3
+ paddd %xmm2,%xmm0
+ movdqa %xmm3,%xmm1
+ pslld $7,%xmm3
+ psrld $25,%xmm1
+ pxor %xmm0,%xmm6
+ por %xmm1,%xmm3
+ pshufb (%eax),%xmm6
+ movdqa %xmm3,-16(%ebx)
+ paddd %xmm6,%xmm4
+ pxor %xmm4,%xmm2
+ movdqa -32(%ebx),%xmm3
+ movdqa %xmm2,%xmm1
+ pslld $12,%xmm2
+ psrld $20,%xmm1
+ por %xmm1,%xmm2
+ movdqa -112(%ebx),%xmm1
+ paddd %xmm2,%xmm0
+ movdqa 64(%ebx),%xmm7
+ pxor %xmm0,%xmm6
+ movdqa %xmm0,-128(%ebx)
+ pshufb 16(%eax),%xmm6
+ paddd %xmm6,%xmm4
+ movdqa %xmm6,112(%ebx)
+ pxor %xmm4,%xmm2
+ paddd %xmm3,%xmm1
+ movdqa %xmm2,%xmm0
+ pslld $7,%xmm2
+ psrld $25,%xmm0
+ pxor %xmm1,%xmm7
+ por %xmm0,%xmm2
+ movdqa %xmm4,32(%ebx)
+ pshufb (%eax),%xmm7
+ movdqa %xmm2,-48(%ebx)
+ paddd %xmm7,%xmm5
+ movdqa (%ebx),%xmm4
+ pxor %xmm5,%xmm3
+ movdqa -16(%ebx),%xmm2
+ movdqa %xmm3,%xmm0
+ pslld $12,%xmm3
+ psrld $20,%xmm0
+ por %xmm0,%xmm3
+ movdqa -96(%ebx),%xmm0
+ paddd %xmm3,%xmm1
+ movdqa 80(%ebx),%xmm6
+ pxor %xmm1,%xmm7
+ movdqa %xmm1,-112(%ebx)
+ pshufb 16(%eax),%xmm7
+ paddd %xmm7,%xmm5
+ movdqa %xmm7,64(%ebx)
+ pxor %xmm5,%xmm3
+ paddd %xmm2,%xmm0
+ movdqa %xmm3,%xmm1
+ pslld $7,%xmm3
+ psrld $25,%xmm1
+ pxor %xmm0,%xmm6
+ por %xmm1,%xmm3
+ movdqa %xmm5,48(%ebx)
+ pshufb (%eax),%xmm6
+ movdqa %xmm3,-32(%ebx)
+ paddd %xmm6,%xmm4
+ movdqa 16(%ebx),%xmm5
+ pxor %xmm4,%xmm2
+ movdqa -64(%ebx),%xmm3
+ movdqa %xmm2,%xmm1
+ pslld $12,%xmm2
+ psrld $20,%xmm1
+ por %xmm1,%xmm2
+ movdqa -80(%ebx),%xmm1
+ paddd %xmm2,%xmm0
+ movdqa 96(%ebx),%xmm7
+ pxor %xmm0,%xmm6
+ movdqa %xmm0,-96(%ebx)
+ pshufb 16(%eax),%xmm6
+ paddd %xmm6,%xmm4
+ movdqa %xmm6,80(%ebx)
+ pxor %xmm4,%xmm2
+ paddd %xmm3,%xmm1
+ movdqa %xmm2,%xmm0
+ pslld $7,%xmm2
+ psrld $25,%xmm0
+ pxor %xmm1,%xmm7
+ por %xmm0,%xmm2
+ pshufb (%eax),%xmm7
+ movdqa %xmm2,-16(%ebx)
+ paddd %xmm7,%xmm5
+ pxor %xmm5,%xmm3
+ movdqa %xmm3,%xmm0
+ pslld $12,%xmm3
+ psrld $20,%xmm0
+ por %xmm0,%xmm3
+ movdqa -128(%ebx),%xmm0
+ paddd %xmm3,%xmm1
+ movdqa 64(%ebx),%xmm6
+ pxor %xmm1,%xmm7
+ movdqa %xmm1,-80(%ebx)
+ pshufb 16(%eax),%xmm7
+ paddd %xmm7,%xmm5
+ movdqa %xmm7,96(%ebx)
+ pxor %xmm5,%xmm3
+ movdqa %xmm3,%xmm1
+ pslld $7,%xmm3
+ psrld $25,%xmm1
+ por %xmm1,%xmm3
+ decl %edx
+ jnz L010loop
+ movdqa %xmm3,-64(%ebx)
+ movdqa %xmm4,(%ebx)
+ movdqa %xmm5,16(%ebx)
+ movdqa %xmm6,64(%ebx)
+ movdqa %xmm7,96(%ebx)
+ movdqa -112(%ebx),%xmm1
+ movdqa -96(%ebx),%xmm2
+ movdqa -80(%ebx),%xmm3
+ paddd -128(%ebp),%xmm0
+ paddd -112(%ebp),%xmm1
+ paddd -96(%ebp),%xmm2
+ paddd -80(%ebp),%xmm3
+ movdqa %xmm0,%xmm6
+ punpckldq %xmm1,%xmm0
+ movdqa %xmm2,%xmm7
+ punpckldq %xmm3,%xmm2
+ punpckhdq %xmm1,%xmm6
+ punpckhdq %xmm3,%xmm7
+ movdqa %xmm0,%xmm1
+ punpcklqdq %xmm2,%xmm0
+ movdqa %xmm6,%xmm3
+ punpcklqdq %xmm7,%xmm6
+ punpckhqdq %xmm2,%xmm1
+ punpckhqdq %xmm7,%xmm3
+ movdqu -128(%esi),%xmm4
+ movdqu -64(%esi),%xmm5
+ movdqu (%esi),%xmm2
+ movdqu 64(%esi),%xmm7
+ leal 16(%esi),%esi
+ pxor %xmm0,%xmm4
+ movdqa -64(%ebx),%xmm0
+ pxor %xmm1,%xmm5
+ movdqa -48(%ebx),%xmm1
+ pxor %xmm2,%xmm6
+ movdqa -32(%ebx),%xmm2
+ pxor %xmm3,%xmm7
+ movdqa -16(%ebx),%xmm3
+ movdqu %xmm4,-128(%edi)
+ movdqu %xmm5,-64(%edi)
+ movdqu %xmm6,(%edi)
+ movdqu %xmm7,64(%edi)
+ leal 16(%edi),%edi
+ paddd -64(%ebp),%xmm0
+ paddd -48(%ebp),%xmm1
+ paddd -32(%ebp),%xmm2
+ paddd -16(%ebp),%xmm3
+ movdqa %xmm0,%xmm6
+ punpckldq %xmm1,%xmm0
+ movdqa %xmm2,%xmm7
+ punpckldq %xmm3,%xmm2
+ punpckhdq %xmm1,%xmm6
+ punpckhdq %xmm3,%xmm7
+ movdqa %xmm0,%xmm1
+ punpcklqdq %xmm2,%xmm0
+ movdqa %xmm6,%xmm3
+ punpcklqdq %xmm7,%xmm6
+ punpckhqdq %xmm2,%xmm1
+ punpckhqdq %xmm7,%xmm3
+ movdqu -128(%esi),%xmm4
+ movdqu -64(%esi),%xmm5
+ movdqu (%esi),%xmm2
+ movdqu 64(%esi),%xmm7
+ leal 16(%esi),%esi
+ pxor %xmm0,%xmm4
+ movdqa (%ebx),%xmm0
+ pxor %xmm1,%xmm5
+ movdqa 16(%ebx),%xmm1
+ pxor %xmm2,%xmm6
+ movdqa 32(%ebx),%xmm2
+ pxor %xmm3,%xmm7
+ movdqa 48(%ebx),%xmm3
+ movdqu %xmm4,-128(%edi)
+ movdqu %xmm5,-64(%edi)
+ movdqu %xmm6,(%edi)
+ movdqu %xmm7,64(%edi)
+ leal 16(%edi),%edi
+ paddd (%ebp),%xmm0
+ paddd 16(%ebp),%xmm1
+ paddd 32(%ebp),%xmm2
+ paddd 48(%ebp),%xmm3
+ movdqa %xmm0,%xmm6
+ punpckldq %xmm1,%xmm0
+ movdqa %xmm2,%xmm7
+ punpckldq %xmm3,%xmm2
+ punpckhdq %xmm1,%xmm6
+ punpckhdq %xmm3,%xmm7
+ movdqa %xmm0,%xmm1
+ punpcklqdq %xmm2,%xmm0
+ movdqa %xmm6,%xmm3
+ punpcklqdq %xmm7,%xmm6
+ punpckhqdq %xmm2,%xmm1
+ punpckhqdq %xmm7,%xmm3
+ movdqu -128(%esi),%xmm4
+ movdqu -64(%esi),%xmm5
+ movdqu (%esi),%xmm2
+ movdqu 64(%esi),%xmm7
+ leal 16(%esi),%esi
+ pxor %xmm0,%xmm4
+ movdqa 64(%ebx),%xmm0
+ pxor %xmm1,%xmm5
+ movdqa 80(%ebx),%xmm1
+ pxor %xmm2,%xmm6
+ movdqa 96(%ebx),%xmm2
+ pxor %xmm3,%xmm7
+ movdqa 112(%ebx),%xmm3
+ movdqu %xmm4,-128(%edi)
+ movdqu %xmm5,-64(%edi)
+ movdqu %xmm6,(%edi)
+ movdqu %xmm7,64(%edi)
+ leal 16(%edi),%edi
+ paddd 64(%ebp),%xmm0
+ paddd 80(%ebp),%xmm1
+ paddd 96(%ebp),%xmm2
+ paddd 112(%ebp),%xmm3
+ movdqa %xmm0,%xmm6
+ punpckldq %xmm1,%xmm0
+ movdqa %xmm2,%xmm7
+ punpckldq %xmm3,%xmm2
+ punpckhdq %xmm1,%xmm6
+ punpckhdq %xmm3,%xmm7
+ movdqa %xmm0,%xmm1
+ punpcklqdq %xmm2,%xmm0
+ movdqa %xmm6,%xmm3
+ punpcklqdq %xmm7,%xmm6
+ punpckhqdq %xmm2,%xmm1
+ punpckhqdq %xmm7,%xmm3
+ movdqu -128(%esi),%xmm4
+ movdqu -64(%esi),%xmm5
+ movdqu (%esi),%xmm2
+ movdqu 64(%esi),%xmm7
+ leal 208(%esi),%esi
+ pxor %xmm0,%xmm4
+ pxor %xmm1,%xmm5
+ pxor %xmm2,%xmm6
+ pxor %xmm3,%xmm7
+ movdqu %xmm4,-128(%edi)
+ movdqu %xmm5,-64(%edi)
+ movdqu %xmm6,(%edi)
+ movdqu %xmm7,64(%edi)
+ leal 208(%edi),%edi
+ subl $256,%ecx
+ jnc L009outer_loop
+ addl $256,%ecx
+ jz L011done
+ movl 520(%esp),%ebx
+ leal -128(%esi),%esi
+ movl 516(%esp),%edx
+ leal -128(%edi),%edi
+ movd 64(%ebp),%xmm2
+ movdqu (%ebx),%xmm3
+ paddd 96(%eax),%xmm2
+ pand 112(%eax),%xmm3
+ por %xmm2,%xmm3
+L0081x:
+ movdqa 32(%eax),%xmm0
+ movdqu (%edx),%xmm1
+ movdqu 16(%edx),%xmm2
+ movdqa (%eax),%xmm6
+ movdqa 16(%eax),%xmm7
+ movl %ebp,48(%esp)
+ movdqa %xmm0,(%esp)
+ movdqa %xmm1,16(%esp)
+ movdqa %xmm2,32(%esp)
+ movdqa %xmm3,48(%esp)
+ movl $10,%edx
+ jmp L012loop1x
+.align 4,0x90
+L013outer1x:
+ movdqa 80(%eax),%xmm3
+ movdqa (%esp),%xmm0
+ movdqa 16(%esp),%xmm1
+ movdqa 32(%esp),%xmm2
+ paddd 48(%esp),%xmm3
+ movl $10,%edx
+ movdqa %xmm3,48(%esp)
+ jmp L012loop1x
+.align 4,0x90
+L012loop1x:
+ paddd %xmm1,%xmm0
+ pxor %xmm0,%xmm3
+.byte 102,15,56,0,222
+ paddd %xmm3,%xmm2
+ pxor %xmm2,%xmm1
+ movdqa %xmm1,%xmm4
+ psrld $20,%xmm1
+ pslld $12,%xmm4
+ por %xmm4,%xmm1
+ paddd %xmm1,%xmm0
+ pxor %xmm0,%xmm3
+.byte 102,15,56,0,223
+ paddd %xmm3,%xmm2
+ pxor %xmm2,%xmm1
+ movdqa %xmm1,%xmm4
+ psrld $25,%xmm1
+ pslld $7,%xmm4
+ por %xmm4,%xmm1
+ pshufd $78,%xmm2,%xmm2
+ pshufd $57,%xmm1,%xmm1
+ pshufd $147,%xmm3,%xmm3
+ nop
+ paddd %xmm1,%xmm0
+ pxor %xmm0,%xmm3
+.byte 102,15,56,0,222
+ paddd %xmm3,%xmm2
+ pxor %xmm2,%xmm1
+ movdqa %xmm1,%xmm4
+ psrld $20,%xmm1
+ pslld $12,%xmm4
+ por %xmm4,%xmm1
+ paddd %xmm1,%xmm0
+ pxor %xmm0,%xmm3
+.byte 102,15,56,0,223
+ paddd %xmm3,%xmm2
+ pxor %xmm2,%xmm1
+ movdqa %xmm1,%xmm4
+ psrld $25,%xmm1
+ pslld $7,%xmm4
+ por %xmm4,%xmm1
+ pshufd $78,%xmm2,%xmm2
+ pshufd $147,%xmm1,%xmm1
+ pshufd $57,%xmm3,%xmm3
+ decl %edx
+ jnz L012loop1x
+ paddd (%esp),%xmm0
+ paddd 16(%esp),%xmm1
+ paddd 32(%esp),%xmm2
+ paddd 48(%esp),%xmm3
+ cmpl $64,%ecx
+ jb L014tail
+ movdqu (%esi),%xmm4
+ movdqu 16(%esi),%xmm5
+ pxor %xmm4,%xmm0
+ movdqu 32(%esi),%xmm4
+ pxor %xmm5,%xmm1
+ movdqu 48(%esi),%xmm5
+ pxor %xmm4,%xmm2
+ pxor %xmm5,%xmm3
+ leal 64(%esi),%esi
+ movdqu %xmm0,(%edi)
+ movdqu %xmm1,16(%edi)
+ movdqu %xmm2,32(%edi)
+ movdqu %xmm3,48(%edi)
+ leal 64(%edi),%edi
+ subl $64,%ecx
+ jnz L013outer1x
+ jmp L011done
+L014tail:
+ movdqa %xmm0,(%esp)
+ movdqa %xmm1,16(%esp)
+ movdqa %xmm2,32(%esp)
+ movdqa %xmm3,48(%esp)
+ xorl %eax,%eax
+ xorl %edx,%edx
+ xorl %ebp,%ebp
+L015tail_loop:
+ movb (%esp,%ebp,1),%al
+ movb (%esi,%ebp,1),%dl
+ leal 1(%ebp),%ebp
+ xorb %dl,%al
+ movb %al,-1(%edi,%ebp,1)
+ decl %ecx
+ jnz L015tail_loop
+L011done:
+ movl 512(%esp),%esp
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.align 6,0x90
+Lssse3_data:
+.byte 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
+.byte 3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
+.long 1634760805,857760878,2036477234,1797285236
+.long 0,1,2,3
+.long 4,4,4,4
+.long 1,0,0,0
+.long 4,0,0,0
+.long 0,-1,-1,-1
+.align 6,0x90
+.byte 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
+.byte 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
+.byte 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
+.byte 114,103,62,0
+.section __IMPORT,__pointers,non_lazy_symbol_pointers
+L_OPENSSL_ia32cap_P$non_lazy_ptr:
+.indirect_symbol _OPENSSL_ia32cap_P
+.long 0
+#endif
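
Both code paths in this generated file implement standard ChaCha20. The dispatch at the top of _ChaCha20_ctr32 appears to gate the SSSE3 path on OPENSSL_ia32cap_P bit 24 of word 0 (FXSR) and bit 9 of word 1 (SSSE3) before jumping to Lssse3_shortcut, and the immediates in the scalar loop (roll $16/$12/$8/$7, the $10 round counters, and the constants 1634760805, 857760878, 2036477234, 1797285236, i.e. "expand 32-byte k") correspond to the reference computation sketched below in C. The ChaCha20_ctr32(out, in, in_len, key, counter) calling convention is inferred from the stack offsets and BoringSSL's C callers, so treat this as a minimal sketch for checking the generated code, not authoritative documentation.

#include <stdint.h>
#include <string.h>

/* Left-rotate a 32-bit word; corresponds to the roll instructions above. */
static uint32_t rotl32(uint32_t x, int n) {
  return (x << n) | (x >> (32 - n));
}

/* One ChaCha quarter-round: add/xor/rotate by 16, 12, 8, 7 -- the same
 * rotation amounts used in L004loop, and in the SSSE3 path via the
 * pshufb masks at Lssse3_data (16- and 8-bit byte rotations) plus the
 * pslld/psrld/por pairs (12- and 7-bit rotations). */
static void quarter_round(uint32_t s[16], int a, int b, int c, int d) {
  s[a] += s[b]; s[d] ^= s[a]; s[d] = rotl32(s[d], 16);
  s[c] += s[d]; s[b] ^= s[c]; s[b] = rotl32(s[b], 12);
  s[a] += s[b]; s[d] ^= s[a]; s[d] = rotl32(s[d], 8);
  s[c] += s[d]; s[b] ^= s[c]; s[b] = rotl32(s[b], 7);
}

/* Reference block function: 10 double rounds (20 rounds total), matching
 * the movl $10 loop counters in both paths. in[0..3] hold the "expand
 * 32-byte k" constants, in[4..11] the key words, in[12] the 32-bit block
 * counter, and in[13..15] the nonce. */
static void chacha20_block(uint32_t out[16], const uint32_t in[16]) {
  uint32_t x[16];
  memcpy(x, in, sizeof(x));
  for (int i = 0; i < 10; i++) {
    quarter_round(x, 0, 4,  8, 12);   /* column rounds */
    quarter_round(x, 1, 5,  9, 13);
    quarter_round(x, 2, 6, 10, 14);
    quarter_round(x, 3, 7, 11, 15);
    quarter_round(x, 0, 5, 10, 15);   /* diagonal rounds */
    quarter_round(x, 1, 6, 11, 12);
    quarter_round(x, 2, 7,  8, 13);
    quarter_round(x, 3, 4,  9, 14);
  }
  /* Feed-forward: add the input state back in, as the addl/paddd
   * sequences do after the round loops above. */
  for (int i = 0; i < 16; i++) {
    out[i] = x[i] + in[i];
  }
}

The keystream for block n is chacha20_block() of the state with in[12] advanced by n, XORed into the input; the byte-at-a-time tail loops above (L007tail_loop, L015tail_loop) handle a final partial block the same way.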
