Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(530)

Unified Diff: third_party/boringssl/linux-x86/crypto/sha/sha1-586.S

Issue 1924693003: Rolls BoringSSL forward (Closed) Base URL: git@github.com:dart-lang/sdk.git@master
Patch Set: Created 4 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: third_party/boringssl/linux-x86/crypto/sha/sha1-586.S
diff --git a/third_party/boringssl/linux-x86/crypto/sha/sha1-586.S b/third_party/boringssl/linux-x86/crypto/sha/sha1-586.S
index 808ccac5178f731a0373308b699d132fb7bd3f14..58d0bc127732abeaaed31c38c79fb15116e8e15c 100644
--- a/third_party/boringssl/linux-x86/crypto/sha/sha1-586.S
+++ b/third_party/boringssl/linux-x86/crypto/sha/sha1-586.S
@@ -23,8 +23,11 @@ sha1_block_data_order:
movl 8(%esi),%ecx
testl $16777216,%eax
jz .L001x86
- testl $536870912,%ecx
- jnz .Lshaext_shortcut
+ andl $268435456,%edx
+ andl $1073741824,%eax
+ orl %edx,%eax
+ cmpl $1342177280,%eax
+ je .Lavx_shortcut
jmp .Lssse3_shortcut
.align 16
.L001x86:
@@ -1393,177 +1396,6 @@ sha1_block_data_order:
popl %ebp
ret
.size sha1_block_data_order,.-.L_sha1_block_data_order_begin
-.hidden _sha1_block_data_order_shaext
-.type _sha1_block_data_order_shaext,@function
-.align 16
-_sha1_block_data_order_shaext:
- pushl %ebp
- pushl %ebx
- pushl %esi
- pushl %edi
- call .L003pic_point
-.L003pic_point:
- popl %ebp
- leal .LK_XX_XX-.L003pic_point(%ebp),%ebp
-.Lshaext_shortcut:
- movl 20(%esp),%edi
- movl %esp,%ebx
- movl 24(%esp),%esi
- movl 28(%esp),%ecx
- subl $32,%esp
- movdqu (%edi),%xmm0
- movd 16(%edi),%xmm1
- andl $-32,%esp
- movdqa 80(%ebp),%xmm3
- movdqu (%esi),%xmm4
- pshufd $27,%xmm0,%xmm0
- movdqu 16(%esi),%xmm5
- pshufd $27,%xmm1,%xmm1
- movdqu 32(%esi),%xmm6
-.byte 102,15,56,0,227
- movdqu 48(%esi),%xmm7
-.byte 102,15,56,0,235
-.byte 102,15,56,0,243
-.byte 102,15,56,0,251
- jmp .L004loop_shaext
-.align 16
-.L004loop_shaext:
- decl %ecx
- leal 64(%esi),%eax
- movdqa %xmm1,(%esp)
- paddd %xmm4,%xmm1
- cmovnel %eax,%esi
- movdqa %xmm0,16(%esp)
-.byte 15,56,201,229
- movdqa %xmm0,%xmm2
-.byte 15,58,204,193,0
-.byte 15,56,200,213
- pxor %xmm6,%xmm4
-.byte 15,56,201,238
-.byte 15,56,202,231
- movdqa %xmm0,%xmm1
-.byte 15,58,204,194,0
-.byte 15,56,200,206
- pxor %xmm7,%xmm5
-.byte 15,56,202,236
-.byte 15,56,201,247
- movdqa %xmm0,%xmm2
-.byte 15,58,204,193,0
-.byte 15,56,200,215
- pxor %xmm4,%xmm6
-.byte 15,56,201,252
-.byte 15,56,202,245
- movdqa %xmm0,%xmm1
-.byte 15,58,204,194,0
-.byte 15,56,200,204
- pxor %xmm5,%xmm7
-.byte 15,56,202,254
-.byte 15,56,201,229
- movdqa %xmm0,%xmm2
-.byte 15,58,204,193,0
-.byte 15,56,200,213
- pxor %xmm6,%xmm4
-.byte 15,56,201,238
-.byte 15,56,202,231
- movdqa %xmm0,%xmm1
-.byte 15,58,204,194,1
-.byte 15,56,200,206
- pxor %xmm7,%xmm5
-.byte 15,56,202,236
-.byte 15,56,201,247
- movdqa %xmm0,%xmm2
-.byte 15,58,204,193,1
-.byte 15,56,200,215
- pxor %xmm4,%xmm6
-.byte 15,56,201,252
-.byte 15,56,202,245
- movdqa %xmm0,%xmm1
-.byte 15,58,204,194,1
-.byte 15,56,200,204
- pxor %xmm5,%xmm7
-.byte 15,56,202,254
-.byte 15,56,201,229
- movdqa %xmm0,%xmm2
-.byte 15,58,204,193,1
-.byte 15,56,200,213
- pxor %xmm6,%xmm4
-.byte 15,56,201,238
-.byte 15,56,202,231
- movdqa %xmm0,%xmm1
-.byte 15,58,204,194,1
-.byte 15,56,200,206
- pxor %xmm7,%xmm5
-.byte 15,56,202,236
-.byte 15,56,201,247
- movdqa %xmm0,%xmm2
-.byte 15,58,204,193,2
-.byte 15,56,200,215
- pxor %xmm4,%xmm6
-.byte 15,56,201,252
-.byte 15,56,202,245
- movdqa %xmm0,%xmm1
-.byte 15,58,204,194,2
-.byte 15,56,200,204
- pxor %xmm5,%xmm7
-.byte 15,56,202,254
-.byte 15,56,201,229
- movdqa %xmm0,%xmm2
-.byte 15,58,204,193,2
-.byte 15,56,200,213
- pxor %xmm6,%xmm4
-.byte 15,56,201,238
-.byte 15,56,202,231
- movdqa %xmm0,%xmm1
-.byte 15,58,204,194,2
-.byte 15,56,200,206
- pxor %xmm7,%xmm5
-.byte 15,56,202,236
-.byte 15,56,201,247
- movdqa %xmm0,%xmm2
-.byte 15,58,204,193,2
-.byte 15,56,200,215
- pxor %xmm4,%xmm6
-.byte 15,56,201,252
-.byte 15,56,202,245
- movdqa %xmm0,%xmm1
-.byte 15,58,204,194,3
-.byte 15,56,200,204
- pxor %xmm5,%xmm7
-.byte 15,56,202,254
- movdqu (%esi),%xmm4
- movdqa %xmm0,%xmm2
-.byte 15,58,204,193,3
-.byte 15,56,200,213
- movdqu 16(%esi),%xmm5
-.byte 102,15,56,0,227
- movdqa %xmm0,%xmm1
-.byte 15,58,204,194,3
-.byte 15,56,200,206
- movdqu 32(%esi),%xmm6
-.byte 102,15,56,0,235
- movdqa %xmm0,%xmm2
-.byte 15,58,204,193,3
-.byte 15,56,200,215
- movdqu 48(%esi),%xmm7
-.byte 102,15,56,0,243
- movdqa %xmm0,%xmm1
-.byte 15,58,204,194,3
- movdqa (%esp),%xmm2
-.byte 102,15,56,0,251
-.byte 15,56,200,202
- paddd 16(%esp),%xmm0
- jnz .L004loop_shaext
- pshufd $27,%xmm0,%xmm0
- pshufd $27,%xmm1,%xmm1
- movdqu %xmm0,(%edi)
- movd %xmm1,16(%edi)
- movl %ebx,%esp
- popl %edi
- popl %esi
- popl %ebx
- popl %ebp
- ret
-.size _sha1_block_data_order_shaext,.-_sha1_block_data_order_shaext
.hidden _sha1_block_data_order_ssse3
.type _sha1_block_data_order_ssse3,@function
.align 16
@@ -1572,10 +1404,10 @@ _sha1_block_data_order_ssse3:
pushl %ebx
pushl %esi
pushl %edi
- call .L005pic_point
-.L005pic_point:
+ call .L003pic_point
+.L003pic_point:
popl %ebp
- leal .LK_XX_XX-.L005pic_point(%ebp),%ebp
+ leal .LK_XX_XX-.L003pic_point(%ebp),%ebp
.Lssse3_shortcut:
movdqa (%ebp),%xmm7
movdqa 16(%ebp),%xmm0
@@ -1628,9 +1460,9 @@ _sha1_block_data_order_ssse3:
xorl %edx,%ebp
pshufd $238,%xmm0,%xmm4
andl %ebp,%esi
- jmp .L006loop
+ jmp .L004loop
.align 16
-.L006loop:
+.L004loop:
rorl $2,%ebx
xorl %edx,%esi
movl %eax,%ebp
@@ -2533,7 +2365,7 @@ _sha1_block_data_order_ssse3:
addl %edx,%ecx
movl 196(%esp),%ebp
cmpl 200(%esp),%ebp
- je .L007done
+ je .L005done
movdqa 160(%esp),%xmm7
movdqa 176(%esp),%xmm6
movdqu (%ebp),%xmm0
@@ -2668,9 +2500,9 @@ _sha1_block_data_order_ssse3:
pshufd $238,%xmm0,%xmm4
andl %ebx,%esi
movl %ebp,%ebx
- jmp .L006loop
+ jmp .L004loop
.align 16
-.L007done:
+.L005done:
addl 16(%esp),%ebx
xorl %edi,%esi
movl %ecx,%ebp
@@ -2784,6 +2616,1177 @@ _sha1_block_data_order_ssse3:
popl %ebp
ret
.size _sha1_block_data_order_ssse3,.-_sha1_block_data_order_ssse3
+.hidden _sha1_block_data_order_avx
+.type _sha1_block_data_order_avx,@function
+.align 16
+_sha1_block_data_order_avx:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ call .L006pic_point
+.L006pic_point:
+ popl %ebp
+ leal .LK_XX_XX-.L006pic_point(%ebp),%ebp
+.Lavx_shortcut:
+ vzeroall
+ vmovdqa (%ebp),%xmm7
+ vmovdqa 16(%ebp),%xmm0
+ vmovdqa 32(%ebp),%xmm1
+ vmovdqa 48(%ebp),%xmm2
+ vmovdqa 64(%ebp),%xmm6
+ movl 20(%esp),%edi
+ movl 24(%esp),%ebp
+ movl 28(%esp),%edx
+ movl %esp,%esi
+ subl $208,%esp
+ andl $-64,%esp
+ vmovdqa %xmm0,112(%esp)
+ vmovdqa %xmm1,128(%esp)
+ vmovdqa %xmm2,144(%esp)
+ shll $6,%edx
+ vmovdqa %xmm7,160(%esp)
+ addl %ebp,%edx
+ vmovdqa %xmm6,176(%esp)
+ addl $64,%ebp
+ movl %edi,192(%esp)
+ movl %ebp,196(%esp)
+ movl %edx,200(%esp)
+ movl %esi,204(%esp)
+ movl (%edi),%eax
+ movl 4(%edi),%ebx
+ movl 8(%edi),%ecx
+ movl 12(%edi),%edx
+ movl 16(%edi),%edi
+ movl %ebx,%esi
+ vmovdqu -64(%ebp),%xmm0
+ vmovdqu -48(%ebp),%xmm1
+ vmovdqu -32(%ebp),%xmm2
+ vmovdqu -16(%ebp),%xmm3
+ vpshufb %xmm6,%xmm0,%xmm0
+ vpshufb %xmm6,%xmm1,%xmm1
+ vpshufb %xmm6,%xmm2,%xmm2
+ vmovdqa %xmm7,96(%esp)
+ vpshufb %xmm6,%xmm3,%xmm3
+ vpaddd %xmm7,%xmm0,%xmm4
+ vpaddd %xmm7,%xmm1,%xmm5
+ vpaddd %xmm7,%xmm2,%xmm6
+ vmovdqa %xmm4,(%esp)
+ movl %ecx,%ebp
+ vmovdqa %xmm5,16(%esp)
+ xorl %edx,%ebp
+ vmovdqa %xmm6,32(%esp)
+ andl %ebp,%esi
+ jmp .L007loop
+.align 16
+.L007loop:
+ shrdl $2,%ebx,%ebx
+ xorl %edx,%esi
+ vpalignr $8,%xmm0,%xmm1,%xmm4
+ movl %eax,%ebp
+ addl (%esp),%edi
+ vpaddd %xmm3,%xmm7,%xmm7
+ vmovdqa %xmm0,64(%esp)
+ xorl %ecx,%ebx
+ shldl $5,%eax,%eax
+ vpsrldq $4,%xmm3,%xmm6
+ addl %esi,%edi
+ andl %ebx,%ebp
+ vpxor %xmm0,%xmm4,%xmm4
+ xorl %ecx,%ebx
+ addl %eax,%edi
+ vpxor %xmm2,%xmm6,%xmm6
+ shrdl $7,%eax,%eax
+ xorl %ecx,%ebp
+ vmovdqa %xmm7,48(%esp)
+ movl %edi,%esi
+ addl 4(%esp),%edx
+ vpxor %xmm6,%xmm4,%xmm4
+ xorl %ebx,%eax
+ shldl $5,%edi,%edi
+ addl %ebp,%edx
+ andl %eax,%esi
+ vpsrld $31,%xmm4,%xmm6
+ xorl %ebx,%eax
+ addl %edi,%edx
+ shrdl $7,%edi,%edi
+ xorl %ebx,%esi
+ vpslldq $12,%xmm4,%xmm0
+ vpaddd %xmm4,%xmm4,%xmm4
+ movl %edx,%ebp
+ addl 8(%esp),%ecx
+ xorl %eax,%edi
+ shldl $5,%edx,%edx
+ vpsrld $30,%xmm0,%xmm7
+ vpor %xmm6,%xmm4,%xmm4
+ addl %esi,%ecx
+ andl %edi,%ebp
+ xorl %eax,%edi
+ addl %edx,%ecx
+ vpslld $2,%xmm0,%xmm0
+ shrdl $7,%edx,%edx
+ xorl %eax,%ebp
+ vpxor %xmm7,%xmm4,%xmm4
+ movl %ecx,%esi
+ addl 12(%esp),%ebx
+ xorl %edi,%edx
+ shldl $5,%ecx,%ecx
+ vpxor %xmm0,%xmm4,%xmm4
+ addl %ebp,%ebx
+ andl %edx,%esi
+ vmovdqa 96(%esp),%xmm0
+ xorl %edi,%edx
+ addl %ecx,%ebx
+ shrdl $7,%ecx,%ecx
+ xorl %edi,%esi
+ vpalignr $8,%xmm1,%xmm2,%xmm5
+ movl %ebx,%ebp
+ addl 16(%esp),%eax
+ vpaddd %xmm4,%xmm0,%xmm0
+ vmovdqa %xmm1,80(%esp)
+ xorl %edx,%ecx
+ shldl $5,%ebx,%ebx
+ vpsrldq $4,%xmm4,%xmm7
+ addl %esi,%eax
+ andl %ecx,%ebp
+ vpxor %xmm1,%xmm5,%xmm5
+ xorl %edx,%ecx
+ addl %ebx,%eax
+ vpxor %xmm3,%xmm7,%xmm7
+ shrdl $7,%ebx,%ebx
+ xorl %edx,%ebp
+ vmovdqa %xmm0,(%esp)
+ movl %eax,%esi
+ addl 20(%esp),%edi
+ vpxor %xmm7,%xmm5,%xmm5
+ xorl %ecx,%ebx
+ shldl $5,%eax,%eax
+ addl %ebp,%edi
+ andl %ebx,%esi
+ vpsrld $31,%xmm5,%xmm7
+ xorl %ecx,%ebx
+ addl %eax,%edi
+ shrdl $7,%eax,%eax
+ xorl %ecx,%esi
+ vpslldq $12,%xmm5,%xmm1
+ vpaddd %xmm5,%xmm5,%xmm5
+ movl %edi,%ebp
+ addl 24(%esp),%edx
+ xorl %ebx,%eax
+ shldl $5,%edi,%edi
+ vpsrld $30,%xmm1,%xmm0
+ vpor %xmm7,%xmm5,%xmm5
+ addl %esi,%edx
+ andl %eax,%ebp
+ xorl %ebx,%eax
+ addl %edi,%edx
+ vpslld $2,%xmm1,%xmm1
+ shrdl $7,%edi,%edi
+ xorl %ebx,%ebp
+ vpxor %xmm0,%xmm5,%xmm5
+ movl %edx,%esi
+ addl 28(%esp),%ecx
+ xorl %eax,%edi
+ shldl $5,%edx,%edx
+ vpxor %xmm1,%xmm5,%xmm5
+ addl %ebp,%ecx
+ andl %edi,%esi
+ vmovdqa 112(%esp),%xmm1
+ xorl %eax,%edi
+ addl %edx,%ecx
+ shrdl $7,%edx,%edx
+ xorl %eax,%esi
+ vpalignr $8,%xmm2,%xmm3,%xmm6
+ movl %ecx,%ebp
+ addl 32(%esp),%ebx
+ vpaddd %xmm5,%xmm1,%xmm1
+ vmovdqa %xmm2,96(%esp)
+ xorl %edi,%edx
+ shldl $5,%ecx,%ecx
+ vpsrldq $4,%xmm5,%xmm0
+ addl %esi,%ebx
+ andl %edx,%ebp
+ vpxor %xmm2,%xmm6,%xmm6
+ xorl %edi,%edx
+ addl %ecx,%ebx
+ vpxor %xmm4,%xmm0,%xmm0
+ shrdl $7,%ecx,%ecx
+ xorl %edi,%ebp
+ vmovdqa %xmm1,16(%esp)
+ movl %ebx,%esi
+ addl 36(%esp),%eax
+ vpxor %xmm0,%xmm6,%xmm6
+ xorl %edx,%ecx
+ shldl $5,%ebx,%ebx
+ addl %ebp,%eax
+ andl %ecx,%esi
+ vpsrld $31,%xmm6,%xmm0
+ xorl %edx,%ecx
+ addl %ebx,%eax
+ shrdl $7,%ebx,%ebx
+ xorl %edx,%esi
+ vpslldq $12,%xmm6,%xmm2
+ vpaddd %xmm6,%xmm6,%xmm6
+ movl %eax,%ebp
+ addl 40(%esp),%edi
+ xorl %ecx,%ebx
+ shldl $5,%eax,%eax
+ vpsrld $30,%xmm2,%xmm1
+ vpor %xmm0,%xmm6,%xmm6
+ addl %esi,%edi
+ andl %ebx,%ebp
+ xorl %ecx,%ebx
+ addl %eax,%edi
+ vpslld $2,%xmm2,%xmm2
+ vmovdqa 64(%esp),%xmm0
+ shrdl $7,%eax,%eax
+ xorl %ecx,%ebp
+ vpxor %xmm1,%xmm6,%xmm6
+ movl %edi,%esi
+ addl 44(%esp),%edx
+ xorl %ebx,%eax
+ shldl $5,%edi,%edi
+ vpxor %xmm2,%xmm6,%xmm6
+ addl %ebp,%edx
+ andl %eax,%esi
+ vmovdqa 112(%esp),%xmm2
+ xorl %ebx,%eax
+ addl %edi,%edx
+ shrdl $7,%edi,%edi
+ xorl %ebx,%esi
+ vpalignr $8,%xmm3,%xmm4,%xmm7
+ movl %edx,%ebp
+ addl 48(%esp),%ecx
+ vpaddd %xmm6,%xmm2,%xmm2
+ vmovdqa %xmm3,64(%esp)
+ xorl %eax,%edi
+ shldl $5,%edx,%edx
+ vpsrldq $4,%xmm6,%xmm1
+ addl %esi,%ecx
+ andl %edi,%ebp
+ vpxor %xmm3,%xmm7,%xmm7
+ xorl %eax,%edi
+ addl %edx,%ecx
+ vpxor %xmm5,%xmm1,%xmm1
+ shrdl $7,%edx,%edx
+ xorl %eax,%ebp
+ vmovdqa %xmm2,32(%esp)
+ movl %ecx,%esi
+ addl 52(%esp),%ebx
+ vpxor %xmm1,%xmm7,%xmm7
+ xorl %edi,%edx
+ shldl $5,%ecx,%ecx
+ addl %ebp,%ebx
+ andl %edx,%esi
+ vpsrld $31,%xmm7,%xmm1
+ xorl %edi,%edx
+ addl %ecx,%ebx
+ shrdl $7,%ecx,%ecx
+ xorl %edi,%esi
+ vpslldq $12,%xmm7,%xmm3
+ vpaddd %xmm7,%xmm7,%xmm7
+ movl %ebx,%ebp
+ addl 56(%esp),%eax
+ xorl %edx,%ecx
+ shldl $5,%ebx,%ebx
+ vpsrld $30,%xmm3,%xmm2
+ vpor %xmm1,%xmm7,%xmm7
+ addl %esi,%eax
+ andl %ecx,%ebp
+ xorl %edx,%ecx
+ addl %ebx,%eax
+ vpslld $2,%xmm3,%xmm3
+ vmovdqa 80(%esp),%xmm1
+ shrdl $7,%ebx,%ebx
+ xorl %edx,%ebp
+ vpxor %xmm2,%xmm7,%xmm7
+ movl %eax,%esi
+ addl 60(%esp),%edi
+ xorl %ecx,%ebx
+ shldl $5,%eax,%eax
+ vpxor %xmm3,%xmm7,%xmm7
+ addl %ebp,%edi
+ andl %ebx,%esi
+ vmovdqa 112(%esp),%xmm3
+ xorl %ecx,%ebx
+ addl %eax,%edi
+ vpalignr $8,%xmm6,%xmm7,%xmm2
+ vpxor %xmm4,%xmm0,%xmm0
+ shrdl $7,%eax,%eax
+ xorl %ecx,%esi
+ movl %edi,%ebp
+ addl (%esp),%edx
+ vpxor %xmm1,%xmm0,%xmm0
+ vmovdqa %xmm4,80(%esp)
+ xorl %ebx,%eax
+ shldl $5,%edi,%edi
+ vmovdqa %xmm3,%xmm4
+ vpaddd %xmm7,%xmm3,%xmm3
+ addl %esi,%edx
+ andl %eax,%ebp
+ vpxor %xmm2,%xmm0,%xmm0
+ xorl %ebx,%eax
+ addl %edi,%edx
+ shrdl $7,%edi,%edi
+ xorl %ebx,%ebp
+ vpsrld $30,%xmm0,%xmm2
+ vmovdqa %xmm3,48(%esp)
+ movl %edx,%esi
+ addl 4(%esp),%ecx
+ xorl %eax,%edi
+ shldl $5,%edx,%edx
+ vpslld $2,%xmm0,%xmm0
+ addl %ebp,%ecx
+ andl %edi,%esi
+ xorl %eax,%edi
+ addl %edx,%ecx
+ shrdl $7,%edx,%edx
+ xorl %eax,%esi
+ movl %ecx,%ebp
+ addl 8(%esp),%ebx
+ vpor %xmm2,%xmm0,%xmm0
+ xorl %edi,%edx
+ shldl $5,%ecx,%ecx
+ vmovdqa 96(%esp),%xmm2
+ addl %esi,%ebx
+ andl %edx,%ebp
+ xorl %edi,%edx
+ addl %ecx,%ebx
+ addl 12(%esp),%eax
+ xorl %edi,%ebp
+ movl %ebx,%esi
+ shldl $5,%ebx,%ebx
+ addl %ebp,%eax
+ xorl %edx,%esi
+ shrdl $7,%ecx,%ecx
+ addl %ebx,%eax
+ vpalignr $8,%xmm7,%xmm0,%xmm3
+ vpxor %xmm5,%xmm1,%xmm1
+ addl 16(%esp),%edi
+ xorl %ecx,%esi
+ movl %eax,%ebp
+ shldl $5,%eax,%eax
+ vpxor %xmm2,%xmm1,%xmm1
+ vmovdqa %xmm5,96(%esp)
+ addl %esi,%edi
+ xorl %ecx,%ebp
+ vmovdqa %xmm4,%xmm5
+ vpaddd %xmm0,%xmm4,%xmm4
+ shrdl $7,%ebx,%ebx
+ addl %eax,%edi
+ vpxor %xmm3,%xmm1,%xmm1
+ addl 20(%esp),%edx
+ xorl %ebx,%ebp
+ movl %edi,%esi
+ shldl $5,%edi,%edi
+ vpsrld $30,%xmm1,%xmm3
+ vmovdqa %xmm4,(%esp)
+ addl %ebp,%edx
+ xorl %ebx,%esi
+ shrdl $7,%eax,%eax
+ addl %edi,%edx
+ vpslld $2,%xmm1,%xmm1
+ addl 24(%esp),%ecx
+ xorl %eax,%esi
+ movl %edx,%ebp
+ shldl $5,%edx,%edx
+ addl %esi,%ecx
+ xorl %eax,%ebp
+ shrdl $7,%edi,%edi
+ addl %edx,%ecx
+ vpor %xmm3,%xmm1,%xmm1
+ addl 28(%esp),%ebx
+ xorl %edi,%ebp
+ vmovdqa 64(%esp),%xmm3
+ movl %ecx,%esi
+ shldl $5,%ecx,%ecx
+ addl %ebp,%ebx
+ xorl %edi,%esi
+ shrdl $7,%edx,%edx
+ addl %ecx,%ebx
+ vpalignr $8,%xmm0,%xmm1,%xmm4
+ vpxor %xmm6,%xmm2,%xmm2
+ addl 32(%esp),%eax
+ xorl %edx,%esi
+ movl %ebx,%ebp
+ shldl $5,%ebx,%ebx
+ vpxor %xmm3,%xmm2,%xmm2
+ vmovdqa %xmm6,64(%esp)
+ addl %esi,%eax
+ xorl %edx,%ebp
+ vmovdqa 128(%esp),%xmm6
+ vpaddd %xmm1,%xmm5,%xmm5
+ shrdl $7,%ecx,%ecx
+ addl %ebx,%eax
+ vpxor %xmm4,%xmm2,%xmm2
+ addl 36(%esp),%edi
+ xorl %ecx,%ebp
+ movl %eax,%esi
+ shldl $5,%eax,%eax
+ vpsrld $30,%xmm2,%xmm4
+ vmovdqa %xmm5,16(%esp)
+ addl %ebp,%edi
+ xorl %ecx,%esi
+ shrdl $7,%ebx,%ebx
+ addl %eax,%edi
+ vpslld $2,%xmm2,%xmm2
+ addl 40(%esp),%edx
+ xorl %ebx,%esi
+ movl %edi,%ebp
+ shldl $5,%edi,%edi
+ addl %esi,%edx
+ xorl %ebx,%ebp
+ shrdl $7,%eax,%eax
+ addl %edi,%edx
+ vpor %xmm4,%xmm2,%xmm2
+ addl 44(%esp),%ecx
+ xorl %eax,%ebp
+ vmovdqa 80(%esp),%xmm4
+ movl %edx,%esi
+ shldl $5,%edx,%edx
+ addl %ebp,%ecx
+ xorl %eax,%esi
+ shrdl $7,%edi,%edi
+ addl %edx,%ecx
+ vpalignr $8,%xmm1,%xmm2,%xmm5
+ vpxor %xmm7,%xmm3,%xmm3
+ addl 48(%esp),%ebx
+ xorl %edi,%esi
+ movl %ecx,%ebp
+ shldl $5,%ecx,%ecx
+ vpxor %xmm4,%xmm3,%xmm3
+ vmovdqa %xmm7,80(%esp)
+ addl %esi,%ebx
+ xorl %edi,%ebp
+ vmovdqa %xmm6,%xmm7
+ vpaddd %xmm2,%xmm6,%xmm6
+ shrdl $7,%edx,%edx
+ addl %ecx,%ebx
+ vpxor %xmm5,%xmm3,%xmm3
+ addl 52(%esp),%eax
+ xorl %edx,%ebp
+ movl %ebx,%esi
+ shldl $5,%ebx,%ebx
+ vpsrld $30,%xmm3,%xmm5
+ vmovdqa %xmm6,32(%esp)
+ addl %ebp,%eax
+ xorl %edx,%esi
+ shrdl $7,%ecx,%ecx
+ addl %ebx,%eax
+ vpslld $2,%xmm3,%xmm3
+ addl 56(%esp),%edi
+ xorl %ecx,%esi
+ movl %eax,%ebp
+ shldl $5,%eax,%eax
+ addl %esi,%edi
+ xorl %ecx,%ebp
+ shrdl $7,%ebx,%ebx
+ addl %eax,%edi
+ vpor %xmm5,%xmm3,%xmm3
+ addl 60(%esp),%edx
+ xorl %ebx,%ebp
+ vmovdqa 96(%esp),%xmm5
+ movl %edi,%esi
+ shldl $5,%edi,%edi
+ addl %ebp,%edx
+ xorl %ebx,%esi
+ shrdl $7,%eax,%eax
+ addl %edi,%edx
+ vpalignr $8,%xmm2,%xmm3,%xmm6
+ vpxor %xmm0,%xmm4,%xmm4
+ addl (%esp),%ecx
+ xorl %eax,%esi
+ movl %edx,%ebp
+ shldl $5,%edx,%edx
+ vpxor %xmm5,%xmm4,%xmm4
+ vmovdqa %xmm0,96(%esp)
+ addl %esi,%ecx
+ xorl %eax,%ebp
+ vmovdqa %xmm7,%xmm0
+ vpaddd %xmm3,%xmm7,%xmm7
+ shrdl $7,%edi,%edi
+ addl %edx,%ecx
+ vpxor %xmm6,%xmm4,%xmm4
+ addl 4(%esp),%ebx
+ xorl %edi,%ebp
+ movl %ecx,%esi
+ shldl $5,%ecx,%ecx
+ vpsrld $30,%xmm4,%xmm6
+ vmovdqa %xmm7,48(%esp)
+ addl %ebp,%ebx
+ xorl %edi,%esi
+ shrdl $7,%edx,%edx
+ addl %ecx,%ebx
+ vpslld $2,%xmm4,%xmm4
+ addl 8(%esp),%eax
+ xorl %edx,%esi
+ movl %ebx,%ebp
+ shldl $5,%ebx,%ebx
+ addl %esi,%eax
+ xorl %edx,%ebp
+ shrdl $7,%ecx,%ecx
+ addl %ebx,%eax
+ vpor %xmm6,%xmm4,%xmm4
+ addl 12(%esp),%edi
+ xorl %ecx,%ebp
+ vmovdqa 64(%esp),%xmm6
+ movl %eax,%esi
+ shldl $5,%eax,%eax
+ addl %ebp,%edi
+ xorl %ecx,%esi
+ shrdl $7,%ebx,%ebx
+ addl %eax,%edi
+ vpalignr $8,%xmm3,%xmm4,%xmm7
+ vpxor %xmm1,%xmm5,%xmm5
+ addl 16(%esp),%edx
+ xorl %ebx,%esi
+ movl %edi,%ebp
+ shldl $5,%edi,%edi
+ vpxor %xmm6,%xmm5,%xmm5
+ vmovdqa %xmm1,64(%esp)
+ addl %esi,%edx
+ xorl %ebx,%ebp
+ vmovdqa %xmm0,%xmm1
+ vpaddd %xmm4,%xmm0,%xmm0
+ shrdl $7,%eax,%eax
+ addl %edi,%edx
+ vpxor %xmm7,%xmm5,%xmm5
+ addl 20(%esp),%ecx
+ xorl %eax,%ebp
+ movl %edx,%esi
+ shldl $5,%edx,%edx
+ vpsrld $30,%xmm5,%xmm7
+ vmovdqa %xmm0,(%esp)
+ addl %ebp,%ecx
+ xorl %eax,%esi
+ shrdl $7,%edi,%edi
+ addl %edx,%ecx
+ vpslld $2,%xmm5,%xmm5
+ addl 24(%esp),%ebx
+ xorl %edi,%esi
+ movl %ecx,%ebp
+ shldl $5,%ecx,%ecx
+ addl %esi,%ebx
+ xorl %edi,%ebp
+ shrdl $7,%edx,%edx
+ addl %ecx,%ebx
+ vpor %xmm7,%xmm5,%xmm5
+ addl 28(%esp),%eax
+ vmovdqa 80(%esp),%xmm7
+ shrdl $7,%ecx,%ecx
+ movl %ebx,%esi
+ xorl %edx,%ebp
+ shldl $5,%ebx,%ebx
+ addl %ebp,%eax
+ xorl %ecx,%esi
+ xorl %edx,%ecx
+ addl %ebx,%eax
+ vpalignr $8,%xmm4,%xmm5,%xmm0
+ vpxor %xmm2,%xmm6,%xmm6
+ addl 32(%esp),%edi
+ andl %ecx,%esi
+ xorl %edx,%ecx
+ shrdl $7,%ebx,%ebx
+ vpxor %xmm7,%xmm6,%xmm6
+ vmovdqa %xmm2,80(%esp)
+ movl %eax,%ebp
+ xorl %ecx,%esi
+ vmovdqa %xmm1,%xmm2
+ vpaddd %xmm5,%xmm1,%xmm1
+ shldl $5,%eax,%eax
+ addl %esi,%edi
+ vpxor %xmm0,%xmm6,%xmm6
+ xorl %ebx,%ebp
+ xorl %ecx,%ebx
+ addl %eax,%edi
+ addl 36(%esp),%edx
+ vpsrld $30,%xmm6,%xmm0
+ vmovdqa %xmm1,16(%esp)
+ andl %ebx,%ebp
+ xorl %ecx,%ebx
+ shrdl $7,%eax,%eax
+ movl %edi,%esi
+ vpslld $2,%xmm6,%xmm6
+ xorl %ebx,%ebp
+ shldl $5,%edi,%edi
+ addl %ebp,%edx
+ xorl %eax,%esi
+ xorl %ebx,%eax
+ addl %edi,%edx
+ addl 40(%esp),%ecx
+ andl %eax,%esi
+ vpor %xmm0,%xmm6,%xmm6
+ xorl %ebx,%eax
+ shrdl $7,%edi,%edi
+ vmovdqa 96(%esp),%xmm0
+ movl %edx,%ebp
+ xorl %eax,%esi
+ shldl $5,%edx,%edx
+ addl %esi,%ecx
+ xorl %edi,%ebp
+ xorl %eax,%edi
+ addl %edx,%ecx
+ addl 44(%esp),%ebx
+ andl %edi,%ebp
+ xorl %eax,%edi
+ shrdl $7,%edx,%edx
+ movl %ecx,%esi
+ xorl %edi,%ebp
+ shldl $5,%ecx,%ecx
+ addl %ebp,%ebx
+ xorl %edx,%esi
+ xorl %edi,%edx
+ addl %ecx,%ebx
+ vpalignr $8,%xmm5,%xmm6,%xmm1
+ vpxor %xmm3,%xmm7,%xmm7
+ addl 48(%esp),%eax
+ andl %edx,%esi
+ xorl %edi,%edx
+ shrdl $7,%ecx,%ecx
+ vpxor %xmm0,%xmm7,%xmm7
+ vmovdqa %xmm3,96(%esp)
+ movl %ebx,%ebp
+ xorl %edx,%esi
+ vmovdqa 144(%esp),%xmm3
+ vpaddd %xmm6,%xmm2,%xmm2
+ shldl $5,%ebx,%ebx
+ addl %esi,%eax
+ vpxor %xmm1,%xmm7,%xmm7
+ xorl %ecx,%ebp
+ xorl %edx,%ecx
+ addl %ebx,%eax
+ addl 52(%esp),%edi
+ vpsrld $30,%xmm7,%xmm1
+ vmovdqa %xmm2,32(%esp)
+ andl %ecx,%ebp
+ xorl %edx,%ecx
+ shrdl $7,%ebx,%ebx
+ movl %eax,%esi
+ vpslld $2,%xmm7,%xmm7
+ xorl %ecx,%ebp
+ shldl $5,%eax,%eax
+ addl %ebp,%edi
+ xorl %ebx,%esi
+ xorl %ecx,%ebx
+ addl %eax,%edi
+ addl 56(%esp),%edx
+ andl %ebx,%esi
+ vpor %xmm1,%xmm7,%xmm7
+ xorl %ecx,%ebx
+ shrdl $7,%eax,%eax
+ vmovdqa 64(%esp),%xmm1
+ movl %edi,%ebp
+ xorl %ebx,%esi
+ shldl $5,%edi,%edi
+ addl %esi,%edx
+ xorl %eax,%ebp
+ xorl %ebx,%eax
+ addl %edi,%edx
+ addl 60(%esp),%ecx
+ andl %eax,%ebp
+ xorl %ebx,%eax
+ shrdl $7,%edi,%edi
+ movl %edx,%esi
+ xorl %eax,%ebp
+ shldl $5,%edx,%edx
+ addl %ebp,%ecx
+ xorl %edi,%esi
+ xorl %eax,%edi
+ addl %edx,%ecx
+ vpalignr $8,%xmm6,%xmm7,%xmm2
+ vpxor %xmm4,%xmm0,%xmm0
+ addl (%esp),%ebx
+ andl %edi,%esi
+ xorl %eax,%edi
+ shrdl $7,%edx,%edx
+ vpxor %xmm1,%xmm0,%xmm0
+ vmovdqa %xmm4,64(%esp)
+ movl %ecx,%ebp
+ xorl %edi,%esi
+ vmovdqa %xmm3,%xmm4
+ vpaddd %xmm7,%xmm3,%xmm3
+ shldl $5,%ecx,%ecx
+ addl %esi,%ebx
+ vpxor %xmm2,%xmm0,%xmm0
+ xorl %edx,%ebp
+ xorl %edi,%edx
+ addl %ecx,%ebx
+ addl 4(%esp),%eax
+ vpsrld $30,%xmm0,%xmm2
+ vmovdqa %xmm3,48(%esp)
+ andl %edx,%ebp
+ xorl %edi,%edx
+ shrdl $7,%ecx,%ecx
+ movl %ebx,%esi
+ vpslld $2,%xmm0,%xmm0
+ xorl %edx,%ebp
+ shldl $5,%ebx,%ebx
+ addl %ebp,%eax
+ xorl %ecx,%esi
+ xorl %edx,%ecx
+ addl %ebx,%eax
+ addl 8(%esp),%edi
+ andl %ecx,%esi
+ vpor %xmm2,%xmm0,%xmm0
+ xorl %edx,%ecx
+ shrdl $7,%ebx,%ebx
+ vmovdqa 80(%esp),%xmm2
+ movl %eax,%ebp
+ xorl %ecx,%esi
+ shldl $5,%eax,%eax
+ addl %esi,%edi
+ xorl %ebx,%ebp
+ xorl %ecx,%ebx
+ addl %eax,%edi
+ addl 12(%esp),%edx
+ andl %ebx,%ebp
+ xorl %ecx,%ebx
+ shrdl $7,%eax,%eax
+ movl %edi,%esi
+ xorl %ebx,%ebp
+ shldl $5,%edi,%edi
+ addl %ebp,%edx
+ xorl %eax,%esi
+ xorl %ebx,%eax
+ addl %edi,%edx
+ vpalignr $8,%xmm7,%xmm0,%xmm3
+ vpxor %xmm5,%xmm1,%xmm1
+ addl 16(%esp),%ecx
+ andl %eax,%esi
+ xorl %ebx,%eax
+ shrdl $7,%edi,%edi
+ vpxor %xmm2,%xmm1,%xmm1
+ vmovdqa %xmm5,80(%esp)
+ movl %edx,%ebp
+ xorl %eax,%esi
+ vmovdqa %xmm4,%xmm5
+ vpaddd %xmm0,%xmm4,%xmm4
+ shldl $5,%edx,%edx
+ addl %esi,%ecx
+ vpxor %xmm3,%xmm1,%xmm1
+ xorl %edi,%ebp
+ xorl %eax,%edi
+ addl %edx,%ecx
+ addl 20(%esp),%ebx
+ vpsrld $30,%xmm1,%xmm3
+ vmovdqa %xmm4,(%esp)
+ andl %edi,%ebp
+ xorl %eax,%edi
+ shrdl $7,%edx,%edx
+ movl %ecx,%esi
+ vpslld $2,%xmm1,%xmm1
+ xorl %edi,%ebp
+ shldl $5,%ecx,%ecx
+ addl %ebp,%ebx
+ xorl %edx,%esi
+ xorl %edi,%edx
+ addl %ecx,%ebx
+ addl 24(%esp),%eax
+ andl %edx,%esi
+ vpor %xmm3,%xmm1,%xmm1
+ xorl %edi,%edx
+ shrdl $7,%ecx,%ecx
+ vmovdqa 96(%esp),%xmm3
+ movl %ebx,%ebp
+ xorl %edx,%esi
+ shldl $5,%ebx,%ebx
+ addl %esi,%eax
+ xorl %ecx,%ebp
+ xorl %edx,%ecx
+ addl %ebx,%eax
+ addl 28(%esp),%edi
+ andl %ecx,%ebp
+ xorl %edx,%ecx
+ shrdl $7,%ebx,%ebx
+ movl %eax,%esi
+ xorl %ecx,%ebp
+ shldl $5,%eax,%eax
+ addl %ebp,%edi
+ xorl %ebx,%esi
+ xorl %ecx,%ebx
+ addl %eax,%edi
+ vpalignr $8,%xmm0,%xmm1,%xmm4
+ vpxor %xmm6,%xmm2,%xmm2
+ addl 32(%esp),%edx
+ andl %ebx,%esi
+ xorl %ecx,%ebx
+ shrdl $7,%eax,%eax
+ vpxor %xmm3,%xmm2,%xmm2
+ vmovdqa %xmm6,96(%esp)
+ movl %edi,%ebp
+ xorl %ebx,%esi
+ vmovdqa %xmm5,%xmm6
+ vpaddd %xmm1,%xmm5,%xmm5
+ shldl $5,%edi,%edi
+ addl %esi,%edx
+ vpxor %xmm4,%xmm2,%xmm2
+ xorl %eax,%ebp
+ xorl %ebx,%eax
+ addl %edi,%edx
+ addl 36(%esp),%ecx
+ vpsrld $30,%xmm2,%xmm4
+ vmovdqa %xmm5,16(%esp)
+ andl %eax,%ebp
+ xorl %ebx,%eax
+ shrdl $7,%edi,%edi
+ movl %edx,%esi
+ vpslld $2,%xmm2,%xmm2
+ xorl %eax,%ebp
+ shldl $5,%edx,%edx
+ addl %ebp,%ecx
+ xorl %edi,%esi
+ xorl %eax,%edi
+ addl %edx,%ecx
+ addl 40(%esp),%ebx
+ andl %edi,%esi
+ vpor %xmm4,%xmm2,%xmm2
+ xorl %eax,%edi
+ shrdl $7,%edx,%edx
+ vmovdqa 64(%esp),%xmm4
+ movl %ecx,%ebp
+ xorl %edi,%esi
+ shldl $5,%ecx,%ecx
+ addl %esi,%ebx
+ xorl %edx,%ebp
+ xorl %edi,%edx
+ addl %ecx,%ebx
+ addl 44(%esp),%eax
+ andl %edx,%ebp
+ xorl %edi,%edx
+ shrdl $7,%ecx,%ecx
+ movl %ebx,%esi
+ xorl %edx,%ebp
+ shldl $5,%ebx,%ebx
+ addl %ebp,%eax
+ xorl %edx,%esi
+ addl %ebx,%eax
+ vpalignr $8,%xmm1,%xmm2,%xmm5
+ vpxor %xmm7,%xmm3,%xmm3
+ addl 48(%esp),%edi
+ xorl %ecx,%esi
+ movl %eax,%ebp
+ shldl $5,%eax,%eax
+ vpxor %xmm4,%xmm3,%xmm3
+ vmovdqa %xmm7,64(%esp)
+ addl %esi,%edi
+ xorl %ecx,%ebp
+ vmovdqa %xmm6,%xmm7
+ vpaddd %xmm2,%xmm6,%xmm6
+ shrdl $7,%ebx,%ebx
+ addl %eax,%edi
+ vpxor %xmm5,%xmm3,%xmm3
+ addl 52(%esp),%edx
+ xorl %ebx,%ebp
+ movl %edi,%esi
+ shldl $5,%edi,%edi
+ vpsrld $30,%xmm3,%xmm5
+ vmovdqa %xmm6,32(%esp)
+ addl %ebp,%edx
+ xorl %ebx,%esi
+ shrdl $7,%eax,%eax
+ addl %edi,%edx
+ vpslld $2,%xmm3,%xmm3
+ addl 56(%esp),%ecx
+ xorl %eax,%esi
+ movl %edx,%ebp
+ shldl $5,%edx,%edx
+ addl %esi,%ecx
+ xorl %eax,%ebp
+ shrdl $7,%edi,%edi
+ addl %edx,%ecx
+ vpor %xmm5,%xmm3,%xmm3
+ addl 60(%esp),%ebx
+ xorl %edi,%ebp
+ movl %ecx,%esi
+ shldl $5,%ecx,%ecx
+ addl %ebp,%ebx
+ xorl %edi,%esi
+ shrdl $7,%edx,%edx
+ addl %ecx,%ebx
+ addl (%esp),%eax
+ vpaddd %xmm3,%xmm7,%xmm7
+ xorl %edx,%esi
+ movl %ebx,%ebp
+ shldl $5,%ebx,%ebx
+ addl %esi,%eax
+ vmovdqa %xmm7,48(%esp)
+ xorl %edx,%ebp
+ shrdl $7,%ecx,%ecx
+ addl %ebx,%eax
+ addl 4(%esp),%edi
+ xorl %ecx,%ebp
+ movl %eax,%esi
+ shldl $5,%eax,%eax
+ addl %ebp,%edi
+ xorl %ecx,%esi
+ shrdl $7,%ebx,%ebx
+ addl %eax,%edi
+ addl 8(%esp),%edx
+ xorl %ebx,%esi
+ movl %edi,%ebp
+ shldl $5,%edi,%edi
+ addl %esi,%edx
+ xorl %ebx,%ebp
+ shrdl $7,%eax,%eax
+ addl %edi,%edx
+ addl 12(%esp),%ecx
+ xorl %eax,%ebp
+ movl %edx,%esi
+ shldl $5,%edx,%edx
+ addl %ebp,%ecx
+ xorl %eax,%esi
+ shrdl $7,%edi,%edi
+ addl %edx,%ecx
+ movl 196(%esp),%ebp
+ cmpl 200(%esp),%ebp
+ je .L008done
+ vmovdqa 160(%esp),%xmm7
+ vmovdqa 176(%esp),%xmm6
+ vmovdqu (%ebp),%xmm0
+ vmovdqu 16(%ebp),%xmm1
+ vmovdqu 32(%ebp),%xmm2
+ vmovdqu 48(%ebp),%xmm3
+ addl $64,%ebp
+ vpshufb %xmm6,%xmm0,%xmm0
+ movl %ebp,196(%esp)
+ vmovdqa %xmm7,96(%esp)
+ addl 16(%esp),%ebx
+ xorl %edi,%esi
+ vpshufb %xmm6,%xmm1,%xmm1
+ movl %ecx,%ebp
+ shldl $5,%ecx,%ecx
+ vpaddd %xmm7,%xmm0,%xmm4
+ addl %esi,%ebx
+ xorl %edi,%ebp
+ shrdl $7,%edx,%edx
+ addl %ecx,%ebx
+ vmovdqa %xmm4,(%esp)
+ addl 20(%esp),%eax
+ xorl %edx,%ebp
+ movl %ebx,%esi
+ shldl $5,%ebx,%ebx
+ addl %ebp,%eax
+ xorl %edx,%esi
+ shrdl $7,%ecx,%ecx
+ addl %ebx,%eax
+ addl 24(%esp),%edi
+ xorl %ecx,%esi
+ movl %eax,%ebp
+ shldl $5,%eax,%eax
+ addl %esi,%edi
+ xorl %ecx,%ebp
+ shrdl $7,%ebx,%ebx
+ addl %eax,%edi
+ addl 28(%esp),%edx
+ xorl %ebx,%ebp
+ movl %edi,%esi
+ shldl $5,%edi,%edi
+ addl %ebp,%edx
+ xorl %ebx,%esi
+ shrdl $7,%eax,%eax
+ addl %edi,%edx
+ addl 32(%esp),%ecx
+ xorl %eax,%esi
+ vpshufb %xmm6,%xmm2,%xmm2
+ movl %edx,%ebp
+ shldl $5,%edx,%edx
+ vpaddd %xmm7,%xmm1,%xmm5
+ addl %esi,%ecx
+ xorl %eax,%ebp
+ shrdl $7,%edi,%edi
+ addl %edx,%ecx
+ vmovdqa %xmm5,16(%esp)
+ addl 36(%esp),%ebx
+ xorl %edi,%ebp
+ movl %ecx,%esi
+ shldl $5,%ecx,%ecx
+ addl %ebp,%ebx
+ xorl %edi,%esi
+ shrdl $7,%edx,%edx
+ addl %ecx,%ebx
+ addl 40(%esp),%eax
+ xorl %edx,%esi
+ movl %ebx,%ebp
+ shldl $5,%ebx,%ebx
+ addl %esi,%eax
+ xorl %edx,%ebp
+ shrdl $7,%ecx,%ecx
+ addl %ebx,%eax
+ addl 44(%esp),%edi
+ xorl %ecx,%ebp
+ movl %eax,%esi
+ shldl $5,%eax,%eax
+ addl %ebp,%edi
+ xorl %ecx,%esi
+ shrdl $7,%ebx,%ebx
+ addl %eax,%edi
+ addl 48(%esp),%edx
+ xorl %ebx,%esi
+ vpshufb %xmm6,%xmm3,%xmm3
+ movl %edi,%ebp
+ shldl $5,%edi,%edi
+ vpaddd %xmm7,%xmm2,%xmm6
+ addl %esi,%edx
+ xorl %ebx,%ebp
+ shrdl $7,%eax,%eax
+ addl %edi,%edx
+ vmovdqa %xmm6,32(%esp)
+ addl 52(%esp),%ecx
+ xorl %eax,%ebp
+ movl %edx,%esi
+ shldl $5,%edx,%edx
+ addl %ebp,%ecx
+ xorl %eax,%esi
+ shrdl $7,%edi,%edi
+ addl %edx,%ecx
+ addl 56(%esp),%ebx
+ xorl %edi,%esi
+ movl %ecx,%ebp
+ shldl $5,%ecx,%ecx
+ addl %esi,%ebx
+ xorl %edi,%ebp
+ shrdl $7,%edx,%edx
+ addl %ecx,%ebx
+ addl 60(%esp),%eax
+ xorl %edx,%ebp
+ movl %ebx,%esi
+ shldl $5,%ebx,%ebx
+ addl %ebp,%eax
+ shrdl $7,%ecx,%ecx
+ addl %ebx,%eax
+ movl 192(%esp),%ebp
+ addl (%ebp),%eax
+ addl 4(%ebp),%esi
+ addl 8(%ebp),%ecx
+ movl %eax,(%ebp)
+ addl 12(%ebp),%edx
+ movl %esi,4(%ebp)
+ addl 16(%ebp),%edi
+ movl %ecx,%ebx
+ movl %ecx,8(%ebp)
+ xorl %edx,%ebx
+ movl %edx,12(%ebp)
+ movl %edi,16(%ebp)
+ movl %esi,%ebp
+ andl %ebx,%esi
+ movl %ebp,%ebx
+ jmp .L007loop
+.align 16
+.L008done:
+ addl 16(%esp),%ebx
+ xorl %edi,%esi
+ movl %ecx,%ebp
+ shldl $5,%ecx,%ecx
+ addl %esi,%ebx
+ xorl %edi,%ebp
+ shrdl $7,%edx,%edx
+ addl %ecx,%ebx
+ addl 20(%esp),%eax
+ xorl %edx,%ebp
+ movl %ebx,%esi
+ shldl $5,%ebx,%ebx
+ addl %ebp,%eax
+ xorl %edx,%esi
+ shrdl $7,%ecx,%ecx
+ addl %ebx,%eax
+ addl 24(%esp),%edi
+ xorl %ecx,%esi
+ movl %eax,%ebp
+ shldl $5,%eax,%eax
+ addl %esi,%edi
+ xorl %ecx,%ebp
+ shrdl $7,%ebx,%ebx
+ addl %eax,%edi
+ addl 28(%esp),%edx
+ xorl %ebx,%ebp
+ movl %edi,%esi
+ shldl $5,%edi,%edi
+ addl %ebp,%edx
+ xorl %ebx,%esi
+ shrdl $7,%eax,%eax
+ addl %edi,%edx
+ addl 32(%esp),%ecx
+ xorl %eax,%esi
+ movl %edx,%ebp
+ shldl $5,%edx,%edx
+ addl %esi,%ecx
+ xorl %eax,%ebp
+ shrdl $7,%edi,%edi
+ addl %edx,%ecx
+ addl 36(%esp),%ebx
+ xorl %edi,%ebp
+ movl %ecx,%esi
+ shldl $5,%ecx,%ecx
+ addl %ebp,%ebx
+ xorl %edi,%esi
+ shrdl $7,%edx,%edx
+ addl %ecx,%ebx
+ addl 40(%esp),%eax
+ xorl %edx,%esi
+ movl %ebx,%ebp
+ shldl $5,%ebx,%ebx
+ addl %esi,%eax
+ xorl %edx,%ebp
+ shrdl $7,%ecx,%ecx
+ addl %ebx,%eax
+ addl 44(%esp),%edi
+ xorl %ecx,%ebp
+ movl %eax,%esi
+ shldl $5,%eax,%eax
+ addl %ebp,%edi
+ xorl %ecx,%esi
+ shrdl $7,%ebx,%ebx
+ addl %eax,%edi
+ addl 48(%esp),%edx
+ xorl %ebx,%esi
+ movl %edi,%ebp
+ shldl $5,%edi,%edi
+ addl %esi,%edx
+ xorl %ebx,%ebp
+ shrdl $7,%eax,%eax
+ addl %edi,%edx
+ addl 52(%esp),%ecx
+ xorl %eax,%ebp
+ movl %edx,%esi
+ shldl $5,%edx,%edx
+ addl %ebp,%ecx
+ xorl %eax,%esi
+ shrdl $7,%edi,%edi
+ addl %edx,%ecx
+ addl 56(%esp),%ebx
+ xorl %edi,%esi
+ movl %ecx,%ebp
+ shldl $5,%ecx,%ecx
+ addl %esi,%ebx
+ xorl %edi,%ebp
+ shrdl $7,%edx,%edx
+ addl %ecx,%ebx
+ addl 60(%esp),%eax
+ xorl %edx,%ebp
+ movl %ebx,%esi
+ shldl $5,%ebx,%ebx
+ addl %ebp,%eax
+ shrdl $7,%ecx,%ecx
+ addl %ebx,%eax
+ vzeroall
+ movl 192(%esp),%ebp
+ addl (%ebp),%eax
+ movl 204(%esp),%esp
+ addl 4(%ebp),%esi
+ addl 8(%ebp),%ecx
+ movl %eax,(%ebp)
+ addl 12(%ebp),%edx
+ movl %esi,4(%ebp)
+ addl 16(%ebp),%edi
+ movl %ecx,8(%ebp)
+ movl %edx,12(%ebp)
+ movl %edi,16(%ebp)
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.size _sha1_block_data_order_avx,.-_sha1_block_data_order_avx
.align 64
.LK_XX_XX:
.long 1518500249,1518500249,1518500249,1518500249

Powered by Google App Engine
This is Rietveld 408576698