Index: third_party/boringssl/win-x86_64/crypto/sha/sha256-x86_64.asm
diff --git a/third_party/boringssl/win-x86_64/crypto/sha/sha256-x86_64.asm b/third_party/boringssl/win-x86_64/crypto/sha/sha256-x86_64.asm
index e6193c5b9d75453e8f81003c4b4296ff281455a9..efaf9b55fccff2db693d37a0d8d653357c6cb87f 100644
--- a/third_party/boringssl/win-x86_64/crypto/sha/sha256-x86_64.asm
+++ b/third_party/boringssl/win-x86_64/crypto/sha/sha256-x86_64.asm
@@ -23,6 +23,11 @@ $L$SEH_begin_sha256_block_data_order:
mov r9d,DWORD[r11]
mov r10d,DWORD[4+r11]
mov r11d,DWORD[8+r11]
+ and r9d,1073741824
+ and r10d,268435968
+ or r10d,r9d
+ cmp r10d,1342177792
+ je NEAR $L$avx_shortcut
test r10d,512
jnz NEAR $L$ssse3_shortcut
push rbx
@@ -2877,6 +2882,1082 @@ $L$epilogue_ssse3:
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
$L$SEH_end_sha256_block_data_order_ssse3:
+
+ALIGN 64
+sha256_block_data_order_avx:
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
+ mov QWORD[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_sha256_block_data_order_avx:
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+
+
+$L$avx_shortcut:
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ mov r11,rsp
+ shl rdx,4
+ sub rsp,160
+ lea rdx,[rdx*4+rsi]
+ and rsp,-64
+ mov QWORD[((64+0))+rsp],rdi
+ mov QWORD[((64+8))+rsp],rsi
+ mov QWORD[((64+16))+rsp],rdx
+ mov QWORD[((64+24))+rsp],r11
+ movaps XMMWORD[(64+32)+rsp],xmm6
+ movaps XMMWORD[(64+48)+rsp],xmm7
+ movaps XMMWORD[(64+64)+rsp],xmm8
+ movaps XMMWORD[(64+80)+rsp],xmm9
+$L$prologue_avx:
+
+ vzeroupper
+ mov eax,DWORD[rdi]
+ mov ebx,DWORD[4+rdi]
+ mov ecx,DWORD[8+rdi]
+ mov edx,DWORD[12+rdi]
+ mov r8d,DWORD[16+rdi]
+ mov r9d,DWORD[20+rdi]
+ mov r10d,DWORD[24+rdi]
+ mov r11d,DWORD[28+rdi]
+ vmovdqa xmm8,XMMWORD[((K256+512+32))]
+ vmovdqa xmm9,XMMWORD[((K256+512+64))]
+ jmp NEAR $L$loop_avx
+ALIGN 16
+$L$loop_avx:
+ vmovdqa xmm7,XMMWORD[((K256+512))]
+ vmovdqu xmm0,XMMWORD[rsi]
+ vmovdqu xmm1,XMMWORD[16+rsi]
+ vmovdqu xmm2,XMMWORD[32+rsi]
+ vmovdqu xmm3,XMMWORD[48+rsi]
+ vpshufb xmm0,xmm0,xmm7
+ lea rbp,[K256]
+ vpshufb xmm1,xmm1,xmm7
+ vpshufb xmm2,xmm2,xmm7
+ vpaddd xmm4,xmm0,XMMWORD[rbp]
+ vpshufb xmm3,xmm3,xmm7
+ vpaddd xmm5,xmm1,XMMWORD[32+rbp]
+ vpaddd xmm6,xmm2,XMMWORD[64+rbp]
+ vpaddd xmm7,xmm3,XMMWORD[96+rbp]
+ vmovdqa XMMWORD[rsp],xmm4
+ mov r14d,eax
+ vmovdqa XMMWORD[16+rsp],xmm5
+ mov edi,ebx
+ vmovdqa XMMWORD[32+rsp],xmm6
+ xor edi,ecx
+ vmovdqa XMMWORD[48+rsp],xmm7
+ mov r13d,r8d
+ jmp NEAR $L$avx_00_47
+
+ALIGN 16
+$L$avx_00_47:
+ sub rbp,-128
+ vpalignr xmm4,xmm1,xmm0,4
+ shrd r13d,r13d,14
+ mov eax,r14d
+ mov r12d,r9d
+ vpalignr xmm7,xmm3,xmm2,4
+ shrd r14d,r14d,9
+ xor r13d,r8d
+ xor r12d,r10d
+ vpsrld xmm6,xmm4,7
+ shrd r13d,r13d,5
+ xor r14d,eax
+ and r12d,r8d
+ vpaddd xmm0,xmm0,xmm7
+ xor r13d,r8d
+ add r11d,DWORD[rsp]
+ mov r15d,eax
+ vpsrld xmm7,xmm4,3
+ xor r12d,r10d
+ shrd r14d,r14d,11
+ xor r15d,ebx
+ vpslld xmm5,xmm4,14
+ add r11d,r12d
+ shrd r13d,r13d,6
+ and edi,r15d
+ vpxor xmm4,xmm7,xmm6
+ xor r14d,eax
+ add r11d,r13d
+ xor edi,ebx
+ vpshufd xmm7,xmm3,250
+ shrd r14d,r14d,2
+ add edx,r11d
+ add r11d,edi
+ vpsrld xmm6,xmm6,11
+ mov r13d,edx
+ add r14d,r11d
+ shrd r13d,r13d,14
+ vpxor xmm4,xmm4,xmm5
+ mov r11d,r14d
+ mov r12d,r8d
+ shrd r14d,r14d,9
+ vpslld xmm5,xmm5,11
+ xor r13d,edx
+ xor r12d,r9d
+ shrd r13d,r13d,5
+ vpxor xmm4,xmm4,xmm6
+ xor r14d,r11d
+ and r12d,edx
+ xor r13d,edx
+ vpsrld xmm6,xmm7,10
+ add r10d,DWORD[4+rsp]
+ mov edi,r11d
+ xor r12d,r9d
+ vpxor xmm4,xmm4,xmm5
+ shrd r14d,r14d,11
+ xor edi,eax
+ add r10d,r12d
+ vpsrlq xmm7,xmm7,17
+ shrd r13d,r13d,6
+ and r15d,edi
+ xor r14d,r11d
+ vpaddd xmm0,xmm0,xmm4
+ add r10d,r13d
+ xor r15d,eax
+ shrd r14d,r14d,2
+ vpxor xmm6,xmm6,xmm7
+ add ecx,r10d
+ add r10d,r15d
+ mov r13d,ecx
+ vpsrlq xmm7,xmm7,2
+ add r14d,r10d
+ shrd r13d,r13d,14
+ mov r10d,r14d
+ vpxor xmm6,xmm6,xmm7
+ mov r12d,edx
+ shrd r14d,r14d,9
+ xor r13d,ecx
+ vpshufb xmm6,xmm6,xmm8
+ xor r12d,r8d
+ shrd r13d,r13d,5
+ xor r14d,r10d
+ vpaddd xmm0,xmm0,xmm6
+ and r12d,ecx
+ xor r13d,ecx
+ add r9d,DWORD[8+rsp]
+ vpshufd xmm7,xmm0,80
+ mov r15d,r10d
+ xor r12d,r8d
+ shrd r14d,r14d,11
+ vpsrld xmm6,xmm7,10
+ xor r15d,r11d
+ add r9d,r12d
+ shrd r13d,r13d,6
+ vpsrlq xmm7,xmm7,17
+ and edi,r15d
+ xor r14d,r10d
+ add r9d,r13d
+ vpxor xmm6,xmm6,xmm7
+ xor edi,r11d
+ shrd r14d,r14d,2
+ add ebx,r9d
+ vpsrlq xmm7,xmm7,2
+ add r9d,edi
+ mov r13d,ebx
+ add r14d,r9d
+ vpxor xmm6,xmm6,xmm7
+ shrd r13d,r13d,14
+ mov r9d,r14d
+ mov r12d,ecx
+ vpshufb xmm6,xmm6,xmm9
+ shrd r14d,r14d,9
+ xor r13d,ebx
+ xor r12d,edx
+ vpaddd xmm0,xmm0,xmm6
+ shrd r13d,r13d,5
+ xor r14d,r9d
+ and r12d,ebx
+ vpaddd xmm6,xmm0,XMMWORD[rbp]
+ xor r13d,ebx
+ add r8d,DWORD[12+rsp]
+ mov edi,r9d
+ xor r12d,edx
+ shrd r14d,r14d,11
+ xor edi,r10d
+ add r8d,r12d
+ shrd r13d,r13d,6
+ and r15d,edi
+ xor r14d,r9d
+ add r8d,r13d
+ xor r15d,r10d
+ shrd r14d,r14d,2
+ add eax,r8d
+ add r8d,r15d
+ mov r13d,eax
+ add r14d,r8d
+ vmovdqa XMMWORD[rsp],xmm6
+ vpalignr xmm4,xmm2,xmm1,4
+ shrd r13d,r13d,14
+ mov r8d,r14d
+ mov r12d,ebx
+ vpalignr xmm7,xmm0,xmm3,4
+ shrd r14d,r14d,9
+ xor r13d,eax
+ xor r12d,ecx
+ vpsrld xmm6,xmm4,7
+ shrd r13d,r13d,5
+ xor r14d,r8d
+ and r12d,eax
+ vpaddd xmm1,xmm1,xmm7
+ xor r13d,eax
+ add edx,DWORD[16+rsp]
+ mov r15d,r8d
+ vpsrld xmm7,xmm4,3
+ xor r12d,ecx
+ shrd r14d,r14d,11
+ xor r15d,r9d
+ vpslld xmm5,xmm4,14
+ add edx,r12d
+ shrd r13d,r13d,6
+ and edi,r15d
+ vpxor xmm4,xmm7,xmm6
+ xor r14d,r8d
+ add edx,r13d
+ xor edi,r9d
+ vpshufd xmm7,xmm0,250
+ shrd r14d,r14d,2
+ add r11d,edx
+ add edx,edi
+ vpsrld xmm6,xmm6,11
+ mov r13d,r11d
+ add r14d,edx
+ shrd r13d,r13d,14
+ vpxor xmm4,xmm4,xmm5
+ mov edx,r14d
+ mov r12d,eax
+ shrd r14d,r14d,9
+ vpslld xmm5,xmm5,11
+ xor r13d,r11d
+ xor r12d,ebx
+ shrd r13d,r13d,5
+ vpxor xmm4,xmm4,xmm6
+ xor r14d,edx
+ and r12d,r11d
+ xor r13d,r11d
+ vpsrld xmm6,xmm7,10
+ add ecx,DWORD[20+rsp]
+ mov edi,edx
+ xor r12d,ebx
+ vpxor xmm4,xmm4,xmm5
+ shrd r14d,r14d,11
+ xor edi,r8d
+ add ecx,r12d
+ vpsrlq xmm7,xmm7,17
+ shrd r13d,r13d,6
+ and r15d,edi
+ xor r14d,edx
+ vpaddd xmm1,xmm1,xmm4
+ add ecx,r13d
+ xor r15d,r8d
+ shrd r14d,r14d,2
+ vpxor xmm6,xmm6,xmm7
+ add r10d,ecx
+ add ecx,r15d
+ mov r13d,r10d
+ vpsrlq xmm7,xmm7,2
+ add r14d,ecx
+ shrd r13d,r13d,14
+ mov ecx,r14d
+ vpxor xmm6,xmm6,xmm7
+ mov r12d,r11d
+ shrd r14d,r14d,9
+ xor r13d,r10d
+ vpshufb xmm6,xmm6,xmm8
+ xor r12d,eax
+ shrd r13d,r13d,5
+ xor r14d,ecx
+ vpaddd xmm1,xmm1,xmm6
+ and r12d,r10d
+ xor r13d,r10d
+ add ebx,DWORD[24+rsp]
+ vpshufd xmm7,xmm1,80
+ mov r15d,ecx
+ xor r12d,eax
+ shrd r14d,r14d,11
+ vpsrld xmm6,xmm7,10
+ xor r15d,edx
+ add ebx,r12d
+ shrd r13d,r13d,6
+ vpsrlq xmm7,xmm7,17
+ and edi,r15d
+ xor r14d,ecx
+ add ebx,r13d
+ vpxor xmm6,xmm6,xmm7
+ xor edi,edx
+ shrd r14d,r14d,2
+ add r9d,ebx
+ vpsrlq xmm7,xmm7,2
+ add ebx,edi
+ mov r13d,r9d
+ add r14d,ebx
+ vpxor xmm6,xmm6,xmm7
+ shrd r13d,r13d,14
+ mov ebx,r14d
+ mov r12d,r10d
+ vpshufb xmm6,xmm6,xmm9
+ shrd r14d,r14d,9
+ xor r13d,r9d
+ xor r12d,r11d
+ vpaddd xmm1,xmm1,xmm6
+ shrd r13d,r13d,5
+ xor r14d,ebx
+ and r12d,r9d
+ vpaddd xmm6,xmm1,XMMWORD[32+rbp]
+ xor r13d,r9d
+ add eax,DWORD[28+rsp]
+ mov edi,ebx
+ xor r12d,r11d
+ shrd r14d,r14d,11
+ xor edi,ecx
+ add eax,r12d
+ shrd r13d,r13d,6
+ and r15d,edi
+ xor r14d,ebx
+ add eax,r13d
+ xor r15d,ecx
+ shrd r14d,r14d,2
+ add r8d,eax
+ add eax,r15d
+ mov r13d,r8d
+ add r14d,eax
+ vmovdqa XMMWORD[16+rsp],xmm6
+ vpalignr xmm4,xmm3,xmm2,4
+ shrd r13d,r13d,14
+ mov eax,r14d
+ mov r12d,r9d
+ vpalignr xmm7,xmm1,xmm0,4
+ shrd r14d,r14d,9
+ xor r13d,r8d
+ xor r12d,r10d
+ vpsrld xmm6,xmm4,7
+ shrd r13d,r13d,5
+ xor r14d,eax
+ and r12d,r8d
+ vpaddd xmm2,xmm2,xmm7
+ xor r13d,r8d
+ add r11d,DWORD[32+rsp]
+ mov r15d,eax
+ vpsrld xmm7,xmm4,3
+ xor r12d,r10d
+ shrd r14d,r14d,11
+ xor r15d,ebx
+ vpslld xmm5,xmm4,14
+ add r11d,r12d
+ shrd r13d,r13d,6
+ and edi,r15d
+ vpxor xmm4,xmm7,xmm6
+ xor r14d,eax
+ add r11d,r13d
+ xor edi,ebx
+ vpshufd xmm7,xmm1,250
+ shrd r14d,r14d,2
+ add edx,r11d
+ add r11d,edi
+ vpsrld xmm6,xmm6,11
+ mov r13d,edx
+ add r14d,r11d
+ shrd r13d,r13d,14
+ vpxor xmm4,xmm4,xmm5
+ mov r11d,r14d
+ mov r12d,r8d
+ shrd r14d,r14d,9
+ vpslld xmm5,xmm5,11
+ xor r13d,edx
+ xor r12d,r9d
+ shrd r13d,r13d,5
+ vpxor xmm4,xmm4,xmm6
+ xor r14d,r11d
+ and r12d,edx
+ xor r13d,edx
+ vpsrld xmm6,xmm7,10
+ add r10d,DWORD[36+rsp]
+ mov edi,r11d
+ xor r12d,r9d
+ vpxor xmm4,xmm4,xmm5
+ shrd r14d,r14d,11
+ xor edi,eax
+ add r10d,r12d
+ vpsrlq xmm7,xmm7,17
+ shrd r13d,r13d,6
+ and r15d,edi
+ xor r14d,r11d
+ vpaddd xmm2,xmm2,xmm4
+ add r10d,r13d
+ xor r15d,eax
+ shrd r14d,r14d,2
+ vpxor xmm6,xmm6,xmm7
+ add ecx,r10d
+ add r10d,r15d
+ mov r13d,ecx
+ vpsrlq xmm7,xmm7,2
+ add r14d,r10d
+ shrd r13d,r13d,14
+ mov r10d,r14d
+ vpxor xmm6,xmm6,xmm7
+ mov r12d,edx
+ shrd r14d,r14d,9
+ xor r13d,ecx
+ vpshufb xmm6,xmm6,xmm8
+ xor r12d,r8d
+ shrd r13d,r13d,5
+ xor r14d,r10d
+ vpaddd xmm2,xmm2,xmm6
+ and r12d,ecx
+ xor r13d,ecx
+ add r9d,DWORD[40+rsp]
+ vpshufd xmm7,xmm2,80
+ mov r15d,r10d
+ xor r12d,r8d
+ shrd r14d,r14d,11
+ vpsrld xmm6,xmm7,10
+ xor r15d,r11d
+ add r9d,r12d
+ shrd r13d,r13d,6
+ vpsrlq xmm7,xmm7,17
+ and edi,r15d
+ xor r14d,r10d
+ add r9d,r13d
+ vpxor xmm6,xmm6,xmm7
+ xor edi,r11d
+ shrd r14d,r14d,2
+ add ebx,r9d
+ vpsrlq xmm7,xmm7,2
+ add r9d,edi
+ mov r13d,ebx
+ add r14d,r9d
+ vpxor xmm6,xmm6,xmm7
+ shrd r13d,r13d,14
+ mov r9d,r14d
+ mov r12d,ecx
+ vpshufb xmm6,xmm6,xmm9
+ shrd r14d,r14d,9
+ xor r13d,ebx
+ xor r12d,edx
+ vpaddd xmm2,xmm2,xmm6
+ shrd r13d,r13d,5
+ xor r14d,r9d
+ and r12d,ebx
+ vpaddd xmm6,xmm2,XMMWORD[64+rbp]
+ xor r13d,ebx
+ add r8d,DWORD[44+rsp]
+ mov edi,r9d
+ xor r12d,edx
+ shrd r14d,r14d,11
+ xor edi,r10d
+ add r8d,r12d
+ shrd r13d,r13d,6
+ and r15d,edi
+ xor r14d,r9d
+ add r8d,r13d
+ xor r15d,r10d
+ shrd r14d,r14d,2
+ add eax,r8d
+ add r8d,r15d
+ mov r13d,eax
+ add r14d,r8d
+ vmovdqa XMMWORD[32+rsp],xmm6
+ vpalignr xmm4,xmm0,xmm3,4
+ shrd r13d,r13d,14
+ mov r8d,r14d
+ mov r12d,ebx
+ vpalignr xmm7,xmm2,xmm1,4
+ shrd r14d,r14d,9
+ xor r13d,eax
+ xor r12d,ecx
+ vpsrld xmm6,xmm4,7
+ shrd r13d,r13d,5
+ xor r14d,r8d
+ and r12d,eax
+ vpaddd xmm3,xmm3,xmm7
+ xor r13d,eax
+ add edx,DWORD[48+rsp]
+ mov r15d,r8d
+ vpsrld xmm7,xmm4,3
+ xor r12d,ecx
+ shrd r14d,r14d,11
+ xor r15d,r9d
+ vpslld xmm5,xmm4,14
+ add edx,r12d
+ shrd r13d,r13d,6
+ and edi,r15d
+ vpxor xmm4,xmm7,xmm6
+ xor r14d,r8d
+ add edx,r13d
+ xor edi,r9d
+ vpshufd xmm7,xmm2,250
+ shrd r14d,r14d,2
+ add r11d,edx
+ add edx,edi
+ vpsrld xmm6,xmm6,11
+ mov r13d,r11d
+ add r14d,edx
+ shrd r13d,r13d,14
+ vpxor xmm4,xmm4,xmm5
+ mov edx,r14d
+ mov r12d,eax
+ shrd r14d,r14d,9
+ vpslld xmm5,xmm5,11
+ xor r13d,r11d
+ xor r12d,ebx
+ shrd r13d,r13d,5
+ vpxor xmm4,xmm4,xmm6
+ xor r14d,edx
+ and r12d,r11d
+ xor r13d,r11d
+ vpsrld xmm6,xmm7,10
+ add ecx,DWORD[52+rsp]
+ mov edi,edx
+ xor r12d,ebx
+ vpxor xmm4,xmm4,xmm5
+ shrd r14d,r14d,11
+ xor edi,r8d
+ add ecx,r12d
+ vpsrlq xmm7,xmm7,17
+ shrd r13d,r13d,6
+ and r15d,edi
+ xor r14d,edx
+ vpaddd xmm3,xmm3,xmm4
+ add ecx,r13d
+ xor r15d,r8d
+ shrd r14d,r14d,2
+ vpxor xmm6,xmm6,xmm7
+ add r10d,ecx
+ add ecx,r15d
+ mov r13d,r10d
+ vpsrlq xmm7,xmm7,2
+ add r14d,ecx
+ shrd r13d,r13d,14
+ mov ecx,r14d
+ vpxor xmm6,xmm6,xmm7
+ mov r12d,r11d
+ shrd r14d,r14d,9
+ xor r13d,r10d
+ vpshufb xmm6,xmm6,xmm8
+ xor r12d,eax
+ shrd r13d,r13d,5
+ xor r14d,ecx
+ vpaddd xmm3,xmm3,xmm6
+ and r12d,r10d
+ xor r13d,r10d
+ add ebx,DWORD[56+rsp]
+ vpshufd xmm7,xmm3,80
+ mov r15d,ecx
+ xor r12d,eax
+ shrd r14d,r14d,11
+ vpsrld xmm6,xmm7,10
+ xor r15d,edx
+ add ebx,r12d
+ shrd r13d,r13d,6
+ vpsrlq xmm7,xmm7,17
+ and edi,r15d
+ xor r14d,ecx
+ add ebx,r13d
+ vpxor xmm6,xmm6,xmm7
+ xor edi,edx
+ shrd r14d,r14d,2
+ add r9d,ebx
+ vpsrlq xmm7,xmm7,2
+ add ebx,edi
+ mov r13d,r9d
+ add r14d,ebx
+ vpxor xmm6,xmm6,xmm7
+ shrd r13d,r13d,14
+ mov ebx,r14d
+ mov r12d,r10d
+ vpshufb xmm6,xmm6,xmm9
+ shrd r14d,r14d,9
+ xor r13d,r9d
+ xor r12d,r11d
+ vpaddd xmm3,xmm3,xmm6
+ shrd r13d,r13d,5
+ xor r14d,ebx
+ and r12d,r9d
+ vpaddd xmm6,xmm3,XMMWORD[96+rbp]
+ xor r13d,r9d
+ add eax,DWORD[60+rsp]
+ mov edi,ebx
+ xor r12d,r11d
+ shrd r14d,r14d,11
+ xor edi,ecx
+ add eax,r12d
+ shrd r13d,r13d,6
+ and r15d,edi
+ xor r14d,ebx
+ add eax,r13d
+ xor r15d,ecx
+ shrd r14d,r14d,2
+ add r8d,eax
+ add eax,r15d
+ mov r13d,r8d
+ add r14d,eax
+ vmovdqa XMMWORD[48+rsp],xmm6
+ cmp BYTE[131+rbp],0
+ jne NEAR $L$avx_00_47
+ shrd r13d,r13d,14
+ mov eax,r14d
+ mov r12d,r9d
+ shrd r14d,r14d,9
+ xor r13d,r8d
+ xor r12d,r10d
+ shrd r13d,r13d,5
+ xor r14d,eax
+ and r12d,r8d
+ xor r13d,r8d
+ add r11d,DWORD[rsp]
+ mov r15d,eax
+ xor r12d,r10d
+ shrd r14d,r14d,11
+ xor r15d,ebx
+ add r11d,r12d
+ shrd r13d,r13d,6
+ and edi,r15d
+ xor r14d,eax
+ add r11d,r13d
+ xor edi,ebx
+ shrd r14d,r14d,2
+ add edx,r11d
+ add r11d,edi
+ mov r13d,edx
+ add r14d,r11d
+ shrd r13d,r13d,14
+ mov r11d,r14d
+ mov r12d,r8d
+ shrd r14d,r14d,9
+ xor r13d,edx
+ xor r12d,r9d
+ shrd r13d,r13d,5
+ xor r14d,r11d
+ and r12d,edx
+ xor r13d,edx
+ add r10d,DWORD[4+rsp]
+ mov edi,r11d
+ xor r12d,r9d
+ shrd r14d,r14d,11
+ xor edi,eax
+ add r10d,r12d
+ shrd r13d,r13d,6
+ and r15d,edi
+ xor r14d,r11d
+ add r10d,r13d
+ xor r15d,eax
+ shrd r14d,r14d,2
+ add ecx,r10d
+ add r10d,r15d
+ mov r13d,ecx
+ add r14d,r10d
+ shrd r13d,r13d,14
+ mov r10d,r14d
+ mov r12d,edx
+ shrd r14d,r14d,9
+ xor r13d,ecx
+ xor r12d,r8d
+ shrd r13d,r13d,5
+ xor r14d,r10d
+ and r12d,ecx
+ xor r13d,ecx
+ add r9d,DWORD[8+rsp]
+ mov r15d,r10d
+ xor r12d,r8d
+ shrd r14d,r14d,11
+ xor r15d,r11d
+ add r9d,r12d
+ shrd r13d,r13d,6
+ and edi,r15d
+ xor r14d,r10d
+ add r9d,r13d
+ xor edi,r11d
+ shrd r14d,r14d,2
+ add ebx,r9d
+ add r9d,edi
+ mov r13d,ebx
+ add r14d,r9d
+ shrd r13d,r13d,14
+ mov r9d,r14d
+ mov r12d,ecx
+ shrd r14d,r14d,9
+ xor r13d,ebx
+ xor r12d,edx
+ shrd r13d,r13d,5
+ xor r14d,r9d
+ and r12d,ebx
+ xor r13d,ebx
+ add r8d,DWORD[12+rsp]
+ mov edi,r9d
+ xor r12d,edx
+ shrd r14d,r14d,11
+ xor edi,r10d
+ add r8d,r12d
+ shrd r13d,r13d,6
+ and r15d,edi
+ xor r14d,r9d
+ add r8d,r13d
+ xor r15d,r10d
+ shrd r14d,r14d,2
+ add eax,r8d
+ add r8d,r15d
+ mov r13d,eax
+ add r14d,r8d
+ shrd r13d,r13d,14
+ mov r8d,r14d
+ mov r12d,ebx
+ shrd r14d,r14d,9
+ xor r13d,eax
+ xor r12d,ecx
+ shrd r13d,r13d,5
+ xor r14d,r8d
+ and r12d,eax
+ xor r13d,eax
+ add edx,DWORD[16+rsp]
+ mov r15d,r8d
+ xor r12d,ecx
+ shrd r14d,r14d,11
+ xor r15d,r9d
+ add edx,r12d
+ shrd r13d,r13d,6
+ and edi,r15d
+ xor r14d,r8d
+ add edx,r13d
+ xor edi,r9d
+ shrd r14d,r14d,2
+ add r11d,edx
+ add edx,edi
+ mov r13d,r11d
+ add r14d,edx
+ shrd r13d,r13d,14
+ mov edx,r14d
+ mov r12d,eax
+ shrd r14d,r14d,9
+ xor r13d,r11d
+ xor r12d,ebx
+ shrd r13d,r13d,5
+ xor r14d,edx
+ and r12d,r11d
+ xor r13d,r11d
+ add ecx,DWORD[20+rsp]
+ mov edi,edx
+ xor r12d,ebx
+ shrd r14d,r14d,11
+ xor edi,r8d
+ add ecx,r12d
+ shrd r13d,r13d,6
+ and r15d,edi
+ xor r14d,edx
+ add ecx,r13d
+ xor r15d,r8d
+ shrd r14d,r14d,2
+ add r10d,ecx
+ add ecx,r15d
+ mov r13d,r10d
+ add r14d,ecx
+ shrd r13d,r13d,14
+ mov ecx,r14d
+ mov r12d,r11d
+ shrd r14d,r14d,9
+ xor r13d,r10d
+ xor r12d,eax
+ shrd r13d,r13d,5
+ xor r14d,ecx
+ and r12d,r10d
+ xor r13d,r10d
+ add ebx,DWORD[24+rsp]
+ mov r15d,ecx
+ xor r12d,eax
+ shrd r14d,r14d,11
+ xor r15d,edx
+ add ebx,r12d
+ shrd r13d,r13d,6
+ and edi,r15d
+ xor r14d,ecx
+ add ebx,r13d
+ xor edi,edx
+ shrd r14d,r14d,2
+ add r9d,ebx
+ add ebx,edi
+ mov r13d,r9d
+ add r14d,ebx
+ shrd r13d,r13d,14
+ mov ebx,r14d
+ mov r12d,r10d
+ shrd r14d,r14d,9
+ xor r13d,r9d
+ xor r12d,r11d
+ shrd r13d,r13d,5
+ xor r14d,ebx
+ and r12d,r9d
+ xor r13d,r9d
+ add eax,DWORD[28+rsp]
+ mov edi,ebx
+ xor r12d,r11d
+ shrd r14d,r14d,11
+ xor edi,ecx
+ add eax,r12d
+ shrd r13d,r13d,6
+ and r15d,edi
+ xor r14d,ebx
+ add eax,r13d
+ xor r15d,ecx
+ shrd r14d,r14d,2
+ add r8d,eax
+ add eax,r15d
+ mov r13d,r8d
+ add r14d,eax
+ shrd r13d,r13d,14
+ mov eax,r14d
+ mov r12d,r9d
+ shrd r14d,r14d,9
+ xor r13d,r8d
+ xor r12d,r10d
+ shrd r13d,r13d,5
+ xor r14d,eax
+ and r12d,r8d
+ xor r13d,r8d
+ add r11d,DWORD[32+rsp]
+ mov r15d,eax
+ xor r12d,r10d
+ shrd r14d,r14d,11
+ xor r15d,ebx
+ add r11d,r12d
+ shrd r13d,r13d,6
+ and edi,r15d
+ xor r14d,eax
+ add r11d,r13d
+ xor edi,ebx
+ shrd r14d,r14d,2
+ add edx,r11d
+ add r11d,edi
+ mov r13d,edx
+ add r14d,r11d
+ shrd r13d,r13d,14
+ mov r11d,r14d
+ mov r12d,r8d
+ shrd r14d,r14d,9
+ xor r13d,edx
+ xor r12d,r9d
+ shrd r13d,r13d,5
+ xor r14d,r11d
+ and r12d,edx
+ xor r13d,edx
+ add r10d,DWORD[36+rsp]
+ mov edi,r11d
+ xor r12d,r9d
+ shrd r14d,r14d,11
+ xor edi,eax
+ add r10d,r12d
+ shrd r13d,r13d,6
+ and r15d,edi
+ xor r14d,r11d
+ add r10d,r13d
+ xor r15d,eax
+ shrd r14d,r14d,2
+ add ecx,r10d
+ add r10d,r15d
+ mov r13d,ecx
+ add r14d,r10d
+ shrd r13d,r13d,14
+ mov r10d,r14d
+ mov r12d,edx
+ shrd r14d,r14d,9
+ xor r13d,ecx
+ xor r12d,r8d
+ shrd r13d,r13d,5
+ xor r14d,r10d
+ and r12d,ecx
+ xor r13d,ecx
+ add r9d,DWORD[40+rsp]
+ mov r15d,r10d
+ xor r12d,r8d
+ shrd r14d,r14d,11
+ xor r15d,r11d
+ add r9d,r12d
+ shrd r13d,r13d,6
+ and edi,r15d
+ xor r14d,r10d
+ add r9d,r13d
+ xor edi,r11d
+ shrd r14d,r14d,2
+ add ebx,r9d
+ add r9d,edi
+ mov r13d,ebx
+ add r14d,r9d
+ shrd r13d,r13d,14
+ mov r9d,r14d
+ mov r12d,ecx
+ shrd r14d,r14d,9
+ xor r13d,ebx
+ xor r12d,edx
+ shrd r13d,r13d,5
+ xor r14d,r9d
+ and r12d,ebx
+ xor r13d,ebx
+ add r8d,DWORD[44+rsp]
+ mov edi,r9d
+ xor r12d,edx
+ shrd r14d,r14d,11
+ xor edi,r10d
+ add r8d,r12d
+ shrd r13d,r13d,6
+ and r15d,edi
+ xor r14d,r9d
+ add r8d,r13d
+ xor r15d,r10d
+ shrd r14d,r14d,2
+ add eax,r8d
+ add r8d,r15d
+ mov r13d,eax
+ add r14d,r8d
+ shrd r13d,r13d,14
+ mov r8d,r14d
+ mov r12d,ebx
+ shrd r14d,r14d,9
+ xor r13d,eax
+ xor r12d,ecx
+ shrd r13d,r13d,5
+ xor r14d,r8d
+ and r12d,eax
+ xor r13d,eax
+ add edx,DWORD[48+rsp]
+ mov r15d,r8d
+ xor r12d,ecx
+ shrd r14d,r14d,11
+ xor r15d,r9d
+ add edx,r12d
+ shrd r13d,r13d,6
+ and edi,r15d
+ xor r14d,r8d
+ add edx,r13d
+ xor edi,r9d
+ shrd r14d,r14d,2
+ add r11d,edx
+ add edx,edi
+ mov r13d,r11d
+ add r14d,edx
+ shrd r13d,r13d,14
+ mov edx,r14d
+ mov r12d,eax
+ shrd r14d,r14d,9
+ xor r13d,r11d
+ xor r12d,ebx
+ shrd r13d,r13d,5
+ xor r14d,edx
+ and r12d,r11d
+ xor r13d,r11d
+ add ecx,DWORD[52+rsp]
+ mov edi,edx
+ xor r12d,ebx
+ shrd r14d,r14d,11
+ xor edi,r8d
+ add ecx,r12d
+ shrd r13d,r13d,6
+ and r15d,edi
+ xor r14d,edx
+ add ecx,r13d
+ xor r15d,r8d
+ shrd r14d,r14d,2
+ add r10d,ecx
+ add ecx,r15d
+ mov r13d,r10d
+ add r14d,ecx
+ shrd r13d,r13d,14
+ mov ecx,r14d
+ mov r12d,r11d
+ shrd r14d,r14d,9
+ xor r13d,r10d
+ xor r12d,eax
+ shrd r13d,r13d,5
+ xor r14d,ecx
+ and r12d,r10d
+ xor r13d,r10d
+ add ebx,DWORD[56+rsp]
+ mov r15d,ecx
+ xor r12d,eax
+ shrd r14d,r14d,11
+ xor r15d,edx
+ add ebx,r12d
+ shrd r13d,r13d,6
+ and edi,r15d
+ xor r14d,ecx
+ add ebx,r13d
+ xor edi,edx
+ shrd r14d,r14d,2
+ add r9d,ebx
+ add ebx,edi
+ mov r13d,r9d
+ add r14d,ebx
+ shrd r13d,r13d,14
+ mov ebx,r14d
+ mov r12d,r10d
+ shrd r14d,r14d,9
+ xor r13d,r9d
+ xor r12d,r11d
+ shrd r13d,r13d,5
+ xor r14d,ebx
+ and r12d,r9d
+ xor r13d,r9d
+ add eax,DWORD[60+rsp]
+ mov edi,ebx
+ xor r12d,r11d
+ shrd r14d,r14d,11
+ xor edi,ecx
+ add eax,r12d
+ shrd r13d,r13d,6
+ and r15d,edi
+ xor r14d,ebx
+ add eax,r13d
+ xor r15d,ecx
+ shrd r14d,r14d,2
+ add r8d,eax
+ add eax,r15d
+ mov r13d,r8d
+ add r14d,eax
+ mov rdi,QWORD[((64+0))+rsp]
+ mov eax,r14d
+
+ add eax,DWORD[rdi]
+ lea rsi,[64+rsi]
+ add ebx,DWORD[4+rdi]
+ add ecx,DWORD[8+rdi]
+ add edx,DWORD[12+rdi]
+ add r8d,DWORD[16+rdi]
+ add r9d,DWORD[20+rdi]
+ add r10d,DWORD[24+rdi]
+ add r11d,DWORD[28+rdi]
+
+ cmp rsi,QWORD[((64+16))+rsp]
+
+ mov DWORD[rdi],eax
+ mov DWORD[4+rdi],ebx
+ mov DWORD[8+rdi],ecx
+ mov DWORD[12+rdi],edx
+ mov DWORD[16+rdi],r8d
+ mov DWORD[20+rdi],r9d
+ mov DWORD[24+rdi],r10d
+ mov DWORD[28+rdi],r11d
+ jb NEAR $L$loop_avx
+
+ mov rsi,QWORD[((64+24))+rsp]
+ vzeroupper
+ movaps xmm6,XMMWORD[((64+32))+rsp]
+ movaps xmm7,XMMWORD[((64+48))+rsp]
+ movaps xmm8,XMMWORD[((64+64))+rsp]
+ movaps xmm9,XMMWORD[((64+80))+rsp]
+ mov r15,QWORD[rsi]
+ mov r14,QWORD[8+rsi]
+ mov r13,QWORD[16+rsi]
+ mov r12,QWORD[24+rsi]
+ mov rbp,QWORD[32+rsi]
+ mov rbx,QWORD[40+rsi]
+ lea rsp,[48+rsi]
+$L$epilogue_avx:
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD[16+rsp]
+DB 0F3h,0C3h ;repret
+$L$SEH_end_sha256_block_data_order_avx:
EXTERN __imp_RtlVirtualUnwind

ALIGN 16
@@ -2982,6 +4063,9 @@ ALIGN 4
DD $L$SEH_begin_sha256_block_data_order_ssse3 wrt ..imagebase
DD $L$SEH_end_sha256_block_data_order_ssse3 wrt ..imagebase
DD $L$SEH_info_sha256_block_data_order_ssse3 wrt ..imagebase
+ DD $L$SEH_begin_sha256_block_data_order_avx wrt ..imagebase
+ DD $L$SEH_end_sha256_block_data_order_avx wrt ..imagebase
+ DD $L$SEH_info_sha256_block_data_order_avx wrt ..imagebase
section .xdata rdata align=8
ALIGN 8
$L$SEH_info_sha256_block_data_order:
@@ -2992,3 +4076,7 @@ $L$SEH_info_sha256_block_data_order_ssse3:
DB 9,0,0,0
DD se_handler wrt ..imagebase
DD $L$prologue_ssse3 wrt ..imagebase,$L$epilogue_ssse3 wrt ..imagebase
+$L$SEH_info_sha256_block_data_order_avx:
+DB 9,0,0,0
+ DD se_handler wrt ..imagebase
+ DD $L$prologue_avx wrt ..imagebase,$L$epilogue_avx wrt ..imagebase