Index: third_party/boringssl/win-x86/crypto/chacha/chacha-x86.asm |
diff --git a/third_party/boringssl/win-x86/crypto/chacha/chacha-x86.asm b/third_party/boringssl/win-x86/crypto/chacha/chacha-x86.asm |
new file mode 100644 |
index 0000000000000000000000000000000000000000..3ba31a2b35094e291232eb1dcc60333f9ffc34b0 |
--- /dev/null |
+++ b/third_party/boringssl/win-x86/crypto/chacha/chacha-x86.asm |
@@ -0,0 +1,977 @@ |
+%ifidn __OUTPUT_FORMAT__,obj |
+section code use32 class=code align=64 |
+%elifidn __OUTPUT_FORMAT__,win32 |
+%ifdef __YASM_VERSION_ID__ |
+%if __YASM_VERSION_ID__ < 01010000h |
+%error yasm version 1.1.0 or later needed. |
+%endif |
+; Yasm automatically includes @feat.00 and complains about redefining it. |
+; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html |
+%else |
+$@feat.00 equ 1 |
+%endif |
+section .text code align=64 |
+%else |
+section .text code |
+%endif |
; ----------------------------------------------------------------------
; _ChaCha20_ctr32 -- exported entry point (32-bit x86, NASM syntax).
; Arguments are read from the stack. After the four pushes below:
;   [esp+20] destination pointer   [esp+24] source pointer
;   [esp+28] byte count            [esp+32] pointer to 8 key dwords
;   [esp+36] pointer to 4 counter-block dwords
; Returns at once when the byte count is zero; otherwise it selects the
; SSSE3 path or the integer-only path from _OPENSSL_ia32cap_P.
; ----------------------------------------------------------------------
+global _ChaCha20_ctr32 |
+align 16 |
+_ChaCha20_ctr32: |
+L$_ChaCha20_ctr32_begin: |
; These four registers are popped again before every ret in this file.
+ push ebp |
+ push ebx |
+ push esi |
+ push edi |
; Byte count == 0: skip everything and go straight to the pops.
+ xor eax,eax |
+ cmp eax,DWORD [28+esp] |
+ je NEAR L$000no_data |
; call/pop leaves the run-time address of L$pic_point in eax. The SSSE3
; path adds (L$ssse3_data - L$pic_point) to it to find its constants.
+ call L$pic_point |
+L$pic_point: |
+ pop eax |
; Both tested bits must be set to take the SSSE3 path:
;   16777216 = 1<<24 in dword 0, 512 = 1<<9 in dword 1.
; NOTE(review): presumably the CPUID FXSR and SSSE3 feature flags --
; confirm against the definition of OPENSSL_ia32cap_P.
+ lea ebp,[_OPENSSL_ia32cap_P] |
+ test DWORD [ebp],16777216 |
+ jz NEAR L$001x86 |
+ test DWORD [4+ebp],512 |
+ jz NEAR L$001x86 |
+ jmp NEAR L$ssse3_shortcut |
; ----------------------------------------------------------------------
; L$001x86 -- frame set-up for the integer-only path.
; esi = key pointer (4th argument), edi = counter pointer (5th argument).
; A 132-byte frame is reserved; the code below and the round loop use:
;   [esp+0..63]     working state, 16 dwords
;   [esp+80..111]   copy of the 8 key dwords
;   [esp+112..127]  copy of the 4 counter-block dwords
;   [esp+128]       round-loop counter
; With the frame in place the first three arguments are found at
; [esp+152] (destination), [esp+156] (source) and [esp+160] (byte count).
; ----------------------------------------------------------------------
+L$001x86: |
+ mov esi,DWORD [32+esp] |
+ mov edi,DWORD [36+esp] |
+ sub esp,132 |
; Key dwords 0..3 -> [esp+80..92]
+ mov eax,DWORD [esi] |
+ mov ebx,DWORD [4+esi] |
+ mov ecx,DWORD [8+esi] |
+ mov edx,DWORD [12+esi] |
+ mov DWORD [80+esp],eax |
+ mov DWORD [84+esp],ebx |
+ mov DWORD [88+esp],ecx |
+ mov DWORD [92+esp],edx |
; Key dwords 4..7 -> [esp+96..108]
+ mov eax,DWORD [16+esi] |
+ mov ebx,DWORD [20+esi] |
+ mov ecx,DWORD [24+esi] |
+ mov edx,DWORD [28+esi] |
+ mov DWORD [96+esp],eax |
+ mov DWORD [100+esp],ebx |
+ mov DWORD [104+esp],ecx |
+ mov DWORD [108+esp],edx |
; Counter-block dwords 0..3 -> [esp+112..124]
+ mov eax,DWORD [edi] |
+ mov ebx,DWORD [4+edi] |
+ mov ecx,DWORD [8+edi] |
+ mov edx,DWORD [12+edi] |
; The first counter dword is stored one too low on purpose: L$002entry
; adds 1 to it before every block, including the first.
+ sub eax,1 |
+ mov DWORD [112+esp],eax |
+ mov DWORD [116+esp],ebx |
+ mov DWORD [120+esp],ecx |
+ mov DWORD [124+esp],edx |
+ jmp NEAR L$002entry |
; ----------------------------------------------------------------------
; L$003outer_loop -- reached after each full 64-byte block. Writes the
; advanced source pointer (ebx), destination pointer (eax) and remaining
; byte count (ecx) back into the argument slots at [esp+156], [esp+152]
; and [esp+160].
; L$002entry -- builds the 16-dword working state for the next block.
; State word x[i] lives at [esp+4*i], except that x0, x4, x8, x9 and x12
; start out in eax, ebp, ecx, esi and edx.
; ----------------------------------------------------------------------
+align 16 |
+L$003outer_loop: |
+ mov DWORD [156+esp],ebx |
+ mov DWORD [152+esp],eax |
+ mov DWORD [160+esp],ecx |
+L$002entry: |
; x0..x3 are the constants 0x61707865, 0x3320646e, 0x79622d32,
; 0x6b206574 (the ASCII text "expand 32-byte k" as little-endian dwords).
+ mov eax,1634760805 |
+ mov DWORD [4+esp],857760878 |
+ mov DWORD [8+esp],2036477234 |
+ mov DWORD [12+esp],1797285236 |
; x5, x6, x10, x11, x13, x14 are copied from the saved key/counter words.
+ mov ebx,DWORD [84+esp] |
+ mov ebp,DWORD [88+esp] |
+ mov ecx,DWORD [104+esp] |
+ mov esi,DWORD [108+esp] |
+ mov edx,DWORD [116+esp] |
+ mov edi,DWORD [120+esp] |
+ mov DWORD [20+esp],ebx |
+ mov DWORD [24+esp],ebp |
+ mov DWORD [40+esp],ecx |
+ mov DWORD [44+esp],esi |
+ mov DWORD [52+esp],edx |
+ mov DWORD [56+esp],edi |
; x7 and x15 go to memory; x12 (edx), x4 (ebp), x8 (ecx), x9 (esi) stay
; in registers.
+ mov ebx,DWORD [92+esp] |
+ mov edi,DWORD [124+esp] |
+ mov edx,DWORD [112+esp] |
+ mov ebp,DWORD [80+esp] |
+ mov ecx,DWORD [96+esp] |
+ mov esi,DWORD [100+esp] |
; Advance the block counter and keep the new value for the add-back
; that follows the round loop.
+ add edx,1 |
+ mov DWORD [28+esp],ebx |
+ mov DWORD [60+esp],edi |
+ mov DWORD [112+esp],edx |
; ebx = number of round-loop iterations.
+ mov ebx,10 |
+ jmp NEAR L$004loop |
; ----------------------------------------------------------------------
; L$004loop -- round loop of the integer-only path, followed by the
; output of one full 64-byte block.
; Each of the 10 iterations runs eight add/xor/rotate groups using the
; rotate amounts 16, 12, 8, 7. The first four groups work on the word
; sets (0,4,8,12) (1,5,9,13) (2,6,10,14) (3,7,11,15); the last four on
; (0,5,10,15) (1,6,11,12) (2,7,8,13) (3,4,9,14).
; The groups are interleaved: loads for the next group and stores for
; the previous one are mixed into the current one. The markers below
; sit on the first add of each group.
; The iteration counter is parked at [esp+128], so ebx is free inside.
; ----------------------------------------------------------------------
+align 16 |
+L$004loop: |
; group (x0,x4,x8,x12)
+ add eax,ebp |
+ mov DWORD [128+esp],ebx |
+ mov ebx,ebp |
+ xor edx,eax |
+ rol edx,16 |
+ add ecx,edx |
+ xor ebx,ecx |
+ mov edi,DWORD [52+esp] |
+ rol ebx,12 |
+ mov ebp,DWORD [20+esp] |
+ add eax,ebx |
+ xor edx,eax |
+ mov DWORD [esp],eax |
+ rol edx,8 |
+ mov eax,DWORD [4+esp] |
+ add ecx,edx |
+ mov DWORD [48+esp],edx |
+ xor ebx,ecx |
; group (x1,x5,x9,x13)
+ add eax,ebp |
+ rol ebx,7 |
+ xor edi,eax |
+ mov DWORD [32+esp],ecx |
+ rol edi,16 |
+ mov DWORD [16+esp],ebx |
+ add esi,edi |
+ mov ecx,DWORD [40+esp] |
+ xor ebp,esi |
+ mov edx,DWORD [56+esp] |
+ rol ebp,12 |
+ mov ebx,DWORD [24+esp] |
+ add eax,ebp |
+ xor edi,eax |
+ mov DWORD [4+esp],eax |
+ rol edi,8 |
+ mov eax,DWORD [8+esp] |
+ add esi,edi |
+ mov DWORD [52+esp],edi |
+ xor ebp,esi |
; group (x2,x6,x10,x14)
+ add eax,ebx |
+ rol ebp,7 |
+ xor edx,eax |
+ mov DWORD [36+esp],esi |
+ rol edx,16 |
+ mov DWORD [20+esp],ebp |
+ add ecx,edx |
+ mov esi,DWORD [44+esp] |
+ xor ebx,ecx |
+ mov edi,DWORD [60+esp] |
+ rol ebx,12 |
+ mov ebp,DWORD [28+esp] |
+ add eax,ebx |
+ xor edx,eax |
+ mov DWORD [8+esp],eax |
+ rol edx,8 |
+ mov eax,DWORD [12+esp] |
+ add ecx,edx |
+ mov DWORD [56+esp],edx |
+ xor ebx,ecx |
; group (x3,x7,x11,x15)
+ add eax,ebp |
+ rol ebx,7 |
+ xor edi,eax |
+ rol edi,16 |
+ mov DWORD [24+esp],ebx |
+ add esi,edi |
+ xor ebp,esi |
+ rol ebp,12 |
+ mov ebx,DWORD [20+esp] |
+ add eax,ebp |
+ xor edi,eax |
+ mov DWORD [12+esp],eax |
+ rol edi,8 |
+ mov eax,DWORD [esp] |
+ add esi,edi |
+ mov edx,edi |
+ xor ebp,esi |
; group (x0,x5,x10,x15)
+ add eax,ebx |
+ rol ebp,7 |
+ xor edx,eax |
+ rol edx,16 |
+ mov DWORD [28+esp],ebp |
+ add ecx,edx |
+ xor ebx,ecx |
+ mov edi,DWORD [48+esp] |
+ rol ebx,12 |
+ mov ebp,DWORD [24+esp] |
+ add eax,ebx |
+ xor edx,eax |
+ mov DWORD [esp],eax |
+ rol edx,8 |
+ mov eax,DWORD [4+esp] |
+ add ecx,edx |
+ mov DWORD [60+esp],edx |
+ xor ebx,ecx |
; group (x1,x6,x11,x12)
+ add eax,ebp |
+ rol ebx,7 |
+ xor edi,eax |
+ mov DWORD [40+esp],ecx |
+ rol edi,16 |
+ mov DWORD [20+esp],ebx |
+ add esi,edi |
+ mov ecx,DWORD [32+esp] |
+ xor ebp,esi |
+ mov edx,DWORD [52+esp] |
+ rol ebp,12 |
+ mov ebx,DWORD [28+esp] |
+ add eax,ebp |
+ xor edi,eax |
+ mov DWORD [4+esp],eax |
+ rol edi,8 |
+ mov eax,DWORD [8+esp] |
+ add esi,edi |
+ mov DWORD [48+esp],edi |
+ xor ebp,esi |
; group (x2,x7,x8,x13)
+ add eax,ebx |
+ rol ebp,7 |
+ xor edx,eax |
+ mov DWORD [44+esp],esi |
+ rol edx,16 |
+ mov DWORD [24+esp],ebp |
+ add ecx,edx |
+ mov esi,DWORD [36+esp] |
+ xor ebx,ecx |
+ mov edi,DWORD [56+esp] |
+ rol ebx,12 |
+ mov ebp,DWORD [16+esp] |
+ add eax,ebx |
+ xor edx,eax |
+ mov DWORD [8+esp],eax |
+ rol edx,8 |
+ mov eax,DWORD [12+esp] |
+ add ecx,edx |
+ mov DWORD [52+esp],edx |
+ xor ebx,ecx |
; group (x3,x4,x9,x14)
+ add eax,ebp |
+ rol ebx,7 |
+ xor edi,eax |
+ rol edi,16 |
+ mov DWORD [28+esp],ebx |
+ add esi,edi |
+ xor ebp,esi |
+ mov edx,DWORD [48+esp] |
+ rol ebp,12 |
; Reload the iteration counter parked at the top of the loop.
+ mov ebx,DWORD [128+esp] |
+ add eax,ebp |
+ xor edi,eax |
+ mov DWORD [12+esp],eax |
+ rol edi,8 |
+ mov eax,DWORD [esp] |
+ add esi,edi |
+ mov DWORD [56+esp],edi |
+ xor ebp,esi |
+ rol ebp,7 |
+ dec ebx |
+ jnz NEAR L$004loop |
; Rounds done. In registers now: eax=x0, ebp=x4, ecx=x8, esi=x9,
; edx=x12, edi=x14. Add the block's input words back to them.
; ebx = remaining byte count; fewer than 64 bytes goes to L$005tail.
+ mov ebx,DWORD [160+esp] |
+ add eax,1634760805 |
+ add ebp,DWORD [80+esp] |
+ add ecx,DWORD [96+esp] |
+ add esi,DWORD [100+esp] |
+ cmp ebx,64 |
+ jb NEAR L$005tail |
; Full block: ebx = source pointer, eax (loaded below) = destination.
; Output words 0, 4, 8, 9, 12, 14 first. Word 0 is parked at [esp]
; because eax is needed for the destination pointer.
+ mov ebx,DWORD [156+esp] |
+ add edx,DWORD [112+esp] |
+ add edi,DWORD [120+esp] |
+ xor eax,DWORD [ebx] |
+ xor ebp,DWORD [16+ebx] |
+ mov DWORD [esp],eax |
+ mov eax,DWORD [152+esp] |
+ xor ecx,DWORD [32+ebx] |
+ xor esi,DWORD [36+ebx] |
+ xor edx,DWORD [48+ebx] |
+ xor edi,DWORD [56+ebx] |
+ mov DWORD [16+eax],ebp |
+ mov DWORD [32+eax],ecx |
+ mov DWORD [36+eax],esi |
+ mov DWORD [48+eax],edx |
+ mov DWORD [56+eax],edi |
; Output words 1, 2, 3, 5, 6.
+ mov ebp,DWORD [4+esp] |
+ mov ecx,DWORD [8+esp] |
+ mov esi,DWORD [12+esp] |
+ mov edx,DWORD [20+esp] |
+ mov edi,DWORD [24+esp] |
+ add ebp,857760878 |
+ add ecx,2036477234 |
+ add esi,1797285236 |
+ add edx,DWORD [84+esp] |
+ add edi,DWORD [88+esp] |
+ xor ebp,DWORD [4+ebx] |
+ xor ecx,DWORD [8+ebx] |
+ xor esi,DWORD [12+ebx] |
+ xor edx,DWORD [20+ebx] |
+ xor edi,DWORD [24+ebx] |
+ mov DWORD [4+eax],ebp |
+ mov DWORD [8+eax],ecx |
+ mov DWORD [12+eax],esi |
+ mov DWORD [20+eax],edx |
+ mov DWORD [24+eax],edi |
; Output words 7, 10, 11, 13, 15.
+ mov ebp,DWORD [28+esp] |
+ mov ecx,DWORD [40+esp] |
+ mov esi,DWORD [44+esp] |
+ mov edx,DWORD [52+esp] |
+ mov edi,DWORD [60+esp] |
+ add ebp,DWORD [92+esp] |
+ add ecx,DWORD [104+esp] |
+ add esi,DWORD [108+esp] |
+ add edx,DWORD [116+esp] |
+ add edi,DWORD [124+esp] |
+ xor ebp,DWORD [28+ebx] |
+ xor ecx,DWORD [40+ebx] |
+ xor esi,DWORD [44+ebx] |
+ xor edx,DWORD [52+ebx] |
+ xor edi,DWORD [60+ebx] |
; Advance source and destination by 64 bytes and write out word 0 that
; was parked at [esp]. ecx = remaining bytes after this block.
+ lea ebx,[64+ebx] |
+ mov DWORD [28+eax],ebp |
+ mov ebp,DWORD [esp] |
+ mov DWORD [40+eax],ecx |
+ mov ecx,DWORD [160+esp] |
+ mov DWORD [44+eax],esi |
+ mov DWORD [52+eax],edx |
+ mov DWORD [60+eax],edi |
+ mov DWORD [eax],ebp |
+ lea eax,[64+eax] |
+ sub ecx,64 |
+ jnz NEAR L$003outer_loop |
+ jmp NEAR L$006done |
; ----------------------------------------------------------------------
; L$005tail -- fewer than 64 bytes remain (ebx = that byte count).
; Finishes adding the input words to the round output and stores the
; complete 64-byte keystream block at [esp+0..63], so that the byte loop
; that follows can index it. On exit: ebp = source pointer,
; ecx = destination pointer, esi = 0 (byte index), eax = edx = 0.
; ----------------------------------------------------------------------
+L$005tail: |
; Words 12 and 14 still need their input added; words 0, 4, 8, 9 were
; already completed in registers before the branch here.
+ add edx,DWORD [112+esp] |
+ add edi,DWORD [120+esp] |
+ mov DWORD [esp],eax |
+ mov DWORD [16+esp],ebp |
+ mov DWORD [32+esp],ecx |
+ mov DWORD [36+esp],esi |
+ mov DWORD [48+esp],edx |
+ mov DWORD [56+esp],edi |
; Words 1, 2, 3, 5, 6.
+ mov ebp,DWORD [4+esp] |
+ mov ecx,DWORD [8+esp] |
+ mov esi,DWORD [12+esp] |
+ mov edx,DWORD [20+esp] |
+ mov edi,DWORD [24+esp] |
+ add ebp,857760878 |
+ add ecx,2036477234 |
+ add esi,1797285236 |
+ add edx,DWORD [84+esp] |
+ add edi,DWORD [88+esp] |
+ mov DWORD [4+esp],ebp |
+ mov DWORD [8+esp],ecx |
+ mov DWORD [12+esp],esi |
+ mov DWORD [20+esp],edx |
+ mov DWORD [24+esp],edi |
; Words 7, 10, 11, 13, 15, interleaved with loading the source (ebp) and
; destination (ecx) pointers and clearing the byte-loop registers.
+ mov ebp,DWORD [28+esp] |
+ mov ecx,DWORD [40+esp] |
+ mov esi,DWORD [44+esp] |
+ mov edx,DWORD [52+esp] |
+ mov edi,DWORD [60+esp] |
+ add ebp,DWORD [92+esp] |
+ add ecx,DWORD [104+esp] |
+ add esi,DWORD [108+esp] |
+ add edx,DWORD [116+esp] |
+ add edi,DWORD [124+esp] |
+ mov DWORD [28+esp],ebp |
+ mov ebp,DWORD [156+esp] |
+ mov DWORD [40+esp],ecx |
+ mov ecx,DWORD [152+esp] |
+ mov DWORD [44+esp],esi |
+ xor esi,esi |
+ mov DWORD [52+esp],edx |
+ mov DWORD [60+esp],edi |
+ xor eax,eax |
+ xor edx,edx |
; ----------------------------------------------------------------------
; L$007tail_loop -- byte-at-a-time XOR for the final partial block.
;   ebp = source, ecx = destination, esi = byte index,
;   ebx = bytes left, [esp+esi] = keystream byte.
; L$006done releases the 132-byte frame; L$000no_data is the common
; exit that restores the four saved registers.
; ----------------------------------------------------------------------
+L$007tail_loop: |
+ mov al,BYTE [ebp*1+esi] |
+ mov dl,BYTE [esi*1+esp] |
; esi is advanced before the store, hence the -1 in the store address.
+ lea esi,[1+esi] |
+ xor al,dl |
+ mov BYTE [esi*1+ecx-1],al |
+ dec ebx |
+ jnz NEAR L$007tail_loop |
+L$006done: |
+ add esp,132 |
+L$000no_data: |
+ pop edi |
+ pop esi |
+ pop ebx |
+ pop ebp |
+ ret |
; ----------------------------------------------------------------------
; _ChaCha20_ssse3 -- exported SSSE3 entry point. L$ssse3_shortcut is the
; label _ChaCha20_ctr32 jumps to after its own four pushes.
; Registers after the argument loads:
;   edi = destination, esi = source, ecx = byte count,
;   edx = key pointer, ebx = counter-block pointer.
; NOTE(review): eax is consumed by the lea below as the address of
; L$pic_point, which is only set up by the call/pop in _ChaCha20_ctr32.
; Nothing between this label and that lea writes eax, and there is no
; zero-length check on this path either. A direct call to
; _ChaCha20_ssse3 therefore looks unsupported -- confirm the symbol is
; only ever reached through L$ssse3_shortcut.
; ----------------------------------------------------------------------
+global _ChaCha20_ssse3 |
+align 16 |
+_ChaCha20_ssse3: |
+L$_ChaCha20_ssse3_begin: |
+ push ebp |
+ push ebx |
+ push esi |
+ push edi |
+L$ssse3_shortcut: |
+ mov edi,DWORD [20+esp] |
+ mov esi,DWORD [24+esp] |
+ mov ecx,DWORD [28+esp] |
+ mov edx,DWORD [32+esp] |
+ mov ebx,DWORD [36+esp] |
; Build a 64-byte-aligned frame and remember the caller's esp at
; [esp+512]; L$011done restores it from there.
+ mov ebp,esp |
+ sub esp,524 |
+ and esp,-64 |
+ mov DWORD [512+esp],ebp |
; eax = address of the L$ssse3_data constant table.
+ lea eax,[(L$ssse3_data-L$pic_point)+eax] |
; xmm3 = the four counter-block dwords.
+ movdqu xmm3,[ebx] |
; Fewer than 256 bytes: use the one-block-at-a-time path only.
+ cmp ecx,256 |
+ jb NEAR L$0081x |
; Four-blocks-at-a-time path. Key and counter pointers are kept at
; [esp+516] and [esp+520] for the hand-over to the one-block path.
+ mov DWORD [516+esp],edx |
+ mov DWORD [520+esp],ebx |
+ sub ecx,256 |
; ebp = esp+384 is the base of the saved input state: 16 slots of
; 16 bytes at [ebp-128 .. ebp+112], each holding one state word
; replicated for the four parallel blocks.
+ lea ebp,[384+esp] |
+ movdqu xmm7,[edx] |
; pshufd with 0 / 85 / 170 / 255 broadcasts dword lane 0 / 1 / 2 / 3.
+ pshufd xmm0,xmm3,0 |
+ pshufd xmm1,xmm3,85 |
+ pshufd xmm2,xmm3,170 |
+ pshufd xmm3,xmm3,255 |
; Per-block counters: broadcast counter + {0,1,2,3}, then minus
; {4,4,4,4} because L$009outer_loop adds {4,4,4,4} before every batch.
+ paddd xmm0,[48+eax] |
+ pshufd xmm4,xmm7,0 |
+ pshufd xmm5,xmm7,85 |
+ psubd xmm0,[64+eax] |
+ pshufd xmm6,xmm7,170 |
+ pshufd xmm7,xmm7,255 |
; Slots 12..15 (counter block), then slots 4..7 (key dwords 0..3).
+ movdqa [64+ebp],xmm0 |
+ movdqa [80+ebp],xmm1 |
+ movdqa [96+ebp],xmm2 |
+ movdqa [112+ebp],xmm3 |
+ movdqu xmm3,[16+edx] |
+ movdqa [ebp-64],xmm4 |
+ movdqa [ebp-48],xmm5 |
+ movdqa [ebp-32],xmm6 |
+ movdqa [ebp-16],xmm7 |
; xmm7 = the four constant words from the table at offset 32.
+ movdqa xmm7,[32+eax] |
; ebx = esp+128 is the base of the working state used by the rounds,
; laid out like the saved state: slots at [ebx-128 .. ebx+112].
+ lea ebx,[128+esp] |
+ pshufd xmm0,xmm3,0 |
+ pshufd xmm1,xmm3,85 |
+ pshufd xmm2,xmm3,170 |
+ pshufd xmm3,xmm3,255 |
+ pshufd xmm4,xmm7,0 |
+ pshufd xmm5,xmm7,85 |
+ pshufd xmm6,xmm7,170 |
+ pshufd xmm7,xmm7,255 |
; Slots 8..11 (key dwords 4..7) and slots 0..3 (constants).
+ movdqa [ebp],xmm0 |
+ movdqa [16+ebp],xmm1 |
+ movdqa [32+ebp],xmm2 |
+ movdqa [48+ebp],xmm3 |
+ movdqa [ebp-128],xmm4 |
+ movdqa [ebp-112],xmm5 |
+ movdqa [ebp-96],xmm6 |
+ movdqa [ebp-80],xmm7 |
; Source and destination are biased by +128 so that the four blocks of
; a batch are addressed as [-128], [-64], [0], [+64].
+ lea esi,[128+esi] |
+ lea edi,[128+edi] |
+ jmp NEAR L$009outer_loop |
; ----------------------------------------------------------------------
; L$009outer_loop -- start of one 256-byte batch (four blocks at once).
; Copies the saved input state (base ebp) into the working state (base
; ebx), adds {4,4,4,4} to the per-block counters and writes them back to
; the saved state. Slots 0, 4, 8, 9 and 12 are not copied to memory
; here; they enter the round loop in xmm0, xmm3, xmm4, xmm5 and xmm6.
; ----------------------------------------------------------------------
+align 16 |
+L$009outer_loop: |
; Slots 1..3 and 5..7.
+ movdqa xmm1,[ebp-112] |
+ movdqa xmm2,[ebp-96] |
+ movdqa xmm3,[ebp-80] |
+ movdqa xmm5,[ebp-48] |
+ movdqa xmm6,[ebp-32] |
+ movdqa xmm7,[ebp-16] |
+ movdqa [ebx-112],xmm1 |
+ movdqa [ebx-96],xmm2 |
+ movdqa [ebx-80],xmm3 |
+ movdqa [ebx-48],xmm5 |
+ movdqa [ebx-32],xmm6 |
+ movdqa [ebx-16],xmm7 |
; Slots 10..15; slot 12 (xmm4) is the counter vector and gets +4.
+ movdqa xmm2,[32+ebp] |
+ movdqa xmm3,[48+ebp] |
+ movdqa xmm4,[64+ebp] |
+ movdqa xmm5,[80+ebp] |
+ movdqa xmm6,[96+ebp] |
+ movdqa xmm7,[112+ebp] |
+ paddd xmm4,[64+eax] |
+ movdqa [32+ebx],xmm2 |
+ movdqa [48+ebx],xmm3 |
+ movdqa [64+ebx],xmm4 |
+ movdqa [80+ebx],xmm5 |
+ movdqa [96+ebx],xmm6 |
+ movdqa [112+ebx],xmm7 |
; Keep the advanced counters for the add-back and for the next batch.
+ movdqa [64+ebp],xmm4 |
; Register-resident slots: xmm0=x0, xmm6=x12, xmm3=x4, xmm4=x8, xmm5=x9.
+ movdqa xmm0,[ebp-128] |
+ movdqa xmm6,xmm4 |
+ movdqa xmm3,[ebp-64] |
+ movdqa xmm4,[ebp] |
+ movdqa xmm5,[16+ebp] |
; edx = number of round-loop iterations.
+ mov edx,10 |
+ nop |
; ----------------------------------------------------------------------
; L$010loop -- round loop of the four-block SSSE3 path, followed by the
; output of one 256-byte batch and the hand-over to the one-block path.
; Every xmm register holds the same state word of four different blocks.
; Each of the 10 iterations runs eight add/xor/rotate groups:
;   rotate by 16 : pshufb with the mask at [eax]
;   rotate by 12 : pslld 12 / psrld 20 / por
;   rotate by 8  : pshufb with the mask at [eax+16]
;   rotate by 7  : pslld 7 / psrld 25 / por
; The first four groups work on slots (0,4,8,12) (1,5,9,13) (2,6,10,14)
; (3,7,11,15); the last four on (0,5,10,15) (1,6,11,12) (2,7,8,13)
; (3,4,9,14). Groups are interleaved; the markers below sit on the
; first add of each group. Slot i lives at [ebx-128+16*i].
; ----------------------------------------------------------------------
+align 16 |
+L$010loop: |
; group (x0,x4,x8,x12)
+ paddd xmm0,xmm3 |
+ movdqa xmm2,xmm3 |
+ pxor xmm6,xmm0 |
+ pshufb xmm6,[eax] |
+ paddd xmm4,xmm6 |
+ pxor xmm2,xmm4 |
+ movdqa xmm3,[ebx-48] |
+ movdqa xmm1,xmm2 |
+ pslld xmm2,12 |
+ psrld xmm1,20 |
+ por xmm2,xmm1 |
+ movdqa xmm1,[ebx-112] |
+ paddd xmm0,xmm2 |
+ movdqa xmm7,[80+ebx] |
+ pxor xmm6,xmm0 |
+ movdqa [ebx-128],xmm0 |
+ pshufb xmm6,[16+eax] |
+ paddd xmm4,xmm6 |
+ movdqa [64+ebx],xmm6 |
+ pxor xmm2,xmm4 |
; group (x1,x5,x9,x13)
+ paddd xmm1,xmm3 |
+ movdqa xmm0,xmm2 |
+ pslld xmm2,7 |
+ psrld xmm0,25 |
+ pxor xmm7,xmm1 |
+ por xmm2,xmm0 |
+ movdqa [ebx],xmm4 |
+ pshufb xmm7,[eax] |
+ movdqa [ebx-64],xmm2 |
+ paddd xmm5,xmm7 |
+ movdqa xmm4,[32+ebx] |
+ pxor xmm3,xmm5 |
+ movdqa xmm2,[ebx-32] |
+ movdqa xmm0,xmm3 |
+ pslld xmm3,12 |
+ psrld xmm0,20 |
+ por xmm3,xmm0 |
+ movdqa xmm0,[ebx-96] |
+ paddd xmm1,xmm3 |
+ movdqa xmm6,[96+ebx] |
+ pxor xmm7,xmm1 |
+ movdqa [ebx-112],xmm1 |
+ pshufb xmm7,[16+eax] |
+ paddd xmm5,xmm7 |
+ movdqa [80+ebx],xmm7 |
+ pxor xmm3,xmm5 |
; group (x2,x6,x10,x14)
+ paddd xmm0,xmm2 |
+ movdqa xmm1,xmm3 |
+ pslld xmm3,7 |
+ psrld xmm1,25 |
+ pxor xmm6,xmm0 |
+ por xmm3,xmm1 |
+ movdqa [16+ebx],xmm5 |
+ pshufb xmm6,[eax] |
+ movdqa [ebx-48],xmm3 |
+ paddd xmm4,xmm6 |
+ movdqa xmm5,[48+ebx] |
+ pxor xmm2,xmm4 |
+ movdqa xmm3,[ebx-16] |
+ movdqa xmm1,xmm2 |
+ pslld xmm2,12 |
+ psrld xmm1,20 |
+ por xmm2,xmm1 |
+ movdqa xmm1,[ebx-80] |
+ paddd xmm0,xmm2 |
+ movdqa xmm7,[112+ebx] |
+ pxor xmm6,xmm0 |
+ movdqa [ebx-96],xmm0 |
+ pshufb xmm6,[16+eax] |
+ paddd xmm4,xmm6 |
+ movdqa [96+ebx],xmm6 |
+ pxor xmm2,xmm4 |
; group (x3,x7,x11,x15)
+ paddd xmm1,xmm3 |
+ movdqa xmm0,xmm2 |
+ pslld xmm2,7 |
+ psrld xmm0,25 |
+ pxor xmm7,xmm1 |
+ por xmm2,xmm0 |
+ pshufb xmm7,[eax] |
+ movdqa [ebx-32],xmm2 |
+ paddd xmm5,xmm7 |
+ pxor xmm3,xmm5 |
+ movdqa xmm2,[ebx-48] |
+ movdqa xmm0,xmm3 |
+ pslld xmm3,12 |
+ psrld xmm0,20 |
+ por xmm3,xmm0 |
+ movdqa xmm0,[ebx-128] |
+ paddd xmm1,xmm3 |
+ pxor xmm7,xmm1 |
+ movdqa [ebx-80],xmm1 |
+ pshufb xmm7,[16+eax] |
+ paddd xmm5,xmm7 |
+ movdqa xmm6,xmm7 |
+ pxor xmm3,xmm5 |
; group (x0,x5,x10,x15)
+ paddd xmm0,xmm2 |
+ movdqa xmm1,xmm3 |
+ pslld xmm3,7 |
+ psrld xmm1,25 |
+ pxor xmm6,xmm0 |
+ por xmm3,xmm1 |
+ pshufb xmm6,[eax] |
+ movdqa [ebx-16],xmm3 |
+ paddd xmm4,xmm6 |
+ pxor xmm2,xmm4 |
+ movdqa xmm3,[ebx-32] |
+ movdqa xmm1,xmm2 |
+ pslld xmm2,12 |
+ psrld xmm1,20 |
+ por xmm2,xmm1 |
+ movdqa xmm1,[ebx-112] |
+ paddd xmm0,xmm2 |
+ movdqa xmm7,[64+ebx] |
+ pxor xmm6,xmm0 |
+ movdqa [ebx-128],xmm0 |
+ pshufb xmm6,[16+eax] |
+ paddd xmm4,xmm6 |
+ movdqa [112+ebx],xmm6 |
+ pxor xmm2,xmm4 |
; group (x1,x6,x11,x12)
+ paddd xmm1,xmm3 |
+ movdqa xmm0,xmm2 |
+ pslld xmm2,7 |
+ psrld xmm0,25 |
+ pxor xmm7,xmm1 |
+ por xmm2,xmm0 |
+ movdqa [32+ebx],xmm4 |
+ pshufb xmm7,[eax] |
+ movdqa [ebx-48],xmm2 |
+ paddd xmm5,xmm7 |
+ movdqa xmm4,[ebx] |
+ pxor xmm3,xmm5 |
+ movdqa xmm2,[ebx-16] |
+ movdqa xmm0,xmm3 |
+ pslld xmm3,12 |
+ psrld xmm0,20 |
+ por xmm3,xmm0 |
+ movdqa xmm0,[ebx-96] |
+ paddd xmm1,xmm3 |
+ movdqa xmm6,[80+ebx] |
+ pxor xmm7,xmm1 |
+ movdqa [ebx-112],xmm1 |
+ pshufb xmm7,[16+eax] |
+ paddd xmm5,xmm7 |
+ movdqa [64+ebx],xmm7 |
+ pxor xmm3,xmm5 |
; group (x2,x7,x8,x13)
+ paddd xmm0,xmm2 |
+ movdqa xmm1,xmm3 |
+ pslld xmm3,7 |
+ psrld xmm1,25 |
+ pxor xmm6,xmm0 |
+ por xmm3,xmm1 |
+ movdqa [48+ebx],xmm5 |
+ pshufb xmm6,[eax] |
+ movdqa [ebx-32],xmm3 |
+ paddd xmm4,xmm6 |
+ movdqa xmm5,[16+ebx] |
+ pxor xmm2,xmm4 |
+ movdqa xmm3,[ebx-64] |
+ movdqa xmm1,xmm2 |
+ pslld xmm2,12 |
+ psrld xmm1,20 |
+ por xmm2,xmm1 |
+ movdqa xmm1,[ebx-80] |
+ paddd xmm0,xmm2 |
+ movdqa xmm7,[96+ebx] |
+ pxor xmm6,xmm0 |
+ movdqa [ebx-96],xmm0 |
+ pshufb xmm6,[16+eax] |
+ paddd xmm4,xmm6 |
+ movdqa [80+ebx],xmm6 |
+ pxor xmm2,xmm4 |
; group (x3,x4,x9,x14)
+ paddd xmm1,xmm3 |
+ movdqa xmm0,xmm2 |
+ pslld xmm2,7 |
+ psrld xmm0,25 |
+ pxor xmm7,xmm1 |
+ por xmm2,xmm0 |
+ pshufb xmm7,[eax] |
+ movdqa [ebx-16],xmm2 |
+ paddd xmm5,xmm7 |
+ pxor xmm3,xmm5 |
+ movdqa xmm0,xmm3 |
+ pslld xmm3,12 |
+ psrld xmm0,20 |
+ por xmm3,xmm0 |
; Reload x0 and x12 so the next iteration finds them in xmm0 / xmm6.
+ movdqa xmm0,[ebx-128] |
+ paddd xmm1,xmm3 |
+ movdqa xmm6,[64+ebx] |
+ pxor xmm7,xmm1 |
+ movdqa [ebx-80],xmm1 |
+ pshufb xmm7,[16+eax] |
+ paddd xmm5,xmm7 |
+ movdqa [96+ebx],xmm7 |
+ pxor xmm3,xmm5 |
+ movdqa xmm1,xmm3 |
+ pslld xmm3,7 |
+ psrld xmm1,25 |
+ por xmm3,xmm1 |
+ dec edx |
+ jnz NEAR L$010loop |
; Rounds done. Flush the register-resident slots (x4, x8, x9, x12, x14)
; to the working state so all 16 slots can be read from memory.
+ movdqa [ebx-64],xmm3 |
+ movdqa [ebx],xmm4 |
+ movdqa [16+ebx],xmm5 |
+ movdqa [64+ebx],xmm6 |
+ movdqa [96+ebx],xmm7 |
; ---- slots 0..3: add saved input, transpose, XOR, store ----
+ movdqa xmm1,[ebx-112] |
+ movdqa xmm2,[ebx-96] |
+ movdqa xmm3,[ebx-80] |
+ paddd xmm0,[ebp-128] |
+ paddd xmm1,[ebp-112] |
+ paddd xmm2,[ebp-96] |
+ paddd xmm3,[ebp-80] |
; 4x4 dword transpose: afterwards xmm0, xmm1, xmm6, xmm3 each hold four
; consecutive keystream words of block 0, 1, 2, 3 respectively.
+ movdqa xmm6,xmm0 |
+ punpckldq xmm0,xmm1 |
+ movdqa xmm7,xmm2 |
+ punpckldq xmm2,xmm3 |
+ punpckhdq xmm6,xmm1 |
+ punpckhdq xmm7,xmm3 |
+ movdqa xmm1,xmm0 |
+ punpcklqdq xmm0,xmm2 |
+ movdqa xmm3,xmm6 |
+ punpcklqdq xmm6,xmm7 |
+ punpckhqdq xmm1,xmm2 |
+ punpckhqdq xmm3,xmm7 |
; Same 16-byte row of the four blocks, 64 bytes apart in the source.
+ movdqu xmm4,[esi-128] |
+ movdqu xmm5,[esi-64] |
+ movdqu xmm2,[esi] |
+ movdqu xmm7,[64+esi] |
+ lea esi,[16+esi] |
; XOR with the source, interleaved with loading slots 4..7.
+ pxor xmm4,xmm0 |
+ movdqa xmm0,[ebx-64] |
+ pxor xmm5,xmm1 |
+ movdqa xmm1,[ebx-48] |
+ pxor xmm6,xmm2 |
+ movdqa xmm2,[ebx-32] |
+ pxor xmm7,xmm3 |
+ movdqa xmm3,[ebx-16] |
+ movdqu [edi-128],xmm4 |
+ movdqu [edi-64],xmm5 |
+ movdqu [edi],xmm6 |
+ movdqu [64+edi],xmm7 |
+ lea edi,[16+edi] |
; ---- slots 4..7 ----
+ paddd xmm0,[ebp-64] |
+ paddd xmm1,[ebp-48] |
+ paddd xmm2,[ebp-32] |
+ paddd xmm3,[ebp-16] |
+ movdqa xmm6,xmm0 |
+ punpckldq xmm0,xmm1 |
+ movdqa xmm7,xmm2 |
+ punpckldq xmm2,xmm3 |
+ punpckhdq xmm6,xmm1 |
+ punpckhdq xmm7,xmm3 |
+ movdqa xmm1,xmm0 |
+ punpcklqdq xmm0,xmm2 |
+ movdqa xmm3,xmm6 |
+ punpcklqdq xmm6,xmm7 |
+ punpckhqdq xmm1,xmm2 |
+ punpckhqdq xmm3,xmm7 |
+ movdqu xmm4,[esi-128] |
+ movdqu xmm5,[esi-64] |
+ movdqu xmm2,[esi] |
+ movdqu xmm7,[64+esi] |
+ lea esi,[16+esi] |
+ pxor xmm4,xmm0 |
+ movdqa xmm0,[ebx] |
+ pxor xmm5,xmm1 |
+ movdqa xmm1,[16+ebx] |
+ pxor xmm6,xmm2 |
+ movdqa xmm2,[32+ebx] |
+ pxor xmm7,xmm3 |
+ movdqa xmm3,[48+ebx] |
+ movdqu [edi-128],xmm4 |
+ movdqu [edi-64],xmm5 |
+ movdqu [edi],xmm6 |
+ movdqu [64+edi],xmm7 |
+ lea edi,[16+edi] |
; ---- slots 8..11 ----
+ paddd xmm0,[ebp] |
+ paddd xmm1,[16+ebp] |
+ paddd xmm2,[32+ebp] |
+ paddd xmm3,[48+ebp] |
+ movdqa xmm6,xmm0 |
+ punpckldq xmm0,xmm1 |
+ movdqa xmm7,xmm2 |
+ punpckldq xmm2,xmm3 |
+ punpckhdq xmm6,xmm1 |
+ punpckhdq xmm7,xmm3 |
+ movdqa xmm1,xmm0 |
+ punpcklqdq xmm0,xmm2 |
+ movdqa xmm3,xmm6 |
+ punpcklqdq xmm6,xmm7 |
+ punpckhqdq xmm1,xmm2 |
+ punpckhqdq xmm3,xmm7 |
+ movdqu xmm4,[esi-128] |
+ movdqu xmm5,[esi-64] |
+ movdqu xmm2,[esi] |
+ movdqu xmm7,[64+esi] |
+ lea esi,[16+esi] |
+ pxor xmm4,xmm0 |
+ movdqa xmm0,[64+ebx] |
+ pxor xmm5,xmm1 |
+ movdqa xmm1,[80+ebx] |
+ pxor xmm6,xmm2 |
+ movdqa xmm2,[96+ebx] |
+ pxor xmm7,xmm3 |
+ movdqa xmm3,[112+ebx] |
+ movdqu [edi-128],xmm4 |
+ movdqu [edi-64],xmm5 |
+ movdqu [edi],xmm6 |
+ movdqu [64+edi],xmm7 |
+ lea edi,[16+edi] |
; ---- slots 12..15 ([ebp+64] holds this batch's counters) ----
+ paddd xmm0,[64+ebp] |
+ paddd xmm1,[80+ebp] |
+ paddd xmm2,[96+ebp] |
+ paddd xmm3,[112+ebp] |
+ movdqa xmm6,xmm0 |
+ punpckldq xmm0,xmm1 |
+ movdqa xmm7,xmm2 |
+ punpckldq xmm2,xmm3 |
+ punpckhdq xmm6,xmm1 |
+ punpckhdq xmm7,xmm3 |
+ movdqa xmm1,xmm0 |
+ punpcklqdq xmm0,xmm2 |
+ movdqa xmm3,xmm6 |
+ punpcklqdq xmm6,xmm7 |
+ punpckhqdq xmm1,xmm2 |
+ punpckhqdq xmm3,xmm7 |
+ movdqu xmm4,[esi-128] |
+ movdqu xmm5,[esi-64] |
+ movdqu xmm2,[esi] |
+ movdqu xmm7,[64+esi] |
; 16+16+16+208 = 256: the pointers end up one full batch further on.
+ lea esi,[208+esi] |
+ pxor xmm4,xmm0 |
+ pxor xmm5,xmm1 |
+ pxor xmm6,xmm2 |
+ pxor xmm7,xmm3 |
+ movdqu [edi-128],xmm4 |
+ movdqu [edi-64],xmm5 |
+ movdqu [edi],xmm6 |
+ movdqu [64+edi],xmm7 |
+ lea edi,[208+edi] |
; ecx was pre-decremented by 256, so "no borrow" means another full
; batch is available.
+ sub ecx,256 |
+ jnc NEAR L$009outer_loop |
; Undo the last subtraction; ecx = leftover bytes (0..255).
+ add ecx,256 |
+ jz NEAR L$011done |
; Hand-over to the one-block path: restore counter/key pointers, remove
; the +128 bias from esi/edi, and rebuild the counter block in xmm3.
+ mov ebx,DWORD [520+esp] |
+ lea esi,[esi-128] |
+ mov edx,DWORD [516+esp] |
+ lea edi,[edi-128] |
; xmm2 = first counter of the last batch + 4; the caller's counter
; block keeps its upper three dwords (mask 0,-1,-1,-1) and receives
; that value as dword 0.
+ movd xmm2,DWORD [64+ebp] |
+ movdqu xmm3,[ebx] |
+ paddd xmm2,[96+eax] |
+ pand xmm3,[112+eax] |
+ por xmm3,xmm2 |
; ----------------------------------------------------------------------
; L$0081x -- set-up for the one-block-at-a-time SSSE3 path.
; On entry xmm3 holds the counter block and edx the key pointer.
; The four 16-byte rows of the input state are kept at [esp+0..63]:
; constants, key dwords 0..3, key dwords 4..7, counter block.
; xmm6 / xmm7 hold the two byte-shuffle masks from the constant table.
; ----------------------------------------------------------------------
+L$0081x: |
+ movdqa xmm0,[32+eax] |
+ movdqu xmm1,[edx] |
+ movdqu xmm2,[16+edx] |
+ movdqa xmm6,[eax] |
+ movdqa xmm7,[16+eax] |
; NOTE(review): this dword store has no visible effect -- the 16-byte
; store of xmm3 to the same address four instructions below overwrites
; it before anything reads it.
+ mov DWORD [48+esp],ebp |
+ movdqa [esp],xmm0 |
+ movdqa [16+esp],xmm1 |
+ movdqa [32+esp],xmm2 |
+ movdqa [48+esp],xmm3 |
; edx = number of round-loop iterations.
+ mov edx,10 |
+ jmp NEAR L$012loop1x |
; ----------------------------------------------------------------------
; L$013outer1x -- prepares the next single block: reloads the three
; unchanged rows from [esp+0..47] and adds {1,0,0,0} (table offset 80)
; to the counter row, storing the new counter row back at [esp+48].
; ----------------------------------------------------------------------
+align 16 |
+L$013outer1x: |
+ movdqa xmm3,[80+eax] |
+ movdqa xmm0,[esp] |
+ movdqa xmm1,[16+esp] |
+ movdqa xmm2,[32+esp] |
+ paddd xmm3,[48+esp] |
+ mov edx,10 |
+ movdqa [48+esp],xmm3 |
+ jmp NEAR L$012loop1x |
; ----------------------------------------------------------------------
; L$012loop1x -- round loop of the one-block SSSE3 path plus the output
; of one full 64-byte block.
; xmm0..xmm3 are the four rows of the state; xmm4 is scratch.
; The "db 102,15,56,0,222" / "...,223" lines are hand-encoded
; instructions: bytes 66 0F 38 00 DE = pshufb xmm3,xmm6 and
; 66 0F 38 00 DF = pshufb xmm3,xmm7, i.e. the byte-shuffle rotates of
; row 3 using the masks loaded into xmm6 / xmm7.
; Each of the 10 iterations has two halves with the same arithmetic;
; the pshufd lane rotations between them re-align rows 1..3 and the
; ones at the end of the iteration undo that.
; ----------------------------------------------------------------------
+align 16 |
+L$012loop1x: |
; First half.
+ paddd xmm0,xmm1 |
+ pxor xmm3,xmm0 |
+db 102,15,56,0,222 |
+ paddd xmm2,xmm3 |
+ pxor xmm1,xmm2 |
; rotate row 1 left by 12 (psrld 20 | pslld 12)
+ movdqa xmm4,xmm1 |
+ psrld xmm1,20 |
+ pslld xmm4,12 |
+ por xmm1,xmm4 |
+ paddd xmm0,xmm1 |
+ pxor xmm3,xmm0 |
+db 102,15,56,0,223 |
+ paddd xmm2,xmm3 |
+ pxor xmm1,xmm2 |
; rotate row 1 left by 7 (psrld 25 | pslld 7)
+ movdqa xmm4,xmm1 |
+ psrld xmm1,25 |
+ pslld xmm4,7 |
+ por xmm1,xmm4 |
; Rotate the dword lanes of rows 2, 1 and 3 (immediates 78, 57, 147).
+ pshufd xmm2,xmm2,78 |
+ pshufd xmm1,xmm1,57 |
+ pshufd xmm3,xmm3,147 |
+ nop |
; Second half.
+ paddd xmm0,xmm1 |
+ pxor xmm3,xmm0 |
+db 102,15,56,0,222 |
+ paddd xmm2,xmm3 |
+ pxor xmm1,xmm2 |
+ movdqa xmm4,xmm1 |
+ psrld xmm1,20 |
+ pslld xmm4,12 |
+ por xmm1,xmm4 |
+ paddd xmm0,xmm1 |
+ pxor xmm3,xmm0 |
+db 102,15,56,0,223 |
+ paddd xmm2,xmm3 |
+ pxor xmm1,xmm2 |
+ movdqa xmm4,xmm1 |
+ psrld xmm1,25 |
+ pslld xmm4,7 |
+ por xmm1,xmm4 |
; Inverse lane rotations (78, 147, 57) put the rows back in place.
+ pshufd xmm2,xmm2,78 |
+ pshufd xmm1,xmm1,147 |
+ pshufd xmm3,xmm3,57 |
+ dec edx |
+ jnz NEAR L$012loop1x |
; Add the saved input rows to get the keystream block.
+ paddd xmm0,[esp] |
+ paddd xmm1,[16+esp] |
+ paddd xmm2,[32+esp] |
+ paddd xmm3,[48+esp] |
; Fewer than 64 bytes left: finish byte by byte in L$014tail.
+ cmp ecx,64 |
+ jb NEAR L$014tail |
; Full block: XOR 64 source bytes with the keystream and store them.
+ movdqu xmm4,[esi] |
+ movdqu xmm5,[16+esi] |
+ pxor xmm0,xmm4 |
+ movdqu xmm4,[32+esi] |
+ pxor xmm1,xmm5 |
+ movdqu xmm5,[48+esi] |
+ pxor xmm2,xmm4 |
+ pxor xmm3,xmm5 |
+ lea esi,[64+esi] |
+ movdqu [edi],xmm0 |
+ movdqu [16+edi],xmm1 |
+ movdqu [32+edi],xmm2 |
+ movdqu [48+edi],xmm3 |
+ lea edi,[64+edi] |
+ sub ecx,64 |
+ jnz NEAR L$013outer1x |
+ jmp NEAR L$011done |
; ----------------------------------------------------------------------
; L$014tail -- final partial block of the SSSE3 path (ecx = bytes left,
; fewer than 64). The keystream block is spilled to [esp+0..63] and
; consumed one byte at a time by L$015tail_loop:
;   ebp = byte index, esi = source, edi = destination.
; L$011done restores the caller's esp saved at [esp+512] and pops the
; four saved registers.
; ----------------------------------------------------------------------
+L$014tail: |
+ movdqa [esp],xmm0 |
+ movdqa [16+esp],xmm1 |
+ movdqa [32+esp],xmm2 |
+ movdqa [48+esp],xmm3 |
+ xor eax,eax |
+ xor edx,edx |
+ xor ebp,ebp |
+L$015tail_loop: |
+ mov al,BYTE [ebp*1+esp] |
+ mov dl,BYTE [ebp*1+esi] |
; ebp is advanced before the store, hence the -1 in the store address.
+ lea ebp,[1+ebp] |
+ xor al,dl |
+ mov BYTE [ebp*1+edi-1],al |
+ dec ecx |
+ jnz NEAR L$015tail_loop |
+L$011done: |
+ mov esp,DWORD [512+esp] |
+ pop edi |
+ pop esi |
+ pop ebx |
+ pop ebp |
+ ret |
; ----------------------------------------------------------------------
; L$ssse3_data -- constant table for the SSSE3 code, addressed through
; eax. Byte offsets of the eight 16-byte entries:
;   +0    pshufb mask: rotate every dword left by 16 bits
;   +16   pshufb mask: rotate every dword left by 8 bits
;   +32   the four constant state words (0x61707865, 0x3320646e,
;         0x79622d32, 0x6b206574)
;   +48   {0,1,2,3}     per-block counter offsets of a 4-block batch
;   +64   {4,4,4,4}     counter step per 4-block batch
;   +80   {1,0,0,0}     counter step per single block
;   +96   {4,0,0,0}     counter step when leaving the 4-block path
;   +112  {0,-1,-1,-1}  mask that clears dword 0 of the counter block
; The bytes after the second "align 64" are the NUL-terminated ASCII
; string "ChaCha20 for x86, CRYPTOGAMS by <appro@openssl.org>".
; ----------------------------------------------------------------------
+align 64 |
+L$ssse3_data: |
+db 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13 |
+db 3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14 |
+dd 1634760805,857760878,2036477234,1797285236 |
+dd 0,1,2,3 |
+dd 4,4,4,4 |
+dd 1,0,0,0 |
+dd 4,0,0,0 |
+dd 0,-1,-1,-1 |
+align 64 |
+db 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54 |
+db 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32 |
+db 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111 |
+db 114,103,62,0 |
; _OPENSSL_ia32cap_P -- 16-byte common symbol. _ChaCha20_ctr32 reads its
; first two dwords to choose between the integer and SSSE3 code paths.
; NOTE(review): being a common symbol it is presumably defined and
; filled in elsewhere in the library -- confirm.
+segment .bss |
+common _OPENSSL_ia32cap_P 16 |