Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(680)

Unified Diff: third_party/boringssl/win-x86/crypto/chacha/chacha-x86.asm

Issue 2219933002: Land BoringSSL roll on master (Closed) Base URL: git@github.com:dart-lang/sdk.git@master
Patch Set: Created 4 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: third_party/boringssl/win-x86/crypto/chacha/chacha-x86.asm
diff --git a/third_party/boringssl/win-x86/crypto/chacha/chacha-x86.asm b/third_party/boringssl/win-x86/crypto/chacha/chacha-x86.asm
new file mode 100644
index 0000000000000000000000000000000000000000..3ba31a2b35094e291232eb1dcc60333f9ffc34b0
--- /dev/null
+++ b/third_party/boringssl/win-x86/crypto/chacha/chacha-x86.asm
@@ -0,0 +1,977 @@
+%ifidn __OUTPUT_FORMAT__,obj
+section code use32 class=code align=64  ; 'obj' output format: 32-bit code segment, 64-byte aligned
+%elifidn __OUTPUT_FORMAT__,win32
+%ifdef __YASM_VERSION_ID__
+%if __YASM_VERSION_ID__ < 01010000h
+%error yasm version 1.1.0 or later needed.
+%endif
+; Yasm automatically includes .00 and complains about redefining it.
+; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html
+%else
+$@feat.00 equ 1  ; only defined when not assembling with Yasm; presumably the SafeSEH marker symbol described at the URL above
+%endif
+section .text code align=64  ; win32 output format: .text with 64-byte alignment
+%else
+section .text code  ; any other output format: .text with default alignment
+%endif
+global _ChaCha20_ctr32  ; scalar ChaCha20 path + dispatcher; stack args after the 4 pushes: [20+esp]=out, [24+esp]=in, [28+esp]=len, [32+esp]=32-byte block (state words 4..11), [36+esp]=16-byte block (state words 12..15)
+align 16
+_ChaCha20_ctr32:
+L$_ChaCha20_ctr32_begin:
+ push ebp  ; save ebp/ebx/esi/edi; popped again at L$000no_data
+ push ebx
+ push esi
+ push edi
+ xor eax,eax
+ cmp eax,DWORD [28+esp]  ; len == 0 ?
+ je NEAR L$000no_data  ; nothing to process: just restore registers and return
+ call L$pic_point  ; call/pop pair: obtain the run-time address of L$pic_point
+L$pic_point:
+ pop eax  ; eax = address of L$pic_point (consumed by the SSSE3 path to locate L$ssse3_data)
+ lea ebp,[_OPENSSL_ia32cap_P]  ; ebp -> capability words (declared as a 16-byte common at the end of the file)
+ test DWORD [ebp],16777216  ; bit 24 (1<<24) of word 0; presumably the FXSR feature bit - confirm against the ia32cap layout
+ jz NEAR L$001x86
+ test DWORD [4+ebp],512  ; bit 9 (1<<9) of word 1; presumably SSSE3, given the branch target below
+ jz NEAR L$001x86
+ jmp NEAR L$ssse3_shortcut  ; both bits set: continue inside _ChaCha20_ssse3 (registers already pushed, eax = L$pic_point)
+L$001x86:  ; scalar fallback
+ mov esi,DWORD [32+esp]  ; esi -> 32-byte input (8 dwords read below)
+ mov edi,DWORD [36+esp]  ; edi -> 16-byte input (4 dwords read below)
+ sub esp,132  ; frame: [0..60]=working state, [80..124]=saved input words 4..15, [128]=round counter; args are now at [152]=out, [156]=in, [160]=len
+ mov eax,DWORD [esi]
+ mov ebx,DWORD [4+esi]
+ mov ecx,DWORD [8+esi]
+ mov edx,DWORD [12+esi]
+ mov DWORD [80+esp],eax  ; state words 4..7
+ mov DWORD [84+esp],ebx
+ mov DWORD [88+esp],ecx
+ mov DWORD [92+esp],edx
+ mov eax,DWORD [16+esi]
+ mov ebx,DWORD [20+esi]
+ mov ecx,DWORD [24+esi]
+ mov edx,DWORD [28+esi]
+ mov DWORD [96+esp],eax  ; state words 8..11
+ mov DWORD [100+esp],ebx
+ mov DWORD [104+esp],ecx
+ mov DWORD [108+esp],edx
+ mov eax,DWORD [edi]
+ mov ebx,DWORD [4+edi]
+ mov ecx,DWORD [8+edi]
+ mov edx,DWORD [12+edi]
+ sub eax,1  ; pre-decrement word 12: L$002entry adds 1 back before every block
+ mov DWORD [112+esp],eax  ; state words 12..15
+ mov DWORD [116+esp],ebx
+ mov DWORD [120+esp],ecx
+ mov DWORD [124+esp],edx
+ jmp NEAR L$002entry
+align 16
+L$003outer_loop:  ; reached after a full 64-byte block with ebx=in, eax=out, ecx=remaining len
+ mov DWORD [156+esp],ebx  ; write the advanced in pointer back to its argument slot
+ mov DWORD [152+esp],eax  ; write the advanced out pointer back
+ mov DWORD [160+esp],ecx  ; write the remaining length back
+L$002entry:  ; build the working state for one block
+ mov eax,1634760805  ; 0x61707865 = "expa" (word 0 stays in eax)
+ mov DWORD [4+esp],857760878  ; 0x3320646e = "nd 3"
+ mov DWORD [8+esp],2036477234  ; 0x79622d32 = "2-by"
+ mov DWORD [12+esp],1797285236  ; 0x6b206574 = "te k"
+ mov ebx,DWORD [84+esp]
+ mov ebp,DWORD [88+esp]
+ mov ecx,DWORD [104+esp]
+ mov esi,DWORD [108+esp]
+ mov edx,DWORD [116+esp]
+ mov edi,DWORD [120+esp]
+ mov DWORD [20+esp],ebx  ; words 5, 6, 10, 11, 13, 14 go to the working copy in memory
+ mov DWORD [24+esp],ebp
+ mov DWORD [40+esp],ecx
+ mov DWORD [44+esp],esi
+ mov DWORD [52+esp],edx
+ mov DWORD [56+esp],edi
+ mov ebx,DWORD [92+esp]
+ mov edi,DWORD [124+esp]
+ mov edx,DWORD [112+esp]  ; word 12 (block counter) is kept in edx
+ mov ebp,DWORD [80+esp]  ; word 4 in ebp
+ mov ecx,DWORD [96+esp]  ; word 8 in ecx
+ mov esi,DWORD [100+esp]  ; word 9 in esi
+ add edx,1  ; advance the block counter for this block
+ mov DWORD [28+esp],ebx
+ mov DWORD [60+esp],edi
+ mov DWORD [112+esp],edx  ; remember the incremented counter for the feed-forward and the next block
+ mov ebx,10  ; 10 iterations of L$004loop
+ jmp NEAR L$004loop
+align 16
+L$004loop:  ; one iteration = 8 add/xor/rotate groups using rotate counts 16, 12, 8, 7
+ add eax,ebp  ; group on words (0,4,8,12)
+ mov DWORD [128+esp],ebx  ; spill the iteration counter; ebx is reused as a state word
+ mov ebx,ebp
+ xor edx,eax
+ rol edx,16
+ add ecx,edx
+ xor ebx,ecx
+ mov edi,DWORD [52+esp]
+ rol ebx,12
+ mov ebp,DWORD [20+esp]
+ add eax,ebx
+ xor edx,eax
+ mov DWORD [esp],eax
+ rol edx,8
+ mov eax,DWORD [4+esp]
+ add ecx,edx
+ mov DWORD [48+esp],edx
+ xor ebx,ecx
+ add eax,ebp  ; group on words (1,5,9,13), interleaved with the tail of the previous group
+ rol ebx,7
+ xor edi,eax
+ mov DWORD [32+esp],ecx
+ rol edi,16
+ mov DWORD [16+esp],ebx
+ add esi,edi
+ mov ecx,DWORD [40+esp]
+ xor ebp,esi
+ mov edx,DWORD [56+esp]
+ rol ebp,12
+ mov ebx,DWORD [24+esp]
+ add eax,ebp
+ xor edi,eax
+ mov DWORD [4+esp],eax
+ rol edi,8
+ mov eax,DWORD [8+esp]
+ add esi,edi
+ mov DWORD [52+esp],edi
+ xor ebp,esi
+ add eax,ebx  ; group on words (2,6,10,14)
+ rol ebp,7
+ xor edx,eax
+ mov DWORD [36+esp],esi
+ rol edx,16
+ mov DWORD [20+esp],ebp
+ add ecx,edx
+ mov esi,DWORD [44+esp]
+ xor ebx,ecx
+ mov edi,DWORD [60+esp]
+ rol ebx,12
+ mov ebp,DWORD [28+esp]
+ add eax,ebx
+ xor edx,eax
+ mov DWORD [8+esp],eax
+ rol edx,8
+ mov eax,DWORD [12+esp]
+ add ecx,edx
+ mov DWORD [56+esp],edx
+ xor ebx,ecx
+ add eax,ebp  ; group on words (3,7,11,15)
+ rol ebx,7
+ xor edi,eax
+ rol edi,16
+ mov DWORD [24+esp],ebx
+ add esi,edi
+ xor ebp,esi
+ rol ebp,12
+ mov ebx,DWORD [20+esp]
+ add eax,ebp
+ xor edi,eax
+ mov DWORD [12+esp],eax
+ rol edi,8
+ mov eax,DWORD [esp]
+ add esi,edi
+ mov edx,edi
+ xor ebp,esi
+ add eax,ebx  ; second half: group on words (0,5,10,15), i.e. the diagonals
+ rol ebp,7
+ xor edx,eax
+ rol edx,16
+ mov DWORD [28+esp],ebp
+ add ecx,edx
+ xor ebx,ecx
+ mov edi,DWORD [48+esp]
+ rol ebx,12
+ mov ebp,DWORD [24+esp]
+ add eax,ebx
+ xor edx,eax
+ mov DWORD [esp],eax
+ rol edx,8
+ mov eax,DWORD [4+esp]
+ add ecx,edx
+ mov DWORD [60+esp],edx
+ xor ebx,ecx
+ add eax,ebp  ; group on words (1,6,11,12)
+ rol ebx,7
+ xor edi,eax
+ mov DWORD [40+esp],ecx
+ rol edi,16
+ mov DWORD [20+esp],ebx
+ add esi,edi
+ mov ecx,DWORD [32+esp]
+ xor ebp,esi
+ mov edx,DWORD [52+esp]
+ rol ebp,12
+ mov ebx,DWORD [28+esp]
+ add eax,ebp
+ xor edi,eax
+ mov DWORD [4+esp],eax
+ rol edi,8
+ mov eax,DWORD [8+esp]
+ add esi,edi
+ mov DWORD [48+esp],edi
+ xor ebp,esi
+ add eax,ebx  ; group on words (2,7,8,13)
+ rol ebp,7
+ xor edx,eax
+ mov DWORD [44+esp],esi
+ rol edx,16
+ mov DWORD [24+esp],ebp
+ add ecx,edx
+ mov esi,DWORD [36+esp]
+ xor ebx,ecx
+ mov edi,DWORD [56+esp]
+ rol ebx,12
+ mov ebp,DWORD [16+esp]
+ add eax,ebx
+ xor edx,eax
+ mov DWORD [8+esp],eax
+ rol edx,8
+ mov eax,DWORD [12+esp]
+ add ecx,edx
+ mov DWORD [52+esp],edx
+ xor ebx,ecx
+ add eax,ebp  ; group on words (3,4,9,14)
+ rol ebx,7
+ xor edi,eax
+ rol edi,16
+ mov DWORD [28+esp],ebx
+ add esi,edi
+ xor ebp,esi
+ mov edx,DWORD [48+esp]
+ rol ebp,12
+ mov ebx,DWORD [128+esp]  ; reload the iteration counter
+ add eax,ebp
+ xor edi,eax
+ mov DWORD [12+esp],eax
+ rol edi,8
+ mov eax,DWORD [esp]
+ add esi,edi
+ mov DWORD [56+esp],edi
+ xor ebp,esi
+ rol ebp,7
+ dec ebx
+ jnz NEAR L$004loop
+ mov ebx,DWORD [160+esp]  ; ebx = remaining length
+ add eax,1634760805  ; feed-forward: add the input state back, starting with the words still in registers
+ add ebp,DWORD [80+esp]
+ add ecx,DWORD [96+esp]
+ add esi,DWORD [100+esp]
+ cmp ebx,64
+ jb NEAR L$005tail  ; fewer than 64 bytes left: finish byte by byte (unsigned compare)
+ mov ebx,DWORD [156+esp]  ; ebx = in pointer
+ add edx,DWORD [112+esp]
+ add edi,DWORD [120+esp]
+ xor eax,DWORD [ebx]  ; keystream XOR input, one dword at a time
+ xor ebp,DWORD [16+ebx]
+ mov DWORD [esp],eax  ; park output word 0; eax is needed for the out pointer
+ mov eax,DWORD [152+esp]  ; eax = out pointer
+ xor ecx,DWORD [32+ebx]
+ xor esi,DWORD [36+ebx]
+ xor edx,DWORD [48+ebx]
+ xor edi,DWORD [56+ebx]
+ mov DWORD [16+eax],ebp
+ mov DWORD [32+eax],ecx
+ mov DWORD [36+eax],esi
+ mov DWORD [48+eax],edx
+ mov DWORD [56+eax],edi
+ mov ebp,DWORD [4+esp]
+ mov ecx,DWORD [8+esp]
+ mov esi,DWORD [12+esp]
+ mov edx,DWORD [20+esp]
+ mov edi,DWORD [24+esp]
+ add ebp,857760878
+ add ecx,2036477234
+ add esi,1797285236
+ add edx,DWORD [84+esp]
+ add edi,DWORD [88+esp]
+ xor ebp,DWORD [4+ebx]
+ xor ecx,DWORD [8+ebx]
+ xor esi,DWORD [12+ebx]
+ xor edx,DWORD [20+ebx]
+ xor edi,DWORD [24+ebx]
+ mov DWORD [4+eax],ebp
+ mov DWORD [8+eax],ecx
+ mov DWORD [12+eax],esi
+ mov DWORD [20+eax],edx
+ mov DWORD [24+eax],edi
+ mov ebp,DWORD [28+esp]
+ mov ecx,DWORD [40+esp]
+ mov esi,DWORD [44+esp]
+ mov edx,DWORD [52+esp]
+ mov edi,DWORD [60+esp]
+ add ebp,DWORD [92+esp]
+ add ecx,DWORD [104+esp]
+ add esi,DWORD [108+esp]
+ add edx,DWORD [116+esp]
+ add edi,DWORD [124+esp]
+ xor ebp,DWORD [28+ebx]
+ xor ecx,DWORD [40+ebx]
+ xor esi,DWORD [44+ebx]
+ xor edx,DWORD [52+ebx]
+ xor edi,DWORD [60+ebx]
+ lea ebx,[64+ebx]  ; in += 64
+ mov DWORD [28+eax],ebp
+ mov ebp,DWORD [esp]  ; fetch the parked output word 0
+ mov DWORD [40+eax],ecx
+ mov ecx,DWORD [160+esp]  ; ecx = remaining length
+ mov DWORD [44+eax],esi
+ mov DWORD [52+eax],edx
+ mov DWORD [60+eax],edi
+ mov DWORD [eax],ebp
+ lea eax,[64+eax]  ; out += 64
+ sub ecx,64
+ jnz NEAR L$003outer_loop  ; more data: store the updated pointers/length and do another block
+ jmp NEAR L$006done
+L$005tail:  ; partial block: ebx = remaining byte count (1..63)
+ add edx,DWORD [112+esp]
+ add edi,DWORD [120+esp]
+ mov DWORD [esp],eax  ; write the finished keystream block to [0..60+esp]
+ mov DWORD [16+esp],ebp
+ mov DWORD [32+esp],ecx
+ mov DWORD [36+esp],esi
+ mov DWORD [48+esp],edx
+ mov DWORD [56+esp],edi
+ mov ebp,DWORD [4+esp]
+ mov ecx,DWORD [8+esp]
+ mov esi,DWORD [12+esp]
+ mov edx,DWORD [20+esp]
+ mov edi,DWORD [24+esp]
+ add ebp,857760878
+ add ecx,2036477234
+ add esi,1797285236
+ add edx,DWORD [84+esp]
+ add edi,DWORD [88+esp]
+ mov DWORD [4+esp],ebp
+ mov DWORD [8+esp],ecx
+ mov DWORD [12+esp],esi
+ mov DWORD [20+esp],edx
+ mov DWORD [24+esp],edi
+ mov ebp,DWORD [28+esp]
+ mov ecx,DWORD [40+esp]
+ mov esi,DWORD [44+esp]
+ mov edx,DWORD [52+esp]
+ mov edi,DWORD [60+esp]
+ add ebp,DWORD [92+esp]
+ add ecx,DWORD [104+esp]
+ add esi,DWORD [108+esp]
+ add edx,DWORD [116+esp]
+ add edi,DWORD [124+esp]
+ mov DWORD [28+esp],ebp
+ mov ebp,DWORD [156+esp]  ; ebp = in pointer
+ mov DWORD [40+esp],ecx
+ mov ecx,DWORD [152+esp]  ; ecx = out pointer
+ mov DWORD [44+esp],esi
+ xor esi,esi  ; esi = byte index
+ mov DWORD [52+esp],edx
+ mov DWORD [60+esp],edi
+ xor eax,eax
+ xor edx,edx
+L$007tail_loop:
+ mov al,BYTE [ebp*1+esi]  ; input byte
+ mov dl,BYTE [esi*1+esp]  ; keystream byte
+ lea esi,[1+esi]
+ xor al,dl
+ mov BYTE [esi*1+ecx-1],al  ; -1 compensates for esi having been incremented already
+ dec ebx
+ jnz NEAR L$007tail_loop
+L$006done:
+ add esp,132  ; release the frame
+L$000no_data:
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+global _ChaCha20_ssse3  ; SSSE3 path; same stack arguments as _ChaCha20_ctr32 (read at L$ssse3_shortcut below)
+align 16
+_ChaCha20_ssse3:  ; NOTE(review): nothing between this entry and the "lea eax,[...+eax]" below writes eax, yet that lea needs eax = address of L$pic_point; only the jump from _ChaCha20_ctr32 to L$ssse3_shortcut provides it - confirm nothing calls this symbol directly
+L$_ChaCha20_ssse3_begin:
+ push ebp
+ push ebx
+ push esi
+ push edi
+L$ssse3_shortcut:  ; entered from _ChaCha20_ctr32 with the same four registers already pushed and eax = L$pic_point
+ mov edi,DWORD [20+esp]  ; edi = out
+ mov esi,DWORD [24+esp]  ; esi = in
+ mov ecx,DWORD [28+esp]  ; ecx = len
+ mov edx,DWORD [32+esp]  ; edx -> 32-byte block (state words 4..11)
+ mov ebx,DWORD [36+esp]  ; ebx -> 16-byte block (state words 12..15)
+ mov ebp,esp  ; remember the incoming esp
+ sub esp,524
+ and esp,-64  ; 64-byte aligned frame, so the movdqa accesses below are aligned
+ mov DWORD [512+esp],ebp  ; saved esp, restored at L$011done
+ lea eax,[(L$ssse3_data-L$pic_point)+eax]  ; eax -> L$ssse3_data
+ movdqu xmm3,[ebx]  ; xmm3 = state words 12..15
+ cmp ecx,256
+ jb NEAR L$0081x  ; fewer than 256 bytes: use the one-block-at-a-time path
+ mov DWORD [516+esp],edx  ; keep both input pointers for the 1x fall-through after the 4x loop
+ mov DWORD [520+esp],ebx
+ sub ecx,256  ; bias len by -256 so the sub/jnc at the loop bottom detects "< 256 left"
+ lea ebp,[384+esp]  ; ebp-128..ebp+127 = [256..511+esp]: input state, one 16-byte vector per state word
+ movdqu xmm7,[edx]
+ pshufd xmm0,xmm3,0  ; broadcast each of words 12..15 across all four lanes
+ pshufd xmm1,xmm3,85
+ pshufd xmm2,xmm3,170
+ pshufd xmm3,xmm3,255
+ paddd xmm0,[48+eax]  ; + {0,1,2,3}: every lane gets its own counter value
+ pshufd xmm4,xmm7,0
+ pshufd xmm5,xmm7,85
+ psubd xmm0,[64+eax]  ; - {4,4,4,4}: pre-bias, L$009outer_loop adds 4 before each batch
+ pshufd xmm6,xmm7,170
+ pshufd xmm7,xmm7,255
+ movdqa [64+ebp],xmm0  ; words 12..15 at [ebp+64..ebp+112]
+ movdqa [80+ebp],xmm1
+ movdqa [96+ebp],xmm2
+ movdqa [112+ebp],xmm3
+ movdqu xmm3,[16+edx]
+ movdqa [ebp-64],xmm4  ; words 4..7 at [ebp-64..ebp-16]
+ movdqa [ebp-48],xmm5
+ movdqa [ebp-32],xmm6
+ movdqa [ebp-16],xmm7
+ movdqa xmm7,[32+eax]  ; the four constant words
+ lea ebx,[128+esp]  ; ebx-128..ebx+127 = [0..255+esp]: working state
+ pshufd xmm0,xmm3,0
+ pshufd xmm1,xmm3,85
+ pshufd xmm2,xmm3,170
+ pshufd xmm3,xmm3,255
+ pshufd xmm4,xmm7,0
+ pshufd xmm5,xmm7,85
+ pshufd xmm6,xmm7,170
+ pshufd xmm7,xmm7,255
+ movdqa [ebp],xmm0  ; words 8..11 at [ebp..ebp+48]
+ movdqa [16+ebp],xmm1
+ movdqa [32+ebp],xmm2
+ movdqa [48+ebp],xmm3
+ movdqa [ebp-128],xmm4  ; words 0..3 (constants) at [ebp-128..ebp-80]
+ movdqa [ebp-112],xmm5
+ movdqa [ebp-96],xmm6
+ movdqa [ebp-80],xmm7
+ lea esi,[128+esi]  ; bias in/out by 128 so the four blocks are addressed at -128, -64, 0, +64
+ lea edi,[128+edi]
+ jmp NEAR L$009outer_loop
+align 16
+L$009outer_loop:  ; one pass = four 64-byte blocks (256 bytes)
+ movdqa xmm1,[ebp-112]  ; copy the input state into the working area
+ movdqa xmm2,[ebp-96]
+ movdqa xmm3,[ebp-80]
+ movdqa xmm5,[ebp-48]
+ movdqa xmm6,[ebp-32]
+ movdqa xmm7,[ebp-16]
+ movdqa [ebx-112],xmm1
+ movdqa [ebx-96],xmm2
+ movdqa [ebx-80],xmm3
+ movdqa [ebx-48],xmm5
+ movdqa [ebx-32],xmm6
+ movdqa [ebx-16],xmm7
+ movdqa xmm2,[32+ebp]
+ movdqa xmm3,[48+ebp]
+ movdqa xmm4,[64+ebp]
+ movdqa xmm5,[80+ebp]
+ movdqa xmm6,[96+ebp]
+ movdqa xmm7,[112+ebp]
+ paddd xmm4,[64+eax]  ; advance all four lane counters by 4
+ movdqa [32+ebx],xmm2
+ movdqa [48+ebx],xmm3
+ movdqa [64+ebx],xmm4
+ movdqa [80+ebx],xmm5
+ movdqa [96+ebx],xmm6
+ movdqa [112+ebx],xmm7
+ movdqa [64+ebp],xmm4  ; keep the advanced counters for the feed-forward and the next pass
+ movdqa xmm0,[ebp-128]  ; words 0, 4, 8, 9, 12 start out in xmm0, xmm3, xmm4, xmm5, xmm6
+ movdqa xmm6,xmm4
+ movdqa xmm3,[ebp-64]
+ movdqa xmm4,[ebp]
+ movdqa xmm5,[16+ebp]
+ mov edx,10  ; 10 iterations of L$010loop
+ nop
+align 16
+L$010loop:  ; 8 add/xor/rotate groups per iteration, each on four blocks in parallel
+ paddd xmm0,xmm3
+ movdqa xmm2,xmm3
+ pxor xmm6,xmm0
+ pshufb xmm6,[eax]  ; rotate every dword left by 16 (byte-shuffle mask at L$ssse3_data+0)
+ paddd xmm4,xmm6
+ pxor xmm2,xmm4
+ movdqa xmm3,[ebx-48]
+ movdqa xmm1,xmm2
+ pslld xmm2,12  ; pslld 12 / psrld 20 / por = rotate left by 12
+ psrld xmm1,20
+ por xmm2,xmm1
+ movdqa xmm1,[ebx-112]
+ paddd xmm0,xmm2
+ movdqa xmm7,[80+ebx]
+ pxor xmm6,xmm0
+ movdqa [ebx-128],xmm0
+ pshufb xmm6,[16+eax]  ; rotate every dword left by 8 (mask at L$ssse3_data+16)
+ paddd xmm4,xmm6
+ movdqa [64+ebx],xmm6
+ pxor xmm2,xmm4
+ paddd xmm1,xmm3
+ movdqa xmm0,xmm2
+ pslld xmm2,7  ; pslld 7 / psrld 25 / por = rotate left by 7
+ psrld xmm0,25
+ pxor xmm7,xmm1
+ por xmm2,xmm0
+ movdqa [ebx],xmm4
+ pshufb xmm7,[eax]
+ movdqa [ebx-64],xmm2
+ paddd xmm5,xmm7
+ movdqa xmm4,[32+ebx]
+ pxor xmm3,xmm5
+ movdqa xmm2,[ebx-32]
+ movdqa xmm0,xmm3
+ pslld xmm3,12
+ psrld xmm0,20
+ por xmm3,xmm0
+ movdqa xmm0,[ebx-96]
+ paddd xmm1,xmm3
+ movdqa xmm6,[96+ebx]
+ pxor xmm7,xmm1
+ movdqa [ebx-112],xmm1
+ pshufb xmm7,[16+eax]
+ paddd xmm5,xmm7
+ movdqa [80+ebx],xmm7
+ pxor xmm3,xmm5
+ paddd xmm0,xmm2
+ movdqa xmm1,xmm3
+ pslld xmm3,7
+ psrld xmm1,25
+ pxor xmm6,xmm0
+ por xmm3,xmm1
+ movdqa [16+ebx],xmm5
+ pshufb xmm6,[eax]
+ movdqa [ebx-48],xmm3
+ paddd xmm4,xmm6
+ movdqa xmm5,[48+ebx]
+ pxor xmm2,xmm4
+ movdqa xmm3,[ebx-16]
+ movdqa xmm1,xmm2
+ pslld xmm2,12
+ psrld xmm1,20
+ por xmm2,xmm1
+ movdqa xmm1,[ebx-80]
+ paddd xmm0,xmm2
+ movdqa xmm7,[112+ebx]
+ pxor xmm6,xmm0
+ movdqa [ebx-96],xmm0
+ pshufb xmm6,[16+eax]
+ paddd xmm4,xmm6
+ movdqa [96+ebx],xmm6
+ pxor xmm2,xmm4
+ paddd xmm1,xmm3
+ movdqa xmm0,xmm2
+ pslld xmm2,7
+ psrld xmm0,25
+ pxor xmm7,xmm1
+ por xmm2,xmm0
+ pshufb xmm7,[eax]
+ movdqa [ebx-32],xmm2
+ paddd xmm5,xmm7
+ pxor xmm3,xmm5
+ movdqa xmm2,[ebx-48]
+ movdqa xmm0,xmm3
+ pslld xmm3,12
+ psrld xmm0,20
+ por xmm3,xmm0
+ movdqa xmm0,[ebx-128]
+ paddd xmm1,xmm3
+ pxor xmm7,xmm1
+ movdqa [ebx-80],xmm1
+ pshufb xmm7,[16+eax]
+ paddd xmm5,xmm7
+ movdqa xmm6,xmm7
+ pxor xmm3,xmm5
+ paddd xmm0,xmm2  ; second half of the iteration starts here (word 0 paired with word 5 from [ebx-48])
+ movdqa xmm1,xmm3
+ pslld xmm3,7
+ psrld xmm1,25
+ pxor xmm6,xmm0
+ por xmm3,xmm1
+ pshufb xmm6,[eax]
+ movdqa [ebx-16],xmm3
+ paddd xmm4,xmm6
+ pxor xmm2,xmm4
+ movdqa xmm3,[ebx-32]
+ movdqa xmm1,xmm2
+ pslld xmm2,12
+ psrld xmm1,20
+ por xmm2,xmm1
+ movdqa xmm1,[ebx-112]
+ paddd xmm0,xmm2
+ movdqa xmm7,[64+ebx]
+ pxor xmm6,xmm0
+ movdqa [ebx-128],xmm0
+ pshufb xmm6,[16+eax]
+ paddd xmm4,xmm6
+ movdqa [112+ebx],xmm6
+ pxor xmm2,xmm4
+ paddd xmm1,xmm3
+ movdqa xmm0,xmm2
+ pslld xmm2,7
+ psrld xmm0,25
+ pxor xmm7,xmm1
+ por xmm2,xmm0
+ movdqa [32+ebx],xmm4
+ pshufb xmm7,[eax]
+ movdqa [ebx-48],xmm2
+ paddd xmm5,xmm7
+ movdqa xmm4,[ebx]
+ pxor xmm3,xmm5
+ movdqa xmm2,[ebx-16]
+ movdqa xmm0,xmm3
+ pslld xmm3,12
+ psrld xmm0,20
+ por xmm3,xmm0
+ movdqa xmm0,[ebx-96]
+ paddd xmm1,xmm3
+ movdqa xmm6,[80+ebx]
+ pxor xmm7,xmm1
+ movdqa [ebx-112],xmm1
+ pshufb xmm7,[16+eax]
+ paddd xmm5,xmm7
+ movdqa [64+ebx],xmm7
+ pxor xmm3,xmm5
+ paddd xmm0,xmm2
+ movdqa xmm1,xmm3
+ pslld xmm3,7
+ psrld xmm1,25
+ pxor xmm6,xmm0
+ por xmm3,xmm1
+ movdqa [48+ebx],xmm5
+ pshufb xmm6,[eax]
+ movdqa [ebx-32],xmm3
+ paddd xmm4,xmm6
+ movdqa xmm5,[16+ebx]
+ pxor xmm2,xmm4
+ movdqa xmm3,[ebx-64]
+ movdqa xmm1,xmm2
+ pslld xmm2,12
+ psrld xmm1,20
+ por xmm2,xmm1
+ movdqa xmm1,[ebx-80]
+ paddd xmm0,xmm2
+ movdqa xmm7,[96+ebx]
+ pxor xmm6,xmm0
+ movdqa [ebx-96],xmm0
+ pshufb xmm6,[16+eax]
+ paddd xmm4,xmm6
+ movdqa [80+ebx],xmm6
+ pxor xmm2,xmm4
+ paddd xmm1,xmm3
+ movdqa xmm0,xmm2
+ pslld xmm2,7
+ psrld xmm0,25
+ pxor xmm7,xmm1
+ por xmm2,xmm0
+ pshufb xmm7,[eax]
+ movdqa [ebx-16],xmm2
+ paddd xmm5,xmm7
+ pxor xmm3,xmm5
+ movdqa xmm0,xmm3
+ pslld xmm3,12
+ psrld xmm0,20
+ por xmm3,xmm0
+ movdqa xmm0,[ebx-128]
+ paddd xmm1,xmm3
+ movdqa xmm6,[64+ebx]
+ pxor xmm7,xmm1
+ movdqa [ebx-80],xmm1
+ pshufb xmm7,[16+eax]
+ paddd xmm5,xmm7
+ movdqa [96+ebx],xmm7
+ pxor xmm3,xmm5
+ movdqa xmm1,xmm3
+ pslld xmm3,7
+ psrld xmm1,25
+ por xmm3,xmm1
+ dec edx
+ jnz NEAR L$010loop
+ movdqa [ebx-64],xmm3  ; spill the words still held in registers back to the working area
+ movdqa [ebx],xmm4
+ movdqa [16+ebx],xmm5
+ movdqa [64+ebx],xmm6
+ movdqa [96+ebx],xmm7
+ movdqa xmm1,[ebx-112]
+ movdqa xmm2,[ebx-96]
+ movdqa xmm3,[ebx-80]
+ paddd xmm0,[ebp-128]  ; feed-forward for words 0..3: add the input state back
+ paddd xmm1,[ebp-112]
+ paddd xmm2,[ebp-96]
+ paddd xmm3,[ebp-80]
+ movdqa xmm6,xmm0  ; punpck* sequence: 4x4 dword transpose so each register holds words 0..3 of one block
+ punpckldq xmm0,xmm1
+ movdqa xmm7,xmm2
+ punpckldq xmm2,xmm3
+ punpckhdq xmm6,xmm1
+ punpckhdq xmm7,xmm3
+ movdqa xmm1,xmm0
+ punpcklqdq xmm0,xmm2
+ movdqa xmm3,xmm6
+ punpcklqdq xmm6,xmm7
+ punpckhqdq xmm1,xmm2
+ punpckhqdq xmm3,xmm7
+ movdqu xmm4,[esi-128]  ; 16 input bytes from each of the four blocks (64 bytes apart)
+ movdqu xmm5,[esi-64]
+ movdqu xmm2,[esi]
+ movdqu xmm7,[64+esi]
+ lea esi,[16+esi]  ; next 16-byte row of every block
+ pxor xmm4,xmm0
+ movdqa xmm0,[ebx-64]
+ pxor xmm5,xmm1
+ movdqa xmm1,[ebx-48]
+ pxor xmm6,xmm2
+ movdqa xmm2,[ebx-32]
+ pxor xmm7,xmm3
+ movdqa xmm3,[ebx-16]
+ movdqu [edi-128],xmm4
+ movdqu [edi-64],xmm5
+ movdqu [edi],xmm6
+ movdqu [64+edi],xmm7
+ lea edi,[16+edi]
+ paddd xmm0,[ebp-64]  ; same feed-forward / transpose / xor / store for words 4..7
+ paddd xmm1,[ebp-48]
+ paddd xmm2,[ebp-32]
+ paddd xmm3,[ebp-16]
+ movdqa xmm6,xmm0
+ punpckldq xmm0,xmm1
+ movdqa xmm7,xmm2
+ punpckldq xmm2,xmm3
+ punpckhdq xmm6,xmm1
+ punpckhdq xmm7,xmm3
+ movdqa xmm1,xmm0
+ punpcklqdq xmm0,xmm2
+ movdqa xmm3,xmm6
+ punpcklqdq xmm6,xmm7
+ punpckhqdq xmm1,xmm2
+ punpckhqdq xmm3,xmm7
+ movdqu xmm4,[esi-128]
+ movdqu xmm5,[esi-64]
+ movdqu xmm2,[esi]
+ movdqu xmm7,[64+esi]
+ lea esi,[16+esi]
+ pxor xmm4,xmm0
+ movdqa xmm0,[ebx]
+ pxor xmm5,xmm1
+ movdqa xmm1,[16+ebx]
+ pxor xmm6,xmm2
+ movdqa xmm2,[32+ebx]
+ pxor xmm7,xmm3
+ movdqa xmm3,[48+ebx]
+ movdqu [edi-128],xmm4
+ movdqu [edi-64],xmm5
+ movdqu [edi],xmm6
+ movdqu [64+edi],xmm7
+ lea edi,[16+edi]
+ paddd xmm0,[ebp]  ; words 8..11
+ paddd xmm1,[16+ebp]
+ paddd xmm2,[32+ebp]
+ paddd xmm3,[48+ebp]
+ movdqa xmm6,xmm0
+ punpckldq xmm0,xmm1
+ movdqa xmm7,xmm2
+ punpckldq xmm2,xmm3
+ punpckhdq xmm6,xmm1
+ punpckhdq xmm7,xmm3
+ movdqa xmm1,xmm0
+ punpcklqdq xmm0,xmm2
+ movdqa xmm3,xmm6
+ punpcklqdq xmm6,xmm7
+ punpckhqdq xmm1,xmm2
+ punpckhqdq xmm3,xmm7
+ movdqu xmm4,[esi-128]
+ movdqu xmm5,[esi-64]
+ movdqu xmm2,[esi]
+ movdqu xmm7,[64+esi]
+ lea esi,[16+esi]
+ pxor xmm4,xmm0
+ movdqa xmm0,[64+ebx]
+ pxor xmm5,xmm1
+ movdqa xmm1,[80+ebx]
+ pxor xmm6,xmm2
+ movdqa xmm2,[96+ebx]
+ pxor xmm7,xmm3
+ movdqa xmm3,[112+ebx]
+ movdqu [edi-128],xmm4
+ movdqu [edi-64],xmm5
+ movdqu [edi],xmm6
+ movdqu [64+edi],xmm7
+ lea edi,[16+edi]
+ paddd xmm0,[64+ebp]  ; words 12..15
+ paddd xmm1,[80+ebp]
+ paddd xmm2,[96+ebp]
+ paddd xmm3,[112+ebp]
+ movdqa xmm6,xmm0
+ punpckldq xmm0,xmm1
+ movdqa xmm7,xmm2
+ punpckldq xmm2,xmm3
+ punpckhdq xmm6,xmm1
+ punpckhdq xmm7,xmm3
+ movdqa xmm1,xmm0
+ punpcklqdq xmm0,xmm2
+ movdqa xmm3,xmm6
+ punpcklqdq xmm6,xmm7
+ punpckhqdq xmm1,xmm2
+ punpckhqdq xmm3,xmm7
+ movdqu xmm4,[esi-128]
+ movdqu xmm5,[esi-64]
+ movdqu xmm2,[esi]
+ movdqu xmm7,[64+esi]
+ lea esi,[208+esi]  ; 3*16 + 208 = 256: step to the next four-block batch
+ pxor xmm4,xmm0
+ pxor xmm5,xmm1
+ pxor xmm6,xmm2
+ pxor xmm7,xmm3
+ movdqu [edi-128],xmm4
+ movdqu [edi-64],xmm5
+ movdqu [edi],xmm6
+ movdqu [64+edi],xmm7
+ lea edi,[208+edi]
+ sub ecx,256
+ jnc NEAR L$009outer_loop  ; no borrow: at least 256 more bytes were available
+ add ecx,256  ; undo the bias: ecx = bytes left (0..255)
+ jz NEAR L$011done
+ mov ebx,DWORD [520+esp]  ; reload the pointer to the 16-byte block
+ lea esi,[esi-128]  ; remove the +128 bias from in/out
+ mov edx,DWORD [516+esp]  ; reload the pointer to the 32-byte block
+ lea edi,[edi-128]
+ movd xmm2,DWORD [64+ebp]  ; lane-0 counter of the last 4x batch
+ movdqu xmm3,[ebx]
+ paddd xmm2,[96+eax]  ; + {4,0,0,0}: counter for the next unprocessed block
+ pand xmm3,[112+eax]  ; & {0,-1,-1,-1}: keep words 13..15 of the caller's block
+ por xmm3,xmm2  ; xmm3 = state words 12..15 for the 1x path
+L$0081x:  ; one 64-byte block per iteration; expects xmm3 = state words 12..15
+ movdqa xmm0,[32+eax]  ; xmm0 = constants (words 0..3)
+ movdqu xmm1,[edx]  ; xmm1 = words 4..7
+ movdqu xmm2,[16+edx]  ; xmm2 = words 8..11
+ movdqa xmm6,[eax]  ; xmm6 = rotate-by-16 shuffle mask
+ movdqa xmm7,[16+eax]  ; xmm7 = rotate-by-8 shuffle mask
+ mov DWORD [48+esp],ebp  ; NOTE(review): overwritten by the movdqa to [48+esp] four lines below; looks like a dead store
+ movdqa [esp],xmm0  ; keep the input state at [0..63+esp] for the feed-forward
+ movdqa [16+esp],xmm1
+ movdqa [32+esp],xmm2
+ movdqa [48+esp],xmm3
+ mov edx,10  ; 10 iterations of L$012loop1x
+ jmp NEAR L$012loop1x
+align 16
+L$013outer1x:  ; set up the next block
+ movdqa xmm3,[80+eax]  ; {1,0,0,0}
+ movdqa xmm0,[esp]
+ movdqa xmm1,[16+esp]
+ movdqa xmm2,[32+esp]
+ paddd xmm3,[48+esp]  ; counter word += 1
+ mov edx,10
+ movdqa [48+esp],xmm3
+ jmp NEAR L$012loop1x
+align 16
+L$012loop1x:  ; rows of the 4x4 state live in xmm0..xmm3; xmm4 is scratch
+ paddd xmm0,xmm1
+ pxor xmm3,xmm0
+db 102,15,56,0,222  ; bytes 66 0F 38 00 DE = pshufb xmm3,xmm6 (rotate dwords left by 16)
+ paddd xmm2,xmm3
+ pxor xmm1,xmm2
+ movdqa xmm4,xmm1
+ psrld xmm1,20  ; psrld 20 / pslld 12 / por = rotate left by 12
+ pslld xmm4,12
+ por xmm1,xmm4
+ paddd xmm0,xmm1
+ pxor xmm3,xmm0
+db 102,15,56,0,223  ; bytes 66 0F 38 00 DF = pshufb xmm3,xmm7 (rotate dwords left by 8)
+ paddd xmm2,xmm3
+ pxor xmm1,xmm2
+ movdqa xmm4,xmm1
+ psrld xmm1,25  ; psrld 25 / pslld 7 / por = rotate left by 7
+ pslld xmm4,7
+ por xmm1,xmm4
+ pshufd xmm2,xmm2,78  ; rotate the dword lanes of rows 2, 1, 3 by 2, 1, 3 positions before the second half
+ pshufd xmm1,xmm1,57
+ pshufd xmm3,xmm3,147
+ nop
+ paddd xmm0,xmm1
+ pxor xmm3,xmm0
+db 102,15,56,0,222  ; pshufb xmm3,xmm6
+ paddd xmm2,xmm3
+ pxor xmm1,xmm2
+ movdqa xmm4,xmm1
+ psrld xmm1,20
+ pslld xmm4,12
+ por xmm1,xmm4
+ paddd xmm0,xmm1
+ pxor xmm3,xmm0
+db 102,15,56,0,223  ; pshufb xmm3,xmm7
+ paddd xmm2,xmm3
+ pxor xmm1,xmm2
+ movdqa xmm4,xmm1
+ psrld xmm1,25
+ pslld xmm4,7
+ por xmm1,xmm4
+ pshufd xmm2,xmm2,78  ; inverse lane rotation: rows return to their original alignment
+ pshufd xmm1,xmm1,147
+ pshufd xmm3,xmm3,57
+ dec edx
+ jnz NEAR L$012loop1x
+ paddd xmm0,[esp]  ; feed-forward: add the saved input state
+ paddd xmm1,[16+esp]
+ paddd xmm2,[32+esp]
+ paddd xmm3,[48+esp]
+ cmp ecx,64
+ jb NEAR L$014tail  ; fewer than 64 bytes left (unsigned compare)
+ movdqu xmm4,[esi]
+ movdqu xmm5,[16+esi]
+ pxor xmm0,xmm4  ; keystream XOR input
+ movdqu xmm4,[32+esi]
+ pxor xmm1,xmm5
+ movdqu xmm5,[48+esi]
+ pxor xmm2,xmm4
+ pxor xmm3,xmm5
+ lea esi,[64+esi]  ; in += 64
+ movdqu [edi],xmm0
+ movdqu [16+edi],xmm1
+ movdqu [32+edi],xmm2
+ movdqu [48+edi],xmm3
+ lea edi,[64+edi]  ; out += 64
+ sub ecx,64
+ jnz NEAR L$013outer1x
+ jmp NEAR L$011done
+L$014tail:  ; partial block: ecx = remaining byte count (1..63)
+ movdqa [esp],xmm0  ; spill the keystream block to [0..63+esp], overwriting the saved input state
+ movdqa [16+esp],xmm1
+ movdqa [32+esp],xmm2
+ movdqa [48+esp],xmm3
+ xor eax,eax
+ xor edx,edx
+ xor ebp,ebp  ; ebp = byte index
+L$015tail_loop:
+ mov al,BYTE [ebp*1+esp]  ; keystream byte
+ mov dl,BYTE [ebp*1+esi]  ; input byte
+ lea ebp,[1+ebp]
+ xor al,dl
+ mov BYTE [ebp*1+edi-1],al  ; -1 compensates for ebp having been incremented already
+ dec ecx
+ jnz NEAR L$015tail_loop
+L$011done:
+ mov esp,DWORD [512+esp]  ; restore the esp saved at L$ssse3_shortcut
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+align 64
+L$ssse3_data:  ; constant table addressed through eax in _ChaCha20_ssse3; offsets given below
+db 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13  ; +0: pshufb mask, rotates each dword left by 16 bits
+db 3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14  ; +16: pshufb mask, rotates each dword left by 8 bits
+dd 1634760805,857760878,2036477234,1797285236  ; +32: state words 0..3, ASCII "expand 32-byte k"
+dd 0,1,2,3  ; +48: per-lane counter offsets for the 4-block path
+dd 4,4,4,4  ; +64: counter step per 4-block batch
+dd 1,0,0,0  ; +80: counter step for the 1-block path
+dd 4,0,0,0  ; +96: counter step applied when leaving the 4-block path
+dd 0,-1,-1,-1  ; +112: mask that clears word 12 and keeps words 13..15
+align 64
+db 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54  ; ASCII: "ChaCha20 for x86, CRYPTOGAMS by <appro@openssl.org>", NUL-terminated
+db 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
+db 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
+db 114,103,62,0
+segment .bss
+common _OPENSSL_ia32cap_P 16  ; 16-byte common symbol; _ChaCha20_ctr32 tests its first two dwords to choose a code path
« no previous file with comments | « third_party/boringssl/mac-x86_64/crypto/sha/sha512-x86_64.S ('k') | third_party/boringssl/win-x86/crypto/cpu-x86-asm.asm » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698