Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(3)

Side by Side Diff: third_party/boringssl/win-x86/crypto/chacha/chacha-x86.asm

Issue 2219933002: Land BoringSSL roll on master (Closed) Base URL: git@github.com:dart-lang/sdk.git@master
Patch Set: Created 4 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
; Select the code section according to the assembler's output format:
; "obj", "win32", or anything else.
1 %ifidn __OUTPUT_FORMAT__,obj
2 section code use32 class=code align=64
3 %elifidn __OUTPUT_FORMAT__,win32
; When assembled by Yasm, reject versions older than 1.1.0.
4 %ifdef __YASM_VERSION_ID__
5 %if __YASM_VERSION_ID__ < 01010000h
6 %error yasm version 1.1.0 or later needed.
7 %endif
8 ; Yasm automatically includes .00 and complains about redefining it.
9 ; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html
10 %else
; Not Yasm: the symbol has to be defined by hand. Per the manual page linked
; above this is the safe-SEH feature marker -- confirm the exact meaning of
; the value 1 against that page.
11 $@feat.00 equ 1
12 %endif
13 section .text code align=64
14 %else
; Any other output format: plain code section, no explicit alignment.
15 section .text code
16 %endif
;-----------------------------------------------------------------------
; _ChaCha20_ctr32 -- entry point and implementation dispatch.
;
; Stack arguments as this function uses them (offsets valid after the four
; pushes below):
;   [20+esp]  output pointer
;   [24+esp]  input pointer
;   [28+esp]  byte count
;   [32+esp]  pointer to 8 dwords copied into state words 4..11
;   [36+esp]  pointer to 4 dwords copied into state words 12..15; the
;             first one is incremented once per 64-byte block
; NOTE(review): presumably the C prototype is
;   void ChaCha20_ctr32(uint8_t *out, const uint8_t *in, size_t len,
;                       const uint32_t key[8], const uint32_t counter[4]);
; confirm against the header that declares it.
;
; ebp, ebx, esi and edi are saved here and restored before returning.
;-----------------------------------------------------------------------
17 global _ChaCha20_ctr32
18 align 16
19 _ChaCha20_ctr32:
20 L$_ChaCha20_ctr32_begin:
21 push ebp
22 push ebx
23 push esi
24 push edi
; Nothing to do when the byte count is zero.
25 xor eax,eax
26 cmp eax,DWORD [28+esp]
27 je NEAR L$000no_data
; call/pop leaves the address of L$pic_point in eax. The SSSE3 path later
; turns it into the address of L$ssse3_data, so eax must stay untouched
; until the jump to L$ssse3_shortcut.
28 call L$pic_point
29 L$pic_point:
30 pop eax
; Use the SSSE3 code only when bit 24 of capability word 0 and bit 9 of
; capability word 1 are both set; otherwise take the integer-only path.
; NOTE(review): presumably these are the CPUID.1 EDX FXSR and ECX SSSE3
; bits -- confirm against the definition of OPENSSL_ia32cap_P.
31 lea ebp,[_OPENSSL_ia32cap_P]
32 test DWORD [ebp],16777216
33 jz NEAR L$001x86
34 test DWORD [4+ebp],512
35 jz NEAR L$001x86
36 jmp NEAR L$ssse3_shortcut
; Integer-only path: build the stack frame.
;
; Frame layout after "sub esp,132":
;   [  0.. 60+esp]  16-dword working state, word i at [4*i+esp]
;   [ 80..108+esp]  copy of the 8 dwords at the fourth argument
;   [112..124+esp]  copy of the 4 dwords at the fifth argument
;   [128+esp]       loop counter of L$004loop
;   [152+esp], [156+esp], [160+esp]
;                   the caller's first three argument slots (formerly
;                   [20+esp], [24+esp], [28+esp]); reused to hold the
;                   current output pointer, input pointer and remaining
;                   byte count
37 L$001x86:
38 mov esi,DWORD [32+esp]
39 mov edi,DWORD [36+esp]
40 sub esp,132
; Copy the 8 dwords behind esi, four at a time.
41 mov eax,DWORD [esi]
42 mov ebx,DWORD [4+esi]
43 mov ecx,DWORD [8+esi]
44 mov edx,DWORD [12+esi]
45 mov DWORD [80+esp],eax
46 mov DWORD [84+esp],ebx
47 mov DWORD [88+esp],ecx
48 mov DWORD [92+esp],edx
49 mov eax,DWORD [16+esi]
50 mov ebx,DWORD [20+esi]
51 mov ecx,DWORD [24+esi]
52 mov edx,DWORD [28+esi]
53 mov DWORD [96+esp],eax
54 mov DWORD [100+esp],ebx
55 mov DWORD [104+esp],ecx
56 mov DWORD [108+esp],edx
; Copy the 4 dwords behind edi.
57 mov eax,DWORD [edi]
58 mov ebx,DWORD [4+edi]
59 mov ecx,DWORD [8+edi]
60 mov edx,DWORD [12+edi]
; The first dword is stored minus one because L$002entry adds one before
; every block, including the first.
61 sub eax,1
62 mov DWORD [112+esp],eax
63 mov DWORD [116+esp],ebx
64 mov DWORD [120+esp],ecx
65 mov DWORD [124+esp],edx
; Skip the pointer/length write-back at L$003outer_loop on the first pass:
; the argument slots still hold the caller's values.
66 jmp NEAR L$002entry
; Per-block setup of the integer path.
; L$003outer_loop is entered after a full block was written, with
; eax = advanced output pointer, ebx = advanced input pointer and
; ecx = remaining byte count; they are written back to the argument slots.
67 align 16
68 L$003outer_loop:
69 mov DWORD [156+esp],ebx
70 mov DWORD [152+esp],eax
71 mov DWORD [160+esp],ecx
; Initialise the working state for one block. State words 0..3 are the
; constants 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574 (little-endian
; ASCII "expand 32-byte k"). Word 0 lives in eax; words 1..3 go to the
; frame.
72 L$002entry:
73 mov eax,1634760805
74 mov DWORD [4+esp],857760878
75 mov DWORD [8+esp],2036477234
76 mov DWORD [12+esp],1797285236
; Words 5, 6, 10, 11, 13 and 14 are copied from the saved input to their
; working slots.
77 mov ebx,DWORD [84+esp]
78 mov ebp,DWORD [88+esp]
79 mov ecx,DWORD [104+esp]
80 mov esi,DWORD [108+esp]
81 mov edx,DWORD [116+esp]
82 mov edi,DWORD [120+esp]
83 mov DWORD [20+esp],ebx
84 mov DWORD [24+esp],ebp
85 mov DWORD [40+esp],ecx
86 mov DWORD [44+esp],esi
87 mov DWORD [52+esp],edx
88 mov DWORD [56+esp],edi
; Words 7 and 15 go to the frame; words 4, 8, 9 and 12 stay in ebp, ecx,
; esi and edx for the first quarter-rounds.
89 mov ebx,DWORD [92+esp]
90 mov edi,DWORD [124+esp]
91 mov edx,DWORD [112+esp]
92 mov ebp,DWORD [80+esp]
93 mov ecx,DWORD [96+esp]
94 mov esi,DWORD [100+esp]
; Advance the block counter (word 12) and store the new value back.
95 add edx,1
96 mov DWORD [28+esp],ebx
97 mov DWORD [60+esp],edi
98 mov DWORD [112+esp],edx
; ebx = number of passes through L$004loop.
99 mov ebx,10
100 jmp NEAR L$004loop
; Integer round loop, executed 10 times.
;
; Each pass performs eight quarter-rounds (add / xor / rol by 16, 12, 8, 7),
; with loads and stores for the next quarter-round interleaved into the
; current one. Working-state words (x0..x15, word i at [4*i+esp]) are
; processed in this order:
;   columns:   (x0,x4,x8,x12) (x1,x5,x9,x13) (x2,x6,x10,x14) (x3,x7,x11,x15)
;   diagonals: (x0,x5,x10,x15) (x1,x6,x11,x12) (x2,x7,x8,x13) (x3,x4,x9,x14)
;
; On entry and at the bottom of each pass: eax = x0, ebp = x4, ecx = x8,
; esi = x9, edx = x12, ebx = remaining pass count. ebx is parked in
; [128+esp] for the duration of the pass because the register is needed as
; scratch.
101 align 16
102 L$004loop:
103 add eax,ebp
104 mov DWORD [128+esp],ebx
105 mov ebx,ebp
106 xor edx,eax
107 rol edx,16
108 add ecx,edx
109 xor ebx,ecx
110 mov edi,DWORD [52+esp]
111 rol ebx,12
112 mov ebp,DWORD [20+esp]
113 add eax,ebx
114 xor edx,eax
115 mov DWORD [esp],eax
116 rol edx,8
117 mov eax,DWORD [4+esp]
118 add ecx,edx
119 mov DWORD [48+esp],edx
120 xor ebx,ecx
121 add eax,ebp
122 rol ebx,7
123 xor edi,eax
124 mov DWORD [32+esp],ecx
125 rol edi,16
126 mov DWORD [16+esp],ebx
127 add esi,edi
128 mov ecx,DWORD [40+esp]
129 xor ebp,esi
130 mov edx,DWORD [56+esp]
131 rol ebp,12
132 mov ebx,DWORD [24+esp]
133 add eax,ebp
134 xor edi,eax
135 mov DWORD [4+esp],eax
136 rol edi,8
137 mov eax,DWORD [8+esp]
138 add esi,edi
139 mov DWORD [52+esp],edi
140 xor ebp,esi
141 add eax,ebx
142 rol ebp,7
143 xor edx,eax
144 mov DWORD [36+esp],esi
145 rol edx,16
146 mov DWORD [20+esp],ebp
147 add ecx,edx
148 mov esi,DWORD [44+esp]
149 xor ebx,ecx
150 mov edi,DWORD [60+esp]
151 rol ebx,12
152 mov ebp,DWORD [28+esp]
153 add eax,ebx
154 xor edx,eax
155 mov DWORD [8+esp],eax
156 rol edx,8
157 mov eax,DWORD [12+esp]
158 add ecx,edx
159 mov DWORD [56+esp],edx
160 xor ebx,ecx
161 add eax,ebp
162 rol ebx,7
163 xor edi,eax
164 rol edi,16
165 mov DWORD [24+esp],ebx
166 add esi,edi
167 xor ebp,esi
168 rol ebp,12
169 mov ebx,DWORD [20+esp]
170 add eax,ebp
171 xor edi,eax
172 mov DWORD [12+esp],eax
173 rol edi,8
; The diagonal quarter-rounds start here with x0 reloaded into eax; the
; last steps of the fourth column quarter-round are still interleaved
; below. x15 is passed on in a register (edi -> edx), not through memory.
174 mov eax,DWORD [esp]
175 add esi,edi
176 mov edx,edi
177 xor ebp,esi
178 add eax,ebx
179 rol ebp,7
180 xor edx,eax
181 rol edx,16
182 mov DWORD [28+esp],ebp
183 add ecx,edx
184 xor ebx,ecx
185 mov edi,DWORD [48+esp]
186 rol ebx,12
187 mov ebp,DWORD [24+esp]
188 add eax,ebx
189 xor edx,eax
190 mov DWORD [esp],eax
191 rol edx,8
192 mov eax,DWORD [4+esp]
193 add ecx,edx
194 mov DWORD [60+esp],edx
195 xor ebx,ecx
196 add eax,ebp
197 rol ebx,7
198 xor edi,eax
199 mov DWORD [40+esp],ecx
200 rol edi,16
201 mov DWORD [20+esp],ebx
202 add esi,edi
203 mov ecx,DWORD [32+esp]
204 xor ebp,esi
205 mov edx,DWORD [52+esp]
206 rol ebp,12
207 mov ebx,DWORD [28+esp]
208 add eax,ebp
209 xor edi,eax
210 mov DWORD [4+esp],eax
211 rol edi,8
212 mov eax,DWORD [8+esp]
213 add esi,edi
214 mov DWORD [48+esp],edi
215 xor ebp,esi
216 add eax,ebx
217 rol ebp,7
218 xor edx,eax
219 mov DWORD [44+esp],esi
220 rol edx,16
221 mov DWORD [24+esp],ebp
222 add ecx,edx
223 mov esi,DWORD [36+esp]
224 xor ebx,ecx
225 mov edi,DWORD [56+esp]
226 rol ebx,12
227 mov ebp,DWORD [16+esp]
228 add eax,ebx
229 xor edx,eax
230 mov DWORD [8+esp],eax
231 rol edx,8
232 mov eax,DWORD [12+esp]
233 add ecx,edx
234 mov DWORD [52+esp],edx
235 xor ebx,ecx
236 add eax,ebp
237 rol ebx,7
238 xor edi,eax
239 rol edi,16
240 mov DWORD [28+esp],ebx
241 add esi,edi
242 xor ebp,esi
; Reload x12 into edx and the pass counter into ebx for the next pass.
243 mov edx,DWORD [48+esp]
244 rol ebp,12
245 mov ebx,DWORD [128+esp]
246 add eax,ebp
247 xor edi,eax
248 mov DWORD [12+esp],eax
249 rol edi,8
250 mov eax,DWORD [esp]
251 add esi,edi
252 mov DWORD [56+esp],edi
253 xor ebp,esi
254 rol ebp,7
255 dec ebx
256 jnz NEAR L$004loop
; Integer path, after the rounds: add the initial state back to the working
; state and, if at least 64 bytes remain, XOR the resulting block with the
; input and write it to the output.
; Register contents here: eax = x0, ebp = x4, ecx = x8, esi = x9,
; edx = x12, edi = x14; the other words are in the frame.
257 mov ebx,DWORD [160+esp]
258 add eax,1634760805
259 add ebp,DWORD [80+esp]
260 add ecx,DWORD [96+esp]
261 add esi,DWORD [100+esp]
; ebx = remaining byte count; fewer than 64 bytes go to the tail code, which
; relies on ebx still holding that count.
262 cmp ebx,64
263 jb NEAR L$005tail
; ebx = input pointer from here on.
264 mov ebx,DWORD [156+esp]
265 add edx,DWORD [112+esp]
266 add edi,DWORD [120+esp]
267 xor eax,DWORD [ebx]
268 xor ebp,DWORD [16+ebx]
; Output word 0 is parked in [esp] because eax is needed for the output
; pointer; it is written out last.
269 mov DWORD [esp],eax
270 mov eax,DWORD [152+esp]
271 xor ecx,DWORD [32+ebx]
272 xor esi,DWORD [36+ebx]
273 xor edx,DWORD [48+ebx]
274 xor edi,DWORD [56+ebx]
275 mov DWORD [16+eax],ebp
276 mov DWORD [32+eax],ecx
277 mov DWORD [36+eax],esi
278 mov DWORD [48+eax],edx
279 mov DWORD [56+eax],edi
; Words 1, 2, 3, 5 and 6.
280 mov ebp,DWORD [4+esp]
281 mov ecx,DWORD [8+esp]
282 mov esi,DWORD [12+esp]
283 mov edx,DWORD [20+esp]
284 mov edi,DWORD [24+esp]
285 add ebp,857760878
286 add ecx,2036477234
287 add esi,1797285236
288 add edx,DWORD [84+esp]
289 add edi,DWORD [88+esp]
290 xor ebp,DWORD [4+ebx]
291 xor ecx,DWORD [8+ebx]
292 xor esi,DWORD [12+ebx]
293 xor edx,DWORD [20+ebx]
294 xor edi,DWORD [24+ebx]
295 mov DWORD [4+eax],ebp
296 mov DWORD [8+eax],ecx
297 mov DWORD [12+eax],esi
298 mov DWORD [20+eax],edx
299 mov DWORD [24+eax],edi
; Words 7, 10, 11, 13 and 15.
300 mov ebp,DWORD [28+esp]
301 mov ecx,DWORD [40+esp]
302 mov esi,DWORD [44+esp]
303 mov edx,DWORD [52+esp]
304 mov edi,DWORD [60+esp]
305 add ebp,DWORD [92+esp]
306 add ecx,DWORD [104+esp]
307 add esi,DWORD [108+esp]
308 add edx,DWORD [116+esp]
309 add edi,DWORD [124+esp]
310 xor ebp,DWORD [28+ebx]
311 xor ecx,DWORD [40+ebx]
312 xor esi,DWORD [44+ebx]
313 xor edx,DWORD [52+ebx]
314 xor edi,DWORD [60+ebx]
; Advance the input pointer, finish the stores (including the parked
; word 0), advance the output pointer and subtract 64 from the count.
315 lea ebx,[64+ebx]
316 mov DWORD [28+eax],ebp
317 mov ebp,DWORD [esp]
318 mov DWORD [40+eax],ecx
319 mov ecx,DWORD [160+esp]
320 mov DWORD [44+eax],esi
321 mov DWORD [52+eax],edx
322 mov DWORD [60+eax],edi
323 mov DWORD [eax],ebp
324 lea eax,[64+eax]
325 sub ecx,64
; More data: L$003outer_loop stores eax, ebx and ecx back into the frame.
326 jnz NEAR L$003outer_loop
327 jmp NEAR L$006done
; Integer path, final partial block (fewer than 64 bytes left; the count is
; in ebx). The complete 64-byte block is first assembled in the frame at
; [0..60+esp], then XORed into the output one byte at a time.
328 L$005tail:
329 add edx,DWORD [112+esp]
330 add edi,DWORD [120+esp]
; Words 0, 4, 8, 9, 12 and 14 come from registers.
331 mov DWORD [esp],eax
332 mov DWORD [16+esp],ebp
333 mov DWORD [32+esp],ecx
334 mov DWORD [36+esp],esi
335 mov DWORD [48+esp],edx
336 mov DWORD [56+esp],edi
; Words 1, 2, 3, 5 and 6: add the initial values in place.
337 mov ebp,DWORD [4+esp]
338 mov ecx,DWORD [8+esp]
339 mov esi,DWORD [12+esp]
340 mov edx,DWORD [20+esp]
341 mov edi,DWORD [24+esp]
342 add ebp,857760878
343 add ecx,2036477234
344 add esi,1797285236
345 add edx,DWORD [84+esp]
346 add edi,DWORD [88+esp]
347 mov DWORD [4+esp],ebp
348 mov DWORD [8+esp],ecx
349 mov DWORD [12+esp],esi
350 mov DWORD [20+esp],edx
351 mov DWORD [24+esp],edi
; Words 7, 10, 11, 13 and 15.
352 mov ebp,DWORD [28+esp]
353 mov ecx,DWORD [40+esp]
354 mov esi,DWORD [44+esp]
355 mov edx,DWORD [52+esp]
356 mov edi,DWORD [60+esp]
357 add ebp,DWORD [92+esp]
358 add ecx,DWORD [104+esp]
359 add esi,DWORD [108+esp]
360 add edx,DWORD [116+esp]
361 add edi,DWORD [124+esp]
; ebp = input pointer, ecx = output pointer, esi = byte index (starts at 0).
362 mov DWORD [28+esp],ebp
363 mov ebp,DWORD [156+esp]
364 mov DWORD [40+esp],ecx
365 mov ecx,DWORD [152+esp]
366 mov DWORD [44+esp],esi
367 xor esi,esi
368 mov DWORD [52+esp],edx
369 mov DWORD [60+esp],edi
370 xor eax,eax
371 xor edx,edx
; out[i] = in[i] XOR block[i] for the ebx remaining bytes. esi is advanced
; before the store, hence the -1 in the store address.
372 L$007tail_loop:
373 mov al,BYTE [ebp*1+esi]
374 mov dl,BYTE [esi*1+esp]
375 lea esi,[1+esi]
376 xor al,dl
377 mov BYTE [esi*1+ecx-1],al
378 dec ebx
379 jnz NEAR L$007tail_loop
; Release the 132-byte frame.
380 L$006done:
381 add esp,132
; Shared exit; also reached directly (no frame allocated) when the byte
; count was zero.
382 L$000no_data:
383 pop edi
384 pop esi
385 pop ebx
386 pop ebp
387 ret
;-----------------------------------------------------------------------
; _ChaCha20_ssse3 -- SSSE3 implementation; takes the same five stack
; arguments as _ChaCha20_ctr32, which jumps to L$ssse3_shortcut with its
; own four registers already pushed.
;
; NOTE(review): the "lea eax" below assumes eax holds the address of
; L$pic_point, which _ChaCha20_ctr32 establishes before jumping here.
; Nothing between the global label and that lea writes eax, so a direct
; call to _ChaCha20_ssse3 would compute the data address from whatever eax
; held at the call -- confirm that no caller uses the global entry point.
;
; Frame (esp is aligned down to 64 bytes):
;   [  0..255+esp]  working state for four blocks, addressed as [ebx-128..]
;   [256..511+esp]  per-batch initial state, addressed as [ebp-128..];
;                   each state word is broadcast to all four dword lanes
;   [512+esp]       esp as it was before the frame was allocated
;   [516+esp]       key pointer, [520+esp] counter pointer
; The 1x path below uses only [0..63+esp] and [512+esp].
;-----------------------------------------------------------------------
388 global _ChaCha20_ssse3
389 align 16
390 _ChaCha20_ssse3:
391 L$_ChaCha20_ssse3_begin:
392 push ebp
393 push ebx
394 push esi
395 push edi
396 L$ssse3_shortcut:
; edi = output, esi = input, ecx = byte count, edx = key pointer,
; ebx = counter pointer.
397 mov edi,DWORD [20+esp]
398 mov esi,DWORD [24+esp]
399 mov ecx,DWORD [28+esp]
400 mov edx,DWORD [32+esp]
401 mov ebx,DWORD [36+esp]
402 mov ebp,esp
403 sub esp,524
404 and esp,-64
405 mov DWORD [512+esp],ebp
; eax = address of L$ssse3_data (see the note in the header).
406 lea eax,[(L$ssse3_data-L$pic_point)+eax]
; xmm3 = the four counter dwords (state words 12..15).
407 movdqu xmm3,[ebx]
; Fewer than 256 bytes: go straight to the one-block-at-a-time code.
408 cmp ecx,256
409 jb NEAR L$0081x
410 mov DWORD [516+esp],edx
411 mov DWORD [520+esp],ebx
; ecx is kept biased by -256 while the 4-block loop runs.
412 sub ecx,256
413 lea ebp,[384+esp]
414 movdqu xmm7,[edx]
; Broadcast state words 12..15 to all lanes. Word 12 gets {0,1,2,3} added
; so each lane carries its own block counter, then {4,4,4,4} subtracted
; because L$009outer_loop adds 4 before every batch, including the first.
415 pshufd xmm0,xmm3,0
416 pshufd xmm1,xmm3,85
417 pshufd xmm2,xmm3,170
418 pshufd xmm3,xmm3,255
419 paddd xmm0,[48+eax]
; Broadcast the first four key dwords (state words 4..7).
420 pshufd xmm4,xmm7,0
421 pshufd xmm5,xmm7,85
422 psubd xmm0,[64+eax]
423 pshufd xmm6,xmm7,170
424 pshufd xmm7,xmm7,255
425 movdqa [64+ebp],xmm0
426 movdqa [80+ebp],xmm1
427 movdqa [96+ebp],xmm2
428 movdqa [112+ebp],xmm3
429 movdqu xmm3,[16+edx]
430 movdqa [ebp-64],xmm4
431 movdqa [ebp-48],xmm5
432 movdqa [ebp-32],xmm6
433 movdqa [ebp-16],xmm7
; Broadcast the last four key dwords (words 8..11) and the four constants
; stored at L$ssse3_data+32 (words 0..3).
434 movdqa xmm7,[32+eax]
435 lea ebx,[128+esp]
436 pshufd xmm0,xmm3,0
437 pshufd xmm1,xmm3,85
438 pshufd xmm2,xmm3,170
439 pshufd xmm3,xmm3,255
440 pshufd xmm4,xmm7,0
441 pshufd xmm5,xmm7,85
442 pshufd xmm6,xmm7,170
443 pshufd xmm7,xmm7,255
444 movdqa [ebp],xmm0
445 movdqa [16+ebp],xmm1
446 movdqa [32+ebp],xmm2
447 movdqa [48+ebp],xmm3
448 movdqa [ebp-128],xmm4
449 movdqa [ebp-112],xmm5
450 movdqa [ebp-96],xmm6
451 movdqa [ebp-80],xmm7
; Bias both data pointers by +128; the 4-block output code addresses the
; four blocks of a batch as [-128], [-64], [0] and [64].
452 lea esi,[128+esi]
453 lea edi,[128+edi]
454 jmp NEAR L$009outer_loop
; Start of one 256-byte batch (four blocks in parallel): copy the initial
; state from [ebp-128..] into the working area at [ebx-128..] and advance
; the per-lane block counters by 4.
; Words 0, 4, 8, 9 and 12 are not copied; they are loaded into xmm0, xmm3,
; xmm4, xmm5 and xmm6 instead, which is what L$010loop expects on entry.
455 align 16
456 L$009outer_loop:
; Words 1..3 and 5..7.
457 movdqa xmm1,[ebp-112]
458 movdqa xmm2,[ebp-96]
459 movdqa xmm3,[ebp-80]
460 movdqa xmm5,[ebp-48]
461 movdqa xmm6,[ebp-32]
462 movdqa xmm7,[ebp-16]
463 movdqa [ebx-112],xmm1
464 movdqa [ebx-96],xmm2
465 movdqa [ebx-80],xmm3
466 movdqa [ebx-48],xmm5
467 movdqa [ebx-32],xmm6
468 movdqa [ebx-16],xmm7
; Words 10..15; word 12 (xmm4) gets {4,4,4,4} added first.
469 movdqa xmm2,[32+ebp]
470 movdqa xmm3,[48+ebp]
471 movdqa xmm4,[64+ebp]
472 movdqa xmm5,[80+ebp]
473 movdqa xmm6,[96+ebp]
474 movdqa xmm7,[112+ebp]
475 paddd xmm4,[64+eax]
476 movdqa [32+ebx],xmm2
477 movdqa [48+ebx],xmm3
478 movdqa [64+ebx],xmm4
479 movdqa [80+ebx],xmm5
480 movdqa [96+ebx],xmm6
481 movdqa [112+ebx],xmm7
; The advanced counters also replace the saved initial word 12.
482 movdqa [64+ebp],xmm4
483 movdqa xmm0,[ebp-128]
484 movdqa xmm6,xmm4
485 movdqa xmm3,[ebp-64]
486 movdqa xmm4,[ebp]
487 movdqa xmm5,[16+ebp]
; edx = number of passes through L$010loop.
488 mov edx,10
489 nop
; SSSE3 round loop for four blocks at once, executed 10 times.
;
; Every xmm register holds one state word of four different blocks, so each
; instruction advances all four blocks. One pass performs eight
; quarter-rounds, interleaved with the loads/stores of the next one:
;   columns:   (x0,x4,x8,x12) (x1,x5,x9,x13) (x2,x6,x10,x14) (x3,x7,x11,x15)
;   diagonals: (x0,x5,x10,x15) (x1,x6,x11,x12) (x2,x7,x8,x13) (x3,x4,x9,x14)
; Rotations by 16 and 8 are byte shuffles (pshufb with the masks at [eax]
; and [16+eax]); rotations by 12 and 7 are pslld/psrld/por triples.
;
; On entry and at the bottom of each pass: xmm0 = x0, xmm3 = x4, xmm4 = x8,
; xmm5 = x9, xmm6 = x12; all other words are in the working area, word i at
; [ebx-128+16*i].
490 align 16
491 L$010loop:
492 paddd xmm0,xmm3
493 movdqa xmm2,xmm3
494 pxor xmm6,xmm0
495 pshufb xmm6,[eax]
496 paddd xmm4,xmm6
497 pxor xmm2,xmm4
498 movdqa xmm3,[ebx-48]
499 movdqa xmm1,xmm2
500 pslld xmm2,12
501 psrld xmm1,20
502 por xmm2,xmm1
503 movdqa xmm1,[ebx-112]
504 paddd xmm0,xmm2
505 movdqa xmm7,[80+ebx]
506 pxor xmm6,xmm0
507 movdqa [ebx-128],xmm0
508 pshufb xmm6,[16+eax]
509 paddd xmm4,xmm6
510 movdqa [64+ebx],xmm6
511 pxor xmm2,xmm4
512 paddd xmm1,xmm3
513 movdqa xmm0,xmm2
514 pslld xmm2,7
515 psrld xmm0,25
516 pxor xmm7,xmm1
517 por xmm2,xmm0
518 movdqa [ebx],xmm4
519 pshufb xmm7,[eax]
520 movdqa [ebx-64],xmm2
521 paddd xmm5,xmm7
522 movdqa xmm4,[32+ebx]
523 pxor xmm3,xmm5
524 movdqa xmm2,[ebx-32]
525 movdqa xmm0,xmm3
526 pslld xmm3,12
527 psrld xmm0,20
528 por xmm3,xmm0
529 movdqa xmm0,[ebx-96]
530 paddd xmm1,xmm3
531 movdqa xmm6,[96+ebx]
532 pxor xmm7,xmm1
533 movdqa [ebx-112],xmm1
534 pshufb xmm7,[16+eax]
535 paddd xmm5,xmm7
536 movdqa [80+ebx],xmm7
537 pxor xmm3,xmm5
538 paddd xmm0,xmm2
539 movdqa xmm1,xmm3
540 pslld xmm3,7
541 psrld xmm1,25
542 pxor xmm6,xmm0
543 por xmm3,xmm1
544 movdqa [16+ebx],xmm5
545 pshufb xmm6,[eax]
546 movdqa [ebx-48],xmm3
547 paddd xmm4,xmm6
548 movdqa xmm5,[48+ebx]
549 pxor xmm2,xmm4
550 movdqa xmm3,[ebx-16]
551 movdqa xmm1,xmm2
552 pslld xmm2,12
553 psrld xmm1,20
554 por xmm2,xmm1
555 movdqa xmm1,[ebx-80]
556 paddd xmm0,xmm2
557 movdqa xmm7,[112+ebx]
558 pxor xmm6,xmm0
559 movdqa [ebx-96],xmm0
560 pshufb xmm6,[16+eax]
561 paddd xmm4,xmm6
562 movdqa [96+ebx],xmm6
563 pxor xmm2,xmm4
564 paddd xmm1,xmm3
565 movdqa xmm0,xmm2
566 pslld xmm2,7
567 psrld xmm0,25
568 pxor xmm7,xmm1
569 por xmm2,xmm0
570 pshufb xmm7,[eax]
571 movdqa [ebx-32],xmm2
572 paddd xmm5,xmm7
573 pxor xmm3,xmm5
574 movdqa xmm2,[ebx-48]
575 movdqa xmm0,xmm3
576 pslld xmm3,12
577 psrld xmm0,20
578 por xmm3,xmm0
; x0 is reloaded for the first diagonal quarter-round; the end of the
; fourth column quarter-round is still interleaved below, and x15 is handed
; over in a register (xmm7 -> xmm6).
579 movdqa xmm0,[ebx-128]
580 paddd xmm1,xmm3
581 pxor xmm7,xmm1
582 movdqa [ebx-80],xmm1
583 pshufb xmm7,[16+eax]
584 paddd xmm5,xmm7
585 movdqa xmm6,xmm7
586 pxor xmm3,xmm5
587 paddd xmm0,xmm2
588 movdqa xmm1,xmm3
589 pslld xmm3,7
590 psrld xmm1,25
591 pxor xmm6,xmm0
592 por xmm3,xmm1
593 pshufb xmm6,[eax]
594 movdqa [ebx-16],xmm3
595 paddd xmm4,xmm6
596 pxor xmm2,xmm4
597 movdqa xmm3,[ebx-32]
598 movdqa xmm1,xmm2
599 pslld xmm2,12
600 psrld xmm1,20
601 por xmm2,xmm1
602 movdqa xmm1,[ebx-112]
603 paddd xmm0,xmm2
604 movdqa xmm7,[64+ebx]
605 pxor xmm6,xmm0
606 movdqa [ebx-128],xmm0
607 pshufb xmm6,[16+eax]
608 paddd xmm4,xmm6
609 movdqa [112+ebx],xmm6
610 pxor xmm2,xmm4
611 paddd xmm1,xmm3
612 movdqa xmm0,xmm2
613 pslld xmm2,7
614 psrld xmm0,25
615 pxor xmm7,xmm1
616 por xmm2,xmm0
617 movdqa [32+ebx],xmm4
618 pshufb xmm7,[eax]
619 movdqa [ebx-48],xmm2
620 paddd xmm5,xmm7
621 movdqa xmm4,[ebx]
622 pxor xmm3,xmm5
623 movdqa xmm2,[ebx-16]
624 movdqa xmm0,xmm3
625 pslld xmm3,12
626 psrld xmm0,20
627 por xmm3,xmm0
628 movdqa xmm0,[ebx-96]
629 paddd xmm1,xmm3
630 movdqa xmm6,[80+ebx]
631 pxor xmm7,xmm1
632 movdqa [ebx-112],xmm1
633 pshufb xmm7,[16+eax]
634 paddd xmm5,xmm7
635 movdqa [64+ebx],xmm7
636 pxor xmm3,xmm5
637 paddd xmm0,xmm2
638 movdqa xmm1,xmm3
639 pslld xmm3,7
640 psrld xmm1,25
641 pxor xmm6,xmm0
642 por xmm3,xmm1
643 movdqa [48+ebx],xmm5
644 pshufb xmm6,[eax]
645 movdqa [ebx-32],xmm3
646 paddd xmm4,xmm6
647 movdqa xmm5,[16+ebx]
648 pxor xmm2,xmm4
649 movdqa xmm3,[ebx-64]
650 movdqa xmm1,xmm2
651 pslld xmm2,12
652 psrld xmm1,20
653 por xmm2,xmm1
654 movdqa xmm1,[ebx-80]
655 paddd xmm0,xmm2
656 movdqa xmm7,[96+ebx]
657 pxor xmm6,xmm0
658 movdqa [ebx-96],xmm0
659 pshufb xmm6,[16+eax]
660 paddd xmm4,xmm6
661 movdqa [80+ebx],xmm6
662 pxor xmm2,xmm4
663 paddd xmm1,xmm3
664 movdqa xmm0,xmm2
665 pslld xmm2,7
666 psrld xmm0,25
667 pxor xmm7,xmm1
668 por xmm2,xmm0
669 pshufb xmm7,[eax]
670 movdqa [ebx-16],xmm2
671 paddd xmm5,xmm7
672 pxor xmm3,xmm5
673 movdqa xmm0,xmm3
674 pslld xmm3,12
675 psrld xmm0,20
676 por xmm3,xmm0
; Reload x0 and x12 so the register assignment matches the loop entry.
677 movdqa xmm0,[ebx-128]
678 paddd xmm1,xmm3
679 movdqa xmm6,[64+ebx]
680 pxor xmm7,xmm1
681 movdqa [ebx-80],xmm1
682 pshufb xmm7,[16+eax]
683 paddd xmm5,xmm7
684 movdqa [96+ebx],xmm7
685 pxor xmm3,xmm5
686 movdqa xmm1,xmm3
687 pslld xmm3,7
688 psrld xmm1,25
689 por xmm3,xmm1
690 dec edx
691 jnz NEAR L$010loop
; 4-block path, after the rounds: produce 256 bytes of output.
;
; The state words still held in registers are flushed to the working area
; first. Then, for each group of four state words (0-3, 4-7, 8-11, 12-15):
;   1. add the saved initial state from [ebp-128..];
;   2. transpose the 4x4 dword matrix with punpck{l,h}dq / punpck{l,h}qdq
;      so that xmm0, xmm1, xmm6 and xmm3 hold the four words of blocks
;      0, 1, 2 and 3 respectively;
;   3. XOR with 16 input bytes of each block and store to the output.
; esi/edi carry the +128 bias, so [-128], [-64], [0] and [64] address the
; same 16-byte group in the four consecutive 64-byte blocks.
692 movdqa [ebx-64],xmm3
693 movdqa [ebx],xmm4
694 movdqa [16+ebx],xmm5
695 movdqa [64+ebx],xmm6
696 movdqa [96+ebx],xmm7
; --- state words 0..3 ---
697 movdqa xmm1,[ebx-112]
698 movdqa xmm2,[ebx-96]
699 movdqa xmm3,[ebx-80]
700 paddd xmm0,[ebp-128]
701 paddd xmm1,[ebp-112]
702 paddd xmm2,[ebp-96]
703 paddd xmm3,[ebp-80]
704 movdqa xmm6,xmm0
705 punpckldq xmm0,xmm1
706 movdqa xmm7,xmm2
707 punpckldq xmm2,xmm3
708 punpckhdq xmm6,xmm1
709 punpckhdq xmm7,xmm3
710 movdqa xmm1,xmm0
711 punpcklqdq xmm0,xmm2
712 movdqa xmm3,xmm6
713 punpcklqdq xmm6,xmm7
714 punpckhqdq xmm1,xmm2
715 punpckhqdq xmm3,xmm7
716 movdqu xmm4,[esi-128]
717 movdqu xmm5,[esi-64]
718 movdqu xmm2,[esi]
719 movdqu xmm7,[64+esi]
720 lea esi,[16+esi]
; The loads of the next word group are interleaved with the XORs.
721 pxor xmm4,xmm0
722 movdqa xmm0,[ebx-64]
723 pxor xmm5,xmm1
724 movdqa xmm1,[ebx-48]
725 pxor xmm6,xmm2
726 movdqa xmm2,[ebx-32]
727 pxor xmm7,xmm3
728 movdqa xmm3,[ebx-16]
729 movdqu [edi-128],xmm4
730 movdqu [edi-64],xmm5
731 movdqu [edi],xmm6
732 movdqu [64+edi],xmm7
733 lea edi,[16+edi]
; --- state words 4..7 ---
734 paddd xmm0,[ebp-64]
735 paddd xmm1,[ebp-48]
736 paddd xmm2,[ebp-32]
737 paddd xmm3,[ebp-16]
738 movdqa xmm6,xmm0
739 punpckldq xmm0,xmm1
740 movdqa xmm7,xmm2
741 punpckldq xmm2,xmm3
742 punpckhdq xmm6,xmm1
743 punpckhdq xmm7,xmm3
744 movdqa xmm1,xmm0
745 punpcklqdq xmm0,xmm2
746 movdqa xmm3,xmm6
747 punpcklqdq xmm6,xmm7
748 punpckhqdq xmm1,xmm2
749 punpckhqdq xmm3,xmm7
750 movdqu xmm4,[esi-128]
751 movdqu xmm5,[esi-64]
752 movdqu xmm2,[esi]
753 movdqu xmm7,[64+esi]
754 lea esi,[16+esi]
755 pxor xmm4,xmm0
756 movdqa xmm0,[ebx]
757 pxor xmm5,xmm1
758 movdqa xmm1,[16+ebx]
759 pxor xmm6,xmm2
760 movdqa xmm2,[32+ebx]
761 pxor xmm7,xmm3
762 movdqa xmm3,[48+ebx]
763 movdqu [edi-128],xmm4
764 movdqu [edi-64],xmm5
765 movdqu [edi],xmm6
766 movdqu [64+edi],xmm7
767 lea edi,[16+edi]
; --- state words 8..11 ---
768 paddd xmm0,[ebp]
769 paddd xmm1,[16+ebp]
770 paddd xmm2,[32+ebp]
771 paddd xmm3,[48+ebp]
772 movdqa xmm6,xmm0
773 punpckldq xmm0,xmm1
774 movdqa xmm7,xmm2
775 punpckldq xmm2,xmm3
776 punpckhdq xmm6,xmm1
777 punpckhdq xmm7,xmm3
778 movdqa xmm1,xmm0
779 punpcklqdq xmm0,xmm2
780 movdqa xmm3,xmm6
781 punpcklqdq xmm6,xmm7
782 punpckhqdq xmm1,xmm2
783 punpckhqdq xmm3,xmm7
784 movdqu xmm4,[esi-128]
785 movdqu xmm5,[esi-64]
786 movdqu xmm2,[esi]
787 movdqu xmm7,[64+esi]
788 lea esi,[16+esi]
789 pxor xmm4,xmm0
790 movdqa xmm0,[64+ebx]
791 pxor xmm5,xmm1
792 movdqa xmm1,[80+ebx]
793 pxor xmm6,xmm2
794 movdqa xmm2,[96+ebx]
795 pxor xmm7,xmm3
796 movdqa xmm3,[112+ebx]
797 movdqu [edi-128],xmm4
798 movdqu [edi-64],xmm5
799 movdqu [edi],xmm6
800 movdqu [64+edi],xmm7
801 lea edi,[16+edi]
; --- state words 12..15 ---
802 paddd xmm0,[64+ebp]
803 paddd xmm1,[80+ebp]
804 paddd xmm2,[96+ebp]
805 paddd xmm3,[112+ebp]
806 movdqa xmm6,xmm0
807 punpckldq xmm0,xmm1
808 movdqa xmm7,xmm2
809 punpckldq xmm2,xmm3
810 punpckhdq xmm6,xmm1
811 punpckhdq xmm7,xmm3
812 movdqa xmm1,xmm0
813 punpcklqdq xmm0,xmm2
814 movdqa xmm3,xmm6
815 punpcklqdq xmm6,xmm7
816 punpckhqdq xmm1,xmm2
817 punpckhqdq xmm3,xmm7
818 movdqu xmm4,[esi-128]
819 movdqu xmm5,[esi-64]
820 movdqu xmm2,[esi]
821 movdqu xmm7,[64+esi]
; 3*16 + 208 = 256: both pointers now address the next 256-byte batch.
822 lea esi,[208+esi]
823 pxor xmm4,xmm0
824 pxor xmm5,xmm1
825 pxor xmm6,xmm2
826 pxor xmm7,xmm3
827 movdqu [edi-128],xmm4
828 movdqu [edi-64],xmm5
829 movdqu [edi],xmm6
830 movdqu [64+edi],xmm7
831 lea edi,[208+edi]
; ecx is biased by -256, so "no borrow" means at least 256 more bytes are
; left for another full batch.
832 sub ecx,256
833 jnc NEAR L$009outer_loop
; End of the 4-block path: undo the -256 bias on ecx. If nothing is left we
; are done; otherwise restore the argument registers and fall through into
; the one-block code (L$0081x) with an up-to-date counter row in xmm3.
834 add ecx,256
835 jz NEAR L$011done
836 mov ebx,DWORD [520+esp]
; Remove the +128 bias from the data pointers.
837 lea esi,[esi-128]
838 mov edx,DWORD [516+esp]
839 lea edi,[edi-128]
; xmm2 = block counter of lane 0 of the last batch, plus 4 (the dword
; {4,0,0,0} at [96+eax]) = counter of the first unprocessed block.
840 movd xmm2,DWORD [64+ebp]
; xmm3 = the caller's counter row with its first dword cleared by the
; {0,-1,-1,-1} mask at [112+eax], then replaced by the new counter.
841 movdqu xmm3,[ebx]
842 paddd xmm2,[96+eax]
843 pand xmm3,[112+eax]
844 por xmm3,xmm2
; One-block-at-a-time SSSE3 path.
; Expects: eax = L$ssse3_data, edx = key pointer, xmm3 = counter row
; (state words 12..15), esi/edi = input/output, ecx = bytes left.
; The state is kept as four rows: xmm0 = words 0..3 (constants),
; xmm1 = words 4..7, xmm2 = words 8..11, xmm3 = words 12..15, with a copy
; of the initial rows at [esp..63+esp].
845 L$0081x:
846 movdqa xmm0,[32+eax]
847 movdqu xmm1,[edx]
848 movdqu xmm2,[16+edx]
; xmm6 / xmm7 = byte-shuffle masks for the rotations by 16 and 8.
849 movdqa xmm6,[eax]
850 movdqa xmm7,[16+eax]
; NOTE(review): this dword store is overwritten by the 16-byte store of
; xmm3 to the same address four lines below; looks redundant -- confirm.
851 mov DWORD [48+esp],ebp
852 movdqa [esp],xmm0
853 movdqa [16+esp],xmm1
854 movdqa [32+esp],xmm2
855 movdqa [48+esp],xmm3
; edx = number of passes through L$012loop1x.
856 mov edx,10
857 jmp NEAR L$012loop1x
; 1x path, start of every block after the first: reload the initial rows
; from the frame and add {1,0,0,0} (at [80+eax]) to the counter row, saving
; the new row back as the initial row of this block.
858 align 16
859 L$013outer1x:
860 movdqa xmm3,[80+eax]
861 movdqa xmm0,[esp]
862 movdqa xmm1,[16+esp]
863 movdqa xmm2,[32+esp]
864 paddd xmm3,[48+esp]
865 mov edx,10
866 movdqa [48+esp],xmm3
867 jmp NEAR L$012loop1x
; 1x round loop, executed 10 times. Each pass runs the quarter-round on the
; four rows twice: once as loaded (all four columns at once) and once with
; rows 1..3 rotated by one, two and three dword lanes so the same code
; works on the diagonals; the rows are rotated back at the end of the pass.
; xmm4 is scratch for the rotations by 12 and 7.
868 align 16
869 L$012loop1x:
870 paddd xmm0,xmm1
871 pxor xmm3,xmm0
; The byte sequence 66 0F 38 00 DE encodes "pshufb xmm3,xmm6"
; (rotate each dword left by 16).
872 db 102,15,56,0,222
873 paddd xmm2,xmm3
874 pxor xmm1,xmm2
875 movdqa xmm4,xmm1
876 psrld xmm1,20
877 pslld xmm4,12
878 por xmm1,xmm4
879 paddd xmm0,xmm1
880 pxor xmm3,xmm0
; 66 0F 38 00 DF encodes "pshufb xmm3,xmm7" (rotate each dword left by 8).
881 db 102,15,56,0,223
882 paddd xmm2,xmm3
883 pxor xmm1,xmm2
884 movdqa xmm4,xmm1
885 psrld xmm1,25
886 pslld xmm4,7
887 por xmm1,xmm4
; Rotate the rows so that the diagonals line up as columns.
888 pshufd xmm2,xmm2,78
889 pshufd xmm1,xmm1,57
890 pshufd xmm3,xmm3,147
891 nop
892 paddd xmm0,xmm1
893 pxor xmm3,xmm0
894 db 102,15,56,0,222
895 paddd xmm2,xmm3
896 pxor xmm1,xmm2
897 movdqa xmm4,xmm1
898 psrld xmm1,20
899 pslld xmm4,12
900 por xmm1,xmm4
901 paddd xmm0,xmm1
902 pxor xmm3,xmm0
903 db 102,15,56,0,223
904 paddd xmm2,xmm3
905 pxor xmm1,xmm2
906 movdqa xmm4,xmm1
907 psrld xmm1,25
908 pslld xmm4,7
909 por xmm1,xmm4
; Undo the row rotation (the shuffle constants for rows 1 and 3 are
; swapped relative to the ones above).
910 pshufd xmm2,xmm2,78
911 pshufd xmm1,xmm1,147
912 pshufd xmm3,xmm3,57
913 dec edx
914 jnz NEAR L$012loop1x
; 1x path, after the rounds: add the initial rows saved in the frame. If at
; least 64 bytes remain, XOR the block with the input and write 64 bytes of
; output; otherwise hand the block to L$014tail.
915 paddd xmm0,[esp]
916 paddd xmm1,[16+esp]
917 paddd xmm2,[32+esp]
918 paddd xmm3,[48+esp]
919 cmp ecx,64
920 jb NEAR L$014tail
; Unaligned loads/stores: no alignment is assumed for esi and edi.
921 movdqu xmm4,[esi]
922 movdqu xmm5,[16+esi]
923 pxor xmm0,xmm4
924 movdqu xmm4,[32+esi]
925 pxor xmm1,xmm5
926 movdqu xmm5,[48+esi]
927 pxor xmm2,xmm4
928 pxor xmm3,xmm5
929 lea esi,[64+esi]
930 movdqu [edi],xmm0
931 movdqu [16+edi],xmm1
932 movdqu [32+edi],xmm2
933 movdqu [48+edi],xmm3
934 lea edi,[64+edi]
; Loop while bytes remain.
935 sub ecx,64
936 jnz NEAR L$013outer1x
937 jmp NEAR L$011done
; SSSE3 path, final partial block (ecx < 64 bytes left): spill the finished
; 64-byte block to [esp..63+esp], overwriting the saved initial rows that
; are no longer needed, and XOR it into the output byte by byte.
938 L$014tail:
939 movdqa [esp],xmm0
940 movdqa [16+esp],xmm1
941 movdqa [32+esp],xmm2
942 movdqa [48+esp],xmm3
; eax and edx are cleared before their low bytes are used as scratch;
; ebp = byte index.
943 xor eax,eax
944 xor edx,edx
945 xor ebp,ebp
; out[i] = block[i] XOR in[i]; ebp is advanced before the store, hence the
; -1 in the store address.
946 L$015tail_loop:
947 mov al,BYTE [ebp*1+esp]
948 mov dl,BYTE [ebp*1+esi]
949 lea ebp,[1+ebp]
950 xor al,dl
951 mov BYTE [ebp*1+edi-1],al
952 dec ecx
953 jnz NEAR L$015tail_loop
; Common exit of the SSSE3 code: restore the pre-alignment stack pointer
; saved at [512+esp], then the four saved registers.
954 L$011done:
955 mov esp,DWORD [512+esp]
956 pop edi
957 pop esi
958 pop ebx
959 pop ebp
960 ret
; Constant table for the SSSE3 code, addressed through eax. The table is
; stored in the code section, directly after the function. Offsets are
; relative to L$ssse3_data:
;   +0    pshufb mask: rotate every dword left by 16 bits
;   +16   pshufb mask: rotate every dword left by 8 bits
;   +32   state words 0..3 (little-endian ASCII "expand 32-byte k")
;   +48   {0,1,2,3}   per-lane counter offsets for the 4-block path
;   +64   {4,4,4,4}   counter step per 4-block batch
;   +80   {1,0,0,0}   counter step for the 1x path
;   +96   {4,0,0,0}   counter step when leaving the 4-block path
;   +112  {0,-1,-1,-1} mask that clears the first dword of the counter row
961 align 64
962 L$ssse3_data:
963 db 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
964 db 3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
965 dd 1634760805,857760878,2036477234,1797285236
966 dd 0,1,2,3
967 dd 4,4,4,4
968 dd 1,0,0,0
969 dd 4,0,0,0
970 dd 0,-1,-1,-1
971 align 64
; NUL-terminated ASCII banner:
; "ChaCha20 for x86, CRYPTOGAMS by <appro@openssl.org>"
972 db 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
973 db 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
974 db 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
975 db 114,103,62,0
; 16-byte common symbol tested by the dispatch code at the top of
; _ChaCha20_ctr32.
976 segment .bss
977 common _OPENSSL_ia32cap_P 16
OLDNEW
« no previous file with comments | « third_party/boringssl/mac-x86_64/crypto/sha/sha512-x86_64.S ('k') | third_party/boringssl/win-x86/crypto/cpu-x86-asm.asm » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698