Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(15)

Side by Side Diff: third_party/boringssl/linux-x86/crypto/chacha/chacha-x86.S

Issue 2219933002: Land BoringSSL roll on master (Closed) Base URL: git@github.com:dart-lang/sdk.git@master
Patch Set: Created 4 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 #if defined(__i386__)
2 .file "chacha-x86.S"
3 .text
/*
 * ChaCha20_ctr32 -- scalar (general-purpose register) ChaCha20 path for
 * 32-bit x86, AT&T syntax.  All five arguments are read from the stack.
 *
 * After the four register pushes below, the arguments sit at:
 *   20(%esp)  arg 1: destination pointer (written 64 bytes per block)
 *   24(%esp)  arg 2: source pointer (XORed with the generated block)
 *   28(%esp)  arg 3: byte count; zero returns immediately
 *   32(%esp)  arg 4: pointer to 8 dwords copied to 80..108(%esp)
 *   36(%esp)  arg 5: pointer to 4 dwords copied to 112..124(%esp);
 *                    the first dword is advanced by 1 for every block
 *
 * Frame after `subl $132,%esp`:
 *     0..60(%esp)   16-dword working state (words 0, 4, 8, 9 and 12 are
 *                   held in eax, ebp, ecx, esi and edx on loop entry)
 *    80..108(%esp)  the 8 dwords from arg 4
 *   112..124(%esp)  the 4 dwords from arg 5 (112 = per-block counter)
 *   128(%esp)       remaining round-loop iterations
 *   152/156/160(%esp) args 1-3, updated in place as blocks are consumed
 *
 * ebp, ebx, esi and edi are saved on entry and restored before `ret`.
 */
4 .globl ChaCha20_ctr32
5 .hidden ChaCha20_ctr32
6 .type ChaCha20_ctr32,@function
7 .align 16
8 ChaCha20_ctr32:
9 .L_ChaCha20_ctr32_begin:
10 pushl %ebp
11 pushl %ebx
12 pushl %esi
13 pushl %edi
/* Nothing to do when the byte count (arg 3) is zero. */
14 xorl %eax,%eax
15 cmpl 28(%esp),%eax
16 je .L000no_data
/* call/pop yields the run-time address of .Lpic_point in %eax so that
   OPENSSL_ia32cap_P can be addressed position-independently. */
17 call .Lpic_point
18 .Lpic_point:
19 popl %eax
20 leal OPENSSL_ia32cap_P-.Lpic_point(%eax),%ebp
/* Take the SSSE3 path only when bit 24 of capability word 0 and bit 9 of
   capability word 1 are both set.
   NOTE(review): presumably the FXSR and SSSE3 CPUID bits -- confirm against
   the definition of OPENSSL_ia32cap_P. */
21 testl $16777216,(%ebp)
22 jz .L001x86
23 testl $512,4(%ebp)
24 jz .L001x86
/* %eax still holds the address of .Lpic_point here; .Lssse3_shortcut
   relies on that to locate .Lssse3_data. */
25 jmp .Lssse3_shortcut
26 .L001x86:
27 movl 32(%esp),%esi
28 movl 36(%esp),%edi
29 subl $132,%esp
/* Copy the 8 dwords behind arg 4 to 80..108(%esp). */
30 movl (%esi),%eax
31 movl 4(%esi),%ebx
32 movl 8(%esi),%ecx
33 movl 12(%esi),%edx
34 movl %eax,80(%esp)
35 movl %ebx,84(%esp)
36 movl %ecx,88(%esp)
37 movl %edx,92(%esp)
38 movl 16(%esi),%eax
39 movl 20(%esi),%ebx
40 movl 24(%esi),%ecx
41 movl 28(%esi),%edx
42 movl %eax,96(%esp)
43 movl %ebx,100(%esp)
44 movl %ecx,104(%esp)
45 movl %edx,108(%esp)
/* Copy the 4 dwords behind arg 5 to 112..124(%esp).  The first dword is
   stored minus one because the block setup below adds one before use. */
46 movl (%edi),%eax
47 movl 4(%edi),%ebx
48 movl 8(%edi),%ecx
49 movl 12(%edi),%edx
50 subl $1,%eax
51 movl %eax,112(%esp)
52 movl %ebx,116(%esp)
53 movl %ecx,120(%esp)
54 movl %edx,124(%esp)
55 jmp .L002entry
56 .align 16
/* Per-block loop.  On re-entry ebx = advanced source pointer, eax = advanced
   destination pointer, ecx = bytes remaining; they are written back to the
   argument slots.  The first pass skips this because the slots still hold
   the caller's values. */
57 .L003outer_loop:
58 movl %ebx,156(%esp)
59 movl %eax,152(%esp)
60 movl %ecx,160(%esp)
61 .L002entry:
/* Initialise the working state: words 0-3 are the four constants
   0x61707865, 0x3320646e, 0x79622d32, 0x6b206574 (the same values as the
   .long row at .Lssse3_data+32); the rest comes from 80..124(%esp). */
62 movl $1634760805,%eax
63 movl $857760878,4(%esp)
64 movl $2036477234,8(%esp)
65 movl $1797285236,12(%esp)
66 movl 84(%esp),%ebx
67 movl 88(%esp),%ebp
68 movl 104(%esp),%ecx
69 movl 108(%esp),%esi
70 movl 116(%esp),%edx
71 movl 120(%esp),%edi
72 movl %ebx,20(%esp)
73 movl %ebp,24(%esp)
74 movl %ecx,40(%esp)
75 movl %esi,44(%esp)
76 movl %edx,52(%esp)
77 movl %edi,56(%esp)
78 movl 92(%esp),%ebx
79 movl 124(%esp),%edi
80 movl 112(%esp),%edx
81 movl 80(%esp),%ebp
82 movl 96(%esp),%ecx
83 movl 100(%esp),%esi
/* Advance the block counter and keep the new value for the next block. */
84 addl $1,%edx
85 movl %ebx,28(%esp)
86 movl %edi,60(%esp)
87 movl %edx,112(%esp)
/* Ten passes of the round loop; the count is parked at 128(%esp) because
   %ebx is needed as a working register inside the loop. */
88 movl $10,%ebx
89 jmp .L004loop
90 .align 16
/* One pass = eight add/xor/rotate groups, each using the rotate sequence
   16, 12, 8, 7.  State words are shuttled between registers and
   0..60(%esp) between groups. */
91 .L004loop:
92 addl %ebp,%eax
93 movl %ebx,128(%esp)
94 movl %ebp,%ebx
95 xorl %eax,%edx
96 roll $16,%edx
97 addl %edx,%ecx
98 xorl %ecx,%ebx
99 movl 52(%esp),%edi
100 roll $12,%ebx
101 movl 20(%esp),%ebp
102 addl %ebx,%eax
103 xorl %eax,%edx
104 movl %eax,(%esp)
105 roll $8,%edx
106 movl 4(%esp),%eax
107 addl %edx,%ecx
108 movl %edx,48(%esp)
109 xorl %ecx,%ebx
110 addl %ebp,%eax
111 roll $7,%ebx
112 xorl %eax,%edi
113 movl %ecx,32(%esp)
114 roll $16,%edi
115 movl %ebx,16(%esp)
116 addl %edi,%esi
117 movl 40(%esp),%ecx
118 xorl %esi,%ebp
119 movl 56(%esp),%edx
120 roll $12,%ebp
121 movl 24(%esp),%ebx
122 addl %ebp,%eax
123 xorl %eax,%edi
124 movl %eax,4(%esp)
125 roll $8,%edi
126 movl 8(%esp),%eax
127 addl %edi,%esi
128 movl %edi,52(%esp)
129 xorl %esi,%ebp
130 addl %ebx,%eax
131 roll $7,%ebp
132 xorl %eax,%edx
133 movl %esi,36(%esp)
134 roll $16,%edx
135 movl %ebp,20(%esp)
136 addl %edx,%ecx
137 movl 44(%esp),%esi
138 xorl %ecx,%ebx
139 movl 60(%esp),%edi
140 roll $12,%ebx
141 movl 28(%esp),%ebp
142 addl %ebx,%eax
143 xorl %eax,%edx
144 movl %eax,8(%esp)
145 roll $8,%edx
146 movl 12(%esp),%eax
147 addl %edx,%ecx
148 movl %edx,56(%esp)
149 xorl %ecx,%ebx
150 addl %ebp,%eax
151 roll $7,%ebx
152 xorl %eax,%edi
153 roll $16,%edi
154 movl %ebx,24(%esp)
155 addl %edi,%esi
156 xorl %esi,%ebp
157 roll $12,%ebp
158 movl 20(%esp),%ebx
159 addl %ebp,%eax
160 xorl %eax,%edi
161 movl %eax,12(%esp)
162 roll $8,%edi
163 movl (%esp),%eax
164 addl %edi,%esi
165 movl %edi,%edx
166 xorl %esi,%ebp
167 addl %ebx,%eax
168 roll $7,%ebp
169 xorl %eax,%edx
170 roll $16,%edx
171 movl %ebp,28(%esp)
172 addl %edx,%ecx
173 xorl %ecx,%ebx
174 movl 48(%esp),%edi
175 roll $12,%ebx
176 movl 24(%esp),%ebp
177 addl %ebx,%eax
178 xorl %eax,%edx
179 movl %eax,(%esp)
180 roll $8,%edx
181 movl 4(%esp),%eax
182 addl %edx,%ecx
183 movl %edx,60(%esp)
184 xorl %ecx,%ebx
185 addl %ebp,%eax
186 roll $7,%ebx
187 xorl %eax,%edi
188 movl %ecx,40(%esp)
189 roll $16,%edi
190 movl %ebx,20(%esp)
191 addl %edi,%esi
192 movl 32(%esp),%ecx
193 xorl %esi,%ebp
194 movl 52(%esp),%edx
195 roll $12,%ebp
196 movl 28(%esp),%ebx
197 addl %ebp,%eax
198 xorl %eax,%edi
199 movl %eax,4(%esp)
200 roll $8,%edi
201 movl 8(%esp),%eax
202 addl %edi,%esi
203 movl %edi,48(%esp)
204 xorl %esi,%ebp
205 addl %ebx,%eax
206 roll $7,%ebp
207 xorl %eax,%edx
208 movl %esi,44(%esp)
209 roll $16,%edx
210 movl %ebp,24(%esp)
211 addl %edx,%ecx
212 movl 36(%esp),%esi
213 xorl %ecx,%ebx
214 movl 56(%esp),%edi
215 roll $12,%ebx
216 movl 16(%esp),%ebp
217 addl %ebx,%eax
218 xorl %eax,%edx
219 movl %eax,8(%esp)
220 roll $8,%edx
221 movl 12(%esp),%eax
222 addl %edx,%ecx
223 movl %edx,52(%esp)
224 xorl %ecx,%ebx
225 addl %ebp,%eax
226 roll $7,%ebx
227 xorl %eax,%edi
228 roll $16,%edi
229 movl %ebx,28(%esp)
230 addl %edi,%esi
231 xorl %esi,%ebp
232 movl 48(%esp),%edx
233 roll $12,%ebp
/* Reload the pass counter parked at the top of the loop. */
234 movl 128(%esp),%ebx
235 addl %ebp,%eax
236 xorl %eax,%edi
237 movl %eax,12(%esp)
238 roll $8,%edi
239 movl (%esp),%eax
240 addl %edi,%esi
241 movl %edi,56(%esp)
242 xorl %esi,%ebp
243 roll $7,%ebp
244 decl %ebx
245 jnz .L004loop
/* Rounds done: add the initial state back into the working state.
   ebx = bytes remaining; fewer than 64 means a partial final block. */
246 movl 160(%esp),%ebx
247 addl $1634760805,%eax
248 addl 80(%esp),%ebp
249 addl 96(%esp),%ecx
250 addl 100(%esp),%esi
251 cmpl $64,%ebx
252 jb .L005tail
/* Full block: ebx = source, eax = destination.  Each word is XORed with the
   source and stored to the destination; word 0 is parked at (%esp) while
   %eax serves as the destination pointer. */
253 movl 156(%esp),%ebx
254 addl 112(%esp),%edx
255 addl 120(%esp),%edi
256 xorl (%ebx),%eax
257 xorl 16(%ebx),%ebp
258 movl %eax,(%esp)
259 movl 152(%esp),%eax
260 xorl 32(%ebx),%ecx
261 xorl 36(%ebx),%esi
262 xorl 48(%ebx),%edx
263 xorl 56(%ebx),%edi
264 movl %ebp,16(%eax)
265 movl %ecx,32(%eax)
266 movl %esi,36(%eax)
267 movl %edx,48(%eax)
268 movl %edi,56(%eax)
269 movl 4(%esp),%ebp
270 movl 8(%esp),%ecx
271 movl 12(%esp),%esi
272 movl 20(%esp),%edx
273 movl 24(%esp),%edi
274 addl $857760878,%ebp
275 addl $2036477234,%ecx
276 addl $1797285236,%esi
277 addl 84(%esp),%edx
278 addl 88(%esp),%edi
279 xorl 4(%ebx),%ebp
280 xorl 8(%ebx),%ecx
281 xorl 12(%ebx),%esi
282 xorl 20(%ebx),%edx
283 xorl 24(%ebx),%edi
284 movl %ebp,4(%eax)
285 movl %ecx,8(%eax)
286 movl %esi,12(%eax)
287 movl %edx,20(%eax)
288 movl %edi,24(%eax)
289 movl 28(%esp),%ebp
290 movl 40(%esp),%ecx
291 movl 44(%esp),%esi
292 movl 52(%esp),%edx
293 movl 60(%esp),%edi
294 addl 92(%esp),%ebp
295 addl 104(%esp),%ecx
296 addl 108(%esp),%esi
297 addl 116(%esp),%edx
298 addl 124(%esp),%edi
299 xorl 28(%ebx),%ebp
300 xorl 40(%ebx),%ecx
301 xorl 44(%ebx),%esi
302 xorl 52(%ebx),%edx
303 xorl 60(%ebx),%edi
/* Advance source and destination by one block and loop while bytes remain. */
304 leal 64(%ebx),%ebx
305 movl %ebp,28(%eax)
306 movl (%esp),%ebp
307 movl %ecx,40(%eax)
308 movl 160(%esp),%ecx
309 movl %esi,44(%eax)
310 movl %edx,52(%eax)
311 movl %edi,60(%eax)
312 movl %ebp,(%eax)
313 leal 64(%eax),%eax
314 subl $64,%ecx
315 jnz .L003outer_loop
316 jmp .L006done
/* Partial final block: finish the add-back, spill all 16 words of the
   generated block to 0..60(%esp), then XOR byte by byte. */
317 .L005tail:
318 addl 112(%esp),%edx
319 addl 120(%esp),%edi
320 movl %eax,(%esp)
321 movl %ebp,16(%esp)
322 movl %ecx,32(%esp)
323 movl %esi,36(%esp)
324 movl %edx,48(%esp)
325 movl %edi,56(%esp)
326 movl 4(%esp),%ebp
327 movl 8(%esp),%ecx
328 movl 12(%esp),%esi
329 movl 20(%esp),%edx
330 movl 24(%esp),%edi
331 addl $857760878,%ebp
332 addl $2036477234,%ecx
333 addl $1797285236,%esi
334 addl 84(%esp),%edx
335 addl 88(%esp),%edi
336 movl %ebp,4(%esp)
337 movl %ecx,8(%esp)
338 movl %esi,12(%esp)
339 movl %edx,20(%esp)
340 movl %edi,24(%esp)
341 movl 28(%esp),%ebp
342 movl 40(%esp),%ecx
343 movl 44(%esp),%esi
344 movl 52(%esp),%edx
345 movl 60(%esp),%edi
346 addl 92(%esp),%ebp
347 addl 104(%esp),%ecx
348 addl 108(%esp),%esi
349 addl 116(%esp),%edx
350 addl 124(%esp),%edi
351 movl %ebp,28(%esp)
352 movl 156(%esp),%ebp
353 movl %ecx,40(%esp)
354 movl 152(%esp),%ecx
355 movl %esi,44(%esp)
356 xorl %esi,%esi
357 movl %edx,52(%esp)
358 movl %edi,60(%esp)
359 xorl %eax,%eax
360 xorl %edx,%edx
/* Byte loop: ebp = source, ecx = destination, esi = byte index,
   ebx = bytes remaining (1..63, loaded from 160(%esp) after the rounds). */
361 .L007tail_loop:
362 movb (%esi,%ebp,1),%al
363 movb (%esp,%esi,1),%dl
364 leal 1(%esi),%esi
365 xorb %dl,%al
366 movb %al,-1(%ecx,%esi,1)
367 decl %ebx
368 jnz .L007tail_loop
369 .L006done:
370 addl $132,%esp
371 .L000no_data:
372 popl %edi
373 popl %esi
374 popl %ebx
375 popl %ebp
376 ret
377 .size ChaCha20_ctr32,.-.L_ChaCha20_ctr32_begin
/*
 * ChaCha20_ssse3 -- SSE2/SSSE3 ChaCha20 path for 32-bit x86, AT&T syntax.
 *
 * Two ways in:
 *   - direct call to ChaCha20_ssse3 (own prologue below), or
 *   - `jmp .Lssse3_shortcut` from ChaCha20_ctr32, which has already pushed
 *     ebp/ebx/esi/edi and left the address of .Lpic_point in %eax.
 *
 * After the four pushes the stack arguments are loaded as:
 *   20(%esp) -> edi  destination pointer
 *   24(%esp) -> esi  source pointer
 *   28(%esp) -> ecx  byte count
 *   32(%esp) -> edx  pointer to 32 bytes (two movdqu loads)
 *   36(%esp) -> ebx  pointer to 16 bytes (counter block; dword 0 is the
 *                    value that is incremented per block)
 *
 * The frame is 64-byte aligned; the caller's %esp is kept at 512(%esp) and
 * restored at .L011done.  Inputs of 256 bytes or more are processed four
 * blocks at a time (one block per 32-bit lane); anything left, or shorter
 * inputs, go through the one-block-at-a-time path at .L0081x.
 */
378 .globl ChaCha20_ssse3
379 .hidden ChaCha20_ssse3
380 .type ChaCha20_ssse3,@function
381 .align 16
382 ChaCha20_ssse3:
383 .L_ChaCha20_ssse3_begin:
384 pushl %ebp
385 pushl %ebx
386 pushl %esi
387 pushl %edi
/* Direct-entry fix: .Lssse3_shortcut computes the address of .Lssse3_data
   relative to .Lpic_point and expects %eax to hold the run-time address of
   .Lpic_point.  Only ChaCha20_ctr32 set that up, so a direct call reached
   the table through an undefined %eax.  Establish the same value here; the
   label difference is an assemble-time constant because both labels live
   in this section.  The shortcut entry from ChaCha20_ctr32 jumps past
   these instructions and is unaffected. */
call .Lssse3_pic_point
.Lssse3_pic_point:
popl %eax
leal .Lpic_point-.Lssse3_pic_point(%eax),%eax
388 .Lssse3_shortcut:
389 movl 20(%esp),%edi
390 movl 24(%esp),%esi
391 movl 28(%esp),%ecx
392 movl 32(%esp),%edx
393 movl 36(%esp),%ebx
/* Build a 64-byte-aligned frame and remember the incoming %esp in it. */
394 movl %esp,%ebp
395 subl $524,%esp
396 andl $-64,%esp
397 movl %ebp,512(%esp)
/* eax = address of .Lssse3_data from here on. */
398 leal .Lssse3_data-.Lpic_point(%eax),%eax
399 movdqu (%ebx),%xmm3
/* Fewer than 256 bytes: skip the four-block path. */
400 cmpl $256,%ecx
401 jb .L0081x
/* Four-block setup.  The two pointers are saved at 516/520(%esp) because
   edx and ebx are reused.  ebp = frame+384 addresses the broadcast input
   state; ebx = frame+128 addresses the working state. */
402 movl %edx,516(%esp)
403 movl %ebx,520(%esp)
404 subl $256,%ecx
405 leal 384(%esp),%ebp
406 movdqu (%edx),%xmm7
/* Broadcast each dword of the counter block across a register.  Dword 0
   gets the per-lane offsets 0,1,2,3 and is pre-decremented by 4 because
   the outer loop adds 4 before every use. */
407 pshufd $0,%xmm3,%xmm0
408 pshufd $85,%xmm3,%xmm1
409 pshufd $170,%xmm3,%xmm2
410 pshufd $255,%xmm3,%xmm3
411 paddd 48(%eax),%xmm0
412 pshufd $0,%xmm7,%xmm4
413 pshufd $85,%xmm7,%xmm5
414 psubd 64(%eax),%xmm0
415 pshufd $170,%xmm7,%xmm6
416 pshufd $255,%xmm7,%xmm7
417 movdqa %xmm0,64(%ebp)
418 movdqa %xmm1,80(%ebp)
419 movdqa %xmm2,96(%ebp)
420 movdqa %xmm3,112(%ebp)
421 movdqu 16(%edx),%xmm3
422 movdqa %xmm4,-64(%ebp)
423 movdqa %xmm5,-48(%ebp)
424 movdqa %xmm6,-32(%ebp)
425 movdqa %xmm7,-16(%ebp)
426 movdqa 32(%eax),%xmm7
427 leal 128(%esp),%ebx
428 pshufd $0,%xmm3,%xmm0
429 pshufd $85,%xmm3,%xmm1
430 pshufd $170,%xmm3,%xmm2
431 pshufd $255,%xmm3,%xmm3
432 pshufd $0,%xmm7,%xmm4
433 pshufd $85,%xmm7,%xmm5
434 pshufd $170,%xmm7,%xmm6
435 pshufd $255,%xmm7,%xmm7
436 movdqa %xmm0,(%ebp)
437 movdqa %xmm1,16(%ebp)
438 movdqa %xmm2,32(%ebp)
439 movdqa %xmm3,48(%ebp)
440 movdqa %xmm4,-128(%ebp)
441 movdqa %xmm5,-112(%ebp)
442 movdqa %xmm6,-96(%ebp)
443 movdqa %xmm7,-80(%ebp)
/* Bias both data pointers by 128 so the four blocks can be addressed with
   displacements -128, -64, 0 and 64. */
444 leal 128(%esi),%esi
445 leal 128(%edi),%edi
446 jmp .L009outer_loop
447 .align 16
/* One pass per 256 bytes: copy the broadcast input state to the working
   area at (%ebx), advancing the four lane counters by 4. */
448 .L009outer_loop:
449 movdqa -112(%ebp),%xmm1
450 movdqa -96(%ebp),%xmm2
451 movdqa -80(%ebp),%xmm3
452 movdqa -48(%ebp),%xmm5
453 movdqa -32(%ebp),%xmm6
454 movdqa -16(%ebp),%xmm7
455 movdqa %xmm1,-112(%ebx)
456 movdqa %xmm2,-96(%ebx)
457 movdqa %xmm3,-80(%ebx)
458 movdqa %xmm5,-48(%ebx)
459 movdqa %xmm6,-32(%ebx)
460 movdqa %xmm7,-16(%ebx)
461 movdqa 32(%ebp),%xmm2
462 movdqa 48(%ebp),%xmm3
463 movdqa 64(%ebp),%xmm4
464 movdqa 80(%ebp),%xmm5
465 movdqa 96(%ebp),%xmm6
466 movdqa 112(%ebp),%xmm7
467 paddd 64(%eax),%xmm4
468 movdqa %xmm2,32(%ebx)
469 movdqa %xmm3,48(%ebx)
470 movdqa %xmm4,64(%ebx)
471 movdqa %xmm5,80(%ebx)
472 movdqa %xmm6,96(%ebx)
473 movdqa %xmm7,112(%ebx)
474 movdqa %xmm4,64(%ebp)
475 movdqa -128(%ebp),%xmm0
476 movdqa %xmm4,%xmm6
477 movdqa -64(%ebp),%xmm3
478 movdqa (%ebp),%xmm4
479 movdqa 16(%ebp),%xmm5
480 movl $10,%edx
481 nop
482 .align 16
/* Round loop, ten passes.  Rotations by 16 and 8 use pshufb with the masks
   at (%eax) and 16(%eax); rotations by 12 and 7 use a shift pair plus por. */
483 .L010loop:
484 paddd %xmm3,%xmm0
485 movdqa %xmm3,%xmm2
486 pxor %xmm0,%xmm6
487 pshufb (%eax),%xmm6
488 paddd %xmm6,%xmm4
489 pxor %xmm4,%xmm2
490 movdqa -48(%ebx),%xmm3
491 movdqa %xmm2,%xmm1
492 pslld $12,%xmm2
493 psrld $20,%xmm1
494 por %xmm1,%xmm2
495 movdqa -112(%ebx),%xmm1
496 paddd %xmm2,%xmm0
497 movdqa 80(%ebx),%xmm7
498 pxor %xmm0,%xmm6
499 movdqa %xmm0,-128(%ebx)
500 pshufb 16(%eax),%xmm6
501 paddd %xmm6,%xmm4
502 movdqa %xmm6,64(%ebx)
503 pxor %xmm4,%xmm2
504 paddd %xmm3,%xmm1
505 movdqa %xmm2,%xmm0
506 pslld $7,%xmm2
507 psrld $25,%xmm0
508 pxor %xmm1,%xmm7
509 por %xmm0,%xmm2
510 movdqa %xmm4,(%ebx)
511 pshufb (%eax),%xmm7
512 movdqa %xmm2,-64(%ebx)
513 paddd %xmm7,%xmm5
514 movdqa 32(%ebx),%xmm4
515 pxor %xmm5,%xmm3
516 movdqa -32(%ebx),%xmm2
517 movdqa %xmm3,%xmm0
518 pslld $12,%xmm3
519 psrld $20,%xmm0
520 por %xmm0,%xmm3
521 movdqa -96(%ebx),%xmm0
522 paddd %xmm3,%xmm1
523 movdqa 96(%ebx),%xmm6
524 pxor %xmm1,%xmm7
525 movdqa %xmm1,-112(%ebx)
526 pshufb 16(%eax),%xmm7
527 paddd %xmm7,%xmm5
528 movdqa %xmm7,80(%ebx)
529 pxor %xmm5,%xmm3
530 paddd %xmm2,%xmm0
531 movdqa %xmm3,%xmm1
532 pslld $7,%xmm3
533 psrld $25,%xmm1
534 pxor %xmm0,%xmm6
535 por %xmm1,%xmm3
536 movdqa %xmm5,16(%ebx)
537 pshufb (%eax),%xmm6
538 movdqa %xmm3,-48(%ebx)
539 paddd %xmm6,%xmm4
540 movdqa 48(%ebx),%xmm5
541 pxor %xmm4,%xmm2
542 movdqa -16(%ebx),%xmm3
543 movdqa %xmm2,%xmm1
544 pslld $12,%xmm2
545 psrld $20,%xmm1
546 por %xmm1,%xmm2
547 movdqa -80(%ebx),%xmm1
548 paddd %xmm2,%xmm0
549 movdqa 112(%ebx),%xmm7
550 pxor %xmm0,%xmm6
551 movdqa %xmm0,-96(%ebx)
552 pshufb 16(%eax),%xmm6
553 paddd %xmm6,%xmm4
554 movdqa %xmm6,96(%ebx)
555 pxor %xmm4,%xmm2
556 paddd %xmm3,%xmm1
557 movdqa %xmm2,%xmm0
558 pslld $7,%xmm2
559 psrld $25,%xmm0
560 pxor %xmm1,%xmm7
561 por %xmm0,%xmm2
562 pshufb (%eax),%xmm7
563 movdqa %xmm2,-32(%ebx)
564 paddd %xmm7,%xmm5
565 pxor %xmm5,%xmm3
566 movdqa -48(%ebx),%xmm2
567 movdqa %xmm3,%xmm0
568 pslld $12,%xmm3
569 psrld $20,%xmm0
570 por %xmm0,%xmm3
571 movdqa -128(%ebx),%xmm0
572 paddd %xmm3,%xmm1
573 pxor %xmm1,%xmm7
574 movdqa %xmm1,-80(%ebx)
575 pshufb 16(%eax),%xmm7
576 paddd %xmm7,%xmm5
577 movdqa %xmm7,%xmm6
578 pxor %xmm5,%xmm3
579 paddd %xmm2,%xmm0
580 movdqa %xmm3,%xmm1
581 pslld $7,%xmm3
582 psrld $25,%xmm1
583 pxor %xmm0,%xmm6
584 por %xmm1,%xmm3
585 pshufb (%eax),%xmm6
586 movdqa %xmm3,-16(%ebx)
587 paddd %xmm6,%xmm4
588 pxor %xmm4,%xmm2
589 movdqa -32(%ebx),%xmm3
590 movdqa %xmm2,%xmm1
591 pslld $12,%xmm2
592 psrld $20,%xmm1
593 por %xmm1,%xmm2
594 movdqa -112(%ebx),%xmm1
595 paddd %xmm2,%xmm0
596 movdqa 64(%ebx),%xmm7
597 pxor %xmm0,%xmm6
598 movdqa %xmm0,-128(%ebx)
599 pshufb 16(%eax),%xmm6
600 paddd %xmm6,%xmm4
601 movdqa %xmm6,112(%ebx)
602 pxor %xmm4,%xmm2
603 paddd %xmm3,%xmm1
604 movdqa %xmm2,%xmm0
605 pslld $7,%xmm2
606 psrld $25,%xmm0
607 pxor %xmm1,%xmm7
608 por %xmm0,%xmm2
609 movdqa %xmm4,32(%ebx)
610 pshufb (%eax),%xmm7
611 movdqa %xmm2,-48(%ebx)
612 paddd %xmm7,%xmm5
613 movdqa (%ebx),%xmm4
614 pxor %xmm5,%xmm3
615 movdqa -16(%ebx),%xmm2
616 movdqa %xmm3,%xmm0
617 pslld $12,%xmm3
618 psrld $20,%xmm0
619 por %xmm0,%xmm3
620 movdqa -96(%ebx),%xmm0
621 paddd %xmm3,%xmm1
622 movdqa 80(%ebx),%xmm6
623 pxor %xmm1,%xmm7
624 movdqa %xmm1,-112(%ebx)
625 pshufb 16(%eax),%xmm7
626 paddd %xmm7,%xmm5
627 movdqa %xmm7,64(%ebx)
628 pxor %xmm5,%xmm3
629 paddd %xmm2,%xmm0
630 movdqa %xmm3,%xmm1
631 pslld $7,%xmm3
632 psrld $25,%xmm1
633 pxor %xmm0,%xmm6
634 por %xmm1,%xmm3
635 movdqa %xmm5,48(%ebx)
636 pshufb (%eax),%xmm6
637 movdqa %xmm3,-32(%ebx)
638 paddd %xmm6,%xmm4
639 movdqa 16(%ebx),%xmm5
640 pxor %xmm4,%xmm2
641 movdqa -64(%ebx),%xmm3
642 movdqa %xmm2,%xmm1
643 pslld $12,%xmm2
644 psrld $20,%xmm1
645 por %xmm1,%xmm2
646 movdqa -80(%ebx),%xmm1
647 paddd %xmm2,%xmm0
648 movdqa 96(%ebx),%xmm7
649 pxor %xmm0,%xmm6
650 movdqa %xmm0,-96(%ebx)
651 pshufb 16(%eax),%xmm6
652 paddd %xmm6,%xmm4
653 movdqa %xmm6,80(%ebx)
654 pxor %xmm4,%xmm2
655 paddd %xmm3,%xmm1
656 movdqa %xmm2,%xmm0
657 pslld $7,%xmm2
658 psrld $25,%xmm0
659 pxor %xmm1,%xmm7
660 por %xmm0,%xmm2
661 pshufb (%eax),%xmm7
662 movdqa %xmm2,-16(%ebx)
663 paddd %xmm7,%xmm5
664 pxor %xmm5,%xmm3
665 movdqa %xmm3,%xmm0
666 pslld $12,%xmm3
667 psrld $20,%xmm0
668 por %xmm0,%xmm3
669 movdqa -128(%ebx),%xmm0
670 paddd %xmm3,%xmm1
671 movdqa 64(%ebx),%xmm6
672 pxor %xmm1,%xmm7
673 movdqa %xmm1,-80(%ebx)
674 pshufb 16(%eax),%xmm7
675 paddd %xmm7,%xmm5
676 movdqa %xmm7,96(%ebx)
677 pxor %xmm5,%xmm3
678 movdqa %xmm3,%xmm1
679 pslld $7,%xmm3
680 psrld $25,%xmm1
681 por %xmm1,%xmm3
682 decl %edx
683 jnz .L010loop
/* Rounds done: flush the rows still held in registers to the working area. */
684 movdqa %xmm3,-64(%ebx)
685 movdqa %xmm4,(%ebx)
686 movdqa %xmm5,16(%ebx)
687 movdqa %xmm6,64(%ebx)
688 movdqa %xmm7,96(%ebx)
/* Output, four state rows at a time: add the input state, transpose the
   4x4 dword tile with punpck so each register holds 16 consecutive bytes
   of one block, XOR with the source and store.  The four blocks sit at
   displacements -128, -64, 0, 64; the pointers advance by 16 per group. */
689 movdqa -112(%ebx),%xmm1
690 movdqa -96(%ebx),%xmm2
691 movdqa -80(%ebx),%xmm3
692 paddd -128(%ebp),%xmm0
693 paddd -112(%ebp),%xmm1
694 paddd -96(%ebp),%xmm2
695 paddd -80(%ebp),%xmm3
696 movdqa %xmm0,%xmm6
697 punpckldq %xmm1,%xmm0
698 movdqa %xmm2,%xmm7
699 punpckldq %xmm3,%xmm2
700 punpckhdq %xmm1,%xmm6
701 punpckhdq %xmm3,%xmm7
702 movdqa %xmm0,%xmm1
703 punpcklqdq %xmm2,%xmm0
704 movdqa %xmm6,%xmm3
705 punpcklqdq %xmm7,%xmm6
706 punpckhqdq %xmm2,%xmm1
707 punpckhqdq %xmm7,%xmm3
708 movdqu -128(%esi),%xmm4
709 movdqu -64(%esi),%xmm5
710 movdqu (%esi),%xmm2
711 movdqu 64(%esi),%xmm7
712 leal 16(%esi),%esi
713 pxor %xmm0,%xmm4
714 movdqa -64(%ebx),%xmm0
715 pxor %xmm1,%xmm5
716 movdqa -48(%ebx),%xmm1
717 pxor %xmm2,%xmm6
718 movdqa -32(%ebx),%xmm2
719 pxor %xmm3,%xmm7
720 movdqa -16(%ebx),%xmm3
721 movdqu %xmm4,-128(%edi)
722 movdqu %xmm5,-64(%edi)
723 movdqu %xmm6,(%edi)
724 movdqu %xmm7,64(%edi)
725 leal 16(%edi),%edi
726 paddd -64(%ebp),%xmm0
727 paddd -48(%ebp),%xmm1
728 paddd -32(%ebp),%xmm2
729 paddd -16(%ebp),%xmm3
730 movdqa %xmm0,%xmm6
731 punpckldq %xmm1,%xmm0
732 movdqa %xmm2,%xmm7
733 punpckldq %xmm3,%xmm2
734 punpckhdq %xmm1,%xmm6
735 punpckhdq %xmm3,%xmm7
736 movdqa %xmm0,%xmm1
737 punpcklqdq %xmm2,%xmm0
738 movdqa %xmm6,%xmm3
739 punpcklqdq %xmm7,%xmm6
740 punpckhqdq %xmm2,%xmm1
741 punpckhqdq %xmm7,%xmm3
742 movdqu -128(%esi),%xmm4
743 movdqu -64(%esi),%xmm5
744 movdqu (%esi),%xmm2
745 movdqu 64(%esi),%xmm7
746 leal 16(%esi),%esi
747 pxor %xmm0,%xmm4
748 movdqa (%ebx),%xmm0
749 pxor %xmm1,%xmm5
750 movdqa 16(%ebx),%xmm1
751 pxor %xmm2,%xmm6
752 movdqa 32(%ebx),%xmm2
753 pxor %xmm3,%xmm7
754 movdqa 48(%ebx),%xmm3
755 movdqu %xmm4,-128(%edi)
756 movdqu %xmm5,-64(%edi)
757 movdqu %xmm6,(%edi)
758 movdqu %xmm7,64(%edi)
759 leal 16(%edi),%edi
760 paddd (%ebp),%xmm0
761 paddd 16(%ebp),%xmm1
762 paddd 32(%ebp),%xmm2
763 paddd 48(%ebp),%xmm3
764 movdqa %xmm0,%xmm6
765 punpckldq %xmm1,%xmm0
766 movdqa %xmm2,%xmm7
767 punpckldq %xmm3,%xmm2
768 punpckhdq %xmm1,%xmm6
769 punpckhdq %xmm3,%xmm7
770 movdqa %xmm0,%xmm1
771 punpcklqdq %xmm2,%xmm0
772 movdqa %xmm6,%xmm3
773 punpcklqdq %xmm7,%xmm6
774 punpckhqdq %xmm2,%xmm1
775 punpckhqdq %xmm7,%xmm3
776 movdqu -128(%esi),%xmm4
777 movdqu -64(%esi),%xmm5
778 movdqu (%esi),%xmm2
779 movdqu 64(%esi),%xmm7
780 leal 16(%esi),%esi
781 pxor %xmm0,%xmm4
782 movdqa 64(%ebx),%xmm0
783 pxor %xmm1,%xmm5
784 movdqa 80(%ebx),%xmm1
785 pxor %xmm2,%xmm6
786 movdqa 96(%ebx),%xmm2
787 pxor %xmm3,%xmm7
788 movdqa 112(%ebx),%xmm3
789 movdqu %xmm4,-128(%edi)
790 movdqu %xmm5,-64(%edi)
791 movdqu %xmm6,(%edi)
792 movdqu %xmm7,64(%edi)
793 leal 16(%edi),%edi
794 paddd 64(%ebp),%xmm0
795 paddd 80(%ebp),%xmm1
796 paddd 96(%ebp),%xmm2
797 paddd 112(%ebp),%xmm3
798 movdqa %xmm0,%xmm6
799 punpckldq %xmm1,%xmm0
800 movdqa %xmm2,%xmm7
801 punpckldq %xmm3,%xmm2
802 punpckhdq %xmm1,%xmm6
803 punpckhdq %xmm3,%xmm7
804 movdqa %xmm0,%xmm1
805 punpcklqdq %xmm2,%xmm0
806 movdqa %xmm6,%xmm3
807 punpcklqdq %xmm7,%xmm6
808 punpckhqdq %xmm2,%xmm1
809 punpckhqdq %xmm7,%xmm3
810 movdqu -128(%esi),%xmm4
811 movdqu -64(%esi),%xmm5
812 movdqu (%esi),%xmm2
813 movdqu 64(%esi),%xmm7
/* 3*16 + 208 = 256: the pointers now address the next group of four blocks. */
814 leal 208(%esi),%esi
815 pxor %xmm0,%xmm4
816 pxor %xmm1,%xmm5
817 pxor %xmm2,%xmm6
818 pxor %xmm3,%xmm7
819 movdqu %xmm4,-128(%edi)
820 movdqu %xmm5,-64(%edi)
821 movdqu %xmm6,(%edi)
822 movdqu %xmm7,64(%edi)
823 leal 208(%edi),%edi
/* ecx was pre-decremented by 256, so "no borrow" means another full group
   of four blocks is available. */
824 subl $256,%ecx
825 jnc .L009outer_loop
826 addl $256,%ecx
827 jz .L011done
/* 1..255 bytes left: undo the 128-byte pointer bias and rebuild a single
   counter block for the one-block path: dword 0 = lane-0 counter + 4,
   dwords 1-3 taken from the caller's counter block. */
828 movl 520(%esp),%ebx
829 leal -128(%esi),%esi
830 movl 516(%esp),%edx
831 leal -128(%edi),%edi
832 movd 64(%ebp),%xmm2
833 movdqu (%ebx),%xmm3
834 paddd 96(%eax),%xmm2
835 pand 112(%eax),%xmm3
836 por %xmm2,%xmm3
/* One-block path.  xmm0..xmm3 = the four state rows (constants, first and
   second 16 bytes behind edx, counter block); a copy is kept at
   0..48(%esp).  xmm6/xmm7 hold the rotate-by-16 / rotate-by-8 pshufb masks. */
837 .L0081x:
838 movdqa 32(%eax),%xmm0
839 movdqu (%edx),%xmm1
840 movdqu 16(%edx),%xmm2
841 movdqa (%eax),%xmm6
842 movdqa 16(%eax),%xmm7
/* NOTE(review): this dword store is overwritten by the 16-byte store to
   48(%esp) four instructions below. */
843 movl %ebp,48(%esp)
844 movdqa %xmm0,(%esp)
845 movdqa %xmm1,16(%esp)
846 movdqa %xmm2,32(%esp)
847 movdqa %xmm3,48(%esp)
848 movl $10,%edx
849 jmp .L012loop1x
850 .align 16
/* Next single block: reload the saved rows and add 1 to dword 0 of the
   counter row (80(%eax) holds 1,0,0,0). */
851 .L013outer1x:
852 movdqa 80(%eax),%xmm3
853 movdqa (%esp),%xmm0
854 movdqa 16(%esp),%xmm1
855 movdqa 32(%esp),%xmm2
856 paddd 48(%esp),%xmm3
857 movl $10,%edx
858 movdqa %xmm3,48(%esp)
859 jmp .L012loop1x
860 .align 16
/* Ten passes.  The .byte sequences encode `pshufb %xmm6,%xmm3` (...,222)
   and `pshufb %xmm7,%xmm3` (...,223).  The pshufd groups rotate the lanes
   of rows 1-3 between the two halves of a pass and rotate them back at
   the end. */
861 .L012loop1x:
862 paddd %xmm1,%xmm0
863 pxor %xmm0,%xmm3
864 .byte 102,15,56,0,222
865 paddd %xmm3,%xmm2
866 pxor %xmm2,%xmm1
867 movdqa %xmm1,%xmm4
868 psrld $20,%xmm1
869 pslld $12,%xmm4
870 por %xmm4,%xmm1
871 paddd %xmm1,%xmm0
872 pxor %xmm0,%xmm3
873 .byte 102,15,56,0,223
874 paddd %xmm3,%xmm2
875 pxor %xmm2,%xmm1
876 movdqa %xmm1,%xmm4
877 psrld $25,%xmm1
878 pslld $7,%xmm4
879 por %xmm4,%xmm1
880 pshufd $78,%xmm2,%xmm2
881 pshufd $57,%xmm1,%xmm1
882 pshufd $147,%xmm3,%xmm3
883 nop
884 paddd %xmm1,%xmm0
885 pxor %xmm0,%xmm3
886 .byte 102,15,56,0,222
887 paddd %xmm3,%xmm2
888 pxor %xmm2,%xmm1
889 movdqa %xmm1,%xmm4
890 psrld $20,%xmm1
891 pslld $12,%xmm4
892 por %xmm4,%xmm1
893 paddd %xmm1,%xmm0
894 pxor %xmm0,%xmm3
895 .byte 102,15,56,0,223
896 paddd %xmm3,%xmm2
897 pxor %xmm2,%xmm1
898 movdqa %xmm1,%xmm4
899 psrld $25,%xmm1
900 pslld $7,%xmm4
901 por %xmm4,%xmm1
902 pshufd $78,%xmm2,%xmm2
903 pshufd $147,%xmm1,%xmm1
904 pshufd $57,%xmm3,%xmm3
905 decl %edx
906 jnz .L012loop1x
/* Add the saved input rows back, then either XOR a full 64-byte block or
   fall into the byte-wise tail when fewer than 64 bytes remain. */
907 paddd (%esp),%xmm0
908 paddd 16(%esp),%xmm1
909 paddd 32(%esp),%xmm2
910 paddd 48(%esp),%xmm3
911 cmpl $64,%ecx
912 jb .L014tail
913 movdqu (%esi),%xmm4
914 movdqu 16(%esi),%xmm5
915 pxor %xmm4,%xmm0
916 movdqu 32(%esi),%xmm4
917 pxor %xmm5,%xmm1
918 movdqu 48(%esi),%xmm5
919 pxor %xmm4,%xmm2
920 pxor %xmm5,%xmm3
921 leal 64(%esi),%esi
922 movdqu %xmm0,(%edi)
923 movdqu %xmm1,16(%edi)
924 movdqu %xmm2,32(%edi)
925 movdqu %xmm3,48(%edi)
926 leal 64(%edi),%edi
927 subl $64,%ecx
928 jnz .L013outer1x
929 jmp .L011done
/* Partial block: spill the 64 generated bytes to 0..63(%esp) and XOR the
   remaining ecx bytes one at a time (ebp = byte index). */
930 .L014tail:
931 movdqa %xmm0,(%esp)
932 movdqa %xmm1,16(%esp)
933 movdqa %xmm2,32(%esp)
934 movdqa %xmm3,48(%esp)
935 xorl %eax,%eax
936 xorl %edx,%edx
937 xorl %ebp,%ebp
938 .L015tail_loop:
939 movb (%esp,%ebp,1),%al
940 movb (%esi,%ebp,1),%dl
941 leal 1(%ebp),%ebp
942 xorb %dl,%al
943 movb %al,-1(%edi,%ebp,1)
944 decl %ecx
945 jnz .L015tail_loop
/* Restore the caller's stack pointer saved at frame setup, then the
   callee-saved registers. */
946 .L011done:
947 movl 512(%esp),%esp
948 popl %edi
949 popl %esi
950 popl %ebx
951 popl %ebp
952 ret
953 .size ChaCha20_ssse3,.-.L_ChaCha20_ssse3_begin
/*
 * Constant table for ChaCha20_ssse3, addressed as N(%eax) once %eax holds
 * the address of .Lssse3_data.  Rows are 16 bytes each.
 */
954 .align 64
955 .Lssse3_data:
/* +0:  pshufb mask that rotates every 32-bit lane left by 16 bits. */
956 .byte 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
/* +16: pshufb mask that rotates every 32-bit lane left by 8 bits. */
957 .byte 3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
/* +32: the four constant state words (0x61707865, 0x3320646e, 0x79622d32,
        0x6b206574), the same immediates ChaCha20_ctr32 uses. */
958 .long 1634760805,857760878,2036477234,1797285236
/* +48: per-lane counter offsets for the four-block path. */
959 .long 0,1,2,3
/* +64: counter step per four-block pass (also the initial pre-decrement). */
960 .long 4,4,4,4
/* +80: counter increment for the one-block path (dword 0 only). */
961 .long 1,0,0,0
/* +96: added to the lane-0 counter when leaving the four-block path. */
962 .long 4,0,0,0
/* +112: mask that clears dword 0 and keeps dwords 1-3 of the counter block. */
963 .long 0,-1,-1,-1
964 .align 64
/* NUL-terminated ASCII: "ChaCha20 for x86, CRYPTOGAMS by <appro@openssl.org>" */
965 .byte 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
966 .byte 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
967 .byte 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
968 .byte 114,103,62,0
969 #endif
OLDNEW
« no previous file with comments | « third_party/boringssl/linux-arm/crypto/sha/sha512-armv4.S ('k') | third_party/boringssl/linux-x86/crypto/cpu-x86-asm.S » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698