Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(182)

Side by Side Diff: third_party/boringssl/linux-x86_64/crypto/bn/rsaz-x86_64.S

Issue 2354623003: Pull boringssl generated source from boringssl_gen (Closed)
Patch Set: . Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 #if defined(__x86_64__)
2 .text
3
4 .extern OPENSSL_ia32cap_P
5 .hidden OPENSSL_ia32cap_P
6
/*
 * rsaz_512_sqr -- square an 8-limb (512-bit) value, reduce it, and repeat.
 *
 * Registers on entry, as consumed by the code below:
 *   %rdi  destination; __rsaz_512_subtract stores the 8 result limbs here
 *   %rsi  source operand, 8 qwords; set to %rdi after every pass
 *   %rdx  copied to %rbp, which the reduce/subtract helpers read as an
 *         8-qword array (presumably the modulus -- confirm against caller)
 *   %rcx  saved at 128(%rsp); __rsaz_512_reduce reads it as 128+8(%rsp)
 *         because its return address has been pushed in between
 *         (presumably the Montgomery constant n0 -- confirm)
 *   %r8d  number of passes; the dec/jnz loop always runs at least once,
 *         so a count of 0 would wrap
 *
 * Frame (152 bytes below the six saved registers):
 *   0..127(%rsp)  16-limb raw square
 *   128(%rsp)     saved %rcx
 *   136(%rsp)     saved pass counter
 *
 * NOTE(review): after each "double and add a[i]^2" step the carry out of
 * the final adcq $0 is not consumed (the next flag-writing instruction is
 * shrq).  Verify this cannot overflow; compare with the later upstream
 * OpenSSL rework of this routine (CVE-2019-1551).
 */
7 .globl rsaz_512_sqr
8 .hidden rsaz_512_sqr
9 .type rsaz_512_sqr,@function
10 .align 32
11 rsaz_512_sqr:
12 pushq %rbx
13 pushq %rbp
14 pushq %r12
15 pushq %r13
16 pushq %r14
17 pushq %r15
18
19 subq $128+24,%rsp
20 .Lsqr_body:
21 movq %rdx,%rbp
22 movq (%rsi),%rdx /* rdx = a[0] */
23 movq 8(%rsi),%rax /* rax = a[1] */
24 movq %rcx,128(%rsp) /* read back by __rsaz_512_reduce */
25 jmp .Loop_sqr
26
27 .align 32
28 .Loop_sqr:
29 movl %r8d,128+8(%rsp) /* spill pass counter; r8 is reused below */
30
/* Row 0: a[0]*a[1..7], accumulated into r8..r15. */
31 movq %rdx,%rbx
32 mulq %rdx
33 movq %rax,%r8
34 movq 16(%rsi),%rax
35 movq %rdx,%r9
36
37 mulq %rbx
38 addq %rax,%r9
39 movq 24(%rsi),%rax
40 movq %rdx,%r10
41 adcq $0,%r10
42
43 mulq %rbx
44 addq %rax,%r10
45 movq 32(%rsi),%rax
46 movq %rdx,%r11
47 adcq $0,%r11
48
49 mulq %rbx
50 addq %rax,%r11
51 movq 40(%rsi),%rax
52 movq %rdx,%r12
53 adcq $0,%r12
54
55 mulq %rbx
56 addq %rax,%r12
57 movq 48(%rsi),%rax
58 movq %rdx,%r13
59 adcq $0,%r13
60
61 mulq %rbx
62 addq %rax,%r13
63 movq 56(%rsi),%rax
64 movq %rdx,%r14
65 adcq $0,%r14
66
67 mulq %rbx
68 addq %rax,%r14
69 movq %rbx,%rax
70 movq %rdx,%r15
71 adcq $0,%r15
72
/* Double the two lowest cross-product limbs and add a[0]^2. */
73 addq %r8,%r8
74 movq %r9,%rcx /* keep undoubled r9; its top bit is extracted below */
75 adcq %r9,%r9
76
77 mulq %rax
78 movq %rax,(%rsp)
79 addq %rdx,%r8
80 adcq $0,%r9 /* NOTE(review): carry out is not consumed -- verify */
81
82 movq %r8,8(%rsp)
83 shrq $63,%rcx /* rcx = bit shifted out of r9, fed into the next lea */
84
85
/* Row 1: a[1]*a[2..7], added into r10..r15 with the new top limb in r8. */
86 movq 8(%rsi),%r8
87 movq 16(%rsi),%rax
88 mulq %r8
89 addq %rax,%r10
90 movq 24(%rsi),%rax
91 movq %rdx,%rbx
92 adcq $0,%rbx
93
94 mulq %r8
95 addq %rax,%r11
96 movq 32(%rsi),%rax
97 adcq $0,%rdx
98 addq %rbx,%r11
99 movq %rdx,%rbx
100 adcq $0,%rbx
101
102 mulq %r8
103 addq %rax,%r12
104 movq 40(%rsi),%rax
105 adcq $0,%rdx
106 addq %rbx,%r12
107 movq %rdx,%rbx
108 adcq $0,%rbx
109
110 mulq %r8
111 addq %rax,%r13
112 movq 48(%rsi),%rax
113 adcq $0,%rdx
114 addq %rbx,%r13
115 movq %rdx,%rbx
116 adcq $0,%rbx
117
118 mulq %r8
119 addq %rax,%r14
120 movq 56(%rsi),%rax
121 adcq $0,%rdx
122 addq %rbx,%r14
123 movq %rdx,%rbx
124 adcq $0,%rbx
125
126 mulq %r8
127 addq %rax,%r15
128 movq %r8,%rax
129 adcq $0,%rdx
130 addq %rbx,%r15
131 movq %rdx,%r8
132 movq %r10,%rdx
133 adcq $0,%r8
134
/* addq on the copy only produces CF = top bit of r10; lea does the actual
   doubling (plus the bit carried in rcx) without touching the flags. */
135 addq %rdx,%rdx
136 leaq (%rcx,%r10,2),%r10
137 movq %r11,%rbx
138 adcq %r11,%r11
139
140 mulq %rax
141 addq %rax,%r9
142 adcq %rdx,%r10
143 adcq $0,%r11
144
145 movq %r9,16(%rsp)
146 movq %r10,24(%rsp)
147 shrq $63,%rbx
148
149
/* Row 2: a[2]*a[3..7]. */
150 movq 16(%rsi),%r9
151 movq 24(%rsi),%rax
152 mulq %r9
153 addq %rax,%r12
154 movq 32(%rsi),%rax
155 movq %rdx,%rcx
156 adcq $0,%rcx
157
158 mulq %r9
159 addq %rax,%r13
160 movq 40(%rsi),%rax
161 adcq $0,%rdx
162 addq %rcx,%r13
163 movq %rdx,%rcx
164 adcq $0,%rcx
165
166 mulq %r9
167 addq %rax,%r14
168 movq 48(%rsi),%rax
169 adcq $0,%rdx
170 addq %rcx,%r14
171 movq %rdx,%rcx
172 adcq $0,%rcx
173
174 mulq %r9
175 movq %r12,%r10
176 leaq (%rbx,%r12,2),%r12
177 addq %rax,%r15
178 movq 56(%rsi),%rax
179 adcq $0,%rdx
180 addq %rcx,%r15
181 movq %rdx,%rcx
182 adcq $0,%rcx
183
184 mulq %r9
185 shrq $63,%r10
186 addq %rax,%r8
187 movq %r9,%rax
188 adcq $0,%rdx
189 addq %rcx,%r8
190 movq %rdx,%r9
191 adcq $0,%r9
192
193 movq %r13,%rcx
194 leaq (%r10,%r13,2),%r13
195
196 mulq %rax
197 addq %rax,%r11
198 adcq %rdx,%r12
199 adcq $0,%r13
200
201 movq %r11,32(%rsp)
202 movq %r12,40(%rsp)
203 shrq $63,%rcx
204
205
/* Row 3: a[3]*a[4..7]. */
206 movq 24(%rsi),%r10
207 movq 32(%rsi),%rax
208 mulq %r10
209 addq %rax,%r14
210 movq 40(%rsi),%rax
211 movq %rdx,%rbx
212 adcq $0,%rbx
213
214 mulq %r10
215 addq %rax,%r15
216 movq 48(%rsi),%rax
217 adcq $0,%rdx
218 addq %rbx,%r15
219 movq %rdx,%rbx
220 adcq $0,%rbx
221
222 mulq %r10
223 movq %r14,%r12
224 leaq (%rcx,%r14,2),%r14
225 addq %rax,%r8
226 movq 56(%rsi),%rax
227 adcq $0,%rdx
228 addq %rbx,%r8
229 movq %rdx,%rbx
230 adcq $0,%rbx
231
232 mulq %r10
233 shrq $63,%r12
234 addq %rax,%r9
235 movq %r10,%rax
236 adcq $0,%rdx
237 addq %rbx,%r9
238 movq %rdx,%r10
239 adcq $0,%r10
240
241 movq %r15,%rbx
242 leaq (%r12,%r15,2),%r15
243
244 mulq %rax
245 addq %rax,%r13
246 adcq %rdx,%r14
247 adcq $0,%r15
248
249 movq %r13,48(%rsp)
250 movq %r14,56(%rsp)
251 shrq $63,%rbx
252
253
/* Row 4: a[4]*a[5..7]. */
254 movq 32(%rsi),%r11
255 movq 40(%rsi),%rax
256 mulq %r11
257 addq %rax,%r8
258 movq 48(%rsi),%rax
259 movq %rdx,%rcx
260 adcq $0,%rcx
261
262 mulq %r11
263 addq %rax,%r9
264 movq 56(%rsi),%rax
265 adcq $0,%rdx
266 movq %r8,%r12
267 leaq (%rbx,%r8,2),%r8
268 addq %rcx,%r9
269 movq %rdx,%rcx
270 adcq $0,%rcx
271
272 mulq %r11
273 shrq $63,%r12
274 addq %rax,%r10
275 movq %r11,%rax
276 adcq $0,%rdx
277 addq %rcx,%r10
278 movq %rdx,%r11
279 adcq $0,%r11
280
281 movq %r9,%rcx
282 leaq (%r12,%r9,2),%r9
283
284 mulq %rax
285 addq %rax,%r15
286 adcq %rdx,%r8
287 adcq $0,%r9
288
289 movq %r15,64(%rsp)
290 movq %r8,72(%rsp)
291 shrq $63,%rcx
292
293
/* Row 5: a[5]*a[6..7]. */
294 movq 40(%rsi),%r12
295 movq 48(%rsi),%rax
296 mulq %r12
297 addq %rax,%r10
298 movq 56(%rsi),%rax
299 movq %rdx,%rbx
300 adcq $0,%rbx
301
302 mulq %r12
303 addq %rax,%r11
304 movq %r12,%rax
305 movq %r10,%r15
306 leaq (%rcx,%r10,2),%r10
307 adcq $0,%rdx
308 shrq $63,%r15
309 addq %rbx,%r11
310 movq %rdx,%r12
311 adcq $0,%r12
312
313 movq %r11,%rbx
314 leaq (%r15,%r11,2),%r11
315
316 mulq %rax
317 addq %rax,%r9
318 adcq %rdx,%r10
319 adcq $0,%r11
320
321 movq %r9,80(%rsp)
322 movq %r10,88(%rsp)
323
324
/* Row 6: a[6]*a[7]; here the doubling is an explicit shl/adc chain. */
325 movq 48(%rsi),%r13
326 movq 56(%rsi),%rax
327 mulq %r13
328 addq %rax,%r12
329 movq %r13,%rax
330 movq %rdx,%r13
331 adcq $0,%r13
332
333 xorq %r14,%r14
334 shlq $1,%rbx /* CF = top bit of the undoubled r11 saved in rbx */
335 adcq %r12,%r12
336 adcq %r13,%r13
337 adcq %r14,%r14
338
339 mulq %rax
340 addq %rax,%r11
341 adcq %rdx,%r12
342 adcq $0,%r13
343
344 movq %r11,96(%rsp)
345 movq %r12,104(%rsp)
346
347
/* a[7]^2 completes the two top limbs. */
348 movq 56(%rsi),%rax
349 mulq %rax
350 addq %rax,%r13
351 adcq $0,%rdx
352
353 addq %rdx,%r14
354
355 movq %r13,112(%rsp)
356 movq %r14,120(%rsp)
357
/* Reduce the low half, add the high half, then conditionally subtract. */
358 movq (%rsp),%r8
359 movq 8(%rsp),%r9
360 movq 16(%rsp),%r10
361 movq 24(%rsp),%r11
362 movq 32(%rsp),%r12
363 movq 40(%rsp),%r13
364 movq 48(%rsp),%r14
365 movq 56(%rsp),%r15
366
367 call __rsaz_512_reduce
368
369 addq 64(%rsp),%r8
370 adcq 72(%rsp),%r9
371 adcq 80(%rsp),%r10
372 adcq 88(%rsp),%r11
373 adcq 96(%rsp),%r12
374 adcq 104(%rsp),%r13
375 adcq 112(%rsp),%r14
376 adcq 120(%rsp),%r15
377 sbbq %rcx,%rcx /* rcx = 0 or all-ones from the final carry */
378
379 call __rsaz_512_subtract
380
/* Set up the next pass: result limbs 0/1 are already in r8/r9, and the
   stored result at (%rdi) becomes the new source. */
381 movq %r8,%rdx
382 movq %r9,%rax
383 movl 128+8(%rsp),%r8d
384 movq %rdi,%rsi
385
386 decl %r8d
387 jnz .Loop_sqr
388
/* rax = stack pointer value before the six pushes; restore from there. */
389 leaq 128+24+48(%rsp),%rax
390 movq -48(%rax),%r15
391 movq -40(%rax),%r14
392 movq -32(%rax),%r13
393 movq -24(%rax),%r12
394 movq -16(%rax),%rbp
395 movq -8(%rax),%rbx
396 leaq (%rax),%rsp
397 .Lsqr_epilogue:
398 .byte 0xf3,0xc3 /* rep ret */
399 .size rsaz_512_sqr,.-rsaz_512_sqr
/*
 * rsaz_512_mul -- multiply two 8-limb values, reduce, store 8 limbs.
 *
 * Registers on entry, as consumed below:
 *   %rdi  destination (parked in %xmm0 across __rsaz_512_mul, which
 *         overwrites %rdi)
 *   %rsi  first operand, 8 qwords
 *   %rdx  second operand, 8 qwords (walked through %rbp by __rsaz_512_mul)
 *   %rcx  parked in %xmm1, then moved to %rbp for reduce/subtract
 *         (presumably the modulus -- confirm against caller)
 *   %r8   saved at 128(%rsp); read by __rsaz_512_reduce as 128+8(%rsp)
 *         (presumably the Montgomery constant n0 -- confirm)
 *
 * 0..127(%rsp) receives the 16-limb product.
 */
400 .globl rsaz_512_mul
401 .hidden rsaz_512_mul
402 .type rsaz_512_mul,@function
403 .align 32
404 rsaz_512_mul:
405 pushq %rbx
406 pushq %rbp
407 pushq %r12
408 pushq %r13
409 pushq %r14
410 pushq %r15
411
412 subq $128+24,%rsp
413 .Lmul_body:
414 .byte 102,72,15,110,199 /* movq %rdi,%xmm0 */
415 .byte 102,72,15,110,201 /* movq %rcx,%xmm1 */
416 movq %r8,128(%rsp)
417 movq (%rdx),%rbx /* rbx = first limb of the second operand */
418 movq %rdx,%rbp
419 call __rsaz_512_mul
420
421 .byte 102,72,15,126,199 /* movq %xmm0,%rdi */
422 .byte 102,72,15,126,205 /* movq %xmm1,%rbp */
423
424 movq (%rsp),%r8
425 movq 8(%rsp),%r9
426 movq 16(%rsp),%r10
427 movq 24(%rsp),%r11
428 movq 32(%rsp),%r12
429 movq 40(%rsp),%r13
430 movq 48(%rsp),%r14
431 movq 56(%rsp),%r15
432
433 call __rsaz_512_reduce
/* Add the high half of the product; the carry becomes the subtract mask. */
434 addq 64(%rsp),%r8
435 adcq 72(%rsp),%r9
436 adcq 80(%rsp),%r10
437 adcq 88(%rsp),%r11
438 adcq 96(%rsp),%r12
439 adcq 104(%rsp),%r13
440 adcq 112(%rsp),%r14
441 adcq 120(%rsp),%r15
442 sbbq %rcx,%rcx
443
444 call __rsaz_512_subtract
445
/* rax = stack pointer value before the six pushes. */
446 leaq 128+24+48(%rsp),%rax
447 movq -48(%rax),%r15
448 movq -40(%rax),%r14
449 movq -32(%rax),%r13
450 movq -24(%rax),%r12
451 movq -16(%rax),%rbp
452 movq -8(%rax),%rbx
453 leaq (%rax),%rsp
454 .Lmul_epilogue:
455 .byte 0xf3,0xc3 /* rep ret */
456 .size rsaz_512_mul,.-rsaz_512_mul
/*
 * rsaz_512_mul_gather4 -- multiply an 8-limb value by a table entry that
 * is selected with masks rather than with an index-dependent address.
 *
 * Registers on entry, as consumed below:
 *   %rdi  destination, saved at 128+8(%rsp) and reloaded before reduction
 *   %rsi  first operand, 8 qwords
 *   %rdx  table base; read with movdqa, so it must be 16-byte aligned.
 *         One 128-byte row per limb, 16 qword candidates per row.
 *   %rcx  saved at 128+16(%rsp), later loaded into %rbp for
 *         reduce/subtract (presumably the modulus -- confirm)
 *   %r8   saved at 128(%rsp); read by __rsaz_512_reduce as 128+8(%rsp)
 *         (presumably the Montgomery constant n0 -- confirm)
 *   %r9d  table index; the masks below cover values 0..15
 *
 * 0..127(%rsp) receives the 16-limb product.
 */
457 .globl rsaz_512_mul_gather4
458 .hidden rsaz_512_mul_gather4
459 .type rsaz_512_mul_gather4,@function
460 .align 32
461 rsaz_512_mul_gather4:
462 pushq %rbx
463 pushq %rbp
464 pushq %r12
465 pushq %r13
466 pushq %r14
467 pushq %r15
468
469 subq $152,%rsp
470 .Lmul_gather4_body:
/* Build eight masks: xmm0 compares the index with {0,0,1,1}, xmm1 with
   {2,2,3,3}, ... xmm7 with {14,14,15,15}.  Exactly one 64-bit lane ends
   up all-ones when the index is in 0..15. */
471 movd %r9d,%xmm8
472 movdqa .Linc+16(%rip),%xmm1
473 movdqa .Linc(%rip),%xmm0
474
475 pshufd $0,%xmm8,%xmm8 /* broadcast the index to all four dwords */
476 movdqa %xmm1,%xmm7
477 movdqa %xmm1,%xmm2
478 paddd %xmm0,%xmm1
479 pcmpeqd %xmm8,%xmm0
480 movdqa %xmm7,%xmm3
481 paddd %xmm1,%xmm2
482 pcmpeqd %xmm8,%xmm1
483 movdqa %xmm7,%xmm4
484 paddd %xmm2,%xmm3
485 pcmpeqd %xmm8,%xmm2
486 movdqa %xmm7,%xmm5
487 paddd %xmm3,%xmm4
488 pcmpeqd %xmm8,%xmm3
489 movdqa %xmm7,%xmm6
490 paddd %xmm4,%xmm5
491 pcmpeqd %xmm8,%xmm4
492 paddd %xmm5,%xmm6
493 pcmpeqd %xmm8,%xmm5
494 paddd %xmm6,%xmm7
495 pcmpeqd %xmm8,%xmm6
496 pcmpeqd %xmm8,%xmm7
497
/* Select limb 0 of the entry: load the whole row, mask, OR together. */
498 movdqa 0(%rdx),%xmm8
499 movdqa 16(%rdx),%xmm9
500 movdqa 32(%rdx),%xmm10
501 movdqa 48(%rdx),%xmm11
502 pand %xmm0,%xmm8
503 movdqa 64(%rdx),%xmm12
504 pand %xmm1,%xmm9
505 movdqa 80(%rdx),%xmm13
506 pand %xmm2,%xmm10
507 movdqa 96(%rdx),%xmm14
508 pand %xmm3,%xmm11
509 movdqa 112(%rdx),%xmm15
510 leaq 128(%rdx),%rbp /* rbp = next table row */
511 pand %xmm4,%xmm12
512 pand %xmm5,%xmm13
513 pand %xmm6,%xmm14
514 pand %xmm7,%xmm15
515 por %xmm10,%xmm8
516 por %xmm11,%xmm9
517 por %xmm12,%xmm8
518 por %xmm13,%xmm9
519 por %xmm14,%xmm8
520 por %xmm15,%xmm9
521
522 por %xmm9,%xmm8
523 pshufd $0x4e,%xmm8,%xmm9 /* swap 64-bit halves to fold them together */
524 por %xmm9,%xmm8
525 .byte 102,76,15,126,195 /* movq %xmm8,%rbx */
526
527 movq %r8,128(%rsp)
528 movq %rdi,128+8(%rsp)
529 movq %rcx,128+16(%rsp)
530
/* First row of the product: a[0..7] * selected limb 0. */
531 movq (%rsi),%rax
532 movq 8(%rsi),%rcx
533 mulq %rbx
534 movq %rax,(%rsp)
535 movq %rcx,%rax
536 movq %rdx,%r8
537
538 mulq %rbx
539 addq %rax,%r8
540 movq 16(%rsi),%rax
541 movq %rdx,%r9
542 adcq $0,%r9
543
544 mulq %rbx
545 addq %rax,%r9
546 movq 24(%rsi),%rax
547 movq %rdx,%r10
548 adcq $0,%r10
549
550 mulq %rbx
551 addq %rax,%r10
552 movq 32(%rsi),%rax
553 movq %rdx,%r11
554 adcq $0,%r11
555
556 mulq %rbx
557 addq %rax,%r11
558 movq 40(%rsi),%rax
559 movq %rdx,%r12
560 adcq $0,%r12
561
562 mulq %rbx
563 addq %rax,%r12
564 movq 48(%rsi),%rax
565 movq %rdx,%r13
566 adcq $0,%r13
567
568 mulq %rbx
569 addq %rax,%r13
570 movq 56(%rsi),%rax
571 movq %rdx,%r14
572 adcq $0,%r14
573
574 mulq %rbx
575 addq %rax,%r14
576 movq (%rsi),%rax /* preload a[0] for the first loop pass */
577 movq %rdx,%r15
578 adcq $0,%r15
579
580 leaq 8(%rsp),%rdi /* rdi = where the next finished limb goes */
581 movl $7,%ecx /* seven remaining limbs of the table entry */
582 jmp .Loop_mul_gather
583
584 .align 32
585 .Loop_mul_gather:
/* Select the next limb of the entry from the row at (%rbp). */
586 movdqa 0(%rbp),%xmm8
587 movdqa 16(%rbp),%xmm9
588 movdqa 32(%rbp),%xmm10
589 movdqa 48(%rbp),%xmm11
590 pand %xmm0,%xmm8
591 movdqa 64(%rbp),%xmm12
592 pand %xmm1,%xmm9
593 movdqa 80(%rbp),%xmm13
594 pand %xmm2,%xmm10
595 movdqa 96(%rbp),%xmm14
596 pand %xmm3,%xmm11
597 movdqa 112(%rbp),%xmm15
598 leaq 128(%rbp),%rbp
599 pand %xmm4,%xmm12
600 pand %xmm5,%xmm13
601 pand %xmm6,%xmm14
602 pand %xmm7,%xmm15
603 por %xmm10,%xmm8
604 por %xmm11,%xmm9
605 por %xmm12,%xmm8
606 por %xmm13,%xmm9
607 por %xmm14,%xmm8
608 por %xmm15,%xmm9
609
610 por %xmm9,%xmm8
611 pshufd $0x4e,%xmm8,%xmm9
612 por %xmm9,%xmm8
613 .byte 102,76,15,126,195 /* movq %xmm8,%rbx */
614
/* Multiply-accumulate a[0..7] * rbx into r8..r15; the lowest limb is
   finished and stored, the rest shift down by one register. */
615 mulq %rbx
616 addq %rax,%r8
617 movq 8(%rsi),%rax
618 movq %r8,(%rdi)
619 movq %rdx,%r8
620 adcq $0,%r8
621
622 mulq %rbx
623 addq %rax,%r9
624 movq 16(%rsi),%rax
625 adcq $0,%rdx
626 addq %r9,%r8
627 movq %rdx,%r9
628 adcq $0,%r9
629
630 mulq %rbx
631 addq %rax,%r10
632 movq 24(%rsi),%rax
633 adcq $0,%rdx
634 addq %r10,%r9
635 movq %rdx,%r10
636 adcq $0,%r10
637
638 mulq %rbx
639 addq %rax,%r11
640 movq 32(%rsi),%rax
641 adcq $0,%rdx
642 addq %r11,%r10
643 movq %rdx,%r11
644 adcq $0,%r11
645
646 mulq %rbx
647 addq %rax,%r12
648 movq 40(%rsi),%rax
649 adcq $0,%rdx
650 addq %r12,%r11
651 movq %rdx,%r12
652 adcq $0,%r12
653
654 mulq %rbx
655 addq %rax,%r13
656 movq 48(%rsi),%rax
657 adcq $0,%rdx
658 addq %r13,%r12
659 movq %rdx,%r13
660 adcq $0,%r13
661
662 mulq %rbx
663 addq %rax,%r14
664 movq 56(%rsi),%rax
665 adcq $0,%rdx
666 addq %r14,%r13
667 movq %rdx,%r14
668 adcq $0,%r14
669
670 mulq %rbx
671 addq %rax,%r15
672 movq (%rsi),%rax
673 adcq $0,%rdx
674 addq %r15,%r14
675 movq %rdx,%r15
676 adcq $0,%r15
677
678 leaq 8(%rdi),%rdi
679
680 decl %ecx
681 jnz .Loop_mul_gather
682
/* Flush the eight still-live high limbs of the product. */
683 movq %r8,(%rdi)
684 movq %r9,8(%rdi)
685 movq %r10,16(%rdi)
686 movq %r11,24(%rdi)
687 movq %r12,32(%rdi)
688 movq %r13,40(%rdi)
689 movq %r14,48(%rdi)
690 movq %r15,56(%rdi)
691
692 movq 128+8(%rsp),%rdi /* restore destination */
693 movq 128+16(%rsp),%rbp /* rbp = saved %rcx argument */
694
695 movq (%rsp),%r8
696 movq 8(%rsp),%r9
697 movq 16(%rsp),%r10
698 movq 24(%rsp),%r11
699 movq 32(%rsp),%r12
700 movq 40(%rsp),%r13
701 movq 48(%rsp),%r14
702 movq 56(%rsp),%r15
703
704 call __rsaz_512_reduce
705 addq 64(%rsp),%r8
706 adcq 72(%rsp),%r9
707 adcq 80(%rsp),%r10
708 adcq 88(%rsp),%r11
709 adcq 96(%rsp),%r12
710 adcq 104(%rsp),%r13
711 adcq 112(%rsp),%r14
712 adcq 120(%rsp),%r15
713 sbbq %rcx,%rcx
714
715 call __rsaz_512_subtract
716
/* rax = stack pointer value before the six pushes. */
717 leaq 128+24+48(%rsp),%rax
718 movq -48(%rax),%r15
719 movq -40(%rax),%r14
720 movq -32(%rax),%r13
721 movq -24(%rax),%r12
722 movq -16(%rax),%rbp
723 movq -8(%rax),%rbx
724 leaq (%rax),%rsp
725 .Lmul_gather4_epilogue:
726 .byte 0xf3,0xc3 /* rep ret */
727 .size rsaz_512_mul_gather4,.-rsaz_512_mul_gather4
/*
 * rsaz_512_mul_scatter4 -- multiply the value at (%rdi) by the value at
 * (%rsi), reduce, store the result back to (%rdi), and also scatter it
 * into a table.
 *
 * Registers on entry, as consumed below:
 *   %rdi  in/out operand, 8 qwords (used as the second multiplicand and
 *         as the destination written by __rsaz_512_subtract)
 *   %rsi  first operand, 8 qwords
 *   %rdx  parked in %xmm1, later moved to %rbp for reduce/subtract
 *         (presumably the modulus -- confirm against caller)
 *   %rcx  saved at 128(%rsp); read by __rsaz_512_reduce as 128+8(%rsp)
 *         (presumably the Montgomery constant n0 -- confirm)
 *   %r8   table base
 *   %r9d  table index (zero-extended below)
 *
 * Limb i of the result is written to table + index*8 + i*128.
 */
728 .globl rsaz_512_mul_scatter4
729 .hidden rsaz_512_mul_scatter4
730 .type rsaz_512_mul_scatter4,@function
731 .align 32
732 rsaz_512_mul_scatter4:
733 pushq %rbx
734 pushq %rbp
735 pushq %r12
736 pushq %r13
737 pushq %r14
738 pushq %r15
739
740 movl %r9d,%r9d /* zero-extend the 32-bit index */
741 subq $128+24,%rsp
742 .Lmul_scatter4_body:
743 leaq (%r8,%r9,8),%r8 /* r8 = address of the selected table column */
744 .byte 102,72,15,110,199 /* movq %rdi,%xmm0 */
745 .byte 102,72,15,110,202 /* movq %rdx,%xmm1 */
746 .byte 102,73,15,110,208 /* movq %r8,%xmm2 */
747 movq %rcx,128(%rsp)
748
749 movq %rdi,%rbp
750 movq (%rdi),%rbx
751 call __rsaz_512_mul
752
753 .byte 102,72,15,126,199 /* movq %xmm0,%rdi */
754 .byte 102,72,15,126,205 /* movq %xmm1,%rbp */
755
756 movq (%rsp),%r8
757 movq 8(%rsp),%r9
758 movq 16(%rsp),%r10
759 movq 24(%rsp),%r11
760 movq 32(%rsp),%r12
761 movq 40(%rsp),%r13
762 movq 48(%rsp),%r14
763 movq 56(%rsp),%r15
764
765 call __rsaz_512_reduce
766 addq 64(%rsp),%r8
767 adcq 72(%rsp),%r9
768 adcq 80(%rsp),%r10
769 adcq 88(%rsp),%r11
770 adcq 96(%rsp),%r12
771 adcq 104(%rsp),%r13
772 adcq 112(%rsp),%r14
773 adcq 120(%rsp),%r15
/* The register move below does not modify flags, so the carry from the
   adc chain is still intact for sbbq. */
774 .byte 102,72,15,126,214 /* movq %xmm2,%rsi */
775 sbbq %rcx,%rcx
776
777 call __rsaz_512_subtract
778
/* Scatter the reduced limbs: consecutive limbs are 128 bytes apart. */
779 movq %r8,0(%rsi)
780 movq %r9,128(%rsi)
781 movq %r10,256(%rsi)
782 movq %r11,384(%rsi)
783 movq %r12,512(%rsi)
784 movq %r13,640(%rsi)
785 movq %r14,768(%rsi)
786 movq %r15,896(%rsi)
787
/* rax = stack pointer value before the six pushes. */
788 leaq 128+24+48(%rsp),%rax
789 movq -48(%rax),%r15
790 movq -40(%rax),%r14
791 movq -32(%rax),%r13
792 movq -24(%rax),%r12
793 movq -16(%rax),%rbp
794 movq -8(%rax),%rbx
795 leaq (%rax),%rsp
796 .Lmul_scatter4_epilogue:
797 .byte 0xf3,0xc3 /* rep ret */
798 .size rsaz_512_mul_scatter4,.-rsaz_512_mul_scatter4
/*
 * rsaz_512_mul_by_one -- run the 8-limb value at (%rsi) through
 * __rsaz_512_reduce and store the result at (%rdi).  No high half is
 * added and __rsaz_512_subtract is not called.
 * (Presumably this converts a value out of Montgomery form -- confirm.)
 *
 * Registers on entry, as consumed below:
 *   %rdi  destination, 8 qwords
 *   %rsi  source, 8 qwords
 *   %rdx  copied to %rbp for __rsaz_512_reduce (presumably the modulus)
 *   %rcx  saved at 128(%rsp); read by __rsaz_512_reduce as 128+8(%rsp)
 */
799 .globl rsaz_512_mul_by_one
800 .hidden rsaz_512_mul_by_one
801 .type rsaz_512_mul_by_one,@function
802 .align 32
803 rsaz_512_mul_by_one:
804 pushq %rbx
805 pushq %rbp
806 pushq %r12
807 pushq %r13
808 pushq %r14
809 pushq %r15
810
811 subq $128+24,%rsp
812 .Lmul_by_one_body:
813 movq %rdx,%rbp
814 movq %rcx,128(%rsp)
815
816 movq (%rsi),%r8
817 pxor %xmm0,%xmm0
818 movq 8(%rsi),%r9
819 movq 16(%rsi),%r10
820 movq 24(%rsi),%r11
821 movq 32(%rsi),%r12
822 movq 40(%rsi),%r13
823 movq 48(%rsi),%r14
824 movq 56(%rsi),%r15
825
/* Zero bytes 0..111 of the frame.  movdqa needs (%rsp) 16-byte aligned;
   8 (return address) + 48 (pushes) + 152 = 208 keeps it so, assuming the
   usual SysV alignment at entry.  The reduce helper as written in this
   file does not read this area. */
826 movdqa %xmm0,(%rsp)
827 movdqa %xmm0,16(%rsp)
828 movdqa %xmm0,32(%rsp)
829 movdqa %xmm0,48(%rsp)
830 movdqa %xmm0,64(%rsp)
831 movdqa %xmm0,80(%rsp)
832 movdqa %xmm0,96(%rsp)
833 call __rsaz_512_reduce
834 movq %r8,(%rdi)
835 movq %r9,8(%rdi)
836 movq %r10,16(%rdi)
837 movq %r11,24(%rdi)
838 movq %r12,32(%rdi)
839 movq %r13,40(%rdi)
840 movq %r14,48(%rdi)
841 movq %r15,56(%rdi)
842
/* rax = stack pointer value before the six pushes. */
843 leaq 128+24+48(%rsp),%rax
844 movq -48(%rax),%r15
845 movq -40(%rax),%r14
846 movq -32(%rax),%r13
847 movq -24(%rax),%r12
848 movq -16(%rax),%rbp
849 movq -8(%rax),%rbx
850 leaq (%rax),%rsp
851 .Lmul_by_one_epilogue:
852 .byte 0xf3,0xc3 /* rep ret */
853 .size rsaz_512_mul_by_one,.-rsaz_512_mul_by_one
/*
 * __rsaz_512_reduce -- local helper with a private register contract.
 *
 * In:   %r8..%r15     8-limb value to reduce
 *       %rbp          pointer to 8 qwords (presumably the modulus m)
 *       128+8(%rsp)   per-pass multiplier; this is the caller's 128(%rsp)
 *                     slot seen past the pushed return address
 *                     (presumably n0 = -1/m mod 2^64 -- confirm)
 * Out:  %r8..%r15     value after eight passes
 * Clobbers: %rax, %rbx, %rcx, %rdx, %rsi, flags
 *
 * Each pass computes rbx = r8 * multiplier, adds rbx * m[0..7] to the
 * accumulator, and shifts the accumulator down one limb.
 */
854 .type __rsaz_512_reduce,@function
855 .align 32
856 __rsaz_512_reduce:
857 movq %r8,%rbx
858 imulq 128+8(%rsp),%rbx
859 movq 0(%rbp),%rax
860 movl $8,%ecx
861 jmp .Lreduction_loop
862
863 .align 32
864 .Lreduction_loop:
865 mulq %rbx
866 movq 8(%rbp),%rax
/* negq is used only for its flag: CF = (r8 != 0).  This stands in for the
   carry of low(m[0]*rbx) + r8, presumably because that sum is 0 mod 2^64
   by construction of rbx -- confirm. */
867 negq %r8
868 movq %rdx,%r8
869 adcq $0,%r8
870
871 mulq %rbx
872 addq %rax,%r9
873 movq 16(%rbp),%rax
874 adcq $0,%rdx
875 addq %r9,%r8
876 movq %rdx,%r9
877 adcq $0,%r9
878
879 mulq %rbx
880 addq %rax,%r10
881 movq 24(%rbp),%rax
882 adcq $0,%rdx
883 addq %r10,%r9
884 movq %rdx,%r10
885 adcq $0,%r10
886
887 mulq %rbx
888 addq %rax,%r11
889 movq 32(%rbp),%rax
890 adcq $0,%rdx
891 addq %r11,%r10
892 movq 128+8(%rsp),%rsi /* start forming next pass's multiplier; mov keeps CF */
893
894
895 adcq $0,%rdx
896 movq %rdx,%r11
897
898 mulq %rbx
899 addq %rax,%r12
900 movq 40(%rbp),%rax
901 adcq $0,%rdx
902 imulq %r8,%rsi /* rsi = new r8 * multiplier; imulq clobbers CF, which is why the adcq above precedes it */
903 addq %r12,%r11
904 movq %rdx,%r12
905 adcq $0,%r12
906
907 mulq %rbx
908 addq %rax,%r13
909 movq 48(%rbp),%rax
910 adcq $0,%rdx
911 addq %r13,%r12
912 movq %rdx,%r13
913 adcq $0,%r13
914
915 mulq %rbx
916 addq %rax,%r14
917 movq 56(%rbp),%rax
918 adcq $0,%rdx
919 addq %r14,%r13
920 movq %rdx,%r14
921 adcq $0,%r14
922
923 mulq %rbx
924 movq %rsi,%rbx /* switch to the next pass's multiplier */
925 addq %rax,%r15
926 movq 0(%rbp),%rax
927 adcq $0,%rdx
928 addq %r15,%r14
929 movq %rdx,%r15
930 adcq $0,%r15
931
932 decl %ecx
933 jne .Lreduction_loop
934
935 .byte 0xf3,0xc3 /* rep ret */
936 .size __rsaz_512_reduce,.-__rsaz_512_reduce
/*
 * __rsaz_512_subtract -- local helper: store r8..r15 to (%rdi), then add
 * the masked two's complement of the 8 qwords at (%rbp) and store again.
 *
 * In:   %r8..%r15  8-limb value
 *       %rdi       destination, 8 qwords
 *       %rbp       pointer to 8 qwords (presumably the modulus m)
 *       %rcx       mask; callers produce it with sbbq %rcx,%rcx, so it is
 *                  0 or all-ones
 * Out:  (%rdi) and %r8..%r15 hold value - m when the mask is all-ones,
 *       or the unchanged value when the mask is 0.
 *
 * There are no branches: the same instructions run for either mask.
 * The "+1" of the two's complement comes from negq on limb 0, which
 * equals ~m[0]+1 without a lost carry only when m[0] is non-zero
 * (presumably guaranteed because m is odd -- confirm).
 */
937 .type __rsaz_512_subtract,@function
938 .align 32
939 __rsaz_512_subtract:
940 movq %r8,(%rdi)
941 movq %r9,8(%rdi)
942 movq %r10,16(%rdi)
943 movq %r11,24(%rdi)
944 movq %r12,32(%rdi)
945 movq %r13,40(%rdi)
946 movq %r14,48(%rdi)
947 movq %r15,56(%rdi)
948
/* r8..r15 = (-m) & mask, limb by limb. */
949 movq 0(%rbp),%r8
950 movq 8(%rbp),%r9
951 negq %r8
952 notq %r9
953 andq %rcx,%r8
954 movq 16(%rbp),%r10
955 andq %rcx,%r9
956 notq %r10
957 movq 24(%rbp),%r11
958 andq %rcx,%r10
959 notq %r11
960 movq 32(%rbp),%r12
961 andq %rcx,%r11
962 notq %r12
963 movq 40(%rbp),%r13
964 andq %rcx,%r12
965 notq %r13
966 movq 48(%rbp),%r14
967 andq %rcx,%r13
968 notq %r14
969 movq 56(%rbp),%r15
970 andq %rcx,%r14
971 notq %r15
972 andq %rcx,%r15
973
/* Add the stored value back in; the final carry is discarded. */
974 addq (%rdi),%r8
975 adcq 8(%rdi),%r9
976 adcq 16(%rdi),%r10
977 adcq 24(%rdi),%r11
978 adcq 32(%rdi),%r12
979 adcq 40(%rdi),%r13
980 adcq 48(%rdi),%r14
981 adcq 56(%rdi),%r15
982
983 movq %r8,(%rdi)
984 movq %r9,8(%rdi)
985 movq %r10,16(%rdi)
986 movq %r11,24(%rdi)
987 movq %r12,32(%rdi)
988 movq %r13,40(%rdi)
989 movq %r14,48(%rdi)
990 movq %r15,56(%rdi)
991
992 .byte 0xf3,0xc3 /* rep ret */
993 .size __rsaz_512_subtract,.-__rsaz_512_subtract
/*
 * __rsaz_512_mul -- local helper: schoolbook 8x8-limb multiply.
 *
 * In:   %rsi  first operand a, 8 qwords
 *       %rbp  second operand b, 8 qwords
 *       %rbx  b[0], preloaded by the caller
 * Out:  16-limb product at 8(%rsp)..135(%rsp) as seen here, which is the
 *       caller's 0..127(%rsp) (the return address occupies the first 8
 *       bytes)
 * Clobbers: %rax, %rbx, %rcx, %rdx, %rdi, %rbp, %r8..%r15, flags
 */
994 .type __rsaz_512_mul,@function
995 .align 32
996 __rsaz_512_mul:
997 leaq 8(%rsp),%rdi /* rdi = caller's frame base, skipping the return address */
998
/* First row: a[0..7] * b[0]; limb 0 is final, limbs 1..8 stay in r8..r15. */
999 movq (%rsi),%rax
1000 mulq %rbx
1001 movq %rax,(%rdi)
1002 movq 8(%rsi),%rax
1003 movq %rdx,%r8
1004
1005 mulq %rbx
1006 addq %rax,%r8
1007 movq 16(%rsi),%rax
1008 movq %rdx,%r9
1009 adcq $0,%r9
1010
1011 mulq %rbx
1012 addq %rax,%r9
1013 movq 24(%rsi),%rax
1014 movq %rdx,%r10
1015 adcq $0,%r10
1016
1017 mulq %rbx
1018 addq %rax,%r10
1019 movq 32(%rsi),%rax
1020 movq %rdx,%r11
1021 adcq $0,%r11
1022
1023 mulq %rbx
1024 addq %rax,%r11
1025 movq 40(%rsi),%rax
1026 movq %rdx,%r12
1027 adcq $0,%r12
1028
1029 mulq %rbx
1030 addq %rax,%r12
1031 movq 48(%rsi),%rax
1032 movq %rdx,%r13
1033 adcq $0,%r13
1034
1035 mulq %rbx
1036 addq %rax,%r13
1037 movq 56(%rsi),%rax
1038 movq %rdx,%r14
1039 adcq $0,%r14
1040
1041 mulq %rbx
1042 addq %rax,%r14
1043 movq (%rsi),%rax /* preload a[0] for the first loop pass */
1044 movq %rdx,%r15
1045 adcq $0,%r15
1046
1047 leaq 8(%rbp),%rbp
1048 leaq 8(%rdi),%rdi
1049
1050 movl $7,%ecx /* rows for b[1]..b[7] */
1051 jmp .Loop_mul
1052
1053 .align 32
1054 .Loop_mul:
/* One row: add a[0..7] * b[i]; store the finished low limb and shift the
   accumulator down one register. */
1055 movq (%rbp),%rbx
1056 mulq %rbx
1057 addq %rax,%r8
1058 movq 8(%rsi),%rax
1059 movq %r8,(%rdi)
1060 movq %rdx,%r8
1061 adcq $0,%r8
1062
1063 mulq %rbx
1064 addq %rax,%r9
1065 movq 16(%rsi),%rax
1066 adcq $0,%rdx
1067 addq %r9,%r8
1068 movq %rdx,%r9
1069 adcq $0,%r9
1070
1071 mulq %rbx
1072 addq %rax,%r10
1073 movq 24(%rsi),%rax
1074 adcq $0,%rdx
1075 addq %r10,%r9
1076 movq %rdx,%r10
1077 adcq $0,%r10
1078
1079 mulq %rbx
1080 addq %rax,%r11
1081 movq 32(%rsi),%rax
1082 adcq $0,%rdx
1083 addq %r11,%r10
1084 movq %rdx,%r11
1085 adcq $0,%r11
1086
1087 mulq %rbx
1088 addq %rax,%r12
1089 movq 40(%rsi),%rax
1090 adcq $0,%rdx
1091 addq %r12,%r11
1092 movq %rdx,%r12
1093 adcq $0,%r12
1094
1095 mulq %rbx
1096 addq %rax,%r13
1097 movq 48(%rsi),%rax
1098 adcq $0,%rdx
1099 addq %r13,%r12
1100 movq %rdx,%r13
1101 adcq $0,%r13
1102
1103 mulq %rbx
1104 addq %rax,%r14
1105 movq 56(%rsi),%rax
1106 adcq $0,%rdx
1107 addq %r14,%r13
1108 movq %rdx,%r14
1109 leaq 8(%rbp),%rbp /* lea leaves CF intact for the adcq below */
1110 adcq $0,%r14
1111
1112 mulq %rbx
1113 addq %rax,%r15
1114 movq (%rsi),%rax
1115 adcq $0,%rdx
1116 addq %r15,%r14
1117 movq %rdx,%r15
1118 adcq $0,%r15
1119
1120 leaq 8(%rdi),%rdi
1121
1122 decl %ecx
1123 jnz .Loop_mul
1124
/* Flush the eight still-live high limbs. */
1125 movq %r8,(%rdi)
1126 movq %r9,8(%rdi)
1127 movq %r10,16(%rdi)
1128 movq %r11,24(%rdi)
1129 movq %r12,32(%rdi)
1130 movq %r13,40(%rdi)
1131 movq %r14,48(%rdi)
1132 movq %r15,56(%rdi)
1133
1134 .byte 0xf3,0xc3 /* rep ret */
1135 .size __rsaz_512_mul,.-__rsaz_512_mul
/*
 * rsaz_512_scatter4 -- copy 8 qwords from (%rsi) into a table column.
 *
 *   %rdi  table base
 *   %rsi  source, 8 consecutive qwords
 *   %rdx  column index; the full 64-bit register is used in the address,
 *         so the caller must pass it already zero-extended
 *
 * Limb i is written to table + index*8 + i*128.
 * Leaf function: touches only %rax, %rsi, %rdi, %r9 and uses no stack.
 */
1136 .globl rsaz_512_scatter4
1137 .hidden rsaz_512_scatter4
1138 .type rsaz_512_scatter4,@function
1139 .align 16
1140 rsaz_512_scatter4:
1141 leaq (%rdi,%rdx,8),%rdi
1142 movl $8,%r9d /* eight limbs to copy */
1143 jmp .Loop_scatter
1144 .align 16
1145 .Loop_scatter:
1146 movq (%rsi),%rax
1147 leaq 8(%rsi),%rsi
1148 movq %rax,(%rdi)
1149 leaq 128(%rdi),%rdi /* next limb lives one 128-byte row further on */
1150 decl %r9d
1151 jnz .Loop_scatter
1152 .byte 0xf3,0xc3 /* rep ret */
1153 .size rsaz_512_scatter4,.-rsaz_512_scatter4
1154
/*
 * rsaz_512_gather4 -- read one 8-limb entry out of a table laid out as
 * 8 rows of 128 bytes (16 qword candidates per row).
 *
 *   %rdi  destination, 8 consecutive qwords
 *   %rsi  table base; read with movdqa, so it must be 16-byte aligned
 *   %edx  entry index; the masks below cover values 0..15
 *
 * Every row is loaded in full and the wanted qword is picked with
 * pand/por masks, so no load address depends on the index.
 * Leaf function: uses %r9, %rsi, %rdi, %xmm0..%xmm15 and no stack.
 */
1155 .globl rsaz_512_gather4
1156 .hidden rsaz_512_gather4
1157 .type rsaz_512_gather4,@function
1158 .align 16
1159 rsaz_512_gather4:
/* Build eight masks: xmm0 compares the index with {0,0,1,1}, xmm1 with
   {2,2,3,3}, ... xmm7 with {14,14,15,15}. */
1160 movd %edx,%xmm8
1161 movdqa .Linc+16(%rip),%xmm1
1162 movdqa .Linc(%rip),%xmm0
1163
1164 pshufd $0,%xmm8,%xmm8 /* broadcast the index to all four dwords */
1165 movdqa %xmm1,%xmm7
1166 movdqa %xmm1,%xmm2
1167 paddd %xmm0,%xmm1
1168 pcmpeqd %xmm8,%xmm0
1169 movdqa %xmm7,%xmm3
1170 paddd %xmm1,%xmm2
1171 pcmpeqd %xmm8,%xmm1
1172 movdqa %xmm7,%xmm4
1173 paddd %xmm2,%xmm3
1174 pcmpeqd %xmm8,%xmm2
1175 movdqa %xmm7,%xmm5
1176 paddd %xmm3,%xmm4
1177 pcmpeqd %xmm8,%xmm3
1178 movdqa %xmm7,%xmm6
1179 paddd %xmm4,%xmm5
1180 pcmpeqd %xmm8,%xmm4
1181 paddd %xmm5,%xmm6
1182 pcmpeqd %xmm8,%xmm5
1183 paddd %xmm6,%xmm7
1184 pcmpeqd %xmm8,%xmm6
1185 pcmpeqd %xmm8,%xmm7
1186 movl $8,%r9d /* eight rows, one output limb per row */
1187 jmp .Loop_gather
1188 .align 16
1189 .Loop_gather:
1190 movdqa 0(%rsi),%xmm8
1191 movdqa 16(%rsi),%xmm9
1192 movdqa 32(%rsi),%xmm10
1193 movdqa 48(%rsi),%xmm11
1194 pand %xmm0,%xmm8
1195 movdqa 64(%rsi),%xmm12
1196 pand %xmm1,%xmm9
1197 movdqa 80(%rsi),%xmm13
1198 pand %xmm2,%xmm10
1199 movdqa 96(%rsi),%xmm14
1200 pand %xmm3,%xmm11
1201 movdqa 112(%rsi),%xmm15
1202 leaq 128(%rsi),%rsi
1203 pand %xmm4,%xmm12
1204 pand %xmm5,%xmm13
1205 pand %xmm6,%xmm14
1206 pand %xmm7,%xmm15
1207 por %xmm10,%xmm8
1208 por %xmm11,%xmm9
1209 por %xmm12,%xmm8
1210 por %xmm13,%xmm9
1211 por %xmm14,%xmm8
1212 por %xmm15,%xmm9
1213
1214 por %xmm9,%xmm8
1215 pshufd $0x4e,%xmm8,%xmm9 /* swap 64-bit halves to fold them together */
1216 por %xmm9,%xmm8
1217 movq %xmm8,(%rdi)
1218 leaq 8(%rdi),%rdi
1219 decl %r9d
1220 jnz .Loop_gather
1221 .byte 0xf3,0xc3 /* rep ret */
1222 .LSEH_end_rsaz_512_gather4:
1223 .size rsaz_512_gather4,.-rsaz_512_gather4
1224
/*
 * .Linc -- seed constants for the gather masks.
 *   .Linc      = {0,0,1,1}: index values for the first two qword lanes
 *   .Linc+16   = {2,2,2,2}: per-register increment
 * rsaz_512_gather4 and rsaz_512_mul_gather4 load both with movdqa; the
 * 64-byte alignment satisfies the 16-byte requirement of those loads.
 */
1225 .align 64
1226 .Linc:
1227 .long 0,0, 1,1
1228 .long 2,2, 2,2
1229 #endif
OLDNEW
« no previous file with comments | « third_party/boringssl/linux-x86_64/crypto/bn/rsaz-avx2.S ('k') | third_party/boringssl/linux-x86_64/crypto/bn/x86_64-mont.S » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698