Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(24)

Side by Side Diff: third_party/boringssl/mac-x86_64/crypto/bn/rsaz-x86_64.S

Issue 2354623003: Pull boringssl generated source from boringssl_gen (Closed)
Patch Set: . Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 #if defined(__x86_64__)
2 .text
3
4
5
// _rsaz_512_sqr: repeatedly squares an 8-limb (512-bit) value and reduces each
// 16-limb square back to 8 limbs via __rsaz_512_reduce / __rsaz_512_subtract.
//
// Register arguments as consumed below (System V AMD64 order):
//   %rdi  destination, 8 qwords (written by __rsaz_512_subtract)
//   %rsi  source, 8 qwords; re-pointed at %rdi after the first pass
//   %rdx  pointer kept in %rbp for the reduce/subtract helpers
//         (presumably the modulus -- confirm against the C prototype)
//   %rcx  64-bit constant saved at 128(%rsp) and multiplied into every
//         reduction step (presumably the Montgomery n0 value -- confirm)
//   %r8d  number of squarings to perform (counter kept at 128+8(%rsp))
//
// Frame: 0..127(%rsp) holds the 16-limb square, 128..151(%rsp) the saved
// arguments. All six callee-saved GPRs are pushed and restored.
//
// NOTE(review): upstream OpenSSL later patched a carry-propagation bug in the
// doubling steps of rsaz_512_sqr (CVE-2019-1551). The "leaq (carry,reg,2)"
// doubling used here looks like the pre-fix code -- verify before reuse.
6 .globl _rsaz_512_sqr
7 .private_extern _rsaz_512_sqr
8
9 .p2align 5
10 _rsaz_512_sqr:
11 pushq %rbx
12 pushq %rbp
13 pushq %r12
14 pushq %r13
15 pushq %r14
16 pushq %r15
17
18 subq $128+24,%rsp
19 L$sqr_body:
// %rbp = third argument; preload a[0] in %rdx and a[1] in %rax for the loop.
20 movq %rdx,%rbp
21 movq (%rsi),%rdx
22 movq 8(%rsi),%rax
23 movq %rcx,128(%rsp)
24 jmp L$oop_sqr
25
26 .p2align 5
27 L$oop_sqr:
// Loop invariant on entry: %rdx = a[0], %rax = a[1], %r8d = remaining count.
28 movl %r8d,128+8(%rsp)
29
// Cross products a[0]*a[1..7] accumulated into %r8..%r15.
30 movq %rdx,%rbx
31 mulq %rdx
32 movq %rax,%r8
33 movq 16(%rsi),%rax
34 movq %rdx,%r9
35
36 mulq %rbx
37 addq %rax,%r9
38 movq 24(%rsi),%rax
39 movq %rdx,%r10
40 adcq $0,%r10
41
42 mulq %rbx
43 addq %rax,%r10
44 movq 32(%rsi),%rax
45 movq %rdx,%r11
46 adcq $0,%r11
47
48 mulq %rbx
49 addq %rax,%r11
50 movq 40(%rsi),%rax
51 movq %rdx,%r12
52 adcq $0,%r12
53
54 mulq %rbx
55 addq %rax,%r12
56 movq 48(%rsi),%rax
57 movq %rdx,%r13
58 adcq $0,%r13
59
60 mulq %rbx
61 addq %rax,%r13
62 movq 56(%rsi),%rax
63 movq %rdx,%r14
64 adcq $0,%r14
65
66 mulq %rbx
67 addq %rax,%r14
68 movq %rbx,%rax
69 movq %rdx,%r15
70 adcq $0,%r15
71
// Double the low cross products, add a[0]^2, and emit result limbs 0 and 1.
// %rcx keeps the pre-doubling %r9 so its top bit can be shifted out below.
72 addq %r8,%r8
73 movq %r9,%rcx
74 adcq %r9,%r9
75
76 mulq %rax
77 movq %rax,(%rsp)
78 addq %rdx,%r8
79 adcq $0,%r9
80
81 movq %r8,8(%rsp)
82 shrq $63,%rcx
83
84
// Cross products a[1]*a[2..7]; %rbx carries the high half between steps.
85 movq 8(%rsi),%r8
86 movq 16(%rsi),%rax
87 mulq %r8
88 addq %rax,%r10
89 movq 24(%rsi),%rax
90 movq %rdx,%rbx
91 adcq $0,%rbx
92
93 mulq %r8
94 addq %rax,%r11
95 movq 32(%rsi),%rax
96 adcq $0,%rdx
97 addq %rbx,%r11
98 movq %rdx,%rbx
99 adcq $0,%rbx
100
101 mulq %r8
102 addq %rax,%r12
103 movq 40(%rsi),%rax
104 adcq $0,%rdx
105 addq %rbx,%r12
106 movq %rdx,%rbx
107 adcq $0,%rbx
108
109 mulq %r8
110 addq %rax,%r13
111 movq 48(%rsi),%rax
112 adcq $0,%rdx
113 addq %rbx,%r13
114 movq %rdx,%rbx
115 adcq $0,%rbx
116
117 mulq %r8
118 addq %rax,%r14
119 movq 56(%rsi),%rax
120 adcq $0,%rdx
121 addq %rbx,%r14
122 movq %rdx,%rbx
123 adcq $0,%rbx
124
125 mulq %r8
126 addq %rax,%r15
127 movq %r8,%rax
128 adcq $0,%rdx
129 addq %rbx,%r15
130 movq %rdx,%r8
131 movq %r10,%rdx
132 adcq $0,%r8
133
// Double %r10/%r11 (the addq on %rdx only produces the carry for the adcq),
// add a[1]^2, and emit result limbs 2 and 3.
134 addq %rdx,%rdx
135 leaq (%rcx,%r10,2),%r10
136 movq %r11,%rbx
137 adcq %r11,%r11
138
139 mulq %rax
140 addq %rax,%r9
141 adcq %rdx,%r10
142 adcq $0,%r11
143
144 movq %r9,16(%rsp)
145 movq %r10,24(%rsp)
146 shrq $63,%rbx
147
148
// Cross products a[2]*a[3..7]; doubling of %r12/%r13 is interleaved via leaq.
149 movq 16(%rsi),%r9
150 movq 24(%rsi),%rax
151 mulq %r9
152 addq %rax,%r12
153 movq 32(%rsi),%rax
154 movq %rdx,%rcx
155 adcq $0,%rcx
156
157 mulq %r9
158 addq %rax,%r13
159 movq 40(%rsi),%rax
160 adcq $0,%rdx
161 addq %rcx,%r13
162 movq %rdx,%rcx
163 adcq $0,%rcx
164
165 mulq %r9
166 addq %rax,%r14
167 movq 48(%rsi),%rax
168 adcq $0,%rdx
169 addq %rcx,%r14
170 movq %rdx,%rcx
171 adcq $0,%rcx
172
173 mulq %r9
174 movq %r12,%r10
175 leaq (%rbx,%r12,2),%r12
176 addq %rax,%r15
177 movq 56(%rsi),%rax
178 adcq $0,%rdx
179 addq %rcx,%r15
180 movq %rdx,%rcx
181 adcq $0,%rcx
182
183 mulq %r9
184 shrq $63,%r10
185 addq %rax,%r8
186 movq %r9,%rax
187 adcq $0,%rdx
188 addq %rcx,%r8
189 movq %rdx,%r9
190 adcq $0,%r9
191
192 movq %r13,%rcx
193 leaq (%r10,%r13,2),%r13
194
// Add a[2]^2 and emit result limbs 4 and 5.
195 mulq %rax
196 addq %rax,%r11
197 adcq %rdx,%r12
198 adcq $0,%r13
199
200 movq %r11,32(%rsp)
201 movq %r12,40(%rsp)
202 shrq $63,%rcx
203
204
// Cross products a[3]*a[4..7].
205 movq 24(%rsi),%r10
206 movq 32(%rsi),%rax
207 mulq %r10
208 addq %rax,%r14
209 movq 40(%rsi),%rax
210 movq %rdx,%rbx
211 adcq $0,%rbx
212
213 mulq %r10
214 addq %rax,%r15
215 movq 48(%rsi),%rax
216 adcq $0,%rdx
217 addq %rbx,%r15
218 movq %rdx,%rbx
219 adcq $0,%rbx
220
221 mulq %r10
222 movq %r14,%r12
223 leaq (%rcx,%r14,2),%r14
224 addq %rax,%r8
225 movq 56(%rsi),%rax
226 adcq $0,%rdx
227 addq %rbx,%r8
228 movq %rdx,%rbx
229 adcq $0,%rbx
230
231 mulq %r10
232 shrq $63,%r12
233 addq %rax,%r9
234 movq %r10,%rax
235 adcq $0,%rdx
236 addq %rbx,%r9
237 movq %rdx,%r10
238 adcq $0,%r10
239
240 movq %r15,%rbx
241 leaq (%r12,%r15,2),%r15
242
// Add a[3]^2 and emit result limbs 6 and 7.
243 mulq %rax
244 addq %rax,%r13
245 adcq %rdx,%r14
246 adcq $0,%r15
247
248 movq %r13,48(%rsp)
249 movq %r14,56(%rsp)
250 shrq $63,%rbx
251
252
// Cross products a[4]*a[5..7].
253 movq 32(%rsi),%r11
254 movq 40(%rsi),%rax
255 mulq %r11
256 addq %rax,%r8
257 movq 48(%rsi),%rax
258 movq %rdx,%rcx
259 adcq $0,%rcx
260
261 mulq %r11
262 addq %rax,%r9
263 movq 56(%rsi),%rax
264 adcq $0,%rdx
265 movq %r8,%r12
266 leaq (%rbx,%r8,2),%r8
267 addq %rcx,%r9
268 movq %rdx,%rcx
269 adcq $0,%rcx
270
271 mulq %r11
272 shrq $63,%r12
273 addq %rax,%r10
274 movq %r11,%rax
275 adcq $0,%rdx
276 addq %rcx,%r10
277 movq %rdx,%r11
278 adcq $0,%r11
279
280 movq %r9,%rcx
281 leaq (%r12,%r9,2),%r9
282
// Add a[4]^2 and emit result limbs 8 and 9.
283 mulq %rax
284 addq %rax,%r15
285 adcq %rdx,%r8
286 adcq $0,%r9
287
288 movq %r15,64(%rsp)
289 movq %r8,72(%rsp)
290 shrq $63,%rcx
291
292
// Cross products a[5]*a[6..7].
293 movq 40(%rsi),%r12
294 movq 48(%rsi),%rax
295 mulq %r12
296 addq %rax,%r10
297 movq 56(%rsi),%rax
298 movq %rdx,%rbx
299 adcq $0,%rbx
300
301 mulq %r12
302 addq %rax,%r11
303 movq %r12,%rax
304 movq %r10,%r15
305 leaq (%rcx,%r10,2),%r10
306 adcq $0,%rdx
307 shrq $63,%r15
308 addq %rbx,%r11
309 movq %rdx,%r12
310 adcq $0,%r12
311
312 movq %r11,%rbx
313 leaq (%r15,%r11,2),%r11
314
// Add a[5]^2 and emit result limbs 10 and 11.
315 mulq %rax
316 addq %rax,%r9
317 adcq %rdx,%r10
318 adcq $0,%r11
319
320 movq %r9,80(%rsp)
321 movq %r10,88(%rsp)
322
323
// Cross product a[6]*a[7].
324 movq 48(%rsi),%r13
325 movq 56(%rsi),%rax
326 mulq %r13
327 addq %rax,%r12
328 movq %r13,%rax
329 movq %rdx,%r13
330 adcq $0,%r13
331
// shlq $1 moves the top bit of the pre-doubling %r11 copy into CF so the
// adcq chain doubles %r12:%r13 and collects the final carry in %r14.
332 xorq %r14,%r14
333 shlq $1,%rbx
334 adcq %r12,%r12
335 adcq %r13,%r13
336 adcq %r14,%r14
337
// Add a[6]^2 and emit result limbs 12 and 13.
338 mulq %rax
339 addq %rax,%r11
340 adcq %rdx,%r12
341 adcq $0,%r13
342
343 movq %r11,96(%rsp)
344 movq %r12,104(%rsp)
345
346
// Add a[7]^2 and emit result limbs 14 and 15.
347 movq 56(%rsi),%rax
348 mulq %rax
349 addq %rax,%r13
350 adcq $0,%rdx
351
352 addq %rdx,%r14
353
354 movq %r13,112(%rsp)
355 movq %r14,120(%rsp)
356
// Reduce: low eight limbs go in via %r8..%r15, the helper reads the constant
// saved at 128(%rsp) of this frame and the limbs at (%rbp).
357 movq (%rsp),%r8
358 movq 8(%rsp),%r9
359 movq 16(%rsp),%r10
360 movq 24(%rsp),%r11
361 movq 32(%rsp),%r12
362 movq 40(%rsp),%r13
363 movq 48(%rsp),%r14
364 movq 56(%rsp),%r15
365
366 call __rsaz_512_reduce
367
// Add the high eight limbs; sbbq turns the carry-out into a 0 / all-ones mask
// that __rsaz_512_subtract uses for its conditional correction.
368 addq 64(%rsp),%r8
369 adcq 72(%rsp),%r9
370 adcq 80(%rsp),%r10
371 adcq 88(%rsp),%r11
372 adcq 96(%rsp),%r12
373 adcq 104(%rsp),%r13
374 adcq 112(%rsp),%r14
375 adcq 120(%rsp),%r15
376 sbbq %rcx,%rcx
377
378 call __rsaz_512_subtract
379
// Set up the next pass: %rdx/%rax = result limbs 0/1, source = destination.
380 movq %r8,%rdx
381 movq %r9,%rax
382 movl 128+8(%rsp),%r8d
383 movq %rdi,%rsi
384
385 decl %r8d
386 jnz L$oop_sqr
387
// Epilogue: %rax = address just above the six saved registers.
388 leaq 128+24+48(%rsp),%rax
389 movq -48(%rax),%r15
390 movq -40(%rax),%r14
391 movq -32(%rax),%r13
392 movq -24(%rax),%r12
393 movq -16(%rax),%rbp
394 movq -8(%rax),%rbx
395 leaq (%rax),%rsp
396 L$sqr_epilogue:
// 0xf3,0xc3 is the two-byte "rep ret" encoding.
397 .byte 0xf3,0xc3
398
// _rsaz_512_mul: multiplies two 8-limb (512-bit) values and reduces the
// 16-limb product to 8 limbs.
//
// Register arguments as consumed below (System V AMD64 order):
//   %rdi  destination, 8 qwords (parked in %xmm0 across __rsaz_512_mul)
//   %rsi  first operand, 8 qwords
//   %rdx  second operand, 8 qwords (walked through %rbp by __rsaz_512_mul)
//   %rcx  pointer parked in %xmm1 and later placed in %rbp for the
//         reduce/subtract helpers (presumably the modulus -- confirm)
//   %r8   64-bit constant saved at 128(%rsp) for __rsaz_512_reduce
//         (presumably the Montgomery n0 value -- confirm)
//
// Frame: 0..127(%rsp) receives the 16-limb product.
399 .globl _rsaz_512_mul
400 .private_extern _rsaz_512_mul
401
402 .p2align 5
403 _rsaz_512_mul:
404 pushq %rbx
405 pushq %rbp
406 pushq %r12
407 pushq %r13
408 pushq %r14
409 pushq %r15
410
411 subq $128+24,%rsp
412 L$mul_body:
// Hand-encoded: movq %rdi,%xmm0 ; movq %rcx,%xmm1 (the helper clobbers both
// %rdi and %rcx).
413 .byte 102,72,15,110,199
414 .byte 102,72,15,110,201
415 movq %r8,128(%rsp)
416 movq (%rdx),%rbx
417 movq %rdx,%rbp
418 call __rsaz_512_mul
419
// Hand-encoded: movq %xmm0,%rdi ; movq %xmm1,%rbp.
420 .byte 102,72,15,126,199
421 .byte 102,72,15,126,205
422
// Low half of the product into registers for the reduction.
423 movq (%rsp),%r8
424 movq 8(%rsp),%r9
425 movq 16(%rsp),%r10
426 movq 24(%rsp),%r11
427 movq 32(%rsp),%r12
428 movq 40(%rsp),%r13
429 movq 48(%rsp),%r14
430 movq 56(%rsp),%r15
431
432 call __rsaz_512_reduce
// Add the high half; %rcx becomes 0 or all-ones from the carry-out.
433 addq 64(%rsp),%r8
434 adcq 72(%rsp),%r9
435 adcq 80(%rsp),%r10
436 adcq 88(%rsp),%r11
437 adcq 96(%rsp),%r12
438 adcq 104(%rsp),%r13
439 adcq 112(%rsp),%r14
440 adcq 120(%rsp),%r15
441 sbbq %rcx,%rcx
442
443 call __rsaz_512_subtract
444
// Epilogue: restore the six callee-saved registers and release the frame.
445 leaq 128+24+48(%rsp),%rax
446 movq -48(%rax),%r15
447 movq -40(%rax),%r14
448 movq -32(%rax),%r13
449 movq -24(%rax),%r12
450 movq -16(%rax),%rbp
451 movq -8(%rax),%rbx
452 leaq (%rax),%rsp
453 L$mul_epilogue:
// 0xf3,0xc3 is the two-byte "rep ret" encoding.
454 .byte 0xf3,0xc3
455
// _rsaz_512_mul_gather4: multiplies an 8-limb value by an 8-limb entry that is
// gathered limb-by-limb from an interleaved table, then reduces the product.
//
// Register arguments as consumed below (System V AMD64 order):
//   %rdi  destination, 8 qwords (saved at 128+8(%rsp))
//   %rsi  multiplicand, 8 qwords
//   %rdx  table base; each limb occupies one 128-byte row and movdqa requires
//         the rows to be 16-byte aligned
//   %rcx  pointer saved at 128+16(%rsp), reloaded into %rbp for the
//         reduce/subtract helpers (presumably the modulus -- confirm)
//   %r8   64-bit constant saved at 128(%rsp) for __rsaz_512_reduce
//   %r9d  index of the table entry to use
//
// Every 128-byte row is loaded in full and the wanted qword is picked with
// compare masks, so the addresses touched do not depend on %r9d.
456 .globl _rsaz_512_mul_gather4
457 .private_extern _rsaz_512_mul_gather4
458
459 .p2align 5
460 _rsaz_512_mul_gather4:
461 pushq %rbx
462 pushq %rbp
463 pushq %r12
464 pushq %r13
465 pushq %r14
466 pushq %r15
467
468 subq $152,%rsp
469 L$mul_gather4_body:
// Build eight selection masks in %xmm0..%xmm7. Starting from L$inc
// (0,0,1,1) and stepping by (2,2,2,2), mask k compares the broadcast index
// with (2k,2k,2k+1,2k+1), so one 64-bit lane is all-ones for an index 0..15.
470 movd %r9d,%xmm8
471 movdqa L$inc+16(%rip),%xmm1
472 movdqa L$inc(%rip),%xmm0
473
474 pshufd $0,%xmm8,%xmm8
475 movdqa %xmm1,%xmm7
476 movdqa %xmm1,%xmm2
477 paddd %xmm0,%xmm1
478 pcmpeqd %xmm8,%xmm0
479 movdqa %xmm7,%xmm3
480 paddd %xmm1,%xmm2
481 pcmpeqd %xmm8,%xmm1
482 movdqa %xmm7,%xmm4
483 paddd %xmm2,%xmm3
484 pcmpeqd %xmm8,%xmm2
485 movdqa %xmm7,%xmm5
486 paddd %xmm3,%xmm4
487 pcmpeqd %xmm8,%xmm3
488 movdqa %xmm7,%xmm6
489 paddd %xmm4,%xmm5
490 pcmpeqd %xmm8,%xmm4
491 paddd %xmm5,%xmm6
492 pcmpeqd %xmm8,%xmm5
493 paddd %xmm6,%xmm7
494 pcmpeqd %xmm8,%xmm6
495 pcmpeqd %xmm8,%xmm7
496
// Gather limb 0 of the selected entry: mask all eight 16-byte chunks of the
// row, OR them together, then fold the two qword halves.
497 movdqa 0(%rdx),%xmm8
498 movdqa 16(%rdx),%xmm9
499 movdqa 32(%rdx),%xmm10
500 movdqa 48(%rdx),%xmm11
501 pand %xmm0,%xmm8
502 movdqa 64(%rdx),%xmm12
503 pand %xmm1,%xmm9
504 movdqa 80(%rdx),%xmm13
505 pand %xmm2,%xmm10
506 movdqa 96(%rdx),%xmm14
507 pand %xmm3,%xmm11
508 movdqa 112(%rdx),%xmm15
509 leaq 128(%rdx),%rbp
510 pand %xmm4,%xmm12
511 pand %xmm5,%xmm13
512 pand %xmm6,%xmm14
513 pand %xmm7,%xmm15
514 por %xmm10,%xmm8
515 por %xmm11,%xmm9
516 por %xmm12,%xmm8
517 por %xmm13,%xmm9
518 por %xmm14,%xmm8
519 por %xmm15,%xmm9
520
521 por %xmm9,%xmm8
522 pshufd $0x4e,%xmm8,%xmm9
523 por %xmm9,%xmm8
// Hand-encoded: movq %xmm8,%rbx (the gathered limb becomes the multiplier).
524 .byte 102,76,15,126,195
525
// Save the arguments that are about to be overwritten.
526 movq %r8,128(%rsp)
527 movq %rdi,128+8(%rsp)
528 movq %rcx,128+16(%rsp)
529
// First row of the product: multiplicand[0..7] * gathered limb 0. Limb 0 of
// the product goes to (%rsp); the running sum stays in %r8..%r15.
530 movq (%rsi),%rax
531 movq 8(%rsi),%rcx
532 mulq %rbx
533 movq %rax,(%rsp)
534 movq %rcx,%rax
535 movq %rdx,%r8
536
537 mulq %rbx
538 addq %rax,%r8
539 movq 16(%rsi),%rax
540 movq %rdx,%r9
541 adcq $0,%r9
542
543 mulq %rbx
544 addq %rax,%r9
545 movq 24(%rsi),%rax
546 movq %rdx,%r10
547 adcq $0,%r10
548
549 mulq %rbx
550 addq %rax,%r10
551 movq 32(%rsi),%rax
552 movq %rdx,%r11
553 adcq $0,%r11
554
555 mulq %rbx
556 addq %rax,%r11
557 movq 40(%rsi),%rax
558 movq %rdx,%r12
559 adcq $0,%r12
560
561 mulq %rbx
562 addq %rax,%r12
563 movq 48(%rsi),%rax
564 movq %rdx,%r13
565 adcq $0,%r13
566
567 mulq %rbx
568 addq %rax,%r13
569 movq 56(%rsi),%rax
570 movq %rdx,%r14
571 adcq $0,%r14
572
573 mulq %rbx
574 addq %rax,%r14
575 movq (%rsi),%rax
576 movq %rdx,%r15
577 adcq $0,%r15
578
// %rdi now walks the product buffer; seven more rows remain.
579 leaq 8(%rsp),%rdi
580 movl $7,%ecx
581 jmp L$oop_mul_gather
582
583 .p2align 5
584 L$oop_mul_gather:
// Gather the next limb of the selected entry from the row at (%rbp).
585 movdqa 0(%rbp),%xmm8
586 movdqa 16(%rbp),%xmm9
587 movdqa 32(%rbp),%xmm10
588 movdqa 48(%rbp),%xmm11
589 pand %xmm0,%xmm8
590 movdqa 64(%rbp),%xmm12
591 pand %xmm1,%xmm9
592 movdqa 80(%rbp),%xmm13
593 pand %xmm2,%xmm10
594 movdqa 96(%rbp),%xmm14
595 pand %xmm3,%xmm11
596 movdqa 112(%rbp),%xmm15
597 leaq 128(%rbp),%rbp
598 pand %xmm4,%xmm12
599 pand %xmm5,%xmm13
600 pand %xmm6,%xmm14
601 pand %xmm7,%xmm15
602 por %xmm10,%xmm8
603 por %xmm11,%xmm9
604 por %xmm12,%xmm8
605 por %xmm13,%xmm9
606 por %xmm14,%xmm8
607 por %xmm15,%xmm9
608
609 por %xmm9,%xmm8
610 pshufd $0x4e,%xmm8,%xmm9
611 por %xmm9,%xmm8
// Hand-encoded: movq %xmm8,%rbx.
612 .byte 102,76,15,126,195
613
// Multiply-accumulate one row; the finished low limb is stored at (%rdi) and
// the accumulator shifts down by one register.
614 mulq %rbx
615 addq %rax,%r8
616 movq 8(%rsi),%rax
617 movq %r8,(%rdi)
618 movq %rdx,%r8
619 adcq $0,%r8
620
621 mulq %rbx
622 addq %rax,%r9
623 movq 16(%rsi),%rax
624 adcq $0,%rdx
625 addq %r9,%r8
626 movq %rdx,%r9
627 adcq $0,%r9
628
629 mulq %rbx
630 addq %rax,%r10
631 movq 24(%rsi),%rax
632 adcq $0,%rdx
633 addq %r10,%r9
634 movq %rdx,%r10
635 adcq $0,%r10
636
637 mulq %rbx
638 addq %rax,%r11
639 movq 32(%rsi),%rax
640 adcq $0,%rdx
641 addq %r11,%r10
642 movq %rdx,%r11
643 adcq $0,%r11
644
645 mulq %rbx
646 addq %rax,%r12
647 movq 40(%rsi),%rax
648 adcq $0,%rdx
649 addq %r12,%r11
650 movq %rdx,%r12
651 adcq $0,%r12
652
653 mulq %rbx
654 addq %rax,%r13
655 movq 48(%rsi),%rax
656 adcq $0,%rdx
657 addq %r13,%r12
658 movq %rdx,%r13
659 adcq $0,%r13
660
661 mulq %rbx
662 addq %rax,%r14
663 movq 56(%rsi),%rax
664 adcq $0,%rdx
665 addq %r14,%r13
666 movq %rdx,%r14
667 adcq $0,%r14
668
669 mulq %rbx
670 addq %rax,%r15
671 movq (%rsi),%rax
672 adcq $0,%rdx
673 addq %r15,%r14
674 movq %rdx,%r15
675 adcq $0,%r15
676
677 leaq 8(%rdi),%rdi
678
679 decl %ecx
680 jnz L$oop_mul_gather
681
// Flush the top eight limbs of the product.
682 movq %r8,(%rdi)
683 movq %r9,8(%rdi)
684 movq %r10,16(%rdi)
685 movq %r11,24(%rdi)
686 movq %r12,32(%rdi)
687 movq %r13,40(%rdi)
688 movq %r14,48(%rdi)
689 movq %r15,56(%rdi)
690
// Restore the destination and point %rbp at the saved fourth argument.
691 movq 128+8(%rsp),%rdi
692 movq 128+16(%rsp),%rbp
693
694 movq (%rsp),%r8
695 movq 8(%rsp),%r9
696 movq 16(%rsp),%r10
697 movq 24(%rsp),%r11
698 movq 32(%rsp),%r12
699 movq 40(%rsp),%r13
700 movq 48(%rsp),%r14
701 movq 56(%rsp),%r15
702
703 call __rsaz_512_reduce
// Add the high half; %rcx becomes 0 or all-ones from the carry-out.
704 addq 64(%rsp),%r8
705 adcq 72(%rsp),%r9
706 adcq 80(%rsp),%r10
707 adcq 88(%rsp),%r11
708 adcq 96(%rsp),%r12
709 adcq 104(%rsp),%r13
710 adcq 112(%rsp),%r14
711 adcq 120(%rsp),%r15
712 sbbq %rcx,%rcx
713
714 call __rsaz_512_subtract
715
// Epilogue: restore the six callee-saved registers and release the frame.
716 leaq 128+24+48(%rsp),%rax
717 movq -48(%rax),%r15
718 movq -40(%rax),%r14
719 movq -32(%rax),%r13
720 movq -24(%rax),%r12
721 movq -16(%rax),%rbp
722 movq -8(%rax),%rbx
723 leaq (%rax),%rsp
724 L$mul_gather4_epilogue:
// 0xf3,0xc3 is the two-byte "rep ret" encoding.
725 .byte 0xf3,0xc3
726
// _rsaz_512_mul_scatter4: multiplies the 8-limb value at %rdi by the 8-limb
// value at %rsi, reduces the product, writes it back to %rdi (through
// __rsaz_512_subtract) and also scatters it into an interleaved table.
//
// Register arguments as consumed below (System V AMD64 order):
//   %rdi  in/out operand, 8 qwords (parked in %xmm0 during the multiply)
//   %rsi  second operand, 8 qwords
//   %rdx  pointer parked in %xmm1, later placed in %rbp for the
//         reduce/subtract helpers (presumably the modulus -- confirm)
//   %rcx  64-bit constant saved at 128(%rsp) for __rsaz_512_reduce
//   %r8   table base
//   %r9d  table index; the result limbs land at %r8 + 8*index + 128*limb
727 .globl _rsaz_512_mul_scatter4
728 .private_extern _rsaz_512_mul_scatter4
729
730 .p2align 5
731 _rsaz_512_mul_scatter4:
732 pushq %rbx
733 pushq %rbp
734 pushq %r12
735 pushq %r13
736 pushq %r14
737 pushq %r15
738
// Zero-extend the 32-bit index before it is used in address arithmetic.
739 movl %r9d,%r9d
740 subq $128+24,%rsp
741 L$mul_scatter4_body:
742 leaq (%r8,%r9,8),%r8
// Hand-encoded: movq %rdi,%xmm0 ; movq %rdx,%xmm1 ; movq %r8,%xmm2.
743 .byte 102,72,15,110,199
744 .byte 102,72,15,110,202
745 .byte 102,73,15,110,208
746 movq %rcx,128(%rsp)
747
// The multiplier is the current content of the destination buffer.
748 movq %rdi,%rbp
749 movq (%rdi),%rbx
750 call __rsaz_512_mul
751
// Hand-encoded: movq %xmm0,%rdi ; movq %xmm1,%rbp.
752 .byte 102,72,15,126,199
753 .byte 102,72,15,126,205
754
755 movq (%rsp),%r8
756 movq 8(%rsp),%r9
757 movq 16(%rsp),%r10
758 movq 24(%rsp),%r11
759 movq 32(%rsp),%r12
760 movq 40(%rsp),%r13
761 movq 48(%rsp),%r14
762 movq 56(%rsp),%r15
763
764 call __rsaz_512_reduce
765 addq 64(%rsp),%r8
766 adcq 72(%rsp),%r9
767 adcq 80(%rsp),%r10
768 adcq 88(%rsp),%r11
769 adcq 96(%rsp),%r12
770 adcq 104(%rsp),%r13
771 adcq 112(%rsp),%r14
772 adcq 120(%rsp),%r15
// Hand-encoded: movq %xmm2,%rsi (table slot). A movq leaves the flags alone,
// so the carry from the adcq chain is still live for the sbbq below.
773 .byte 102,72,15,126,214
774 sbbq %rcx,%rcx
775
776 call __rsaz_512_subtract
777
// Scatter the eight result limbs at a 128-byte stride.
778 movq %r8,0(%rsi)
779 movq %r9,128(%rsi)
780 movq %r10,256(%rsi)
781 movq %r11,384(%rsi)
782 movq %r12,512(%rsi)
783 movq %r13,640(%rsi)
784 movq %r14,768(%rsi)
785 movq %r15,896(%rsi)
786
// Epilogue: restore the six callee-saved registers and release the frame.
787 leaq 128+24+48(%rsp),%rax
788 movq -48(%rax),%r15
789 movq -40(%rax),%r14
790 movq -32(%rax),%r13
791 movq -24(%rax),%r12
792 movq -16(%rax),%rbp
793 movq -8(%rax),%rbx
794 leaq (%rax),%rsp
795 L$mul_scatter4_epilogue:
// 0xf3,0xc3 is the two-byte "rep ret" encoding.
796 .byte 0xf3,0xc3
797
// _rsaz_512_mul_by_one: runs the 8-step word reduction on an 8-limb input
// without a preceding multiply and without the final conditional subtraction
// (presumably converting a value out of Montgomery form -- confirm).
//
// Register arguments as consumed below (System V AMD64 order):
//   %rdi  destination, 8 qwords
//   %rsi  source, 8 qwords
//   %rdx  pointer placed in %rbp for __rsaz_512_reduce (presumably the modulus)
//   %rcx  64-bit constant saved at 128(%rsp) for __rsaz_512_reduce
798 .globl _rsaz_512_mul_by_one
799 .private_extern _rsaz_512_mul_by_one
800
801 .p2align 5
802 _rsaz_512_mul_by_one:
803 pushq %rbx
804 pushq %rbp
805 pushq %r12
806 pushq %r13
807 pushq %r14
808 pushq %r15
809
810 subq $128+24,%rsp
811 L$mul_by_one_body:
812 movq %rdx,%rbp
813 movq %rcx,128(%rsp)
814
// Load the input limbs; the pxor is interleaved to prepare a zero vector.
815 movq (%rsi),%r8
816 pxor %xmm0,%xmm0
817 movq 8(%rsi),%r9
818 movq 16(%rsi),%r10
819 movq 24(%rsi),%r11
820 movq 32(%rsi),%r12
821 movq 40(%rsi),%r13
822 movq 48(%rsi),%r14
823 movq 56(%rsi),%r15
824
// Zero 112 bytes of scratch at 0..111(%rsp). %rsp is 16-byte aligned here
// (return address + six pushes + 152 bytes), as movdqa requires. The reduce
// helper shown in this file only reads the slot at 128(%rsp) of this frame.
825 movdqa %xmm0,(%rsp)
826 movdqa %xmm0,16(%rsp)
827 movdqa %xmm0,32(%rsp)
828 movdqa %xmm0,48(%rsp)
829 movdqa %xmm0,64(%rsp)
830 movdqa %xmm0,80(%rsp)
831 movdqa %xmm0,96(%rsp)
832 call __rsaz_512_reduce
// Store the reduced limbs directly.
833 movq %r8,(%rdi)
834 movq %r9,8(%rdi)
835 movq %r10,16(%rdi)
836 movq %r11,24(%rdi)
837 movq %r12,32(%rdi)
838 movq %r13,40(%rdi)
839 movq %r14,48(%rdi)
840 movq %r15,56(%rdi)
841
// Epilogue: restore the six callee-saved registers and release the frame.
842 leaq 128+24+48(%rsp),%rax
843 movq -48(%rax),%r15
844 movq -40(%rax),%r14
845 movq -32(%rax),%r13
846 movq -24(%rax),%r12
847 movq -16(%rax),%rbp
848 movq -8(%rax),%rbx
849 leaq (%rax),%rsp
850 L$mul_by_one_epilogue:
// 0xf3,0xc3 is the two-byte "rep ret" encoding.
851 .byte 0xf3,0xc3
852
853
// __rsaz_512_reduce: internal helper with a private register interface (not
// callable from C).
//   In:   %r8..%r15     eight limbs to reduce (least significant in %r8)
//         %rbp          pointer to eight limbs (presumably the modulus)
//         128+8(%rsp)   64-bit constant; because of the pushed return address
//                       this is the caller's 128(%rsp) slot
//   Out:  %r8..%r15     reduced limbs
//   Clobbers: %rax, %rbx, %rcx, %rdx, %rsi, flags
// Runs eight rounds. Each round multiplies the eight limbs at (%rbp) by
// %rbx = (current %r8 * constant), adds the result to the accumulator and
// shifts the accumulator down by one limb.
854 .p2align 5
855 __rsaz_512_reduce:
856 movq %r8,%rbx
857 imulq 128+8(%rsp),%rbx
858 movq 0(%rbp),%rax
859 movl $8,%ecx
860 jmp L$reduction_loop
861
862 .p2align 5
863 L$reduction_loop:
// The low half of this product is not added explicitly: negq sets CF when
// %r8 is non-zero and that carry is folded into the high half (presumably
// because low + %r8 is zero modulo 2^64 by construction of %rbx).
864 mulq %rbx
865 movq 8(%rbp),%rax
866 negq %r8
867 movq %rdx,%r8
868 adcq $0,%r8
869
870 mulq %rbx
871 addq %rax,%r9
872 movq 16(%rbp),%rax
873 adcq $0,%rdx
874 addq %r9,%r8
875 movq %rdx,%r9
876 adcq $0,%r9
877
878 mulq %rbx
879 addq %rax,%r10
880 movq 24(%rbp),%rax
881 adcq $0,%rdx
882 addq %r10,%r9
883 movq %rdx,%r10
884 adcq $0,%r10
885
886 mulq %rbx
887 addq %rax,%r11
888 movq 32(%rbp),%rax
889 adcq $0,%rdx
890 addq %r11,%r10
// Reload the constant early; the new %r8 is already final at this point.
891 movq 128+8(%rsp),%rsi
892
893
894 adcq $0,%rdx
895 movq %rdx,%r11
896
897 mulq %rbx
898 addq %rax,%r12
899 movq 40(%rbp),%rax
900 adcq $0,%rdx
// %rsi = multiplier for the next round (new %r8 * constant).
901 imulq %r8,%rsi
902 addq %r12,%r11
903 movq %rdx,%r12
904 adcq $0,%r12
905
906 mulq %rbx
907 addq %rax,%r13
908 movq 48(%rbp),%rax
909 adcq $0,%rdx
910 addq %r13,%r12
911 movq %rdx,%r13
912 adcq $0,%r13
913
914 mulq %rbx
915 addq %rax,%r14
916 movq 56(%rbp),%rax
917 adcq $0,%rdx
918 addq %r14,%r13
919 movq %rdx,%r14
920 adcq $0,%r14
921
922 mulq %rbx
923 movq %rsi,%rbx
924 addq %rax,%r15
925 movq 0(%rbp),%rax
926 adcq $0,%rdx
927 addq %r15,%r14
928 movq %rdx,%r15
929 adcq $0,%r15
930
931 decl %ecx
932 jne L$reduction_loop
933
// 0xf3,0xc3 is the two-byte "rep ret" encoding.
934 .byte 0xf3,0xc3
935
936
// __rsaz_512_subtract: internal helper with a private register interface.
//   In:   %r8..%r15  eight result limbs
//         %rdi       destination, 8 qwords
//         %rbp       pointer to eight limbs (presumably the modulus)
//         %rcx       mask, 0 or all-ones (callers produce it with sbbq)
//   Out:  (%rdi) and %r8..%r15 = input + (mask AND negated limbs at (%rbp))
//   Clobbers: flags
// The code is branch-free: the value is stored first, then the masked
// correction is added and the sum is stored again.
937 .p2align 5
938 __rsaz_512_subtract:
939 movq %r8,(%rdi)
940 movq %r9,8(%rdi)
941 movq %r10,16(%rdi)
942 movq %r11,24(%rdi)
943 movq %r12,32(%rdi)
944 movq %r13,40(%rdi)
945 movq %r14,48(%rdi)
946 movq %r15,56(%rdi)
947
// Build mask AND (-value at (%rbp)): negq on limb 0 plus notq on limbs 1..7
// equals the 512-bit two's complement as long as limb 0 is non-zero
// (true for an odd modulus -- confirm the caller guarantees this).
948 movq 0(%rbp),%r8
949 movq 8(%rbp),%r9
950 negq %r8
951 notq %r9
952 andq %rcx,%r8
953 movq 16(%rbp),%r10
954 andq %rcx,%r9
955 notq %r10
956 movq 24(%rbp),%r11
957 andq %rcx,%r10
958 notq %r11
959 movq 32(%rbp),%r12
960 andq %rcx,%r11
961 notq %r12
962 movq 40(%rbp),%r13
963 andq %rcx,%r12
964 notq %r13
965 movq 48(%rbp),%r14
966 andq %rcx,%r13
967 notq %r14
968 movq 56(%rbp),%r15
969 andq %rcx,%r14
970 notq %r15
971 andq %rcx,%r15
972
// Add the masked correction to the value stored above.
973 addq (%rdi),%r8
974 adcq 8(%rdi),%r9
975 adcq 16(%rdi),%r10
976 adcq 24(%rdi),%r11
977 adcq 32(%rdi),%r12
978 adcq 40(%rdi),%r13
979 adcq 48(%rdi),%r14
980 adcq 56(%rdi),%r15
981
982 movq %r8,(%rdi)
983 movq %r9,8(%rdi)
984 movq %r10,16(%rdi)
985 movq %r11,24(%rdi)
986 movq %r12,32(%rdi)
987 movq %r13,40(%rdi)
988 movq %r14,48(%rdi)
989 movq %r15,56(%rdi)
990
// 0xf3,0xc3 is the two-byte "rep ret" encoding.
991 .byte 0xf3,0xc3
992
993
// __rsaz_512_mul: internal 8x8-limb schoolbook multiply with a private
// register interface.
//   In:   %rsi  multiplicand, 8 qwords
//         %rbp  multiplier, 8 qwords
//         %rbx  multiplier limb 0 (preloaded by the caller)
//   Out:  16-limb product at 8(%rsp)..135(%rsp) as seen from here, i.e. the
//         caller's 0..127(%rsp) once the return address is popped
//   Clobbers: %rax, %rbx, %rcx, %rdx, %rdi, %rbp, %r8..%r15, flags
994 .p2align 5
995 __rsaz_512_mul:
996 leaq 8(%rsp),%rdi
997
// First row: multiplicand[0..7] * multiplier[0]. Limb 0 is stored at once,
// the rest stays in %r8..%r15 as the running accumulator.
998 movq (%rsi),%rax
999 mulq %rbx
1000 movq %rax,(%rdi)
1001 movq 8(%rsi),%rax
1002 movq %rdx,%r8
1003
1004 mulq %rbx
1005 addq %rax,%r8
1006 movq 16(%rsi),%rax
1007 movq %rdx,%r9
1008 adcq $0,%r9
1009
1010 mulq %rbx
1011 addq %rax,%r9
1012 movq 24(%rsi),%rax
1013 movq %rdx,%r10
1014 adcq $0,%r10
1015
1016 mulq %rbx
1017 addq %rax,%r10
1018 movq 32(%rsi),%rax
1019 movq %rdx,%r11
1020 adcq $0,%r11
1021
1022 mulq %rbx
1023 addq %rax,%r11
1024 movq 40(%rsi),%rax
1025 movq %rdx,%r12
1026 adcq $0,%r12
1027
1028 mulq %rbx
1029 addq %rax,%r12
1030 movq 48(%rsi),%rax
1031 movq %rdx,%r13
1032 adcq $0,%r13
1033
1034 mulq %rbx
1035 addq %rax,%r13
1036 movq 56(%rsi),%rax
1037 movq %rdx,%r14
1038 adcq $0,%r14
1039
1040 mulq %rbx
1041 addq %rax,%r14
1042 movq (%rsi),%rax
1043 movq %rdx,%r15
1044 adcq $0,%r15
1045
// Advance to multiplier limb 1 and output limb 1; seven rows remain.
1046 leaq 8(%rbp),%rbp
1047 leaq 8(%rdi),%rdi
1048
1049 movl $7,%ecx
1050 jmp L$oop_mul
1051
1052 .p2align 5
1053 L$oop_mul:
// One row per iteration: accumulate multiplicand[0..7] * (%rbp), store the
// finished low limb at (%rdi) and shift the accumulator down one register.
1054 movq (%rbp),%rbx
1055 mulq %rbx
1056 addq %rax,%r8
1057 movq 8(%rsi),%rax
1058 movq %r8,(%rdi)
1059 movq %rdx,%r8
1060 adcq $0,%r8
1061
1062 mulq %rbx
1063 addq %rax,%r9
1064 movq 16(%rsi),%rax
1065 adcq $0,%rdx
1066 addq %r9,%r8
1067 movq %rdx,%r9
1068 adcq $0,%r9
1069
1070 mulq %rbx
1071 addq %rax,%r10
1072 movq 24(%rsi),%rax
1073 adcq $0,%rdx
1074 addq %r10,%r9
1075 movq %rdx,%r10
1076 adcq $0,%r10
1077
1078 mulq %rbx
1079 addq %rax,%r11
1080 movq 32(%rsi),%rax
1081 adcq $0,%rdx
1082 addq %r11,%r10
1083 movq %rdx,%r11
1084 adcq $0,%r11
1085
1086 mulq %rbx
1087 addq %rax,%r12
1088 movq 40(%rsi),%rax
1089 adcq $0,%rdx
1090 addq %r12,%r11
1091 movq %rdx,%r12
1092 adcq $0,%r12
1093
1094 mulq %rbx
1095 addq %rax,%r13
1096 movq 48(%rsi),%rax
1097 adcq $0,%rdx
1098 addq %r13,%r12
1099 movq %rdx,%r13
1100 adcq $0,%r13
1101
1102 mulq %rbx
1103 addq %rax,%r14
1104 movq 56(%rsi),%rax
1105 adcq $0,%rdx
1106 addq %r14,%r13
1107 movq %rdx,%r14
// leaq advances the multiplier pointer without disturbing the carry flag
// consumed by the adcq that follows.
1108 leaq 8(%rbp),%rbp
1109 adcq $0,%r14
1110
1111 mulq %rbx
1112 addq %rax,%r15
1113 movq (%rsi),%rax
1114 adcq $0,%rdx
1115 addq %r15,%r14
1116 movq %rdx,%r15
1117 adcq $0,%r15
1118
1119 leaq 8(%rdi),%rdi
1120
1121 decl %ecx
1122 jnz L$oop_mul
1123
// Flush the top eight limbs of the product.
1124 movq %r8,(%rdi)
1125 movq %r9,8(%rdi)
1126 movq %r10,16(%rdi)
1127 movq %r11,24(%rdi)
1128 movq %r12,32(%rdi)
1129 movq %r13,40(%rdi)
1130 movq %r14,48(%rdi)
1131 movq %r15,56(%rdi)
1132
// 0xf3,0xc3 is the two-byte "rep ret" encoding.
1133 .byte 0xf3,0xc3
1134
// _rsaz_512_scatter4: copies eight consecutive qwords into an interleaved
// table, one qword per 128-byte row. Leaf function, no stack use.
//   %rdi  table base
//   %rsi  source, 8 qwords
//   %rdx  table index; limb i is written to %rdi + 8*index + 128*i
//   Clobbers: %rax, %rsi, %rdi, %r9, flags
// NOTE(review): the index is used as a full 64-bit register here, whereas
// _rsaz_512_mul_scatter4 zero-extends its 32-bit index first. If the C
// prototype passes a 32-bit int, the upper half of %rdx is not guaranteed
// by the ABI -- confirm against the prototype.
1135 .globl _rsaz_512_scatter4
1136 .private_extern _rsaz_512_scatter4
1137
1138 .p2align 4
1139 _rsaz_512_scatter4:
1140 leaq (%rdi,%rdx,8),%rdi
1141 movl $8,%r9d
1142 jmp L$oop_scatter
1143 .p2align 4
1144 L$oop_scatter:
// Source advances by 8 bytes, destination by one 128-byte row.
1145 movq (%rsi),%rax
1146 leaq 8(%rsi),%rsi
1147 movq %rax,(%rdi)
1148 leaq 128(%rdi),%rdi
1149 decl %r9d
1150 jnz L$oop_scatter
// 0xf3,0xc3 is the two-byte "rep ret" encoding.
1151 .byte 0xf3,0xc3
1152
1153
// _rsaz_512_gather4: extracts one 8-limb entry from an interleaved table.
// Leaf function, no stack use.
//   %rdi  destination, 8 qwords
//   %rsi  table base; eight 128-byte rows, 16-byte aligned (movdqa loads)
//   %edx  index of the entry to extract
//   Clobbers: %rsi, %rdi, %r9, %xmm0..%xmm15, flags
// Each row is loaded in full and the wanted qword is selected with compare
// masks, so the addresses touched do not depend on %edx.
1154 .globl _rsaz_512_gather4
1155 .private_extern _rsaz_512_gather4
1156
1157 .p2align 4
1158 _rsaz_512_gather4:
// Build eight selection masks in %xmm0..%xmm7. Starting from L$inc
// (0,0,1,1) and stepping by (2,2,2,2), mask k compares the broadcast index
// with (2k,2k,2k+1,2k+1), so one 64-bit lane is all-ones for an index 0..15.
1159 movd %edx,%xmm8
1160 movdqa L$inc+16(%rip),%xmm1
1161 movdqa L$inc(%rip),%xmm0
1162
1163 pshufd $0,%xmm8,%xmm8
1164 movdqa %xmm1,%xmm7
1165 movdqa %xmm1,%xmm2
1166 paddd %xmm0,%xmm1
1167 pcmpeqd %xmm8,%xmm0
1168 movdqa %xmm7,%xmm3
1169 paddd %xmm1,%xmm2
1170 pcmpeqd %xmm8,%xmm1
1171 movdqa %xmm7,%xmm4
1172 paddd %xmm2,%xmm3
1173 pcmpeqd %xmm8,%xmm2
1174 movdqa %xmm7,%xmm5
1175 paddd %xmm3,%xmm4
1176 pcmpeqd %xmm8,%xmm3
1177 movdqa %xmm7,%xmm6
1178 paddd %xmm4,%xmm5
1179 pcmpeqd %xmm8,%xmm4
1180 paddd %xmm5,%xmm6
1181 pcmpeqd %xmm8,%xmm5
1182 paddd %xmm6,%xmm7
1183 pcmpeqd %xmm8,%xmm6
1184 pcmpeqd %xmm8,%xmm7
1185 movl $8,%r9d
1186 jmp L$oop_gather
1187 .p2align 4
1188 L$oop_gather:
// One row per iteration: mask the eight 16-byte chunks, OR them together,
// fold the two qword halves and store the selected limb.
1189 movdqa 0(%rsi),%xmm8
1190 movdqa 16(%rsi),%xmm9
1191 movdqa 32(%rsi),%xmm10
1192 movdqa 48(%rsi),%xmm11
1193 pand %xmm0,%xmm8
1194 movdqa 64(%rsi),%xmm12
1195 pand %xmm1,%xmm9
1196 movdqa 80(%rsi),%xmm13
1197 pand %xmm2,%xmm10
1198 movdqa 96(%rsi),%xmm14
1199 pand %xmm3,%xmm11
1200 movdqa 112(%rsi),%xmm15
1201 leaq 128(%rsi),%rsi
1202 pand %xmm4,%xmm12
1203 pand %xmm5,%xmm13
1204 pand %xmm6,%xmm14
1205 pand %xmm7,%xmm15
1206 por %xmm10,%xmm8
1207 por %xmm11,%xmm9
1208 por %xmm12,%xmm8
1209 por %xmm13,%xmm9
1210 por %xmm14,%xmm8
1211 por %xmm15,%xmm9
1212
1213 por %xmm9,%xmm8
1214 pshufd $0x4e,%xmm8,%xmm9
1215 por %xmm9,%xmm8
1216 movq %xmm8,(%rdi)
1217 leaq 8(%rdi),%rdi
1218 decl %r9d
1219 jnz L$oop_gather
// 0xf3,0xc3 is the two-byte "rep ret" encoding.
1220 .byte 0xf3,0xc3
// End marker label; nothing in the visible listing references it.
1221 L$SEH_end_rsaz_512_gather4:
1222
1223
// L$inc: two 16-byte vectors of 32-bit lanes, aligned to 64 bytes.
//   L$inc+0   (0,0,1,1)  initial index pair for the gather masks
//   L$inc+16  (2,2,2,2)  step added to obtain each following index pair
// Read RIP-relative by _rsaz_512_mul_gather4 and _rsaz_512_gather4.
1224 .p2align 6
1225 L$inc:
1226 .long 0,0, 1,1
1227 .long 2,2, 2,2
1228 #endif
OLDNEW
« no previous file with comments | « third_party/boringssl/mac-x86_64/crypto/bn/rsaz-avx2.S ('k') | third_party/boringssl/mac-x86_64/crypto/bn/x86_64-mont.S » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698