Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(144)

Side by Side Diff: third_party/boringssl/mac-x86_64/crypto/bn/rsaz-x86_64.S

Issue 2219933002: Land BoringSSL roll on master (Closed) Base URL: git@github.com:dart-lang/sdk.git@master
Patch Set: Created 4 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 #if defined(__x86_64__) 1 #if defined(__x86_64__)
2 .text 2 .text
3 3
4 4
5 5
6 .globl _rsaz_512_sqr 6 .globl _rsaz_512_sqr
7 .private_extern _rsaz_512_sqr 7 .private_extern _rsaz_512_sqr
8 8
9 .p2align 5 9 .p2align 5
10 _rsaz_512_sqr: 10 _rsaz_512_sqr:
(...skipping 447 matching lines...) Expand 10 before | Expand all | Expand 10 after
458 458
459 .p2align 5 459 .p2align 5
460 _rsaz_512_mul_gather4: 460 _rsaz_512_mul_gather4:
461 pushq %rbx 461 pushq %rbx
462 pushq %rbp 462 pushq %rbp
463 pushq %r12 463 pushq %r12
464 pushq %r13 464 pushq %r13
465 pushq %r14 465 pushq %r14
466 pushq %r15 466 pushq %r15
467 467
468 » movl» %r9d,%r9d 468 » subq» $152,%rsp
469 » subq» $128+24,%rsp
470 L$mul_gather4_body: 469 L$mul_gather4_body:
471 » movl» 64(%rdx,%r9,4),%eax 470 » movd» %r9d,%xmm8
472 .byte» 102,72,15,110,199 471 » movdqa» L$inc+16(%rip),%xmm1
473 » movl» (%rdx,%r9,4),%ebx 472 » movdqa» L$inc(%rip),%xmm0
474 .byte» 102,72,15,110,201 473
474 » pshufd» $0,%xmm8,%xmm8
475 » movdqa» %xmm1,%xmm7
476 » movdqa» %xmm1,%xmm2
477 » paddd» %xmm0,%xmm1
478 » pcmpeqd»%xmm8,%xmm0
479 » movdqa» %xmm7,%xmm3
480 » paddd» %xmm1,%xmm2
481 » pcmpeqd»%xmm8,%xmm1
482 » movdqa» %xmm7,%xmm4
483 » paddd» %xmm2,%xmm3
484 » pcmpeqd»%xmm8,%xmm2
485 » movdqa» %xmm7,%xmm5
486 » paddd» %xmm3,%xmm4
487 » pcmpeqd»%xmm8,%xmm3
488 » movdqa» %xmm7,%xmm6
489 » paddd» %xmm4,%xmm5
490 » pcmpeqd»%xmm8,%xmm4
491 » paddd» %xmm5,%xmm6
492 » pcmpeqd»%xmm8,%xmm5
493 » paddd» %xmm6,%xmm7
494 » pcmpeqd»%xmm8,%xmm6
495 » pcmpeqd»%xmm8,%xmm7
496
497 » movdqa» 0(%rdx),%xmm8
498 » movdqa» 16(%rdx),%xmm9
499 » movdqa» 32(%rdx),%xmm10
500 » movdqa» 48(%rdx),%xmm11
501 » pand» %xmm0,%xmm8
502 » movdqa» 64(%rdx),%xmm12
503 » pand» %xmm1,%xmm9
504 » movdqa» 80(%rdx),%xmm13
505 » pand» %xmm2,%xmm10
506 » movdqa» 96(%rdx),%xmm14
507 » pand» %xmm3,%xmm11
508 » movdqa» 112(%rdx),%xmm15
509 » leaq» 128(%rdx),%rbp
510 » pand» %xmm4,%xmm12
511 » pand» %xmm5,%xmm13
512 » pand» %xmm6,%xmm14
513 » pand» %xmm7,%xmm15
514 » por» %xmm10,%xmm8
515 » por» %xmm11,%xmm9
516 » por» %xmm12,%xmm8
517 » por» %xmm13,%xmm9
518 » por» %xmm14,%xmm8
519 » por» %xmm15,%xmm9
520
521 » por» %xmm9,%xmm8
522 » pshufd» $0x4e,%xmm8,%xmm9
523 » por» %xmm9,%xmm8
524 .byte» 102,76,15,126,195
525
475 movq %r8,128(%rsp) 526 movq %r8,128(%rsp)
527 movq %rdi,128+8(%rsp)
528 movq %rcx,128+16(%rsp)
476 529
477 shlq $32,%rax
478 orq %rax,%rbx
479 movq (%rsi),%rax 530 movq (%rsi),%rax
480 movq 8(%rsi),%rcx 531 movq 8(%rsi),%rcx
481 leaq 128(%rdx,%r9,4),%rbp
482 mulq %rbx 532 mulq %rbx
483 movq %rax,(%rsp) 533 movq %rax,(%rsp)
484 movq %rcx,%rax 534 movq %rcx,%rax
485 movq %rdx,%r8 535 movq %rdx,%r8
486 536
487 mulq %rbx 537 mulq %rbx
488 movd (%rbp),%xmm4
489 addq %rax,%r8 538 addq %rax,%r8
490 movq 16(%rsi),%rax 539 movq 16(%rsi),%rax
491 movq %rdx,%r9 540 movq %rdx,%r9
492 adcq $0,%r9 541 adcq $0,%r9
493 542
494 mulq %rbx 543 mulq %rbx
495 movd 64(%rbp),%xmm5
496 addq %rax,%r9 544 addq %rax,%r9
497 movq 24(%rsi),%rax 545 movq 24(%rsi),%rax
498 movq %rdx,%r10 546 movq %rdx,%r10
499 adcq $0,%r10 547 adcq $0,%r10
500 548
501 mulq %rbx 549 mulq %rbx
502 pslldq $4,%xmm5
503 addq %rax,%r10 550 addq %rax,%r10
504 movq 32(%rsi),%rax 551 movq 32(%rsi),%rax
505 movq %rdx,%r11 552 movq %rdx,%r11
506 adcq $0,%r11 553 adcq $0,%r11
507 554
508 mulq %rbx 555 mulq %rbx
509 por %xmm5,%xmm4
510 addq %rax,%r11 556 addq %rax,%r11
511 movq 40(%rsi),%rax 557 movq 40(%rsi),%rax
512 movq %rdx,%r12 558 movq %rdx,%r12
513 adcq $0,%r12 559 adcq $0,%r12
514 560
515 mulq %rbx 561 mulq %rbx
516 addq %rax,%r12 562 addq %rax,%r12
517 movq 48(%rsi),%rax 563 movq 48(%rsi),%rax
518 movq %rdx,%r13 564 movq %rdx,%r13
519 adcq $0,%r13 565 adcq $0,%r13
520 566
521 mulq %rbx 567 mulq %rbx
522 leaq 128(%rbp),%rbp
523 addq %rax,%r13 568 addq %rax,%r13
524 movq 56(%rsi),%rax 569 movq 56(%rsi),%rax
525 movq %rdx,%r14 570 movq %rdx,%r14
526 adcq $0,%r14 571 adcq $0,%r14
527 572
528 mulq %rbx 573 mulq %rbx
529 .byte 102,72,15,126,227
530 addq %rax,%r14 574 addq %rax,%r14
531 movq (%rsi),%rax 575 movq (%rsi),%rax
532 movq %rdx,%r15 576 movq %rdx,%r15
533 adcq $0,%r15 577 adcq $0,%r15
534 578
535 leaq 8(%rsp),%rdi 579 leaq 8(%rsp),%rdi
536 movl $7,%ecx 580 movl $7,%ecx
537 jmp L$oop_mul_gather 581 jmp L$oop_mul_gather
538 582
539 .p2align 5 583 .p2align 5
540 L$oop_mul_gather: 584 L$oop_mul_gather:
585 movdqa 0(%rbp),%xmm8
586 movdqa 16(%rbp),%xmm9
587 movdqa 32(%rbp),%xmm10
588 movdqa 48(%rbp),%xmm11
589 pand %xmm0,%xmm8
590 movdqa 64(%rbp),%xmm12
591 pand %xmm1,%xmm9
592 movdqa 80(%rbp),%xmm13
593 pand %xmm2,%xmm10
594 movdqa 96(%rbp),%xmm14
595 pand %xmm3,%xmm11
596 movdqa 112(%rbp),%xmm15
597 leaq 128(%rbp),%rbp
598 pand %xmm4,%xmm12
599 pand %xmm5,%xmm13
600 pand %xmm6,%xmm14
601 pand %xmm7,%xmm15
602 por %xmm10,%xmm8
603 por %xmm11,%xmm9
604 por %xmm12,%xmm8
605 por %xmm13,%xmm9
606 por %xmm14,%xmm8
607 por %xmm15,%xmm9
608
609 por %xmm9,%xmm8
610 pshufd $0x4e,%xmm8,%xmm9
611 por %xmm9,%xmm8
612 .byte 102,76,15,126,195
613
541 mulq %rbx 614 mulq %rbx
542 addq %rax,%r8 615 addq %rax,%r8
543 movq 8(%rsi),%rax 616 movq 8(%rsi),%rax
544 movq %r8,(%rdi) 617 movq %r8,(%rdi)
545 movq %rdx,%r8 618 movq %rdx,%r8
546 adcq $0,%r8 619 adcq $0,%r8
547 620
548 mulq %rbx 621 mulq %rbx
549 movd (%rbp),%xmm4
550 addq %rax,%r9 622 addq %rax,%r9
551 movq 16(%rsi),%rax 623 movq 16(%rsi),%rax
552 adcq $0,%rdx 624 adcq $0,%rdx
553 addq %r9,%r8 625 addq %r9,%r8
554 movq %rdx,%r9 626 movq %rdx,%r9
555 adcq $0,%r9 627 adcq $0,%r9
556 628
557 mulq %rbx 629 mulq %rbx
558 movd 64(%rbp),%xmm5
559 addq %rax,%r10 630 addq %rax,%r10
560 movq 24(%rsi),%rax 631 movq 24(%rsi),%rax
561 adcq $0,%rdx 632 adcq $0,%rdx
562 addq %r10,%r9 633 addq %r10,%r9
563 movq %rdx,%r10 634 movq %rdx,%r10
564 adcq $0,%r10 635 adcq $0,%r10
565 636
566 mulq %rbx 637 mulq %rbx
567 pslldq $4,%xmm5
568 addq %rax,%r11 638 addq %rax,%r11
569 movq 32(%rsi),%rax 639 movq 32(%rsi),%rax
570 adcq $0,%rdx 640 adcq $0,%rdx
571 addq %r11,%r10 641 addq %r11,%r10
572 movq %rdx,%r11 642 movq %rdx,%r11
573 adcq $0,%r11 643 adcq $0,%r11
574 644
575 mulq %rbx 645 mulq %rbx
576 por %xmm5,%xmm4
577 addq %rax,%r12 646 addq %rax,%r12
578 movq 40(%rsi),%rax 647 movq 40(%rsi),%rax
579 adcq $0,%rdx 648 adcq $0,%rdx
580 addq %r12,%r11 649 addq %r12,%r11
581 movq %rdx,%r12 650 movq %rdx,%r12
582 adcq $0,%r12 651 adcq $0,%r12
583 652
584 mulq %rbx 653 mulq %rbx
585 addq %rax,%r13 654 addq %rax,%r13
586 movq 48(%rsi),%rax 655 movq 48(%rsi),%rax
587 adcq $0,%rdx 656 adcq $0,%rdx
588 addq %r13,%r12 657 addq %r13,%r12
589 movq %rdx,%r13 658 movq %rdx,%r13
590 adcq $0,%r13 659 adcq $0,%r13
591 660
592 mulq %rbx 661 mulq %rbx
593 addq %rax,%r14 662 addq %rax,%r14
594 movq 56(%rsi),%rax 663 movq 56(%rsi),%rax
595 adcq $0,%rdx 664 adcq $0,%rdx
596 addq %r14,%r13 665 addq %r14,%r13
597 movq %rdx,%r14 666 movq %rdx,%r14
598 adcq $0,%r14 667 adcq $0,%r14
599 668
600 mulq %rbx 669 mulq %rbx
601 .byte 102,72,15,126,227
602 addq %rax,%r15 670 addq %rax,%r15
603 movq (%rsi),%rax 671 movq (%rsi),%rax
604 adcq $0,%rdx 672 adcq $0,%rdx
605 addq %r15,%r14 673 addq %r15,%r14
606 movq %rdx,%r15 674 movq %rdx,%r15
607 adcq $0,%r15 675 adcq $0,%r15
608 676
609 leaq 128(%rbp),%rbp
610 leaq 8(%rdi),%rdi 677 leaq 8(%rdi),%rdi
611 678
612 decl %ecx 679 decl %ecx
613 jnz L$oop_mul_gather 680 jnz L$oop_mul_gather
614 681
615 movq %r8,(%rdi) 682 movq %r8,(%rdi)
616 movq %r9,8(%rdi) 683 movq %r9,8(%rdi)
617 movq %r10,16(%rdi) 684 movq %r10,16(%rdi)
618 movq %r11,24(%rdi) 685 movq %r11,24(%rdi)
619 movq %r12,32(%rdi) 686 movq %r12,32(%rdi)
620 movq %r13,40(%rdi) 687 movq %r13,40(%rdi)
621 movq %r14,48(%rdi) 688 movq %r14,48(%rdi)
622 movq %r15,56(%rdi) 689 movq %r15,56(%rdi)
623 690
624 .byte» 102,72,15,126,199 691 » movq» 128+8(%rsp),%rdi
625 .byte» 102,72,15,126,205 692 » movq» 128+16(%rsp),%rbp
626 693
627 movq (%rsp),%r8 694 movq (%rsp),%r8
628 movq 8(%rsp),%r9 695 movq 8(%rsp),%r9
629 movq 16(%rsp),%r10 696 movq 16(%rsp),%r10
630 movq 24(%rsp),%r11 697 movq 24(%rsp),%r11
631 movq 32(%rsp),%r12 698 movq 32(%rsp),%r12
632 movq 40(%rsp),%r13 699 movq 40(%rsp),%r13
633 movq 48(%rsp),%r14 700 movq 48(%rsp),%r14
634 movq 56(%rsp),%r15 701 movq 56(%rsp),%r15
635 702
(...skipping 29 matching lines...) Expand all
665 pushq %rbx 732 pushq %rbx
666 pushq %rbp 733 pushq %rbp
667 pushq %r12 734 pushq %r12
668 pushq %r13 735 pushq %r13
669 pushq %r14 736 pushq %r14
670 pushq %r15 737 pushq %r15
671 738
672 movl %r9d,%r9d 739 movl %r9d,%r9d
673 subq $128+24,%rsp 740 subq $128+24,%rsp
674 L$mul_scatter4_body: 741 L$mul_scatter4_body:
675 » leaq» (%r8,%r9,4),%r8 742 » leaq» (%r8,%r9,8),%r8
676 .byte 102,72,15,110,199 743 .byte 102,72,15,110,199
677 .byte 102,72,15,110,202 744 .byte 102,72,15,110,202
678 .byte 102,73,15,110,208 745 .byte 102,73,15,110,208
679 movq %rcx,128(%rsp) 746 movq %rcx,128(%rsp)
680 747
681 movq %rdi,%rbp 748 movq %rdi,%rbp
682 movq (%rdi),%rbx 749 movq (%rdi),%rbx
683 call __rsaz_512_mul 750 call __rsaz_512_mul
684 751
685 .byte 102,72,15,126,199 752 .byte 102,72,15,126,199
(...skipping 15 matching lines...) Expand all
701 adcq 88(%rsp),%r11 768 adcq 88(%rsp),%r11
702 adcq 96(%rsp),%r12 769 adcq 96(%rsp),%r12
703 adcq 104(%rsp),%r13 770 adcq 104(%rsp),%r13
704 adcq 112(%rsp),%r14 771 adcq 112(%rsp),%r14
705 adcq 120(%rsp),%r15 772 adcq 120(%rsp),%r15
706 .byte 102,72,15,126,214 773 .byte 102,72,15,126,214
707 sbbq %rcx,%rcx 774 sbbq %rcx,%rcx
708 775
709 call __rsaz_512_subtract 776 call __rsaz_512_subtract
710 777
711 » movl» %r8d,0(%rsi) 778 » movq» %r8,0(%rsi)
712 » shrq» $32,%r8 779 » movq» %r9,128(%rsi)
713 » movl» %r9d,128(%rsi) 780 » movq» %r10,256(%rsi)
714 » shrq» $32,%r9 781 » movq» %r11,384(%rsi)
715 » movl» %r10d,256(%rsi) 782 » movq» %r12,512(%rsi)
716 » shrq» $32,%r10 783 » movq» %r13,640(%rsi)
717 » movl» %r11d,384(%rsi) 784 » movq» %r14,768(%rsi)
718 » shrq» $32,%r11 785 » movq» %r15,896(%rsi)
719 » movl» %r12d,512(%rsi)
720 » shrq» $32,%r12
721 » movl» %r13d,640(%rsi)
722 » shrq» $32,%r13
723 » movl» %r14d,768(%rsi)
724 » shrq» $32,%r14
725 » movl» %r15d,896(%rsi)
726 » shrq» $32,%r15
727 » movl» %r8d,64(%rsi)
728 » movl» %r9d,192(%rsi)
729 » movl» %r10d,320(%rsi)
730 » movl» %r11d,448(%rsi)
731 » movl» %r12d,576(%rsi)
732 » movl» %r13d,704(%rsi)
733 » movl» %r14d,832(%rsi)
734 » movl» %r15d,960(%rsi)
735 786
736 leaq 128+24+48(%rsp),%rax 787 leaq 128+24+48(%rsp),%rax
737 movq -48(%rax),%r15 788 movq -48(%rax),%r15
738 movq -40(%rax),%r14 789 movq -40(%rax),%r14
739 movq -32(%rax),%r13 790 movq -32(%rax),%r13
740 movq -24(%rax),%r12 791 movq -24(%rax),%r12
741 movq -16(%rax),%rbp 792 movq -16(%rax),%rbp
742 movq -8(%rax),%rbx 793 movq -8(%rax),%rbx
743 leaq (%rax),%rsp 794 leaq (%rax),%rsp
744 L$mul_scatter4_epilogue: 795 L$mul_scatter4_epilogue:
(...skipping 334 matching lines...) Expand 10 before | Expand all | Expand 10 after
1079 movq %r14,48(%rdi) 1130 movq %r14,48(%rdi)
1080 movq %r15,56(%rdi) 1131 movq %r15,56(%rdi)
1081 1132
1082 .byte 0xf3,0xc3 1133 .byte 0xf3,0xc3
1083 1134
1084 .globl _rsaz_512_scatter4 1135 .globl _rsaz_512_scatter4
1085 .private_extern _rsaz_512_scatter4 1136 .private_extern _rsaz_512_scatter4
1086 1137
1087 .p2align 4 1138 .p2align 4
1088 _rsaz_512_scatter4: 1139 _rsaz_512_scatter4:
1089 » leaq» (%rdi,%rdx,4),%rdi 1140 » leaq» (%rdi,%rdx,8),%rdi
1090 movl $8,%r9d 1141 movl $8,%r9d
1091 jmp L$oop_scatter 1142 jmp L$oop_scatter
1092 .p2align 4 1143 .p2align 4
1093 L$oop_scatter: 1144 L$oop_scatter:
1094 movq (%rsi),%rax 1145 movq (%rsi),%rax
1095 leaq 8(%rsi),%rsi 1146 leaq 8(%rsi),%rsi
1096 » movl» %eax,(%rdi) 1147 » movq» %rax,(%rdi)
1097 » shrq» $32,%rax
1098 » movl» %eax,64(%rdi)
1099 leaq 128(%rdi),%rdi 1148 leaq 128(%rdi),%rdi
1100 decl %r9d 1149 decl %r9d
1101 jnz L$oop_scatter 1150 jnz L$oop_scatter
1102 .byte 0xf3,0xc3 1151 .byte 0xf3,0xc3
1103 1152
1104 1153
1105 .globl _rsaz_512_gather4 1154 .globl _rsaz_512_gather4
1106 .private_extern _rsaz_512_gather4 1155 .private_extern _rsaz_512_gather4
1107 1156
1108 .p2align 4 1157 .p2align 4
1109 _rsaz_512_gather4: 1158 _rsaz_512_gather4:
1110 » leaq» (%rsi,%rdx,4),%rsi 1159 » movd» %edx,%xmm8
1160 » movdqa» L$inc+16(%rip),%xmm1
1161 » movdqa» L$inc(%rip),%xmm0
1162
1163 » pshufd» $0,%xmm8,%xmm8
1164 » movdqa» %xmm1,%xmm7
1165 » movdqa» %xmm1,%xmm2
1166 » paddd» %xmm0,%xmm1
1167 » pcmpeqd»%xmm8,%xmm0
1168 » movdqa» %xmm7,%xmm3
1169 » paddd» %xmm1,%xmm2
1170 » pcmpeqd»%xmm8,%xmm1
1171 » movdqa» %xmm7,%xmm4
1172 » paddd» %xmm2,%xmm3
1173 » pcmpeqd»%xmm8,%xmm2
1174 » movdqa» %xmm7,%xmm5
1175 » paddd» %xmm3,%xmm4
1176 » pcmpeqd»%xmm8,%xmm3
1177 » movdqa» %xmm7,%xmm6
1178 » paddd» %xmm4,%xmm5
1179 » pcmpeqd»%xmm8,%xmm4
1180 » paddd» %xmm5,%xmm6
1181 » pcmpeqd»%xmm8,%xmm5
1182 » paddd» %xmm6,%xmm7
1183 » pcmpeqd»%xmm8,%xmm6
1184 » pcmpeqd»%xmm8,%xmm7
1111 movl $8,%r9d 1185 movl $8,%r9d
1112 jmp L$oop_gather 1186 jmp L$oop_gather
1113 .p2align 4 1187 .p2align 4
1114 L$oop_gather: 1188 L$oop_gather:
1115 » movl» (%rsi),%eax 1189 » movdqa» 0(%rsi),%xmm8
1116 » movl» 64(%rsi),%r8d 1190 » movdqa» 16(%rsi),%xmm9
1191 » movdqa» 32(%rsi),%xmm10
1192 » movdqa» 48(%rsi),%xmm11
1193 » pand» %xmm0,%xmm8
1194 » movdqa» 64(%rsi),%xmm12
1195 » pand» %xmm1,%xmm9
1196 » movdqa» 80(%rsi),%xmm13
1197 » pand» %xmm2,%xmm10
1198 » movdqa» 96(%rsi),%xmm14
1199 » pand» %xmm3,%xmm11
1200 » movdqa» 112(%rsi),%xmm15
1117 leaq 128(%rsi),%rsi 1201 leaq 128(%rsi),%rsi
1118 » shlq» $32,%r8 1202 » pand» %xmm4,%xmm12
1119 » orq» %r8,%rax 1203 » pand» %xmm5,%xmm13
1120 » movq» %rax,(%rdi) 1204 » pand» %xmm6,%xmm14
1205 » pand» %xmm7,%xmm15
1206 » por» %xmm10,%xmm8
1207 » por» %xmm11,%xmm9
1208 » por» %xmm12,%xmm8
1209 » por» %xmm13,%xmm9
1210 » por» %xmm14,%xmm8
1211 » por» %xmm15,%xmm9
1212
1213 » por» %xmm9,%xmm8
1214 » pshufd» $0x4e,%xmm8,%xmm9
1215 » por» %xmm9,%xmm8
1216 » movq» %xmm8,(%rdi)
1121 leaq 8(%rdi),%rdi 1217 leaq 8(%rdi),%rdi
1122 decl %r9d 1218 decl %r9d
1123 jnz L$oop_gather 1219 jnz L$oop_gather
1124 .byte 0xf3,0xc3 1220 .byte 0xf3,0xc3
1221 L$SEH_end_rsaz_512_gather4:
1125 1222
1223
1224 .p2align 6
1225 L$inc:
1226 .long 0,0, 1,1
1227 .long 2,2, 2,2
1126 #endif 1228 #endif
OLDNEW
« no previous file with comments | « third_party/boringssl/mac-x86_64/crypto/aes/vpaes-x86_64.S ('k') | third_party/boringssl/mac-x86_64/crypto/bn/x86_64-mont.S » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698