OLD | NEW |
1 default rel | 1 default rel |
2 %define XMMWORD | 2 %define XMMWORD |
3 %define YMMWORD | 3 %define YMMWORD |
4 %define ZMMWORD | 4 %define ZMMWORD |
5 section .text code align=64 | 5 section .text code align=64 |
6 | 6 |
7 | 7 |
8 EXTERN OPENSSL_ia32cap_P | 8 EXTERN OPENSSL_ia32cap_P |
9 | 9 |
10 global rsaz_512_sqr | 10 global rsaz_512_sqr |
(...skipping 486 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
497 mov r9,QWORD[48+rsp] | 497 mov r9,QWORD[48+rsp] |
498 | 498 |
499 | 499 |
500 push rbx | 500 push rbx |
501 push rbp | 501 push rbp |
502 push r12 | 502 push r12 |
503 push r13 | 503 push r13 |
504 push r14 | 504 push r14 |
505 push r15 | 505 push r15 |
506 | 506 |
507 » mov» r9d,r9d | 507 » sub» rsp,328 |
508 » sub» rsp,128+24 | 508 » movaps» XMMWORD[160+rsp],xmm6 |
| 509 » movaps» XMMWORD[176+rsp],xmm7 |
| 510 » movaps» XMMWORD[192+rsp],xmm8 |
| 511 » movaps» XMMWORD[208+rsp],xmm9 |
| 512 » movaps» XMMWORD[224+rsp],xmm10 |
| 513 » movaps» XMMWORD[240+rsp],xmm11 |
| 514 » movaps» XMMWORD[256+rsp],xmm12 |
| 515 » movaps» XMMWORD[272+rsp],xmm13 |
| 516 » movaps» XMMWORD[288+rsp],xmm14 |
| 517 » movaps» XMMWORD[304+rsp],xmm15 |
509 $L$mul_gather4_body: | 518 $L$mul_gather4_body: |
510 » mov» eax,DWORD[64+r9*4+rdx] | 519 » movd» xmm8,r9d |
511 DB» 102,72,15,110,199 | 520 » movdqa» xmm1,XMMWORD[(($L$inc+16))] |
512 » mov» ebx,DWORD[r9*4+rdx] | 521 » movdqa» xmm0,XMMWORD[$L$inc] |
513 DB» 102,72,15,110,201 | 522 |
| 523 » pshufd» xmm8,xmm8,0 |
| 524 » movdqa» xmm7,xmm1 |
| 525 » movdqa» xmm2,xmm1 |
| 526 » paddd» xmm1,xmm0 |
| 527 » pcmpeqd»xmm0,xmm8 |
| 528 » movdqa» xmm3,xmm7 |
| 529 » paddd» xmm2,xmm1 |
| 530 » pcmpeqd»xmm1,xmm8 |
| 531 » movdqa» xmm4,xmm7 |
| 532 » paddd» xmm3,xmm2 |
| 533 » pcmpeqd»xmm2,xmm8 |
| 534 » movdqa» xmm5,xmm7 |
| 535 » paddd» xmm4,xmm3 |
| 536 » pcmpeqd»xmm3,xmm8 |
| 537 » movdqa» xmm6,xmm7 |
| 538 » paddd» xmm5,xmm4 |
| 539 » pcmpeqd»xmm4,xmm8 |
| 540 » paddd» xmm6,xmm5 |
| 541 » pcmpeqd»xmm5,xmm8 |
| 542 » paddd» xmm7,xmm6 |
| 543 » pcmpeqd»xmm6,xmm8 |
| 544 » pcmpeqd»xmm7,xmm8 |
| 545 |
| 546 » movdqa» xmm8,XMMWORD[rdx] |
| 547 » movdqa» xmm9,XMMWORD[16+rdx] |
| 548 » movdqa» xmm10,XMMWORD[32+rdx] |
| 549 » movdqa» xmm11,XMMWORD[48+rdx] |
| 550 » pand» xmm8,xmm0 |
| 551 » movdqa» xmm12,XMMWORD[64+rdx] |
| 552 » pand» xmm9,xmm1 |
| 553 » movdqa» xmm13,XMMWORD[80+rdx] |
| 554 » pand» xmm10,xmm2 |
| 555 » movdqa» xmm14,XMMWORD[96+rdx] |
| 556 » pand» xmm11,xmm3 |
| 557 » movdqa» xmm15,XMMWORD[112+rdx] |
| 558 » lea» rbp,[128+rdx] |
| 559 » pand» xmm12,xmm4 |
| 560 » pand» xmm13,xmm5 |
| 561 » pand» xmm14,xmm6 |
| 562 » pand» xmm15,xmm7 |
| 563 » por» xmm8,xmm10 |
| 564 » por» xmm9,xmm11 |
| 565 » por» xmm8,xmm12 |
| 566 » por» xmm9,xmm13 |
| 567 » por» xmm8,xmm14 |
| 568 » por» xmm9,xmm15 |
| 569 |
| 570 » por» xmm8,xmm9 |
| 571 » pshufd» xmm9,xmm8,0x4e |
| 572 » por» xmm8,xmm9 |
| 573 DB» 102,76,15,126,195 |
| 574 |
514 mov QWORD[128+rsp],r8 | 575 mov QWORD[128+rsp],r8 |
| 576 mov QWORD[((128+8))+rsp],rdi |
| 577 mov QWORD[((128+16))+rsp],rcx |
515 | 578 |
516 shl rax,32 | |
517 or rbx,rax | |
518 mov rax,QWORD[rsi] | 579 mov rax,QWORD[rsi] |
519 mov rcx,QWORD[8+rsi] | 580 mov rcx,QWORD[8+rsi] |
520 lea rbp,[128+r9*4+rdx] | |
521 mul rbx | 581 mul rbx |
522 mov QWORD[rsp],rax | 582 mov QWORD[rsp],rax |
523 mov rax,rcx | 583 mov rax,rcx |
524 mov r8,rdx | 584 mov r8,rdx |
525 | 585 |
526 mul rbx | 586 mul rbx |
527 movd xmm4,DWORD[rbp] | |
528 add r8,rax | 587 add r8,rax |
529 mov rax,QWORD[16+rsi] | 588 mov rax,QWORD[16+rsi] |
530 mov r9,rdx | 589 mov r9,rdx |
531 adc r9,0 | 590 adc r9,0 |
532 | 591 |
533 mul rbx | 592 mul rbx |
534 movd xmm5,DWORD[64+rbp] | |
535 add r9,rax | 593 add r9,rax |
536 mov rax,QWORD[24+rsi] | 594 mov rax,QWORD[24+rsi] |
537 mov r10,rdx | 595 mov r10,rdx |
538 adc r10,0 | 596 adc r10,0 |
539 | 597 |
540 mul rbx | 598 mul rbx |
541 pslldq xmm5,4 | |
542 add r10,rax | 599 add r10,rax |
543 mov rax,QWORD[32+rsi] | 600 mov rax,QWORD[32+rsi] |
544 mov r11,rdx | 601 mov r11,rdx |
545 adc r11,0 | 602 adc r11,0 |
546 | 603 |
547 mul rbx | 604 mul rbx |
548 por xmm4,xmm5 | |
549 add r11,rax | 605 add r11,rax |
550 mov rax,QWORD[40+rsi] | 606 mov rax,QWORD[40+rsi] |
551 mov r12,rdx | 607 mov r12,rdx |
552 adc r12,0 | 608 adc r12,0 |
553 | 609 |
554 mul rbx | 610 mul rbx |
555 add r12,rax | 611 add r12,rax |
556 mov rax,QWORD[48+rsi] | 612 mov rax,QWORD[48+rsi] |
557 mov r13,rdx | 613 mov r13,rdx |
558 adc r13,0 | 614 adc r13,0 |
559 | 615 |
560 mul rbx | 616 mul rbx |
561 lea rbp,[128+rbp] | |
562 add r13,rax | 617 add r13,rax |
563 mov rax,QWORD[56+rsi] | 618 mov rax,QWORD[56+rsi] |
564 mov r14,rdx | 619 mov r14,rdx |
565 adc r14,0 | 620 adc r14,0 |
566 | 621 |
567 mul rbx | 622 mul rbx |
568 DB 102,72,15,126,227 | |
569 add r14,rax | 623 add r14,rax |
570 mov rax,QWORD[rsi] | 624 mov rax,QWORD[rsi] |
571 mov r15,rdx | 625 mov r15,rdx |
572 adc r15,0 | 626 adc r15,0 |
573 | 627 |
574 lea rdi,[8+rsp] | 628 lea rdi,[8+rsp] |
575 mov ecx,7 | 629 mov ecx,7 |
576 jmp NEAR $L$oop_mul_gather | 630 jmp NEAR $L$oop_mul_gather |
577 | 631 |
578 ALIGN 32 | 632 ALIGN 32 |
579 $L$oop_mul_gather: | 633 $L$oop_mul_gather: |
| 634 movdqa xmm8,XMMWORD[rbp] |
| 635 movdqa xmm9,XMMWORD[16+rbp] |
| 636 movdqa xmm10,XMMWORD[32+rbp] |
| 637 movdqa xmm11,XMMWORD[48+rbp] |
| 638 pand xmm8,xmm0 |
| 639 movdqa xmm12,XMMWORD[64+rbp] |
| 640 pand xmm9,xmm1 |
| 641 movdqa xmm13,XMMWORD[80+rbp] |
| 642 pand xmm10,xmm2 |
| 643 movdqa xmm14,XMMWORD[96+rbp] |
| 644 pand xmm11,xmm3 |
| 645 movdqa xmm15,XMMWORD[112+rbp] |
| 646 lea rbp,[128+rbp] |
| 647 pand xmm12,xmm4 |
| 648 pand xmm13,xmm5 |
| 649 pand xmm14,xmm6 |
| 650 pand xmm15,xmm7 |
| 651 por xmm8,xmm10 |
| 652 por xmm9,xmm11 |
| 653 por xmm8,xmm12 |
| 654 por xmm9,xmm13 |
| 655 por xmm8,xmm14 |
| 656 por xmm9,xmm15 |
| 657 |
| 658 por xmm8,xmm9 |
| 659 pshufd xmm9,xmm8,0x4e |
| 660 por xmm8,xmm9 |
| 661 DB 102,76,15,126,195 |
| 662 |
580 mul rbx | 663 mul rbx |
581 add r8,rax | 664 add r8,rax |
582 mov rax,QWORD[8+rsi] | 665 mov rax,QWORD[8+rsi] |
583 mov QWORD[rdi],r8 | 666 mov QWORD[rdi],r8 |
584 mov r8,rdx | 667 mov r8,rdx |
585 adc r8,0 | 668 adc r8,0 |
586 | 669 |
587 mul rbx | 670 mul rbx |
588 movd xmm4,DWORD[rbp] | |
589 add r9,rax | 671 add r9,rax |
590 mov rax,QWORD[16+rsi] | 672 mov rax,QWORD[16+rsi] |
591 adc rdx,0 | 673 adc rdx,0 |
592 add r8,r9 | 674 add r8,r9 |
593 mov r9,rdx | 675 mov r9,rdx |
594 adc r9,0 | 676 adc r9,0 |
595 | 677 |
596 mul rbx | 678 mul rbx |
597 movd xmm5,DWORD[64+rbp] | |
598 add r10,rax | 679 add r10,rax |
599 mov rax,QWORD[24+rsi] | 680 mov rax,QWORD[24+rsi] |
600 adc rdx,0 | 681 adc rdx,0 |
601 add r9,r10 | 682 add r9,r10 |
602 mov r10,rdx | 683 mov r10,rdx |
603 adc r10,0 | 684 adc r10,0 |
604 | 685 |
605 mul rbx | 686 mul rbx |
606 pslldq xmm5,4 | |
607 add r11,rax | 687 add r11,rax |
608 mov rax,QWORD[32+rsi] | 688 mov rax,QWORD[32+rsi] |
609 adc rdx,0 | 689 adc rdx,0 |
610 add r10,r11 | 690 add r10,r11 |
611 mov r11,rdx | 691 mov r11,rdx |
612 adc r11,0 | 692 adc r11,0 |
613 | 693 |
614 mul rbx | 694 mul rbx |
615 por xmm4,xmm5 | |
616 add r12,rax | 695 add r12,rax |
617 mov rax,QWORD[40+rsi] | 696 mov rax,QWORD[40+rsi] |
618 adc rdx,0 | 697 adc rdx,0 |
619 add r11,r12 | 698 add r11,r12 |
620 mov r12,rdx | 699 mov r12,rdx |
621 adc r12,0 | 700 adc r12,0 |
622 | 701 |
623 mul rbx | 702 mul rbx |
624 add r13,rax | 703 add r13,rax |
625 mov rax,QWORD[48+rsi] | 704 mov rax,QWORD[48+rsi] |
626 adc rdx,0 | 705 adc rdx,0 |
627 add r12,r13 | 706 add r12,r13 |
628 mov r13,rdx | 707 mov r13,rdx |
629 adc r13,0 | 708 adc r13,0 |
630 | 709 |
631 mul rbx | 710 mul rbx |
632 add r14,rax | 711 add r14,rax |
633 mov rax,QWORD[56+rsi] | 712 mov rax,QWORD[56+rsi] |
634 adc rdx,0 | 713 adc rdx,0 |
635 add r13,r14 | 714 add r13,r14 |
636 mov r14,rdx | 715 mov r14,rdx |
637 adc r14,0 | 716 adc r14,0 |
638 | 717 |
639 mul rbx | 718 mul rbx |
640 DB 102,72,15,126,227 | |
641 add r15,rax | 719 add r15,rax |
642 mov rax,QWORD[rsi] | 720 mov rax,QWORD[rsi] |
643 adc rdx,0 | 721 adc rdx,0 |
644 add r14,r15 | 722 add r14,r15 |
645 mov r15,rdx | 723 mov r15,rdx |
646 adc r15,0 | 724 adc r15,0 |
647 | 725 |
648 lea rbp,[128+rbp] | |
649 lea rdi,[8+rdi] | 726 lea rdi,[8+rdi] |
650 | 727 |
651 dec ecx | 728 dec ecx |
652 jnz NEAR $L$oop_mul_gather | 729 jnz NEAR $L$oop_mul_gather |
653 | 730 |
654 mov QWORD[rdi],r8 | 731 mov QWORD[rdi],r8 |
655 mov QWORD[8+rdi],r9 | 732 mov QWORD[8+rdi],r9 |
656 mov QWORD[16+rdi],r10 | 733 mov QWORD[16+rdi],r10 |
657 mov QWORD[24+rdi],r11 | 734 mov QWORD[24+rdi],r11 |
658 mov QWORD[32+rdi],r12 | 735 mov QWORD[32+rdi],r12 |
659 mov QWORD[40+rdi],r13 | 736 mov QWORD[40+rdi],r13 |
660 mov QWORD[48+rdi],r14 | 737 mov QWORD[48+rdi],r14 |
661 mov QWORD[56+rdi],r15 | 738 mov QWORD[56+rdi],r15 |
662 | 739 |
663 DB» 102,72,15,126,199 | 740 » mov» rdi,QWORD[((128+8))+rsp] |
664 DB» 102,72,15,126,205 | 741 » mov» rbp,QWORD[((128+16))+rsp] |
665 | 742 |
666 mov r8,QWORD[rsp] | 743 mov r8,QWORD[rsp] |
667 mov r9,QWORD[8+rsp] | 744 mov r9,QWORD[8+rsp] |
668 mov r10,QWORD[16+rsp] | 745 mov r10,QWORD[16+rsp] |
669 mov r11,QWORD[24+rsp] | 746 mov r11,QWORD[24+rsp] |
670 mov r12,QWORD[32+rsp] | 747 mov r12,QWORD[32+rsp] |
671 mov r13,QWORD[40+rsp] | 748 mov r13,QWORD[40+rsp] |
672 mov r14,QWORD[48+rsp] | 749 mov r14,QWORD[48+rsp] |
673 mov r15,QWORD[56+rsp] | 750 mov r15,QWORD[56+rsp] |
674 | 751 |
675 call __rsaz_512_reduce | 752 call __rsaz_512_reduce |
676 add r8,QWORD[64+rsp] | 753 add r8,QWORD[64+rsp] |
677 adc r9,QWORD[72+rsp] | 754 adc r9,QWORD[72+rsp] |
678 adc r10,QWORD[80+rsp] | 755 adc r10,QWORD[80+rsp] |
679 adc r11,QWORD[88+rsp] | 756 adc r11,QWORD[88+rsp] |
680 adc r12,QWORD[96+rsp] | 757 adc r12,QWORD[96+rsp] |
681 adc r13,QWORD[104+rsp] | 758 adc r13,QWORD[104+rsp] |
682 adc r14,QWORD[112+rsp] | 759 adc r14,QWORD[112+rsp] |
683 adc r15,QWORD[120+rsp] | 760 adc r15,QWORD[120+rsp] |
684 sbb rcx,rcx | 761 sbb rcx,rcx |
685 | 762 |
686 call __rsaz_512_subtract | 763 call __rsaz_512_subtract |
687 | 764 |
688 lea rax,[((128+24+48))+rsp] | 765 lea rax,[((128+24+48))+rsp] |
| 766 movaps xmm6,XMMWORD[((160-200))+rax] |
| 767 movaps xmm7,XMMWORD[((176-200))+rax] |
| 768 movaps xmm8,XMMWORD[((192-200))+rax] |
| 769 movaps xmm9,XMMWORD[((208-200))+rax] |
| 770 movaps xmm10,XMMWORD[((224-200))+rax] |
| 771 movaps xmm11,XMMWORD[((240-200))+rax] |
| 772 movaps xmm12,XMMWORD[((256-200))+rax] |
| 773 movaps xmm13,XMMWORD[((272-200))+rax] |
| 774 movaps xmm14,XMMWORD[((288-200))+rax] |
| 775 movaps xmm15,XMMWORD[((304-200))+rax] |
| 776 lea rax,[176+rax] |
689 mov r15,QWORD[((-48))+rax] | 777 mov r15,QWORD[((-48))+rax] |
690 mov r14,QWORD[((-40))+rax] | 778 mov r14,QWORD[((-40))+rax] |
691 mov r13,QWORD[((-32))+rax] | 779 mov r13,QWORD[((-32))+rax] |
692 mov r12,QWORD[((-24))+rax] | 780 mov r12,QWORD[((-24))+rax] |
693 mov rbp,QWORD[((-16))+rax] | 781 mov rbp,QWORD[((-16))+rax] |
694 mov rbx,QWORD[((-8))+rax] | 782 mov rbx,QWORD[((-8))+rax] |
695 lea rsp,[rax] | 783 lea rsp,[rax] |
696 $L$mul_gather4_epilogue: | 784 $L$mul_gather4_epilogue: |
697 mov rdi,QWORD[8+rsp] ;WIN64 epilogue | 785 mov rdi,QWORD[8+rsp] ;WIN64 epilogue |
698 mov rsi,QWORD[16+rsp] | 786 mov rsi,QWORD[16+rsp] |
(...skipping 18 matching lines...) Expand all Loading... |
717 push rbx | 805 push rbx |
718 push rbp | 806 push rbp |
719 push r12 | 807 push r12 |
720 push r13 | 808 push r13 |
721 push r14 | 809 push r14 |
722 push r15 | 810 push r15 |
723 | 811 |
724 mov r9d,r9d | 812 mov r9d,r9d |
725 sub rsp,128+24 | 813 sub rsp,128+24 |
726 $L$mul_scatter4_body: | 814 $L$mul_scatter4_body: |
727 » lea» r8,[r9*4+r8] | 815 » lea» r8,[r9*8+r8] |
728 DB 102,72,15,110,199 | 816 DB 102,72,15,110,199 |
729 DB 102,72,15,110,202 | 817 DB 102,72,15,110,202 |
730 DB 102,73,15,110,208 | 818 DB 102,73,15,110,208 |
731 mov QWORD[128+rsp],rcx | 819 mov QWORD[128+rsp],rcx |
732 | 820 |
733 mov rbp,rdi | 821 mov rbp,rdi |
734 mov rbx,QWORD[rdi] | 822 mov rbx,QWORD[rdi] |
735 call __rsaz_512_mul | 823 call __rsaz_512_mul |
736 | 824 |
737 DB 102,72,15,126,199 | 825 DB 102,72,15,126,199 |
(...skipping 15 matching lines...) Expand all Loading... |
753 adc r11,QWORD[88+rsp] | 841 adc r11,QWORD[88+rsp] |
754 adc r12,QWORD[96+rsp] | 842 adc r12,QWORD[96+rsp] |
755 adc r13,QWORD[104+rsp] | 843 adc r13,QWORD[104+rsp] |
756 adc r14,QWORD[112+rsp] | 844 adc r14,QWORD[112+rsp] |
757 adc r15,QWORD[120+rsp] | 845 adc r15,QWORD[120+rsp] |
758 DB 102,72,15,126,214 | 846 DB 102,72,15,126,214 |
759 sbb rcx,rcx | 847 sbb rcx,rcx |
760 | 848 |
761 call __rsaz_512_subtract | 849 call __rsaz_512_subtract |
762 | 850 |
763 » mov» DWORD[rsi],r8d | 851 » mov» QWORD[rsi],r8 |
764 » shr» r8,32 | 852 » mov» QWORD[128+rsi],r9 |
765 » mov» DWORD[128+rsi],r9d | 853 » mov» QWORD[256+rsi],r10 |
766 » shr» r9,32 | 854 » mov» QWORD[384+rsi],r11 |
767 » mov» DWORD[256+rsi],r10d | 855 » mov» QWORD[512+rsi],r12 |
768 » shr» r10,32 | 856 » mov» QWORD[640+rsi],r13 |
769 » mov» DWORD[384+rsi],r11d | 857 » mov» QWORD[768+rsi],r14 |
770 » shr» r11,32 | 858 » mov» QWORD[896+rsi],r15 |
771 » mov» DWORD[512+rsi],r12d | |
772 » shr» r12,32 | |
773 » mov» DWORD[640+rsi],r13d | |
774 » shr» r13,32 | |
775 » mov» DWORD[768+rsi],r14d | |
776 » shr» r14,32 | |
777 » mov» DWORD[896+rsi],r15d | |
778 » shr» r15,32 | |
779 » mov» DWORD[64+rsi],r8d | |
780 » mov» DWORD[192+rsi],r9d | |
781 » mov» DWORD[320+rsi],r10d | |
782 » mov» DWORD[448+rsi],r11d | |
783 » mov» DWORD[576+rsi],r12d | |
784 » mov» DWORD[704+rsi],r13d | |
785 » mov» DWORD[832+rsi],r14d | |
786 » mov» DWORD[960+rsi],r15d | |
787 | 859 |
788 lea rax,[((128+24+48))+rsp] | 860 lea rax,[((128+24+48))+rsp] |
789 mov r15,QWORD[((-48))+rax] | 861 mov r15,QWORD[((-48))+rax] |
790 mov r14,QWORD[((-40))+rax] | 862 mov r14,QWORD[((-40))+rax] |
791 mov r13,QWORD[((-32))+rax] | 863 mov r13,QWORD[((-32))+rax] |
792 mov r12,QWORD[((-24))+rax] | 864 mov r12,QWORD[((-24))+rax] |
793 mov rbp,QWORD[((-16))+rax] | 865 mov rbp,QWORD[((-16))+rax] |
794 mov rbx,QWORD[((-8))+rax] | 866 mov rbx,QWORD[((-8))+rax] |
795 lea rsp,[rax] | 867 lea rsp,[rax] |
796 $L$mul_scatter4_epilogue: | 868 $L$mul_scatter4_epilogue: |
(...skipping 346 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1143 mov QWORD[40+rdi],r13 | 1215 mov QWORD[40+rdi],r13 |
1144 mov QWORD[48+rdi],r14 | 1216 mov QWORD[48+rdi],r14 |
1145 mov QWORD[56+rdi],r15 | 1217 mov QWORD[56+rdi],r15 |
1146 | 1218 |
1147 DB 0F3h,0C3h ;repret | 1219 DB 0F3h,0C3h ;repret |
1148 | 1220 |
1149 global rsaz_512_scatter4 | 1221 global rsaz_512_scatter4 |
1150 | 1222 |
1151 ALIGN 16 | 1223 ALIGN 16 |
1152 rsaz_512_scatter4: | 1224 rsaz_512_scatter4: |
1153 » lea» rcx,[r8*4+rcx] | 1225 » lea» rcx,[r8*8+rcx] |
1154 mov r9d,8 | 1226 mov r9d,8 |
1155 jmp NEAR $L$oop_scatter | 1227 jmp NEAR $L$oop_scatter |
1156 ALIGN 16 | 1228 ALIGN 16 |
1157 $L$oop_scatter: | 1229 $L$oop_scatter: |
1158 mov rax,QWORD[rdx] | 1230 mov rax,QWORD[rdx] |
1159 lea rdx,[8+rdx] | 1231 lea rdx,[8+rdx] |
1160 » mov» DWORD[rcx],eax | 1232 » mov» QWORD[rcx],rax |
1161 » shr» rax,32 | |
1162 » mov» DWORD[64+rcx],eax | |
1163 lea rcx,[128+rcx] | 1233 lea rcx,[128+rcx] |
1164 dec r9d | 1234 dec r9d |
1165 jnz NEAR $L$oop_scatter | 1235 jnz NEAR $L$oop_scatter |
1166 DB 0F3h,0C3h ;repret | 1236 DB 0F3h,0C3h ;repret |
1167 | 1237 |
1168 | 1238 |
1169 global rsaz_512_gather4 | 1239 global rsaz_512_gather4 |
1170 | 1240 |
1171 ALIGN 16 | 1241 ALIGN 16 |
1172 rsaz_512_gather4: | 1242 rsaz_512_gather4: |
1173 » lea» rdx,[r8*4+rdx] | 1243 $L$SEH_begin_rsaz_512_gather4: |
| 1244 DB» 0x48,0x81,0xec,0xa8,0x00,0x00,0x00 |
| 1245 DB» 0x0f,0x29,0x34,0x24 |
| 1246 DB» 0x0f,0x29,0x7c,0x24,0x10 |
| 1247 DB» 0x44,0x0f,0x29,0x44,0x24,0x20 |
| 1248 DB» 0x44,0x0f,0x29,0x4c,0x24,0x30 |
| 1249 DB» 0x44,0x0f,0x29,0x54,0x24,0x40 |
| 1250 DB» 0x44,0x0f,0x29,0x5c,0x24,0x50 |
| 1251 DB» 0x44,0x0f,0x29,0x64,0x24,0x60 |
| 1252 DB» 0x44,0x0f,0x29,0x6c,0x24,0x70 |
| 1253 DB» 0x44,0x0f,0x29,0xb4,0x24,0x80,0,0,0 |
| 1254 DB» 0x44,0x0f,0x29,0xbc,0x24,0x90,0,0,0 |
| 1255 » movd» xmm8,r8d |
| 1256 » movdqa» xmm1,XMMWORD[(($L$inc+16))] |
| 1257 » movdqa» xmm0,XMMWORD[$L$inc] |
| 1258 |
| 1259 » pshufd» xmm8,xmm8,0 |
| 1260 » movdqa» xmm7,xmm1 |
| 1261 » movdqa» xmm2,xmm1 |
| 1262 » paddd» xmm1,xmm0 |
| 1263 » pcmpeqd»xmm0,xmm8 |
| 1264 » movdqa» xmm3,xmm7 |
| 1265 » paddd» xmm2,xmm1 |
| 1266 » pcmpeqd»xmm1,xmm8 |
| 1267 » movdqa» xmm4,xmm7 |
| 1268 » paddd» xmm3,xmm2 |
| 1269 » pcmpeqd»xmm2,xmm8 |
| 1270 » movdqa» xmm5,xmm7 |
| 1271 » paddd» xmm4,xmm3 |
| 1272 » pcmpeqd»xmm3,xmm8 |
| 1273 » movdqa» xmm6,xmm7 |
| 1274 » paddd» xmm5,xmm4 |
| 1275 » pcmpeqd»xmm4,xmm8 |
| 1276 » paddd» xmm6,xmm5 |
| 1277 » pcmpeqd»xmm5,xmm8 |
| 1278 » paddd» xmm7,xmm6 |
| 1279 » pcmpeqd»xmm6,xmm8 |
| 1280 » pcmpeqd»xmm7,xmm8 |
1174 mov r9d,8 | 1281 mov r9d,8 |
1175 jmp NEAR $L$oop_gather | 1282 jmp NEAR $L$oop_gather |
1176 ALIGN 16 | 1283 ALIGN 16 |
1177 $L$oop_gather: | 1284 $L$oop_gather: |
1178 » mov» eax,DWORD[rdx] | 1285 » movdqa» xmm8,XMMWORD[rdx] |
1179 » mov» r8d,DWORD[64+rdx] | 1286 » movdqa» xmm9,XMMWORD[16+rdx] |
| 1287 » movdqa» xmm10,XMMWORD[32+rdx] |
| 1288 » movdqa» xmm11,XMMWORD[48+rdx] |
| 1289 » pand» xmm8,xmm0 |
| 1290 » movdqa» xmm12,XMMWORD[64+rdx] |
| 1291 » pand» xmm9,xmm1 |
| 1292 » movdqa» xmm13,XMMWORD[80+rdx] |
| 1293 » pand» xmm10,xmm2 |
| 1294 » movdqa» xmm14,XMMWORD[96+rdx] |
| 1295 » pand» xmm11,xmm3 |
| 1296 » movdqa» xmm15,XMMWORD[112+rdx] |
1180 lea rdx,[128+rdx] | 1297 lea rdx,[128+rdx] |
1181 » shl» r8,32 | 1298 » pand» xmm12,xmm4 |
1182 » or» rax,r8 | 1299 » pand» xmm13,xmm5 |
1183 » mov» QWORD[rcx],rax | 1300 » pand» xmm14,xmm6 |
| 1301 » pand» xmm15,xmm7 |
| 1302 » por» xmm8,xmm10 |
| 1303 » por» xmm9,xmm11 |
| 1304 » por» xmm8,xmm12 |
| 1305 » por» xmm9,xmm13 |
| 1306 » por» xmm8,xmm14 |
| 1307 » por» xmm9,xmm15 |
| 1308 |
| 1309 » por» xmm8,xmm9 |
| 1310 » pshufd» xmm9,xmm8,0x4e |
| 1311 » por» xmm8,xmm9 |
| 1312 » movq» QWORD[rcx],xmm8 |
1184 lea rcx,[8+rcx] | 1313 lea rcx,[8+rcx] |
1185 dec r9d | 1314 dec r9d |
1186 jnz NEAR $L$oop_gather | 1315 jnz NEAR $L$oop_gather |
| 1316 movaps xmm6,XMMWORD[rsp] |
| 1317 movaps xmm7,XMMWORD[16+rsp] |
| 1318 movaps xmm8,XMMWORD[32+rsp] |
| 1319 movaps xmm9,XMMWORD[48+rsp] |
| 1320 movaps xmm10,XMMWORD[64+rsp] |
| 1321 movaps xmm11,XMMWORD[80+rsp] |
| 1322 movaps xmm12,XMMWORD[96+rsp] |
| 1323 movaps xmm13,XMMWORD[112+rsp] |
| 1324 movaps xmm14,XMMWORD[128+rsp] |
| 1325 movaps xmm15,XMMWORD[144+rsp] |
| 1326 add rsp,0xa8 |
1187 DB 0F3h,0C3h ;repret | 1327 DB 0F3h,0C3h ;repret |
| 1328 $L$SEH_end_rsaz_512_gather4: |
1188 | 1329 |
| 1330 |
| 1331 ALIGN 64 |
| 1332 $L$inc: |
| 1333 DD 0,0,1,1 |
| 1334 DD 2,2,2,2 |
1189 EXTERN __imp_RtlVirtualUnwind | 1335 EXTERN __imp_RtlVirtualUnwind |
1190 | 1336 |
1191 ALIGN 16 | 1337 ALIGN 16 |
1192 se_handler: | 1338 se_handler: |
1193 push rsi | 1339 push rsi |
1194 push rdi | 1340 push rdi |
1195 push rbx | 1341 push rbx |
1196 push rbp | 1342 push rbp |
1197 push r12 | 1343 push r12 |
1198 push r13 | 1344 push r13 |
(...skipping 15 matching lines...) Expand all Loading... |
1214 | 1360 |
1215 mov rax,QWORD[152+r8] | 1361 mov rax,QWORD[152+r8] |
1216 | 1362 |
1217 mov r10d,DWORD[4+r11] | 1363 mov r10d,DWORD[4+r11] |
1218 lea r10,[r10*1+rsi] | 1364 lea r10,[r10*1+rsi] |
1219 cmp rbx,r10 | 1365 cmp rbx,r10 |
1220 jae NEAR $L$common_seh_tail | 1366 jae NEAR $L$common_seh_tail |
1221 | 1367 |
1222 lea rax,[((128+24+48))+rax] | 1368 lea rax,[((128+24+48))+rax] |
1223 | 1369 |
| 1370 lea rbx,[$L$mul_gather4_epilogue] |
| 1371 cmp rbx,r10 |
| 1372 jne NEAR $L$se_not_in_mul_gather4 |
| 1373 |
| 1374 lea rax,[176+rax] |
| 1375 |
| 1376 lea rsi,[((-48-168))+rax] |
| 1377 lea rdi,[512+r8] |
| 1378 mov ecx,20 |
| 1379 DD 0xa548f3fc |
| 1380 |
| 1381 $L$se_not_in_mul_gather4: |
1224 mov rbx,QWORD[((-8))+rax] | 1382 mov rbx,QWORD[((-8))+rax] |
1225 mov rbp,QWORD[((-16))+rax] | 1383 mov rbp,QWORD[((-16))+rax] |
1226 mov r12,QWORD[((-24))+rax] | 1384 mov r12,QWORD[((-24))+rax] |
1227 mov r13,QWORD[((-32))+rax] | 1385 mov r13,QWORD[((-32))+rax] |
1228 mov r14,QWORD[((-40))+rax] | 1386 mov r14,QWORD[((-40))+rax] |
1229 mov r15,QWORD[((-48))+rax] | 1387 mov r15,QWORD[((-48))+rax] |
1230 mov QWORD[144+r8],rbx | 1388 mov QWORD[144+r8],rbx |
1231 mov QWORD[160+r8],rbp | 1389 mov QWORD[160+r8],rbp |
1232 mov QWORD[216+r8],r12 | 1390 mov QWORD[216+r8],r12 |
1233 mov QWORD[224+r8],r13 | 1391 mov QWORD[224+r8],r13 |
(...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1289 DD $L$SEH_info_rsaz_512_mul_gather4 wrt ..imagebase | 1447 DD $L$SEH_info_rsaz_512_mul_gather4 wrt ..imagebase |
1290 | 1448 |
1291 DD $L$SEH_begin_rsaz_512_mul_scatter4 wrt ..imagebase | 1449 DD $L$SEH_begin_rsaz_512_mul_scatter4 wrt ..imagebase |
1292 DD $L$SEH_end_rsaz_512_mul_scatter4 wrt ..imagebase | 1450 DD $L$SEH_end_rsaz_512_mul_scatter4 wrt ..imagebase |
1293 DD $L$SEH_info_rsaz_512_mul_scatter4 wrt ..imagebase | 1451 DD $L$SEH_info_rsaz_512_mul_scatter4 wrt ..imagebase |
1294 | 1452 |
1295 DD $L$SEH_begin_rsaz_512_mul_by_one wrt ..imagebase | 1453 DD $L$SEH_begin_rsaz_512_mul_by_one wrt ..imagebase |
1296 DD $L$SEH_end_rsaz_512_mul_by_one wrt ..imagebase | 1454 DD $L$SEH_end_rsaz_512_mul_by_one wrt ..imagebase |
1297 DD $L$SEH_info_rsaz_512_mul_by_one wrt ..imagebase | 1455 DD $L$SEH_info_rsaz_512_mul_by_one wrt ..imagebase |
1298 | 1456 |
| 1457 DD $L$SEH_begin_rsaz_512_gather4 wrt ..imagebase |
| 1458 DD $L$SEH_end_rsaz_512_gather4 wrt ..imagebase |
| 1459 DD $L$SEH_info_rsaz_512_gather4 wrt ..imagebase |
| 1460 |
1299 section .xdata rdata align=8 | 1461 section .xdata rdata align=8 |
1300 ALIGN 8 | 1462 ALIGN 8 |
1301 $L$SEH_info_rsaz_512_sqr: | 1463 $L$SEH_info_rsaz_512_sqr: |
1302 DB 9,0,0,0 | 1464 DB 9,0,0,0 |
1303 DD se_handler wrt ..imagebase | 1465 DD se_handler wrt ..imagebase |
1304 DD $L$sqr_body wrt ..imagebase,$L$sqr_epilogue wrt ..imagebase | 1466 DD $L$sqr_body wrt ..imagebase,$L$sqr_epilogue wrt ..imagebase |
1305 $L$SEH_info_rsaz_512_mul: | 1467 $L$SEH_info_rsaz_512_mul: |
1306 DB 9,0,0,0 | 1468 DB 9,0,0,0 |
1307 DD se_handler wrt ..imagebase | 1469 DD se_handler wrt ..imagebase |
1308 DD $L$mul_body wrt ..imagebase,$L$mul_epilogue wrt ..imagebase | 1470 DD $L$mul_body wrt ..imagebase,$L$mul_epilogue wrt ..imagebase |
1309 $L$SEH_info_rsaz_512_mul_gather4: | 1471 $L$SEH_info_rsaz_512_mul_gather4: |
1310 DB 9,0,0,0 | 1472 DB 9,0,0,0 |
1311 DD se_handler wrt ..imagebase | 1473 DD se_handler wrt ..imagebase |
1312 DD $L$mul_gather4_body wrt ..imagebase,$L$mul_gather4_epilogue wrt
..imagebase | 1474 DD $L$mul_gather4_body wrt ..imagebase,$L$mul_gather4_epilogue wrt
..imagebase |
1313 $L$SEH_info_rsaz_512_mul_scatter4: | 1475 $L$SEH_info_rsaz_512_mul_scatter4: |
1314 DB 9,0,0,0 | 1476 DB 9,0,0,0 |
1315 DD se_handler wrt ..imagebase | 1477 DD se_handler wrt ..imagebase |
1316 DD $L$mul_scatter4_body wrt ..imagebase,$L$mul_scatter4_epilogue wr
t ..imagebase | 1478 DD $L$mul_scatter4_body wrt ..imagebase,$L$mul_scatter4_epilogue wr
t ..imagebase |
1317 $L$SEH_info_rsaz_512_mul_by_one: | 1479 $L$SEH_info_rsaz_512_mul_by_one: |
1318 DB 9,0,0,0 | 1480 DB 9,0,0,0 |
1319 DD se_handler wrt ..imagebase | 1481 DD se_handler wrt ..imagebase |
1320 DD $L$mul_by_one_body wrt ..imagebase,$L$mul_by_one_epilogue wrt ..
imagebase | 1482 DD $L$mul_by_one_body wrt ..imagebase,$L$mul_by_one_epilogue wrt ..
imagebase |
| 1483 $L$SEH_info_rsaz_512_gather4: |
| 1484 DB 0x01,0x46,0x16,0x00 |
| 1485 DB 0x46,0xf8,0x09,0x00 |
| 1486 DB 0x3d,0xe8,0x08,0x00 |
| 1487 DB 0x34,0xd8,0x07,0x00 |
| 1488 DB 0x2e,0xc8,0x06,0x00 |
| 1489 DB 0x28,0xb8,0x05,0x00 |
| 1490 DB 0x22,0xa8,0x04,0x00 |
| 1491 DB 0x1c,0x98,0x03,0x00 |
| 1492 DB 0x16,0x88,0x02,0x00 |
| 1493 DB 0x10,0x78,0x01,0x00 |
| 1494 DB 0x0b,0x68,0x00,0x00 |
| 1495 DB 0x07,0x01,0x15,0x00 |
OLD | NEW |