OLD | NEW |
1 #if defined(__x86_64__) | 1 #if defined(__x86_64__) |
2 .text | 2 .text |
3 | 3 |
4 .extern OPENSSL_ia32cap_P | 4 .extern OPENSSL_ia32cap_P |
5 .hidden OPENSSL_ia32cap_P | 5 .hidden OPENSSL_ia32cap_P |
6 | 6 |
7 .globl bn_mul_mont | 7 .globl bn_mul_mont |
8 .hidden bn_mul_mont | 8 .hidden bn_mul_mont |
9 .type bn_mul_mont,@function | 9 .type bn_mul_mont,@function |
10 .align 16 | 10 .align 16 |
(...skipping 618 matching lines...)
629 movl %r9d,%r10d | 629 movl %r9d,%r10d |
630 shll $3,%r9d | 630 shll $3,%r9d |
631 shlq $3+2,%r10 | 631 shlq $3+2,%r10 |
632 negq %r9 | 632 negq %r9 |
633 | 633 |
634 | 634 |
635 | 635 |
636 | 636 |
637 | 637 |
638 | 638 |
639 » leaq» -64(%rsp,%r9,4),%r11 | 639 » leaq» -64(%rsp,%r9,2),%r11 |
640 movq (%r8),%r8 | 640 movq (%r8),%r8 |
641 subq %rsi,%r11 | 641 subq %rsi,%r11 |
642 andq $4095,%r11 | 642 andq $4095,%r11 |
643 cmpq %r11,%r10 | 643 cmpq %r11,%r10 |
644 jb .Lsqr8x_sp_alt | 644 jb .Lsqr8x_sp_alt |
645 subq %r11,%rsp | 645 subq %r11,%rsp |
646 » leaq» -64(%rsp,%r9,4),%rsp | 646 » leaq» -64(%rsp,%r9,2),%rsp |
647 jmp .Lsqr8x_sp_done | 647 jmp .Lsqr8x_sp_done |
648 | 648 |
649 .align 32 | 649 .align 32 |
650 .Lsqr8x_sp_alt: | 650 .Lsqr8x_sp_alt: |
651 » leaq» 4096-64(,%r9,4),%r10 | 651 » leaq» 4096-64(,%r9,2),%r10 |
652 » leaq» -64(%rsp,%r9,4),%rsp | 652 » leaq» -64(%rsp,%r9,2),%rsp |
653 subq %r10,%r11 | 653 subq %r10,%r11 |
654 movq $0,%r10 | 654 movq $0,%r10 |
655 cmovcq %r10,%r11 | 655 cmovcq %r10,%r11 |
656 subq %r11,%rsp | 656 subq %r11,%rsp |
657 .Lsqr8x_sp_done: | 657 .Lsqr8x_sp_done: |
658 andq $-64,%rsp | 658 andq $-64,%rsp |
659 movq %r9,%r10 | 659 movq %r9,%r10 |
660 negq %r9 | 660 negq %r9 |
661 | 661 |
662 leaq 64(%rsp,%r9,2),%r11 | |
663 movq %r8,32(%rsp) | 662 movq %r8,32(%rsp) |
664 movq %rax,40(%rsp) | 663 movq %rax,40(%rsp) |
665 .Lsqr8x_body: | 664 .Lsqr8x_body: |
666 | 665 |
667 » movq» %r9,%rbp | 666 .byte» 102,72,15,110,209 |
668 .byte» 102,73,15,110,211 | |
669 » shrq» $3+2,%rbp | |
670 » movl» OPENSSL_ia32cap_P+8(%rip),%eax | |
671 » jmp» .Lsqr8x_copy_n | |
672 | |
673 .align» 32 | |
674 .Lsqr8x_copy_n: | |
675 » movq» 0(%rcx),%xmm0 | |
676 » movq» 8(%rcx),%xmm1 | |
677 » movq» 16(%rcx),%xmm3 | |
678 » movq» 24(%rcx),%xmm4 | |
679 » leaq» 32(%rcx),%rcx | |
680 » movdqa» %xmm0,0(%r11) | |
681 » movdqa» %xmm1,16(%r11) | |
682 » movdqa» %xmm3,32(%r11) | |
683 » movdqa» %xmm4,48(%r11) | |
684 » leaq» 64(%r11),%r11 | |
685 » decq» %rbp | |
686 » jnz» .Lsqr8x_copy_n | |
687 | |
688 pxor %xmm0,%xmm0 | 667 pxor %xmm0,%xmm0 |
689 .byte 102,72,15,110,207 | 668 .byte 102,72,15,110,207 |
690 .byte 102,73,15,110,218 | 669 .byte 102,73,15,110,218 |
691 call bn_sqr8x_internal | 670 call bn_sqr8x_internal |
692 | 671 |
693 » pxor» %xmm0,%xmm0 | 672 |
694 » leaq» 48(%rsp),%rax | 673 |
695 » leaq» 64(%rsp,%r9,2),%rdx | 674 |
696 » shrq» $3+2,%r9 | 675 » leaq» (%rdi,%r9,1),%rbx |
697 » movq» 40(%rsp),%rsi | 676 » movq» %r9,%rcx |
698 » jmp» .Lsqr8x_zero | 677 » movq» %r9,%rdx |
| 678 .byte» 102,72,15,126,207 |
| 679 » sarq» $3+2,%rcx |
| 680 » jmp» .Lsqr8x_sub |
699 | 681 |
700 .align 32 | 682 .align 32 |
701 .Lsqr8x_zero: | 683 .Lsqr8x_sub: |
702 » movdqa» %xmm0,0(%rax) | 684 » movq» 0(%rbx),%r12 |
703 » movdqa» %xmm0,16(%rax) | 685 » movq» 8(%rbx),%r13 |
704 » movdqa» %xmm0,32(%rax) | 686 » movq» 16(%rbx),%r14 |
705 » movdqa» %xmm0,48(%rax) | 687 » movq» 24(%rbx),%r15 |
706 » leaq» 64(%rax),%rax | 688 » leaq» 32(%rbx),%rbx |
707 » movdqa» %xmm0,0(%rdx) | 689 » sbbq» 0(%rbp),%r12 |
708 » movdqa» %xmm0,16(%rdx) | 690 » sbbq» 8(%rbp),%r13 |
709 » movdqa» %xmm0,32(%rdx) | 691 » sbbq» 16(%rbp),%r14 |
710 » movdqa» %xmm0,48(%rdx) | 692 » sbbq» 24(%rbp),%r15 |
711 » leaq» 64(%rdx),%rdx | 693 » leaq» 32(%rbp),%rbp |
712 » decq» %r9 | 694 » movq» %r12,0(%rdi) |
713 » jnz» .Lsqr8x_zero | 695 » movq» %r13,8(%rdi) |
| 696 » movq» %r14,16(%rdi) |
| 697 » movq» %r15,24(%rdi) |
| 698 » leaq» 32(%rdi),%rdi |
| 699 » incq» %rcx |
| 700 » jnz» .Lsqr8x_sub |
| 701 |
| 702 » sbbq» $0,%rax |
| 703 » leaq» (%rbx,%r9,1),%rbx |
| 704 » leaq» (%rdi,%r9,1),%rdi |
| 705 |
| 706 .byte» 102,72,15,110,200 |
| 707 » pxor» %xmm0,%xmm0 |
| 708 » pshufd» $0,%xmm1,%xmm1 |
| 709 » movq» 40(%rsp),%rsi |
| 710 » jmp» .Lsqr8x_cond_copy |
| 711 |
| 712 .align» 32 |
| 713 .Lsqr8x_cond_copy: |
| 714 » movdqa» 0(%rbx),%xmm2 |
| 715 » movdqa» 16(%rbx),%xmm3 |
| 716 » leaq» 32(%rbx),%rbx |
| 717 » movdqu» 0(%rdi),%xmm4 |
| 718 » movdqu» 16(%rdi),%xmm5 |
| 719 » leaq» 32(%rdi),%rdi |
| 720 » movdqa» %xmm0,-32(%rbx) |
| 721 » movdqa» %xmm0,-16(%rbx) |
| 722 » movdqa» %xmm0,-32(%rbx,%rdx,1) |
| 723 » movdqa» %xmm0,-16(%rbx,%rdx,1) |
| 724 » pcmpeqd»%xmm1,%xmm0 |
| 725 » pand» %xmm1,%xmm2 |
| 726 » pand» %xmm1,%xmm3 |
| 727 » pand» %xmm0,%xmm4 |
| 728 » pand» %xmm0,%xmm5 |
| 729 » pxor» %xmm0,%xmm0 |
| 730 » por» %xmm2,%xmm4 |
| 731 » por» %xmm3,%xmm5 |
| 732 » movdqu» %xmm4,-32(%rdi) |
| 733 » movdqu» %xmm5,-16(%rdi) |
| 734 » addq» $32,%r9 |
| 735 » jnz» .Lsqr8x_cond_copy |
714 | 736 |
715 movq $1,%rax | 737 movq $1,%rax |
716 movq -48(%rsi),%r15 | 738 movq -48(%rsi),%r15 |
717 movq -40(%rsi),%r14 | 739 movq -40(%rsi),%r14 |
718 movq -32(%rsi),%r13 | 740 movq -32(%rsi),%r13 |
719 movq -24(%rsi),%r12 | 741 movq -24(%rsi),%r12 |
720 movq -16(%rsi),%rbp | 742 movq -16(%rsi),%rbp |
721 movq -8(%rsi),%rbx | 743 movq -8(%rsi),%rbx |
722 leaq (%rsi),%rsp | 744 leaq (%rsi),%rsp |
723 .Lsqr8x_epilogue: | 745 .Lsqr8x_epilogue: |
724 .byte 0xf3,0xc3 | 746 .byte 0xf3,0xc3 |
725 .size bn_sqr8x_mont,.-bn_sqr8x_mont | 747 .size bn_sqr8x_mont,.-bn_sqr8x_mont |
726 .byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 | 748 .byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 |
727 .align 16 | 749 .align 16 |
728 #endif | 750 #endif |
OLD | NEW |