OLD | NEW |
1 #if defined(__x86_64__) | 1 #if defined(__x86_64__) |
2 .text | 2 .text |
3 | 3 |
4 | 4 |
5 | 5 |
6 .globl _bn_mul_mont | 6 .globl _bn_mul_mont |
7 .private_extern _bn_mul_mont | 7 .private_extern _bn_mul_mont |
8 | 8 |
9 .p2align 4 | 9 .p2align 4 |
10 _bn_mul_mont: | 10 _bn_mul_mont: |
(...skipping 616 matching lines...) |
627 movl %r9d,%r10d | 627 movl %r9d,%r10d |
628 shll $3,%r9d | 628 shll $3,%r9d |
629 shlq $3+2,%r10 | 629 shlq $3+2,%r10 |
630 negq %r9 | 630 negq %r9 |
631 | 631 |
632 | 632 |
633 | 633 |
634 | 634 |
635 | 635 |
636 | 636 |
637 » leaq» -64(%rsp,%r9,4),%r11 | 637 » leaq» -64(%rsp,%r9,2),%r11 |
638 movq (%r8),%r8 | 638 movq (%r8),%r8 |
639 subq %rsi,%r11 | 639 subq %rsi,%r11 |
640 andq $4095,%r11 | 640 andq $4095,%r11 |
641 cmpq %r11,%r10 | 641 cmpq %r11,%r10 |
642 jb L$sqr8x_sp_alt | 642 jb L$sqr8x_sp_alt |
643 subq %r11,%rsp | 643 subq %r11,%rsp |
644 » leaq» -64(%rsp,%r9,4),%rsp | 644 » leaq» -64(%rsp,%r9,2),%rsp |
645 jmp L$sqr8x_sp_done | 645 jmp L$sqr8x_sp_done |
646 | 646 |
647 .p2align 5 | 647 .p2align 5 |
648 L$sqr8x_sp_alt: | 648 L$sqr8x_sp_alt: |
649 » leaq» 4096-64(,%r9,4),%r10 | 649 » leaq» 4096-64(,%r9,2),%r10 |
650 » leaq» -64(%rsp,%r9,4),%rsp | 650 » leaq» -64(%rsp,%r9,2),%rsp |
651 subq %r10,%r11 | 651 subq %r10,%r11 |
652 movq $0,%r10 | 652 movq $0,%r10 |
653 cmovcq %r10,%r11 | 653 cmovcq %r10,%r11 |
654 subq %r11,%rsp | 654 subq %r11,%rsp |
655 L$sqr8x_sp_done: | 655 L$sqr8x_sp_done: |
656 andq $-64,%rsp | 656 andq $-64,%rsp |
657 movq %r9,%r10 | 657 movq %r9,%r10 |
658 negq %r9 | 658 negq %r9 |
659 | 659 |
660 leaq 64(%rsp,%r9,2),%r11 | |
661 movq %r8,32(%rsp) | 660 movq %r8,32(%rsp) |
662 movq %rax,40(%rsp) | 661 movq %rax,40(%rsp) |
663 L$sqr8x_body: | 662 L$sqr8x_body: |
664 | 663 |
665 » movq» %r9,%rbp | 664 .byte» 102,72,15,110,209 |
666 .byte» 102,73,15,110,211 | |
667 » shrq» $3+2,%rbp | |
668 » movl» _OPENSSL_ia32cap_P+8(%rip),%eax | |
669 » jmp» L$sqr8x_copy_n | |
670 | |
671 .p2align» 5 | |
672 L$sqr8x_copy_n: | |
673 » movq» 0(%rcx),%xmm0 | |
674 » movq» 8(%rcx),%xmm1 | |
675 » movq» 16(%rcx),%xmm3 | |
676 » movq» 24(%rcx),%xmm4 | |
677 » leaq» 32(%rcx),%rcx | |
678 » movdqa» %xmm0,0(%r11) | |
679 » movdqa» %xmm1,16(%r11) | |
680 » movdqa» %xmm3,32(%r11) | |
681 » movdqa» %xmm4,48(%r11) | |
682 » leaq» 64(%r11),%r11 | |
683 » decq» %rbp | |
684 » jnz» L$sqr8x_copy_n | |
685 | |
686 pxor %xmm0,%xmm0 | 665 pxor %xmm0,%xmm0 |
687 .byte 102,72,15,110,207 | 666 .byte 102,72,15,110,207 |
688 .byte 102,73,15,110,218 | 667 .byte 102,73,15,110,218 |
689 call _bn_sqr8x_internal | 668 call _bn_sqr8x_internal |
690 | 669 |
691 » pxor» %xmm0,%xmm0 | 670 |
692 » leaq» 48(%rsp),%rax | 671 |
693 » leaq» 64(%rsp,%r9,2),%rdx | 672 |
694 » shrq» $3+2,%r9 | 673 » leaq» (%rdi,%r9,1),%rbx |
695 » movq» 40(%rsp),%rsi | 674 » movq» %r9,%rcx |
696 » jmp» L$sqr8x_zero | 675 » movq» %r9,%rdx |
| 676 .byte» 102,72,15,126,207 |
| 677 » sarq» $3+2,%rcx |
| 678 » jmp» L$sqr8x_sub |
697 | 679 |
698 .p2align 5 | 680 .p2align 5 |
699 L$sqr8x_zero: | 681 L$sqr8x_sub: |
700 » movdqa» %xmm0,0(%rax) | 682 » movq» 0(%rbx),%r12 |
701 » movdqa» %xmm0,16(%rax) | 683 » movq» 8(%rbx),%r13 |
702 » movdqa» %xmm0,32(%rax) | 684 » movq» 16(%rbx),%r14 |
703 » movdqa» %xmm0,48(%rax) | 685 » movq» 24(%rbx),%r15 |
704 » leaq» 64(%rax),%rax | 686 » leaq» 32(%rbx),%rbx |
705 » movdqa» %xmm0,0(%rdx) | 687 » sbbq» 0(%rbp),%r12 |
706 » movdqa» %xmm0,16(%rdx) | 688 » sbbq» 8(%rbp),%r13 |
707 » movdqa» %xmm0,32(%rdx) | 689 » sbbq» 16(%rbp),%r14 |
708 » movdqa» %xmm0,48(%rdx) | 690 » sbbq» 24(%rbp),%r15 |
709 » leaq» 64(%rdx),%rdx | 691 » leaq» 32(%rbp),%rbp |
710 » decq» %r9 | 692 » movq» %r12,0(%rdi) |
711 » jnz» L$sqr8x_zero | 693 » movq» %r13,8(%rdi) |
| 694 » movq» %r14,16(%rdi) |
| 695 » movq» %r15,24(%rdi) |
| 696 » leaq» 32(%rdi),%rdi |
| 697 » incq» %rcx |
| 698 » jnz» L$sqr8x_sub |
| 699 |
| 700 » sbbq» $0,%rax |
| 701 » leaq» (%rbx,%r9,1),%rbx |
| 702 » leaq» (%rdi,%r9,1),%rdi |
| 703 |
| 704 .byte» 102,72,15,110,200 |
| 705 » pxor» %xmm0,%xmm0 |
| 706 » pshufd» $0,%xmm1,%xmm1 |
| 707 » movq» 40(%rsp),%rsi |
| 708 » jmp» L$sqr8x_cond_copy |
| 709 |
| 710 .p2align» 5 |
| 711 L$sqr8x_cond_copy: |
| 712 » movdqa» 0(%rbx),%xmm2 |
| 713 » movdqa» 16(%rbx),%xmm3 |
| 714 » leaq» 32(%rbx),%rbx |
| 715 » movdqu» 0(%rdi),%xmm4 |
| 716 » movdqu» 16(%rdi),%xmm5 |
| 717 » leaq» 32(%rdi),%rdi |
| 718 » movdqa» %xmm0,-32(%rbx) |
| 719 » movdqa» %xmm0,-16(%rbx) |
| 720 » movdqa» %xmm0,-32(%rbx,%rdx,1) |
| 721 » movdqa» %xmm0,-16(%rbx,%rdx,1) |
| 722 » pcmpeqd»%xmm1,%xmm0 |
| 723 » pand» %xmm1,%xmm2 |
| 724 » pand» %xmm1,%xmm3 |
| 725 » pand» %xmm0,%xmm4 |
| 726 » pand» %xmm0,%xmm5 |
| 727 » pxor» %xmm0,%xmm0 |
| 728 » por» %xmm2,%xmm4 |
| 729 » por» %xmm3,%xmm5 |
| 730 » movdqu» %xmm4,-32(%rdi) |
| 731 » movdqu» %xmm5,-16(%rdi) |
| 732 » addq» $32,%r9 |
| 733 » jnz» L$sqr8x_cond_copy |
712 | 734 |
713 movq $1,%rax | 735 movq $1,%rax |
714 movq -48(%rsi),%r15 | 736 movq -48(%rsi),%r15 |
715 movq -40(%rsi),%r14 | 737 movq -40(%rsi),%r14 |
716 movq -32(%rsi),%r13 | 738 movq -32(%rsi),%r13 |
717 movq -24(%rsi),%r12 | 739 movq -24(%rsi),%r12 |
718 movq -16(%rsi),%rbp | 740 movq -16(%rsi),%rbp |
719 movq -8(%rsi),%rbx | 741 movq -8(%rsi),%rbx |
720 leaq (%rsi),%rsp | 742 leaq (%rsi),%rsp |
721 L$sqr8x_epilogue: | 743 L$sqr8x_epilogue: |
722 .byte 0xf3,0xc3 | 744 .byte 0xf3,0xc3 |
723 | 745 |
724 .byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 | 746 .byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 |
725 .p2align 4 | 747 .p2align 4 |
726 #endif | 748 #endif |
OLD | NEW |