| OLD | NEW |
| 1 #if defined(__x86_64__) | 1 #if defined(__x86_64__) |
| 2 .text | 2 .text |
| 3 | 3 |
| 4 | 4 |
| 5 | 5 |
| 6 .globl _bn_mul_mont | 6 .globl _bn_mul_mont |
| 7 .private_extern _bn_mul_mont | 7 .private_extern _bn_mul_mont |
| 8 | 8 |
| 9 .p2align 4 | 9 .p2align 4 |
| 10 _bn_mul_mont: | 10 _bn_mul_mont: |
| (...skipping 616 matching lines...) |
| 627 movl %r9d,%r10d | 627 movl %r9d,%r10d |
| 628 shll $3,%r9d | 628 shll $3,%r9d |
| 629 shlq $3+2,%r10 | 629 shlq $3+2,%r10 |
| 630 negq %r9 | 630 negq %r9 |
| 631 | 631 |
| 632 | 632 |
| 633 | 633 |
| 634 | 634 |
| 635 | 635 |
| 636 | 636 |
| 637 » leaq» -64(%rsp,%r9,4),%r11 | 637 » leaq» -64(%rsp,%r9,2),%r11 |
| 638 movq (%r8),%r8 | 638 movq (%r8),%r8 |
| 639 subq %rsi,%r11 | 639 subq %rsi,%r11 |
| 640 andq $4095,%r11 | 640 andq $4095,%r11 |
| 641 cmpq %r11,%r10 | 641 cmpq %r11,%r10 |
| 642 jb L$sqr8x_sp_alt | 642 jb L$sqr8x_sp_alt |
| 643 subq %r11,%rsp | 643 subq %r11,%rsp |
| 644 » leaq» -64(%rsp,%r9,4),%rsp | 644 » leaq» -64(%rsp,%r9,2),%rsp |
| 645 jmp L$sqr8x_sp_done | 645 jmp L$sqr8x_sp_done |
| 646 | 646 |
| 647 .p2align 5 | 647 .p2align 5 |
| 648 L$sqr8x_sp_alt: | 648 L$sqr8x_sp_alt: |
| 649 » leaq» 4096-64(,%r9,4),%r10 | 649 » leaq» 4096-64(,%r9,2),%r10 |
| 650 » leaq» -64(%rsp,%r9,4),%rsp | 650 » leaq» -64(%rsp,%r9,2),%rsp |
| 651 subq %r10,%r11 | 651 subq %r10,%r11 |
| 652 movq $0,%r10 | 652 movq $0,%r10 |
| 653 cmovcq %r10,%r11 | 653 cmovcq %r10,%r11 |
| 654 subq %r11,%rsp | 654 subq %r11,%rsp |
| 655 L$sqr8x_sp_done: | 655 L$sqr8x_sp_done: |
| 656 andq $-64,%rsp | 656 andq $-64,%rsp |
| 657 movq %r9,%r10 | 657 movq %r9,%r10 |
| 658 negq %r9 | 658 negq %r9 |
| 659 | 659 |
| 660 leaq 64(%rsp,%r9,2),%r11 | |
| 661 movq %r8,32(%rsp) | 660 movq %r8,32(%rsp) |
| 662 movq %rax,40(%rsp) | 661 movq %rax,40(%rsp) |
| 663 L$sqr8x_body: | 662 L$sqr8x_body: |
| 664 | 663 |
| 665 » movq» %r9,%rbp | 664 .byte» 102,72,15,110,209 |
| 666 .byte» 102,73,15,110,211 | |
| 667 » shrq» $3+2,%rbp | |
| 668 » movl» _OPENSSL_ia32cap_P+8(%rip),%eax | |
| 669 » jmp» L$sqr8x_copy_n | |
| 670 | |
| 671 .p2align» 5 | |
| 672 L$sqr8x_copy_n: | |
| 673 » movq» 0(%rcx),%xmm0 | |
| 674 » movq» 8(%rcx),%xmm1 | |
| 675 » movq» 16(%rcx),%xmm3 | |
| 676 » movq» 24(%rcx),%xmm4 | |
| 677 » leaq» 32(%rcx),%rcx | |
| 678 » movdqa» %xmm0,0(%r11) | |
| 679 » movdqa» %xmm1,16(%r11) | |
| 680 » movdqa» %xmm3,32(%r11) | |
| 681 » movdqa» %xmm4,48(%r11) | |
| 682 » leaq» 64(%r11),%r11 | |
| 683 » decq» %rbp | |
| 684 » jnz» L$sqr8x_copy_n | |
| 685 | |
| 686 pxor %xmm0,%xmm0 | 665 pxor %xmm0,%xmm0 |
| 687 .byte 102,72,15,110,207 | 666 .byte 102,72,15,110,207 |
| 688 .byte 102,73,15,110,218 | 667 .byte 102,73,15,110,218 |
| 689 call _bn_sqr8x_internal | 668 call _bn_sqr8x_internal |
| 690 | 669 |
| 691 » pxor» %xmm0,%xmm0 | 670 |
| 692 » leaq» 48(%rsp),%rax | 671 |
| 693 » leaq» 64(%rsp,%r9,2),%rdx | 672 |
| 694 » shrq» $3+2,%r9 | 673 » leaq» (%rdi,%r9,1),%rbx |
| 695 » movq» 40(%rsp),%rsi | 674 » movq» %r9,%rcx |
| 696 » jmp» L$sqr8x_zero | 675 » movq» %r9,%rdx |
| 676 .byte» 102,72,15,126,207 |
| 677 » sarq» $3+2,%rcx |
| 678 » jmp» L$sqr8x_sub |
| 697 | 679 |
| 698 .p2align 5 | 680 .p2align 5 |
| 699 L$sqr8x_zero: | 681 L$sqr8x_sub: |
| 700 » movdqa» %xmm0,0(%rax) | 682 » movq» 0(%rbx),%r12 |
| 701 » movdqa» %xmm0,16(%rax) | 683 » movq» 8(%rbx),%r13 |
| 702 » movdqa» %xmm0,32(%rax) | 684 » movq» 16(%rbx),%r14 |
| 703 » movdqa» %xmm0,48(%rax) | 685 » movq» 24(%rbx),%r15 |
| 704 » leaq» 64(%rax),%rax | 686 » leaq» 32(%rbx),%rbx |
| 705 » movdqa» %xmm0,0(%rdx) | 687 » sbbq» 0(%rbp),%r12 |
| 706 » movdqa» %xmm0,16(%rdx) | 688 » sbbq» 8(%rbp),%r13 |
| 707 » movdqa» %xmm0,32(%rdx) | 689 » sbbq» 16(%rbp),%r14 |
| 708 » movdqa» %xmm0,48(%rdx) | 690 » sbbq» 24(%rbp),%r15 |
| 709 » leaq» 64(%rdx),%rdx | 691 » leaq» 32(%rbp),%rbp |
| 710 » decq» %r9 | 692 » movq» %r12,0(%rdi) |
| 711 » jnz» L$sqr8x_zero | 693 » movq» %r13,8(%rdi) |
| 694 » movq» %r14,16(%rdi) |
| 695 » movq» %r15,24(%rdi) |
| 696 » leaq» 32(%rdi),%rdi |
| 697 » incq» %rcx |
| 698 » jnz» L$sqr8x_sub |
| 699 |
| 700 » sbbq» $0,%rax |
| 701 » leaq» (%rbx,%r9,1),%rbx |
| 702 » leaq» (%rdi,%r9,1),%rdi |
| 703 |
| 704 .byte» 102,72,15,110,200 |
| 705 » pxor» %xmm0,%xmm0 |
| 706 » pshufd» $0,%xmm1,%xmm1 |
| 707 » movq» 40(%rsp),%rsi |
| 708 » jmp» L$sqr8x_cond_copy |
| 709 |
| 710 .p2align» 5 |
| 711 L$sqr8x_cond_copy: |
| 712 » movdqa» 0(%rbx),%xmm2 |
| 713 » movdqa» 16(%rbx),%xmm3 |
| 714 » leaq» 32(%rbx),%rbx |
| 715 » movdqu» 0(%rdi),%xmm4 |
| 716 » movdqu» 16(%rdi),%xmm5 |
| 717 » leaq» 32(%rdi),%rdi |
| 718 » movdqa» %xmm0,-32(%rbx) |
| 719 » movdqa» %xmm0,-16(%rbx) |
| 720 » movdqa» %xmm0,-32(%rbx,%rdx,1) |
| 721 » movdqa» %xmm0,-16(%rbx,%rdx,1) |
| 722 » pcmpeqd»%xmm1,%xmm0 |
| 723 » pand» %xmm1,%xmm2 |
| 724 » pand» %xmm1,%xmm3 |
| 725 » pand» %xmm0,%xmm4 |
| 726 » pand» %xmm0,%xmm5 |
| 727 » pxor» %xmm0,%xmm0 |
| 728 » por» %xmm2,%xmm4 |
| 729 » por» %xmm3,%xmm5 |
| 730 » movdqu» %xmm4,-32(%rdi) |
| 731 » movdqu» %xmm5,-16(%rdi) |
| 732 » addq» $32,%r9 |
| 733 » jnz» L$sqr8x_cond_copy |
| 712 | 734 |
| 713 movq $1,%rax | 735 movq $1,%rax |
| 714 movq -48(%rsi),%r15 | 736 movq -48(%rsi),%r15 |
| 715 movq -40(%rsi),%r14 | 737 movq -40(%rsi),%r14 |
| 716 movq -32(%rsi),%r13 | 738 movq -32(%rsi),%r13 |
| 717 movq -24(%rsi),%r12 | 739 movq -24(%rsi),%r12 |
| 718 movq -16(%rsi),%rbp | 740 movq -16(%rsi),%rbp |
| 719 movq -8(%rsi),%rbx | 741 movq -8(%rsi),%rbx |
| 720 leaq (%rsi),%rsp | 742 leaq (%rsi),%rsp |
| 721 L$sqr8x_epilogue: | 743 L$sqr8x_epilogue: |
| 722 .byte 0xf3,0xc3 | 744 .byte 0xf3,0xc3 |
| 723 | 745 |
| 724 .byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 | 746 .byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 |
| 725 .p2align 4 | 747 .p2align 4 |
| 726 #endif | 748 #endif |
| OLD | NEW |