OLD | NEW |
1 default rel | 1 default rel |
2 %define XMMWORD | 2 %define XMMWORD |
3 %define YMMWORD | 3 %define YMMWORD |
4 %define ZMMWORD | 4 %define ZMMWORD |
5 section .text code align=64 | 5 section .text code align=64 |
6 | 6 |
7 | 7 |
8 EXTERN OPENSSL_ia32cap_P | 8 EXTERN OPENSSL_ia32cap_P |
9 | 9 |
10 global bn_mul_mont | 10 global bn_mul_mont |
(...skipping 659 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
670 mov r10d,r9d | 670 mov r10d,r9d |
671 shl r9d,3 | 671 shl r9d,3 |
672 shl r10,3+2 | 672 shl r10,3+2 |
673 neg r9 | 673 neg r9 |
674 | 674 |
675 | 675 |
676 | 676 |
677 | 677 |
678 | 678 |
679 | 679 |
680 » lea» r11,[((-64))+r9*4+rsp] | 680 » lea» r11,[((-64))+r9*2+rsp] |
681 mov r8,QWORD[r8] | 681 mov r8,QWORD[r8] |
682 sub r11,rsi | 682 sub r11,rsi |
683 and r11,4095 | 683 and r11,4095 |
684 cmp r10,r11 | 684 cmp r10,r11 |
685 jb NEAR $L$sqr8x_sp_alt | 685 jb NEAR $L$sqr8x_sp_alt |
686 sub rsp,r11 | 686 sub rsp,r11 |
687 » lea» rsp,[((-64))+r9*4+rsp] | 687 » lea» rsp,[((-64))+r9*2+rsp] |
688 jmp NEAR $L$sqr8x_sp_done | 688 jmp NEAR $L$sqr8x_sp_done |
689 | 689 |
690 ALIGN 32 | 690 ALIGN 32 |
691 $L$sqr8x_sp_alt: | 691 $L$sqr8x_sp_alt: |
692 » lea» r10,[((4096-64))+r9*4] | 692 » lea» r10,[((4096-64))+r9*2] |
693 » lea» rsp,[((-64))+r9*4+rsp] | 693 » lea» rsp,[((-64))+r9*2+rsp] |
694 sub r11,r10 | 694 sub r11,r10 |
695 mov r10,0 | 695 mov r10,0 |
696 cmovc r11,r10 | 696 cmovc r11,r10 |
697 sub rsp,r11 | 697 sub rsp,r11 |
698 $L$sqr8x_sp_done: | 698 $L$sqr8x_sp_done: |
699 and rsp,-64 | 699 and rsp,-64 |
700 mov r10,r9 | 700 mov r10,r9 |
701 neg r9 | 701 neg r9 |
702 | 702 |
703 lea r11,[64+r9*2+rsp] | |
704 mov QWORD[32+rsp],r8 | 703 mov QWORD[32+rsp],r8 |
705 mov QWORD[40+rsp],rax | 704 mov QWORD[40+rsp],rax |
706 $L$sqr8x_body: | 705 $L$sqr8x_body: |
707 | 706 |
708 » mov» rbp,r9 | 707 DB» 102,72,15,110,209 |
709 DB» 102,73,15,110,211 | |
710 » shr» rbp,3+2 | |
711 » mov» eax,DWORD[((OPENSSL_ia32cap_P+8))] | |
712 » jmp» NEAR $L$sqr8x_copy_n | |
713 | |
714 ALIGN» 32 | |
715 $L$sqr8x_copy_n: | |
716 » movq» xmm0,QWORD[rcx] | |
717 » movq» xmm1,QWORD[8+rcx] | |
718 » movq» xmm3,QWORD[16+rcx] | |
719 » movq» xmm4,QWORD[24+rcx] | |
720 » lea» rcx,[32+rcx] | |
721 » movdqa» XMMWORD[r11],xmm0 | |
722 » movdqa» XMMWORD[16+r11],xmm1 | |
723 » movdqa» XMMWORD[32+r11],xmm3 | |
724 » movdqa» XMMWORD[48+r11],xmm4 | |
725 » lea» r11,[64+r11] | |
726 » dec» rbp | |
727 » jnz» NEAR $L$sqr8x_copy_n | |
728 | |
729 pxor xmm0,xmm0 | 708 pxor xmm0,xmm0 |
730 DB 102,72,15,110,207 | 709 DB 102,72,15,110,207 |
731 DB 102,73,15,110,218 | 710 DB 102,73,15,110,218 |
732 call bn_sqr8x_internal | 711 call bn_sqr8x_internal |
733 | 712 |
734 » pxor» xmm0,xmm0 | 713 |
735 » lea» rax,[48+rsp] | 714 |
736 » lea» rdx,[64+r9*2+rsp] | 715 |
737 » shr» r9,3+2 | 716 » lea» rbx,[r9*1+rdi] |
738 » mov» rsi,QWORD[40+rsp] | 717 » mov» rcx,r9 |
739 » jmp» NEAR $L$sqr8x_zero | 718 » mov» rdx,r9 |
| 719 DB» 102,72,15,126,207 |
| 720 » sar» rcx,3+2 |
| 721 » jmp» NEAR $L$sqr8x_sub |
740 | 722 |
741 ALIGN 32 | 723 ALIGN 32 |
742 $L$sqr8x_zero: | 724 $L$sqr8x_sub: |
743 » movdqa» XMMWORD[rax],xmm0 | 725 » mov» r12,QWORD[rbx] |
744 » movdqa» XMMWORD[16+rax],xmm0 | 726 » mov» r13,QWORD[8+rbx] |
745 » movdqa» XMMWORD[32+rax],xmm0 | 727 » mov» r14,QWORD[16+rbx] |
746 » movdqa» XMMWORD[48+rax],xmm0 | 728 » mov» r15,QWORD[24+rbx] |
747 » lea» rax,[64+rax] | 729 » lea» rbx,[32+rbx] |
748 » movdqa» XMMWORD[rdx],xmm0 | 730 » sbb» r12,QWORD[rbp] |
749 » movdqa» XMMWORD[16+rdx],xmm0 | 731 » sbb» r13,QWORD[8+rbp] |
750 » movdqa» XMMWORD[32+rdx],xmm0 | 732 » sbb» r14,QWORD[16+rbp] |
751 » movdqa» XMMWORD[48+rdx],xmm0 | 733 » sbb» r15,QWORD[24+rbp] |
752 » lea» rdx,[64+rdx] | 734 » lea» rbp,[32+rbp] |
753 » dec» r9 | 735 » mov» QWORD[rdi],r12 |
754 » jnz» NEAR $L$sqr8x_zero | 736 » mov» QWORD[8+rdi],r13 |
| 737 » mov» QWORD[16+rdi],r14 |
| 738 » mov» QWORD[24+rdi],r15 |
| 739 » lea» rdi,[32+rdi] |
| 740 » inc» rcx |
| 741 » jnz» NEAR $L$sqr8x_sub |
| 742 |
| 743 » sbb» rax,0 |
| 744 » lea» rbx,[r9*1+rbx] |
| 745 » lea» rdi,[r9*1+rdi] |
| 746 |
| 747 DB» 102,72,15,110,200 |
| 748 » pxor» xmm0,xmm0 |
| 749 » pshufd» xmm1,xmm1,0 |
| 750 » mov» rsi,QWORD[40+rsp] |
| 751 » jmp» NEAR $L$sqr8x_cond_copy |
| 752 |
| 753 ALIGN» 32 |
| 754 $L$sqr8x_cond_copy: |
| 755 » movdqa» xmm2,XMMWORD[rbx] |
| 756 » movdqa» xmm3,XMMWORD[16+rbx] |
| 757 » lea» rbx,[32+rbx] |
| 758 » movdqu» xmm4,XMMWORD[rdi] |
| 759 » movdqu» xmm5,XMMWORD[16+rdi] |
| 760 » lea» rdi,[32+rdi] |
| 761 » movdqa» XMMWORD[(-32)+rbx],xmm0 |
| 762 » movdqa» XMMWORD[(-16)+rbx],xmm0 |
| 763 » movdqa» XMMWORD[(-32)+rdx*1+rbx],xmm0 |
| 764 » movdqa» XMMWORD[(-16)+rdx*1+rbx],xmm0 |
| 765 » pcmpeqd»xmm0,xmm1 |
| 766 » pand» xmm2,xmm1 |
| 767 » pand» xmm3,xmm1 |
| 768 » pand» xmm4,xmm0 |
| 769 » pand» xmm5,xmm0 |
| 770 » pxor» xmm0,xmm0 |
| 771 » por» xmm4,xmm2 |
| 772 » por» xmm5,xmm3 |
| 773 » movdqu» XMMWORD[(-32)+rdi],xmm4 |
| 774 » movdqu» XMMWORD[(-16)+rdi],xmm5 |
| 775 » add» r9,32 |
| 776 » jnz» NEAR $L$sqr8x_cond_copy |
755 | 777 |
756 mov rax,1 | 778 mov rax,1 |
757 mov r15,QWORD[((-48))+rsi] | 779 mov r15,QWORD[((-48))+rsi] |
758 mov r14,QWORD[((-40))+rsi] | 780 mov r14,QWORD[((-40))+rsi] |
759 mov r13,QWORD[((-32))+rsi] | 781 mov r13,QWORD[((-32))+rsi] |
760 mov r12,QWORD[((-24))+rsi] | 782 mov r12,QWORD[((-24))+rsi] |
761 mov rbp,QWORD[((-16))+rsi] | 783 mov rbp,QWORD[((-16))+rsi] |
762 mov rbx,QWORD[((-8))+rsi] | 784 mov rbx,QWORD[((-8))+rsi] |
763 lea rsp,[rsi] | 785 lea rsp,[rsi] |
764 $L$sqr8x_epilogue: | 786 $L$sqr8x_epilogue: |
(...skipping 167 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
932 DD mul_handler wrt ..imagebase | 954 DD mul_handler wrt ..imagebase |
933 DD $L$mul_body wrt ..imagebase,$L$mul_epilogue wrt ..imagebase | 955 DD $L$mul_body wrt ..imagebase,$L$mul_epilogue wrt ..imagebase |
934 $L$SEH_info_bn_mul4x_mont: | 956 $L$SEH_info_bn_mul4x_mont: |
935 DB 9,0,0,0 | 957 DB 9,0,0,0 |
936 DD mul_handler wrt ..imagebase | 958 DD mul_handler wrt ..imagebase |
937 DD $L$mul4x_body wrt ..imagebase,$L$mul4x_epilogue wrt ..imagebase | 959 DD $L$mul4x_body wrt ..imagebase,$L$mul4x_epilogue wrt ..imagebase |
938 $L$SEH_info_bn_sqr8x_mont: | 960 $L$SEH_info_bn_sqr8x_mont: |
939 DB 9,0,0,0 | 961 DB 9,0,0,0 |
940 DD sqr_handler wrt ..imagebase | 962 DD sqr_handler wrt ..imagebase |
941 DD $L$sqr8x_body wrt ..imagebase,$L$sqr8x_epilogue wrt ..imagebase | 963 DD $L$sqr8x_body wrt ..imagebase,$L$sqr8x_epilogue wrt ..imagebase |
OLD | NEW |