| OLD | NEW |
| 1 #if defined(__x86_64__) | 1 #if defined(__x86_64__) |
| 2 .text | 2 .text |
| 3 | 3 |
| 4 .extern OPENSSL_ia32cap_P | 4 .extern OPENSSL_ia32cap_P |
| 5 .hidden OPENSSL_ia32cap_P | 5 .hidden OPENSSL_ia32cap_P |
| 6 | 6 |
| 7 .globl rsaz_512_sqr | 7 .globl rsaz_512_sqr |
| 8 .hidden rsaz_512_sqr | 8 .hidden rsaz_512_sqr |
| 9 .type rsaz_512_sqr,@function | 9 .type rsaz_512_sqr,@function |
| 10 .align 32 | 10 .align 32 |
| (...skipping 448 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 459 .type rsaz_512_mul_gather4,@function | 459 .type rsaz_512_mul_gather4,@function |
| 460 .align 32 | 460 .align 32 |
| 461 rsaz_512_mul_gather4: | 461 rsaz_512_mul_gather4: |
| 462 pushq %rbx | 462 pushq %rbx |
| 463 pushq %rbp | 463 pushq %rbp |
| 464 pushq %r12 | 464 pushq %r12 |
| 465 pushq %r13 | 465 pushq %r13 |
| 466 pushq %r14 | 466 pushq %r14 |
| 467 pushq %r15 | 467 pushq %r15 |
| 468 | 468 |
| 469 » movl» %r9d,%r9d | 469 » subq» $152,%rsp |
| 470 » subq» $128+24,%rsp | |
| 471 .Lmul_gather4_body: | 470 .Lmul_gather4_body: |
| 472 » movl» 64(%rdx,%r9,4),%eax | 471 » movd» %r9d,%xmm8 |
| 473 .byte» 102,72,15,110,199 | 472 » movdqa» .Linc+16(%rip),%xmm1 |
| 474 » movl» (%rdx,%r9,4),%ebx | 473 » movdqa» .Linc(%rip),%xmm0 |
| 475 .byte» 102,72,15,110,201 | 474 |
| 475 » pshufd» $0,%xmm8,%xmm8 |
| 476 » movdqa» %xmm1,%xmm7 |
| 477 » movdqa» %xmm1,%xmm2 |
| 478 » paddd» %xmm0,%xmm1 |
| 479 » pcmpeqd»%xmm8,%xmm0 |
| 480 » movdqa» %xmm7,%xmm3 |
| 481 » paddd» %xmm1,%xmm2 |
| 482 » pcmpeqd»%xmm8,%xmm1 |
| 483 » movdqa» %xmm7,%xmm4 |
| 484 » paddd» %xmm2,%xmm3 |
| 485 » pcmpeqd»%xmm8,%xmm2 |
| 486 » movdqa» %xmm7,%xmm5 |
| 487 » paddd» %xmm3,%xmm4 |
| 488 » pcmpeqd»%xmm8,%xmm3 |
| 489 » movdqa» %xmm7,%xmm6 |
| 490 » paddd» %xmm4,%xmm5 |
| 491 » pcmpeqd»%xmm8,%xmm4 |
| 492 » paddd» %xmm5,%xmm6 |
| 493 » pcmpeqd»%xmm8,%xmm5 |
| 494 » paddd» %xmm6,%xmm7 |
| 495 » pcmpeqd»%xmm8,%xmm6 |
| 496 » pcmpeqd»%xmm8,%xmm7 |
| 497 |
| 498 » movdqa» 0(%rdx),%xmm8 |
| 499 » movdqa» 16(%rdx),%xmm9 |
| 500 » movdqa» 32(%rdx),%xmm10 |
| 501 » movdqa» 48(%rdx),%xmm11 |
| 502 » pand» %xmm0,%xmm8 |
| 503 » movdqa» 64(%rdx),%xmm12 |
| 504 » pand» %xmm1,%xmm9 |
| 505 » movdqa» 80(%rdx),%xmm13 |
| 506 » pand» %xmm2,%xmm10 |
| 507 » movdqa» 96(%rdx),%xmm14 |
| 508 » pand» %xmm3,%xmm11 |
| 509 » movdqa» 112(%rdx),%xmm15 |
| 510 » leaq» 128(%rdx),%rbp |
| 511 » pand» %xmm4,%xmm12 |
| 512 » pand» %xmm5,%xmm13 |
| 513 » pand» %xmm6,%xmm14 |
| 514 » pand» %xmm7,%xmm15 |
| 515 » por» %xmm10,%xmm8 |
| 516 » por» %xmm11,%xmm9 |
| 517 » por» %xmm12,%xmm8 |
| 518 » por» %xmm13,%xmm9 |
| 519 » por» %xmm14,%xmm8 |
| 520 » por» %xmm15,%xmm9 |
| 521 |
| 522 » por» %xmm9,%xmm8 |
| 523 » pshufd» $0x4e,%xmm8,%xmm9 |
| 524 » por» %xmm9,%xmm8 |
| 525 .byte» 102,76,15,126,195 |
| 526 |
| 476 movq %r8,128(%rsp) | 527 movq %r8,128(%rsp) |
| 528 movq %rdi,128+8(%rsp) |
| 529 movq %rcx,128+16(%rsp) |
| 477 | 530 |
| 478 shlq $32,%rax | |
| 479 orq %rax,%rbx | |
| 480 movq (%rsi),%rax | 531 movq (%rsi),%rax |
| 481 movq 8(%rsi),%rcx | 532 movq 8(%rsi),%rcx |
| 482 leaq 128(%rdx,%r9,4),%rbp | |
| 483 mulq %rbx | 533 mulq %rbx |
| 484 movq %rax,(%rsp) | 534 movq %rax,(%rsp) |
| 485 movq %rcx,%rax | 535 movq %rcx,%rax |
| 486 movq %rdx,%r8 | 536 movq %rdx,%r8 |
| 487 | 537 |
| 488 mulq %rbx | 538 mulq %rbx |
| 489 movd (%rbp),%xmm4 | |
| 490 addq %rax,%r8 | 539 addq %rax,%r8 |
| 491 movq 16(%rsi),%rax | 540 movq 16(%rsi),%rax |
| 492 movq %rdx,%r9 | 541 movq %rdx,%r9 |
| 493 adcq $0,%r9 | 542 adcq $0,%r9 |
| 494 | 543 |
| 495 mulq %rbx | 544 mulq %rbx |
| 496 movd 64(%rbp),%xmm5 | |
| 497 addq %rax,%r9 | 545 addq %rax,%r9 |
| 498 movq 24(%rsi),%rax | 546 movq 24(%rsi),%rax |
| 499 movq %rdx,%r10 | 547 movq %rdx,%r10 |
| 500 adcq $0,%r10 | 548 adcq $0,%r10 |
| 501 | 549 |
| 502 mulq %rbx | 550 mulq %rbx |
| 503 pslldq $4,%xmm5 | |
| 504 addq %rax,%r10 | 551 addq %rax,%r10 |
| 505 movq 32(%rsi),%rax | 552 movq 32(%rsi),%rax |
| 506 movq %rdx,%r11 | 553 movq %rdx,%r11 |
| 507 adcq $0,%r11 | 554 adcq $0,%r11 |
| 508 | 555 |
| 509 mulq %rbx | 556 mulq %rbx |
| 510 por %xmm5,%xmm4 | |
| 511 addq %rax,%r11 | 557 addq %rax,%r11 |
| 512 movq 40(%rsi),%rax | 558 movq 40(%rsi),%rax |
| 513 movq %rdx,%r12 | 559 movq %rdx,%r12 |
| 514 adcq $0,%r12 | 560 adcq $0,%r12 |
| 515 | 561 |
| 516 mulq %rbx | 562 mulq %rbx |
| 517 addq %rax,%r12 | 563 addq %rax,%r12 |
| 518 movq 48(%rsi),%rax | 564 movq 48(%rsi),%rax |
| 519 movq %rdx,%r13 | 565 movq %rdx,%r13 |
| 520 adcq $0,%r13 | 566 adcq $0,%r13 |
| 521 | 567 |
| 522 mulq %rbx | 568 mulq %rbx |
| 523 leaq 128(%rbp),%rbp | |
| 524 addq %rax,%r13 | 569 addq %rax,%r13 |
| 525 movq 56(%rsi),%rax | 570 movq 56(%rsi),%rax |
| 526 movq %rdx,%r14 | 571 movq %rdx,%r14 |
| 527 adcq $0,%r14 | 572 adcq $0,%r14 |
| 528 | 573 |
| 529 mulq %rbx | 574 mulq %rbx |
| 530 .byte 102,72,15,126,227 | |
| 531 addq %rax,%r14 | 575 addq %rax,%r14 |
| 532 movq (%rsi),%rax | 576 movq (%rsi),%rax |
| 533 movq %rdx,%r15 | 577 movq %rdx,%r15 |
| 534 adcq $0,%r15 | 578 adcq $0,%r15 |
| 535 | 579 |
| 536 leaq 8(%rsp),%rdi | 580 leaq 8(%rsp),%rdi |
| 537 movl $7,%ecx | 581 movl $7,%ecx |
| 538 jmp .Loop_mul_gather | 582 jmp .Loop_mul_gather |
| 539 | 583 |
| 540 .align 32 | 584 .align 32 |
| 541 .Loop_mul_gather: | 585 .Loop_mul_gather: |
| 586 movdqa 0(%rbp),%xmm8 |
| 587 movdqa 16(%rbp),%xmm9 |
| 588 movdqa 32(%rbp),%xmm10 |
| 589 movdqa 48(%rbp),%xmm11 |
| 590 pand %xmm0,%xmm8 |
| 591 movdqa 64(%rbp),%xmm12 |
| 592 pand %xmm1,%xmm9 |
| 593 movdqa 80(%rbp),%xmm13 |
| 594 pand %xmm2,%xmm10 |
| 595 movdqa 96(%rbp),%xmm14 |
| 596 pand %xmm3,%xmm11 |
| 597 movdqa 112(%rbp),%xmm15 |
| 598 leaq 128(%rbp),%rbp |
| 599 pand %xmm4,%xmm12 |
| 600 pand %xmm5,%xmm13 |
| 601 pand %xmm6,%xmm14 |
| 602 pand %xmm7,%xmm15 |
| 603 por %xmm10,%xmm8 |
| 604 por %xmm11,%xmm9 |
| 605 por %xmm12,%xmm8 |
| 606 por %xmm13,%xmm9 |
| 607 por %xmm14,%xmm8 |
| 608 por %xmm15,%xmm9 |
| 609 |
| 610 por %xmm9,%xmm8 |
| 611 pshufd $0x4e,%xmm8,%xmm9 |
| 612 por %xmm9,%xmm8 |
| 613 .byte 102,76,15,126,195 |
| 614 |
| 542 mulq %rbx | 615 mulq %rbx |
| 543 addq %rax,%r8 | 616 addq %rax,%r8 |
| 544 movq 8(%rsi),%rax | 617 movq 8(%rsi),%rax |
| 545 movq %r8,(%rdi) | 618 movq %r8,(%rdi) |
| 546 movq %rdx,%r8 | 619 movq %rdx,%r8 |
| 547 adcq $0,%r8 | 620 adcq $0,%r8 |
| 548 | 621 |
| 549 mulq %rbx | 622 mulq %rbx |
| 550 movd (%rbp),%xmm4 | |
| 551 addq %rax,%r9 | 623 addq %rax,%r9 |
| 552 movq 16(%rsi),%rax | 624 movq 16(%rsi),%rax |
| 553 adcq $0,%rdx | 625 adcq $0,%rdx |
| 554 addq %r9,%r8 | 626 addq %r9,%r8 |
| 555 movq %rdx,%r9 | 627 movq %rdx,%r9 |
| 556 adcq $0,%r9 | 628 adcq $0,%r9 |
| 557 | 629 |
| 558 mulq %rbx | 630 mulq %rbx |
| 559 movd 64(%rbp),%xmm5 | |
| 560 addq %rax,%r10 | 631 addq %rax,%r10 |
| 561 movq 24(%rsi),%rax | 632 movq 24(%rsi),%rax |
| 562 adcq $0,%rdx | 633 adcq $0,%rdx |
| 563 addq %r10,%r9 | 634 addq %r10,%r9 |
| 564 movq %rdx,%r10 | 635 movq %rdx,%r10 |
| 565 adcq $0,%r10 | 636 adcq $0,%r10 |
| 566 | 637 |
| 567 mulq %rbx | 638 mulq %rbx |
| 568 pslldq $4,%xmm5 | |
| 569 addq %rax,%r11 | 639 addq %rax,%r11 |
| 570 movq 32(%rsi),%rax | 640 movq 32(%rsi),%rax |
| 571 adcq $0,%rdx | 641 adcq $0,%rdx |
| 572 addq %r11,%r10 | 642 addq %r11,%r10 |
| 573 movq %rdx,%r11 | 643 movq %rdx,%r11 |
| 574 adcq $0,%r11 | 644 adcq $0,%r11 |
| 575 | 645 |
| 576 mulq %rbx | 646 mulq %rbx |
| 577 por %xmm5,%xmm4 | |
| 578 addq %rax,%r12 | 647 addq %rax,%r12 |
| 579 movq 40(%rsi),%rax | 648 movq 40(%rsi),%rax |
| 580 adcq $0,%rdx | 649 adcq $0,%rdx |
| 581 addq %r12,%r11 | 650 addq %r12,%r11 |
| 582 movq %rdx,%r12 | 651 movq %rdx,%r12 |
| 583 adcq $0,%r12 | 652 adcq $0,%r12 |
| 584 | 653 |
| 585 mulq %rbx | 654 mulq %rbx |
| 586 addq %rax,%r13 | 655 addq %rax,%r13 |
| 587 movq 48(%rsi),%rax | 656 movq 48(%rsi),%rax |
| 588 adcq $0,%rdx | 657 adcq $0,%rdx |
| 589 addq %r13,%r12 | 658 addq %r13,%r12 |
| 590 movq %rdx,%r13 | 659 movq %rdx,%r13 |
| 591 adcq $0,%r13 | 660 adcq $0,%r13 |
| 592 | 661 |
| 593 mulq %rbx | 662 mulq %rbx |
| 594 addq %rax,%r14 | 663 addq %rax,%r14 |
| 595 movq 56(%rsi),%rax | 664 movq 56(%rsi),%rax |
| 596 adcq $0,%rdx | 665 adcq $0,%rdx |
| 597 addq %r14,%r13 | 666 addq %r14,%r13 |
| 598 movq %rdx,%r14 | 667 movq %rdx,%r14 |
| 599 adcq $0,%r14 | 668 adcq $0,%r14 |
| 600 | 669 |
| 601 mulq %rbx | 670 mulq %rbx |
| 602 .byte 102,72,15,126,227 | |
| 603 addq %rax,%r15 | 671 addq %rax,%r15 |
| 604 movq (%rsi),%rax | 672 movq (%rsi),%rax |
| 605 adcq $0,%rdx | 673 adcq $0,%rdx |
| 606 addq %r15,%r14 | 674 addq %r15,%r14 |
| 607 movq %rdx,%r15 | 675 movq %rdx,%r15 |
| 608 adcq $0,%r15 | 676 adcq $0,%r15 |
| 609 | 677 |
| 610 leaq 128(%rbp),%rbp | |
| 611 leaq 8(%rdi),%rdi | 678 leaq 8(%rdi),%rdi |
| 612 | 679 |
| 613 decl %ecx | 680 decl %ecx |
| 614 jnz .Loop_mul_gather | 681 jnz .Loop_mul_gather |
| 615 | 682 |
| 616 movq %r8,(%rdi) | 683 movq %r8,(%rdi) |
| 617 movq %r9,8(%rdi) | 684 movq %r9,8(%rdi) |
| 618 movq %r10,16(%rdi) | 685 movq %r10,16(%rdi) |
| 619 movq %r11,24(%rdi) | 686 movq %r11,24(%rdi) |
| 620 movq %r12,32(%rdi) | 687 movq %r12,32(%rdi) |
| 621 movq %r13,40(%rdi) | 688 movq %r13,40(%rdi) |
| 622 movq %r14,48(%rdi) | 689 movq %r14,48(%rdi) |
| 623 movq %r15,56(%rdi) | 690 movq %r15,56(%rdi) |
| 624 | 691 |
| 625 .byte» 102,72,15,126,199 | 692 » movq» 128+8(%rsp),%rdi |
| 626 .byte» 102,72,15,126,205 | 693 » movq» 128+16(%rsp),%rbp |
| 627 | 694 |
| 628 movq (%rsp),%r8 | 695 movq (%rsp),%r8 |
| 629 movq 8(%rsp),%r9 | 696 movq 8(%rsp),%r9 |
| 630 movq 16(%rsp),%r10 | 697 movq 16(%rsp),%r10 |
| 631 movq 24(%rsp),%r11 | 698 movq 24(%rsp),%r11 |
| 632 movq 32(%rsp),%r12 | 699 movq 32(%rsp),%r12 |
| 633 movq 40(%rsp),%r13 | 700 movq 40(%rsp),%r13 |
| 634 movq 48(%rsp),%r14 | 701 movq 48(%rsp),%r14 |
| 635 movq 56(%rsp),%r15 | 702 movq 56(%rsp),%r15 |
| 636 | 703 |
| (...skipping 29 matching lines...) Expand all Loading... |
| 666 pushq %rbx | 733 pushq %rbx |
| 667 pushq %rbp | 734 pushq %rbp |
| 668 pushq %r12 | 735 pushq %r12 |
| 669 pushq %r13 | 736 pushq %r13 |
| 670 pushq %r14 | 737 pushq %r14 |
| 671 pushq %r15 | 738 pushq %r15 |
| 672 | 739 |
| 673 movl %r9d,%r9d | 740 movl %r9d,%r9d |
| 674 subq $128+24,%rsp | 741 subq $128+24,%rsp |
| 675 .Lmul_scatter4_body: | 742 .Lmul_scatter4_body: |
| 676 » leaq» (%r8,%r9,4),%r8 | 743 » leaq» (%r8,%r9,8),%r8 |
| 677 .byte 102,72,15,110,199 | 744 .byte 102,72,15,110,199 |
| 678 .byte 102,72,15,110,202 | 745 .byte 102,72,15,110,202 |
| 679 .byte 102,73,15,110,208 | 746 .byte 102,73,15,110,208 |
| 680 movq %rcx,128(%rsp) | 747 movq %rcx,128(%rsp) |
| 681 | 748 |
| 682 movq %rdi,%rbp | 749 movq %rdi,%rbp |
| 683 movq (%rdi),%rbx | 750 movq (%rdi),%rbx |
| 684 call __rsaz_512_mul | 751 call __rsaz_512_mul |
| 685 | 752 |
| 686 .byte 102,72,15,126,199 | 753 .byte 102,72,15,126,199 |
| (...skipping 15 matching lines...) Expand all Loading... |
| 702 adcq 88(%rsp),%r11 | 769 adcq 88(%rsp),%r11 |
| 703 adcq 96(%rsp),%r12 | 770 adcq 96(%rsp),%r12 |
| 704 adcq 104(%rsp),%r13 | 771 adcq 104(%rsp),%r13 |
| 705 adcq 112(%rsp),%r14 | 772 adcq 112(%rsp),%r14 |
| 706 adcq 120(%rsp),%r15 | 773 adcq 120(%rsp),%r15 |
| 707 .byte 102,72,15,126,214 | 774 .byte 102,72,15,126,214 |
| 708 sbbq %rcx,%rcx | 775 sbbq %rcx,%rcx |
| 709 | 776 |
| 710 call __rsaz_512_subtract | 777 call __rsaz_512_subtract |
| 711 | 778 |
| 712 » movl» %r8d,0(%rsi) | 779 » movq» %r8,0(%rsi) |
| 713 » shrq» $32,%r8 | 780 » movq» %r9,128(%rsi) |
| 714 » movl» %r9d,128(%rsi) | 781 » movq» %r10,256(%rsi) |
| 715 » shrq» $32,%r9 | 782 » movq» %r11,384(%rsi) |
| 716 » movl» %r10d,256(%rsi) | 783 » movq» %r12,512(%rsi) |
| 717 » shrq» $32,%r10 | 784 » movq» %r13,640(%rsi) |
| 718 » movl» %r11d,384(%rsi) | 785 » movq» %r14,768(%rsi) |
| 719 » shrq» $32,%r11 | 786 » movq» %r15,896(%rsi) |
| 720 » movl» %r12d,512(%rsi) | |
| 721 » shrq» $32,%r12 | |
| 722 » movl» %r13d,640(%rsi) | |
| 723 » shrq» $32,%r13 | |
| 724 » movl» %r14d,768(%rsi) | |
| 725 » shrq» $32,%r14 | |
| 726 » movl» %r15d,896(%rsi) | |
| 727 » shrq» $32,%r15 | |
| 728 » movl» %r8d,64(%rsi) | |
| 729 » movl» %r9d,192(%rsi) | |
| 730 » movl» %r10d,320(%rsi) | |
| 731 » movl» %r11d,448(%rsi) | |
| 732 » movl» %r12d,576(%rsi) | |
| 733 » movl» %r13d,704(%rsi) | |
| 734 » movl» %r14d,832(%rsi) | |
| 735 » movl» %r15d,960(%rsi) | |
| 736 | 787 |
| 737 leaq 128+24+48(%rsp),%rax | 788 leaq 128+24+48(%rsp),%rax |
| 738 movq -48(%rax),%r15 | 789 movq -48(%rax),%r15 |
| 739 movq -40(%rax),%r14 | 790 movq -40(%rax),%r14 |
| 740 movq -32(%rax),%r13 | 791 movq -32(%rax),%r13 |
| 741 movq -24(%rax),%r12 | 792 movq -24(%rax),%r12 |
| 742 movq -16(%rax),%rbp | 793 movq -16(%rax),%rbp |
| 743 movq -8(%rax),%rbx | 794 movq -8(%rax),%rbx |
| 744 leaq (%rax),%rsp | 795 leaq (%rax),%rsp |
| 745 .Lmul_scatter4_epilogue: | 796 .Lmul_scatter4_epilogue: |
| (...skipping 334 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1080 movq %r14,48(%rdi) | 1131 movq %r14,48(%rdi) |
| 1081 movq %r15,56(%rdi) | 1132 movq %r15,56(%rdi) |
| 1082 | 1133 |
| 1083 .byte 0xf3,0xc3 | 1134 .byte 0xf3,0xc3 |
| 1084 .size __rsaz_512_mul,.-__rsaz_512_mul | 1135 .size __rsaz_512_mul,.-__rsaz_512_mul |
| 1085 .globl rsaz_512_scatter4 | 1136 .globl rsaz_512_scatter4 |
| 1086 .hidden rsaz_512_scatter4 | 1137 .hidden rsaz_512_scatter4 |
| 1087 .type rsaz_512_scatter4,@function | 1138 .type rsaz_512_scatter4,@function |
| 1088 .align 16 | 1139 .align 16 |
| 1089 rsaz_512_scatter4: | 1140 rsaz_512_scatter4: |
| 1090 » leaq» (%rdi,%rdx,4),%rdi | 1141 » leaq» (%rdi,%rdx,8),%rdi |
| 1091 movl $8,%r9d | 1142 movl $8,%r9d |
| 1092 jmp .Loop_scatter | 1143 jmp .Loop_scatter |
| 1093 .align 16 | 1144 .align 16 |
| 1094 .Loop_scatter: | 1145 .Loop_scatter: |
| 1095 movq (%rsi),%rax | 1146 movq (%rsi),%rax |
| 1096 leaq 8(%rsi),%rsi | 1147 leaq 8(%rsi),%rsi |
| 1097 » movl» %eax,(%rdi) | 1148 » movq» %rax,(%rdi) |
| 1098 » shrq» $32,%rax | |
| 1099 » movl» %eax,64(%rdi) | |
| 1100 leaq 128(%rdi),%rdi | 1149 leaq 128(%rdi),%rdi |
| 1101 decl %r9d | 1150 decl %r9d |
| 1102 jnz .Loop_scatter | 1151 jnz .Loop_scatter |
| 1103 .byte 0xf3,0xc3 | 1152 .byte 0xf3,0xc3 |
| 1104 .size rsaz_512_scatter4,.-rsaz_512_scatter4 | 1153 .size rsaz_512_scatter4,.-rsaz_512_scatter4 |
| 1105 | 1154 |
| 1106 .globl rsaz_512_gather4 | 1155 .globl rsaz_512_gather4 |
| 1107 .hidden rsaz_512_gather4 | 1156 .hidden rsaz_512_gather4 |
| 1108 .type rsaz_512_gather4,@function | 1157 .type rsaz_512_gather4,@function |
| 1109 .align 16 | 1158 .align 16 |
| 1110 rsaz_512_gather4: | 1159 rsaz_512_gather4: |
| 1111 » leaq» (%rsi,%rdx,4),%rsi | 1160 » movd» %edx,%xmm8 |
| 1161 » movdqa» .Linc+16(%rip),%xmm1 |
| 1162 » movdqa» .Linc(%rip),%xmm0 |
| 1163 |
| 1164 » pshufd» $0,%xmm8,%xmm8 |
| 1165 » movdqa» %xmm1,%xmm7 |
| 1166 » movdqa» %xmm1,%xmm2 |
| 1167 » paddd» %xmm0,%xmm1 |
| 1168 » pcmpeqd»%xmm8,%xmm0 |
| 1169 » movdqa» %xmm7,%xmm3 |
| 1170 » paddd» %xmm1,%xmm2 |
| 1171 » pcmpeqd»%xmm8,%xmm1 |
| 1172 » movdqa» %xmm7,%xmm4 |
| 1173 » paddd» %xmm2,%xmm3 |
| 1174 » pcmpeqd»%xmm8,%xmm2 |
| 1175 » movdqa» %xmm7,%xmm5 |
| 1176 » paddd» %xmm3,%xmm4 |
| 1177 » pcmpeqd»%xmm8,%xmm3 |
| 1178 » movdqa» %xmm7,%xmm6 |
| 1179 » paddd» %xmm4,%xmm5 |
| 1180 » pcmpeqd»%xmm8,%xmm4 |
| 1181 » paddd» %xmm5,%xmm6 |
| 1182 » pcmpeqd»%xmm8,%xmm5 |
| 1183 » paddd» %xmm6,%xmm7 |
| 1184 » pcmpeqd»%xmm8,%xmm6 |
| 1185 » pcmpeqd»%xmm8,%xmm7 |
| 1112 movl $8,%r9d | 1186 movl $8,%r9d |
| 1113 jmp .Loop_gather | 1187 jmp .Loop_gather |
| 1114 .align 16 | 1188 .align 16 |
| 1115 .Loop_gather: | 1189 .Loop_gather: |
| 1116 » movl» (%rsi),%eax | 1190 » movdqa» 0(%rsi),%xmm8 |
| 1117 » movl» 64(%rsi),%r8d | 1191 » movdqa» 16(%rsi),%xmm9 |
| 1192 » movdqa» 32(%rsi),%xmm10 |
| 1193 » movdqa» 48(%rsi),%xmm11 |
| 1194 » pand» %xmm0,%xmm8 |
| 1195 » movdqa» 64(%rsi),%xmm12 |
| 1196 » pand» %xmm1,%xmm9 |
| 1197 » movdqa» 80(%rsi),%xmm13 |
| 1198 » pand» %xmm2,%xmm10 |
| 1199 » movdqa» 96(%rsi),%xmm14 |
| 1200 » pand» %xmm3,%xmm11 |
| 1201 » movdqa» 112(%rsi),%xmm15 |
| 1118 leaq 128(%rsi),%rsi | 1202 leaq 128(%rsi),%rsi |
| 1119 » shlq» $32,%r8 | 1203 » pand» %xmm4,%xmm12 |
| 1120 » orq» %r8,%rax | 1204 » pand» %xmm5,%xmm13 |
| 1121 » movq» %rax,(%rdi) | 1205 » pand» %xmm6,%xmm14 |
| 1206 » pand» %xmm7,%xmm15 |
| 1207 » por» %xmm10,%xmm8 |
| 1208 » por» %xmm11,%xmm9 |
| 1209 » por» %xmm12,%xmm8 |
| 1210 » por» %xmm13,%xmm9 |
| 1211 » por» %xmm14,%xmm8 |
| 1212 » por» %xmm15,%xmm9 |
| 1213 |
| 1214 » por» %xmm9,%xmm8 |
| 1215 » pshufd» $0x4e,%xmm8,%xmm9 |
| 1216 » por» %xmm9,%xmm8 |
| 1217 » movq» %xmm8,(%rdi) |
| 1122 leaq 8(%rdi),%rdi | 1218 leaq 8(%rdi),%rdi |
| 1123 decl %r9d | 1219 decl %r9d |
| 1124 jnz .Loop_gather | 1220 jnz .Loop_gather |
| 1125 .byte 0xf3,0xc3 | 1221 .byte 0xf3,0xc3 |
| 1222 .LSEH_end_rsaz_512_gather4: |
| 1126 .size rsaz_512_gather4,.-rsaz_512_gather4 | 1223 .size rsaz_512_gather4,.-rsaz_512_gather4 |
| 1224 |
| 1225 .align 64 |
| 1226 .Linc: |
| 1227 .long 0,0, 1,1 |
| 1228 .long 2,2, 2,2 |
| 1127 #endif | 1229 #endif |
| OLD | NEW |