| OLD | NEW |
| 1 #if defined(__x86_64__) | 1 #if defined(__x86_64__) |
| 2 .text | 2 .text |
| 3 | 3 |
| 4 | 4 |
| 5 | 5 |
| 6 .globl _rsaz_512_sqr | 6 .globl _rsaz_512_sqr |
| 7 .private_extern _rsaz_512_sqr | 7 .private_extern _rsaz_512_sqr |
| 8 | 8 |
| 9 .p2align 5 | 9 .p2align 5 |
| 10 _rsaz_512_sqr: | 10 _rsaz_512_sqr: |
| (...skipping 447 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 458 | 458 |
| 459 .p2align 5 | 459 .p2align 5 |
| 460 _rsaz_512_mul_gather4: | 460 _rsaz_512_mul_gather4: |
| 461 pushq %rbx | 461 pushq %rbx |
| 462 pushq %rbp | 462 pushq %rbp |
| 463 pushq %r12 | 463 pushq %r12 |
| 464 pushq %r13 | 464 pushq %r13 |
| 465 pushq %r14 | 465 pushq %r14 |
| 466 pushq %r15 | 466 pushq %r15 |
| 467 | 467 |
| 468 » movl» %r9d,%r9d | 468 » subq» $152,%rsp |
| 469 » subq» $128+24,%rsp | |
| 470 L$mul_gather4_body: | 469 L$mul_gather4_body: |
| 471 » movl» 64(%rdx,%r9,4),%eax | 470 » movd» %r9d,%xmm8 |
| 472 .byte» 102,72,15,110,199 | 471 » movdqa» L$inc+16(%rip),%xmm1 |
| 473 » movl» (%rdx,%r9,4),%ebx | 472 » movdqa» L$inc(%rip),%xmm0 |
| 474 .byte» 102,72,15,110,201 | 473 |
| 474 » pshufd» $0,%xmm8,%xmm8 |
| 475 » movdqa» %xmm1,%xmm7 |
| 476 » movdqa» %xmm1,%xmm2 |
| 477 » paddd» %xmm0,%xmm1 |
| 478 » pcmpeqd»%xmm8,%xmm0 |
| 479 » movdqa» %xmm7,%xmm3 |
| 480 » paddd» %xmm1,%xmm2 |
| 481 » pcmpeqd»%xmm8,%xmm1 |
| 482 » movdqa» %xmm7,%xmm4 |
| 483 » paddd» %xmm2,%xmm3 |
| 484 » pcmpeqd»%xmm8,%xmm2 |
| 485 » movdqa» %xmm7,%xmm5 |
| 486 » paddd» %xmm3,%xmm4 |
| 487 » pcmpeqd»%xmm8,%xmm3 |
| 488 » movdqa» %xmm7,%xmm6 |
| 489 » paddd» %xmm4,%xmm5 |
| 490 » pcmpeqd»%xmm8,%xmm4 |
| 491 » paddd» %xmm5,%xmm6 |
| 492 » pcmpeqd»%xmm8,%xmm5 |
| 493 » paddd» %xmm6,%xmm7 |
| 494 » pcmpeqd»%xmm8,%xmm6 |
| 495 » pcmpeqd»%xmm8,%xmm7 |
| 496 |
| 497 » movdqa» 0(%rdx),%xmm8 |
| 498 » movdqa» 16(%rdx),%xmm9 |
| 499 » movdqa» 32(%rdx),%xmm10 |
| 500 » movdqa» 48(%rdx),%xmm11 |
| 501 » pand» %xmm0,%xmm8 |
| 502 » movdqa» 64(%rdx),%xmm12 |
| 503 » pand» %xmm1,%xmm9 |
| 504 » movdqa» 80(%rdx),%xmm13 |
| 505 » pand» %xmm2,%xmm10 |
| 506 » movdqa» 96(%rdx),%xmm14 |
| 507 » pand» %xmm3,%xmm11 |
| 508 » movdqa» 112(%rdx),%xmm15 |
| 509 » leaq» 128(%rdx),%rbp |
| 510 » pand» %xmm4,%xmm12 |
| 511 » pand» %xmm5,%xmm13 |
| 512 » pand» %xmm6,%xmm14 |
| 513 » pand» %xmm7,%xmm15 |
| 514 » por» %xmm10,%xmm8 |
| 515 » por» %xmm11,%xmm9 |
| 516 » por» %xmm12,%xmm8 |
| 517 » por» %xmm13,%xmm9 |
| 518 » por» %xmm14,%xmm8 |
| 519 » por» %xmm15,%xmm9 |
| 520 |
| 521 » por» %xmm9,%xmm8 |
| 522 » pshufd» $0x4e,%xmm8,%xmm9 |
| 523 » por» %xmm9,%xmm8 |
| 524 .byte» 102,76,15,126,195 |
| 525 |
| 475 movq %r8,128(%rsp) | 526 movq %r8,128(%rsp) |
| 527 movq %rdi,128+8(%rsp) |
| 528 movq %rcx,128+16(%rsp) |
| 476 | 529 |
| 477 shlq $32,%rax | |
| 478 orq %rax,%rbx | |
| 479 movq (%rsi),%rax | 530 movq (%rsi),%rax |
| 480 movq 8(%rsi),%rcx | 531 movq 8(%rsi),%rcx |
| 481 leaq 128(%rdx,%r9,4),%rbp | |
| 482 mulq %rbx | 532 mulq %rbx |
| 483 movq %rax,(%rsp) | 533 movq %rax,(%rsp) |
| 484 movq %rcx,%rax | 534 movq %rcx,%rax |
| 485 movq %rdx,%r8 | 535 movq %rdx,%r8 |
| 486 | 536 |
| 487 mulq %rbx | 537 mulq %rbx |
| 488 movd (%rbp),%xmm4 | |
| 489 addq %rax,%r8 | 538 addq %rax,%r8 |
| 490 movq 16(%rsi),%rax | 539 movq 16(%rsi),%rax |
| 491 movq %rdx,%r9 | 540 movq %rdx,%r9 |
| 492 adcq $0,%r9 | 541 adcq $0,%r9 |
| 493 | 542 |
| 494 mulq %rbx | 543 mulq %rbx |
| 495 movd 64(%rbp),%xmm5 | |
| 496 addq %rax,%r9 | 544 addq %rax,%r9 |
| 497 movq 24(%rsi),%rax | 545 movq 24(%rsi),%rax |
| 498 movq %rdx,%r10 | 546 movq %rdx,%r10 |
| 499 adcq $0,%r10 | 547 adcq $0,%r10 |
| 500 | 548 |
| 501 mulq %rbx | 549 mulq %rbx |
| 502 pslldq $4,%xmm5 | |
| 503 addq %rax,%r10 | 550 addq %rax,%r10 |
| 504 movq 32(%rsi),%rax | 551 movq 32(%rsi),%rax |
| 505 movq %rdx,%r11 | 552 movq %rdx,%r11 |
| 506 adcq $0,%r11 | 553 adcq $0,%r11 |
| 507 | 554 |
| 508 mulq %rbx | 555 mulq %rbx |
| 509 por %xmm5,%xmm4 | |
| 510 addq %rax,%r11 | 556 addq %rax,%r11 |
| 511 movq 40(%rsi),%rax | 557 movq 40(%rsi),%rax |
| 512 movq %rdx,%r12 | 558 movq %rdx,%r12 |
| 513 adcq $0,%r12 | 559 adcq $0,%r12 |
| 514 | 560 |
| 515 mulq %rbx | 561 mulq %rbx |
| 516 addq %rax,%r12 | 562 addq %rax,%r12 |
| 517 movq 48(%rsi),%rax | 563 movq 48(%rsi),%rax |
| 518 movq %rdx,%r13 | 564 movq %rdx,%r13 |
| 519 adcq $0,%r13 | 565 adcq $0,%r13 |
| 520 | 566 |
| 521 mulq %rbx | 567 mulq %rbx |
| 522 leaq 128(%rbp),%rbp | |
| 523 addq %rax,%r13 | 568 addq %rax,%r13 |
| 524 movq 56(%rsi),%rax | 569 movq 56(%rsi),%rax |
| 525 movq %rdx,%r14 | 570 movq %rdx,%r14 |
| 526 adcq $0,%r14 | 571 adcq $0,%r14 |
| 527 | 572 |
| 528 mulq %rbx | 573 mulq %rbx |
| 529 .byte 102,72,15,126,227 | |
| 530 addq %rax,%r14 | 574 addq %rax,%r14 |
| 531 movq (%rsi),%rax | 575 movq (%rsi),%rax |
| 532 movq %rdx,%r15 | 576 movq %rdx,%r15 |
| 533 adcq $0,%r15 | 577 adcq $0,%r15 |
| 534 | 578 |
| 535 leaq 8(%rsp),%rdi | 579 leaq 8(%rsp),%rdi |
| 536 movl $7,%ecx | 580 movl $7,%ecx |
| 537 jmp L$oop_mul_gather | 581 jmp L$oop_mul_gather |
| 538 | 582 |
| 539 .p2align 5 | 583 .p2align 5 |
| 540 L$oop_mul_gather: | 584 L$oop_mul_gather: |
| 585 movdqa 0(%rbp),%xmm8 |
| 586 movdqa 16(%rbp),%xmm9 |
| 587 movdqa 32(%rbp),%xmm10 |
| 588 movdqa 48(%rbp),%xmm11 |
| 589 pand %xmm0,%xmm8 |
| 590 movdqa 64(%rbp),%xmm12 |
| 591 pand %xmm1,%xmm9 |
| 592 movdqa 80(%rbp),%xmm13 |
| 593 pand %xmm2,%xmm10 |
| 594 movdqa 96(%rbp),%xmm14 |
| 595 pand %xmm3,%xmm11 |
| 596 movdqa 112(%rbp),%xmm15 |
| 597 leaq 128(%rbp),%rbp |
| 598 pand %xmm4,%xmm12 |
| 599 pand %xmm5,%xmm13 |
| 600 pand %xmm6,%xmm14 |
| 601 pand %xmm7,%xmm15 |
| 602 por %xmm10,%xmm8 |
| 603 por %xmm11,%xmm9 |
| 604 por %xmm12,%xmm8 |
| 605 por %xmm13,%xmm9 |
| 606 por %xmm14,%xmm8 |
| 607 por %xmm15,%xmm9 |
| 608 |
| 609 por %xmm9,%xmm8 |
| 610 pshufd $0x4e,%xmm8,%xmm9 |
| 611 por %xmm9,%xmm8 |
| 612 .byte 102,76,15,126,195 |
| 613 |
| 541 mulq %rbx | 614 mulq %rbx |
| 542 addq %rax,%r8 | 615 addq %rax,%r8 |
| 543 movq 8(%rsi),%rax | 616 movq 8(%rsi),%rax |
| 544 movq %r8,(%rdi) | 617 movq %r8,(%rdi) |
| 545 movq %rdx,%r8 | 618 movq %rdx,%r8 |
| 546 adcq $0,%r8 | 619 adcq $0,%r8 |
| 547 | 620 |
| 548 mulq %rbx | 621 mulq %rbx |
| 549 movd (%rbp),%xmm4 | |
| 550 addq %rax,%r9 | 622 addq %rax,%r9 |
| 551 movq 16(%rsi),%rax | 623 movq 16(%rsi),%rax |
| 552 adcq $0,%rdx | 624 adcq $0,%rdx |
| 553 addq %r9,%r8 | 625 addq %r9,%r8 |
| 554 movq %rdx,%r9 | 626 movq %rdx,%r9 |
| 555 adcq $0,%r9 | 627 adcq $0,%r9 |
| 556 | 628 |
| 557 mulq %rbx | 629 mulq %rbx |
| 558 movd 64(%rbp),%xmm5 | |
| 559 addq %rax,%r10 | 630 addq %rax,%r10 |
| 560 movq 24(%rsi),%rax | 631 movq 24(%rsi),%rax |
| 561 adcq $0,%rdx | 632 adcq $0,%rdx |
| 562 addq %r10,%r9 | 633 addq %r10,%r9 |
| 563 movq %rdx,%r10 | 634 movq %rdx,%r10 |
| 564 adcq $0,%r10 | 635 adcq $0,%r10 |
| 565 | 636 |
| 566 mulq %rbx | 637 mulq %rbx |
| 567 pslldq $4,%xmm5 | |
| 568 addq %rax,%r11 | 638 addq %rax,%r11 |
| 569 movq 32(%rsi),%rax | 639 movq 32(%rsi),%rax |
| 570 adcq $0,%rdx | 640 adcq $0,%rdx |
| 571 addq %r11,%r10 | 641 addq %r11,%r10 |
| 572 movq %rdx,%r11 | 642 movq %rdx,%r11 |
| 573 adcq $0,%r11 | 643 adcq $0,%r11 |
| 574 | 644 |
| 575 mulq %rbx | 645 mulq %rbx |
| 576 por %xmm5,%xmm4 | |
| 577 addq %rax,%r12 | 646 addq %rax,%r12 |
| 578 movq 40(%rsi),%rax | 647 movq 40(%rsi),%rax |
| 579 adcq $0,%rdx | 648 adcq $0,%rdx |
| 580 addq %r12,%r11 | 649 addq %r12,%r11 |
| 581 movq %rdx,%r12 | 650 movq %rdx,%r12 |
| 582 adcq $0,%r12 | 651 adcq $0,%r12 |
| 583 | 652 |
| 584 mulq %rbx | 653 mulq %rbx |
| 585 addq %rax,%r13 | 654 addq %rax,%r13 |
| 586 movq 48(%rsi),%rax | 655 movq 48(%rsi),%rax |
| 587 adcq $0,%rdx | 656 adcq $0,%rdx |
| 588 addq %r13,%r12 | 657 addq %r13,%r12 |
| 589 movq %rdx,%r13 | 658 movq %rdx,%r13 |
| 590 adcq $0,%r13 | 659 adcq $0,%r13 |
| 591 | 660 |
| 592 mulq %rbx | 661 mulq %rbx |
| 593 addq %rax,%r14 | 662 addq %rax,%r14 |
| 594 movq 56(%rsi),%rax | 663 movq 56(%rsi),%rax |
| 595 adcq $0,%rdx | 664 adcq $0,%rdx |
| 596 addq %r14,%r13 | 665 addq %r14,%r13 |
| 597 movq %rdx,%r14 | 666 movq %rdx,%r14 |
| 598 adcq $0,%r14 | 667 adcq $0,%r14 |
| 599 | 668 |
| 600 mulq %rbx | 669 mulq %rbx |
| 601 .byte 102,72,15,126,227 | |
| 602 addq %rax,%r15 | 670 addq %rax,%r15 |
| 603 movq (%rsi),%rax | 671 movq (%rsi),%rax |
| 604 adcq $0,%rdx | 672 adcq $0,%rdx |
| 605 addq %r15,%r14 | 673 addq %r15,%r14 |
| 606 movq %rdx,%r15 | 674 movq %rdx,%r15 |
| 607 adcq $0,%r15 | 675 adcq $0,%r15 |
| 608 | 676 |
| 609 leaq 128(%rbp),%rbp | |
| 610 leaq 8(%rdi),%rdi | 677 leaq 8(%rdi),%rdi |
| 611 | 678 |
| 612 decl %ecx | 679 decl %ecx |
| 613 jnz L$oop_mul_gather | 680 jnz L$oop_mul_gather |
| 614 | 681 |
| 615 movq %r8,(%rdi) | 682 movq %r8,(%rdi) |
| 616 movq %r9,8(%rdi) | 683 movq %r9,8(%rdi) |
| 617 movq %r10,16(%rdi) | 684 movq %r10,16(%rdi) |
| 618 movq %r11,24(%rdi) | 685 movq %r11,24(%rdi) |
| 619 movq %r12,32(%rdi) | 686 movq %r12,32(%rdi) |
| 620 movq %r13,40(%rdi) | 687 movq %r13,40(%rdi) |
| 621 movq %r14,48(%rdi) | 688 movq %r14,48(%rdi) |
| 622 movq %r15,56(%rdi) | 689 movq %r15,56(%rdi) |
| 623 | 690 |
| 624 .byte» 102,72,15,126,199 | 691 » movq» 128+8(%rsp),%rdi |
| 625 .byte» 102,72,15,126,205 | 692 » movq» 128+16(%rsp),%rbp |
| 626 | 693 |
| 627 movq (%rsp),%r8 | 694 movq (%rsp),%r8 |
| 628 movq 8(%rsp),%r9 | 695 movq 8(%rsp),%r9 |
| 629 movq 16(%rsp),%r10 | 696 movq 16(%rsp),%r10 |
| 630 movq 24(%rsp),%r11 | 697 movq 24(%rsp),%r11 |
| 631 movq 32(%rsp),%r12 | 698 movq 32(%rsp),%r12 |
| 632 movq 40(%rsp),%r13 | 699 movq 40(%rsp),%r13 |
| 633 movq 48(%rsp),%r14 | 700 movq 48(%rsp),%r14 |
| 634 movq 56(%rsp),%r15 | 701 movq 56(%rsp),%r15 |
| 635 | 702 |
| (...skipping 29 matching lines...) Expand all Loading... |
| 665 pushq %rbx | 732 pushq %rbx |
| 666 pushq %rbp | 733 pushq %rbp |
| 667 pushq %r12 | 734 pushq %r12 |
| 668 pushq %r13 | 735 pushq %r13 |
| 669 pushq %r14 | 736 pushq %r14 |
| 670 pushq %r15 | 737 pushq %r15 |
| 671 | 738 |
| 672 movl %r9d,%r9d | 739 movl %r9d,%r9d |
| 673 subq $128+24,%rsp | 740 subq $128+24,%rsp |
| 674 L$mul_scatter4_body: | 741 L$mul_scatter4_body: |
| 675 » leaq» (%r8,%r9,4),%r8 | 742 » leaq» (%r8,%r9,8),%r8 |
| 676 .byte 102,72,15,110,199 | 743 .byte 102,72,15,110,199 |
| 677 .byte 102,72,15,110,202 | 744 .byte 102,72,15,110,202 |
| 678 .byte 102,73,15,110,208 | 745 .byte 102,73,15,110,208 |
| 679 movq %rcx,128(%rsp) | 746 movq %rcx,128(%rsp) |
| 680 | 747 |
| 681 movq %rdi,%rbp | 748 movq %rdi,%rbp |
| 682 movq (%rdi),%rbx | 749 movq (%rdi),%rbx |
| 683 call __rsaz_512_mul | 750 call __rsaz_512_mul |
| 684 | 751 |
| 685 .byte 102,72,15,126,199 | 752 .byte 102,72,15,126,199 |
| (...skipping 15 matching lines...) Expand all Loading... |
| 701 adcq 88(%rsp),%r11 | 768 adcq 88(%rsp),%r11 |
| 702 adcq 96(%rsp),%r12 | 769 adcq 96(%rsp),%r12 |
| 703 adcq 104(%rsp),%r13 | 770 adcq 104(%rsp),%r13 |
| 704 adcq 112(%rsp),%r14 | 771 adcq 112(%rsp),%r14 |
| 705 adcq 120(%rsp),%r15 | 772 adcq 120(%rsp),%r15 |
| 706 .byte 102,72,15,126,214 | 773 .byte 102,72,15,126,214 |
| 707 sbbq %rcx,%rcx | 774 sbbq %rcx,%rcx |
| 708 | 775 |
| 709 call __rsaz_512_subtract | 776 call __rsaz_512_subtract |
| 710 | 777 |
| 711 » movl» %r8d,0(%rsi) | 778 » movq» %r8,0(%rsi) |
| 712 » shrq» $32,%r8 | 779 » movq» %r9,128(%rsi) |
| 713 » movl» %r9d,128(%rsi) | 780 » movq» %r10,256(%rsi) |
| 714 » shrq» $32,%r9 | 781 » movq» %r11,384(%rsi) |
| 715 » movl» %r10d,256(%rsi) | 782 » movq» %r12,512(%rsi) |
| 716 » shrq» $32,%r10 | 783 » movq» %r13,640(%rsi) |
| 717 » movl» %r11d,384(%rsi) | 784 » movq» %r14,768(%rsi) |
| 718 » shrq» $32,%r11 | 785 » movq» %r15,896(%rsi) |
| 719 » movl» %r12d,512(%rsi) | |
| 720 » shrq» $32,%r12 | |
| 721 » movl» %r13d,640(%rsi) | |
| 722 » shrq» $32,%r13 | |
| 723 » movl» %r14d,768(%rsi) | |
| 724 » shrq» $32,%r14 | |
| 725 » movl» %r15d,896(%rsi) | |
| 726 » shrq» $32,%r15 | |
| 727 » movl» %r8d,64(%rsi) | |
| 728 » movl» %r9d,192(%rsi) | |
| 729 » movl» %r10d,320(%rsi) | |
| 730 » movl» %r11d,448(%rsi) | |
| 731 » movl» %r12d,576(%rsi) | |
| 732 » movl» %r13d,704(%rsi) | |
| 733 » movl» %r14d,832(%rsi) | |
| 734 » movl» %r15d,960(%rsi) | |
| 735 | 786 |
| 736 leaq 128+24+48(%rsp),%rax | 787 leaq 128+24+48(%rsp),%rax |
| 737 movq -48(%rax),%r15 | 788 movq -48(%rax),%r15 |
| 738 movq -40(%rax),%r14 | 789 movq -40(%rax),%r14 |
| 739 movq -32(%rax),%r13 | 790 movq -32(%rax),%r13 |
| 740 movq -24(%rax),%r12 | 791 movq -24(%rax),%r12 |
| 741 movq -16(%rax),%rbp | 792 movq -16(%rax),%rbp |
| 742 movq -8(%rax),%rbx | 793 movq -8(%rax),%rbx |
| 743 leaq (%rax),%rsp | 794 leaq (%rax),%rsp |
| 744 L$mul_scatter4_epilogue: | 795 L$mul_scatter4_epilogue: |
| (...skipping 334 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1079 movq %r14,48(%rdi) | 1130 movq %r14,48(%rdi) |
| 1080 movq %r15,56(%rdi) | 1131 movq %r15,56(%rdi) |
| 1081 | 1132 |
| 1082 .byte 0xf3,0xc3 | 1133 .byte 0xf3,0xc3 |
| 1083 | 1134 |
| 1084 .globl _rsaz_512_scatter4 | 1135 .globl _rsaz_512_scatter4 |
| 1085 .private_extern _rsaz_512_scatter4 | 1136 .private_extern _rsaz_512_scatter4 |
| 1086 | 1137 |
| 1087 .p2align 4 | 1138 .p2align 4 |
| 1088 _rsaz_512_scatter4: | 1139 _rsaz_512_scatter4: |
| 1089 » leaq» (%rdi,%rdx,4),%rdi | 1140 » leaq» (%rdi,%rdx,8),%rdi |
| 1090 movl $8,%r9d | 1141 movl $8,%r9d |
| 1091 jmp L$oop_scatter | 1142 jmp L$oop_scatter |
| 1092 .p2align 4 | 1143 .p2align 4 |
| 1093 L$oop_scatter: | 1144 L$oop_scatter: |
| 1094 movq (%rsi),%rax | 1145 movq (%rsi),%rax |
| 1095 leaq 8(%rsi),%rsi | 1146 leaq 8(%rsi),%rsi |
| 1096 » movl» %eax,(%rdi) | 1147 » movq» %rax,(%rdi) |
| 1097 » shrq» $32,%rax | |
| 1098 » movl» %eax,64(%rdi) | |
| 1099 leaq 128(%rdi),%rdi | 1148 leaq 128(%rdi),%rdi |
| 1100 decl %r9d | 1149 decl %r9d |
| 1101 jnz L$oop_scatter | 1150 jnz L$oop_scatter |
| 1102 .byte 0xf3,0xc3 | 1151 .byte 0xf3,0xc3 |
| 1103 | 1152 |
| 1104 | 1153 |
| 1105 .globl _rsaz_512_gather4 | 1154 .globl _rsaz_512_gather4 |
| 1106 .private_extern _rsaz_512_gather4 | 1155 .private_extern _rsaz_512_gather4 |
| 1107 | 1156 |
| 1108 .p2align 4 | 1157 .p2align 4 |
| 1109 _rsaz_512_gather4: | 1158 _rsaz_512_gather4: |
| 1110 » leaq» (%rsi,%rdx,4),%rsi | 1159 » movd» %edx,%xmm8 |
| 1160 » movdqa» L$inc+16(%rip),%xmm1 |
| 1161 » movdqa» L$inc(%rip),%xmm0 |
| 1162 |
| 1163 » pshufd» $0,%xmm8,%xmm8 |
| 1164 » movdqa» %xmm1,%xmm7 |
| 1165 » movdqa» %xmm1,%xmm2 |
| 1166 » paddd» %xmm0,%xmm1 |
| 1167 » pcmpeqd»%xmm8,%xmm0 |
| 1168 » movdqa» %xmm7,%xmm3 |
| 1169 » paddd» %xmm1,%xmm2 |
| 1170 » pcmpeqd»%xmm8,%xmm1 |
| 1171 » movdqa» %xmm7,%xmm4 |
| 1172 » paddd» %xmm2,%xmm3 |
| 1173 » pcmpeqd»%xmm8,%xmm2 |
| 1174 » movdqa» %xmm7,%xmm5 |
| 1175 » paddd» %xmm3,%xmm4 |
| 1176 » pcmpeqd»%xmm8,%xmm3 |
| 1177 » movdqa» %xmm7,%xmm6 |
| 1178 » paddd» %xmm4,%xmm5 |
| 1179 » pcmpeqd»%xmm8,%xmm4 |
| 1180 » paddd» %xmm5,%xmm6 |
| 1181 » pcmpeqd»%xmm8,%xmm5 |
| 1182 » paddd» %xmm6,%xmm7 |
| 1183 » pcmpeqd»%xmm8,%xmm6 |
| 1184 » pcmpeqd»%xmm8,%xmm7 |
| 1111 movl $8,%r9d | 1185 movl $8,%r9d |
| 1112 jmp L$oop_gather | 1186 jmp L$oop_gather |
| 1113 .p2align 4 | 1187 .p2align 4 |
| 1114 L$oop_gather: | 1188 L$oop_gather: |
| 1115 » movl» (%rsi),%eax | 1189 » movdqa» 0(%rsi),%xmm8 |
| 1116 » movl» 64(%rsi),%r8d | 1190 » movdqa» 16(%rsi),%xmm9 |
| 1191 » movdqa» 32(%rsi),%xmm10 |
| 1192 » movdqa» 48(%rsi),%xmm11 |
| 1193 » pand» %xmm0,%xmm8 |
| 1194 » movdqa» 64(%rsi),%xmm12 |
| 1195 » pand» %xmm1,%xmm9 |
| 1196 » movdqa» 80(%rsi),%xmm13 |
| 1197 » pand» %xmm2,%xmm10 |
| 1198 » movdqa» 96(%rsi),%xmm14 |
| 1199 » pand» %xmm3,%xmm11 |
| 1200 » movdqa» 112(%rsi),%xmm15 |
| 1117 leaq 128(%rsi),%rsi | 1201 leaq 128(%rsi),%rsi |
| 1118 » shlq» $32,%r8 | 1202 » pand» %xmm4,%xmm12 |
| 1119 » orq» %r8,%rax | 1203 » pand» %xmm5,%xmm13 |
| 1120 » movq» %rax,(%rdi) | 1204 » pand» %xmm6,%xmm14 |
| 1205 » pand» %xmm7,%xmm15 |
| 1206 » por» %xmm10,%xmm8 |
| 1207 » por» %xmm11,%xmm9 |
| 1208 » por» %xmm12,%xmm8 |
| 1209 » por» %xmm13,%xmm9 |
| 1210 » por» %xmm14,%xmm8 |
| 1211 » por» %xmm15,%xmm9 |
| 1212 |
| 1213 » por» %xmm9,%xmm8 |
| 1214 » pshufd» $0x4e,%xmm8,%xmm9 |
| 1215 » por» %xmm9,%xmm8 |
| 1216 » movq» %xmm8,(%rdi) |
| 1121 leaq 8(%rdi),%rdi | 1217 leaq 8(%rdi),%rdi |
| 1122 decl %r9d | 1218 decl %r9d |
| 1123 jnz L$oop_gather | 1219 jnz L$oop_gather |
| 1124 .byte 0xf3,0xc3 | 1220 .byte 0xf3,0xc3 |
| 1221 L$SEH_end_rsaz_512_gather4: |
| 1125 | 1222 |
| 1223 |
| 1224 .p2align 6 |
| 1225 L$inc: |
| 1226 .long 0,0, 1,1 |
| 1227 .long 2,2, 2,2 |
| 1126 #endif | 1228 #endif |
| OLD | NEW |