OLD | NEW |
1 #if defined(__x86_64__) | 1 #if defined(__x86_64__) |
2 .text | 2 .text |
3 | 3 |
4 | 4 |
5 | 5 |
6 .globl _rsaz_512_sqr | 6 .globl _rsaz_512_sqr |
7 .private_extern _rsaz_512_sqr | 7 .private_extern _rsaz_512_sqr |
8 | 8 |
9 .p2align 5 | 9 .p2align 5 |
10 _rsaz_512_sqr: | 10 _rsaz_512_sqr: |
(...skipping 447 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
458 | 458 |
459 .p2align 5 | 459 .p2align 5 |
460 _rsaz_512_mul_gather4: | 460 _rsaz_512_mul_gather4: |
461 pushq %rbx | 461 pushq %rbx |
462 pushq %rbp | 462 pushq %rbp |
463 pushq %r12 | 463 pushq %r12 |
464 pushq %r13 | 464 pushq %r13 |
465 pushq %r14 | 465 pushq %r14 |
466 pushq %r15 | 466 pushq %r15 |
467 | 467 |
468 » movl» %r9d,%r9d | 468 » subq» $152,%rsp |
469 » subq» $128+24,%rsp | |
470 L$mul_gather4_body: | 469 L$mul_gather4_body: |
471 » movl» 64(%rdx,%r9,4),%eax | 470 » movd» %r9d,%xmm8 |
472 .byte» 102,72,15,110,199 | 471 » movdqa» L$inc+16(%rip),%xmm1 |
473 » movl» (%rdx,%r9,4),%ebx | 472 » movdqa» L$inc(%rip),%xmm0 |
474 .byte» 102,72,15,110,201 | 473 |
| 474 » pshufd» $0,%xmm8,%xmm8 |
| 475 » movdqa» %xmm1,%xmm7 |
| 476 » movdqa» %xmm1,%xmm2 |
| 477 » paddd» %xmm0,%xmm1 |
| 478 » pcmpeqd»%xmm8,%xmm0 |
| 479 » movdqa» %xmm7,%xmm3 |
| 480 » paddd» %xmm1,%xmm2 |
| 481 » pcmpeqd»%xmm8,%xmm1 |
| 482 » movdqa» %xmm7,%xmm4 |
| 483 » paddd» %xmm2,%xmm3 |
| 484 » pcmpeqd»%xmm8,%xmm2 |
| 485 » movdqa» %xmm7,%xmm5 |
| 486 » paddd» %xmm3,%xmm4 |
| 487 » pcmpeqd»%xmm8,%xmm3 |
| 488 » movdqa» %xmm7,%xmm6 |
| 489 » paddd» %xmm4,%xmm5 |
| 490 » pcmpeqd»%xmm8,%xmm4 |
| 491 » paddd» %xmm5,%xmm6 |
| 492 » pcmpeqd»%xmm8,%xmm5 |
| 493 » paddd» %xmm6,%xmm7 |
| 494 » pcmpeqd»%xmm8,%xmm6 |
| 495 » pcmpeqd»%xmm8,%xmm7 |
| 496 |
| 497 » movdqa» 0(%rdx),%xmm8 |
| 498 » movdqa» 16(%rdx),%xmm9 |
| 499 » movdqa» 32(%rdx),%xmm10 |
| 500 » movdqa» 48(%rdx),%xmm11 |
| 501 » pand» %xmm0,%xmm8 |
| 502 » movdqa» 64(%rdx),%xmm12 |
| 503 » pand» %xmm1,%xmm9 |
| 504 » movdqa» 80(%rdx),%xmm13 |
| 505 » pand» %xmm2,%xmm10 |
| 506 » movdqa» 96(%rdx),%xmm14 |
| 507 » pand» %xmm3,%xmm11 |
| 508 » movdqa» 112(%rdx),%xmm15 |
| 509 » leaq» 128(%rdx),%rbp |
| 510 » pand» %xmm4,%xmm12 |
| 511 » pand» %xmm5,%xmm13 |
| 512 » pand» %xmm6,%xmm14 |
| 513 » pand» %xmm7,%xmm15 |
| 514 » por» %xmm10,%xmm8 |
| 515 » por» %xmm11,%xmm9 |
| 516 » por» %xmm12,%xmm8 |
| 517 » por» %xmm13,%xmm9 |
| 518 » por» %xmm14,%xmm8 |
| 519 » por» %xmm15,%xmm9 |
| 520 |
| 521 » por» %xmm9,%xmm8 |
| 522 » pshufd» $0x4e,%xmm8,%xmm9 |
| 523 » por» %xmm9,%xmm8 |
| 524 .byte» 102,76,15,126,195 |
| 525 |
475 movq %r8,128(%rsp) | 526 movq %r8,128(%rsp) |
| 527 movq %rdi,128+8(%rsp) |
| 528 movq %rcx,128+16(%rsp) |
476 | 529 |
477 shlq $32,%rax | |
478 orq %rax,%rbx | |
479 movq (%rsi),%rax | 530 movq (%rsi),%rax |
480 movq 8(%rsi),%rcx | 531 movq 8(%rsi),%rcx |
481 leaq 128(%rdx,%r9,4),%rbp | |
482 mulq %rbx | 532 mulq %rbx |
483 movq %rax,(%rsp) | 533 movq %rax,(%rsp) |
484 movq %rcx,%rax | 534 movq %rcx,%rax |
485 movq %rdx,%r8 | 535 movq %rdx,%r8 |
486 | 536 |
487 mulq %rbx | 537 mulq %rbx |
488 movd (%rbp),%xmm4 | |
489 addq %rax,%r8 | 538 addq %rax,%r8 |
490 movq 16(%rsi),%rax | 539 movq 16(%rsi),%rax |
491 movq %rdx,%r9 | 540 movq %rdx,%r9 |
492 adcq $0,%r9 | 541 adcq $0,%r9 |
493 | 542 |
494 mulq %rbx | 543 mulq %rbx |
495 movd 64(%rbp),%xmm5 | |
496 addq %rax,%r9 | 544 addq %rax,%r9 |
497 movq 24(%rsi),%rax | 545 movq 24(%rsi),%rax |
498 movq %rdx,%r10 | 546 movq %rdx,%r10 |
499 adcq $0,%r10 | 547 adcq $0,%r10 |
500 | 548 |
501 mulq %rbx | 549 mulq %rbx |
502 pslldq $4,%xmm5 | |
503 addq %rax,%r10 | 550 addq %rax,%r10 |
504 movq 32(%rsi),%rax | 551 movq 32(%rsi),%rax |
505 movq %rdx,%r11 | 552 movq %rdx,%r11 |
506 adcq $0,%r11 | 553 adcq $0,%r11 |
507 | 554 |
508 mulq %rbx | 555 mulq %rbx |
509 por %xmm5,%xmm4 | |
510 addq %rax,%r11 | 556 addq %rax,%r11 |
511 movq 40(%rsi),%rax | 557 movq 40(%rsi),%rax |
512 movq %rdx,%r12 | 558 movq %rdx,%r12 |
513 adcq $0,%r12 | 559 adcq $0,%r12 |
514 | 560 |
515 mulq %rbx | 561 mulq %rbx |
516 addq %rax,%r12 | 562 addq %rax,%r12 |
517 movq 48(%rsi),%rax | 563 movq 48(%rsi),%rax |
518 movq %rdx,%r13 | 564 movq %rdx,%r13 |
519 adcq $0,%r13 | 565 adcq $0,%r13 |
520 | 566 |
521 mulq %rbx | 567 mulq %rbx |
522 leaq 128(%rbp),%rbp | |
523 addq %rax,%r13 | 568 addq %rax,%r13 |
524 movq 56(%rsi),%rax | 569 movq 56(%rsi),%rax |
525 movq %rdx,%r14 | 570 movq %rdx,%r14 |
526 adcq $0,%r14 | 571 adcq $0,%r14 |
527 | 572 |
528 mulq %rbx | 573 mulq %rbx |
529 .byte 102,72,15,126,227 | |
530 addq %rax,%r14 | 574 addq %rax,%r14 |
531 movq (%rsi),%rax | 575 movq (%rsi),%rax |
532 movq %rdx,%r15 | 576 movq %rdx,%r15 |
533 adcq $0,%r15 | 577 adcq $0,%r15 |
534 | 578 |
535 leaq 8(%rsp),%rdi | 579 leaq 8(%rsp),%rdi |
536 movl $7,%ecx | 580 movl $7,%ecx |
537 jmp L$oop_mul_gather | 581 jmp L$oop_mul_gather |
538 | 582 |
539 .p2align 5 | 583 .p2align 5 |
540 L$oop_mul_gather: | 584 L$oop_mul_gather: |
| 585 movdqa 0(%rbp),%xmm8 |
| 586 movdqa 16(%rbp),%xmm9 |
| 587 movdqa 32(%rbp),%xmm10 |
| 588 movdqa 48(%rbp),%xmm11 |
| 589 pand %xmm0,%xmm8 |
| 590 movdqa 64(%rbp),%xmm12 |
| 591 pand %xmm1,%xmm9 |
| 592 movdqa 80(%rbp),%xmm13 |
| 593 pand %xmm2,%xmm10 |
| 594 movdqa 96(%rbp),%xmm14 |
| 595 pand %xmm3,%xmm11 |
| 596 movdqa 112(%rbp),%xmm15 |
| 597 leaq 128(%rbp),%rbp |
| 598 pand %xmm4,%xmm12 |
| 599 pand %xmm5,%xmm13 |
| 600 pand %xmm6,%xmm14 |
| 601 pand %xmm7,%xmm15 |
| 602 por %xmm10,%xmm8 |
| 603 por %xmm11,%xmm9 |
| 604 por %xmm12,%xmm8 |
| 605 por %xmm13,%xmm9 |
| 606 por %xmm14,%xmm8 |
| 607 por %xmm15,%xmm9 |
| 608 |
| 609 por %xmm9,%xmm8 |
| 610 pshufd $0x4e,%xmm8,%xmm9 |
| 611 por %xmm9,%xmm8 |
| 612 .byte 102,76,15,126,195 |
| 613 |
541 mulq %rbx | 614 mulq %rbx |
542 addq %rax,%r8 | 615 addq %rax,%r8 |
543 movq 8(%rsi),%rax | 616 movq 8(%rsi),%rax |
544 movq %r8,(%rdi) | 617 movq %r8,(%rdi) |
545 movq %rdx,%r8 | 618 movq %rdx,%r8 |
546 adcq $0,%r8 | 619 adcq $0,%r8 |
547 | 620 |
548 mulq %rbx | 621 mulq %rbx |
549 movd (%rbp),%xmm4 | |
550 addq %rax,%r9 | 622 addq %rax,%r9 |
551 movq 16(%rsi),%rax | 623 movq 16(%rsi),%rax |
552 adcq $0,%rdx | 624 adcq $0,%rdx |
553 addq %r9,%r8 | 625 addq %r9,%r8 |
554 movq %rdx,%r9 | 626 movq %rdx,%r9 |
555 adcq $0,%r9 | 627 adcq $0,%r9 |
556 | 628 |
557 mulq %rbx | 629 mulq %rbx |
558 movd 64(%rbp),%xmm5 | |
559 addq %rax,%r10 | 630 addq %rax,%r10 |
560 movq 24(%rsi),%rax | 631 movq 24(%rsi),%rax |
561 adcq $0,%rdx | 632 adcq $0,%rdx |
562 addq %r10,%r9 | 633 addq %r10,%r9 |
563 movq %rdx,%r10 | 634 movq %rdx,%r10 |
564 adcq $0,%r10 | 635 adcq $0,%r10 |
565 | 636 |
566 mulq %rbx | 637 mulq %rbx |
567 pslldq $4,%xmm5 | |
568 addq %rax,%r11 | 638 addq %rax,%r11 |
569 movq 32(%rsi),%rax | 639 movq 32(%rsi),%rax |
570 adcq $0,%rdx | 640 adcq $0,%rdx |
571 addq %r11,%r10 | 641 addq %r11,%r10 |
572 movq %rdx,%r11 | 642 movq %rdx,%r11 |
573 adcq $0,%r11 | 643 adcq $0,%r11 |
574 | 644 |
575 mulq %rbx | 645 mulq %rbx |
576 por %xmm5,%xmm4 | |
577 addq %rax,%r12 | 646 addq %rax,%r12 |
578 movq 40(%rsi),%rax | 647 movq 40(%rsi),%rax |
579 adcq $0,%rdx | 648 adcq $0,%rdx |
580 addq %r12,%r11 | 649 addq %r12,%r11 |
581 movq %rdx,%r12 | 650 movq %rdx,%r12 |
582 adcq $0,%r12 | 651 adcq $0,%r12 |
583 | 652 |
584 mulq %rbx | 653 mulq %rbx |
585 addq %rax,%r13 | 654 addq %rax,%r13 |
586 movq 48(%rsi),%rax | 655 movq 48(%rsi),%rax |
587 adcq $0,%rdx | 656 adcq $0,%rdx |
588 addq %r13,%r12 | 657 addq %r13,%r12 |
589 movq %rdx,%r13 | 658 movq %rdx,%r13 |
590 adcq $0,%r13 | 659 adcq $0,%r13 |
591 | 660 |
592 mulq %rbx | 661 mulq %rbx |
593 addq %rax,%r14 | 662 addq %rax,%r14 |
594 movq 56(%rsi),%rax | 663 movq 56(%rsi),%rax |
595 adcq $0,%rdx | 664 adcq $0,%rdx |
596 addq %r14,%r13 | 665 addq %r14,%r13 |
597 movq %rdx,%r14 | 666 movq %rdx,%r14 |
598 adcq $0,%r14 | 667 adcq $0,%r14 |
599 | 668 |
600 mulq %rbx | 669 mulq %rbx |
601 .byte 102,72,15,126,227 | |
602 addq %rax,%r15 | 670 addq %rax,%r15 |
603 movq (%rsi),%rax | 671 movq (%rsi),%rax |
604 adcq $0,%rdx | 672 adcq $0,%rdx |
605 addq %r15,%r14 | 673 addq %r15,%r14 |
606 movq %rdx,%r15 | 674 movq %rdx,%r15 |
607 adcq $0,%r15 | 675 adcq $0,%r15 |
608 | 676 |
609 leaq 128(%rbp),%rbp | |
610 leaq 8(%rdi),%rdi | 677 leaq 8(%rdi),%rdi |
611 | 678 |
612 decl %ecx | 679 decl %ecx |
613 jnz L$oop_mul_gather | 680 jnz L$oop_mul_gather |
614 | 681 |
615 movq %r8,(%rdi) | 682 movq %r8,(%rdi) |
616 movq %r9,8(%rdi) | 683 movq %r9,8(%rdi) |
617 movq %r10,16(%rdi) | 684 movq %r10,16(%rdi) |
618 movq %r11,24(%rdi) | 685 movq %r11,24(%rdi) |
619 movq %r12,32(%rdi) | 686 movq %r12,32(%rdi) |
620 movq %r13,40(%rdi) | 687 movq %r13,40(%rdi) |
621 movq %r14,48(%rdi) | 688 movq %r14,48(%rdi) |
622 movq %r15,56(%rdi) | 689 movq %r15,56(%rdi) |
623 | 690 |
624 .byte» 102,72,15,126,199 | 691 » movq» 128+8(%rsp),%rdi |
625 .byte» 102,72,15,126,205 | 692 » movq» 128+16(%rsp),%rbp |
626 | 693 |
627 movq (%rsp),%r8 | 694 movq (%rsp),%r8 |
628 movq 8(%rsp),%r9 | 695 movq 8(%rsp),%r9 |
629 movq 16(%rsp),%r10 | 696 movq 16(%rsp),%r10 |
630 movq 24(%rsp),%r11 | 697 movq 24(%rsp),%r11 |
631 movq 32(%rsp),%r12 | 698 movq 32(%rsp),%r12 |
632 movq 40(%rsp),%r13 | 699 movq 40(%rsp),%r13 |
633 movq 48(%rsp),%r14 | 700 movq 48(%rsp),%r14 |
634 movq 56(%rsp),%r15 | 701 movq 56(%rsp),%r15 |
635 | 702 |
(...skipping 29 matching lines...) Expand all Loading... |
665 pushq %rbx | 732 pushq %rbx |
666 pushq %rbp | 733 pushq %rbp |
667 pushq %r12 | 734 pushq %r12 |
668 pushq %r13 | 735 pushq %r13 |
669 pushq %r14 | 736 pushq %r14 |
670 pushq %r15 | 737 pushq %r15 |
671 | 738 |
672 movl %r9d,%r9d | 739 movl %r9d,%r9d |
673 subq $128+24,%rsp | 740 subq $128+24,%rsp |
674 L$mul_scatter4_body: | 741 L$mul_scatter4_body: |
675 » leaq» (%r8,%r9,4),%r8 | 742 » leaq» (%r8,%r9,8),%r8 |
676 .byte 102,72,15,110,199 | 743 .byte 102,72,15,110,199 |
677 .byte 102,72,15,110,202 | 744 .byte 102,72,15,110,202 |
678 .byte 102,73,15,110,208 | 745 .byte 102,73,15,110,208 |
679 movq %rcx,128(%rsp) | 746 movq %rcx,128(%rsp) |
680 | 747 |
681 movq %rdi,%rbp | 748 movq %rdi,%rbp |
682 movq (%rdi),%rbx | 749 movq (%rdi),%rbx |
683 call __rsaz_512_mul | 750 call __rsaz_512_mul |
684 | 751 |
685 .byte 102,72,15,126,199 | 752 .byte 102,72,15,126,199 |
(...skipping 15 matching lines...) Expand all Loading... |
701 adcq 88(%rsp),%r11 | 768 adcq 88(%rsp),%r11 |
702 adcq 96(%rsp),%r12 | 769 adcq 96(%rsp),%r12 |
703 adcq 104(%rsp),%r13 | 770 adcq 104(%rsp),%r13 |
704 adcq 112(%rsp),%r14 | 771 adcq 112(%rsp),%r14 |
705 adcq 120(%rsp),%r15 | 772 adcq 120(%rsp),%r15 |
706 .byte 102,72,15,126,214 | 773 .byte 102,72,15,126,214 |
707 sbbq %rcx,%rcx | 774 sbbq %rcx,%rcx |
708 | 775 |
709 call __rsaz_512_subtract | 776 call __rsaz_512_subtract |
710 | 777 |
711 » movl» %r8d,0(%rsi) | 778 » movq» %r8,0(%rsi) |
712 » shrq» $32,%r8 | 779 » movq» %r9,128(%rsi) |
713 » movl» %r9d,128(%rsi) | 780 » movq» %r10,256(%rsi) |
714 » shrq» $32,%r9 | 781 » movq» %r11,384(%rsi) |
715 » movl» %r10d,256(%rsi) | 782 » movq» %r12,512(%rsi) |
716 » shrq» $32,%r10 | 783 » movq» %r13,640(%rsi) |
717 » movl» %r11d,384(%rsi) | 784 » movq» %r14,768(%rsi) |
718 » shrq» $32,%r11 | 785 » movq» %r15,896(%rsi) |
719 » movl» %r12d,512(%rsi) | |
720 » shrq» $32,%r12 | |
721 » movl» %r13d,640(%rsi) | |
722 » shrq» $32,%r13 | |
723 » movl» %r14d,768(%rsi) | |
724 » shrq» $32,%r14 | |
725 » movl» %r15d,896(%rsi) | |
726 » shrq» $32,%r15 | |
727 » movl» %r8d,64(%rsi) | |
728 » movl» %r9d,192(%rsi) | |
729 » movl» %r10d,320(%rsi) | |
730 » movl» %r11d,448(%rsi) | |
731 » movl» %r12d,576(%rsi) | |
732 » movl» %r13d,704(%rsi) | |
733 » movl» %r14d,832(%rsi) | |
734 » movl» %r15d,960(%rsi) | |
735 | 786 |
736 leaq 128+24+48(%rsp),%rax | 787 leaq 128+24+48(%rsp),%rax |
737 movq -48(%rax),%r15 | 788 movq -48(%rax),%r15 |
738 movq -40(%rax),%r14 | 789 movq -40(%rax),%r14 |
739 movq -32(%rax),%r13 | 790 movq -32(%rax),%r13 |
740 movq -24(%rax),%r12 | 791 movq -24(%rax),%r12 |
741 movq -16(%rax),%rbp | 792 movq -16(%rax),%rbp |
742 movq -8(%rax),%rbx | 793 movq -8(%rax),%rbx |
743 leaq (%rax),%rsp | 794 leaq (%rax),%rsp |
744 L$mul_scatter4_epilogue: | 795 L$mul_scatter4_epilogue: |
(...skipping 334 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1079 movq %r14,48(%rdi) | 1130 movq %r14,48(%rdi) |
1080 movq %r15,56(%rdi) | 1131 movq %r15,56(%rdi) |
1081 | 1132 |
1082 .byte 0xf3,0xc3 | 1133 .byte 0xf3,0xc3 |
1083 | 1134 |
1084 .globl _rsaz_512_scatter4 | 1135 .globl _rsaz_512_scatter4 |
1085 .private_extern _rsaz_512_scatter4 | 1136 .private_extern _rsaz_512_scatter4 |
1086 | 1137 |
1087 .p2align 4 | 1138 .p2align 4 |
1088 _rsaz_512_scatter4: | 1139 _rsaz_512_scatter4: |
1089 » leaq» (%rdi,%rdx,4),%rdi | 1140 » leaq» (%rdi,%rdx,8),%rdi |
1090 movl $8,%r9d | 1141 movl $8,%r9d |
1091 jmp L$oop_scatter | 1142 jmp L$oop_scatter |
1092 .p2align 4 | 1143 .p2align 4 |
1093 L$oop_scatter: | 1144 L$oop_scatter: |
1094 movq (%rsi),%rax | 1145 movq (%rsi),%rax |
1095 leaq 8(%rsi),%rsi | 1146 leaq 8(%rsi),%rsi |
1096 » movl» %eax,(%rdi) | 1147 » movq» %rax,(%rdi) |
1097 » shrq» $32,%rax | |
1098 » movl» %eax,64(%rdi) | |
1099 leaq 128(%rdi),%rdi | 1148 leaq 128(%rdi),%rdi |
1100 decl %r9d | 1149 decl %r9d |
1101 jnz L$oop_scatter | 1150 jnz L$oop_scatter |
1102 .byte 0xf3,0xc3 | 1151 .byte 0xf3,0xc3 |
1103 | 1152 |
1104 | 1153 |
1105 .globl _rsaz_512_gather4 | 1154 .globl _rsaz_512_gather4 |
1106 .private_extern _rsaz_512_gather4 | 1155 .private_extern _rsaz_512_gather4 |
1107 | 1156 |
1108 .p2align 4 | 1157 .p2align 4 |
1109 _rsaz_512_gather4: | 1158 _rsaz_512_gather4: |
1110 » leaq» (%rsi,%rdx,4),%rsi | 1159 » movd» %edx,%xmm8 |
| 1160 » movdqa» L$inc+16(%rip),%xmm1 |
| 1161 » movdqa» L$inc(%rip),%xmm0 |
| 1162 |
| 1163 » pshufd» $0,%xmm8,%xmm8 |
| 1164 » movdqa» %xmm1,%xmm7 |
| 1165 » movdqa» %xmm1,%xmm2 |
| 1166 » paddd» %xmm0,%xmm1 |
| 1167 » pcmpeqd»%xmm8,%xmm0 |
| 1168 » movdqa» %xmm7,%xmm3 |
| 1169 » paddd» %xmm1,%xmm2 |
| 1170 » pcmpeqd»%xmm8,%xmm1 |
| 1171 » movdqa» %xmm7,%xmm4 |
| 1172 » paddd» %xmm2,%xmm3 |
| 1173 » pcmpeqd»%xmm8,%xmm2 |
| 1174 » movdqa» %xmm7,%xmm5 |
| 1175 » paddd» %xmm3,%xmm4 |
| 1176 » pcmpeqd»%xmm8,%xmm3 |
| 1177 » movdqa» %xmm7,%xmm6 |
| 1178 » paddd» %xmm4,%xmm5 |
| 1179 » pcmpeqd»%xmm8,%xmm4 |
| 1180 » paddd» %xmm5,%xmm6 |
| 1181 » pcmpeqd»%xmm8,%xmm5 |
| 1182 » paddd» %xmm6,%xmm7 |
| 1183 » pcmpeqd»%xmm8,%xmm6 |
| 1184 » pcmpeqd»%xmm8,%xmm7 |
1111 movl $8,%r9d | 1185 movl $8,%r9d |
1112 jmp L$oop_gather | 1186 jmp L$oop_gather |
1113 .p2align 4 | 1187 .p2align 4 |
1114 L$oop_gather: | 1188 L$oop_gather: |
1115 » movl» (%rsi),%eax | 1189 » movdqa» 0(%rsi),%xmm8 |
1116 » movl» 64(%rsi),%r8d | 1190 » movdqa» 16(%rsi),%xmm9 |
| 1191 » movdqa» 32(%rsi),%xmm10 |
| 1192 » movdqa» 48(%rsi),%xmm11 |
| 1193 » pand» %xmm0,%xmm8 |
| 1194 » movdqa» 64(%rsi),%xmm12 |
| 1195 » pand» %xmm1,%xmm9 |
| 1196 » movdqa» 80(%rsi),%xmm13 |
| 1197 » pand» %xmm2,%xmm10 |
| 1198 » movdqa» 96(%rsi),%xmm14 |
| 1199 » pand» %xmm3,%xmm11 |
| 1200 » movdqa» 112(%rsi),%xmm15 |
1117 leaq 128(%rsi),%rsi | 1201 leaq 128(%rsi),%rsi |
1118 » shlq» $32,%r8 | 1202 » pand» %xmm4,%xmm12 |
1119 » orq» %r8,%rax | 1203 » pand» %xmm5,%xmm13 |
1120 » movq» %rax,(%rdi) | 1204 » pand» %xmm6,%xmm14 |
| 1205 » pand» %xmm7,%xmm15 |
| 1206 » por» %xmm10,%xmm8 |
| 1207 » por» %xmm11,%xmm9 |
| 1208 » por» %xmm12,%xmm8 |
| 1209 » por» %xmm13,%xmm9 |
| 1210 » por» %xmm14,%xmm8 |
| 1211 » por» %xmm15,%xmm9 |
| 1212 |
| 1213 » por» %xmm9,%xmm8 |
| 1214 » pshufd» $0x4e,%xmm8,%xmm9 |
| 1215 » por» %xmm9,%xmm8 |
| 1216 » movq» %xmm8,(%rdi) |
1121 leaq 8(%rdi),%rdi | 1217 leaq 8(%rdi),%rdi |
1122 decl %r9d | 1218 decl %r9d |
1123 jnz L$oop_gather | 1219 jnz L$oop_gather |
1124 .byte 0xf3,0xc3 | 1220 .byte 0xf3,0xc3 |
| 1221 L$SEH_end_rsaz_512_gather4: |
1125 | 1222 |
| 1223 |
| 1224 .p2align 6 |
| 1225 L$inc: |
| 1226 .long 0,0, 1,1 |
| 1227 .long 2,2, 2,2 |
1126 #endif | 1228 #endif |
OLD | NEW |