OLD | NEW |
1 #if defined(__x86_64__) | 1 #if defined(__x86_64__) |
2 .text | 2 .text |
3 | 3 |
4 .extern OPENSSL_ia32cap_P | 4 .extern OPENSSL_ia32cap_P |
5 .hidden OPENSSL_ia32cap_P | 5 .hidden OPENSSL_ia32cap_P |
6 | 6 |
7 .globl rsaz_512_sqr | 7 .globl rsaz_512_sqr |
8 .hidden rsaz_512_sqr | 8 .hidden rsaz_512_sqr |
9 .type rsaz_512_sqr,@function | 9 .type rsaz_512_sqr,@function |
10 .align 32 | 10 .align 32 |
(...skipping 448 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
459 .type rsaz_512_mul_gather4,@function | 459 .type rsaz_512_mul_gather4,@function |
460 .align 32 | 460 .align 32 |
461 rsaz_512_mul_gather4: | 461 rsaz_512_mul_gather4: |
462 pushq %rbx | 462 pushq %rbx |
463 pushq %rbp | 463 pushq %rbp |
464 pushq %r12 | 464 pushq %r12 |
465 pushq %r13 | 465 pushq %r13 |
466 pushq %r14 | 466 pushq %r14 |
467 pushq %r15 | 467 pushq %r15 |
468 | 468 |
469 » movl» %r9d,%r9d | 469 » subq» $152,%rsp |
470 » subq» $128+24,%rsp | |
471 .Lmul_gather4_body: | 470 .Lmul_gather4_body: |
472 » movl» 64(%rdx,%r9,4),%eax | 471 » movd» %r9d,%xmm8 |
473 .byte» 102,72,15,110,199 | 472 » movdqa» .Linc+16(%rip),%xmm1 |
474 » movl» (%rdx,%r9,4),%ebx | 473 » movdqa» .Linc(%rip),%xmm0 |
475 .byte» 102,72,15,110,201 | 474 |
| 475 » pshufd» $0,%xmm8,%xmm8 |
| 476 » movdqa» %xmm1,%xmm7 |
| 477 » movdqa» %xmm1,%xmm2 |
| 478 » paddd» %xmm0,%xmm1 |
| 479 » pcmpeqd»%xmm8,%xmm0 |
| 480 » movdqa» %xmm7,%xmm3 |
| 481 » paddd» %xmm1,%xmm2 |
| 482 » pcmpeqd»%xmm8,%xmm1 |
| 483 » movdqa» %xmm7,%xmm4 |
| 484 » paddd» %xmm2,%xmm3 |
| 485 » pcmpeqd»%xmm8,%xmm2 |
| 486 » movdqa» %xmm7,%xmm5 |
| 487 » paddd» %xmm3,%xmm4 |
| 488 » pcmpeqd»%xmm8,%xmm3 |
| 489 » movdqa» %xmm7,%xmm6 |
| 490 » paddd» %xmm4,%xmm5 |
| 491 » pcmpeqd»%xmm8,%xmm4 |
| 492 » paddd» %xmm5,%xmm6 |
| 493 » pcmpeqd»%xmm8,%xmm5 |
| 494 » paddd» %xmm6,%xmm7 |
| 495 » pcmpeqd»%xmm8,%xmm6 |
| 496 » pcmpeqd»%xmm8,%xmm7 |
| 497 |
| 498 » movdqa» 0(%rdx),%xmm8 |
| 499 » movdqa» 16(%rdx),%xmm9 |
| 500 » movdqa» 32(%rdx),%xmm10 |
| 501 » movdqa» 48(%rdx),%xmm11 |
| 502 » pand» %xmm0,%xmm8 |
| 503 » movdqa» 64(%rdx),%xmm12 |
| 504 » pand» %xmm1,%xmm9 |
| 505 » movdqa» 80(%rdx),%xmm13 |
| 506 » pand» %xmm2,%xmm10 |
| 507 » movdqa» 96(%rdx),%xmm14 |
| 508 » pand» %xmm3,%xmm11 |
| 509 » movdqa» 112(%rdx),%xmm15 |
| 510 » leaq» 128(%rdx),%rbp |
| 511 » pand» %xmm4,%xmm12 |
| 512 » pand» %xmm5,%xmm13 |
| 513 » pand» %xmm6,%xmm14 |
| 514 » pand» %xmm7,%xmm15 |
| 515 » por» %xmm10,%xmm8 |
| 516 » por» %xmm11,%xmm9 |
| 517 » por» %xmm12,%xmm8 |
| 518 » por» %xmm13,%xmm9 |
| 519 » por» %xmm14,%xmm8 |
| 520 » por» %xmm15,%xmm9 |
| 521 |
| 522 » por» %xmm9,%xmm8 |
| 523 » pshufd» $0x4e,%xmm8,%xmm9 |
| 524 » por» %xmm9,%xmm8 |
| 525 .byte» 102,76,15,126,195 |
| 526 |
476 movq %r8,128(%rsp) | 527 movq %r8,128(%rsp) |
| 528 movq %rdi,128+8(%rsp) |
| 529 movq %rcx,128+16(%rsp) |
477 | 530 |
478 shlq $32,%rax | |
479 orq %rax,%rbx | |
480 movq (%rsi),%rax | 531 movq (%rsi),%rax |
481 movq 8(%rsi),%rcx | 532 movq 8(%rsi),%rcx |
482 leaq 128(%rdx,%r9,4),%rbp | |
483 mulq %rbx | 533 mulq %rbx |
484 movq %rax,(%rsp) | 534 movq %rax,(%rsp) |
485 movq %rcx,%rax | 535 movq %rcx,%rax |
486 movq %rdx,%r8 | 536 movq %rdx,%r8 |
487 | 537 |
488 mulq %rbx | 538 mulq %rbx |
489 movd (%rbp),%xmm4 | |
490 addq %rax,%r8 | 539 addq %rax,%r8 |
491 movq 16(%rsi),%rax | 540 movq 16(%rsi),%rax |
492 movq %rdx,%r9 | 541 movq %rdx,%r9 |
493 adcq $0,%r9 | 542 adcq $0,%r9 |
494 | 543 |
495 mulq %rbx | 544 mulq %rbx |
496 movd 64(%rbp),%xmm5 | |
497 addq %rax,%r9 | 545 addq %rax,%r9 |
498 movq 24(%rsi),%rax | 546 movq 24(%rsi),%rax |
499 movq %rdx,%r10 | 547 movq %rdx,%r10 |
500 adcq $0,%r10 | 548 adcq $0,%r10 |
501 | 549 |
502 mulq %rbx | 550 mulq %rbx |
503 pslldq $4,%xmm5 | |
504 addq %rax,%r10 | 551 addq %rax,%r10 |
505 movq 32(%rsi),%rax | 552 movq 32(%rsi),%rax |
506 movq %rdx,%r11 | 553 movq %rdx,%r11 |
507 adcq $0,%r11 | 554 adcq $0,%r11 |
508 | 555 |
509 mulq %rbx | 556 mulq %rbx |
510 por %xmm5,%xmm4 | |
511 addq %rax,%r11 | 557 addq %rax,%r11 |
512 movq 40(%rsi),%rax | 558 movq 40(%rsi),%rax |
513 movq %rdx,%r12 | 559 movq %rdx,%r12 |
514 adcq $0,%r12 | 560 adcq $0,%r12 |
515 | 561 |
516 mulq %rbx | 562 mulq %rbx |
517 addq %rax,%r12 | 563 addq %rax,%r12 |
518 movq 48(%rsi),%rax | 564 movq 48(%rsi),%rax |
519 movq %rdx,%r13 | 565 movq %rdx,%r13 |
520 adcq $0,%r13 | 566 adcq $0,%r13 |
521 | 567 |
522 mulq %rbx | 568 mulq %rbx |
523 leaq 128(%rbp),%rbp | |
524 addq %rax,%r13 | 569 addq %rax,%r13 |
525 movq 56(%rsi),%rax | 570 movq 56(%rsi),%rax |
526 movq %rdx,%r14 | 571 movq %rdx,%r14 |
527 adcq $0,%r14 | 572 adcq $0,%r14 |
528 | 573 |
529 mulq %rbx | 574 mulq %rbx |
530 .byte 102,72,15,126,227 | |
531 addq %rax,%r14 | 575 addq %rax,%r14 |
532 movq (%rsi),%rax | 576 movq (%rsi),%rax |
533 movq %rdx,%r15 | 577 movq %rdx,%r15 |
534 adcq $0,%r15 | 578 adcq $0,%r15 |
535 | 579 |
536 leaq 8(%rsp),%rdi | 580 leaq 8(%rsp),%rdi |
537 movl $7,%ecx | 581 movl $7,%ecx |
538 jmp .Loop_mul_gather | 582 jmp .Loop_mul_gather |
539 | 583 |
540 .align 32 | 584 .align 32 |
541 .Loop_mul_gather: | 585 .Loop_mul_gather: |
| 586 movdqa 0(%rbp),%xmm8 |
| 587 movdqa 16(%rbp),%xmm9 |
| 588 movdqa 32(%rbp),%xmm10 |
| 589 movdqa 48(%rbp),%xmm11 |
| 590 pand %xmm0,%xmm8 |
| 591 movdqa 64(%rbp),%xmm12 |
| 592 pand %xmm1,%xmm9 |
| 593 movdqa 80(%rbp),%xmm13 |
| 594 pand %xmm2,%xmm10 |
| 595 movdqa 96(%rbp),%xmm14 |
| 596 pand %xmm3,%xmm11 |
| 597 movdqa 112(%rbp),%xmm15 |
| 598 leaq 128(%rbp),%rbp |
| 599 pand %xmm4,%xmm12 |
| 600 pand %xmm5,%xmm13 |
| 601 pand %xmm6,%xmm14 |
| 602 pand %xmm7,%xmm15 |
| 603 por %xmm10,%xmm8 |
| 604 por %xmm11,%xmm9 |
| 605 por %xmm12,%xmm8 |
| 606 por %xmm13,%xmm9 |
| 607 por %xmm14,%xmm8 |
| 608 por %xmm15,%xmm9 |
| 609 |
| 610 por %xmm9,%xmm8 |
| 611 pshufd $0x4e,%xmm8,%xmm9 |
| 612 por %xmm9,%xmm8 |
| 613 .byte 102,76,15,126,195 |
| 614 |
542 mulq %rbx | 615 mulq %rbx |
543 addq %rax,%r8 | 616 addq %rax,%r8 |
544 movq 8(%rsi),%rax | 617 movq 8(%rsi),%rax |
545 movq %r8,(%rdi) | 618 movq %r8,(%rdi) |
546 movq %rdx,%r8 | 619 movq %rdx,%r8 |
547 adcq $0,%r8 | 620 adcq $0,%r8 |
548 | 621 |
549 mulq %rbx | 622 mulq %rbx |
550 movd (%rbp),%xmm4 | |
551 addq %rax,%r9 | 623 addq %rax,%r9 |
552 movq 16(%rsi),%rax | 624 movq 16(%rsi),%rax |
553 adcq $0,%rdx | 625 adcq $0,%rdx |
554 addq %r9,%r8 | 626 addq %r9,%r8 |
555 movq %rdx,%r9 | 627 movq %rdx,%r9 |
556 adcq $0,%r9 | 628 adcq $0,%r9 |
557 | 629 |
558 mulq %rbx | 630 mulq %rbx |
559 movd 64(%rbp),%xmm5 | |
560 addq %rax,%r10 | 631 addq %rax,%r10 |
561 movq 24(%rsi),%rax | 632 movq 24(%rsi),%rax |
562 adcq $0,%rdx | 633 adcq $0,%rdx |
563 addq %r10,%r9 | 634 addq %r10,%r9 |
564 movq %rdx,%r10 | 635 movq %rdx,%r10 |
565 adcq $0,%r10 | 636 adcq $0,%r10 |
566 | 637 |
567 mulq %rbx | 638 mulq %rbx |
568 pslldq $4,%xmm5 | |
569 addq %rax,%r11 | 639 addq %rax,%r11 |
570 movq 32(%rsi),%rax | 640 movq 32(%rsi),%rax |
571 adcq $0,%rdx | 641 adcq $0,%rdx |
572 addq %r11,%r10 | 642 addq %r11,%r10 |
573 movq %rdx,%r11 | 643 movq %rdx,%r11 |
574 adcq $0,%r11 | 644 adcq $0,%r11 |
575 | 645 |
576 mulq %rbx | 646 mulq %rbx |
577 por %xmm5,%xmm4 | |
578 addq %rax,%r12 | 647 addq %rax,%r12 |
579 movq 40(%rsi),%rax | 648 movq 40(%rsi),%rax |
580 adcq $0,%rdx | 649 adcq $0,%rdx |
581 addq %r12,%r11 | 650 addq %r12,%r11 |
582 movq %rdx,%r12 | 651 movq %rdx,%r12 |
583 adcq $0,%r12 | 652 adcq $0,%r12 |
584 | 653 |
585 mulq %rbx | 654 mulq %rbx |
586 addq %rax,%r13 | 655 addq %rax,%r13 |
587 movq 48(%rsi),%rax | 656 movq 48(%rsi),%rax |
588 adcq $0,%rdx | 657 adcq $0,%rdx |
589 addq %r13,%r12 | 658 addq %r13,%r12 |
590 movq %rdx,%r13 | 659 movq %rdx,%r13 |
591 adcq $0,%r13 | 660 adcq $0,%r13 |
592 | 661 |
593 mulq %rbx | 662 mulq %rbx |
594 addq %rax,%r14 | 663 addq %rax,%r14 |
595 movq 56(%rsi),%rax | 664 movq 56(%rsi),%rax |
596 adcq $0,%rdx | 665 adcq $0,%rdx |
597 addq %r14,%r13 | 666 addq %r14,%r13 |
598 movq %rdx,%r14 | 667 movq %rdx,%r14 |
599 adcq $0,%r14 | 668 adcq $0,%r14 |
600 | 669 |
601 mulq %rbx | 670 mulq %rbx |
602 .byte 102,72,15,126,227 | |
603 addq %rax,%r15 | 671 addq %rax,%r15 |
604 movq (%rsi),%rax | 672 movq (%rsi),%rax |
605 adcq $0,%rdx | 673 adcq $0,%rdx |
606 addq %r15,%r14 | 674 addq %r15,%r14 |
607 movq %rdx,%r15 | 675 movq %rdx,%r15 |
608 adcq $0,%r15 | 676 adcq $0,%r15 |
609 | 677 |
610 leaq 128(%rbp),%rbp | |
611 leaq 8(%rdi),%rdi | 678 leaq 8(%rdi),%rdi |
612 | 679 |
613 decl %ecx | 680 decl %ecx |
614 jnz .Loop_mul_gather | 681 jnz .Loop_mul_gather |
615 | 682 |
616 movq %r8,(%rdi) | 683 movq %r8,(%rdi) |
617 movq %r9,8(%rdi) | 684 movq %r9,8(%rdi) |
618 movq %r10,16(%rdi) | 685 movq %r10,16(%rdi) |
619 movq %r11,24(%rdi) | 686 movq %r11,24(%rdi) |
620 movq %r12,32(%rdi) | 687 movq %r12,32(%rdi) |
621 movq %r13,40(%rdi) | 688 movq %r13,40(%rdi) |
622 movq %r14,48(%rdi) | 689 movq %r14,48(%rdi) |
623 movq %r15,56(%rdi) | 690 movq %r15,56(%rdi) |
624 | 691 |
625 .byte» 102,72,15,126,199 | 692 » movq» 128+8(%rsp),%rdi |
626 .byte» 102,72,15,126,205 | 693 » movq» 128+16(%rsp),%rbp |
627 | 694 |
628 movq (%rsp),%r8 | 695 movq (%rsp),%r8 |
629 movq 8(%rsp),%r9 | 696 movq 8(%rsp),%r9 |
630 movq 16(%rsp),%r10 | 697 movq 16(%rsp),%r10 |
631 movq 24(%rsp),%r11 | 698 movq 24(%rsp),%r11 |
632 movq 32(%rsp),%r12 | 699 movq 32(%rsp),%r12 |
633 movq 40(%rsp),%r13 | 700 movq 40(%rsp),%r13 |
634 movq 48(%rsp),%r14 | 701 movq 48(%rsp),%r14 |
635 movq 56(%rsp),%r15 | 702 movq 56(%rsp),%r15 |
636 | 703 |
(...skipping 29 matching lines...) Expand all Loading... |
666 pushq %rbx | 733 pushq %rbx |
667 pushq %rbp | 734 pushq %rbp |
668 pushq %r12 | 735 pushq %r12 |
669 pushq %r13 | 736 pushq %r13 |
670 pushq %r14 | 737 pushq %r14 |
671 pushq %r15 | 738 pushq %r15 |
672 | 739 |
673 movl %r9d,%r9d | 740 movl %r9d,%r9d |
674 subq $128+24,%rsp | 741 subq $128+24,%rsp |
675 .Lmul_scatter4_body: | 742 .Lmul_scatter4_body: |
676 » leaq» (%r8,%r9,4),%r8 | 743 » leaq» (%r8,%r9,8),%r8 |
677 .byte 102,72,15,110,199 | 744 .byte 102,72,15,110,199 |
678 .byte 102,72,15,110,202 | 745 .byte 102,72,15,110,202 |
679 .byte 102,73,15,110,208 | 746 .byte 102,73,15,110,208 |
680 movq %rcx,128(%rsp) | 747 movq %rcx,128(%rsp) |
681 | 748 |
682 movq %rdi,%rbp | 749 movq %rdi,%rbp |
683 movq (%rdi),%rbx | 750 movq (%rdi),%rbx |
684 call __rsaz_512_mul | 751 call __rsaz_512_mul |
685 | 752 |
686 .byte 102,72,15,126,199 | 753 .byte 102,72,15,126,199 |
(...skipping 15 matching lines...) Expand all Loading... |
702 adcq 88(%rsp),%r11 | 769 adcq 88(%rsp),%r11 |
703 adcq 96(%rsp),%r12 | 770 adcq 96(%rsp),%r12 |
704 adcq 104(%rsp),%r13 | 771 adcq 104(%rsp),%r13 |
705 adcq 112(%rsp),%r14 | 772 adcq 112(%rsp),%r14 |
706 adcq 120(%rsp),%r15 | 773 adcq 120(%rsp),%r15 |
707 .byte 102,72,15,126,214 | 774 .byte 102,72,15,126,214 |
708 sbbq %rcx,%rcx | 775 sbbq %rcx,%rcx |
709 | 776 |
710 call __rsaz_512_subtract | 777 call __rsaz_512_subtract |
711 | 778 |
712 » movl» %r8d,0(%rsi) | 779 » movq» %r8,0(%rsi) |
713 » shrq» $32,%r8 | 780 » movq» %r9,128(%rsi) |
714 » movl» %r9d,128(%rsi) | 781 » movq» %r10,256(%rsi) |
715 » shrq» $32,%r9 | 782 » movq» %r11,384(%rsi) |
716 » movl» %r10d,256(%rsi) | 783 » movq» %r12,512(%rsi) |
717 » shrq» $32,%r10 | 784 » movq» %r13,640(%rsi) |
718 » movl» %r11d,384(%rsi) | 785 » movq» %r14,768(%rsi) |
719 » shrq» $32,%r11 | 786 » movq» %r15,896(%rsi) |
720 » movl» %r12d,512(%rsi) | |
721 » shrq» $32,%r12 | |
722 » movl» %r13d,640(%rsi) | |
723 » shrq» $32,%r13 | |
724 » movl» %r14d,768(%rsi) | |
725 » shrq» $32,%r14 | |
726 » movl» %r15d,896(%rsi) | |
727 » shrq» $32,%r15 | |
728 » movl» %r8d,64(%rsi) | |
729 » movl» %r9d,192(%rsi) | |
730 » movl» %r10d,320(%rsi) | |
731 » movl» %r11d,448(%rsi) | |
732 » movl» %r12d,576(%rsi) | |
733 » movl» %r13d,704(%rsi) | |
734 » movl» %r14d,832(%rsi) | |
735 » movl» %r15d,960(%rsi) | |
736 | 787 |
737 leaq 128+24+48(%rsp),%rax | 788 leaq 128+24+48(%rsp),%rax |
738 movq -48(%rax),%r15 | 789 movq -48(%rax),%r15 |
739 movq -40(%rax),%r14 | 790 movq -40(%rax),%r14 |
740 movq -32(%rax),%r13 | 791 movq -32(%rax),%r13 |
741 movq -24(%rax),%r12 | 792 movq -24(%rax),%r12 |
742 movq -16(%rax),%rbp | 793 movq -16(%rax),%rbp |
743 movq -8(%rax),%rbx | 794 movq -8(%rax),%rbx |
744 leaq (%rax),%rsp | 795 leaq (%rax),%rsp |
745 .Lmul_scatter4_epilogue: | 796 .Lmul_scatter4_epilogue: |
(...skipping 334 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1080 movq %r14,48(%rdi) | 1131 movq %r14,48(%rdi) |
1081 movq %r15,56(%rdi) | 1132 movq %r15,56(%rdi) |
1082 | 1133 |
1083 .byte 0xf3,0xc3 | 1134 .byte 0xf3,0xc3 |
1084 .size __rsaz_512_mul,.-__rsaz_512_mul | 1135 .size __rsaz_512_mul,.-__rsaz_512_mul |
1085 .globl rsaz_512_scatter4 | 1136 .globl rsaz_512_scatter4 |
1086 .hidden rsaz_512_scatter4 | 1137 .hidden rsaz_512_scatter4 |
1087 .type rsaz_512_scatter4,@function | 1138 .type rsaz_512_scatter4,@function |
1088 .align 16 | 1139 .align 16 |
1089 rsaz_512_scatter4: | 1140 rsaz_512_scatter4: |
1090 » leaq» (%rdi,%rdx,4),%rdi | 1141 » leaq» (%rdi,%rdx,8),%rdi |
1091 movl $8,%r9d | 1142 movl $8,%r9d |
1092 jmp .Loop_scatter | 1143 jmp .Loop_scatter |
1093 .align 16 | 1144 .align 16 |
1094 .Loop_scatter: | 1145 .Loop_scatter: |
1095 movq (%rsi),%rax | 1146 movq (%rsi),%rax |
1096 leaq 8(%rsi),%rsi | 1147 leaq 8(%rsi),%rsi |
1097 » movl» %eax,(%rdi) | 1148 » movq» %rax,(%rdi) |
1098 » shrq» $32,%rax | |
1099 » movl» %eax,64(%rdi) | |
1100 leaq 128(%rdi),%rdi | 1149 leaq 128(%rdi),%rdi |
1101 decl %r9d | 1150 decl %r9d |
1102 jnz .Loop_scatter | 1151 jnz .Loop_scatter |
1103 .byte 0xf3,0xc3 | 1152 .byte 0xf3,0xc3 |
1104 .size rsaz_512_scatter4,.-rsaz_512_scatter4 | 1153 .size rsaz_512_scatter4,.-rsaz_512_scatter4 |
1105 | 1154 |
1106 .globl rsaz_512_gather4 | 1155 .globl rsaz_512_gather4 |
1107 .hidden rsaz_512_gather4 | 1156 .hidden rsaz_512_gather4 |
1108 .type rsaz_512_gather4,@function | 1157 .type rsaz_512_gather4,@function |
1109 .align 16 | 1158 .align 16 |
1110 rsaz_512_gather4: | 1159 rsaz_512_gather4: |
1111 » leaq» (%rsi,%rdx,4),%rsi | 1160 » movd» %edx,%xmm8 |
| 1161 » movdqa» .Linc+16(%rip),%xmm1 |
| 1162 » movdqa» .Linc(%rip),%xmm0 |
| 1163 |
| 1164 » pshufd» $0,%xmm8,%xmm8 |
| 1165 » movdqa» %xmm1,%xmm7 |
| 1166 » movdqa» %xmm1,%xmm2 |
| 1167 » paddd» %xmm0,%xmm1 |
| 1168 » pcmpeqd»%xmm8,%xmm0 |
| 1169 » movdqa» %xmm7,%xmm3 |
| 1170 » paddd» %xmm1,%xmm2 |
| 1171 » pcmpeqd»%xmm8,%xmm1 |
| 1172 » movdqa» %xmm7,%xmm4 |
| 1173 » paddd» %xmm2,%xmm3 |
| 1174 » pcmpeqd»%xmm8,%xmm2 |
| 1175 » movdqa» %xmm7,%xmm5 |
| 1176 » paddd» %xmm3,%xmm4 |
| 1177 » pcmpeqd»%xmm8,%xmm3 |
| 1178 » movdqa» %xmm7,%xmm6 |
| 1179 » paddd» %xmm4,%xmm5 |
| 1180 » pcmpeqd»%xmm8,%xmm4 |
| 1181 » paddd» %xmm5,%xmm6 |
| 1182 » pcmpeqd»%xmm8,%xmm5 |
| 1183 » paddd» %xmm6,%xmm7 |
| 1184 » pcmpeqd»%xmm8,%xmm6 |
| 1185 » pcmpeqd»%xmm8,%xmm7 |
1112 movl $8,%r9d | 1186 movl $8,%r9d |
1113 jmp .Loop_gather | 1187 jmp .Loop_gather |
1114 .align 16 | 1188 .align 16 |
1115 .Loop_gather: | 1189 .Loop_gather: |
1116 » movl» (%rsi),%eax | 1190 » movdqa» 0(%rsi),%xmm8 |
1117 » movl» 64(%rsi),%r8d | 1191 » movdqa» 16(%rsi),%xmm9 |
| 1192 » movdqa» 32(%rsi),%xmm10 |
| 1193 » movdqa» 48(%rsi),%xmm11 |
| 1194 » pand» %xmm0,%xmm8 |
| 1195 » movdqa» 64(%rsi),%xmm12 |
| 1196 » pand» %xmm1,%xmm9 |
| 1197 » movdqa» 80(%rsi),%xmm13 |
| 1198 » pand» %xmm2,%xmm10 |
| 1199 » movdqa» 96(%rsi),%xmm14 |
| 1200 » pand» %xmm3,%xmm11 |
| 1201 » movdqa» 112(%rsi),%xmm15 |
1118 leaq 128(%rsi),%rsi | 1202 leaq 128(%rsi),%rsi |
1119 » shlq» $32,%r8 | 1203 » pand» %xmm4,%xmm12 |
1120 » orq» %r8,%rax | 1204 » pand» %xmm5,%xmm13 |
1121 » movq» %rax,(%rdi) | 1205 » pand» %xmm6,%xmm14 |
| 1206 » pand» %xmm7,%xmm15 |
| 1207 » por» %xmm10,%xmm8 |
| 1208 » por» %xmm11,%xmm9 |
| 1209 » por» %xmm12,%xmm8 |
| 1210 » por» %xmm13,%xmm9 |
| 1211 » por» %xmm14,%xmm8 |
| 1212 » por» %xmm15,%xmm9 |
| 1213 |
| 1214 » por» %xmm9,%xmm8 |
| 1215 » pshufd» $0x4e,%xmm8,%xmm9 |
| 1216 » por» %xmm9,%xmm8 |
| 1217 » movq» %xmm8,(%rdi) |
1122 leaq 8(%rdi),%rdi | 1218 leaq 8(%rdi),%rdi |
1123 decl %r9d | 1219 decl %r9d |
1124 jnz .Loop_gather | 1220 jnz .Loop_gather |
1125 .byte 0xf3,0xc3 | 1221 .byte 0xf3,0xc3 |
| 1222 .LSEH_end_rsaz_512_gather4: |
1126 .size rsaz_512_gather4,.-rsaz_512_gather4 | 1223 .size rsaz_512_gather4,.-rsaz_512_gather4 |
| 1224 |
| 1225 .align 64 |
| 1226 .Linc: |
| 1227 .long 0,0, 1,1 |
| 1228 .long 2,2, 2,2 |
1127 #endif | 1229 #endif |
OLD | NEW |