Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(14)

Side by Side Diff: third_party/boringssl/linux-x86_64/crypto/aes/aesni-x86_64.S

Issue 2219933002: Land BoringSSL roll on master (Closed) Base URL: git@github.com:dart-lang/sdk.git@master
Patch Set: Created 4 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 #if defined(__x86_64__) 1 #if defined(__x86_64__)
2 .text 2 .text
3 .extern OPENSSL_ia32cap_P 3 .extern OPENSSL_ia32cap_P
4 .hidden OPENSSL_ia32cap_P 4 .hidden OPENSSL_ia32cap_P
5 .globl aesni_encrypt 5 .globl aesni_encrypt
6 .hidden aesni_encrypt 6 .hidden aesni_encrypt
7 .type aesni_encrypt,@function 7 .type aesni_encrypt,@function
8 .align 16 8 .align 16
9 aesni_encrypt: 9 aesni_encrypt:
10 movups (%rdi),%xmm2 10 movups (%rdi),%xmm2
(...skipping 490 matching lines...) Expand 10 before | Expand all | Expand 10 after
501 andq $-16,%rdx 501 andq $-16,%rdx
502 jz .Lecb_ret 502 jz .Lecb_ret
503 503
504 movl 240(%rcx),%eax 504 movl 240(%rcx),%eax
505 movups (%rcx),%xmm0 505 movups (%rcx),%xmm0
506 movq %rcx,%r11 506 movq %rcx,%r11
507 movl %eax,%r10d 507 movl %eax,%r10d
508 testl %r8d,%r8d 508 testl %r8d,%r8d
509 jz .Lecb_decrypt 509 jz .Lecb_decrypt
510 510
511 » cmpq» $128,%rdx 511 » cmpq» $0x80,%rdx
512 jb .Lecb_enc_tail 512 jb .Lecb_enc_tail
513 513
514 movdqu (%rdi),%xmm2 514 movdqu (%rdi),%xmm2
515 movdqu 16(%rdi),%xmm3 515 movdqu 16(%rdi),%xmm3
516 movdqu 32(%rdi),%xmm4 516 movdqu 32(%rdi),%xmm4
517 movdqu 48(%rdi),%xmm5 517 movdqu 48(%rdi),%xmm5
518 movdqu 64(%rdi),%xmm6 518 movdqu 64(%rdi),%xmm6
519 movdqu 80(%rdi),%xmm7 519 movdqu 80(%rdi),%xmm7
520 movdqu 96(%rdi),%xmm8 520 movdqu 96(%rdi),%xmm8
521 movdqu 112(%rdi),%xmm9 521 movdqu 112(%rdi),%xmm9
522 leaq 128(%rdi),%rdi 522 leaq 128(%rdi),%rdi
523 » subq» $128,%rdx 523 » subq» $0x80,%rdx
524 jmp .Lecb_enc_loop8_enter 524 jmp .Lecb_enc_loop8_enter
525 .align 16 525 .align 16
526 .Lecb_enc_loop8: 526 .Lecb_enc_loop8:
527 movups %xmm2,(%rsi) 527 movups %xmm2,(%rsi)
528 movq %r11,%rcx 528 movq %r11,%rcx
529 movdqu (%rdi),%xmm2 529 movdqu (%rdi),%xmm2
530 movl %r10d,%eax 530 movl %r10d,%eax
531 movups %xmm3,16(%rsi) 531 movups %xmm3,16(%rsi)
532 movdqu 16(%rdi),%xmm3 532 movdqu 16(%rdi),%xmm3
533 movups %xmm4,32(%rsi) 533 movups %xmm4,32(%rsi)
534 movdqu 32(%rdi),%xmm4 534 movdqu 32(%rdi),%xmm4
535 movups %xmm5,48(%rsi) 535 movups %xmm5,48(%rsi)
536 movdqu 48(%rdi),%xmm5 536 movdqu 48(%rdi),%xmm5
537 movups %xmm6,64(%rsi) 537 movups %xmm6,64(%rsi)
538 movdqu 64(%rdi),%xmm6 538 movdqu 64(%rdi),%xmm6
539 movups %xmm7,80(%rsi) 539 movups %xmm7,80(%rsi)
540 movdqu 80(%rdi),%xmm7 540 movdqu 80(%rdi),%xmm7
541 movups %xmm8,96(%rsi) 541 movups %xmm8,96(%rsi)
542 movdqu 96(%rdi),%xmm8 542 movdqu 96(%rdi),%xmm8
543 movups %xmm9,112(%rsi) 543 movups %xmm9,112(%rsi)
544 leaq 128(%rsi),%rsi 544 leaq 128(%rsi),%rsi
545 movdqu 112(%rdi),%xmm9 545 movdqu 112(%rdi),%xmm9
546 leaq 128(%rdi),%rdi 546 leaq 128(%rdi),%rdi
547 .Lecb_enc_loop8_enter: 547 .Lecb_enc_loop8_enter:
548 548
549 call _aesni_encrypt8 549 call _aesni_encrypt8
550 550
551 » subq» $128,%rdx 551 » subq» $0x80,%rdx
552 jnc .Lecb_enc_loop8 552 jnc .Lecb_enc_loop8
553 553
554 movups %xmm2,(%rsi) 554 movups %xmm2,(%rsi)
555 movq %r11,%rcx 555 movq %r11,%rcx
556 movups %xmm3,16(%rsi) 556 movups %xmm3,16(%rsi)
557 movl %r10d,%eax 557 movl %r10d,%eax
558 movups %xmm4,32(%rsi) 558 movups %xmm4,32(%rsi)
559 movups %xmm5,48(%rsi) 559 movups %xmm5,48(%rsi)
560 movups %xmm6,64(%rsi) 560 movups %xmm6,64(%rsi)
561 movups %xmm7,80(%rsi) 561 movups %xmm7,80(%rsi)
562 movups %xmm8,96(%rsi) 562 movups %xmm8,96(%rsi)
563 movups %xmm9,112(%rsi) 563 movups %xmm9,112(%rsi)
564 leaq 128(%rsi),%rsi 564 leaq 128(%rsi),%rsi
565 » addq» $128,%rdx 565 » addq» $0x80,%rdx
566 jz .Lecb_ret 566 jz .Lecb_ret
567 567
568 .Lecb_enc_tail: 568 .Lecb_enc_tail:
569 movups (%rdi),%xmm2 569 movups (%rdi),%xmm2
570 » cmpq» $32,%rdx 570 » cmpq» $0x20,%rdx
571 jb .Lecb_enc_one 571 jb .Lecb_enc_one
572 movups 16(%rdi),%xmm3 572 movups 16(%rdi),%xmm3
573 je .Lecb_enc_two 573 je .Lecb_enc_two
574 movups 32(%rdi),%xmm4 574 movups 32(%rdi),%xmm4
575 » cmpq» $64,%rdx 575 » cmpq» $0x40,%rdx
576 jb .Lecb_enc_three 576 jb .Lecb_enc_three
577 movups 48(%rdi),%xmm5 577 movups 48(%rdi),%xmm5
578 je .Lecb_enc_four 578 je .Lecb_enc_four
579 movups 64(%rdi),%xmm6 579 movups 64(%rdi),%xmm6
580 » cmpq» $96,%rdx 580 » cmpq» $0x60,%rdx
581 jb .Lecb_enc_five 581 jb .Lecb_enc_five
582 movups 80(%rdi),%xmm7 582 movups 80(%rdi),%xmm7
583 je .Lecb_enc_six 583 je .Lecb_enc_six
584 movdqu 96(%rdi),%xmm8 584 movdqu 96(%rdi),%xmm8
585 xorps %xmm9,%xmm9 585 xorps %xmm9,%xmm9
586 call _aesni_encrypt8 586 call _aesni_encrypt8
587 movups %xmm2,(%rsi) 587 movups %xmm2,(%rsi)
588 movups %xmm3,16(%rsi) 588 movups %xmm3,16(%rsi)
589 movups %xmm4,32(%rsi) 589 movups %xmm4,32(%rsi)
590 movups %xmm5,48(%rsi) 590 movups %xmm5,48(%rsi)
(...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after
644 movups %xmm2,(%rsi) 644 movups %xmm2,(%rsi)
645 movups %xmm3,16(%rsi) 645 movups %xmm3,16(%rsi)
646 movups %xmm4,32(%rsi) 646 movups %xmm4,32(%rsi)
647 movups %xmm5,48(%rsi) 647 movups %xmm5,48(%rsi)
648 movups %xmm6,64(%rsi) 648 movups %xmm6,64(%rsi)
649 movups %xmm7,80(%rsi) 649 movups %xmm7,80(%rsi)
650 jmp .Lecb_ret 650 jmp .Lecb_ret
651 651
652 .align 16 652 .align 16
653 .Lecb_decrypt: 653 .Lecb_decrypt:
654 » cmpq» $128,%rdx 654 » cmpq» $0x80,%rdx
655 jb .Lecb_dec_tail 655 jb .Lecb_dec_tail
656 656
657 movdqu (%rdi),%xmm2 657 movdqu (%rdi),%xmm2
658 movdqu 16(%rdi),%xmm3 658 movdqu 16(%rdi),%xmm3
659 movdqu 32(%rdi),%xmm4 659 movdqu 32(%rdi),%xmm4
660 movdqu 48(%rdi),%xmm5 660 movdqu 48(%rdi),%xmm5
661 movdqu 64(%rdi),%xmm6 661 movdqu 64(%rdi),%xmm6
662 movdqu 80(%rdi),%xmm7 662 movdqu 80(%rdi),%xmm7
663 movdqu 96(%rdi),%xmm8 663 movdqu 96(%rdi),%xmm8
664 movdqu 112(%rdi),%xmm9 664 movdqu 112(%rdi),%xmm9
665 leaq 128(%rdi),%rdi 665 leaq 128(%rdi),%rdi
666 » subq» $128,%rdx 666 » subq» $0x80,%rdx
667 jmp .Lecb_dec_loop8_enter 667 jmp .Lecb_dec_loop8_enter
668 .align 16 668 .align 16
669 .Lecb_dec_loop8: 669 .Lecb_dec_loop8:
670 movups %xmm2,(%rsi) 670 movups %xmm2,(%rsi)
671 movq %r11,%rcx 671 movq %r11,%rcx
672 movdqu (%rdi),%xmm2 672 movdqu (%rdi),%xmm2
673 movl %r10d,%eax 673 movl %r10d,%eax
674 movups %xmm3,16(%rsi) 674 movups %xmm3,16(%rsi)
675 movdqu 16(%rdi),%xmm3 675 movdqu 16(%rdi),%xmm3
676 movups %xmm4,32(%rsi) 676 movups %xmm4,32(%rsi)
677 movdqu 32(%rdi),%xmm4 677 movdqu 32(%rdi),%xmm4
678 movups %xmm5,48(%rsi) 678 movups %xmm5,48(%rsi)
679 movdqu 48(%rdi),%xmm5 679 movdqu 48(%rdi),%xmm5
680 movups %xmm6,64(%rsi) 680 movups %xmm6,64(%rsi)
681 movdqu 64(%rdi),%xmm6 681 movdqu 64(%rdi),%xmm6
682 movups %xmm7,80(%rsi) 682 movups %xmm7,80(%rsi)
683 movdqu 80(%rdi),%xmm7 683 movdqu 80(%rdi),%xmm7
684 movups %xmm8,96(%rsi) 684 movups %xmm8,96(%rsi)
685 movdqu 96(%rdi),%xmm8 685 movdqu 96(%rdi),%xmm8
686 movups %xmm9,112(%rsi) 686 movups %xmm9,112(%rsi)
687 leaq 128(%rsi),%rsi 687 leaq 128(%rsi),%rsi
688 movdqu 112(%rdi),%xmm9 688 movdqu 112(%rdi),%xmm9
689 leaq 128(%rdi),%rdi 689 leaq 128(%rdi),%rdi
690 .Lecb_dec_loop8_enter: 690 .Lecb_dec_loop8_enter:
691 691
692 call _aesni_decrypt8 692 call _aesni_decrypt8
693 693
694 movups (%r11),%xmm0 694 movups (%r11),%xmm0
695 » subq» $128,%rdx 695 » subq» $0x80,%rdx
696 jnc .Lecb_dec_loop8 696 jnc .Lecb_dec_loop8
697 697
698 movups %xmm2,(%rsi) 698 movups %xmm2,(%rsi)
699 pxor %xmm2,%xmm2 699 pxor %xmm2,%xmm2
700 movq %r11,%rcx 700 movq %r11,%rcx
701 movups %xmm3,16(%rsi) 701 movups %xmm3,16(%rsi)
702 pxor %xmm3,%xmm3 702 pxor %xmm3,%xmm3
703 movl %r10d,%eax 703 movl %r10d,%eax
704 movups %xmm4,32(%rsi) 704 movups %xmm4,32(%rsi)
705 pxor %xmm4,%xmm4 705 pxor %xmm4,%xmm4
706 movups %xmm5,48(%rsi) 706 movups %xmm5,48(%rsi)
707 pxor %xmm5,%xmm5 707 pxor %xmm5,%xmm5
708 movups %xmm6,64(%rsi) 708 movups %xmm6,64(%rsi)
709 pxor %xmm6,%xmm6 709 pxor %xmm6,%xmm6
710 movups %xmm7,80(%rsi) 710 movups %xmm7,80(%rsi)
711 pxor %xmm7,%xmm7 711 pxor %xmm7,%xmm7
712 movups %xmm8,96(%rsi) 712 movups %xmm8,96(%rsi)
713 pxor %xmm8,%xmm8 713 pxor %xmm8,%xmm8
714 movups %xmm9,112(%rsi) 714 movups %xmm9,112(%rsi)
715 pxor %xmm9,%xmm9 715 pxor %xmm9,%xmm9
716 leaq 128(%rsi),%rsi 716 leaq 128(%rsi),%rsi
717 » addq» $128,%rdx 717 » addq» $0x80,%rdx
718 jz .Lecb_ret 718 jz .Lecb_ret
719 719
720 .Lecb_dec_tail: 720 .Lecb_dec_tail:
721 movups (%rdi),%xmm2 721 movups (%rdi),%xmm2
722 » cmpq» $32,%rdx 722 » cmpq» $0x20,%rdx
723 jb .Lecb_dec_one 723 jb .Lecb_dec_one
724 movups 16(%rdi),%xmm3 724 movups 16(%rdi),%xmm3
725 je .Lecb_dec_two 725 je .Lecb_dec_two
726 movups 32(%rdi),%xmm4 726 movups 32(%rdi),%xmm4
727 » cmpq» $64,%rdx 727 » cmpq» $0x40,%rdx
728 jb .Lecb_dec_three 728 jb .Lecb_dec_three
729 movups 48(%rdi),%xmm5 729 movups 48(%rdi),%xmm5
730 je .Lecb_dec_four 730 je .Lecb_dec_four
731 movups 64(%rdi),%xmm6 731 movups 64(%rdi),%xmm6
732 » cmpq» $96,%rdx 732 » cmpq» $0x60,%rdx
733 jb .Lecb_dec_five 733 jb .Lecb_dec_five
734 movups 80(%rdi),%xmm7 734 movups 80(%rdi),%xmm7
735 je .Lecb_dec_six 735 je .Lecb_dec_six
736 movups 96(%rdi),%xmm8 736 movups 96(%rdi),%xmm8
737 movups (%rcx),%xmm0 737 movups (%rcx),%xmm0
738 xorps %xmm9,%xmm9 738 xorps %xmm9,%xmm9
739 call _aesni_decrypt8 739 call _aesni_decrypt8
740 movups %xmm2,(%rsi) 740 movups %xmm2,(%rsi)
741 pxor %xmm2,%xmm2 741 pxor %xmm2,%xmm2
742 movups %xmm3,16(%rsi) 742 movups %xmm3,16(%rsi)
(...skipping 857 matching lines...) Expand 10 before | Expand all | Expand 10 after
1600 movq %rcx,%r11 1600 movq %rcx,%r11
1601 movl %r10d,%eax 1601 movl %r10d,%eax
1602 shll $4,%r10d 1602 shll $4,%r10d
1603 movq %rdx,%r9 1603 movq %rdx,%r9
1604 andq $-16,%rdx 1604 andq $-16,%rdx
1605 1605
1606 movups 16(%rcx,%r10,1),%xmm1 1606 movups 16(%rcx,%r10,1),%xmm1
1607 1607
1608 movdqa .Lxts_magic(%rip),%xmm8 1608 movdqa .Lxts_magic(%rip),%xmm8
1609 movdqa %xmm2,%xmm15 1609 movdqa %xmm2,%xmm15
1610 » pshufd» $95,%xmm2,%xmm9 1610 » pshufd» $0x5f,%xmm2,%xmm9
1611 pxor %xmm0,%xmm1 1611 pxor %xmm0,%xmm1
1612 movdqa %xmm9,%xmm14 1612 movdqa %xmm9,%xmm14
1613 paddd %xmm9,%xmm9 1613 paddd %xmm9,%xmm9
1614 movdqa %xmm15,%xmm10 1614 movdqa %xmm15,%xmm10
1615 psrad $31,%xmm14 1615 psrad $31,%xmm14
1616 paddq %xmm15,%xmm15 1616 paddq %xmm15,%xmm15
1617 pand %xmm8,%xmm14 1617 pand %xmm8,%xmm14
1618 pxor %xmm0,%xmm10 1618 pxor %xmm0,%xmm10
1619 pxor %xmm14,%xmm15 1619 pxor %xmm14,%xmm15
1620 movdqa %xmm9,%xmm14 1620 movdqa %xmm9,%xmm14
(...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after
1699 pxor %xmm9,%xmm14 1699 pxor %xmm9,%xmm14
1700 movdqa %xmm12,32(%rsp) 1700 movdqa %xmm12,32(%rsp)
1701 .byte 102,15,56,220,224 1701 .byte 102,15,56,220,224
1702 .byte 102,15,56,220,232 1702 .byte 102,15,56,220,232
1703 pxor %xmm9,%xmm8 1703 pxor %xmm9,%xmm8
1704 movdqa %xmm14,64(%rsp) 1704 movdqa %xmm14,64(%rsp)
1705 .byte 102,15,56,220,240 1705 .byte 102,15,56,220,240
1706 .byte 102,15,56,220,248 1706 .byte 102,15,56,220,248
1707 movups 64(%r11),%xmm0 1707 movups 64(%r11),%xmm0
1708 movdqa %xmm8,80(%rsp) 1708 movdqa %xmm8,80(%rsp)
1709 » pshufd» $95,%xmm15,%xmm9 1709 » pshufd» $0x5f,%xmm15,%xmm9
1710 jmp .Lxts_enc_loop6 1710 jmp .Lxts_enc_loop6
1711 .align 32 1711 .align 32
1712 .Lxts_enc_loop6: 1712 .Lxts_enc_loop6:
1713 .byte 102,15,56,220,209 1713 .byte 102,15,56,220,209
1714 .byte 102,15,56,220,217 1714 .byte 102,15,56,220,217
1715 .byte 102,15,56,220,225 1715 .byte 102,15,56,220,225
1716 .byte 102,15,56,220,233 1716 .byte 102,15,56,220,233
1717 .byte 102,15,56,220,241 1717 .byte 102,15,56,220,241
1718 .byte 102,15,56,220,249 1718 .byte 102,15,56,220,249
1719 movups -64(%rcx,%rax,1),%xmm1 1719 movups -64(%rcx,%rax,1),%xmm1
(...skipping 118 matching lines...) Expand 10 before | Expand all | Expand 10 after
1838 shrl $4,%eax 1838 shrl $4,%eax
1839 1839
1840 .Lxts_enc_short: 1840 .Lxts_enc_short:
1841 1841
1842 movl %eax,%r10d 1842 movl %eax,%r10d
1843 pxor %xmm0,%xmm10 1843 pxor %xmm0,%xmm10
1844 addq $96,%rdx 1844 addq $96,%rdx
1845 jz .Lxts_enc_done 1845 jz .Lxts_enc_done
1846 1846
1847 pxor %xmm0,%xmm11 1847 pxor %xmm0,%xmm11
1848 » cmpq» $32,%rdx 1848 » cmpq» $0x20,%rdx
1849 jb .Lxts_enc_one 1849 jb .Lxts_enc_one
1850 pxor %xmm0,%xmm12 1850 pxor %xmm0,%xmm12
1851 je .Lxts_enc_two 1851 je .Lxts_enc_two
1852 1852
1853 pxor %xmm0,%xmm13 1853 pxor %xmm0,%xmm13
1854 » cmpq» $64,%rdx 1854 » cmpq» $0x40,%rdx
1855 jb .Lxts_enc_three 1855 jb .Lxts_enc_three
1856 pxor %xmm0,%xmm14 1856 pxor %xmm0,%xmm14
1857 je .Lxts_enc_four 1857 je .Lxts_enc_four
1858 1858
1859 movdqu (%rdi),%xmm2 1859 movdqu (%rdi),%xmm2
1860 movdqu 16(%rdi),%xmm3 1860 movdqu 16(%rdi),%xmm3
1861 movdqu 32(%rdi),%xmm4 1861 movdqu 32(%rdi),%xmm4
1862 pxor %xmm10,%xmm2 1862 pxor %xmm10,%xmm2
1863 movdqu 48(%rdi),%xmm5 1863 movdqu 48(%rdi),%xmm5
1864 pxor %xmm11,%xmm3 1864 pxor %xmm11,%xmm3
(...skipping 207 matching lines...) Expand 10 before | Expand all | Expand 10 after
2072 movq %rcx,%r11 2072 movq %rcx,%r11
2073 movl %r10d,%eax 2073 movl %r10d,%eax
2074 shll $4,%r10d 2074 shll $4,%r10d
2075 movq %rdx,%r9 2075 movq %rdx,%r9
2076 andq $-16,%rdx 2076 andq $-16,%rdx
2077 2077
2078 movups 16(%rcx,%r10,1),%xmm1 2078 movups 16(%rcx,%r10,1),%xmm1
2079 2079
2080 movdqa .Lxts_magic(%rip),%xmm8 2080 movdqa .Lxts_magic(%rip),%xmm8
2081 movdqa %xmm2,%xmm15 2081 movdqa %xmm2,%xmm15
2082 » pshufd» $95,%xmm2,%xmm9 2082 » pshufd» $0x5f,%xmm2,%xmm9
2083 pxor %xmm0,%xmm1 2083 pxor %xmm0,%xmm1
2084 movdqa %xmm9,%xmm14 2084 movdqa %xmm9,%xmm14
2085 paddd %xmm9,%xmm9 2085 paddd %xmm9,%xmm9
2086 movdqa %xmm15,%xmm10 2086 movdqa %xmm15,%xmm10
2087 psrad $31,%xmm14 2087 psrad $31,%xmm14
2088 paddq %xmm15,%xmm15 2088 paddq %xmm15,%xmm15
2089 pand %xmm8,%xmm14 2089 pand %xmm8,%xmm14
2090 pxor %xmm0,%xmm10 2090 pxor %xmm0,%xmm10
2091 pxor %xmm14,%xmm15 2091 pxor %xmm14,%xmm15
2092 movdqa %xmm9,%xmm14 2092 movdqa %xmm9,%xmm14
(...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after
2171 pxor %xmm9,%xmm14 2171 pxor %xmm9,%xmm14
2172 movdqa %xmm12,32(%rsp) 2172 movdqa %xmm12,32(%rsp)
2173 .byte 102,15,56,222,224 2173 .byte 102,15,56,222,224
2174 .byte 102,15,56,222,232 2174 .byte 102,15,56,222,232
2175 pxor %xmm9,%xmm8 2175 pxor %xmm9,%xmm8
2176 movdqa %xmm14,64(%rsp) 2176 movdqa %xmm14,64(%rsp)
2177 .byte 102,15,56,222,240 2177 .byte 102,15,56,222,240
2178 .byte 102,15,56,222,248 2178 .byte 102,15,56,222,248
2179 movups 64(%r11),%xmm0 2179 movups 64(%r11),%xmm0
2180 movdqa %xmm8,80(%rsp) 2180 movdqa %xmm8,80(%rsp)
2181 » pshufd» $95,%xmm15,%xmm9 2181 » pshufd» $0x5f,%xmm15,%xmm9
2182 jmp .Lxts_dec_loop6 2182 jmp .Lxts_dec_loop6
2183 .align 32 2183 .align 32
2184 .Lxts_dec_loop6: 2184 .Lxts_dec_loop6:
2185 .byte 102,15,56,222,209 2185 .byte 102,15,56,222,209
2186 .byte 102,15,56,222,217 2186 .byte 102,15,56,222,217
2187 .byte 102,15,56,222,225 2187 .byte 102,15,56,222,225
2188 .byte 102,15,56,222,233 2188 .byte 102,15,56,222,233
2189 .byte 102,15,56,222,241 2189 .byte 102,15,56,222,241
2190 .byte 102,15,56,222,249 2190 .byte 102,15,56,222,249
2191 movups -64(%rcx,%rax,1),%xmm1 2191 movups -64(%rcx,%rax,1),%xmm1
(...skipping 119 matching lines...) Expand 10 before | Expand all | Expand 10 after
2311 2311
2312 .Lxts_dec_short: 2312 .Lxts_dec_short:
2313 2313
2314 movl %eax,%r10d 2314 movl %eax,%r10d
2315 pxor %xmm0,%xmm10 2315 pxor %xmm0,%xmm10
2316 pxor %xmm0,%xmm11 2316 pxor %xmm0,%xmm11
2317 addq $96,%rdx 2317 addq $96,%rdx
2318 jz .Lxts_dec_done 2318 jz .Lxts_dec_done
2319 2319
2320 pxor %xmm0,%xmm12 2320 pxor %xmm0,%xmm12
2321 » cmpq» $32,%rdx 2321 » cmpq» $0x20,%rdx
2322 jb .Lxts_dec_one 2322 jb .Lxts_dec_one
2323 pxor %xmm0,%xmm13 2323 pxor %xmm0,%xmm13
2324 je .Lxts_dec_two 2324 je .Lxts_dec_two
2325 2325
2326 pxor %xmm0,%xmm14 2326 pxor %xmm0,%xmm14
2327 » cmpq» $64,%rdx 2327 » cmpq» $0x40,%rdx
2328 jb .Lxts_dec_three 2328 jb .Lxts_dec_three
2329 je .Lxts_dec_four 2329 je .Lxts_dec_four
2330 2330
2331 movdqu (%rdi),%xmm2 2331 movdqu (%rdi),%xmm2
2332 movdqu 16(%rdi),%xmm3 2332 movdqu 16(%rdi),%xmm3
2333 movdqu 32(%rdi),%xmm4 2333 movdqu 32(%rdi),%xmm4
2334 pxor %xmm10,%xmm2 2334 pxor %xmm10,%xmm2
2335 movdqu 48(%rdi),%xmm5 2335 movdqu 48(%rdi),%xmm5
2336 pxor %xmm11,%xmm3 2336 pxor %xmm11,%xmm3
2337 movdqu 64(%rdi),%xmm6 2337 movdqu 64(%rdi),%xmm6
(...skipping 10 matching lines...) Expand all
2348 movdqu %xmm2,(%rsi) 2348 movdqu %xmm2,(%rsi)
2349 xorps %xmm13,%xmm5 2349 xorps %xmm13,%xmm5
2350 movdqu %xmm3,16(%rsi) 2350 movdqu %xmm3,16(%rsi)
2351 xorps %xmm14,%xmm6 2351 xorps %xmm14,%xmm6
2352 movdqu %xmm4,32(%rsi) 2352 movdqu %xmm4,32(%rsi)
2353 pxor %xmm14,%xmm14 2353 pxor %xmm14,%xmm14
2354 movdqu %xmm5,48(%rsi) 2354 movdqu %xmm5,48(%rsi)
2355 pcmpgtd %xmm15,%xmm14 2355 pcmpgtd %xmm15,%xmm14
2356 movdqu %xmm6,64(%rsi) 2356 movdqu %xmm6,64(%rsi)
2357 leaq 80(%rsi),%rsi 2357 leaq 80(%rsi),%rsi
2358 » pshufd» $19,%xmm14,%xmm11 2358 » pshufd» $0x13,%xmm14,%xmm11
2359 andq $15,%r9 2359 andq $15,%r9
2360 jz .Lxts_dec_ret 2360 jz .Lxts_dec_ret
2361 2361
2362 movdqa %xmm15,%xmm10 2362 movdqa %xmm15,%xmm10
2363 paddq %xmm15,%xmm15 2363 paddq %xmm15,%xmm15
2364 pand %xmm8,%xmm11 2364 pand %xmm8,%xmm11
2365 pxor %xmm15,%xmm11 2365 pxor %xmm15,%xmm11
2366 jmp .Lxts_dec_done2 2366 jmp .Lxts_dec_done2
2367 2367
2368 .align 16 2368 .align 16
(...skipping 269 matching lines...) Expand 10 before | Expand all | Expand 10 after
2638 jmp .Lcbc_ret 2638 jmp .Lcbc_ret
2639 .align 16 2639 .align 16
2640 .Lcbc_decrypt_bulk: 2640 .Lcbc_decrypt_bulk:
2641 leaq (%rsp),%rax 2641 leaq (%rsp),%rax
2642 pushq %rbp 2642 pushq %rbp
2643 subq $16,%rsp 2643 subq $16,%rsp
2644 andq $-16,%rsp 2644 andq $-16,%rsp
2645 leaq -8(%rax),%rbp 2645 leaq -8(%rax),%rbp
2646 movups (%r8),%xmm10 2646 movups (%r8),%xmm10
2647 movl %r10d,%eax 2647 movl %r10d,%eax
2648 » cmpq» $80,%rdx 2648 » cmpq» $0x50,%rdx
2649 jbe .Lcbc_dec_tail 2649 jbe .Lcbc_dec_tail
2650 2650
2651 movups (%rcx),%xmm0 2651 movups (%rcx),%xmm0
2652 movdqu 0(%rdi),%xmm2 2652 movdqu 0(%rdi),%xmm2
2653 movdqu 16(%rdi),%xmm3 2653 movdqu 16(%rdi),%xmm3
2654 movdqa %xmm2,%xmm11 2654 movdqa %xmm2,%xmm11
2655 movdqu 32(%rdi),%xmm4 2655 movdqu 32(%rdi),%xmm4
2656 movdqa %xmm3,%xmm12 2656 movdqa %xmm3,%xmm12
2657 movdqu 48(%rdi),%xmm5 2657 movdqu 48(%rdi),%xmm5
2658 movdqa %xmm4,%xmm13 2658 movdqa %xmm4,%xmm13
2659 movdqu 64(%rdi),%xmm6 2659 movdqu 64(%rdi),%xmm6
2660 movdqa %xmm5,%xmm14 2660 movdqa %xmm5,%xmm14
2661 movdqu 80(%rdi),%xmm7 2661 movdqu 80(%rdi),%xmm7
2662 movdqa %xmm6,%xmm15 2662 movdqa %xmm6,%xmm15
2663 movl OPENSSL_ia32cap_P+4(%rip),%r9d 2663 movl OPENSSL_ia32cap_P+4(%rip),%r9d
2664 » cmpq» $112,%rdx 2664 » cmpq» $0x70,%rdx
2665 jbe .Lcbc_dec_six_or_seven 2665 jbe .Lcbc_dec_six_or_seven
2666 2666
2667 andl $71303168,%r9d 2667 andl $71303168,%r9d
2668 » subq» $80,%rdx 2668 » subq» $0x50,%rdx
2669 cmpl $4194304,%r9d 2669 cmpl $4194304,%r9d
2670 je .Lcbc_dec_loop6_enter 2670 je .Lcbc_dec_loop6_enter
2671 » subq» $32,%rdx 2671 » subq» $0x20,%rdx
2672 leaq 112(%rcx),%rcx 2672 leaq 112(%rcx),%rcx
2673 jmp .Lcbc_dec_loop8_enter 2673 jmp .Lcbc_dec_loop8_enter
2674 .align 16 2674 .align 16
2675 .Lcbc_dec_loop8: 2675 .Lcbc_dec_loop8:
2676 movups %xmm9,(%rsi) 2676 movups %xmm9,(%rsi)
2677 leaq 16(%rsi),%rsi 2677 leaq 16(%rsi),%rsi
2678 .Lcbc_dec_loop8_enter: 2678 .Lcbc_dec_loop8_enter:
2679 movdqu 96(%rdi),%xmm8 2679 movdqu 96(%rdi),%xmm8
2680 pxor %xmm0,%xmm2 2680 pxor %xmm0,%xmm2
2681 movdqu 112(%rdi),%xmm9 2681 movdqu 112(%rdi),%xmm9
2682 pxor %xmm0,%xmm3 2682 pxor %xmm0,%xmm3
2683 movups 16-112(%rcx),%xmm1 2683 movups 16-112(%rcx),%xmm1
2684 pxor %xmm0,%xmm4 2684 pxor %xmm0,%xmm4
2685 xorq %r11,%r11 2685 xorq %r11,%r11
2686 » cmpq» $112,%rdx 2686 » cmpq» $0x70,%rdx
2687 pxor %xmm0,%xmm5 2687 pxor %xmm0,%xmm5
2688 pxor %xmm0,%xmm6 2688 pxor %xmm0,%xmm6
2689 pxor %xmm0,%xmm7 2689 pxor %xmm0,%xmm7
2690 pxor %xmm0,%xmm8 2690 pxor %xmm0,%xmm8
2691 2691
2692 .byte 102,15,56,222,209 2692 .byte 102,15,56,222,209
2693 pxor %xmm0,%xmm9 2693 pxor %xmm0,%xmm9
2694 movups 32-112(%rcx),%xmm0 2694 movups 32-112(%rcx),%xmm0
2695 .byte 102,15,56,222,217 2695 .byte 102,15,56,222,217
2696 .byte 102,15,56,222,225 2696 .byte 102,15,56,222,225
(...skipping 164 matching lines...) Expand 10 before | Expand all | Expand 10 after
2861 movdqa %xmm13,%xmm4 2861 movdqa %xmm13,%xmm4
2862 movups %xmm5,48(%rsi) 2862 movups %xmm5,48(%rsi)
2863 movdqa %xmm14,%xmm5 2863 movdqa %xmm14,%xmm5
2864 movups %xmm6,64(%rsi) 2864 movups %xmm6,64(%rsi)
2865 movdqa %xmm15,%xmm6 2865 movdqa %xmm15,%xmm6
2866 movups %xmm7,80(%rsi) 2866 movups %xmm7,80(%rsi)
2867 movdqa %xmm1,%xmm7 2867 movdqa %xmm1,%xmm7
2868 movups %xmm8,96(%rsi) 2868 movups %xmm8,96(%rsi)
2869 leaq 112(%rsi),%rsi 2869 leaq 112(%rsi),%rsi
2870 2870
2871 » subq» $128,%rdx 2871 » subq» $0x80,%rdx
2872 ja .Lcbc_dec_loop8 2872 ja .Lcbc_dec_loop8
2873 2873
2874 movaps %xmm9,%xmm2 2874 movaps %xmm9,%xmm2
2875 leaq -112(%rcx),%rcx 2875 leaq -112(%rcx),%rcx
2876 » addq» $112,%rdx 2876 » addq» $0x70,%rdx
2877 jle .Lcbc_dec_clear_tail_collected 2877 jle .Lcbc_dec_clear_tail_collected
2878 movups %xmm9,(%rsi) 2878 movups %xmm9,(%rsi)
2879 leaq 16(%rsi),%rsi 2879 leaq 16(%rsi),%rsi
2880 » cmpq» $80,%rdx 2880 » cmpq» $0x50,%rdx
2881 jbe .Lcbc_dec_tail 2881 jbe .Lcbc_dec_tail
2882 2882
2883 movaps %xmm11,%xmm2 2883 movaps %xmm11,%xmm2
2884 .Lcbc_dec_six_or_seven: 2884 .Lcbc_dec_six_or_seven:
2885 » cmpq» $96,%rdx 2885 » cmpq» $0x60,%rdx
2886 ja .Lcbc_dec_seven 2886 ja .Lcbc_dec_seven
2887 2887
2888 movaps %xmm7,%xmm8 2888 movaps %xmm7,%xmm8
2889 call _aesni_decrypt6 2889 call _aesni_decrypt6
2890 pxor %xmm10,%xmm2 2890 pxor %xmm10,%xmm2
2891 movaps %xmm8,%xmm10 2891 movaps %xmm8,%xmm10
2892 pxor %xmm11,%xmm3 2892 pxor %xmm11,%xmm3
2893 movdqu %xmm2,(%rsi) 2893 movdqu %xmm2,(%rsi)
2894 pxor %xmm12,%xmm4 2894 pxor %xmm12,%xmm4
2895 movdqu %xmm3,16(%rsi) 2895 movdqu %xmm3,16(%rsi)
(...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after
2968 movdqu %xmm3,16(%rsi) 2968 movdqu %xmm3,16(%rsi)
2969 pxor %xmm13,%xmm5 2969 pxor %xmm13,%xmm5
2970 movdqu %xmm4,32(%rsi) 2970 movdqu %xmm4,32(%rsi)
2971 pxor %xmm14,%xmm6 2971 pxor %xmm14,%xmm6
2972 movq %r11,%rcx 2972 movq %r11,%rcx
2973 movdqu %xmm5,48(%rsi) 2973 movdqu %xmm5,48(%rsi)
2974 pxor %xmm15,%xmm7 2974 pxor %xmm15,%xmm7
2975 movl %r10d,%eax 2975 movl %r10d,%eax
2976 movdqu %xmm6,64(%rsi) 2976 movdqu %xmm6,64(%rsi)
2977 leaq 80(%rsi),%rsi 2977 leaq 80(%rsi),%rsi
2978 » subq» $96,%rdx 2978 » subq» $0x60,%rdx
2979 ja .Lcbc_dec_loop6 2979 ja .Lcbc_dec_loop6
2980 2980
2981 movdqa %xmm7,%xmm2 2981 movdqa %xmm7,%xmm2
2982 » addq» $80,%rdx 2982 » addq» $0x50,%rdx
2983 jle .Lcbc_dec_clear_tail_collected 2983 jle .Lcbc_dec_clear_tail_collected
2984 movups %xmm7,(%rsi) 2984 movups %xmm7,(%rsi)
2985 leaq 16(%rsi),%rsi 2985 leaq 16(%rsi),%rsi
2986 2986
2987 .Lcbc_dec_tail: 2987 .Lcbc_dec_tail:
2988 movups (%rdi),%xmm2 2988 movups (%rdi),%xmm2
2989 » subq» $16,%rdx 2989 » subq» $0x10,%rdx
2990 jbe .Lcbc_dec_one 2990 jbe .Lcbc_dec_one
2991 2991
2992 movups 16(%rdi),%xmm3 2992 movups 16(%rdi),%xmm3
2993 movaps %xmm2,%xmm11 2993 movaps %xmm2,%xmm11
2994 » subq» $16,%rdx 2994 » subq» $0x10,%rdx
2995 jbe .Lcbc_dec_two 2995 jbe .Lcbc_dec_two
2996 2996
2997 movups 32(%rdi),%xmm4 2997 movups 32(%rdi),%xmm4
2998 movaps %xmm3,%xmm12 2998 movaps %xmm3,%xmm12
2999 » subq» $16,%rdx 2999 » subq» $0x10,%rdx
3000 jbe .Lcbc_dec_three 3000 jbe .Lcbc_dec_three
3001 3001
3002 movups 48(%rdi),%xmm5 3002 movups 48(%rdi),%xmm5
3003 movaps %xmm4,%xmm13 3003 movaps %xmm4,%xmm13
3004 » subq» $16,%rdx 3004 » subq» $0x10,%rdx
3005 jbe .Lcbc_dec_four 3005 jbe .Lcbc_dec_four
3006 3006
3007 movups 64(%rdi),%xmm6 3007 movups 64(%rdi),%xmm6
3008 movaps %xmm5,%xmm14 3008 movaps %xmm5,%xmm14
3009 movaps %xmm6,%xmm15 3009 movaps %xmm6,%xmm15
3010 xorps %xmm7,%xmm7 3010 xorps %xmm7,%xmm7
3011 call _aesni_decrypt6 3011 call _aesni_decrypt6
3012 pxor %xmm10,%xmm2 3012 pxor %xmm10,%xmm2
3013 movaps %xmm15,%xmm10 3013 movaps %xmm15,%xmm10
3014 pxor %xmm11,%xmm3 3014 pxor %xmm11,%xmm3
3015 movdqu %xmm2,(%rsi) 3015 movdqu %xmm2,(%rsi)
3016 pxor %xmm12,%xmm4 3016 pxor %xmm12,%xmm4
3017 movdqu %xmm3,16(%rsi) 3017 movdqu %xmm3,16(%rsi)
3018 pxor %xmm3,%xmm3 3018 pxor %xmm3,%xmm3
3019 pxor %xmm13,%xmm5 3019 pxor %xmm13,%xmm5
3020 movdqu %xmm4,32(%rsi) 3020 movdqu %xmm4,32(%rsi)
3021 pxor %xmm4,%xmm4 3021 pxor %xmm4,%xmm4
3022 pxor %xmm14,%xmm6 3022 pxor %xmm14,%xmm6
3023 movdqu %xmm5,48(%rsi) 3023 movdqu %xmm5,48(%rsi)
3024 pxor %xmm5,%xmm5 3024 pxor %xmm5,%xmm5
3025 leaq 64(%rsi),%rsi 3025 leaq 64(%rsi),%rsi
3026 movdqa %xmm6,%xmm2 3026 movdqa %xmm6,%xmm2
3027 pxor %xmm6,%xmm6 3027 pxor %xmm6,%xmm6
3028 pxor %xmm7,%xmm7 3028 pxor %xmm7,%xmm7
3029 » subq» $16,%rdx 3029 » subq» $0x10,%rdx
3030 jmp .Lcbc_dec_tail_collected 3030 jmp .Lcbc_dec_tail_collected
3031 3031
3032 .align 16 3032 .align 16
3033 .Lcbc_dec_one: 3033 .Lcbc_dec_one:
3034 movaps %xmm2,%xmm11 3034 movaps %xmm2,%xmm11
3035 movups (%rcx),%xmm0 3035 movups (%rcx),%xmm0
3036 movups 16(%rcx),%xmm1 3036 movups 16(%rcx),%xmm1
3037 leaq 32(%rcx),%rcx 3037 leaq 32(%rcx),%rcx
3038 xorps %xmm0,%xmm2 3038 xorps %xmm0,%xmm2
3039 .Loop_dec1_17: 3039 .Loop_dec1_17:
(...skipping 298 matching lines...) Expand 10 before | Expand all | Expand 10 after
3338 leaq 24(%rax),%rax 3338 leaq 24(%rax),%rax
3339 3339
3340 movdqa %xmm0,%xmm3 3340 movdqa %xmm0,%xmm3
3341 pslldq $4,%xmm0 3341 pslldq $4,%xmm0
3342 pxor %xmm0,%xmm3 3342 pxor %xmm0,%xmm3
3343 pslldq $4,%xmm0 3343 pslldq $4,%xmm0
3344 pxor %xmm0,%xmm3 3344 pxor %xmm0,%xmm3
3345 pslldq $4,%xmm0 3345 pslldq $4,%xmm0
3346 pxor %xmm3,%xmm0 3346 pxor %xmm3,%xmm0
3347 3347
3348 » pshufd» $255,%xmm0,%xmm3 3348 » pshufd» $0xff,%xmm0,%xmm3
3349 pxor %xmm1,%xmm3 3349 pxor %xmm1,%xmm3
3350 pslldq $4,%xmm1 3350 pslldq $4,%xmm1
3351 pxor %xmm1,%xmm3 3351 pxor %xmm1,%xmm3
3352 3352
3353 pxor %xmm2,%xmm0 3353 pxor %xmm2,%xmm0
3354 pxor %xmm3,%xmm2 3354 pxor %xmm3,%xmm2
3355 movdqu %xmm0,-16(%rax) 3355 movdqu %xmm0,-16(%rax)
3356 3356
3357 decl %r10d 3357 decl %r10d
3358 jnz .Loop_key192 3358 jnz .Loop_key192
(...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after
3425 pslldq $4,%xmm0 3425 pslldq $4,%xmm0
3426 pxor %xmm3,%xmm0 3426 pxor %xmm3,%xmm0
3427 pslld $1,%xmm4 3427 pslld $1,%xmm4
3428 3428
3429 pxor %xmm2,%xmm0 3429 pxor %xmm2,%xmm0
3430 movdqu %xmm0,(%rax) 3430 movdqu %xmm0,(%rax)
3431 3431
3432 decl %r10d 3432 decl %r10d
3433 jz .Ldone_key256 3433 jz .Ldone_key256
3434 3434
3435 » pshufd» $255,%xmm0,%xmm2 3435 » pshufd» $0xff,%xmm0,%xmm2
3436 pxor %xmm3,%xmm3 3436 pxor %xmm3,%xmm3
3437 .byte 102,15,56,221,211 3437 .byte 102,15,56,221,211
3438 3438
3439 movdqa %xmm1,%xmm3 3439 movdqa %xmm1,%xmm3
3440 pslldq $4,%xmm1 3440 pslldq $4,%xmm1
3441 pxor %xmm1,%xmm3 3441 pxor %xmm1,%xmm3
3442 pslldq $4,%xmm1 3442 pslldq $4,%xmm1
3443 pxor %xmm1,%xmm3 3443 pxor %xmm1,%xmm3
3444 pslldq $4,%xmm1 3444 pslldq $4,%xmm1
3445 pxor %xmm3,%xmm1 3445 pxor %xmm3,%xmm1
(...skipping 110 matching lines...) Expand 10 before | Expand all | Expand 10 after
3556 .Lkey_rotate192: 3556 .Lkey_rotate192:
3557 .long 0x04070605,0x04070605,0x04070605,0x04070605 3557 .long 0x04070605,0x04070605,0x04070605,0x04070605
3558 .Lkey_rcon1: 3558 .Lkey_rcon1:
3559 .long 1,1,1,1 3559 .long 1,1,1,1
3560 .Lkey_rcon1b: 3560 .Lkey_rcon1b:
3561 .long 0x1b,0x1b,0x1b,0x1b 3561 .long 0x1b,0x1b,0x1b,0x1b
3562 3562
3563 .byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 3563 .byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
3564 .align 64 3564 .align 64
3565 #endif 3565 #endif
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698