Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1071)

Side by Side Diff: third_party/boringssl/linux-x86_64/crypto/bn/x86_64-mont.S

Issue 2695223006: Roll src/third_party/boringssl/src dc8c1d962..0f28691d3 (Closed)
Patch Set: Use correct parent. Created 3 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 #if defined(__x86_64__) 1 #if defined(__x86_64__)
2 .text 2 .text
3 3
4 .extern OPENSSL_ia32cap_P 4 .extern OPENSSL_ia32cap_P
5 .hidden OPENSSL_ia32cap_P 5 .hidden OPENSSL_ia32cap_P
6 6
7 .globl bn_mul_mont 7 .globl bn_mul_mont
8 .hidden bn_mul_mont 8 .hidden bn_mul_mont
9 .type bn_mul_mont,@function 9 .type bn_mul_mont,@function
10 .align 16 10 .align 16
11 bn_mul_mont: 11 bn_mul_mont:
12 movl %r9d,%r9d
13 movq %rsp,%rax
12 testl $3,%r9d 14 testl $3,%r9d
13 jnz .Lmul_enter 15 jnz .Lmul_enter
14 cmpl $8,%r9d 16 cmpl $8,%r9d
15 jb .Lmul_enter 17 jb .Lmul_enter
16 cmpq %rsi,%rdx 18 cmpq %rsi,%rdx
17 jne .Lmul4x_enter 19 jne .Lmul4x_enter
18 testl $7,%r9d 20 testl $7,%r9d
19 jz .Lsqr8x_enter 21 jz .Lsqr8x_enter
20 jmp .Lmul4x_enter 22 jmp .Lmul4x_enter
21 23
22 .align 16 24 .align 16
23 .Lmul_enter: 25 .Lmul_enter:
24 pushq %rbx 26 pushq %rbx
25 pushq %rbp 27 pushq %rbp
26 pushq %r12 28 pushq %r12
27 pushq %r13 29 pushq %r13
28 pushq %r14 30 pushq %r14
29 pushq %r15 31 pushq %r15
30 32
31 » movl» %r9d,%r9d 33 » negq» %r9
32 » leaq» 2(%r9),%r10
33 movq %rsp,%r11 34 movq %rsp,%r11
34 » negq» %r10 35 » leaq» -16(%rsp,%r9,8),%r10
35 » leaq» (%rsp,%r10,8),%rsp 36 » negq» %r9
36 » andq» $-1024,%rsp 37 » andq» $-1024,%r10
37 38
38 » movq» %r11,8(%rsp,%r9,8) 39
40
41
42
43
44
45
46
47 » subq» %r10,%r11
48 » andq» $-4096,%r11
49 » leaq» (%r10,%r11,1),%rsp
50 » movq» (%rsp),%r11
51 » cmpq» %r10,%rsp
52 » ja» .Lmul_page_walk
53 » jmp» .Lmul_page_walk_done
54
55 .align» 16
56 .Lmul_page_walk:
57 » leaq» -4096(%rsp),%rsp
58 » movq» (%rsp),%r11
59 » cmpq» %r10,%rsp
60 » ja» .Lmul_page_walk
61 .Lmul_page_walk_done:
62
63 » movq» %rax,8(%rsp,%r9,8)
39 .Lmul_body: 64 .Lmul_body:
40 movq %rdx,%r12 65 movq %rdx,%r12
41 movq (%r8),%r8 66 movq (%r8),%r8
42 movq (%r12),%rbx 67 movq (%r12),%rbx
43 movq (%rsi),%rax 68 movq (%rsi),%rax
44 69
45 xorq %r14,%r14 70 xorq %r14,%r14
46 xorq %r15,%r15 71 xorq %r15,%r15
47 72
48 movq %r8,%rbp 73 movq %r8,%rbp
(...skipping 131 matching lines...) Expand 10 before | Expand all | Expand 10 after
180 .align 16 205 .align 16
181 .Lsub: sbbq (%rcx,%r14,8),%rax 206 .Lsub: sbbq (%rcx,%r14,8),%rax
182 movq %rax,(%rdi,%r14,8) 207 movq %rax,(%rdi,%r14,8)
183 movq 8(%rsi,%r14,8),%rax 208 movq 8(%rsi,%r14,8),%rax
184 leaq 1(%r14),%r14 209 leaq 1(%r14),%r14
185 decq %r15 210 decq %r15
186 jnz .Lsub 211 jnz .Lsub
187 212
188 sbbq $0,%rax 213 sbbq $0,%rax
189 xorq %r14,%r14 214 xorq %r14,%r14
215 andq %rax,%rsi
216 notq %rax
217 movq %rdi,%rcx
218 andq %rax,%rcx
190 movq %r9,%r15 219 movq %r9,%r15
220 orq %rcx,%rsi
191 .align 16 221 .align 16
192 .Lcopy: 222 .Lcopy:
193 » movq» (%rsp,%r14,8),%rsi 223 » movq» (%rsi,%r14,8),%rax
194 » movq» (%rdi,%r14,8),%rcx
195 » xorq» %rcx,%rsi
196 » andq» %rax,%rsi
197 » xorq» %rcx,%rsi
198 movq %r14,(%rsp,%r14,8) 224 movq %r14,(%rsp,%r14,8)
199 » movq» %rsi,(%rdi,%r14,8) 225 » movq» %rax,(%rdi,%r14,8)
200 leaq 1(%r14),%r14 226 leaq 1(%r14),%r14
201 subq $1,%r15 227 subq $1,%r15
202 jnz .Lcopy 228 jnz .Lcopy
203 229
204 movq 8(%rsp,%r9,8),%rsi 230 movq 8(%rsp,%r9,8),%rsi
205 movq $1,%rax 231 movq $1,%rax
206 » movq» (%rsi),%r15 232 » movq» -48(%rsi),%r15
207 » movq» 8(%rsi),%r14 233 » movq» -40(%rsi),%r14
208 » movq» 16(%rsi),%r13 234 » movq» -32(%rsi),%r13
209 » movq» 24(%rsi),%r12 235 » movq» -24(%rsi),%r12
210 » movq» 32(%rsi),%rbp 236 » movq» -16(%rsi),%rbp
211 » movq» 40(%rsi),%rbx 237 » movq» -8(%rsi),%rbx
212 » leaq» 48(%rsi),%rsp 238 » leaq» (%rsi),%rsp
213 .Lmul_epilogue: 239 .Lmul_epilogue:
214 .byte 0xf3,0xc3 240 .byte 0xf3,0xc3
215 .size bn_mul_mont,.-bn_mul_mont 241 .size bn_mul_mont,.-bn_mul_mont
216 .type bn_mul4x_mont,@function 242 .type bn_mul4x_mont,@function
217 .align 16 243 .align 16
218 bn_mul4x_mont: 244 bn_mul4x_mont:
245 movl %r9d,%r9d
246 movq %rsp,%rax
219 .Lmul4x_enter: 247 .Lmul4x_enter:
220 pushq %rbx 248 pushq %rbx
221 pushq %rbp 249 pushq %rbp
222 pushq %r12 250 pushq %r12
223 pushq %r13 251 pushq %r13
224 pushq %r14 252 pushq %r14
225 pushq %r15 253 pushq %r15
226 254
227 » movl» %r9d,%r9d 255 » negq» %r9
228 » leaq» 4(%r9),%r10
229 movq %rsp,%r11 256 movq %rsp,%r11
230 » negq» %r10 257 » leaq» -32(%rsp,%r9,8),%r10
231 » leaq» (%rsp,%r10,8),%rsp 258 » negq» %r9
232 » andq» $-1024,%rsp 259 » andq» $-1024,%r10
233 260
234 » movq» %r11,8(%rsp,%r9,8) 261 » subq» %r10,%r11
262 » andq» $-4096,%r11
263 » leaq» (%r10,%r11,1),%rsp
264 » movq» (%rsp),%r11
265 » cmpq» %r10,%rsp
266 » ja» .Lmul4x_page_walk
267 » jmp» .Lmul4x_page_walk_done
268
269 .Lmul4x_page_walk:
270 » leaq» -4096(%rsp),%rsp
271 » movq» (%rsp),%r11
272 » cmpq» %r10,%rsp
273 » ja» .Lmul4x_page_walk
274 .Lmul4x_page_walk_done:
275
276 » movq» %rax,8(%rsp,%r9,8)
235 .Lmul4x_body: 277 .Lmul4x_body:
236 movq %rdi,16(%rsp,%r9,8) 278 movq %rdi,16(%rsp,%r9,8)
237 movq %rdx,%r12 279 movq %rdx,%r12
238 movq (%r8),%r8 280 movq (%r8),%r8
239 movq (%r12),%rbx 281 movq (%r12),%rbx
240 movq (%rsi),%rax 282 movq (%rsi),%rax
241 283
242 xorq %r14,%r14 284 xorq %r14,%r14
243 xorq %r15,%r15 285 xorq %r15,%r15
244 286
(...skipping 280 matching lines...) Expand 10 before | Expand all | Expand 10 after
525 adcq $0,%rdi 567 adcq $0,%rdi
526 addq (%rsp,%r9,8),%r13 568 addq (%rsp,%r9,8),%r13
527 adcq $0,%rdi 569 adcq $0,%rdi
528 movq %r13,-8(%rsp,%r15,8) 570 movq %r13,-8(%rsp,%r15,8)
529 movq %rdi,(%rsp,%r15,8) 571 movq %rdi,(%rsp,%r15,8)
530 572
531 cmpq %r9,%r14 573 cmpq %r9,%r14
532 jb .Louter4x 574 jb .Louter4x
533 movq 16(%rsp,%r9,8),%rdi 575 movq 16(%rsp,%r9,8),%rdi
534 movq 0(%rsp),%rax 576 movq 0(%rsp),%rax
577 pxor %xmm0,%xmm0
535 movq 8(%rsp),%rdx 578 movq 8(%rsp),%rdx
536 shrq $2,%r9 579 shrq $2,%r9
537 leaq (%rsp),%rsi 580 leaq (%rsp),%rsi
538 xorq %r14,%r14 581 xorq %r14,%r14
539 582
540 subq 0(%rcx),%rax 583 subq 0(%rcx),%rax
541 movq 16(%rsi),%rbx 584 movq 16(%rsi),%rbx
542 movq 24(%rsi),%rbp 585 movq 24(%rsi),%rbp
543 sbbq 8(%rcx),%rdx 586 sbbq 8(%rcx),%rdx
544 leaq -1(%r9),%r15 587 leaq -1(%r9),%r15
(...skipping 17 matching lines...) Expand all
562 jnz .Lsub4x 605 jnz .Lsub4x
563 606
564 movq %rax,0(%rdi,%r14,8) 607 movq %rax,0(%rdi,%r14,8)
565 movq 32(%rsi,%r14,8),%rax 608 movq 32(%rsi,%r14,8),%rax
566 sbbq 16(%rcx,%r14,8),%rbx 609 sbbq 16(%rcx,%r14,8),%rbx
567 movq %rdx,8(%rdi,%r14,8) 610 movq %rdx,8(%rdi,%r14,8)
568 sbbq 24(%rcx,%r14,8),%rbp 611 sbbq 24(%rcx,%r14,8),%rbp
569 movq %rbx,16(%rdi,%r14,8) 612 movq %rbx,16(%rdi,%r14,8)
570 613
571 sbbq $0,%rax 614 sbbq $0,%rax
572 movq %rax,%xmm0
573 punpcklqdq %xmm0,%xmm0
574 movq %rbp,24(%rdi,%r14,8) 615 movq %rbp,24(%rdi,%r14,8)
575 xorq %r14,%r14 616 xorq %r14,%r14
617 andq %rax,%rsi
618 notq %rax
619 movq %rdi,%rcx
620 andq %rax,%rcx
621 leaq -1(%r9),%r15
622 orq %rcx,%rsi
576 623
577 » movq» %r9,%r15 624 » movdqu» (%rsi),%xmm1
578 » pxor» %xmm5,%xmm5 625 » movdqa» %xmm0,(%rsp)
626 » movdqu» %xmm1,(%rdi)
579 jmp .Lcopy4x 627 jmp .Lcopy4x
580 .align 16 628 .align 16
581 .Lcopy4x: 629 .Lcopy4x:
582 » movdqu» (%rsp,%r14,1),%xmm2 630 » movdqu» 16(%rsi,%r14,1),%xmm2
583 » movdqu» 16(%rsp,%r14,1),%xmm4 631 » movdqu» 32(%rsi,%r14,1),%xmm1
584 » movdqu» (%rdi,%r14,1),%xmm1 632 » movdqa» %xmm0,16(%rsp,%r14,1)
585 » movdqu» 16(%rdi,%r14,1),%xmm3 633 » movdqu» %xmm2,16(%rdi,%r14,1)
586 » pxor» %xmm1,%xmm2 634 » movdqa» %xmm0,32(%rsp,%r14,1)
587 » pxor» %xmm3,%xmm4 635 » movdqu» %xmm1,32(%rdi,%r14,1)
588 » pand» %xmm0,%xmm2
589 » pand» %xmm0,%xmm4
590 » pxor» %xmm1,%xmm2
591 » pxor» %xmm3,%xmm4
592 » movdqu» %xmm2,(%rdi,%r14,1)
593 » movdqu» %xmm4,16(%rdi,%r14,1)
594 » movdqa» %xmm5,(%rsp,%r14,1)
595 » movdqa» %xmm5,16(%rsp,%r14,1)
596
597 leaq 32(%r14),%r14 636 leaq 32(%r14),%r14
598 decq %r15 637 decq %r15
599 jnz .Lcopy4x 638 jnz .Lcopy4x
600 639
601 shlq $2,%r9 640 shlq $2,%r9
641 movdqu 16(%rsi,%r14,1),%xmm2
642 movdqa %xmm0,16(%rsp,%r14,1)
643 movdqu %xmm2,16(%rdi,%r14,1)
602 movq 8(%rsp,%r9,8),%rsi 644 movq 8(%rsp,%r9,8),%rsi
603 movq $1,%rax 645 movq $1,%rax
604 » movq» (%rsi),%r15 646 » movq» -48(%rsi),%r15
605 » movq» 8(%rsi),%r14 647 » movq» -40(%rsi),%r14
606 » movq» 16(%rsi),%r13 648 » movq» -32(%rsi),%r13
607 » movq» 24(%rsi),%r12 649 » movq» -24(%rsi),%r12
608 » movq» 32(%rsi),%rbp 650 » movq» -16(%rsi),%rbp
609 » movq» 40(%rsi),%rbx 651 » movq» -8(%rsi),%rbx
610 » leaq» 48(%rsi),%rsp 652 » leaq» (%rsi),%rsp
611 .Lmul4x_epilogue: 653 .Lmul4x_epilogue:
612 .byte 0xf3,0xc3 654 .byte 0xf3,0xc3
613 .size bn_mul4x_mont,.-bn_mul4x_mont 655 .size bn_mul4x_mont,.-bn_mul4x_mont
614 .extern bn_sqr8x_internal 656 .extern bn_sqr8x_internal
615 .hidden bn_sqr8x_internal 657 .hidden bn_sqr8x_internal
616 658
617 .type bn_sqr8x_mont,@function 659 .type bn_sqr8x_mont,@function
618 .align 32 660 .align 32
619 bn_sqr8x_mont: 661 bn_sqr8x_mont:
662 movq %rsp,%rax
620 .Lsqr8x_enter: 663 .Lsqr8x_enter:
621 movq %rsp,%rax
622 pushq %rbx 664 pushq %rbx
623 pushq %rbp 665 pushq %rbp
624 pushq %r12 666 pushq %r12
625 pushq %r13 667 pushq %r13
626 pushq %r14 668 pushq %r14
627 pushq %r15 669 pushq %r15
670 .Lsqr8x_prologue:
628 671
629 movl %r9d,%r10d 672 movl %r9d,%r10d
630 shll $3,%r9d 673 shll $3,%r9d
631 shlq $3+2,%r10 674 shlq $3+2,%r10
632 negq %r9 675 negq %r9
633 676
634 677
635 678
636 679
637 680
638 681
639 leaq -64(%rsp,%r9,2),%r11 682 leaq -64(%rsp,%r9,2),%r11
683 movq %rsp,%rbp
640 movq (%r8),%r8 684 movq (%r8),%r8
641 subq %rsi,%r11 685 subq %rsi,%r11
642 andq $4095,%r11 686 andq $4095,%r11
643 cmpq %r11,%r10 687 cmpq %r11,%r10
644 jb .Lsqr8x_sp_alt 688 jb .Lsqr8x_sp_alt
645 » subq» %r11,%rsp 689 » subq» %r11,%rbp
646 » leaq» -64(%rsp,%r9,2),%rsp 690 » leaq» -64(%rbp,%r9,2),%rbp
647 jmp .Lsqr8x_sp_done 691 jmp .Lsqr8x_sp_done
648 692
649 .align 32 693 .align 32
650 .Lsqr8x_sp_alt: 694 .Lsqr8x_sp_alt:
651 leaq 4096-64(,%r9,2),%r10 695 leaq 4096-64(,%r9,2),%r10
652 » leaq» -64(%rsp,%r9,2),%rsp 696 » leaq» -64(%rbp,%r9,2),%rbp
653 subq %r10,%r11 697 subq %r10,%r11
654 movq $0,%r10 698 movq $0,%r10
655 cmovcq %r10,%r11 699 cmovcq %r10,%r11
656 » subq» %r11,%rsp 700 » subq» %r11,%rbp
657 .Lsqr8x_sp_done: 701 .Lsqr8x_sp_done:
658 » andq» $-64,%rsp 702 » andq» $-64,%rbp
703 » movq» %rsp,%r11
704 » subq» %rbp,%r11
705 » andq» $-4096,%r11
706 » leaq» (%r11,%rbp,1),%rsp
707 » movq» (%rsp),%r10
708 » cmpq» %rbp,%rsp
709 » ja» .Lsqr8x_page_walk
710 » jmp» .Lsqr8x_page_walk_done
711
712 .align» 16
713 .Lsqr8x_page_walk:
714 » leaq» -4096(%rsp),%rsp
715 » movq» (%rsp),%r10
716 » cmpq» %rbp,%rsp
717 » ja» .Lsqr8x_page_walk
718 .Lsqr8x_page_walk_done:
719
659 movq %r9,%r10 720 movq %r9,%r10
660 negq %r9 721 negq %r9
661 722
662 movq %r8,32(%rsp) 723 movq %r8,32(%rsp)
663 movq %rax,40(%rsp) 724 movq %rax,40(%rsp)
664 .Lsqr8x_body: 725 .Lsqr8x_body:
665 726
666 .byte 102,72,15,110,209 727 .byte 102,72,15,110,209
667 pxor %xmm0,%xmm0 728 pxor %xmm0,%xmm0
668 .byte 102,72,15,110,207 729 .byte 102,72,15,110,207
(...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after
741 movq -24(%rsi),%r12 802 movq -24(%rsi),%r12
742 movq -16(%rsi),%rbp 803 movq -16(%rsi),%rbp
743 movq -8(%rsi),%rbx 804 movq -8(%rsi),%rbx
744 leaq (%rsi),%rsp 805 leaq (%rsi),%rsp
745 .Lsqr8x_epilogue: 806 .Lsqr8x_epilogue:
746 .byte 0xf3,0xc3 807 .byte 0xf3,0xc3
747 .size bn_sqr8x_mont,.-bn_sqr8x_mont 808 .size bn_sqr8x_mont,.-bn_sqr8x_mont
748 .byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 809 .byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
749 .align 16 810 .align 16
750 #endif 811 #endif
OLDNEW
« no previous file with comments | « third_party/boringssl/linux-x86/crypto/bn/x86-mont.S ('k') | third_party/boringssl/linux-x86_64/crypto/bn/x86_64-mont5.S » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698