Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(163)

Side by Side Diff: third_party/boringssl/mac-x86_64/crypto/bn/x86_64-mont.S

Issue 2695223006: Roll src/third_party/boringssl/src dc8c1d962..0f28691d3 (Closed)
Patch Set: Use correct parent. Created 3 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 #if defined(__x86_64__) 1 #if defined(__x86_64__)
2 .text 2 .text
3 3
4 4
5 5
6 .globl _bn_mul_mont 6 .globl _bn_mul_mont
7 .private_extern _bn_mul_mont 7 .private_extern _bn_mul_mont
8 8
9 .p2align 4 9 .p2align 4
10 _bn_mul_mont: 10 _bn_mul_mont:
11 movl %r9d,%r9d
12 movq %rsp,%rax
11 testl $3,%r9d 13 testl $3,%r9d
12 jnz L$mul_enter 14 jnz L$mul_enter
13 cmpl $8,%r9d 15 cmpl $8,%r9d
14 jb L$mul_enter 16 jb L$mul_enter
15 cmpq %rsi,%rdx 17 cmpq %rsi,%rdx
16 jne L$mul4x_enter 18 jne L$mul4x_enter
17 testl $7,%r9d 19 testl $7,%r9d
18 jz L$sqr8x_enter 20 jz L$sqr8x_enter
19 jmp L$mul4x_enter 21 jmp L$mul4x_enter
20 22
21 .p2align 4 23 .p2align 4
22 L$mul_enter: 24 L$mul_enter:
23 pushq %rbx 25 pushq %rbx
24 pushq %rbp 26 pushq %rbp
25 pushq %r12 27 pushq %r12
26 pushq %r13 28 pushq %r13
27 pushq %r14 29 pushq %r14
28 pushq %r15 30 pushq %r15
29 31
30 » movl» %r9d,%r9d 32 » negq» %r9
31 » leaq» 2(%r9),%r10
32 movq %rsp,%r11 33 movq %rsp,%r11
33 » negq» %r10 34 » leaq» -16(%rsp,%r9,8),%r10
34 » leaq» (%rsp,%r10,8),%rsp 35 » negq» %r9
35 » andq» $-1024,%rsp 36 » andq» $-1024,%r10
36 37
37 » movq» %r11,8(%rsp,%r9,8) 38
39
40
41
42
43
44
45
46 » subq» %r10,%r11
47 » andq» $-4096,%r11
48 » leaq» (%r10,%r11,1),%rsp
49 » movq» (%rsp),%r11
50 » cmpq» %r10,%rsp
51 » ja» L$mul_page_walk
52 » jmp» L$mul_page_walk_done
53
54 .p2align» 4
55 L$mul_page_walk:
56 » leaq» -4096(%rsp),%rsp
57 » movq» (%rsp),%r11
58 » cmpq» %r10,%rsp
59 » ja» L$mul_page_walk
60 L$mul_page_walk_done:
61
62 » movq» %rax,8(%rsp,%r9,8)
38 L$mul_body: 63 L$mul_body:
39 movq %rdx,%r12 64 movq %rdx,%r12
40 movq (%r8),%r8 65 movq (%r8),%r8
41 movq (%r12),%rbx 66 movq (%r12),%rbx
42 movq (%rsi),%rax 67 movq (%rsi),%rax
43 68
44 xorq %r14,%r14 69 xorq %r14,%r14
45 xorq %r15,%r15 70 xorq %r15,%r15
46 71
47 movq %r8,%rbp 72 movq %r8,%rbp
(...skipping 131 matching lines...) Expand 10 before | Expand all | Expand 10 after
179 .p2align 4 204 .p2align 4
180 L$sub: sbbq (%rcx,%r14,8),%rax 205 L$sub: sbbq (%rcx,%r14,8),%rax
181 movq %rax,(%rdi,%r14,8) 206 movq %rax,(%rdi,%r14,8)
182 movq 8(%rsi,%r14,8),%rax 207 movq 8(%rsi,%r14,8),%rax
183 leaq 1(%r14),%r14 208 leaq 1(%r14),%r14
184 decq %r15 209 decq %r15
185 jnz L$sub 210 jnz L$sub
186 211
187 sbbq $0,%rax 212 sbbq $0,%rax
188 xorq %r14,%r14 213 xorq %r14,%r14
214 andq %rax,%rsi
215 notq %rax
216 movq %rdi,%rcx
217 andq %rax,%rcx
189 movq %r9,%r15 218 movq %r9,%r15
219 orq %rcx,%rsi
190 .p2align 4 220 .p2align 4
191 L$copy: 221 L$copy:
192 » movq» (%rsp,%r14,8),%rsi 222 » movq» (%rsi,%r14,8),%rax
193 » movq» (%rdi,%r14,8),%rcx
194 » xorq» %rcx,%rsi
195 » andq» %rax,%rsi
196 » xorq» %rcx,%rsi
197 movq %r14,(%rsp,%r14,8) 223 movq %r14,(%rsp,%r14,8)
198 » movq» %rsi,(%rdi,%r14,8) 224 » movq» %rax,(%rdi,%r14,8)
199 leaq 1(%r14),%r14 225 leaq 1(%r14),%r14
200 subq $1,%r15 226 subq $1,%r15
201 jnz L$copy 227 jnz L$copy
202 228
203 movq 8(%rsp,%r9,8),%rsi 229 movq 8(%rsp,%r9,8),%rsi
204 movq $1,%rax 230 movq $1,%rax
205 » movq» (%rsi),%r15 231 » movq» -48(%rsi),%r15
206 » movq» 8(%rsi),%r14 232 » movq» -40(%rsi),%r14
207 » movq» 16(%rsi),%r13 233 » movq» -32(%rsi),%r13
208 » movq» 24(%rsi),%r12 234 » movq» -24(%rsi),%r12
209 » movq» 32(%rsi),%rbp 235 » movq» -16(%rsi),%rbp
210 » movq» 40(%rsi),%rbx 236 » movq» -8(%rsi),%rbx
211 » leaq» 48(%rsi),%rsp 237 » leaq» (%rsi),%rsp
212 L$mul_epilogue: 238 L$mul_epilogue:
213 .byte 0xf3,0xc3 239 .byte 0xf3,0xc3
214 240
215 241
216 .p2align 4 242 .p2align 4
217 bn_mul4x_mont: 243 bn_mul4x_mont:
244 movl %r9d,%r9d
245 movq %rsp,%rax
218 L$mul4x_enter: 246 L$mul4x_enter:
219 pushq %rbx 247 pushq %rbx
220 pushq %rbp 248 pushq %rbp
221 pushq %r12 249 pushq %r12
222 pushq %r13 250 pushq %r13
223 pushq %r14 251 pushq %r14
224 pushq %r15 252 pushq %r15
225 253
226 » movl» %r9d,%r9d 254 » negq» %r9
227 » leaq» 4(%r9),%r10
228 movq %rsp,%r11 255 movq %rsp,%r11
229 » negq» %r10 256 » leaq» -32(%rsp,%r9,8),%r10
230 » leaq» (%rsp,%r10,8),%rsp 257 » negq» %r9
231 » andq» $-1024,%rsp 258 » andq» $-1024,%r10
232 259
233 » movq» %r11,8(%rsp,%r9,8) 260 » subq» %r10,%r11
261 » andq» $-4096,%r11
262 » leaq» (%r10,%r11,1),%rsp
263 » movq» (%rsp),%r11
264 » cmpq» %r10,%rsp
265 » ja» L$mul4x_page_walk
266 » jmp» L$mul4x_page_walk_done
267
268 L$mul4x_page_walk:
269 » leaq» -4096(%rsp),%rsp
270 » movq» (%rsp),%r11
271 » cmpq» %r10,%rsp
272 » ja» L$mul4x_page_walk
273 L$mul4x_page_walk_done:
274
275 » movq» %rax,8(%rsp,%r9,8)
234 L$mul4x_body: 276 L$mul4x_body:
235 movq %rdi,16(%rsp,%r9,8) 277 movq %rdi,16(%rsp,%r9,8)
236 movq %rdx,%r12 278 movq %rdx,%r12
237 movq (%r8),%r8 279 movq (%r8),%r8
238 movq (%r12),%rbx 280 movq (%r12),%rbx
239 movq (%rsi),%rax 281 movq (%rsi),%rax
240 282
241 xorq %r14,%r14 283 xorq %r14,%r14
242 xorq %r15,%r15 284 xorq %r15,%r15
243 285
(...skipping 280 matching lines...) Expand 10 before | Expand all | Expand 10 after
524 adcq $0,%rdi 566 adcq $0,%rdi
525 addq (%rsp,%r9,8),%r13 567 addq (%rsp,%r9,8),%r13
526 adcq $0,%rdi 568 adcq $0,%rdi
527 movq %r13,-8(%rsp,%r15,8) 569 movq %r13,-8(%rsp,%r15,8)
528 movq %rdi,(%rsp,%r15,8) 570 movq %rdi,(%rsp,%r15,8)
529 571
530 cmpq %r9,%r14 572 cmpq %r9,%r14
531 jb L$outer4x 573 jb L$outer4x
532 movq 16(%rsp,%r9,8),%rdi 574 movq 16(%rsp,%r9,8),%rdi
533 movq 0(%rsp),%rax 575 movq 0(%rsp),%rax
576 pxor %xmm0,%xmm0
534 movq 8(%rsp),%rdx 577 movq 8(%rsp),%rdx
535 shrq $2,%r9 578 shrq $2,%r9
536 leaq (%rsp),%rsi 579 leaq (%rsp),%rsi
537 xorq %r14,%r14 580 xorq %r14,%r14
538 581
539 subq 0(%rcx),%rax 582 subq 0(%rcx),%rax
540 movq 16(%rsi),%rbx 583 movq 16(%rsi),%rbx
541 movq 24(%rsi),%rbp 584 movq 24(%rsi),%rbp
542 sbbq 8(%rcx),%rdx 585 sbbq 8(%rcx),%rdx
543 leaq -1(%r9),%r15 586 leaq -1(%r9),%r15
(...skipping 17 matching lines...) Expand all
561 jnz L$sub4x 604 jnz L$sub4x
562 605
563 movq %rax,0(%rdi,%r14,8) 606 movq %rax,0(%rdi,%r14,8)
564 movq 32(%rsi,%r14,8),%rax 607 movq 32(%rsi,%r14,8),%rax
565 sbbq 16(%rcx,%r14,8),%rbx 608 sbbq 16(%rcx,%r14,8),%rbx
566 movq %rdx,8(%rdi,%r14,8) 609 movq %rdx,8(%rdi,%r14,8)
567 sbbq 24(%rcx,%r14,8),%rbp 610 sbbq 24(%rcx,%r14,8),%rbp
568 movq %rbx,16(%rdi,%r14,8) 611 movq %rbx,16(%rdi,%r14,8)
569 612
570 sbbq $0,%rax 613 sbbq $0,%rax
571 movq %rax,%xmm0
572 punpcklqdq %xmm0,%xmm0
573 movq %rbp,24(%rdi,%r14,8) 614 movq %rbp,24(%rdi,%r14,8)
574 xorq %r14,%r14 615 xorq %r14,%r14
616 andq %rax,%rsi
617 notq %rax
618 movq %rdi,%rcx
619 andq %rax,%rcx
620 leaq -1(%r9),%r15
621 orq %rcx,%rsi
575 622
576 » movq» %r9,%r15 623 » movdqu» (%rsi),%xmm1
577 » pxor» %xmm5,%xmm5 624 » movdqa» %xmm0,(%rsp)
625 » movdqu» %xmm1,(%rdi)
578 jmp L$copy4x 626 jmp L$copy4x
579 .p2align 4 627 .p2align 4
580 L$copy4x: 628 L$copy4x:
581 » movdqu» (%rsp,%r14,1),%xmm2 629 » movdqu» 16(%rsi,%r14,1),%xmm2
582 » movdqu» 16(%rsp,%r14,1),%xmm4 630 » movdqu» 32(%rsi,%r14,1),%xmm1
583 » movdqu» (%rdi,%r14,1),%xmm1 631 » movdqa» %xmm0,16(%rsp,%r14,1)
584 » movdqu» 16(%rdi,%r14,1),%xmm3 632 » movdqu» %xmm2,16(%rdi,%r14,1)
585 » pxor» %xmm1,%xmm2 633 » movdqa» %xmm0,32(%rsp,%r14,1)
586 » pxor» %xmm3,%xmm4 634 » movdqu» %xmm1,32(%rdi,%r14,1)
587 » pand» %xmm0,%xmm2
588 » pand» %xmm0,%xmm4
589 » pxor» %xmm1,%xmm2
590 » pxor» %xmm3,%xmm4
591 » movdqu» %xmm2,(%rdi,%r14,1)
592 » movdqu» %xmm4,16(%rdi,%r14,1)
593 » movdqa» %xmm5,(%rsp,%r14,1)
594 » movdqa» %xmm5,16(%rsp,%r14,1)
595
596 leaq 32(%r14),%r14 635 leaq 32(%r14),%r14
597 decq %r15 636 decq %r15
598 jnz L$copy4x 637 jnz L$copy4x
599 638
600 shlq $2,%r9 639 shlq $2,%r9
640 movdqu 16(%rsi,%r14,1),%xmm2
641 movdqa %xmm0,16(%rsp,%r14,1)
642 movdqu %xmm2,16(%rdi,%r14,1)
601 movq 8(%rsp,%r9,8),%rsi 643 movq 8(%rsp,%r9,8),%rsi
602 movq $1,%rax 644 movq $1,%rax
603 » movq» (%rsi),%r15 645 » movq» -48(%rsi),%r15
604 » movq» 8(%rsi),%r14 646 » movq» -40(%rsi),%r14
605 » movq» 16(%rsi),%r13 647 » movq» -32(%rsi),%r13
606 » movq» 24(%rsi),%r12 648 » movq» -24(%rsi),%r12
607 » movq» 32(%rsi),%rbp 649 » movq» -16(%rsi),%rbp
608 » movq» 40(%rsi),%rbx 650 » movq» -8(%rsi),%rbx
609 » leaq» 48(%rsi),%rsp 651 » leaq» (%rsi),%rsp
610 L$mul4x_epilogue: 652 L$mul4x_epilogue:
611 .byte 0xf3,0xc3 653 .byte 0xf3,0xc3
612 654
613 655
614 656
615 657
616 .p2align 5 658 .p2align 5
617 bn_sqr8x_mont: 659 bn_sqr8x_mont:
660 movq %rsp,%rax
618 L$sqr8x_enter: 661 L$sqr8x_enter:
619 movq %rsp,%rax
620 pushq %rbx 662 pushq %rbx
621 pushq %rbp 663 pushq %rbp
622 pushq %r12 664 pushq %r12
623 pushq %r13 665 pushq %r13
624 pushq %r14 666 pushq %r14
625 pushq %r15 667 pushq %r15
668 L$sqr8x_prologue:
626 669
627 movl %r9d,%r10d 670 movl %r9d,%r10d
628 shll $3,%r9d 671 shll $3,%r9d
629 shlq $3+2,%r10 672 shlq $3+2,%r10
630 negq %r9 673 negq %r9
631 674
632 675
633 676
634 677
635 678
636 679
637 leaq -64(%rsp,%r9,2),%r11 680 leaq -64(%rsp,%r9,2),%r11
681 movq %rsp,%rbp
638 movq (%r8),%r8 682 movq (%r8),%r8
639 subq %rsi,%r11 683 subq %rsi,%r11
640 andq $4095,%r11 684 andq $4095,%r11
641 cmpq %r11,%r10 685 cmpq %r11,%r10
642 jb L$sqr8x_sp_alt 686 jb L$sqr8x_sp_alt
643 » subq» %r11,%rsp 687 » subq» %r11,%rbp
644 » leaq» -64(%rsp,%r9,2),%rsp 688 » leaq» -64(%rbp,%r9,2),%rbp
645 jmp L$sqr8x_sp_done 689 jmp L$sqr8x_sp_done
646 690
647 .p2align 5 691 .p2align 5
648 L$sqr8x_sp_alt: 692 L$sqr8x_sp_alt:
649 leaq 4096-64(,%r9,2),%r10 693 leaq 4096-64(,%r9,2),%r10
650 » leaq» -64(%rsp,%r9,2),%rsp 694 » leaq» -64(%rbp,%r9,2),%rbp
651 subq %r10,%r11 695 subq %r10,%r11
652 movq $0,%r10 696 movq $0,%r10
653 cmovcq %r10,%r11 697 cmovcq %r10,%r11
654 » subq» %r11,%rsp 698 » subq» %r11,%rbp
655 L$sqr8x_sp_done: 699 L$sqr8x_sp_done:
656 » andq» $-64,%rsp 700 » andq» $-64,%rbp
701 » movq» %rsp,%r11
702 » subq» %rbp,%r11
703 » andq» $-4096,%r11
704 » leaq» (%r11,%rbp,1),%rsp
705 » movq» (%rsp),%r10
706 » cmpq» %rbp,%rsp
707 » ja» L$sqr8x_page_walk
708 » jmp» L$sqr8x_page_walk_done
709
710 .p2align» 4
711 L$sqr8x_page_walk:
712 » leaq» -4096(%rsp),%rsp
713 » movq» (%rsp),%r10
714 » cmpq» %rbp,%rsp
715 » ja» L$sqr8x_page_walk
716 L$sqr8x_page_walk_done:
717
657 movq %r9,%r10 718 movq %r9,%r10
658 negq %r9 719 negq %r9
659 720
660 movq %r8,32(%rsp) 721 movq %r8,32(%rsp)
661 movq %rax,40(%rsp) 722 movq %rax,40(%rsp)
662 L$sqr8x_body: 723 L$sqr8x_body:
663 724
664 .byte 102,72,15,110,209 725 .byte 102,72,15,110,209
665 pxor %xmm0,%xmm0 726 pxor %xmm0,%xmm0
666 .byte 102,72,15,110,207 727 .byte 102,72,15,110,207
(...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after
739 movq -24(%rsi),%r12 800 movq -24(%rsi),%r12
740 movq -16(%rsi),%rbp 801 movq -16(%rsi),%rbp
741 movq -8(%rsi),%rbx 802 movq -8(%rsi),%rbx
742 leaq (%rsi),%rsp 803 leaq (%rsi),%rsp
743 L$sqr8x_epilogue: 804 L$sqr8x_epilogue:
744 .byte 0xf3,0xc3 805 .byte 0xf3,0xc3
745 806
746 .byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 807 .byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
747 .p2align 4 808 .p2align 4
748 #endif 809 #endif
OLDNEW
« no previous file with comments | « third_party/boringssl/mac-x86/crypto/bn/x86-mont.S ('k') | third_party/boringssl/mac-x86_64/crypto/bn/x86_64-mont5.S » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698