Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(222)

Side by Side Diff: third_party/boringssl/mac-x86_64/crypto/ec/p256-x86_64-asm.S

Issue 1930203003: Try BoringSSL roll again (Closed) Base URL: git@github.com:dart-lang/sdk.git@master
Patch Set: Created 4 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 #if defined(__x86_64__)
2 .text
3
4
5
# Constant pool for the NIST P-256 (secp256r1) field arithmetic below.
# L$poly is the field prime p = 2^256 - 2^224 + 2^192 + 2^96 - 1,
# stored as four little-endian 64-bit limbs (least-significant first).
6 .p2align 6
7 L$poly:
8 .quad 0xffffffffffffffff, 0x00000000ffffffff, 0x0000000000000000, 0xffffffff00000001
9
# Broadcast dword constants used by the select_w5/select_w7 SSE scans.
10 L$One:
11 .long 1,1,1,1,1,1,1,1
12 L$Two:
13 .long 2,2,2,2,2,2,2,2
14 L$Three:
15 .long 3,3,3,3,3,3,3,3
# 1 in Montgomery form, i.e. 2^256 mod p, as four little-endian limbs.
16 L$ONE_mont:
17 .quad 0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe
18
19
# ecp_nistz256_mul_by_2(res = %rdi, a = %rsi): res = 2*a mod p.
# Doubles the four limbs with an add/adc chain, then subtracts p and
# branchlessly (cmovz) keeps the pre-subtraction value if the subtraction
# borrowed — constant-time modular reduction.
# Clobbers: rax, rcx, rdx, rsi, r8-r11; saves/restores r12, r13.
20 .p2align 6
21 ecp_nistz256_mul_by_2:
22 pushq %r12
23 pushq %r13
24
25 movq 0(%rsi),%r8
26 movq 8(%rsi),%r9
27 addq %r8,%r8
28 movq 16(%rsi),%r10
29 adcq %r9,%r9
30 movq 24(%rsi),%r11
31 leaq L$poly(%rip),%rsi
32 movq %r8,%rax
33 adcq %r10,%r10
34 adcq %r11,%r11
35 movq %r9,%rdx
# r13 = 0 - carry: all-ones if the doubling overflowed 2^256.
36 sbbq %r13,%r13
37
# Tentatively subtract p; keep copies of the unreduced limbs in rax/rdx/rcx/r12.
38 subq 0(%rsi),%r8
39 movq %r10,%rcx
40 sbbq 8(%rsi),%r9
41 sbbq 16(%rsi),%r10
42 movq %r11,%r12
43 sbbq 24(%rsi),%r11
44 testq %r13,%r13
45
# ZF set => 2*a < 2^256 and subtraction borrowed: restore unreduced value.
46 cmovzq %rax,%r8
47 cmovzq %rdx,%r9
48 movq %r8,0(%rdi)
49 cmovzq %rcx,%r10
50 movq %r9,8(%rdi)
51 cmovzq %r12,%r11
52 movq %r10,16(%rdi)
53 movq %r11,24(%rdi)
54
55 popq %r13
56 popq %r12
57 .byte 0xf3,0xc3
58
59
60
61
# void ecp_nistz256_neg(res = %rdi, a = %rsi): res = -a mod p.
# Computes 0 - a with a sub/sbb chain, then adds p back and branchlessly
# selects (cmovz) the un-corrected value when no borrow occurred
# (i.e. a == 0) — constant-time.
62 .globl _ecp_nistz256_neg
63 .private_extern _ecp_nistz256_neg
64
65 .p2align 5
66 _ecp_nistz256_neg:
67 pushq %r12
68 pushq %r13
69
70 xorq %r8,%r8
71 xorq %r9,%r9
72 xorq %r10,%r10
73 xorq %r11,%r11
74 xorq %r13,%r13
75
76 subq 0(%rsi),%r8
77 sbbq 8(%rsi),%r9
78 sbbq 16(%rsi),%r10
79 movq %r8,%rax
80 sbbq 24(%rsi),%r11
81 leaq L$poly(%rip),%rsi
82 movq %r9,%rdx
# r13 = -borrow: nonzero iff a != 0.
83 sbbq $0,%r13
84
# Add p back; stale copies kept in rax/rdx/rcx/r12 for the select below.
85 addq 0(%rsi),%r8
86 movq %r10,%rcx
87 adcq 8(%rsi),%r9
88 adcq 16(%rsi),%r10
89 movq %r11,%r12
90 adcq 24(%rsi),%r11
91 testq %r13,%r13
92
93 cmovzq %rax,%r8
94 cmovzq %rdx,%r9
95 movq %r8,0(%rdi)
96 cmovzq %rcx,%r10
97 movq %r9,8(%rdi)
98 cmovzq %r12,%r11
99 movq %r10,16(%rdi)
100 movq %r11,24(%rdi)
101
102 popq %r13
103 popq %r12
104 .byte 0xf3,0xc3
105
106
107
108
109
110
111
# void ecp_nistz256_mul_mont(res = %rdi, a = %rsi, b = %rdx):
# res = a*b*2^-256 mod p (Montgomery multiplication).
# Thin SysV wrapper: saves callee-saved registers, moves b into %rbx,
# preloads b[0] and a[0..3], and tail-calls into __ecp_nistz256_mul_montq.
112 .globl _ecp_nistz256_mul_mont
113 .private_extern _ecp_nistz256_mul_mont
114
115 .p2align 5
116 _ecp_nistz256_mul_mont:
117 L$mul_mont:
118 pushq %rbp
119 pushq %rbx
120 pushq %r12
121 pushq %r13
122 pushq %r14
123 pushq %r15
# %rbx = b (second operand); %rax = b[0]; %r9..%r12 = a[0..3].
124 movq %rdx,%rbx
125 movq 0(%rdx),%rax
126 movq 0(%rsi),%r9
127 movq 8(%rsi),%r10
128 movq 16(%rsi),%r11
129 movq 24(%rsi),%r12
130
131 call __ecp_nistz256_mul_montq
132 L$mul_mont_done:
133 popq %r15
134 popq %r14
135 popq %r13
136 popq %r12
137 popq %rbx
138 popq %rbp
139 .byte 0xf3,0xc3
140
141
142
# __ecp_nistz256_mul_montq — Montgomery multiplication core.
# In:  %rdi = res, %rsi = a, %rbx = b, %rax = b[0], %r9..%r12 = a[0..3].
# Out: res = a*b*2^-256 mod p, also left in %r12,%r13,%r8,%r9.
# Interleaves four multiply-accumulate passes (one per limb of b) with
# per-limb Montgomery reduction.  Reduction by p exploits p's special
# form: each step folds limb T via (T<<32, T>>32, T*L$poly[3]).
# Final conditional subtraction of p is branchless (sbb + cmovc).
143 .p2align 5
144 __ecp_nistz256_mul_montq:
145
# ---- pass 1: (r8..r12,r13) = a * b[0] ----
146
147 movq %rax,%rbp
148 mulq %r9
149 movq L$poly+8(%rip),%r14
150 movq %rax,%r8
151 movq %rbp,%rax
152 movq %rdx,%r9
153
154 mulq %r10
155 movq L$poly+24(%rip),%r15
156 addq %rax,%r9
157 movq %rbp,%rax
158 adcq $0,%rdx
159 movq %rdx,%r10
160
161 mulq %r11
162 addq %rax,%r10
163 movq %rbp,%rax
164 adcq $0,%rdx
165 movq %rdx,%r11
166
167 mulq %r12
168 addq %rax,%r11
169 movq %r8,%rax
170 adcq $0,%rdx
171 xorq %r13,%r13
172 movq %rdx,%r12
173
174
175
176
177
178
179
180
181
182
# ---- first reduction step: fold limb r8 into r9..r13 ----
183 movq %r8,%rbp
184 shlq $32,%r8
185 mulq %r15
186 shrq $32,%rbp
187 addq %r8,%r9
188 adcq %rbp,%r10
189 adcq %rax,%r11
190 movq 8(%rbx),%rax
191 adcq %rdx,%r12
192 adcq $0,%r13
193 xorq %r8,%r8
194
# ---- pass 2: accumulate a * b[1], then reduce limb r9 ----
195
196
197 movq %rax,%rbp
198 mulq 0(%rsi)
199 addq %rax,%r9
200 movq %rbp,%rax
201 adcq $0,%rdx
202 movq %rdx,%rcx
203
204 mulq 8(%rsi)
205 addq %rcx,%r10
206 adcq $0,%rdx
207 addq %rax,%r10
208 movq %rbp,%rax
209 adcq $0,%rdx
210 movq %rdx,%rcx
211
212 mulq 16(%rsi)
213 addq %rcx,%r11
214 adcq $0,%rdx
215 addq %rax,%r11
216 movq %rbp,%rax
217 adcq $0,%rdx
218 movq %rdx,%rcx
219
220 mulq 24(%rsi)
221 addq %rcx,%r12
222 adcq $0,%rdx
223 addq %rax,%r12
224 movq %r9,%rax
225 adcq %rdx,%r13
226 adcq $0,%r8
227
228
229
230 movq %r9,%rbp
231 shlq $32,%r9
232 mulq %r15
233 shrq $32,%rbp
234 addq %r9,%r10
235 adcq %rbp,%r11
236 adcq %rax,%r12
237 movq 16(%rbx),%rax
238 adcq %rdx,%r13
239 adcq $0,%r8
240 xorq %r9,%r9
241
# ---- pass 3: accumulate a * b[2], then reduce limb r10 ----
242
243
244 movq %rax,%rbp
245 mulq 0(%rsi)
246 addq %rax,%r10
247 movq %rbp,%rax
248 adcq $0,%rdx
249 movq %rdx,%rcx
250
251 mulq 8(%rsi)
252 addq %rcx,%r11
253 adcq $0,%rdx
254 addq %rax,%r11
255 movq %rbp,%rax
256 adcq $0,%rdx
257 movq %rdx,%rcx
258
259 mulq 16(%rsi)
260 addq %rcx,%r12
261 adcq $0,%rdx
262 addq %rax,%r12
263 movq %rbp,%rax
264 adcq $0,%rdx
265 movq %rdx,%rcx
266
267 mulq 24(%rsi)
268 addq %rcx,%r13
269 adcq $0,%rdx
270 addq %rax,%r13
271 movq %r10,%rax
272 adcq %rdx,%r8
273 adcq $0,%r9
274
275
276
277 movq %r10,%rbp
278 shlq $32,%r10
279 mulq %r15
280 shrq $32,%rbp
281 addq %r10,%r11
282 adcq %rbp,%r12
283 adcq %rax,%r13
284 movq 24(%rbx),%rax
285 adcq %rdx,%r8
286 adcq $0,%r9
287 xorq %r10,%r10
288
# ---- pass 4: accumulate a * b[3], then reduce limb r11 ----
289
290
291 movq %rax,%rbp
292 mulq 0(%rsi)
293 addq %rax,%r11
294 movq %rbp,%rax
295 adcq $0,%rdx
296 movq %rdx,%rcx
297
298 mulq 8(%rsi)
299 addq %rcx,%r12
300 adcq $0,%rdx
301 addq %rax,%r12
302 movq %rbp,%rax
303 adcq $0,%rdx
304 movq %rdx,%rcx
305
306 mulq 16(%rsi)
307 addq %rcx,%r13
308 adcq $0,%rdx
309 addq %rax,%r13
310 movq %rbp,%rax
311 adcq $0,%rdx
312 movq %rdx,%rcx
313
314 mulq 24(%rsi)
315 addq %rcx,%r8
316 adcq $0,%rdx
317 addq %rax,%r8
318 movq %r11,%rax
319 adcq %rdx,%r9
320 adcq $0,%r10
321
322
323
324 movq %r11,%rbp
325 shlq $32,%r11
326 mulq %r15
327 shrq $32,%rbp
328 addq %r11,%r12
329 adcq %rbp,%r13
330 movq %r12,%rcx
331 adcq %rax,%r8
332 adcq %rdx,%r9
333 movq %r13,%rbp
334 adcq $0,%r10
335
# ---- final conditional subtraction of p (branchless) ----
336
337
338 subq $-1,%r12
339 movq %r8,%rbx
340 sbbq %r14,%r13
341 sbbq $0,%r8
342 movq %r9,%rdx
343 sbbq %r15,%r9
344 sbbq $0,%r10
345
# CF set => result was < p: keep the pre-subtraction copies.
346 cmovcq %rcx,%r12
347 cmovcq %rbp,%r13
348 movq %r12,0(%rdi)
349 cmovcq %rbx,%r8
350 movq %r13,8(%rdi)
351 cmovcq %rdx,%r9
352 movq %r8,16(%rdi)
353 movq %r9,24(%rdi)
354
355 .byte 0xf3,0xc3
356
357
358
359
360
361
362
363
364
# void ecp_nistz256_sqr_mont(res = %rdi, a = %rsi):
# res = a*a*2^-256 mod p (Montgomery squaring).
# Wrapper: saves callee-saved registers, preloads a[0..3] into
# rax/r14/r15/r8, and calls __ecp_nistz256_sqr_montq.
365 .globl _ecp_nistz256_sqr_mont
366 .private_extern _ecp_nistz256_sqr_mont
367
368 .p2align 5
369 _ecp_nistz256_sqr_mont:
370 pushq %rbp
371 pushq %rbx
372 pushq %r12
373 pushq %r13
374 pushq %r14
375 pushq %r15
376 movq 0(%rsi),%rax
377 movq 8(%rsi),%r14
378 movq 16(%rsi),%r15
379 movq 24(%rsi),%r8
380
381 call __ecp_nistz256_sqr_montq
382 L$sqr_mont_done:
383 popq %r15
384 popq %r14
385 popq %r13
386 popq %r12
387 popq %rbx
388 popq %rbp
389 .byte 0xf3,0xc3
390
391
392
# __ecp_nistz256_sqr_montq — Montgomery squaring core.
# In:  %rdi = res, %rsi = a, %rax/%r14/%r15/%r8 = a[0..3].
# Out: res = a^2 * 2^-256 mod p, also left in %r12..%r15.
# Strategy: compute the off-diagonal products a[i]*a[j] (i<j), double
# them with an add/adc chain, add the diagonal squares a[i]^2, then run
# four Montgomery reduction steps and one branchless final subtraction.
393 .p2align 5
394 __ecp_nistz256_sqr_montq:
# ---- off-diagonal products ----
395 movq %rax,%r13
396 mulq %r14
397 movq %rax,%r9
398 movq %r15,%rax
399 movq %rdx,%r10
400
401 mulq %r13
402 addq %rax,%r10
403 movq %r8,%rax
404 adcq $0,%rdx
405 movq %rdx,%r11
406
407 mulq %r13
408 addq %rax,%r11
409 movq %r15,%rax
410 adcq $0,%rdx
411 movq %rdx,%r12
412
413
414 mulq %r14
415 addq %rax,%r11
416 movq %r8,%rax
417 adcq $0,%rdx
418 movq %rdx,%rbp
419
420 mulq %r14
421 addq %rax,%r12
422 movq %r8,%rax
423 adcq $0,%rdx
424 addq %rbp,%r12
425 movq %rdx,%r13
426 adcq $0,%r13
427
428
429 mulq %r15
430 xorq %r15,%r15
431 addq %rax,%r13
432 movq 0(%rsi),%rax
433 movq %rdx,%r14
434 adcq $0,%r14
435
# ---- double the cross terms ----
436 addq %r9,%r9
437 adcq %r10,%r10
438 adcq %r11,%r11
439 adcq %r12,%r12
440 adcq %r13,%r13
441 adcq %r14,%r14
442 adcq $0,%r15
443
# ---- add the diagonal squares a[i]^2 ----
444 mulq %rax
445 movq %rax,%r8
446 movq 8(%rsi),%rax
447 movq %rdx,%rcx
448
449 mulq %rax
450 addq %rcx,%r9
451 adcq %rax,%r10
452 movq 16(%rsi),%rax
453 adcq $0,%rdx
454 movq %rdx,%rcx
455
456 mulq %rax
457 addq %rcx,%r11
458 adcq %rax,%r12
459 movq 24(%rsi),%rax
460 adcq $0,%rdx
461 movq %rdx,%rcx
462
463 mulq %rax
464 addq %rcx,%r13
465 adcq %rax,%r14
466 movq %r8,%rax
467 adcq %rdx,%r15
468
# %rsi/%rbp now hold p[1] and p[3] for the reduction steps.
469 movq L$poly+8(%rip),%rsi
470 movq L$poly+24(%rip),%rbp
471
# ---- four Montgomery reduction steps, folding r8..r11 ----
472
473
474
475 movq %r8,%rcx
476 shlq $32,%r8
477 mulq %rbp
478 shrq $32,%rcx
479 addq %r8,%r9
480 adcq %rcx,%r10
481 adcq %rax,%r11
482 movq %r9,%rax
483 adcq $0,%rdx
484
485
486
487 movq %r9,%rcx
488 shlq $32,%r9
489 movq %rdx,%r8
490 mulq %rbp
491 shrq $32,%rcx
492 addq %r9,%r10
493 adcq %rcx,%r11
494 adcq %rax,%r8
495 movq %r10,%rax
496 adcq $0,%rdx
497
498
499
500 movq %r10,%rcx
501 shlq $32,%r10
502 movq %rdx,%r9
503 mulq %rbp
504 shrq $32,%rcx
505 addq %r10,%r11
506 adcq %rcx,%r8
507 adcq %rax,%r9
508 movq %r11,%rax
509 adcq $0,%rdx
510
511
512
513 movq %r11,%rcx
514 shlq $32,%r11
515 movq %rdx,%r10
516 mulq %rbp
517 shrq $32,%rcx
518 addq %r11,%r8
519 adcq %rcx,%r9
520 adcq %rax,%r10
521 adcq $0,%rdx
522 xorq %r11,%r11
523
# ---- add reduced low half into the high half ----
524
525
526 addq %r8,%r12
527 adcq %r9,%r13
528 movq %r12,%r8
529 adcq %r10,%r14
530 adcq %rdx,%r15
531 movq %r13,%r9
532 adcq $0,%r11
533
# ---- branchless final subtraction of p ----
534 subq $-1,%r12
535 movq %r14,%r10
536 sbbq %rsi,%r13
537 sbbq $0,%r14
538 movq %r15,%rcx
539 sbbq %rbp,%r15
540 sbbq $0,%r11
541
542 cmovcq %r8,%r12
543 cmovcq %r9,%r13
544 movq %r12,0(%rdi)
545 cmovcq %r10,%r14
546 movq %r13,8(%rdi)
547 cmovcq %rcx,%r15
548 movq %r14,16(%rdi)
549 movq %r15,24(%rdi)
550
551 .byte 0xf3,0xc3
552
553
554
555
556
557
558
# void ecp_nistz256_from_mont(res = %rdi, in = %rsi):
# res = in * 2^-256 mod p — converts out of Montgomery form
# (equivalent to a Montgomery multiplication by 1).
# Runs four reduction steps then a branchless final subtraction.
559 .globl _ecp_nistz256_from_mont
560 .private_extern _ecp_nistz256_from_mont
561
562 .p2align 5
563 _ecp_nistz256_from_mont:
564 pushq %r12
565 pushq %r13
566
567 movq 0(%rsi),%rax
568 movq L$poly+24(%rip),%r13
569 movq 8(%rsi),%r9
570 movq 16(%rsi),%r10
571 movq 24(%rsi),%r11
572 movq %rax,%r8
573 movq L$poly+8(%rip),%r12
574
# ---- reduction step 1 (fold limb 0) ----
575
576
577 movq %rax,%rcx
578 shlq $32,%r8
579 mulq %r13
580 shrq $32,%rcx
581 addq %r8,%r9
582 adcq %rcx,%r10
583 adcq %rax,%r11
584 movq %r9,%rax
585 adcq $0,%rdx
586
# ---- reduction step 2 ----
587
588
589 movq %r9,%rcx
590 shlq $32,%r9
591 movq %rdx,%r8
592 mulq %r13
593 shrq $32,%rcx
594 addq %r9,%r10
595 adcq %rcx,%r11
596 adcq %rax,%r8
597 movq %r10,%rax
598 adcq $0,%rdx
599
# ---- reduction step 3 ----
600
601
602 movq %r10,%rcx
603 shlq $32,%r10
604 movq %rdx,%r9
605 mulq %r13
606 shrq $32,%rcx
607 addq %r10,%r11
608 adcq %rcx,%r8
609 adcq %rax,%r9
610 movq %r11,%rax
611 adcq $0,%rdx
612
# ---- reduction step 4 ----
613
614
615 movq %r11,%rcx
616 shlq $32,%r11
617 movq %rdx,%r10
618 mulq %r13
619 shrq $32,%rcx
620 addq %r11,%r8
621 adcq %rcx,%r9
622 movq %r8,%rcx
623 adcq %rax,%r10
624 movq %r9,%rsi
625 adcq $0,%rdx
626
# Branchless final subtraction of p; note limb 3 uses the opposite
# cmov sense because %r11 holds the pre-subtraction value while %rdx
# holds the subtracted one (roles swapped relative to the other limbs).
627 subq $-1,%r8
628 movq %r10,%rax
629 sbbq %r12,%r9
630 sbbq $0,%r10
631 movq %rdx,%r11
632 sbbq %r13,%rdx
633 sbbq %r13,%r13
634
635 cmovnzq %rcx,%r8
636 cmovnzq %rsi,%r9
637 movq %r8,0(%rdi)
638 cmovnzq %rax,%r10
639 movq %r9,8(%rdi)
640 cmovzq %rdx,%r11
641 movq %r10,16(%rdi)
642 movq %r11,24(%rdi)
643
644 popq %r13
645 popq %r12
646 .byte 0xf3,0xc3
647
648
649
# void ecp_nistz256_select_w5(val = %rdi, in_t = %rsi, index = %edx):
# Constant-time table lookup for the width-5 window: scans all 16
# table entries of 96 bytes each, accumulating (por) only the entry
# whose 1-based counter matches index, via a pcmpeqd mask.  Every
# entry is always read, so memory access is index-independent.
650 .globl _ecp_nistz256_select_w5
651 .private_extern _ecp_nistz256_select_w5
652
653 .p2align 5
654 _ecp_nistz256_select_w5:
655 movdqa L$One(%rip),%xmm0
656 movd %edx,%xmm1
657
658 pxor %xmm2,%xmm2
659 pxor %xmm3,%xmm3
660 pxor %xmm4,%xmm4
661 pxor %xmm5,%xmm5
662 pxor %xmm6,%xmm6
663 pxor %xmm7,%xmm7
664
# xmm8 = running counter (starts at 1), xmm1 = index broadcast to 4 dwords.
665 movdqa %xmm0,%xmm8
666 pshufd $0,%xmm1,%xmm1
667
668 movq $16,%rax
669 L$select_loop_sse_w5:
670
# xmm15 = all-ones when counter == index, else all-zeros.
671 movdqa %xmm8,%xmm15
672 paddd %xmm0,%xmm8
673 pcmpeqd %xmm1,%xmm15
674
675 movdqa 0(%rsi),%xmm9
676 movdqa 16(%rsi),%xmm10
677 movdqa 32(%rsi),%xmm11
678 movdqa 48(%rsi),%xmm12
679 movdqa 64(%rsi),%xmm13
680 movdqa 80(%rsi),%xmm14
681 leaq 96(%rsi),%rsi
682
683 pand %xmm15,%xmm9
684 pand %xmm15,%xmm10
685 por %xmm9,%xmm2
686 pand %xmm15,%xmm11
687 por %xmm10,%xmm3
688 pand %xmm15,%xmm12
689 por %xmm11,%xmm4
690 pand %xmm15,%xmm13
691 por %xmm12,%xmm5
692 pand %xmm15,%xmm14
693 por %xmm13,%xmm6
694 por %xmm14,%xmm7
695
696 decq %rax
697 jnz L$select_loop_sse_w5
698
699 movdqu %xmm2,0(%rdi)
700 movdqu %xmm3,16(%rdi)
701 movdqu %xmm4,32(%rdi)
702 movdqu %xmm5,48(%rdi)
703 movdqu %xmm6,64(%rdi)
704 movdqu %xmm7,80(%rdi)
705 .byte 0xf3,0xc3
706
707
708
709
# void ecp_nistz256_select_w7(val = %rdi, in_t = %rsi, index = %edx):
# Constant-time lookup for the width-7 window: scans all 64 table
# entries of 64 bytes each with the same pcmpeqd-mask/por technique
# as select_w5.  All entries are read regardless of index.
710 .globl _ecp_nistz256_select_w7
711 .private_extern _ecp_nistz256_select_w7
712
713 .p2align 5
714 _ecp_nistz256_select_w7:
715 movdqa L$One(%rip),%xmm8
716 movd %edx,%xmm1
717
718 pxor %xmm2,%xmm2
719 pxor %xmm3,%xmm3
720 pxor %xmm4,%xmm4
721 pxor %xmm5,%xmm5
722
723 movdqa %xmm8,%xmm0
724 pshufd $0,%xmm1,%xmm1
725 movq $64,%rax
726
727 L$select_loop_sse_w7:
728 movdqa %xmm8,%xmm15
729 paddd %xmm0,%xmm8
730 movdqa 0(%rsi),%xmm9
731 movdqa 16(%rsi),%xmm10
732 pcmpeqd %xmm1,%xmm15
733 movdqa 32(%rsi),%xmm11
734 movdqa 48(%rsi),%xmm12
735 leaq 64(%rsi),%rsi
736
737 pand %xmm15,%xmm9
738 pand %xmm15,%xmm10
739 por %xmm9,%xmm2
740 pand %xmm15,%xmm11
741 por %xmm10,%xmm3
742 pand %xmm15,%xmm12
743 por %xmm11,%xmm4
744 prefetcht0 255(%rsi)
745 por %xmm12,%xmm5
746
747 decq %rax
748 jnz L$select_loop_sse_w7
749
750 movdqu %xmm2,0(%rdi)
751 movdqu %xmm3,16(%rdi)
752 movdqu %xmm4,32(%rdi)
753 movdqu %xmm5,48(%rdi)
754 .byte 0xf3,0xc3
755
# ecp_nistz256_avx2_select_w7 — deliberately unimplemented stub:
# 0x0f,0x0b is the ud2 (undefined instruction) opcode, so any call
# aborts immediately; the trailing 0xf3,0xc3 (ret) is unreachable.
756 .globl _ecp_nistz256_avx2_select_w7
757 .private_extern _ecp_nistz256_avx2_select_w7
758
759 .p2align 5
760 _ecp_nistz256_avx2_select_w7:
761 .byte 0x0f,0x0b
762 .byte 0xf3,0xc3
763
764
# __ecp_nistz256_add_toq — register-calling helper:
# (%r12,%r13,%r8,%r9) += (*%rbx) mod p; result stored to (%rdi)
# and left in the same registers.  Branchless reduction: subtract p,
# then cmovz restores the unreduced value if the subtraction borrowed
# and the addition did not carry (r11 = -carry from the add).
765 .p2align 5
766 __ecp_nistz256_add_toq:
767 addq 0(%rbx),%r12
768 adcq 8(%rbx),%r13
769 movq %r12,%rax
770 adcq 16(%rbx),%r8
771 adcq 24(%rbx),%r9
772 movq %r13,%rbp
773 sbbq %r11,%r11
774
775 subq $-1,%r12
776 movq %r8,%rcx
777 sbbq %r14,%r13
778 sbbq $0,%r8
779 movq %r9,%r10
780 sbbq %r15,%r9
781 testq %r11,%r11
782
783 cmovzq %rax,%r12
784 cmovzq %rbp,%r13
785 movq %r12,0(%rdi)
786 cmovzq %rcx,%r8
787 movq %r13,8(%rdi)
788 cmovzq %r10,%r9
789 movq %r8,16(%rdi)
790 movq %r9,24(%rdi)
791
792 .byte 0xf3,0xc3
794
795
# __ecp_nistz256_sub_fromq — register-calling helper:
# (%r12,%r13,%r8,%r9) -= (*%rbx) mod p; result stored to (%rdi)
# and left in the same registers.  On borrow (r11 = -borrow), adds p
# back; cmovz keeps the uncorrected value when there was no borrow.
796 .p2align 5
797 __ecp_nistz256_sub_fromq:
798 subq 0(%rbx),%r12
799 sbbq 8(%rbx),%r13
800 movq %r12,%rax
801 sbbq 16(%rbx),%r8
802 sbbq 24(%rbx),%r9
803 movq %r13,%rbp
804 sbbq %r11,%r11
805
806 addq $-1,%r12
807 movq %r8,%rcx
808 adcq %r14,%r13
809 adcq $0,%r8
810 movq %r9,%r10
811 adcq %r15,%r9
812 testq %r11,%r11
813
814 cmovzq %rax,%r12
815 cmovzq %rbp,%r13
816 movq %r12,0(%rdi)
817 cmovzq %rcx,%r8
818 movq %r13,8(%rdi)
819 cmovzq %r10,%r9
820 movq %r8,16(%rdi)
821 movq %r9,24(%rdi)
822
823 .byte 0xf3,0xc3
824
825
826
# __ecp_nistz256_subq — register-calling helper:
# (%r12,%r13,%r8,%r9) = (%rax,%rbp,%rcx,%r10) - (%r12,%r13,%r8,%r9)
# mod p.  Result left only in registers (no store); cmovnz selects the
# p-corrected value when the subtraction borrowed.
827 .p2align 5
828 __ecp_nistz256_subq:
829 subq %r12,%rax
830 sbbq %r13,%rbp
831 movq %rax,%r12
832 sbbq %r8,%rcx
833 sbbq %r9,%r10
834 movq %rbp,%r13
835 sbbq %r11,%r11
836
837 addq $-1,%rax
838 movq %rcx,%r8
839 adcq %r14,%rbp
840 adcq $0,%rcx
841 movq %r10,%r9
842 adcq %r15,%r10
843 testq %r11,%r11
844
845 cmovnzq %rax,%r12
846 cmovnzq %rbp,%r13
847 cmovnzq %rcx,%r8
848 cmovnzq %r10,%r9
849
850 .byte 0xf3,0xc3
851
852
853
# __ecp_nistz256_mul_by_2q — register-calling helper:
# (%r12,%r13,%r8,%r9) = 2*(%r12,%r13,%r8,%r9) mod p; stored to (%rdi).
# Same double-then-conditionally-subtract-p pattern as
# ecp_nistz256_mul_by_2, but operands arrive in registers.
854 .p2align 5
855 __ecp_nistz256_mul_by_2q:
856 addq %r12,%r12
857 adcq %r13,%r13
858 movq %r12,%rax
859 adcq %r8,%r8
860 adcq %r9,%r9
861 movq %r13,%rbp
862 sbbq %r11,%r11
863
864 subq $-1,%r12
865 movq %r8,%rcx
866 sbbq %r14,%r13
867 sbbq $0,%r8
868 movq %r9,%r10
869 sbbq %r15,%r9
870 testq %r11,%r11
871
872 cmovzq %rax,%r12
873 cmovzq %rbp,%r13
874 movq %r12,0(%rdi)
875 cmovzq %rcx,%r8
876 movq %r13,8(%rdi)
877 cmovzq %r10,%r9
878 movq %r8,16(%rdi)
879 movq %r9,24(%rdi)
880
881 .byte 0xf3,0xc3
882
# void ecp_nistz256_point_double(res = %rdi, in = %rsi):
# Jacobian-coordinate point doubling over P-256 (Montgomery domain).
# Uses a 160-byte stack frame of 32-byte temporaries and chains the
# register-calling field helpers above.  The .byte sequences encode
# movq between GPRs and XMM registers (used to stash pointers across
# helper calls); each is decoded in a comment where it appears.
883 .globl _ecp_nistz256_point_double
884 .private_extern _ecp_nistz256_point_double
885
886 .p2align 5
887 _ecp_nistz256_point_double:
888 pushq %rbp
889 pushq %rbx
890 pushq %r12
891 pushq %r13
892 pushq %r14
893 pushq %r15
894 subq $160+8,%rsp
895
# Load in->x into xmm0/1, in->y (offset 32) into r12..r9; rbx = in.
896 movdqu 0(%rsi),%xmm0
897 movq %rsi,%rbx
898 movdqu 16(%rsi),%xmm1
899 movq 32+0(%rsi),%r12
900 movq 32+8(%rsi),%r13
901 movq 32+16(%rsi),%r8
902 movq 32+24(%rsi),%r9
903 movq L$poly+8(%rip),%r14
904 movq L$poly+24(%rip),%r15
905 movdqa %xmm0,96(%rsp)
906 movdqa %xmm1,96+16(%rsp)
907 leaq 32(%rdi),%r10
908 leaq 64(%rdi),%r11
# movq %rdi,%xmm0 / %r10,%xmm1 / %r11,%xmm2: stash res, res->y, res->z.
909 .byte 102,72,15,110,199
910 .byte 102,73,15,110,202
911 .byte 102,73,15,110,211
912
913 leaq 0(%rsp),%rdi
914 call __ecp_nistz256_mul_by_2q
915
916 movq 64+0(%rsi),%rax
917 movq 64+8(%rsi),%r14
918 movq 64+16(%rsi),%r15
919 movq 64+24(%rsi),%r8
920 leaq 64-0(%rsi),%rsi
921 leaq 64(%rsp),%rdi
922 call __ecp_nistz256_sqr_montq
923
924 movq 0+0(%rsp),%rax
925 movq 8+0(%rsp),%r14
926 leaq 0+0(%rsp),%rsi
927 movq 16+0(%rsp),%r15
928 movq 24+0(%rsp),%r8
929 leaq 0(%rsp),%rdi
930 call __ecp_nistz256_sqr_montq
931
932 movq 32(%rbx),%rax
933 movq 64+0(%rbx),%r9
934 movq 64+8(%rbx),%r10
935 movq 64+16(%rbx),%r11
936 movq 64+24(%rbx),%r12
937 leaq 64-0(%rbx),%rsi
938 leaq 32(%rbx),%rbx
# movq %xmm2,%rdi: recover res->z pointer.
939 .byte 102,72,15,126,215
940 call __ecp_nistz256_mul_montq
941 call __ecp_nistz256_mul_by_2q
942
943 movq 96+0(%rsp),%r12
944 movq 96+8(%rsp),%r13
945 leaq 64(%rsp),%rbx
946 movq 96+16(%rsp),%r8
947 movq 96+24(%rsp),%r9
948 leaq 32(%rsp),%rdi
949 call __ecp_nistz256_add_toq
950
951 movq 96+0(%rsp),%r12
952 movq 96+8(%rsp),%r13
953 leaq 64(%rsp),%rbx
954 movq 96+16(%rsp),%r8
955 movq 96+24(%rsp),%r9
956 leaq 64(%rsp),%rdi
957 call __ecp_nistz256_sub_fromq
958
959 movq 0+0(%rsp),%rax
960 movq 8+0(%rsp),%r14
961 leaq 0+0(%rsp),%rsi
962 movq 16+0(%rsp),%r15
963 movq 24+0(%rsp),%r8
# movq %xmm1,%rdi: recover res->y pointer.
964 .byte 102,72,15,126,207
965 call __ecp_nistz256_sqr_montq
# ---- halve the result: add p if odd, then shift right one bit ----
966 xorq %r9,%r9
967 movq %r12,%rax
968 addq $-1,%r12
969 movq %r13,%r10
970 adcq %rsi,%r13
971 movq %r14,%rcx
972 adcq $0,%r14
973 movq %r15,%r8
974 adcq %rbp,%r15
975 adcq $0,%r9
976 xorq %rsi,%rsi
977 testq $1,%rax
978
# ZF set => value was even: keep the original limbs.
979 cmovzq %rax,%r12
980 cmovzq %r10,%r13
981 cmovzq %rcx,%r14
982 cmovzq %r8,%r15
983 cmovzq %rsi,%r9
984
# 257-bit right shift by 1 across r12..r15 with carry-in from r9.
985 movq %r13,%rax
986 shrq $1,%r12
987 shlq $63,%rax
988 movq %r14,%r10
989 shrq $1,%r13
990 orq %rax,%r12
991 shlq $63,%r10
992 movq %r15,%rcx
993 shrq $1,%r14
994 orq %r10,%r13
995 shlq $63,%rcx
996 movq %r12,0(%rdi)
997 shrq $1,%r15
998 movq %r13,8(%rdi)
999 shlq $63,%r9
1000 orq %rcx,%r14
1001 orq %r9,%r15
1002 movq %r14,16(%rdi)
1003 movq %r15,24(%rdi)
1004 movq 64(%rsp),%rax
1005 leaq 64(%rsp),%rbx
1006 movq 0+32(%rsp),%r9
1007 movq 8+32(%rsp),%r10
1008 leaq 0+32(%rsp),%rsi
1009 movq 16+32(%rsp),%r11
1010 movq 24+32(%rsp),%r12
1011 leaq 32(%rsp),%rdi
1012 call __ecp_nistz256_mul_montq
1013
1014 leaq 128(%rsp),%rdi
1015 call __ecp_nistz256_mul_by_2q
1016
1017 leaq 32(%rsp),%rbx
1018 leaq 32(%rsp),%rdi
1019 call __ecp_nistz256_add_toq
1020
1021 movq 96(%rsp),%rax
1022 leaq 96(%rsp),%rbx
1023 movq 0+0(%rsp),%r9
1024 movq 8+0(%rsp),%r10
1025 leaq 0+0(%rsp),%rsi
1026 movq 16+0(%rsp),%r11
1027 movq 24+0(%rsp),%r12
1028 leaq 0(%rsp),%rdi
1029 call __ecp_nistz256_mul_montq
1030
1031 leaq 128(%rsp),%rdi
1032 call __ecp_nistz256_mul_by_2q
1033
1034 movq 0+32(%rsp),%rax
1035 movq 8+32(%rsp),%r14
1036 leaq 0+32(%rsp),%rsi
1037 movq 16+32(%rsp),%r15
1038 movq 24+32(%rsp),%r8
# movq %xmm0,%rdi: recover res pointer.
1039 .byte 102,72,15,126,199
1040 call __ecp_nistz256_sqr_montq
1041
1042 leaq 128(%rsp),%rbx
1043 movq %r14,%r8
1044 movq %r15,%r9
1045 movq %rsi,%r14
1046 movq %rbp,%r15
1047 call __ecp_nistz256_sub_fromq
1048
1049 movq 0+0(%rsp),%rax
1050 movq 0+8(%rsp),%rbp
1051 movq 0+16(%rsp),%rcx
1052 movq 0+24(%rsp),%r10
1053 leaq 0(%rsp),%rdi
1054 call __ecp_nistz256_subq
1055
1056 movq 32(%rsp),%rax
1057 leaq 32(%rsp),%rbx
1058 movq %r12,%r14
1059 xorl %ecx,%ecx
1060 movq %r12,0+0(%rsp)
1061 movq %r13,%r10
1062 movq %r13,0+8(%rsp)
1063 cmovzq %r8,%r11
1064 movq %r8,0+16(%rsp)
1065 leaq 0-0(%rsp),%rsi
1066 cmovzq %r9,%r12
1067 movq %r9,0+24(%rsp)
1068 movq %r14,%r9
1069 leaq 0(%rsp),%rdi
1070 call __ecp_nistz256_mul_montq
1071
# movq %xmm1,%rbx / %xmm0,%rdi: res->y as subtrahend ptr, res as dest.
1072 .byte 102,72,15,126,203
1073 .byte 102,72,15,126,207
1074 call __ecp_nistz256_sub_fromq
1075
1076 addq $160+8,%rsp
1077 popq %r15
1078 popq %r14
1079 popq %r13
1080 popq %r12
1081 popq %rbx
1082 popq %rbp
1083 .byte 0xf3,0xc3
1084
# void ecp_nistz256_point_add(res = %rdi, a = %rsi, b = %rdx):
# Jacobian point addition over P-256 (Montgomery domain).
# 576-byte stack frame of 32-byte temporaries.  The SSE por/pshufd/
# pcmpeqd prologue computes all-zero / all-ones masks in xmm4 and xmm5
# recording whether b (resp. a) is the point at infinity; those masks
# drive the branchless pand/pandn result selection at the end.
# If the two inputs are the same point (U1==U2 and S1==S2) the result
# is written as all zeros via the L$add_proceedq tests.
1085 .globl _ecp_nistz256_point_add
1086 .private_extern _ecp_nistz256_point_add
1087
1088 .p2align 5
1089 _ecp_nistz256_point_add:
1090 pushq %rbp
1091 pushq %rbx
1092 pushq %r12
1093 pushq %r13
1094 pushq %r14
1095 pushq %r15
1096 subq $576+8,%rsp
1097
# Copy point a to the frame (x:384, y:416, z:448), OR-ing its limbs
# together in xmm3 for the infinity test.
1098 movdqu 0(%rsi),%xmm0
1099 movdqu 16(%rsi),%xmm1
1100 movdqu 32(%rsi),%xmm2
1101 movdqu 48(%rsi),%xmm3
1102 movdqu 64(%rsi),%xmm4
1103 movdqu 80(%rsi),%xmm5
1104 movq %rsi,%rbx
1105 movq %rdx,%rsi
1106 movdqa %xmm0,384(%rsp)
1107 movdqa %xmm1,384+16(%rsp)
1108 por %xmm0,%xmm1
1109 movdqa %xmm2,416(%rsp)
1110 movdqa %xmm3,416+16(%rsp)
1111 por %xmm2,%xmm3
1112 movdqa %xmm4,448(%rsp)
1113 movdqa %xmm5,448+16(%rsp)
1114 por %xmm1,%xmm3
1115
# Copy point b (x:480, y:512, z:544) likewise.
1116 movdqu 0(%rsi),%xmm0
1117 pshufd $177,%xmm3,%xmm5
1118 movdqu 16(%rsi),%xmm1
1119 movdqu 32(%rsi),%xmm2
1120 por %xmm3,%xmm5
1121 movdqu 48(%rsi),%xmm3
1122 movq 64+0(%rsi),%rax
1123 movq 64+8(%rsi),%r14
1124 movq 64+16(%rsi),%r15
1125 movq 64+24(%rsi),%r8
1126 movdqa %xmm0,480(%rsp)
1127 pshufd $30,%xmm5,%xmm4
1128 movdqa %xmm1,480+16(%rsp)
1129 por %xmm0,%xmm1
# movq %rdi,%xmm0: stash the result pointer.
1130 .byte 102,72,15,110,199
1131 movdqa %xmm2,512(%rsp)
1132 movdqa %xmm3,512+16(%rsp)
1133 por %xmm2,%xmm3
1134 por %xmm4,%xmm5
1135 pxor %xmm4,%xmm4
1136 por %xmm1,%xmm3
1137
1138 leaq 64-0(%rsi),%rsi
1139 movq %rax,544+0(%rsp)
1140 movq %r14,544+8(%rsp)
1141 movq %r15,544+16(%rsp)
1142 movq %r8,544+24(%rsp)
1143 leaq 96(%rsp),%rdi
1144 call __ecp_nistz256_sqr_montq
1145
1146 pcmpeqd %xmm4,%xmm5
1147 pshufd $177,%xmm3,%xmm4
1148 por %xmm3,%xmm4
1149 pshufd $0,%xmm5,%xmm5
1150 pshufd $30,%xmm4,%xmm3
1151 por %xmm3,%xmm4
1152 pxor %xmm3,%xmm3
1153 pcmpeqd %xmm3,%xmm4
1154 pshufd $0,%xmm4,%xmm4
1155 movq 64+0(%rbx),%rax
1156 movq 64+8(%rbx),%r14
1157 movq 64+16(%rbx),%r15
1158 movq 64+24(%rbx),%r8
1159
1160 leaq 64-0(%rbx),%rsi
1161 leaq 32(%rsp),%rdi
1162 call __ecp_nistz256_sqr_montq
1163
1164 movq 544(%rsp),%rax
1165 leaq 544(%rsp),%rbx
1166 movq 0+96(%rsp),%r9
1167 movq 8+96(%rsp),%r10
1168 leaq 0+96(%rsp),%rsi
1169 movq 16+96(%rsp),%r11
1170 movq 24+96(%rsp),%r12
1171 leaq 224(%rsp),%rdi
1172 call __ecp_nistz256_mul_montq
1173
1174 movq 448(%rsp),%rax
1175 leaq 448(%rsp),%rbx
1176 movq 0+32(%rsp),%r9
1177 movq 8+32(%rsp),%r10
1178 leaq 0+32(%rsp),%rsi
1179 movq 16+32(%rsp),%r11
1180 movq 24+32(%rsp),%r12
1181 leaq 256(%rsp),%rdi
1182 call __ecp_nistz256_mul_montq
1183
1184 movq 416(%rsp),%rax
1185 leaq 416(%rsp),%rbx
1186 movq 0+224(%rsp),%r9
1187 movq 8+224(%rsp),%r10
1188 leaq 0+224(%rsp),%rsi
1189 movq 16+224(%rsp),%r11
1190 movq 24+224(%rsp),%r12
1191 leaq 224(%rsp),%rdi
1192 call __ecp_nistz256_mul_montq
1193
1194 movq 512(%rsp),%rax
1195 leaq 512(%rsp),%rbx
1196 movq 0+256(%rsp),%r9
1197 movq 8+256(%rsp),%r10
1198 leaq 0+256(%rsp),%rsi
1199 movq 16+256(%rsp),%r11
1200 movq 24+256(%rsp),%r12
1201 leaq 256(%rsp),%rdi
1202 call __ecp_nistz256_mul_montq
1203
1204 leaq 224(%rsp),%rbx
1205 leaq 64(%rsp),%rdi
1206 call __ecp_nistz256_sub_fromq
1207
# r12 != 0 iff S2 != S1 (limbs OR-ed together).
1208 orq %r13,%r12
1209 movdqa %xmm4,%xmm2
1210 orq %r8,%r12
1211 orq %r9,%r12
1212 por %xmm5,%xmm2
# movq %r12,%xmm3: save the S-difference flag.
1213 .byte 102,73,15,110,220
1214
1215 movq 384(%rsp),%rax
1216 leaq 384(%rsp),%rbx
1217 movq 0+96(%rsp),%r9
1218 movq 8+96(%rsp),%r10
1219 leaq 0+96(%rsp),%rsi
1220 movq 16+96(%rsp),%r11
1221 movq 24+96(%rsp),%r12
1222 leaq 160(%rsp),%rdi
1223 call __ecp_nistz256_mul_montq
1224
1225 movq 480(%rsp),%rax
1226 leaq 480(%rsp),%rbx
1227 movq 0+32(%rsp),%r9
1228 movq 8+32(%rsp),%r10
1229 leaq 0+32(%rsp),%rsi
1230 movq 16+32(%rsp),%r11
1231 movq 24+32(%rsp),%r12
1232 leaq 192(%rsp),%rdi
1233 call __ecp_nistz256_mul_montq
1234
1235 leaq 160(%rsp),%rbx
1236 leaq 0(%rsp),%rdi
1237 call __ecp_nistz256_sub_fromq
1238
# r12 != 0 iff U2 != U1.
1239 orq %r13,%r12
1240 orq %r8,%r12
1241 orq %r9,%r12
1242
# 0x3e prefix before the branch (encoding padding/hint byte).
1243 .byte 0x3e
1244 jnz L$add_proceedq
# movq %xmm2,%r8 / %xmm3,%r9: infinity mask and S-flag.
1245 .byte 102,73,15,126,208
1246 .byte 102,73,15,126,217
1247 testq %r8,%r8
1248 jnz L$add_proceedq
1249 testq %r9,%r9
1250 jz L$add_proceedq
1251
# Doubling case (a == b, neither infinite): emit all-zero result.
# movq %xmm0,%rdi restores the result pointer.
1252 .byte 102,72,15,126,199
1253 pxor %xmm0,%xmm0
1254 movdqu %xmm0,0(%rdi)
1255 movdqu %xmm0,16(%rdi)
1256 movdqu %xmm0,32(%rdi)
1257 movdqu %xmm0,48(%rdi)
1258 movdqu %xmm0,64(%rdi)
1259 movdqu %xmm0,80(%rdi)
1260 jmp L$add_doneq
1261
1262 .p2align 5
1263 L$add_proceedq:
1264 movq 0+64(%rsp),%rax
1265 movq 8+64(%rsp),%r14
1266 leaq 0+64(%rsp),%rsi
1267 movq 16+64(%rsp),%r15
1268 movq 24+64(%rsp),%r8
1269 leaq 96(%rsp),%rdi
1270 call __ecp_nistz256_sqr_montq
1271
1272 movq 448(%rsp),%rax
1273 leaq 448(%rsp),%rbx
1274 movq 0+0(%rsp),%r9
1275 movq 8+0(%rsp),%r10
1276 leaq 0+0(%rsp),%rsi
1277 movq 16+0(%rsp),%r11
1278 movq 24+0(%rsp),%r12
1279 leaq 352(%rsp),%rdi
1280 call __ecp_nistz256_mul_montq
1281
1282 movq 0+0(%rsp),%rax
1283 movq 8+0(%rsp),%r14
1284 leaq 0+0(%rsp),%rsi
1285 movq 16+0(%rsp),%r15
1286 movq 24+0(%rsp),%r8
1287 leaq 32(%rsp),%rdi
1288 call __ecp_nistz256_sqr_montq
1289
1290 movq 544(%rsp),%rax
1291 leaq 544(%rsp),%rbx
1292 movq 0+352(%rsp),%r9
1293 movq 8+352(%rsp),%r10
1294 leaq 0+352(%rsp),%rsi
1295 movq 16+352(%rsp),%r11
1296 movq 24+352(%rsp),%r12
1297 leaq 352(%rsp),%rdi
1298 call __ecp_nistz256_mul_montq
1299
1300 movq 0(%rsp),%rax
1301 leaq 0(%rsp),%rbx
1302 movq 0+32(%rsp),%r9
1303 movq 8+32(%rsp),%r10
1304 leaq 0+32(%rsp),%rsi
1305 movq 16+32(%rsp),%r11
1306 movq 24+32(%rsp),%r12
1307 leaq 128(%rsp),%rdi
1308 call __ecp_nistz256_mul_montq
1309
1310 movq 160(%rsp),%rax
1311 leaq 160(%rsp),%rbx
1312 movq 0+32(%rsp),%r9
1313 movq 8+32(%rsp),%r10
1314 leaq 0+32(%rsp),%rsi
1315 movq 16+32(%rsp),%r11
1316 movq 24+32(%rsp),%r12
1317 leaq 192(%rsp),%rdi
1318 call __ecp_nistz256_mul_montq
1319
# Inline mul_by_2 of the last product, with branchless reduction.
1320
1321
1322
1323 addq %r12,%r12
1324 leaq 96(%rsp),%rsi
1325 adcq %r13,%r13
1326 movq %r12,%rax
1327 adcq %r8,%r8
1328 adcq %r9,%r9
1329 movq %r13,%rbp
1330 sbbq %r11,%r11
1331
1332 subq $-1,%r12
1333 movq %r8,%rcx
1334 sbbq %r14,%r13
1335 sbbq $0,%r8
1336 movq %r9,%r10
1337 sbbq %r15,%r9
1338 testq %r11,%r11
1339
1340 cmovzq %rax,%r12
1341 movq 0(%rsi),%rax
1342 cmovzq %rbp,%r13
1343 movq 8(%rsi),%rbp
1344 cmovzq %rcx,%r8
1345 movq 16(%rsi),%rcx
1346 cmovzq %r10,%r9
1347 movq 24(%rsi),%r10
1348
1349 call __ecp_nistz256_subq
1350
1351 leaq 128(%rsp),%rbx
1352 leaq 288(%rsp),%rdi
1353 call __ecp_nistz256_sub_fromq
1354
1355 movq 192+0(%rsp),%rax
1356 movq 192+8(%rsp),%rbp
1357 movq 192+16(%rsp),%rcx
1358 movq 192+24(%rsp),%r10
1359 leaq 320(%rsp),%rdi
1360
1361 call __ecp_nistz256_subq
1362
1363 movq %r12,0(%rdi)
1364 movq %r13,8(%rdi)
1365 movq %r8,16(%rdi)
1366 movq %r9,24(%rdi)
1367 movq 128(%rsp),%rax
1368 leaq 128(%rsp),%rbx
1369 movq 0+224(%rsp),%r9
1370 movq 8+224(%rsp),%r10
1371 leaq 0+224(%rsp),%rsi
1372 movq 16+224(%rsp),%r11
1373 movq 24+224(%rsp),%r12
1374 leaq 256(%rsp),%rdi
1375 call __ecp_nistz256_mul_montq
1376
1377 movq 320(%rsp),%rax
1378 leaq 320(%rsp),%rbx
1379 movq 0+64(%rsp),%r9
1380 movq 8+64(%rsp),%r10
1381 leaq 0+64(%rsp),%rsi
1382 movq 16+64(%rsp),%r11
1383 movq 24+64(%rsp),%r12
1384 leaq 320(%rsp),%rdi
1385 call __ecp_nistz256_mul_montq
1386
1387 leaq 256(%rsp),%rbx
1388 leaq 320(%rsp),%rdi
1389 call __ecp_nistz256_sub_fromq
1390
# movq %xmm0,%rdi: restore result pointer for the masked stores.
1391 .byte 102,72,15,126,199
1392
# Branchless select per coordinate: xmm5 = "a is infinity" mask,
# xmm4 = "b is infinity" mask; choose computed / b / a accordingly.
1393 movdqa %xmm5,%xmm0
1394 movdqa %xmm5,%xmm1
1395 pandn 352(%rsp),%xmm0
1396 movdqa %xmm5,%xmm2
1397 pandn 352+16(%rsp),%xmm1
1398 movdqa %xmm5,%xmm3
1399 pand 544(%rsp),%xmm2
1400 pand 544+16(%rsp),%xmm3
1401 por %xmm0,%xmm2
1402 por %xmm1,%xmm3
1403
1404 movdqa %xmm4,%xmm0
1405 movdqa %xmm4,%xmm1
1406 pandn %xmm2,%xmm0
1407 movdqa %xmm4,%xmm2
1408 pandn %xmm3,%xmm1
1409 movdqa %xmm4,%xmm3
1410 pand 448(%rsp),%xmm2
1411 pand 448+16(%rsp),%xmm3
1412 por %xmm0,%xmm2
1413 por %xmm1,%xmm3
1414 movdqu %xmm2,64(%rdi)
1415 movdqu %xmm3,80(%rdi)
1416
1417 movdqa %xmm5,%xmm0
1418 movdqa %xmm5,%xmm1
1419 pandn 288(%rsp),%xmm0
1420 movdqa %xmm5,%xmm2
1421 pandn 288+16(%rsp),%xmm1
1422 movdqa %xmm5,%xmm3
1423 pand 480(%rsp),%xmm2
1424 pand 480+16(%rsp),%xmm3
1425 por %xmm0,%xmm2
1426 por %xmm1,%xmm3
1427
1428 movdqa %xmm4,%xmm0
1429 movdqa %xmm4,%xmm1
1430 pandn %xmm2,%xmm0
1431 movdqa %xmm4,%xmm2
1432 pandn %xmm3,%xmm1
1433 movdqa %xmm4,%xmm3
1434 pand 384(%rsp),%xmm2
1435 pand 384+16(%rsp),%xmm3
1436 por %xmm0,%xmm2
1437 por %xmm1,%xmm3
1438 movdqu %xmm2,0(%rdi)
1439 movdqu %xmm3,16(%rdi)
1440
1441 movdqa %xmm5,%xmm0
1442 movdqa %xmm5,%xmm1
1443 pandn 320(%rsp),%xmm0
1444 movdqa %xmm5,%xmm2
1445 pandn 320+16(%rsp),%xmm1
1446 movdqa %xmm5,%xmm3
1447 pand 512(%rsp),%xmm2
1448 pand 512+16(%rsp),%xmm3
1449 por %xmm0,%xmm2
1450 por %xmm1,%xmm3
1451
1452 movdqa %xmm4,%xmm0
1453 movdqa %xmm4,%xmm1
1454 pandn %xmm2,%xmm0
1455 movdqa %xmm4,%xmm2
1456 pandn %xmm3,%xmm1
1457 movdqa %xmm4,%xmm3
1458 pand 416(%rsp),%xmm2
1459 pand 416+16(%rsp),%xmm3
1460 por %xmm0,%xmm2
1461 por %xmm1,%xmm3
1462 movdqu %xmm2,32(%rdi)
1463 movdqu %xmm3,48(%rdi)
1464
1465 L$add_doneq:
1466 addq $576+8,%rsp
1467 popq %r15
1468 popq %r14
1469 popq %r13
1470 popq %r12
1471 popq %rbx
1472 popq %rbp
1473 .byte 0xf3,0xc3
1474
# void ecp_nistz256_point_add_affine(res = %rdi, a = %rsi, b = %rdx):
# Mixed Jacobian + affine point addition over P-256 (Montgomery
# domain); b has no Z coordinate (implicitly 1, so the result's Z
# selection uses L$ONE_mont).  480-byte stack frame.  As in
# point_add, xmm5/xmm4 hold "a is infinity" / "b is infinity" masks
# that drive the branchless pand/pandn result selection at the end.
1475 .globl _ecp_nistz256_point_add_affine
1476 .private_extern _ecp_nistz256_point_add_affine
1477
1478 .p2align 5
1479 _ecp_nistz256_point_add_affine:
1480 pushq %rbp
1481 pushq %rbx
1482 pushq %r12
1483 pushq %r13
1484 pushq %r14
1485 pushq %r15
1486 subq $480+8,%rsp
1487
# Copy point a to the frame (x:320, y:352, z:384), OR-ing limbs in
# xmm3 for the infinity test.
1488 movdqu 0(%rsi),%xmm0
1489 movq %rdx,%rbx
1490 movdqu 16(%rsi),%xmm1
1491 movdqu 32(%rsi),%xmm2
1492 movdqu 48(%rsi),%xmm3
1493 movdqu 64(%rsi),%xmm4
1494 movdqu 80(%rsi),%xmm5
1495 movq 64+0(%rsi),%rax
1496 movq 64+8(%rsi),%r14
1497 movq 64+16(%rsi),%r15
1498 movq 64+24(%rsi),%r8
1499 movdqa %xmm0,320(%rsp)
1500 movdqa %xmm1,320+16(%rsp)
1501 por %xmm0,%xmm1
1502 movdqa %xmm2,352(%rsp)
1503 movdqa %xmm3,352+16(%rsp)
1504 por %xmm2,%xmm3
1505 movdqa %xmm4,384(%rsp)
1506 movdqa %xmm5,384+16(%rsp)
1507 por %xmm1,%xmm3
1508
# Copy affine point b (x:416, y:448) likewise.
1509 movdqu 0(%rbx),%xmm0
1510 pshufd $177,%xmm3,%xmm5
1511 movdqu 16(%rbx),%xmm1
1512 movdqu 32(%rbx),%xmm2
1513 por %xmm3,%xmm5
1514 movdqu 48(%rbx),%xmm3
1515 movdqa %xmm0,416(%rsp)
1516 pshufd $30,%xmm5,%xmm4
1517 movdqa %xmm1,416+16(%rsp)
1518 por %xmm0,%xmm1
# movq %rdi,%xmm0: stash the result pointer.
1519 .byte 102,72,15,110,199
1520 movdqa %xmm2,448(%rsp)
1521 movdqa %xmm3,448+16(%rsp)
1522 por %xmm2,%xmm3
1523 por %xmm4,%xmm5
1524 pxor %xmm4,%xmm4
1525 por %xmm1,%xmm3
1526
1527 leaq 64-0(%rsi),%rsi
1528 leaq 32(%rsp),%rdi
1529 call __ecp_nistz256_sqr_montq
1530
1531 pcmpeqd %xmm4,%xmm5
1532 pshufd $177,%xmm3,%xmm4
1533 movq 0(%rbx),%rax
1534
1535 movq %r12,%r9
1536 por %xmm3,%xmm4
1537 pshufd $0,%xmm5,%xmm5
1538 pshufd $30,%xmm4,%xmm3
1539 movq %r13,%r10
1540 por %xmm3,%xmm4
1541 pxor %xmm3,%xmm3
1542 movq %r14,%r11
1543 pcmpeqd %xmm3,%xmm4
1544 pshufd $0,%xmm4,%xmm4
1545
1546 leaq 32-0(%rsp),%rsi
1547 movq %r15,%r12
1548 leaq 0(%rsp),%rdi
1549 call __ecp_nistz256_mul_montq
1550
1551 leaq 320(%rsp),%rbx
1552 leaq 64(%rsp),%rdi
1553 call __ecp_nistz256_sub_fromq
1554
1555 movq 384(%rsp),%rax
1556 leaq 384(%rsp),%rbx
1557 movq 0+32(%rsp),%r9
1558 movq 8+32(%rsp),%r10
1559 leaq 0+32(%rsp),%rsi
1560 movq 16+32(%rsp),%r11
1561 movq 24+32(%rsp),%r12
1562 leaq 32(%rsp),%rdi
1563 call __ecp_nistz256_mul_montq
1564
1565 movq 384(%rsp),%rax
1566 leaq 384(%rsp),%rbx
1567 movq 0+64(%rsp),%r9
1568 movq 8+64(%rsp),%r10
1569 leaq 0+64(%rsp),%rsi
1570 movq 16+64(%rsp),%r11
1571 movq 24+64(%rsp),%r12
1572 leaq 288(%rsp),%rdi
1573 call __ecp_nistz256_mul_montq
1574
1575 movq 448(%rsp),%rax
1576 leaq 448(%rsp),%rbx
1577 movq 0+32(%rsp),%r9
1578 movq 8+32(%rsp),%r10
1579 leaq 0+32(%rsp),%rsi
1580 movq 16+32(%rsp),%r11
1581 movq 24+32(%rsp),%r12
1582 leaq 32(%rsp),%rdi
1583 call __ecp_nistz256_mul_montq
1584
1585 leaq 352(%rsp),%rbx
1586 leaq 96(%rsp),%rdi
1587 call __ecp_nistz256_sub_fromq
1588
1589 movq 0+64(%rsp),%rax
1590 movq 8+64(%rsp),%r14
1591 leaq 0+64(%rsp),%rsi
1592 movq 16+64(%rsp),%r15
1593 movq 24+64(%rsp),%r8
1594 leaq 128(%rsp),%rdi
1595 call __ecp_nistz256_sqr_montq
1596
1597 movq 0+96(%rsp),%rax
1598 movq 8+96(%rsp),%r14
1599 leaq 0+96(%rsp),%rsi
1600 movq 16+96(%rsp),%r15
1601 movq 24+96(%rsp),%r8
1602 leaq 192(%rsp),%rdi
1603 call __ecp_nistz256_sqr_montq
1604
1605 movq 128(%rsp),%rax
1606 leaq 128(%rsp),%rbx
1607 movq 0+64(%rsp),%r9
1608 movq 8+64(%rsp),%r10
1609 leaq 0+64(%rsp),%rsi
1610 movq 16+64(%rsp),%r11
1611 movq 24+64(%rsp),%r12
1612 leaq 160(%rsp),%rdi
1613 call __ecp_nistz256_mul_montq
1614
1615 movq 320(%rsp),%rax
1616 leaq 320(%rsp),%rbx
1617 movq 0+128(%rsp),%r9
1618 movq 8+128(%rsp),%r10
1619 leaq 0+128(%rsp),%rsi
1620 movq 16+128(%rsp),%r11
1621 movq 24+128(%rsp),%r12
1622 leaq 0(%rsp),%rdi
1623 call __ecp_nistz256_mul_montq
1624
# Inline mul_by_2 of the last product, with branchless reduction.
1625
1626
1627
1628 addq %r12,%r12
1629 leaq 192(%rsp),%rsi
1630 adcq %r13,%r13
1631 movq %r12,%rax
1632 adcq %r8,%r8
1633 adcq %r9,%r9
1634 movq %r13,%rbp
1635 sbbq %r11,%r11
1636
1637 subq $-1,%r12
1638 movq %r8,%rcx
1639 sbbq %r14,%r13
1640 sbbq $0,%r8
1641 movq %r9,%r10
1642 sbbq %r15,%r9
1643 testq %r11,%r11
1644
1645 cmovzq %rax,%r12
1646 movq 0(%rsi),%rax
1647 cmovzq %rbp,%r13
1648 movq 8(%rsi),%rbp
1649 cmovzq %rcx,%r8
1650 movq 16(%rsi),%rcx
1651 cmovzq %r10,%r9
1652 movq 24(%rsi),%r10
1653
1654 call __ecp_nistz256_subq
1655
1656 leaq 160(%rsp),%rbx
1657 leaq 224(%rsp),%rdi
1658 call __ecp_nistz256_sub_fromq
1659
1660 movq 0+0(%rsp),%rax
1661 movq 0+8(%rsp),%rbp
1662 movq 0+16(%rsp),%rcx
1663 movq 0+24(%rsp),%r10
1664 leaq 64(%rsp),%rdi
1665
1666 call __ecp_nistz256_subq
1667
1668 movq %r12,0(%rdi)
1669 movq %r13,8(%rdi)
1670 movq %r8,16(%rdi)
1671 movq %r9,24(%rdi)
1672 movq 352(%rsp),%rax
1673 leaq 352(%rsp),%rbx
1674 movq 0+160(%rsp),%r9
1675 movq 8+160(%rsp),%r10
1676 leaq 0+160(%rsp),%rsi
1677 movq 16+160(%rsp),%r11
1678 movq 24+160(%rsp),%r12
1679 leaq 32(%rsp),%rdi
1680 call __ecp_nistz256_mul_montq
1681
1682 movq 96(%rsp),%rax
1683 leaq 96(%rsp),%rbx
1684 movq 0+64(%rsp),%r9
1685 movq 8+64(%rsp),%r10
1686 leaq 0+64(%rsp),%rsi
1687 movq 16+64(%rsp),%r11
1688 movq 24+64(%rsp),%r12
1689 leaq 64(%rsp),%rdi
1690 call __ecp_nistz256_mul_montq
1691
1692 leaq 32(%rsp),%rbx
1693 leaq 256(%rsp),%rdi
1694 call __ecp_nistz256_sub_fromq
1695
# movq %xmm0,%rdi: restore result pointer for the masked stores.
1696 .byte 102,72,15,126,199
1697
# Branchless select: note Z falls back to L$ONE_mont when a is
# infinity, since affine b's implicit Z is 1 in Montgomery form.
1697 movdqa %xmm5,%xmm0
1698 movdqa %xmm5,%xmm1
1700 pandn 288(%rsp),%xmm0
1701 movdqa %xmm5,%xmm2
1702 pandn 288+16(%rsp),%xmm1
1703 movdqa %xmm5,%xmm3
1704 pand L$ONE_mont(%rip),%xmm2
1705 pand L$ONE_mont+16(%rip),%xmm3
1706 por %xmm0,%xmm2
1707 por %xmm1,%xmm3
1708
1709 movdqa %xmm4,%xmm0
1710 movdqa %xmm4,%xmm1
1711 pandn %xmm2,%xmm0
1712 movdqa %xmm4,%xmm2
1713 pandn %xmm3,%xmm1
1714 movdqa %xmm4,%xmm3
1715 pand 384(%rsp),%xmm2
1716 pand 384+16(%rsp),%xmm3
1717 por %xmm0,%xmm2
1718 por %xmm1,%xmm3
1719 movdqu %xmm2,64(%rdi)
1720 movdqu %xmm3,80(%rdi)
1721
1722 movdqa %xmm5,%xmm0
1723 movdqa %xmm5,%xmm1
1724 pandn 224(%rsp),%xmm0
1725 movdqa %xmm5,%xmm2
1726 pandn 224+16(%rsp),%xmm1
1727 movdqa %xmm5,%xmm3
1728 pand 416(%rsp),%xmm2
1729 pand 416+16(%rsp),%xmm3
1730 por %xmm0,%xmm2
1731 por %xmm1,%xmm3
1732
1733 movdqa %xmm4,%xmm0
1734 movdqa %xmm4,%xmm1
1735 pandn %xmm2,%xmm0
1736 movdqa %xmm4,%xmm2
1737 pandn %xmm3,%xmm1
1738 movdqa %xmm4,%xmm3
1739 pand 320(%rsp),%xmm2
1740 pand 320+16(%rsp),%xmm3
1741 por %xmm0,%xmm2
1742 por %xmm1,%xmm3
1743 movdqu %xmm2,0(%rdi)
1744 movdqu %xmm3,16(%rdi)
1745
1746 movdqa %xmm5,%xmm0
1747 movdqa %xmm5,%xmm1
1748 pandn 256(%rsp),%xmm0
1749 movdqa %xmm5,%xmm2
1750 pandn 256+16(%rsp),%xmm1
1751 movdqa %xmm5,%xmm3
1752 pand 448(%rsp),%xmm2
1753 pand 448+16(%rsp),%xmm3
1754 por %xmm0,%xmm2
1755 por %xmm1,%xmm3
1756
1757 movdqa %xmm4,%xmm0
1758 movdqa %xmm4,%xmm1
1759 pandn %xmm2,%xmm0
1760 movdqa %xmm4,%xmm2
1761 pandn %xmm3,%xmm1
1762 movdqa %xmm4,%xmm3
1763 pand 352(%rsp),%xmm2
1764 pand 352+16(%rsp),%xmm3
1765 por %xmm0,%xmm2
1766 por %xmm1,%xmm3
1767 movdqu %xmm2,32(%rdi)
1768 movdqu %xmm3,48(%rdi)
1769
1770 addq $480+8,%rsp
1771 popq %r15
1772 popq %r14
1773 popq %r13
1774 popq %r12
1775 popq %rbx
1776 popq %rbp
1777 .byte 0xf3,0xc3
1778
1779 #endif
OLDNEW
« no previous file with comments | « third_party/boringssl/mac-x86_64/crypto/cpu-x86_64-asm.S ('k') | third_party/boringssl/mac-x86_64/crypto/rc4/rc4-md5-x86_64.S » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698