Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(75)

Side by Side Diff: third_party/boringssl/mac-x86_64/crypto/ec/p256-x86_64-asm.S

Issue 2354623003: Pull boringssl generated source from boringssl_gen (Closed)
Patch Set: . Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 #if defined(__x86_64__)
2 .text
3
4
5
# Constant pool used by the routines below (64-byte aligned).
6 .p2align 6
# L$poly: the 256-bit modulus p as four 64-bit limbs, least significant first
# (p = 2^256 - 2^224 + 2^192 + 2^96 - 1).  The code reads limb 1 through
# L$poly+8 and limb 3 through L$poly+24.
7 L$poly:
8 .quad 0xffffffffffffffff, 0x00000000ffffffff, 0x0000000000000000, 0xffffffff00000001
9
# L$One: eight 32-bit ones; loaded as a per-lane counter seed and increment by
# the table-select routines.
10 L$One:
11 .long 1,1,1,1,1,1,1,1
# L$Two / L$Three: not referenced by the code visible in this file.
12 L$Two:
13 .long 2,2,2,2,2,2,2,2
14 L$Three:
15 .long 3,3,3,3,3,3,3,3
# L$ONE_mont: 2^256 mod p (limbs least significant first), used as the Z
# coordinate substitute in _ecp_nistz256_point_add_affine.
16 L$ONE_mont:
17 .quad 0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe
18
19
# ecp_nistz256_mul_by_2: res = 2*a mod p, fully reduced.
# In:   %rsi = a   (four 64-bit limbs, least significant first)
#       %rdi = res (four limbs)
# Uses: %rax, %rcx, %rdx, %r8-%r11 as scratch, %rsi is overwritten with the
#       address of L$poly; %r12 and %r13 are saved and restored.
# The doubled value can be up to 257 bits.  p is subtracted and the original
# sum is kept only when that subtraction borrows out of the 257-bit value, so
# sums in [p, 2^256) are reduced as well as sums that overflow 256 bits.
20 .p2align 6
21 ecp_nistz256_mul_by_2:
22 pushq %r12
23 pushq %r13
24
# Load a and double it limb by limb, keeping untouched copies in
# %rax/%rdx/%rcx/%r12 for the final selection.
25 movq 0(%rsi),%r8
26 movq 8(%rsi),%r9
27 addq %r8,%r8
28 movq 16(%rsi),%r10
29 adcq %r9,%r9
30 movq 24(%rsi),%r11
31 leaq L$poly(%rip),%rsi
32 movq %r8,%rax
33 adcq %r10,%r10
34 adcq %r11,%r11
35 movq %r9,%rdx
# %r13 = 0 when the doubling did not carry out, all ones when it did.
36 sbbq %r13,%r13
37
# Subtract p from the low 256 bits.
38 subq 0(%rsi),%r8
39 movq %r10,%rcx
40 sbbq 8(%rsi),%r9
41 sbbq 16(%rsi),%r10
42 movq %r11,%r12
43 sbbq 24(%rsi),%r11
# Fold the borrow into the carry mask: CF ends up set only when there was no
# carry out (mask 0) and the subtraction borrowed, i.e. when 2*a < p.
44 sbbq $0,%r13
45
# CF set: keep the unsubtracted sum.  CF clear: keep sum - p.
46 cmovcq %rax,%r8
47 cmovcq %rdx,%r9
48 movq %r8,0(%rdi)
49 cmovcq %rcx,%r10
50 movq %r9,8(%rdi)
51 cmovcq %r12,%r11
52 movq %r10,16(%rdi)
53 movq %r11,24(%rdi)
54
55 popq %r13
56 popq %r12
# 0xf3,0xc3 encodes "rep ret".
57 .byte 0xf3,0xc3
58
59
60
61
# _ecp_nistz256_neg: res = -a mod p.
# In:   %rsi = a   (four 64-bit limbs, least significant first)
#       %rdi = res (four limbs)
# Uses: %rax, %rcx, %rdx, %r8-%r11 as scratch, %rsi is overwritten with the
#       address of L$poly; %r12 and %r13 are saved and restored.
62 .globl _ecp_nistz256_neg
63 .private_extern _ecp_nistz256_neg
64
65 .p2align 5
66 _ecp_nistz256_neg:
67 pushq %r12
68 pushq %r13
69
# Start from zero in %r8-%r11 and in the borrow collector %r13.
70 xorq %r8,%r8
71 xorq %r9,%r9
72 xorq %r10,%r10
73 xorq %r11,%r11
74 xorq %r13,%r13
75
# Compute 0 - a; copies of the raw difference go to %rax/%rdx/%rcx/%r12.
76 subq 0(%rsi),%r8
77 sbbq 8(%rsi),%r9
78 sbbq 16(%rsi),%r10
79 movq %r8,%rax
80 sbbq 24(%rsi),%r11
81 leaq L$poly(%rip),%rsi
82 movq %r9,%rdx
# %r13 becomes all ones when the subtraction borrowed (a was non-zero).
83 sbbq $0,%r13
84
# Add p to the difference.
85 addq 0(%rsi),%r8
86 movq %r10,%rcx
87 adcq 8(%rsi),%r9
88 adcq 16(%rsi),%r10
89 movq %r11,%r12
90 adcq 24(%rsi),%r11
91 testq %r13,%r13
92
# No borrow (a == 0): keep the raw difference.  Otherwise keep difference + p.
93 cmovzq %rax,%r8
94 cmovzq %rdx,%r9
95 movq %r8,0(%rdi)
96 cmovzq %rcx,%r10
97 movq %r9,8(%rdi)
98 cmovzq %r12,%r11
99 movq %r10,16(%rdi)
100 movq %r11,24(%rdi)
101
102 popq %r13
103 popq %r12
# 0xf3,0xc3 encodes "rep ret".
104 .byte 0xf3,0xc3
105
106
107
108
109
110
111
# _ecp_nistz256_mul_mont: exported wrapper around __ecp_nistz256_mul_montq.
# In:   %rdi = res, %rsi = a, %rdx = b (each four 64-bit limbs)
# Saves %rbp, %rbx and %r12-%r15, loads the register operands the inner
# routine expects (%rbx = b, %rax = b[0], %r9-%r12 = a[0..3]) and calls it.
112 .globl _ecp_nistz256_mul_mont
113 .private_extern _ecp_nistz256_mul_mont
114
115 .p2align 5
116 _ecp_nistz256_mul_mont:
117 L$mul_mont:
118 pushq %rbp
119 pushq %rbx
120 pushq %r12
121 pushq %r13
122 pushq %r14
123 pushq %r15
124 movq %rdx,%rbx
125 movq 0(%rdx),%rax
126 movq 0(%rsi),%r9
127 movq 8(%rsi),%r10
128 movq 16(%rsi),%r11
129 movq 24(%rsi),%r12
130
131 call __ecp_nistz256_mul_montq
132 L$mul_mont_done:
133 popq %r15
134 popq %r14
135 popq %r13
136 popq %r12
137 popq %rbx
138 popq %rbp
# 0xf3,0xc3 encodes "rep ret".
139 .byte 0xf3,0xc3
140
141
142
# __ecp_nistz256_mul_montq: internal 4x4-limb multiply with interleaved
# reduction modulo L$poly (Montgomery multiplication, per the routine name).
# In:   %rax = b[0], %rbx = b, %rsi = a, %r9-%r12 = a[0..3], %rdi = res
# Out:  result stored to 0..24(%rdi) and also left in %r12,%r13,%r8,%r9
#       (limbs 0..3); %r14 = L$poly+8, %r15 = L$poly+24 on return.
# Uses: %rax, %rdx, %rcx, %rbp, %rbx, %r8-%r15.
# Each round multiplies a by one limb of b, then removes the lowest
# accumulator limb with two 32-bit shifts and one multiply by L$poly+24.
143 .p2align 5
144 __ecp_nistz256_mul_montq:
145
146
# Round 0: accumulator (%r8..%r12) = a * b[0].
147 movq %rax,%rbp
148 mulq %r9
149 movq L$poly+8(%rip),%r14
150 movq %rax,%r8
151 movq %rbp,%rax
152 movq %rdx,%r9
153
154 mulq %r10
155 movq L$poly+24(%rip),%r15
156 addq %rax,%r9
157 movq %rbp,%rax
158 adcq $0,%rdx
159 movq %rdx,%r10
160
161 mulq %r11
162 addq %rax,%r10
163 movq %rbp,%rax
164 adcq $0,%rdx
165 movq %rdx,%r11
166
167 mulq %r12
168 addq %rax,%r11
169 movq %r8,%rax
170 adcq $0,%rdx
171 xorq %r13,%r13
172 movq %rdx,%r12
173
174
175
176
177
178
179
180
181
182
# Reduction 0: fold limb %r8 into %r9..%r13, then fetch b[1].
183 movq %r8,%rbp
184 shlq $32,%r8
185 mulq %r15
186 shrq $32,%rbp
187 addq %r8,%r9
188 adcq %rbp,%r10
189 adcq %rax,%r11
190 movq 8(%rbx),%rax
191 adcq %rdx,%r12
192 adcq $0,%r13
193 xorq %r8,%r8
194
195
196
# Round 1: accumulator (%r9..%r13,%r8) += a * b[1].
197 movq %rax,%rbp
198 mulq 0(%rsi)
199 addq %rax,%r9
200 movq %rbp,%rax
201 adcq $0,%rdx
202 movq %rdx,%rcx
203
204 mulq 8(%rsi)
205 addq %rcx,%r10
206 adcq $0,%rdx
207 addq %rax,%r10
208 movq %rbp,%rax
209 adcq $0,%rdx
210 movq %rdx,%rcx
211
212 mulq 16(%rsi)
213 addq %rcx,%r11
214 adcq $0,%rdx
215 addq %rax,%r11
216 movq %rbp,%rax
217 adcq $0,%rdx
218 movq %rdx,%rcx
219
220 mulq 24(%rsi)
221 addq %rcx,%r12
222 adcq $0,%rdx
223 addq %rax,%r12
224 movq %r9,%rax
225 adcq %rdx,%r13
226 adcq $0,%r8
227
228
229
# Reduction 1: fold limb %r9, then fetch b[2].
230 movq %r9,%rbp
231 shlq $32,%r9
232 mulq %r15
233 shrq $32,%rbp
234 addq %r9,%r10
235 adcq %rbp,%r11
236 adcq %rax,%r12
237 movq 16(%rbx),%rax
238 adcq %rdx,%r13
239 adcq $0,%r8
240 xorq %r9,%r9
241
242
243
# Round 2: accumulator (%r10..%r13,%r8,%r9) += a * b[2].
244 movq %rax,%rbp
245 mulq 0(%rsi)
246 addq %rax,%r10
247 movq %rbp,%rax
248 adcq $0,%rdx
249 movq %rdx,%rcx
250
251 mulq 8(%rsi)
252 addq %rcx,%r11
253 adcq $0,%rdx
254 addq %rax,%r11
255 movq %rbp,%rax
256 adcq $0,%rdx
257 movq %rdx,%rcx
258
259 mulq 16(%rsi)
260 addq %rcx,%r12
261 adcq $0,%rdx
262 addq %rax,%r12
263 movq %rbp,%rax
264 adcq $0,%rdx
265 movq %rdx,%rcx
266
267 mulq 24(%rsi)
268 addq %rcx,%r13
269 adcq $0,%rdx
270 addq %rax,%r13
271 movq %r10,%rax
272 adcq %rdx,%r8
273 adcq $0,%r9
274
275
276
# Reduction 2: fold limb %r10, then fetch b[3].
277 movq %r10,%rbp
278 shlq $32,%r10
279 mulq %r15
280 shrq $32,%rbp
281 addq %r10,%r11
282 adcq %rbp,%r12
283 adcq %rax,%r13
284 movq 24(%rbx),%rax
285 adcq %rdx,%r8
286 adcq $0,%r9
287 xorq %r10,%r10
288
289
290
# Round 3: accumulator (%r11..%r13,%r8,%r9,%r10) += a * b[3].
291 movq %rax,%rbp
292 mulq 0(%rsi)
293 addq %rax,%r11
294 movq %rbp,%rax
295 adcq $0,%rdx
296 movq %rdx,%rcx
297
298 mulq 8(%rsi)
299 addq %rcx,%r12
300 adcq $0,%rdx
301 addq %rax,%r12
302 movq %rbp,%rax
303 adcq $0,%rdx
304 movq %rdx,%rcx
305
306 mulq 16(%rsi)
307 addq %rcx,%r13
308 adcq $0,%rdx
309 addq %rax,%r13
310 movq %rbp,%rax
311 adcq $0,%rdx
312 movq %rdx,%rcx
313
314 mulq 24(%rsi)
315 addq %rcx,%r8
316 adcq $0,%rdx
317 addq %rax,%r8
318 movq %r11,%rax
319 adcq %rdx,%r9
320 adcq $0,%r10
321
322
323
# Reduction 3: fold limb %r11; copies of the unsubtracted limbs 0 and 1 go to
# %rcx and %rbp.
324 movq %r11,%rbp
325 shlq $32,%r11
326 mulq %r15
327 shrq $32,%rbp
328 addq %r11,%r12
329 adcq %rbp,%r13
330 movq %r12,%rcx
331 adcq %rax,%r8
332 adcq %rdx,%r9
333 movq %r13,%rbp
334 adcq $0,%r10
335
336
337
# Final conditional subtraction of p from (%r10:%r9:%r8:%r13:%r12); limbs 2
# and 3 are backed up in %rbx and %rdx.  "subq $-1" subtracts limb 0 of p.
338 subq $-1,%r12
339 movq %r8,%rbx
340 sbbq %r14,%r13
341 sbbq $0,%r8
342 movq %r9,%rdx
343 sbbq %r15,%r9
344 sbbq $0,%r10
345
# Borrow (CF set) means the value was already below p: restore the backups.
346 cmovcq %rcx,%r12
347 cmovcq %rbp,%r13
348 movq %r12,0(%rdi)
349 cmovcq %rbx,%r8
350 movq %r13,8(%rdi)
351 cmovcq %rdx,%r9
352 movq %r8,16(%rdi)
353 movq %r9,24(%rdi)
354
# 0xf3,0xc3 encodes "rep ret".
355 .byte 0xf3,0xc3
356
357
358
359
360
361
362
363
364
# _ecp_nistz256_sqr_mont: exported wrapper around __ecp_nistz256_sqr_montq.
# In:   %rdi = res, %rsi = a (each four 64-bit limbs)
# Saves %rbp, %rbx and %r12-%r15, loads a[0..3] into %rax, %r14, %r15, %r8 as
# the inner routine expects, and calls it.
365 .globl _ecp_nistz256_sqr_mont
366 .private_extern _ecp_nistz256_sqr_mont
367
368 .p2align 5
369 _ecp_nistz256_sqr_mont:
370 pushq %rbp
371 pushq %rbx
372 pushq %r12
373 pushq %r13
374 pushq %r14
375 pushq %r15
376 movq 0(%rsi),%rax
377 movq 8(%rsi),%r14
378 movq 16(%rsi),%r15
379 movq 24(%rsi),%r8
380
381 call __ecp_nistz256_sqr_montq
382 L$sqr_mont_done:
383 popq %r15
384 popq %r14
385 popq %r13
386 popq %r12
387 popq %rbx
388 popq %rbp
# 0xf3,0xc3 encodes "rep ret".
389 .byte 0xf3,0xc3
390
391
392
# __ecp_nistz256_sqr_montq: internal 4-limb squaring followed by reduction
# modulo L$poly (Montgomery squaring, per the routine name).
# In:   %rax = a[0], %r14 = a[1], %r15 = a[2], %r8 = a[3], %rsi = a, %rdi = res
# Out:  result stored to 0..24(%rdi) and also left in %r12,%r13,%r14,%r15
#       (limbs 0..3); %rsi = L$poly+8 and %rbp = L$poly+24 on return.
# Uses: %rax, %rdx, %rcx, %rbp, %rsi, %r8-%r15.
393 .p2align 5
394 __ecp_nistz256_sqr_montq:
# Off-diagonal products a[i]*a[j], i < j, accumulated in %r9..%r14.
395 movq %rax,%r13
396 mulq %r14
397 movq %rax,%r9
398 movq %r15,%rax
399 movq %rdx,%r10
400
401 mulq %r13
402 addq %rax,%r10
403 movq %r8,%rax
404 adcq $0,%rdx
405 movq %rdx,%r11
406
407 mulq %r13
408 addq %rax,%r11
409 movq %r15,%rax
410 adcq $0,%rdx
411 movq %rdx,%r12
412
413
414 mulq %r14
415 addq %rax,%r11
416 movq %r8,%rax
417 adcq $0,%rdx
418 movq %rdx,%rbp
419
420 mulq %r14
421 addq %rax,%r12
422 movq %r8,%rax
423 adcq $0,%rdx
424 addq %rbp,%r12
425 movq %rdx,%r13
426 adcq $0,%r13
427
428
429 mulq %r15
430 xorq %r15,%r15
431 addq %rax,%r13
432 movq 0(%rsi),%rax
433 movq %rdx,%r14
434 adcq $0,%r14
435
# Double the off-diagonal sum; the carry out lands in %r15.
436 addq %r9,%r9
437 adcq %r10,%r10
438 adcq %r11,%r11
439 adcq %r12,%r12
440 adcq %r13,%r13
441 adcq %r14,%r14
442 adcq $0,%r15
443
# Add the diagonal squares a[i]^2, re-reading a[] from memory at (%rsi).
444 mulq %rax
445 movq %rax,%r8
446 movq 8(%rsi),%rax
447 movq %rdx,%rcx
448
449 mulq %rax
450 addq %rcx,%r9
451 adcq %rax,%r10
452 movq 16(%rsi),%rax
453 adcq $0,%rdx
454 movq %rdx,%rcx
455
456 mulq %rax
457 addq %rcx,%r11
458 adcq %rax,%r12
459 movq 24(%rsi),%rax
460 adcq $0,%rdx
461 movq %rdx,%rcx
462
463 mulq %rax
464 addq %rcx,%r13
465 adcq %rax,%r14
466 movq %r8,%rax
467 adcq %rdx,%r15
468
# From here on %rsi and %rbp hold limbs 1 and 3 of p rather than pointers.
469 movq L$poly+8(%rip),%rsi
470 movq L$poly+24(%rip),%rbp
471
472
473
474
# Reduction, iteration 1: fold limb %r8.
475 movq %r8,%rcx
476 shlq $32,%r8
477 mulq %rbp
478 shrq $32,%rcx
479 addq %r8,%r9
480 adcq %rcx,%r10
481 adcq %rax,%r11
482 movq %r9,%rax
483 adcq $0,%rdx
484
485
486
# Reduction, iteration 2: fold limb %r9.
487 movq %r9,%rcx
488 shlq $32,%r9
489 movq %rdx,%r8
490 mulq %rbp
491 shrq $32,%rcx
492 addq %r9,%r10
493 adcq %rcx,%r11
494 adcq %rax,%r8
495 movq %r10,%rax
496 adcq $0,%rdx
497
498
499
# Reduction, iteration 3: fold limb %r10.
500 movq %r10,%rcx
501 shlq $32,%r10
502 movq %rdx,%r9
503 mulq %rbp
504 shrq $32,%rcx
505 addq %r10,%r11
506 adcq %rcx,%r8
507 adcq %rax,%r9
508 movq %r11,%rax
509 adcq $0,%rdx
510
511
512
# Reduction, iteration 4: fold limb %r11.
513 movq %r11,%rcx
514 shlq $32,%r11
515 movq %rdx,%r10
516 mulq %rbp
517 shrq $32,%rcx
518 addq %r11,%r8
519 adcq %rcx,%r9
520 adcq %rax,%r10
521 adcq $0,%rdx
522 xorq %r11,%r11
523
524
525
# Add the reduced low half to the upper half of the square (%r12..%r15);
# %r11 collects the carry, and %r8/%r9 keep copies of limbs 0 and 1.
526 addq %r8,%r12
527 adcq %r9,%r13
528 movq %r12,%r8
529 adcq %r10,%r14
530 adcq %rdx,%r15
531 movq %r13,%r9
532 adcq $0,%r11
533
# Conditional subtraction of p; limbs 2 and 3 are backed up in %r10 and %rcx.
534 subq $-1,%r12
535 movq %r14,%r10
536 sbbq %rsi,%r13
537 sbbq $0,%r14
538 movq %r15,%rcx
539 sbbq %rbp,%r15
540 sbbq $0,%r11
541
# Borrow (CF set) means the value was already below p: restore the backups.
542 cmovcq %r8,%r12
543 cmovcq %r9,%r13
544 movq %r12,0(%rdi)
545 cmovcq %r10,%r14
546 movq %r13,8(%rdi)
547 cmovcq %rcx,%r15
548 movq %r14,16(%rdi)
549 movq %r15,24(%rdi)
550
# 0xf3,0xc3 encodes "rep ret".
551 .byte 0xf3,0xc3
552
553
554
555
556
557
558
# _ecp_nistz256_from_mont: run four reduction steps on a 4-limb input and store
# the result reduced modulo p (conversion out of Montgomery form, per the
# routine name).
# In:   %rsi = in  (four 64-bit limbs, least significant first)
#       %rdi = res (four limbs)
# Uses: %rax, %rcx, %rdx, %rsi, %r8-%r11 as scratch; %r12 and %r13 are saved
#       and restored.  %r12 = L$poly+8, %r13 = L$poly+24 inside the body.
559 .globl _ecp_nistz256_from_mont
560 .private_extern _ecp_nistz256_from_mont
561
562 .p2align 5
563 _ecp_nistz256_from_mont:
564 pushq %r12
565 pushq %r13
566
567 movq 0(%rsi),%rax
568 movq L$poly+24(%rip),%r13
569 movq 8(%rsi),%r9
570 movq 16(%rsi),%r10
571 movq 24(%rsi),%r11
572 movq %rax,%r8
573 movq L$poly+8(%rip),%r12
574
575
576
# Iteration 1: fold limb %r8 into the limbs above it.
577 movq %rax,%rcx
578 shlq $32,%r8
579 mulq %r13
580 shrq $32,%rcx
581 addq %r8,%r9
582 adcq %rcx,%r10
583 adcq %rax,%r11
584 movq %r9,%rax
585 adcq $0,%rdx
586
587
588
# Iteration 2: fold limb %r9.
589 movq %r9,%rcx
590 shlq $32,%r9
591 movq %rdx,%r8
592 mulq %r13
593 shrq $32,%rcx
594 addq %r9,%r10
595 adcq %rcx,%r11
596 adcq %rax,%r8
597 movq %r10,%rax
598 adcq $0,%rdx
599
600
601
# Iteration 3: fold limb %r10.
602 movq %r10,%rcx
603 shlq $32,%r10
604 movq %rdx,%r9
605 mulq %r13
606 shrq $32,%rcx
607 addq %r10,%r11
608 adcq %rcx,%r8
609 adcq %rax,%r9
610 movq %r11,%rax
611 adcq $0,%rdx
612
613
614
# Iteration 4: fold limb %r11.  The value is now in (%rdx:%r10:%r9:%r8);
# copies of limbs 0 and 1 go to %rcx and %rsi.
615 movq %r11,%rcx
616 shlq $32,%r11
617 movq %rdx,%r10
618 mulq %r13
619 shrq $32,%rcx
620 addq %r11,%r8
621 adcq %rcx,%r9
622 movq %r8,%rcx
623 adcq %rax,%r10
624 movq %r9,%rsi
625 adcq $0,%rdx
626
# Conditional subtraction of p.  Limb 2 is backed up in %rax and the
# unsubtracted limb 3 in %r11.  The final sbb turns the borrow into a mask in
# %r13 and sets ZF when there was no borrow.
627 subq $-1,%r8
628 movq %r10,%rax
629 sbbq %r12,%r9
630 sbbq $0,%r10
631 movq %rdx,%r11
632 sbbq %r13,%rdx
633 sbbq %r13,%r13
634
# Borrow (ZF clear): restore limbs 0..2 and keep the unsubtracted limb 3 in
# %r11.  No borrow (ZF set): take the subtracted limb 3 from %rdx.
635 cmovnzq %rcx,%r8
636 cmovnzq %rsi,%r9
637 movq %r8,0(%rdi)
638 cmovnzq %rax,%r10
639 movq %r9,8(%rdi)
640 cmovzq %rdx,%r11
641 movq %r10,16(%rdi)
642 movq %r11,24(%rdi)
643
644 popq %r13
645 popq %r12
# 0xf3,0xc3 encodes "rep ret".
646 .byte 0xf3,0xc3
647
648
649
# _ecp_nistz256_select_w5: copy one 96-byte entry out of a 16-entry table.
# In:   %rdi = destination (96 bytes, written with unaligned stores)
#       %rsi = table (16 entries of 96 bytes, read with movdqa, so it must be
#              16-byte aligned)
#       %edx = index; entries are numbered from 1, so an index that matches no
#              entry (for example 0) yields an all-zero result
# Every entry is loaded and masked on each call; the index is used only to
# build the compare mask, never as an address.
# Uses: %rax, %rsi, %xmm0-%xmm15.
650 .globl _ecp_nistz256_select_w5
651 .private_extern _ecp_nistz256_select_w5
652
653 .p2align 5
654 _ecp_nistz256_select_w5:
655 movdqa L$One(%rip),%xmm0
656 movd %edx,%xmm1
657
# Clear the six accumulators that receive the selected entry.
658 pxor %xmm2,%xmm2
659 pxor %xmm3,%xmm3
660 pxor %xmm4,%xmm4
661 pxor %xmm5,%xmm5
662 pxor %xmm6,%xmm6
663 pxor %xmm7,%xmm7
664
# %xmm8 = running entry number (starts at 1), %xmm1 = index in every lane.
665 movdqa %xmm0,%xmm8
666 pshufd $0,%xmm1,%xmm1
667
668 movq $16,%rax
669 L$select_loop_sse_w5:
670
# %xmm15 = all ones when the current entry number equals the index, else zero.
671 movdqa %xmm8,%xmm15
672 paddd %xmm0,%xmm8
673 pcmpeqd %xmm1,%xmm15
674
675 movdqa 0(%rsi),%xmm9
676 movdqa 16(%rsi),%xmm10
677 movdqa 32(%rsi),%xmm11
678 movdqa 48(%rsi),%xmm12
679 movdqa 64(%rsi),%xmm13
680 movdqa 80(%rsi),%xmm14
681 leaq 96(%rsi),%rsi
682
# Mask the entry and merge it into the accumulators.
683 pand %xmm15,%xmm9
684 pand %xmm15,%xmm10
685 por %xmm9,%xmm2
686 pand %xmm15,%xmm11
687 por %xmm10,%xmm3
688 pand %xmm15,%xmm12
689 por %xmm11,%xmm4
690 pand %xmm15,%xmm13
691 por %xmm12,%xmm5
692 pand %xmm15,%xmm14
693 por %xmm13,%xmm6
694 por %xmm14,%xmm7
695
696 decq %rax
697 jnz L$select_loop_sse_w5
698
699 movdqu %xmm2,0(%rdi)
700 movdqu %xmm3,16(%rdi)
701 movdqu %xmm4,32(%rdi)
702 movdqu %xmm5,48(%rdi)
703 movdqu %xmm6,64(%rdi)
704 movdqu %xmm7,80(%rdi)
# 0xf3,0xc3 encodes "rep ret".
705 .byte 0xf3,0xc3
706
707
708
709
# _ecp_nistz256_select_w7: copy one 64-byte entry out of a 64-entry table.
# In:   %rdi = destination (64 bytes, written with unaligned stores)
#       %rsi = table (64 entries of 64 bytes, read with movdqa, so it must be
#              16-byte aligned)
#       %edx = index; entries are numbered from 1, so an index that matches no
#              entry (for example 0) yields an all-zero result
# Every entry is loaded and masked on each call; the index is used only to
# build the compare mask, never as an address.
# Uses: %rax, %rsi, %xmm0-%xmm5, %xmm8-%xmm12, %xmm15.
710 .globl _ecp_nistz256_select_w7
711 .private_extern _ecp_nistz256_select_w7
712
713 .p2align 5
714 _ecp_nistz256_select_w7:
715 movdqa L$One(%rip),%xmm8
716 movd %edx,%xmm1
717
718 pxor %xmm2,%xmm2
719 pxor %xmm3,%xmm3
720 pxor %xmm4,%xmm4
721 pxor %xmm5,%xmm5
722
# %xmm8 = running entry number (starts at 1), %xmm0 = increment of 1,
# %xmm1 = index in every lane.
723 movdqa %xmm8,%xmm0
724 pshufd $0,%xmm1,%xmm1
725 movq $64,%rax
726
727 L$select_loop_sse_w7:
728 movdqa %xmm8,%xmm15
729 paddd %xmm0,%xmm8
730 movdqa 0(%rsi),%xmm9
731 movdqa 16(%rsi),%xmm10
# %xmm15 = all ones when the current entry number equals the index, else zero.
732 pcmpeqd %xmm1,%xmm15
733 movdqa 32(%rsi),%xmm11
734 movdqa 48(%rsi),%xmm12
735 leaq 64(%rsi),%rsi
736
737 pand %xmm15,%xmm9
738 pand %xmm15,%xmm10
739 por %xmm9,%xmm2
740 pand %xmm15,%xmm11
741 por %xmm10,%xmm3
742 pand %xmm15,%xmm12
743 por %xmm11,%xmm4
# Prefetch hint for data ahead of the already advanced table pointer.
744 prefetcht0 255(%rsi)
745 por %xmm12,%xmm5
746
747 decq %rax
748 jnz L$select_loop_sse_w7
749
750 movdqu %xmm2,0(%rdi)
751 movdqu %xmm3,16(%rdi)
752 movdqu %xmm4,32(%rdi)
753 movdqu %xmm5,48(%rdi)
# 0xf3,0xc3 encodes "rep ret".
754 .byte 0xf3,0xc3
755
# _ecp_nistz256_avx2_select_w7: placeholder with no implementation in this
# file.  The first two bytes encode ud2, so calling it raises an
# invalid-opcode exception; the "rep ret" that follows is not reached.
756 .globl _ecp_nistz256_avx2_select_w7
757 .private_extern _ecp_nistz256_avx2_select_w7
758
759 .p2align 5
760 _ecp_nistz256_avx2_select_w7:
761 .byte 0x0f,0x0b
762 .byte 0xf3,0xc3
763
764
# __ecp_nistz256_add_toq: res = a + b mod p, fully reduced.
# In:   %r12,%r13,%r8,%r9 = a (limbs 0..3)
#       %rbx = b (four 64-bit limbs in memory)
#       %r14 = limb 1 of p, %r15 = limb 3 of p
#       %rdi = res
# Out:  result stored to 0..24(%rdi) and left in %r12,%r13,%r8,%r9.
# Uses: %rax, %rbp, %rcx, %r10, %r11.
# The sum can be up to 257 bits.  p is subtracted and the original sum is kept
# only when that subtraction borrows out of the 257-bit value, so sums in
# [p, 2^256) are reduced as well as sums that overflow 256 bits.
765 .p2align 5
766 __ecp_nistz256_add_toq:
767 addq 0(%rbx),%r12
768 adcq 8(%rbx),%r13
769 movq %r12,%rax
770 adcq 16(%rbx),%r8
771 adcq 24(%rbx),%r9
772 movq %r13,%rbp
# %r11 = 0 when the addition did not carry out, all ones when it did.
773 sbbq %r11,%r11
774
# Subtract p ("subq $-1" subtracts limb 0 of p; limb 2 of p is zero).
775 subq $-1,%r12
776 movq %r8,%rcx
777 sbbq %r14,%r13
778 sbbq $0,%r8
779 movq %r9,%r10
780 sbbq %r15,%r9
# Fold the borrow into the carry mask: CF ends up set only when there was no
# carry out (mask 0) and the subtraction borrowed, i.e. when a + b < p.
781 sbbq $0,%r11
782
# CF set: keep the unsubtracted sum.  CF clear: keep sum - p.
783 cmovcq %rax,%r12
784 cmovcq %rbp,%r13
785 movq %r12,0(%rdi)
786 cmovcq %rcx,%r8
787 movq %r13,8(%rdi)
788 cmovcq %r10,%r9
789 movq %r8,16(%rdi)
790 movq %r9,24(%rdi)
791
# 0xf3,0xc3 encodes "rep ret".
792 .byte 0xf3,0xc3
793
794
795
# __ecp_nistz256_sub_fromq: res = a - b mod p.
# In:   %r12,%r13,%r8,%r9 = a (limbs 0..3)
#       %rbx = b (four 64-bit limbs in memory)
#       %r14 = limb 1 of p, %r15 = limb 3 of p
#       %rdi = res
# Out:  result stored to 0..24(%rdi) and left in %r12,%r13,%r8,%r9.
# Uses: %rax, %rbp, %rcx, %r10, %r11.
796 .p2align 5
797 __ecp_nistz256_sub_fromq:
798 subq 0(%rbx),%r12
799 sbbq 8(%rbx),%r13
800 movq %r12,%rax
801 sbbq 16(%rbx),%r8
802 sbbq 24(%rbx),%r9
803 movq %r13,%rbp
# %r11 = all ones when the subtraction borrowed, else 0.
804 sbbq %r11,%r11
805
# Add p ("addq $-1" adds limb 0 of p; limb 2 of p is zero).
806 addq $-1,%r12
807 movq %r8,%rcx
808 adcq %r14,%r13
809 adcq $0,%r8
810 movq %r9,%r10
811 adcq %r15,%r9
812 testq %r11,%r11
813
# No borrow: keep the raw difference.  Borrow: keep difference + p.
814 cmovzq %rax,%r12
815 cmovzq %rbp,%r13
816 movq %r12,0(%rdi)
817 cmovzq %rcx,%r8
818 movq %r13,8(%rdi)
819 cmovzq %r10,%r9
820 movq %r8,16(%rdi)
821 movq %r9,24(%rdi)
822
# 0xf3,0xc3 encodes "rep ret".
823 .byte 0xf3,0xc3
824
825
826
# __ecp_nistz256_subq: register-only modular subtraction, result = t - a mod p.
# In:   %rax,%rbp,%rcx,%r10 = t (limbs 0..3, the minuend)
#       %r12,%r13,%r8,%r9   = a (limbs 0..3, the subtrahend)
#       %r14 = limb 1 of p, %r15 = limb 3 of p
# Out:  result in %r12,%r13,%r8,%r9.  Nothing is written to memory, so callers
#       store the result themselves when they need it.
# Uses: %rax, %rbp, %rcx, %r10, %r11.
827 .p2align 5
828 __ecp_nistz256_subq:
829 subq %r12,%rax
830 sbbq %r13,%rbp
831 movq %rax,%r12
832 sbbq %r8,%rcx
833 sbbq %r9,%r10
834 movq %rbp,%r13
# %r11 = all ones when the subtraction borrowed, else 0.
835 sbbq %r11,%r11
836
# Add p to the difference held in %rax/%rbp/%rcx/%r10; the raw difference has
# been copied into the output registers.
837 addq $-1,%rax
838 movq %rcx,%r8
839 adcq %r14,%rbp
840 adcq $0,%rcx
841 movq %r10,%r9
842 adcq %r15,%r10
843 testq %r11,%r11
844
# Borrow: replace the raw difference with difference + p.
845 cmovnzq %rax,%r12
846 cmovnzq %rbp,%r13
847 cmovnzq %rcx,%r8
848 cmovnzq %r10,%r9
849
# 0xf3,0xc3 encodes "rep ret".
850 .byte 0xf3,0xc3
851
852
853
# __ecp_nistz256_mul_by_2q: res = 2*a mod p, fully reduced.
# In:   %r12,%r13,%r8,%r9 = a (limbs 0..3)
#       %r14 = limb 1 of p, %r15 = limb 3 of p
#       %rdi = res
# Out:  result stored to 0..24(%rdi) and left in %r12,%r13,%r8,%r9.
# Uses: %rax, %rbp, %rcx, %r10, %r11.
# The doubled value can be up to 257 bits.  p is subtracted and the original
# sum is kept only when that subtraction borrows out of the 257-bit value, so
# sums in [p, 2^256) are reduced as well as sums that overflow 256 bits.
854 .p2align 5
855 __ecp_nistz256_mul_by_2q:
856 addq %r12,%r12
857 adcq %r13,%r13
858 movq %r12,%rax
859 adcq %r8,%r8
860 adcq %r9,%r9
861 movq %r13,%rbp
# %r11 = 0 when the doubling did not carry out, all ones when it did.
862 sbbq %r11,%r11
863
# Subtract p ("subq $-1" subtracts limb 0 of p; limb 2 of p is zero).
864 subq $-1,%r12
865 movq %r8,%rcx
866 sbbq %r14,%r13
867 sbbq $0,%r8
868 movq %r9,%r10
869 sbbq %r15,%r9
# Fold the borrow into the carry mask: CF ends up set only when there was no
# carry out (mask 0) and the subtraction borrowed, i.e. when 2*a < p.
870 sbbq $0,%r11
871
# CF set: keep the unsubtracted sum.  CF clear: keep sum - p.
872 cmovcq %rax,%r12
873 cmovcq %rbp,%r13
874 movq %r12,0(%rdi)
875 cmovcq %rcx,%r8
876 movq %r13,8(%rdi)
877 cmovcq %r10,%r9
878 movq %r8,16(%rdi)
879 movq %r9,24(%rdi)
880
# 0xf3,0xc3 encodes "rep ret".
881 .byte 0xf3,0xc3
882
# _ecp_nistz256_point_double: point doubling on 96-byte points (three 32-byte
# field elements at offsets 0, 32 and 64; presumably Jacobian X, Y, Z).
# In:   %rdi = result point, %rsi = input point
# Stack frame (160+8 bytes below the six saved registers), by offset:
#     0(%rsp) S, 32(%rsp) M, 64(%rsp) Zsqr, 96(%rsp) copy of in_x,
#   128(%rsp) tmp0
# (names follow the computation steps below).
# The result pointers are parked in SSE registers across the calls:
#   %xmm0 = res, %xmm1 = res+32, %xmm2 = res+64.
# L$point_double_shortcutq is also entered from _ecp_nistz256_point_add after
# that routine has shrunk its own frame to this size.
883 .globl _ecp_nistz256_point_double
884 .private_extern _ecp_nistz256_point_double
885
886 .p2align 5
887 _ecp_nistz256_point_double:
888 pushq %rbp
889 pushq %rbx
890 pushq %r12
891 pushq %r13
892 pushq %r14
893 pushq %r15
894 subq $160+8,%rsp
895
896 L$point_double_shortcutq:
# Copy in_x to the stack, load in_y into %r12,%r13,%r8,%r9 and the two
# non-trivial limbs of p into %r14/%r15.  %rbx keeps the input pointer.
897 movdqu 0(%rsi),%xmm0
898 movq %rsi,%rbx
899 movdqu 16(%rsi),%xmm1
900 movq 32+0(%rsi),%r12
901 movq 32+8(%rsi),%r13
902 movq 32+16(%rsi),%r8
903 movq 32+24(%rsi),%r9
904 movq L$poly+8(%rip),%r14
905 movq L$poly+24(%rip),%r15
906 movdqa %xmm0,96(%rsp)
907 movdqa %xmm1,96+16(%rsp)
908 leaq 32(%rdi),%r10
909 leaq 64(%rdi),%r11
# Encoded movq: %rdi -> %xmm0, %r10 -> %xmm1, %r11 -> %xmm2.
910 .byte 102,72,15,110,199
911 .byte 102,73,15,110,202
912 .byte 102,73,15,110,211
913
# S = 2 * in_y
914 leaq 0(%rsp),%rdi
915 call __ecp_nistz256_mul_by_2q
916
# Zsqr = in_z^2
917 movq 64+0(%rsi),%rax
918 movq 64+8(%rsi),%r14
919 movq 64+16(%rsi),%r15
920 movq 64+24(%rsi),%r8
921 leaq 64-0(%rsi),%rsi
922 leaq 64(%rsp),%rdi
923 call __ecp_nistz256_sqr_montq
924
# S = S^2
925 movq 0+0(%rsp),%rax
926 movq 8+0(%rsp),%r14
927 leaq 0+0(%rsp),%rsi
928 movq 16+0(%rsp),%r15
929 movq 24+0(%rsp),%r8
930 leaq 0(%rsp),%rdi
931 call __ecp_nistz256_sqr_montq
932
# res_z = 2 * (in_z * in_y); the encoded movq restores %rdi = res+64 from %xmm2.
933 movq 32(%rbx),%rax
934 movq 64+0(%rbx),%r9
935 movq 64+8(%rbx),%r10
936 movq 64+16(%rbx),%r11
937 movq 64+24(%rbx),%r12
938 leaq 64-0(%rbx),%rsi
939 leaq 32(%rbx),%rbx
940 .byte 102,72,15,126,215
941 call __ecp_nistz256_mul_montq
942 call __ecp_nistz256_mul_by_2q
943
# M = in_x + Zsqr
944 movq 96+0(%rsp),%r12
945 movq 96+8(%rsp),%r13
946 leaq 64(%rsp),%rbx
947 movq 96+16(%rsp),%r8
948 movq 96+24(%rsp),%r9
949 leaq 32(%rsp),%rdi
950 call __ecp_nistz256_add_toq
951
# Zsqr = in_x - Zsqr
952 movq 96+0(%rsp),%r12
953 movq 96+8(%rsp),%r13
954 leaq 64(%rsp),%rbx
955 movq 96+16(%rsp),%r8
956 movq 96+24(%rsp),%r9
957 leaq 64(%rsp),%rdi
958 call __ecp_nistz256_sub_fromq
959
# res_y = S^2; the encoded movq restores %rdi = res+32 from %xmm1.
960 movq 0+0(%rsp),%rax
961 movq 8+0(%rsp),%r14
962 leaq 0+0(%rsp),%rsi
963 movq 16+0(%rsp),%r15
964 movq 24+0(%rsp),%r8
965 .byte 102,72,15,126,207
966 call __ecp_nistz256_sqr_montq
# Halve res_y modulo p.  The square is in %r12..%r15 and the squaring routine
# left %rsi/%rbp holding limbs 1 and 3 of p.  p is added when the value is odd
# (bit 0 of the copy in %rax), %r9 catches the carry, then everything is
# shifted right by one bit.
967 xorq %r9,%r9
968 movq %r12,%rax
969 addq $-1,%r12
970 movq %r13,%r10
971 adcq %rsi,%r13
972 movq %r14,%rcx
973 adcq $0,%r14
974 movq %r15,%r8
975 adcq %rbp,%r15
976 adcq $0,%r9
977 xorq %rsi,%rsi
978 testq $1,%rax
979
# Even value: discard the addition of p and clear the carry limb.
980 cmovzq %rax,%r12
981 cmovzq %r10,%r13
982 cmovzq %rcx,%r14
983 cmovzq %r8,%r15
984 cmovzq %rsi,%r9
985
# 257-bit right shift by one; the result is stored at res+32.
986 movq %r13,%rax
987 shrq $1,%r12
988 shlq $63,%rax
989 movq %r14,%r10
990 shrq $1,%r13
991 orq %rax,%r12
992 shlq $63,%r10
993 movq %r15,%rcx
994 shrq $1,%r14
995 orq %r10,%r13
996 shlq $63,%rcx
997 movq %r12,0(%rdi)
998 shrq $1,%r15
999 movq %r13,8(%rdi)
1000 shlq $63,%r9
1001 orq %rcx,%r14
1002 orq %r9,%r15
1003 movq %r14,16(%rdi)
1004 movq %r15,24(%rdi)
# M = M * Zsqr
1005 movq 64(%rsp),%rax
1006 leaq 64(%rsp),%rbx
1007 movq 0+32(%rsp),%r9
1008 movq 8+32(%rsp),%r10
1009 leaq 0+32(%rsp),%rsi
1010 movq 16+32(%rsp),%r11
1011 movq 24+32(%rsp),%r12
1012 leaq 32(%rsp),%rdi
1013 call __ecp_nistz256_mul_montq
1014
# M = 3 * M, computed as 2*M (also written to tmp0) plus M.
1015 leaq 128(%rsp),%rdi
1016 call __ecp_nistz256_mul_by_2q
1017
1018 leaq 32(%rsp),%rbx
1019 leaq 32(%rsp),%rdi
1020 call __ecp_nistz256_add_toq
1021
# S = S * in_x
1022 movq 96(%rsp),%rax
1023 leaq 96(%rsp),%rbx
1024 movq 0+0(%rsp),%r9
1025 movq 8+0(%rsp),%r10
1026 leaq 0+0(%rsp),%rsi
1027 movq 16+0(%rsp),%r11
1028 movq 24+0(%rsp),%r12
1029 leaq 0(%rsp),%rdi
1030 call __ecp_nistz256_mul_montq
1031
# tmp0 = 2 * S
1032 leaq 128(%rsp),%rdi
1033 call __ecp_nistz256_mul_by_2q
1034
# res_x = M^2; the encoded movq restores %rdi = res from %xmm0.
1035 movq 0+32(%rsp),%rax
1036 movq 8+32(%rsp),%r14
1037 leaq 0+32(%rsp),%rsi
1038 movq 16+32(%rsp),%r15
1039 movq 24+32(%rsp),%r8
1040 .byte 102,72,15,126,199
1041 call __ecp_nistz256_sqr_montq
1042
# res_x = res_x - tmp0.  The squaring result is in %r12..%r15 with p limbs in
# %rsi/%rbp, so move things into the layout the subtraction helper expects.
1043 leaq 128(%rsp),%rbx
1044 movq %r14,%r8
1045 movq %r15,%r9
1046 movq %rsi,%r14
1047 movq %rbp,%r15
1048 call __ecp_nistz256_sub_fromq
1049
# S = S - res_x (register-only helper; the result is stored by hand below).
1050 movq 0+0(%rsp),%rax
1051 movq 0+8(%rsp),%rbp
1052 movq 0+16(%rsp),%rcx
1053 movq 0+24(%rsp),%r10
1054 leaq 0(%rsp),%rdi
1055 call __ecp_nistz256_subq
1056
# S = S * M.  The difference is saved to S and moved into the multiplier input
# registers %r9..%r12.  "xorl %ecx,%ecx" sets ZF, so both cmovz instructions
# below always perform their move.
1057 movq 32(%rsp),%rax
1058 leaq 32(%rsp),%rbx
1059 movq %r12,%r14
1060 xorl %ecx,%ecx
1061 movq %r12,0+0(%rsp)
1062 movq %r13,%r10
1063 movq %r13,0+8(%rsp)
1064 cmovzq %r8,%r11
1065 movq %r8,0+16(%rsp)
1066 leaq 0-0(%rsp),%rsi
1067 cmovzq %r9,%r12
1068 movq %r9,0+24(%rsp)
1069 movq %r14,%r9
1070 leaq 0(%rsp),%rdi
1071 call __ecp_nistz256_mul_montq
1072
# res_y = S - res_y; the encoded movq pair sets %rbx = %rdi = res+32.
1073 .byte 102,72,15,126,203
1074 .byte 102,72,15,126,207
1075 call __ecp_nistz256_sub_fromq
1076
1077 addq $160+8,%rsp
1078 popq %r15
1079 popq %r14
1080 popq %r13
1081 popq %r12
1082 popq %rbx
1083 popq %rbp
# 0xf3,0xc3 encodes "rep ret".
1084 .byte 0xf3,0xc3
1085
# _ecp_nistz256_point_add: add two 96-byte points (three 32-byte field
# elements at offsets 0, 32 and 64; presumably Jacobian X, Y, Z).
# In:   %rdi = result point, %rsi = first point (in1), %rdx = second point (in2)
# Stack frame (576+8 bytes below the six saved registers), by offset:
#     0 H      32 Hsqr (first holds Z1sqr)   64 R     96 Rsqr (first holds Z2sqr)
#   128 Hcub  160 U1     192 U2     224 S1    256 S2
#   288 res_x 320 res_y  352 res_z
#   384 in1_x 416 in1_y  448 in1_z  480 in2_x 512 in2_y 544 in2_z
# SSE registers carried across the calls:
#   %xmm0 = result pointer, %xmm1 = in1 pointer,
#   %xmm5 = all ones when the first 64 bytes of in1 are all zero,
#   %xmm4 = all ones when the first 64 bytes of in2 are all zero.
# These two masks choose the final output: in2 when %xmm5 is set, in1 when
# %xmm4 is set, the computed sum otherwise.
1086 .globl _ecp_nistz256_point_add
1087 .private_extern _ecp_nistz256_point_add
1088
1089 .p2align 5
1090 _ecp_nistz256_point_add:
1091 pushq %rbp
1092 pushq %rbx
1093 pushq %r12
1094 pushq %r13
1095 pushq %r14
1096 pushq %r15
1097 subq $576+8,%rsp
1098
# Copy in1 to the stack and OR its first 64 bytes together in %xmm3.
1099 movdqu 0(%rsi),%xmm0
1100 movdqu 16(%rsi),%xmm1
1101 movdqu 32(%rsi),%xmm2
1102 movdqu 48(%rsi),%xmm3
1103 movdqu 64(%rsi),%xmm4
1104 movdqu 80(%rsi),%xmm5
1105 movq %rsi,%rbx
1106 movq %rdx,%rsi
1107 movdqa %xmm0,384(%rsp)
1108 movdqa %xmm1,384+16(%rsp)
1109 por %xmm0,%xmm1
1110 movdqa %xmm2,416(%rsp)
1111 movdqa %xmm3,416+16(%rsp)
1112 por %xmm2,%xmm3
1113 movdqa %xmm4,448(%rsp)
1114 movdqa %xmm5,448+16(%rsp)
1115 por %xmm1,%xmm3
1116
# Copy in2 (now addressed by %rsi) to the stack, OR its first 64 bytes
# together, and load in2_z for the first squaring.  The pshufd/por pairs fold
# the four dwords of each OR into dword 0.
1117 movdqu 0(%rsi),%xmm0
1118 pshufd $0xb1,%xmm3,%xmm5
1119 movdqu 16(%rsi),%xmm1
1120 movdqu 32(%rsi),%xmm2
1121 por %xmm3,%xmm5
1122 movdqu 48(%rsi),%xmm3
1123 movq 64+0(%rsi),%rax
1124 movq 64+8(%rsi),%r14
1125 movq 64+16(%rsi),%r15
1126 movq 64+24(%rsi),%r8
1127 movdqa %xmm0,480(%rsp)
1128 pshufd $0x1e,%xmm5,%xmm4
1129 movdqa %xmm1,480+16(%rsp)
1130 por %xmm0,%xmm1
# Encoded movq: %rdi -> %xmm0 (save the result pointer).
1131 .byte 102,72,15,110,199
1132 movdqa %xmm2,512(%rsp)
1133 movdqa %xmm3,512+16(%rsp)
1134 por %xmm2,%xmm3
1135 por %xmm4,%xmm5
1136 pxor %xmm4,%xmm4
1137 por %xmm1,%xmm3
1138
# Z2sqr = in2_z^2 (stored at 96); in2_z is also copied to 544.
1139 leaq 64-0(%rsi),%rsi
1140 movq %rax,544+0(%rsp)
1141 movq %r14,544+8(%rsp)
1142 movq %r15,544+16(%rsp)
1143 movq %r8,544+24(%rsp)
1144 leaq 96(%rsp),%rdi
1145 call __ecp_nistz256_sqr_montq
1146
# Finish both masks: compare the folded ORs with zero and broadcast dword 0.
1147 pcmpeqd %xmm4,%xmm5
1148 pshufd $0xb1,%xmm3,%xmm4
1149 por %xmm3,%xmm4
1150 pshufd $0,%xmm5,%xmm5
1151 pshufd $0x1e,%xmm4,%xmm3
1152 por %xmm3,%xmm4
1153 pxor %xmm3,%xmm3
1154 pcmpeqd %xmm3,%xmm4
1155 pshufd $0,%xmm4,%xmm4
1156 movq 64+0(%rbx),%rax
1157 movq 64+8(%rbx),%r14
1158 movq 64+16(%rbx),%r15
1159 movq 64+24(%rbx),%r8
# Encoded movq: %rbx -> %xmm1 (save the in1 pointer).
1160 .byte 102,72,15,110,203
1161
# Z1sqr = in1_z^2 (stored at 32)
1162 leaq 64-0(%rbx),%rsi
1163 leaq 32(%rsp),%rdi
1164 call __ecp_nistz256_sqr_montq
1165
# S1 = Z2sqr * in2_z
1166 movq 544(%rsp),%rax
1167 leaq 544(%rsp),%rbx
1168 movq 0+96(%rsp),%r9
1169 movq 8+96(%rsp),%r10
1170 leaq 0+96(%rsp),%rsi
1171 movq 16+96(%rsp),%r11
1172 movq 24+96(%rsp),%r12
1173 leaq 224(%rsp),%rdi
1174 call __ecp_nistz256_mul_montq
1175
# S2 = Z1sqr * in1_z
1176 movq 448(%rsp),%rax
1177 leaq 448(%rsp),%rbx
1178 movq 0+32(%rsp),%r9
1179 movq 8+32(%rsp),%r10
1180 leaq 0+32(%rsp),%rsi
1181 movq 16+32(%rsp),%r11
1182 movq 24+32(%rsp),%r12
1183 leaq 256(%rsp),%rdi
1184 call __ecp_nistz256_mul_montq
1185
# S1 = S1 * in1_y
1186 movq 416(%rsp),%rax
1187 leaq 416(%rsp),%rbx
1188 movq 0+224(%rsp),%r9
1189 movq 8+224(%rsp),%r10
1190 leaq 0+224(%rsp),%rsi
1191 movq 16+224(%rsp),%r11
1192 movq 24+224(%rsp),%r12
1193 leaq 224(%rsp),%rdi
1194 call __ecp_nistz256_mul_montq
1195
# S2 = S2 * in2_y
1196 movq 512(%rsp),%rax
1197 leaq 512(%rsp),%rbx
1198 movq 0+256(%rsp),%r9
1199 movq 8+256(%rsp),%r10
1200 leaq 0+256(%rsp),%rsi
1201 movq 16+256(%rsp),%r11
1202 movq 24+256(%rsp),%r12
1203 leaq 256(%rsp),%rdi
1204 call __ecp_nistz256_mul_montq
1205
# R = S2 - S1
1206 leaq 224(%rsp),%rbx
1207 leaq 64(%rsp),%rdi
1208 call __ecp_nistz256_sub_fromq
1209
# %r12 = OR of the limbs of R (zero when S1 == S2), parked in %xmm3;
# %xmm2 = OR of the two input masks.
1210 orq %r13,%r12
1211 movdqa %xmm4,%xmm2
1212 orq %r8,%r12
1213 orq %r9,%r12
1214 por %xmm5,%xmm2
# Encoded movq: %r12 -> %xmm3.
1215 .byte 102,73,15,110,220
1216
# U1 = in1_x * Z2sqr
1217 movq 384(%rsp),%rax
1218 leaq 384(%rsp),%rbx
1219 movq 0+96(%rsp),%r9
1220 movq 8+96(%rsp),%r10
1221 leaq 0+96(%rsp),%rsi
1222 movq 16+96(%rsp),%r11
1223 movq 24+96(%rsp),%r12
1224 leaq 160(%rsp),%rdi
1225 call __ecp_nistz256_mul_montq
1226
# U2 = in2_x * Z1sqr
1227 movq 480(%rsp),%rax
1228 leaq 480(%rsp),%rbx
1229 movq 0+32(%rsp),%r9
1230 movq 8+32(%rsp),%r10
1231 leaq 0+32(%rsp),%rsi
1232 movq 16+32(%rsp),%r11
1233 movq 24+32(%rsp),%r12
1234 leaq 192(%rsp),%rdi
1235 call __ecp_nistz256_mul_montq
1236
# H = U2 - U1
1237 leaq 160(%rsp),%rbx
1238 leaq 0(%rsp),%rdi
1239 call __ecp_nistz256_sub_fromq
1240
# ZF is set when H == 0 (U1 == U2).
1241 orq %r13,%r12
1242 orq %r8,%r12
1243 orq %r9,%r12
1244
# Special-case dispatch.  0x3e is a prefix byte in front of the jnz (a branch
# hint on some processors).
#   H != 0                      -> generic addition
#   either input mask is set    -> generic addition (the final select picks
#                                  the other input)
#   H == 0 and R == 0           -> the inputs are equal: use point doubling
#   H == 0 and R != 0           -> write an all-zero result
1245 .byte 0x3e
1246 jnz L$add_proceedq
# Encoded movq: %xmm2 -> %r8 (mask OR), %xmm3 -> %r9 (OR of the limbs of R).
1247 .byte 102,73,15,126,208
1248 .byte 102,73,15,126,217
1249 testq %r8,%r8
1250 jnz L$add_proceedq
1251 testq %r9,%r9
1252 jz L$add_doubleq
1253
# Encoded movq: %xmm0 -> %rdi (restore the result pointer), then zero it.
1254 .byte 102,72,15,126,199
1255 pxor %xmm0,%xmm0
1256 movdqu %xmm0,0(%rdi)
1257 movdqu %xmm0,16(%rdi)
1258 movdqu %xmm0,32(%rdi)
1259 movdqu %xmm0,48(%rdi)
1260 movdqu %xmm0,64(%rdi)
1261 movdqu %xmm0,80(%rdi)
1262 jmp L$add_doneq
1263
1264 .p2align 5
1265 L$add_doubleq:
# Restore %rsi = in1 and %rdi = result, shrink the frame by 416 bytes (the
# difference between this frame and the 160+8 byte doubling frame) and
# continue inside _ecp_nistz256_point_double.
1266 .byte 102,72,15,126,206
1267 .byte 102,72,15,126,199
1268 addq $416,%rsp
1269 jmp L$point_double_shortcutq
1270
1271 .p2align 5
1272 L$add_proceedq:
# Rsqr = R^2
1273 movq 0+64(%rsp),%rax
1274 movq 8+64(%rsp),%r14
1275 leaq 0+64(%rsp),%rsi
1276 movq 16+64(%rsp),%r15
1277 movq 24+64(%rsp),%r8
1278 leaq 96(%rsp),%rdi
1279 call __ecp_nistz256_sqr_montq
1280
# res_z = H * in1_z
1281 movq 448(%rsp),%rax
1282 leaq 448(%rsp),%rbx
1283 movq 0+0(%rsp),%r9
1284 movq 8+0(%rsp),%r10
1285 leaq 0+0(%rsp),%rsi
1286 movq 16+0(%rsp),%r11
1287 movq 24+0(%rsp),%r12
1288 leaq 352(%rsp),%rdi
1289 call __ecp_nistz256_mul_montq
1290
# Hsqr = H^2
1291 movq 0+0(%rsp),%rax
1292 movq 8+0(%rsp),%r14
1293 leaq 0+0(%rsp),%rsi
1294 movq 16+0(%rsp),%r15
1295 movq 24+0(%rsp),%r8
1296 leaq 32(%rsp),%rdi
1297 call __ecp_nistz256_sqr_montq
1298
# res_z = res_z * in2_z
1299 movq 544(%rsp),%rax
1300 leaq 544(%rsp),%rbx
1301 movq 0+352(%rsp),%r9
1302 movq 8+352(%rsp),%r10
1303 leaq 0+352(%rsp),%rsi
1304 movq 16+352(%rsp),%r11
1305 movq 24+352(%rsp),%r12
1306 leaq 352(%rsp),%rdi
1307 call __ecp_nistz256_mul_montq
1308
# Hcub = Hsqr * H
1309 movq 0(%rsp),%rax
1310 leaq 0(%rsp),%rbx
1311 movq 0+32(%rsp),%r9
1312 movq 8+32(%rsp),%r10
1313 leaq 0+32(%rsp),%rsi
1314 movq 16+32(%rsp),%r11
1315 movq 24+32(%rsp),%r12
1316 leaq 128(%rsp),%rdi
1317 call __ecp_nistz256_mul_montq
1318
# U2 = U1 * Hsqr
1319 movq 160(%rsp),%rax
1320 leaq 160(%rsp),%rbx
1321 movq 0+32(%rsp),%r9
1322 movq 8+32(%rsp),%r10
1323 leaq 0+32(%rsp),%rsi
1324 movq 16+32(%rsp),%r11
1325 movq 24+32(%rsp),%r12
1326 leaq 192(%rsp),%rdi
1327 call __ecp_nistz256_mul_montq
1328
1329
1330
1331
# Inline doubling of U2 (still in %r12,%r13,%r8,%r9 after the multiply; %r14
# and %r15 hold limbs 1 and 3 of p).  The doubled value must be fully reduced
# because __ecp_nistz256_subq below adds p only once on borrow.
1332 addq %r12,%r12
1333 leaq 96(%rsp),%rsi
1334 adcq %r13,%r13
1335 movq %r12,%rax
1336 adcq %r8,%r8
1337 adcq %r9,%r9
1338 movq %r13,%rbp
# %r11 = 0 when the doubling did not carry out, all ones when it did.
1339 sbbq %r11,%r11
1340
1341 subq $-1,%r12
1342 movq %r8,%rcx
1343 sbbq %r14,%r13
1344 sbbq $0,%r8
1345 movq %r9,%r10
1346 sbbq %r15,%r9
# Fold the borrow into the carry mask: CF ends up set only when there was no
# carry out and the subtraction of p borrowed, i.e. when 2*U2 < p.
1347 sbbq $0,%r11
1348
# CF set: keep the unsubtracted sum.  The interleaved loads fetch Rsqr into
# %rax,%rbp,%rcx,%r10 as the minuend for the subtraction that follows.
1349 cmovcq %rax,%r12
1350 movq 0(%rsi),%rax
1351 cmovcq %rbp,%r13
1352 movq 8(%rsi),%rbp
1353 cmovcq %rcx,%r8
1354 movq 16(%rsi),%rcx
1355 cmovcq %r10,%r9
1356 movq 24(%rsi),%r10
1357
# registers = Rsqr - 2*U2
1358 call __ecp_nistz256_subq
1359
# res_x = (Rsqr - 2*U2) - Hcub
1360 leaq 128(%rsp),%rbx
1361 leaq 288(%rsp),%rdi
1362 call __ecp_nistz256_sub_fromq
1363
# res_y = U2 - res_x (register-only helper; stored by hand below)
1364 movq 192+0(%rsp),%rax
1365 movq 192+8(%rsp),%rbp
1366 movq 192+16(%rsp),%rcx
1367 movq 192+24(%rsp),%r10
1368 leaq 320(%rsp),%rdi
1369
1370 call __ecp_nistz256_subq
1371
1372 movq %r12,0(%rdi)
1373 movq %r13,8(%rdi)
1374 movq %r8,16(%rdi)
1375 movq %r9,24(%rdi)
# S2 = S1 * Hcub
1376 movq 128(%rsp),%rax
1377 leaq 128(%rsp),%rbx
1378 movq 0+224(%rsp),%r9
1379 movq 8+224(%rsp),%r10
1380 leaq 0+224(%rsp),%rsi
1381 movq 16+224(%rsp),%r11
1382 movq 24+224(%rsp),%r12
1383 leaq 256(%rsp),%rdi
1384 call __ecp_nistz256_mul_montq
1385
# res_y = R * res_y
1386 movq 320(%rsp),%rax
1387 leaq 320(%rsp),%rbx
1388 movq 0+64(%rsp),%r9
1389 movq 8+64(%rsp),%r10
1390 leaq 0+64(%rsp),%rsi
1391 movq 16+64(%rsp),%r11
1392 movq 24+64(%rsp),%r12
1393 leaq 320(%rsp),%rdi
1394 call __ecp_nistz256_mul_montq
1395
# res_y = res_y - S2
1396 leaq 256(%rsp),%rbx
1397 leaq 320(%rsp),%rdi
1398 call __ecp_nistz256_sub_fromq
1399
# Encoded movq: %xmm0 -> %rdi (restore the result pointer).
1400 .byte 102,72,15,126,199
1401
# Output Z: res_z, replaced by in2_z when %xmm5 is set, then by in1_z when
# %xmm4 is set.
1402 movdqa %xmm5,%xmm0
1403 movdqa %xmm5,%xmm1
1404 pandn 352(%rsp),%xmm0
1405 movdqa %xmm5,%xmm2
1406 pandn 352+16(%rsp),%xmm1
1407 movdqa %xmm5,%xmm3
1408 pand 544(%rsp),%xmm2
1409 pand 544+16(%rsp),%xmm3
1410 por %xmm0,%xmm2
1411 por %xmm1,%xmm3
1412
1413 movdqa %xmm4,%xmm0
1414 movdqa %xmm4,%xmm1
1415 pandn %xmm2,%xmm0
1416 movdqa %xmm4,%xmm2
1417 pandn %xmm3,%xmm1
1418 movdqa %xmm4,%xmm3
1419 pand 448(%rsp),%xmm2
1420 pand 448+16(%rsp),%xmm3
1421 por %xmm0,%xmm2
1422 por %xmm1,%xmm3
1423 movdqu %xmm2,64(%rdi)
1424 movdqu %xmm3,80(%rdi)
1425
# Output X: res_x, replaced by in2_x when %xmm5 is set, then by in1_x when
# %xmm4 is set.
1426 movdqa %xmm5,%xmm0
1427 movdqa %xmm5,%xmm1
1428 pandn 288(%rsp),%xmm0
1429 movdqa %xmm5,%xmm2
1430 pandn 288+16(%rsp),%xmm1
1431 movdqa %xmm5,%xmm3
1432 pand 480(%rsp),%xmm2
1433 pand 480+16(%rsp),%xmm3
1434 por %xmm0,%xmm2
1435 por %xmm1,%xmm3
1436
1437 movdqa %xmm4,%xmm0
1438 movdqa %xmm4,%xmm1
1439 pandn %xmm2,%xmm0
1440 movdqa %xmm4,%xmm2
1441 pandn %xmm3,%xmm1
1442 movdqa %xmm4,%xmm3
1443 pand 384(%rsp),%xmm2
1444 pand 384+16(%rsp),%xmm3
1445 por %xmm0,%xmm2
1446 por %xmm1,%xmm3
1447 movdqu %xmm2,0(%rdi)
1448 movdqu %xmm3,16(%rdi)
1449
# Output Y: res_y, replaced by in2_y when %xmm5 is set, then by in1_y when
# %xmm4 is set.
1450 movdqa %xmm5,%xmm0
1451 movdqa %xmm5,%xmm1
1452 pandn 320(%rsp),%xmm0
1453 movdqa %xmm5,%xmm2
1454 pandn 320+16(%rsp),%xmm1
1455 movdqa %xmm5,%xmm3
1456 pand 512(%rsp),%xmm2
1457 pand 512+16(%rsp),%xmm3
1458 por %xmm0,%xmm2
1459 por %xmm1,%xmm3
1460
1461 movdqa %xmm4,%xmm0
1462 movdqa %xmm4,%xmm1
1463 pandn %xmm2,%xmm0
1464 movdqa %xmm4,%xmm2
1465 pandn %xmm3,%xmm1
1466 movdqa %xmm4,%xmm3
1467 pand 416(%rsp),%xmm2
1468 pand 416+16(%rsp),%xmm3
1469 por %xmm0,%xmm2
1470 por %xmm1,%xmm3
1471 movdqu %xmm2,32(%rdi)
1472 movdqu %xmm3,48(%rdi)
1473
1474 L$add_doneq:
1475 addq $576+8,%rsp
1476 popq %r15
1477 popq %r14
1478 popq %r13
1479 popq %r12
1480 popq %rbx
1481 popq %rbp
# 0xf3,0xc3 encodes "rep ret".
1482 .byte 0xf3,0xc3
1483
# _ecp_nistz256_point_add_affine: add a 96-byte point (in1: three 32-byte
# field elements, presumably Jacobian X, Y, Z) and a 64-byte point (in2: two
# 32-byte field elements, presumably affine X, Y).
# In:   %rdi = result point (96 bytes), %rsi = in1, %rdx = in2
# Stack frame (480+8 bytes below the six saved registers), by offset:
#     0 U2     32 S2 (first holds Z1sqr)   64 H     96 R
#   128 Hsqr  160 Hcub   192 Rsqr
#   224 res_x 256 res_y  288 res_z
#   320 in1_x 352 in1_y  384 in1_z  416 in2_x 448 in2_y
# SSE registers carried across the calls:
#   %xmm0 = result pointer,
#   %xmm5 = all ones when the first 64 bytes of in1 are all zero,
#   %xmm4 = all ones when the 64 bytes of in2 are all zero.
# These two masks choose the final output: in2 (with L$ONE_mont as Z) when
# %xmm5 is set, in1 when %xmm4 is set, the computed sum otherwise.
1484 .globl _ecp_nistz256_point_add_affine
1485 .private_extern _ecp_nistz256_point_add_affine
1486
1487 .p2align 5
1488 _ecp_nistz256_point_add_affine:
1489 pushq %rbp
1490 pushq %rbx
1491 pushq %r12
1492 pushq %r13
1493 pushq %r14
1494 pushq %r15
1495 subq $480+8,%rsp
1496
# Copy in1 to the stack, OR its first 64 bytes together in %xmm3 and load
# in1_z for the first squaring.  %rbx takes over the in2 pointer.
1497 movdqu 0(%rsi),%xmm0
1498 movq %rdx,%rbx
1499 movdqu 16(%rsi),%xmm1
1500 movdqu 32(%rsi),%xmm2
1501 movdqu 48(%rsi),%xmm3
1502 movdqu 64(%rsi),%xmm4
1503 movdqu 80(%rsi),%xmm5
1504 movq 64+0(%rsi),%rax
1505 movq 64+8(%rsi),%r14
1506 movq 64+16(%rsi),%r15
1507 movq 64+24(%rsi),%r8
1508 movdqa %xmm0,320(%rsp)
1509 movdqa %xmm1,320+16(%rsp)
1510 por %xmm0,%xmm1
1511 movdqa %xmm2,352(%rsp)
1512 movdqa %xmm3,352+16(%rsp)
1513 por %xmm2,%xmm3
1514 movdqa %xmm4,384(%rsp)
1515 movdqa %xmm5,384+16(%rsp)
1516 por %xmm1,%xmm3
1517
# Copy in2 to the stack and OR its 64 bytes together.  The pshufd/por pairs
# fold the four dwords of each OR into dword 0.
1518 movdqu 0(%rbx),%xmm0
1519 pshufd $0xb1,%xmm3,%xmm5
1520 movdqu 16(%rbx),%xmm1
1521 movdqu 32(%rbx),%xmm2
1522 por %xmm3,%xmm5
1523 movdqu 48(%rbx),%xmm3
1524 movdqa %xmm0,416(%rsp)
1525 pshufd $0x1e,%xmm5,%xmm4
1526 movdqa %xmm1,416+16(%rsp)
1527 por %xmm0,%xmm1
# Encoded movq: %rdi -> %xmm0 (save the result pointer).
1528 .byte 102,72,15,110,199
1529 movdqa %xmm2,448(%rsp)
1530 movdqa %xmm3,448+16(%rsp)
1531 por %xmm2,%xmm3
1532 por %xmm4,%xmm5
1533 pxor %xmm4,%xmm4
1534 por %xmm1,%xmm3
1535
# Z1sqr = in1_z^2 (stored at 32)
1536 leaq 64-0(%rsi),%rsi
1537 leaq 32(%rsp),%rdi
1538 call __ecp_nistz256_sqr_montq
1539
# Finish both masks while moving the squaring output (%r12..%r15) into the
# multiplier input registers (%r9..%r12) and loading in2_x[0] into %rax.
1540 pcmpeqd %xmm4,%xmm5
1541 pshufd $0xb1,%xmm3,%xmm4
1542 movq 0(%rbx),%rax
1543
1544 movq %r12,%r9
1545 por %xmm3,%xmm4
1546 pshufd $0,%xmm5,%xmm5
1547 pshufd $0x1e,%xmm4,%xmm3
1548 movq %r13,%r10
1549 por %xmm3,%xmm4
1550 pxor %xmm3,%xmm3
1551 movq %r14,%r11
1552 pcmpeqd %xmm3,%xmm4
1553 pshufd $0,%xmm4,%xmm4
1554
# U2 = Z1sqr * in2_x
1555 leaq 32-0(%rsp),%rsi
1556 movq %r15,%r12
1557 leaq 0(%rsp),%rdi
1558 call __ecp_nistz256_mul_montq
1559
# H = U2 - in1_x
1560 leaq 320(%rsp),%rbx
1561 leaq 64(%rsp),%rdi
1562 call __ecp_nistz256_sub_fromq
1563
# S2 = Z1sqr * in1_z
1564 movq 384(%rsp),%rax
1565 leaq 384(%rsp),%rbx
1566 movq 0+32(%rsp),%r9
1567 movq 8+32(%rsp),%r10
1568 leaq 0+32(%rsp),%rsi
1569 movq 16+32(%rsp),%r11
1570 movq 24+32(%rsp),%r12
1571 leaq 32(%rsp),%rdi
1572 call __ecp_nistz256_mul_montq
1573
# res_z = H * in1_z
1574 movq 384(%rsp),%rax
1575 leaq 384(%rsp),%rbx
1576 movq 0+64(%rsp),%r9
1577 movq 8+64(%rsp),%r10
1578 leaq 0+64(%rsp),%rsi
1579 movq 16+64(%rsp),%r11
1580 movq 24+64(%rsp),%r12
1581 leaq 288(%rsp),%rdi
1582 call __ecp_nistz256_mul_montq
1583
# S2 = S2 * in2_y
1584 movq 448(%rsp),%rax
1585 leaq 448(%rsp),%rbx
1586 movq 0+32(%rsp),%r9
1587 movq 8+32(%rsp),%r10
1588 leaq 0+32(%rsp),%rsi
1589 movq 16+32(%rsp),%r11
1590 movq 24+32(%rsp),%r12
1591 leaq 32(%rsp),%rdi
1592 call __ecp_nistz256_mul_montq
1593
# R = S2 - in1_y
1594 leaq 352(%rsp),%rbx
1595 leaq 96(%rsp),%rdi
1596 call __ecp_nistz256_sub_fromq
1597
# Hsqr = H^2
1598 movq 0+64(%rsp),%rax
1599 movq 8+64(%rsp),%r14
1600 leaq 0+64(%rsp),%rsi
1601 movq 16+64(%rsp),%r15
1602 movq 24+64(%rsp),%r8
1603 leaq 128(%rsp),%rdi
1604 call __ecp_nistz256_sqr_montq
1605
# Rsqr = R^2
1606 movq 0+96(%rsp),%rax
1607 movq 8+96(%rsp),%r14
1608 leaq 0+96(%rsp),%rsi
1609 movq 16+96(%rsp),%r15
1610 movq 24+96(%rsp),%r8
1611 leaq 192(%rsp),%rdi
1612 call __ecp_nistz256_sqr_montq
1613
# Hcub = H * Hsqr
1614 movq 128(%rsp),%rax
1615 leaq 128(%rsp),%rbx
1616 movq 0+64(%rsp),%r9
1617 movq 8+64(%rsp),%r10
1618 leaq 0+64(%rsp),%rsi
1619 movq 16+64(%rsp),%r11
1620 movq 24+64(%rsp),%r12
1621 leaq 160(%rsp),%rdi
1622 call __ecp_nistz256_mul_montq
1623
# U2 = in1_x * Hsqr
1624 movq 320(%rsp),%rax
1625 leaq 320(%rsp),%rbx
1626 movq 0+128(%rsp),%r9
1627 movq 8+128(%rsp),%r10
1628 leaq 0+128(%rsp),%rsi
1629 movq 16+128(%rsp),%r11
1630 movq 24+128(%rsp),%r12
1631 leaq 0(%rsp),%rdi
1632 call __ecp_nistz256_mul_montq
1633
1634
1635
1636
# Inline doubling of U2 (still in %r12,%r13,%r8,%r9 after the multiply; %r14
# and %r15 hold limbs 1 and 3 of p).  The doubled value must be fully reduced
# because __ecp_nistz256_subq below adds p only once on borrow.
1637 addq %r12,%r12
1638 leaq 192(%rsp),%rsi
1639 adcq %r13,%r13
1640 movq %r12,%rax
1641 adcq %r8,%r8
1642 adcq %r9,%r9
1643 movq %r13,%rbp
# %r11 = 0 when the doubling did not carry out, all ones when it did.
1644 sbbq %r11,%r11
1645
1646 subq $-1,%r12
1647 movq %r8,%rcx
1648 sbbq %r14,%r13
1649 sbbq $0,%r8
1650 movq %r9,%r10
1651 sbbq %r15,%r9
# Fold the borrow into the carry mask: CF ends up set only when there was no
# carry out and the subtraction of p borrowed, i.e. when 2*U2 < p.
1652 sbbq $0,%r11
1653
# CF set: keep the unsubtracted sum.  The interleaved loads fetch Rsqr into
# %rax,%rbp,%rcx,%r10 as the minuend for the subtraction that follows.
1654 cmovcq %rax,%r12
1655 movq 0(%rsi),%rax
1656 cmovcq %rbp,%r13
1657 movq 8(%rsi),%rbp
1658 cmovcq %rcx,%r8
1659 movq 16(%rsi),%rcx
1660 cmovcq %r10,%r9
1661 movq 24(%rsi),%r10
1662
# registers = Rsqr - 2*U2
1663 call __ecp_nistz256_subq
1664
# res_x = (Rsqr - 2*U2) - Hcub
1665 leaq 160(%rsp),%rbx
1666 leaq 224(%rsp),%rdi
1667 call __ecp_nistz256_sub_fromq
1668
# H = U2 - res_x (register-only helper; stored by hand below, reusing slot 64)
1669 movq 0+0(%rsp),%rax
1670 movq 0+8(%rsp),%rbp
1671 movq 0+16(%rsp),%rcx
1672 movq 0+24(%rsp),%r10
1673 leaq 64(%rsp),%rdi
1674
1675 call __ecp_nistz256_subq
1676
1677 movq %r12,0(%rdi)
1678 movq %r13,8(%rdi)
1679 movq %r8,16(%rdi)
1680 movq %r9,24(%rdi)
# S2 = Hcub * in1_y
1681 movq 352(%rsp),%rax
1682 leaq 352(%rsp),%rbx
1683 movq 0+160(%rsp),%r9
1684 movq 8+160(%rsp),%r10
1685 leaq 0+160(%rsp),%rsi
1686 movq 16+160(%rsp),%r11
1687 movq 24+160(%rsp),%r12
1688 leaq 32(%rsp),%rdi
1689 call __ecp_nistz256_mul_montq
1690
# H = H * R
1691 movq 96(%rsp),%rax
1692 leaq 96(%rsp),%rbx
1693 movq 0+64(%rsp),%r9
1694 movq 8+64(%rsp),%r10
1695 leaq 0+64(%rsp),%rsi
1696 movq 16+64(%rsp),%r11
1697 movq 24+64(%rsp),%r12
1698 leaq 64(%rsp),%rdi
1699 call __ecp_nistz256_mul_montq
1700
# res_y = H - S2
1701 leaq 32(%rsp),%rbx
1702 leaq 256(%rsp),%rdi
1703 call __ecp_nistz256_sub_fromq
1704
# Encoded movq: %xmm0 -> %rdi (restore the result pointer).
1705 .byte 102,72,15,126,199
1706
# Output Z: res_z, replaced by L$ONE_mont when %xmm5 is set, then by in1_z
# when %xmm4 is set.
1707 movdqa %xmm5,%xmm0
1708 movdqa %xmm5,%xmm1
1709 pandn 288(%rsp),%xmm0
1710 movdqa %xmm5,%xmm2
1711 pandn 288+16(%rsp),%xmm1
1712 movdqa %xmm5,%xmm3
1713 pand L$ONE_mont(%rip),%xmm2
1714 pand L$ONE_mont+16(%rip),%xmm3
1715 por %xmm0,%xmm2
1716 por %xmm1,%xmm3
1717
1718 movdqa %xmm4,%xmm0
1719 movdqa %xmm4,%xmm1
1720 pandn %xmm2,%xmm0
1721 movdqa %xmm4,%xmm2
1722 pandn %xmm3,%xmm1
1723 movdqa %xmm4,%xmm3
1724 pand 384(%rsp),%xmm2
1725 pand 384+16(%rsp),%xmm3
1726 por %xmm0,%xmm2
1727 por %xmm1,%xmm3
1728 movdqu %xmm2,64(%rdi)
1729 movdqu %xmm3,80(%rdi)
1730
# Output X: res_x, replaced by in2_x when %xmm5 is set, then by in1_x when
# %xmm4 is set.
1731 movdqa %xmm5,%xmm0
1732 movdqa %xmm5,%xmm1
1733 pandn 224(%rsp),%xmm0
1734 movdqa %xmm5,%xmm2
1735 pandn 224+16(%rsp),%xmm1
1736 movdqa %xmm5,%xmm3
1737 pand 416(%rsp),%xmm2
1738 pand 416+16(%rsp),%xmm3
1739 por %xmm0,%xmm2
1740 por %xmm1,%xmm3
1741
1742 movdqa %xmm4,%xmm0
1743 movdqa %xmm4,%xmm1
1744 pandn %xmm2,%xmm0
1745 movdqa %xmm4,%xmm2
1746 pandn %xmm3,%xmm1
1747 movdqa %xmm4,%xmm3
1748 pand 320(%rsp),%xmm2
1749 pand 320+16(%rsp),%xmm3
1750 por %xmm0,%xmm2
1751 por %xmm1,%xmm3
1752 movdqu %xmm2,0(%rdi)
1753 movdqu %xmm3,16(%rdi)
1754
# Output Y: res_y, replaced by in2_y when %xmm5 is set, then by in1_y when
# %xmm4 is set.
1755 movdqa %xmm5,%xmm0
1756 movdqa %xmm5,%xmm1
1757 pandn 256(%rsp),%xmm0
1758 movdqa %xmm5,%xmm2
1759 pandn 256+16(%rsp),%xmm1
1760 movdqa %xmm5,%xmm3
1761 pand 448(%rsp),%xmm2
1762 pand 448+16(%rsp),%xmm3
1763 por %xmm0,%xmm2
1764 por %xmm1,%xmm3
1765
1766 movdqa %xmm4,%xmm0
1767 movdqa %xmm4,%xmm1
1768 pandn %xmm2,%xmm0
1769 movdqa %xmm4,%xmm2
1770 pandn %xmm3,%xmm1
1771 movdqa %xmm4,%xmm3
1772 pand 352(%rsp),%xmm2
1773 pand 352+16(%rsp),%xmm3
1774 por %xmm0,%xmm2
1775 por %xmm1,%xmm3
1776 movdqu %xmm2,32(%rdi)
1777 movdqu %xmm3,48(%rdi)
1778
1779 addq $480+8,%rsp
1780 popq %r15
1781 popq %r14
1782 popq %r13
1783 popq %r12
1784 popq %rbx
1785 popq %rbp
# 0xf3,0xc3 encodes "rep ret".
1786 .byte 0xf3,0xc3
1787
1788 #endif
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698