Chromium Code Reviews

Side by Side Diff: third_party/boringssl/linux-x86_64/crypto/ec/p256-x86_64-asm.S

Issue 1930203003: Try BoringSSL roll again (Closed)
Base URL: git@github.com:dart-lang/sdk.git@master
Patch Set: Created 4 years, 7 months ago
1 #if defined(__x86_64__)
2 .text
3 .extern OPENSSL_ia32cap_P
4 .hidden OPENSSL_ia32cap_P
5
6
7 .align 64
8 .Lpoly:
9 .quad 0xffffffffffffffff, 0x00000000ffffffff, 0x0000000000000000, 0xffffffff00000001
10
11 .LOne:
12 .long 1,1,1,1,1,1,1,1
13 .LTwo:
14 .long 2,2,2,2,2,2,2,2
15 .LThree:
16 .long 3,3,3,3,3,3,3,3
17 .LONE_mont:
18 .quad 0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe
19
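
The two tables above are worth decoding once: .Lpoly is the NIST P-256 prime p = 2^256 - 2^224 + 2^192 + 2^96 - 1 stored as four little-endian 64-bit limbs, and .LONE_mont is the number 1 in Montgomery form, i.e. R mod p with R = 2^256. A small Python check of both constants (the least-significant-limb-first order is an assumption read off the layout, not something the file states):

    p = 2**256 - 2**224 + 2**192 + 2**96 - 1
    poly = [0xffffffffffffffff, 0x00000000ffffffff, 0x0000000000000000, 0xffffffff00000001]
    assert sum(w << (64 * i) for i, w in enumerate(poly)) == p
    one_mont = [0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe]
    assert sum(w << (64 * i) for i, w in enumerate(one_mont)) == 2**256 % p  # 1 in Montgomery form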
20 .type ecp_nistz256_mul_by_2,@function
21 .align 64
22 ecp_nistz256_mul_by_2:
23 pushq %r12
24 pushq %r13
25
26 movq 0(%rsi),%r8
27 movq 8(%rsi),%r9
28 addq %r8,%r8
29 movq 16(%rsi),%r10
30 adcq %r9,%r9
31 movq 24(%rsi),%r11
32 leaq .Lpoly(%rip),%rsi
33 movq %r8,%rax
34 adcq %r10,%r10
35 adcq %r11,%r11
36 movq %r9,%rdx
37 sbbq %r13,%r13
38
39 subq 0(%rsi),%r8
40 movq %r10,%rcx
41 sbbq 8(%rsi),%r9
42 sbbq 16(%rsi),%r10
43 movq %r11,%r12
44 sbbq 24(%rsi),%r11
45 testq %r13,%r13
46
47 cmovzq %rax,%r8
48 cmovzq %rdx,%r9
49 movq %r8,0(%rdi)
50 cmovzq %rcx,%r10
51 movq %r9,8(%rdi)
52 cmovzq %r12,%r11
53 movq %r10,16(%rdi)
54 movq %r11,24(%rdi)
55
56 popq %r13
57 popq %r12
58 .byte 0xf3,0xc3
59 .size ecp_nistz256_mul_by_2,.-ecp_nistz256_mul_by_2
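
ecp_nistz256_mul_by_2 doubles a four-limb field element, then computes the difference with p and uses cmovz to keep whichever of the two candidates the carry logic selects as the reduced result. A rough Python model of the intent (plain integers, not constant time, and not a bit-exact transcription of the carry handling):

    P256 = 2**256 - 2**224 + 2**192 + 2**96 - 1

    def mul_by_2(a):
        # Double, then subtract p once if the sum left the [0, p) range.
        t = 2 * a
        return t - P256 if t >= P256 else t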
60
61
62
63 .globl ecp_nistz256_neg
64 .hidden ecp_nistz256_neg
65 .type ecp_nistz256_neg,@function
66 .align 32
67 ecp_nistz256_neg:
68 pushq %r12
69 pushq %r13
70
71 xorq %r8,%r8
72 xorq %r9,%r9
73 xorq %r10,%r10
74 xorq %r11,%r11
75 xorq %r13,%r13
76
77 subq 0(%rsi),%r8
78 sbbq 8(%rsi),%r9
79 sbbq 16(%rsi),%r10
80 movq %r8,%rax
81 sbbq 24(%rsi),%r11
82 leaq .Lpoly(%rip),%rsi
83 movq %r9,%rdx
84 sbbq $0,%r13
85
86 addq 0(%rsi),%r8
87 movq %r10,%rcx
88 adcq 8(%rsi),%r9
89 adcq 16(%rsi),%r10
90 movq %r11,%r12
91 adcq 24(%rsi),%r11
92 testq %r13,%r13
93
94 cmovzq %rax,%r8
95 cmovzq %rdx,%r9
96 movq %r8,0(%rdi)
97 cmovzq %rcx,%r10
98 movq %r9,8(%rdi)
99 cmovzq %r12,%r11
100 movq %r10,16(%rdi)
101 movq %r11,24(%rdi)
102
103 popq %r13
104 popq %r12
105 .byte 0xf3,0xc3
106 .size ecp_nistz256_neg,.-ecp_nistz256_neg
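
ecp_nistz256_neg subtracts the input from zero, adds p back, and uses the borrow captured in %r13 to decide which copy to keep, so zero maps to zero and any other (already reduced) input a maps to p - a. A minimal Python model under the same assumption:

    P256 = 2**256 - 2**224 + 2**192 + 2**96 - 1

    def neg(a):
        # Additive inverse mod p; input assumed to lie in [0, p).
        return 0 if a == 0 else P256 - a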
107
108
109
110
111
112
113 .globl ecp_nistz256_mul_mont
114 .hidden ecp_nistz256_mul_mont
115 .type ecp_nistz256_mul_mont,@function
116 .align 32
117 ecp_nistz256_mul_mont:
118 .Lmul_mont:
119 pushq %rbp
120 pushq %rbx
121 pushq %r12
122 pushq %r13
123 pushq %r14
124 pushq %r15
125 movq %rdx,%rbx
126 movq 0(%rdx),%rax
127 movq 0(%rsi),%r9
128 movq 8(%rsi),%r10
129 movq 16(%rsi),%r11
130 movq 24(%rsi),%r12
131
132 call __ecp_nistz256_mul_montq
133 .Lmul_mont_done:
134 popq %r15
135 popq %r14
136 popq %r13
137 popq %r12
138 popq %rbx
139 popq %rbp
140 .byte 0xf3,0xc3
141 .size ecp_nistz256_mul_mont,.-ecp_nistz256_mul_mont
142
143 .type __ecp_nistz256_mul_montq,@function
144 .align 32
145 __ecp_nistz256_mul_montq:
146
147
148 movq %rax,%rbp
149 mulq %r9
150 movq .Lpoly+8(%rip),%r14
151 movq %rax,%r8
152 movq %rbp,%rax
153 movq %rdx,%r9
154
155 mulq %r10
156 movq .Lpoly+24(%rip),%r15
157 addq %rax,%r9
158 movq %rbp,%rax
159 adcq $0,%rdx
160 movq %rdx,%r10
161
162 mulq %r11
163 addq %rax,%r10
164 movq %rbp,%rax
165 adcq $0,%rdx
166 movq %rdx,%r11
167
168 mulq %r12
169 addq %rax,%r11
170 movq %r8,%rax
171 adcq $0,%rdx
172 xorq %r13,%r13
173 movq %rdx,%r12
174
175
176
177
178
179
180
181
182
183
184 movq %r8,%rbp
185 shlq $32,%r8
186 mulq %r15
187 shrq $32,%rbp
188 addq %r8,%r9
189 adcq %rbp,%r10
190 adcq %rax,%r11
191 movq 8(%rbx),%rax
192 adcq %rdx,%r12
193 adcq $0,%r13
194 xorq %r8,%r8
195
196
197
198 movq %rax,%rbp
199 mulq 0(%rsi)
200 addq %rax,%r9
201 movq %rbp,%rax
202 adcq $0,%rdx
203 movq %rdx,%rcx
204
205 mulq 8(%rsi)
206 addq %rcx,%r10
207 adcq $0,%rdx
208 addq %rax,%r10
209 movq %rbp,%rax
210 adcq $0,%rdx
211 movq %rdx,%rcx
212
213 mulq 16(%rsi)
214 addq %rcx,%r11
215 adcq $0,%rdx
216 addq %rax,%r11
217 movq %rbp,%rax
218 adcq $0,%rdx
219 movq %rdx,%rcx
220
221 mulq 24(%rsi)
222 addq %rcx,%r12
223 adcq $0,%rdx
224 addq %rax,%r12
225 movq %r9,%rax
226 adcq %rdx,%r13
227 adcq $0,%r8
228
229
230
231 movq %r9,%rbp
232 shlq $32,%r9
233 mulq %r15
234 shrq $32,%rbp
235 addq %r9,%r10
236 adcq %rbp,%r11
237 adcq %rax,%r12
238 movq 16(%rbx),%rax
239 adcq %rdx,%r13
240 adcq $0,%r8
241 xorq %r9,%r9
242
243
244
245 movq %rax,%rbp
246 mulq 0(%rsi)
247 addq %rax,%r10
248 movq %rbp,%rax
249 adcq $0,%rdx
250 movq %rdx,%rcx
251
252 mulq 8(%rsi)
253 addq %rcx,%r11
254 adcq $0,%rdx
255 addq %rax,%r11
256 movq %rbp,%rax
257 adcq $0,%rdx
258 movq %rdx,%rcx
259
260 mulq 16(%rsi)
261 addq %rcx,%r12
262 adcq $0,%rdx
263 addq %rax,%r12
264 movq %rbp,%rax
265 adcq $0,%rdx
266 movq %rdx,%rcx
267
268 mulq 24(%rsi)
269 addq %rcx,%r13
270 adcq $0,%rdx
271 addq %rax,%r13
272 movq %r10,%rax
273 adcq %rdx,%r8
274 adcq $0,%r9
275
276
277
278 movq %r10,%rbp
279 shlq $32,%r10
280 mulq %r15
281 shrq $32,%rbp
282 addq %r10,%r11
283 adcq %rbp,%r12
284 adcq %rax,%r13
285 movq 24(%rbx),%rax
286 adcq %rdx,%r8
287 adcq $0,%r9
288 xorq %r10,%r10
289
290
291
292 movq %rax,%rbp
293 mulq 0(%rsi)
294 addq %rax,%r11
295 movq %rbp,%rax
296 adcq $0,%rdx
297 movq %rdx,%rcx
298
299 mulq 8(%rsi)
300 addq %rcx,%r12
301 adcq $0,%rdx
302 addq %rax,%r12
303 movq %rbp,%rax
304 adcq $0,%rdx
305 movq %rdx,%rcx
306
307 mulq 16(%rsi)
308 addq %rcx,%r13
309 adcq $0,%rdx
310 addq %rax,%r13
311 movq %rbp,%rax
312 adcq $0,%rdx
313 movq %rdx,%rcx
314
315 mulq 24(%rsi)
316 addq %rcx,%r8
317 adcq $0,%rdx
318 addq %rax,%r8
319 movq %r11,%rax
320 adcq %rdx,%r9
321 adcq $0,%r10
322
323
324
325 movq %r11,%rbp
326 shlq $32,%r11
327 mulq %r15
328 shrq $32,%rbp
329 addq %r11,%r12
330 adcq %rbp,%r13
331 movq %r12,%rcx
332 adcq %rax,%r8
333 adcq %rdx,%r9
334 movq %r13,%rbp
335 adcq $0,%r10
336
337
338
339 subq $-1,%r12
340 movq %r8,%rbx
341 sbbq %r14,%r13
342 sbbq $0,%r8
343 movq %r9,%rdx
344 sbbq %r15,%r9
345 sbbq $0,%r10
346
347 cmovcq %rcx,%r12
348 cmovcq %rbp,%r13
349 movq %r12,0(%rdi)
350 cmovcq %rbx,%r8
351 movq %r13,8(%rdi)
352 cmovcq %rdx,%r9
353 movq %r8,16(%rdi)
354 movq %r9,24(%rdi)
355
356 .byte 0xf3,0xc3
357 .size __ecp_nistz256_mul_montq,.-__ecp_nistz256_mul_montq
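
__ecp_nistz256_mul_montq computes the Montgomery product a*b*R^-1 mod p with R = 2^256, interleaving the four multiply passes with one reduction step each. Because -p^-1 mod 2^64 == 1 for this prime, the per-step quotient is simply the low limb of the accumulator, and the multiple of p is assembled essentially from two 32-bit shifts of that limb plus a single mulq by the top word of p (the shlq/shrq $32 pairs above). A word-level Python sketch of the same algorithm, using plain integers and making no attempt at constant time:

    P256 = 2**256 - 2**224 + 2**192 + 2**96 - 1
    MASK64 = 2**64 - 1

    def mul_mont(a, b, p=P256):
        # Interleaved Montgomery multiplication: returns a*b*2^-256 mod p.
        acc = 0
        for i in range(4):
            acc += a * ((b >> (64 * i)) & MASK64)   # multiply-accumulate one limb of b
            acc = (acc + (acc & MASK64) * p) >> 64  # add q*p (q = low limb), drop the cleared limb
        return acc - p if acc >= p else acc         # single conditional final subtraction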
358
359
360
361
362
363
364
365
366 .globl ecp_nistz256_sqr_mont
367 .hidden ecp_nistz256_sqr_mont
368 .type ecp_nistz256_sqr_mont,@function
369 .align 32
370 ecp_nistz256_sqr_mont:
371 pushq %rbp
372 pushq %rbx
373 pushq %r12
374 pushq %r13
375 pushq %r14
376 pushq %r15
377 movq 0(%rsi),%rax
378 movq 8(%rsi),%r14
379 movq 16(%rsi),%r15
380 movq 24(%rsi),%r8
381
382 call __ecp_nistz256_sqr_montq
383 .Lsqr_mont_done:
384 popq %r15
385 popq %r14
386 popq %r13
387 popq %r12
388 popq %rbx
389 popq %rbp
390 .byte 0xf3,0xc3
391 .size ecp_nistz256_sqr_mont,.-ecp_nistz256_sqr_mont
392
393 .type __ecp_nistz256_sqr_montq,@function
394 .align 32
395 __ecp_nistz256_sqr_montq:
396 movq %rax,%r13
397 mulq %r14
398 movq %rax,%r9
399 movq %r15,%rax
400 movq %rdx,%r10
401
402 mulq %r13
403 addq %rax,%r10
404 movq %r8,%rax
405 adcq $0,%rdx
406 movq %rdx,%r11
407
408 mulq %r13
409 addq %rax,%r11
410 movq %r15,%rax
411 adcq $0,%rdx
412 movq %rdx,%r12
413
414
415 mulq %r14
416 addq %rax,%r11
417 movq %r8,%rax
418 adcq $0,%rdx
419 movq %rdx,%rbp
420
421 mulq %r14
422 addq %rax,%r12
423 movq %r8,%rax
424 adcq $0,%rdx
425 addq %rbp,%r12
426 movq %rdx,%r13
427 adcq $0,%r13
428
429
430 mulq %r15
431 xorq %r15,%r15
432 addq %rax,%r13
433 movq 0(%rsi),%rax
434 movq %rdx,%r14
435 adcq $0,%r14
436
437 addq %r9,%r9
438 adcq %r10,%r10
439 adcq %r11,%r11
440 adcq %r12,%r12
441 adcq %r13,%r13
442 adcq %r14,%r14
443 adcq $0,%r15
444
445 mulq %rax
446 movq %rax,%r8
447 movq 8(%rsi),%rax
448 movq %rdx,%rcx
449
450 mulq %rax
451 addq %rcx,%r9
452 adcq %rax,%r10
453 movq 16(%rsi),%rax
454 adcq $0,%rdx
455 movq %rdx,%rcx
456
457 mulq %rax
458 addq %rcx,%r11
459 adcq %rax,%r12
460 movq 24(%rsi),%rax
461 adcq $0,%rdx
462 movq %rdx,%rcx
463
464 mulq %rax
465 addq %rcx,%r13
466 adcq %rax,%r14
467 movq %r8,%rax
468 adcq %rdx,%r15
469
470 movq .Lpoly+8(%rip),%rsi
471 movq .Lpoly+24(%rip),%rbp
472
473
474
475
476 movq %r8,%rcx
477 shlq $32,%r8
478 mulq %rbp
479 shrq $32,%rcx
480 addq %r8,%r9
481 adcq %rcx,%r10
482 adcq %rax,%r11
483 movq %r9,%rax
484 adcq $0,%rdx
485
486
487
488 movq %r9,%rcx
489 shlq $32,%r9
490 movq %rdx,%r8
491 mulq %rbp
492 shrq $32,%rcx
493 addq %r9,%r10
494 adcq %rcx,%r11
495 adcq %rax,%r8
496 movq %r10,%rax
497 adcq $0,%rdx
498
499
500
501 movq %r10,%rcx
502 shlq $32,%r10
503 movq %rdx,%r9
504 mulq %rbp
505 shrq $32,%rcx
506 addq %r10,%r11
507 adcq %rcx,%r8
508 adcq %rax,%r9
509 movq %r11,%rax
510 adcq $0,%rdx
511
512
513
514 movq %r11,%rcx
515 shlq $32,%r11
516 movq %rdx,%r10
517 mulq %rbp
518 shrq $32,%rcx
519 addq %r11,%r8
520 adcq %rcx,%r9
521 adcq %rax,%r10
522 adcq $0,%rdx
523 xorq %r11,%r11
524
525
526
527 addq %r8,%r12
528 adcq %r9,%r13
529 movq %r12,%r8
530 adcq %r10,%r14
531 adcq %rdx,%r15
532 movq %r13,%r9
533 adcq $0,%r11
534
535 subq $-1,%r12
536 movq %r14,%r10
537 sbbq %rsi,%r13
538 sbbq $0,%r14
539 movq %r15,%rcx
540 sbbq %rbp,%r15
541 sbbq $0,%r11
542
543 cmovcq %r8,%r12
544 cmovcq %r9,%r13
545 movq %r12,0(%rdi)
546 cmovcq %r10,%r14
547 movq %r13,8(%rdi)
548 cmovcq %rcx,%r15
549 movq %r14,16(%rdi)
550 movq %r15,24(%rdi)
551
552 .byte 0xf3,0xc3
553 .size __ecp_nistz256_sqr_montq,.-__ecp_nistz256_sqr_montq
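
The dedicated squaring path computes each cross product a_i*a_j once, doubles that partial sum, adds the diagonal squares, and then runs the same four shift-based reduction steps as the multiplier. Functionally it is mul_mont(a, a); the sketch below mirrors that structure rather than the exact instruction schedule:

    P256 = 2**256 - 2**224 + 2**192 + 2**96 - 1
    MASK64 = 2**64 - 1

    def sqr_mont(a, p=P256):
        # Montgomery squaring: returns a*a*2^-256 mod p.
        w = [(a >> (64 * i)) & MASK64 for i in range(4)]
        cross = sum(w[i] * w[j] << (64 * (i + j)) for i in range(4) for j in range(i + 1, 4))
        acc = 2 * cross + sum(w[i] * w[i] << (128 * i) for i in range(4))
        for _ in range(4):
            acc = (acc + (acc & MASK64) * p) >> 64  # same reduction step as mul_mont
        return acc - p if acc >= p else acc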
554
555
556
557
558
559
560 .globl ecp_nistz256_from_mont
561 .hidden ecp_nistz256_from_mont
562 .type ecp_nistz256_from_mont,@function
563 .align 32
564 ecp_nistz256_from_mont:
565 pushq %r12
566 pushq %r13
567
568 movq 0(%rsi),%rax
569 movq .Lpoly+24(%rip),%r13
570 movq 8(%rsi),%r9
571 movq 16(%rsi),%r10
572 movq 24(%rsi),%r11
573 movq %rax,%r8
574 movq .Lpoly+8(%rip),%r12
575
576
577
578 movq %rax,%rcx
579 shlq $32,%r8
580 mulq %r13
581 shrq $32,%rcx
582 addq %r8,%r9
583 adcq %rcx,%r10
584 adcq %rax,%r11
585 movq %r9,%rax
586 adcq $0,%rdx
587
588
589
590 movq %r9,%rcx
591 shlq $32,%r9
592 movq %rdx,%r8
593 mulq %r13
594 shrq $32,%rcx
595 addq %r9,%r10
596 adcq %rcx,%r11
597 adcq %rax,%r8
598 movq %r10,%rax
599 adcq $0,%rdx
600
601
602
603 movq %r10,%rcx
604 shlq $32,%r10
605 movq %rdx,%r9
606 mulq %r13
607 shrq $32,%rcx
608 addq %r10,%r11
609 adcq %rcx,%r8
610 adcq %rax,%r9
611 movq %r11,%rax
612 adcq $0,%rdx
613
614
615
616 movq %r11,%rcx
617 shlq $32,%r11
618 movq %rdx,%r10
619 mulq %r13
620 shrq $32,%rcx
621 addq %r11,%r8
622 adcq %rcx,%r9
623 movq %r8,%rcx
624 adcq %rax,%r10
625 movq %r9,%rsi
626 adcq $0,%rdx
627
628 subq $-1,%r8
629 movq %r10,%rax
630 sbbq %r12,%r9
631 sbbq $0,%r10
632 movq %rdx,%r11
633 sbbq %r13,%rdx
634 sbbq %r13,%r13
635
636 cmovnzq %rcx,%r8
637 cmovnzq %rsi,%r9
638 movq %r8,0(%rdi)
639 cmovnzq %rax,%r10
640 movq %r9,8(%rdi)
641 cmovzq %rdx,%r11
642 movq %r10,16(%rdi)
643 movq %r11,24(%rdi)
644
645 popq %r13
646 popq %r12
647 .byte 0xf3,0xc3
648 .size ecp_nistz256_from_mont,.-ecp_nistz256_from_mont
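
ecp_nistz256_from_mont runs only the reduction half of the multiplier (there are no multiply passes), which is the same as a Montgomery multiplication by 1: it strips the factor R and returns a*R^-1 mod p, converting a value out of Montgomery form. A plain model:

    P256 = 2**256 - 2**224 + 2**192 + 2**96 - 1
    MASK64 = 2**64 - 1

    def from_mont(a, p=P256):
        # Montgomery reduction of a single 256-bit value: a * 2^-256 mod p.
        for _ in range(4):
            a = (a + (a & MASK64) * p) >> 64
        return a - p if a >= p else a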
649
650
651 .globl ecp_nistz256_select_w5
652 .hidden ecp_nistz256_select_w5
653 .type ecp_nistz256_select_w5,@function
654 .align 32
655 ecp_nistz256_select_w5:
656 movdqa .LOne(%rip),%xmm0
657 movd %edx,%xmm1
658
659 pxor %xmm2,%xmm2
660 pxor %xmm3,%xmm3
661 pxor %xmm4,%xmm4
662 pxor %xmm5,%xmm5
663 pxor %xmm6,%xmm6
664 pxor %xmm7,%xmm7
665
666 movdqa %xmm0,%xmm8
667 pshufd $0,%xmm1,%xmm1
668
669 movq $16,%rax
670 .Lselect_loop_sse_w5:
671
672 movdqa %xmm8,%xmm15
673 paddd %xmm0,%xmm8
674 pcmpeqd %xmm1,%xmm15
675
676 movdqa 0(%rsi),%xmm9
677 movdqa 16(%rsi),%xmm10
678 movdqa 32(%rsi),%xmm11
679 movdqa 48(%rsi),%xmm12
680 movdqa 64(%rsi),%xmm13
681 movdqa 80(%rsi),%xmm14
682 leaq 96(%rsi),%rsi
683
684 pand %xmm15,%xmm9
685 pand %xmm15,%xmm10
686 por %xmm9,%xmm2
687 pand %xmm15,%xmm11
688 por %xmm10,%xmm3
689 pand %xmm15,%xmm12
690 por %xmm11,%xmm4
691 pand %xmm15,%xmm13
692 por %xmm12,%xmm5
693 pand %xmm15,%xmm14
694 por %xmm13,%xmm6
695 por %xmm14,%xmm7
696
697 decq %rax
698 jnz .Lselect_loop_sse_w5
699
700 movdqu %xmm2,0(%rdi)
701 movdqu %xmm3,16(%rdi)
702 movdqu %xmm4,32(%rdi)
703 movdqu %xmm5,48(%rdi)
704 movdqu %xmm6,64(%rdi)
705 movdqu %xmm7,80(%rdi)
706 .byte 0xf3,0xc3
707 .size ecp_nistz256_select_w5,.-ecp_nistz256_select_w5
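
ecp_nistz256_select_w5 scans all 16 table entries (each a 96-byte Jacobian point) and masks in the one whose 1-based position matches the index in %edx, so the sequence of memory accesses does not depend on the secret index; an index of 0 matches nothing and yields the all-zero entry. The w7 variant below does the same over 64 affine entries of 64 bytes. A Python model of the selection idea (the pcmpeqd lane mask is modelled with a byte mask; the model itself branches and so is not constant time):

    def select_w5(table, index):
        # table: 16 entries of 96 bytes each; index: 0..16, 1-based.
        out = bytearray(96)
        for i, entry in enumerate(table, start=1):
            mask = 0xff if i == index else 0x00   # stand-in for the pcmpeqd lane mask
            for j in range(96):
                out[j] |= entry[j] & mask
        return bytes(out)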
708
709
710
711 .globl ecp_nistz256_select_w7
712 .hidden ecp_nistz256_select_w7
713 .type ecp_nistz256_select_w7,@function
714 .align 32
715 ecp_nistz256_select_w7:
716 movdqa .LOne(%rip),%xmm8
717 movd %edx,%xmm1
718
719 pxor %xmm2,%xmm2
720 pxor %xmm3,%xmm3
721 pxor %xmm4,%xmm4
722 pxor %xmm5,%xmm5
723
724 movdqa %xmm8,%xmm0
725 pshufd $0,%xmm1,%xmm1
726 movq $64,%rax
727
728 .Lselect_loop_sse_w7:
729 movdqa %xmm8,%xmm15
730 paddd %xmm0,%xmm8
731 movdqa 0(%rsi),%xmm9
732 movdqa 16(%rsi),%xmm10
733 pcmpeqd %xmm1,%xmm15
734 movdqa 32(%rsi),%xmm11
735 movdqa 48(%rsi),%xmm12
736 leaq 64(%rsi),%rsi
737
738 pand %xmm15,%xmm9
739 pand %xmm15,%xmm10
740 por %xmm9,%xmm2
741 pand %xmm15,%xmm11
742 por %xmm10,%xmm3
743 pand %xmm15,%xmm12
744 por %xmm11,%xmm4
745 prefetcht0 255(%rsi)
746 por %xmm12,%xmm5
747
748 decq %rax
749 jnz .Lselect_loop_sse_w7
750
751 movdqu %xmm2,0(%rdi)
752 movdqu %xmm3,16(%rdi)
753 movdqu %xmm4,32(%rdi)
754 movdqu %xmm5,48(%rdi)
755 .byte 0xf3,0xc3
756 .size ecp_nistz256_select_w7,.-ecp_nistz256_select_w7
757 .globl ecp_nistz256_avx2_select_w7
758 .hidden ecp_nistz256_avx2_select_w7
759 .type ecp_nistz256_avx2_select_w7,@function
760 .align 32
761 ecp_nistz256_avx2_select_w7:
762 .byte 0x0f,0x0b
763 .byte 0xf3,0xc3
764 .size ecp_nistz256_avx2_select_w7,.-ecp_nistz256_avx2_select_w7
765 .type __ecp_nistz256_add_toq,@function
766 .align 32
767 __ecp_nistz256_add_toq:
768 addq 0(%rbx),%r12
769 adcq 8(%rbx),%r13
770 movq %r12,%rax
771 adcq 16(%rbx),%r8
772 adcq 24(%rbx),%r9
773 movq %r13,%rbp
774 sbbq %r11,%r11
775
776 subq $-1,%r12
777 movq %r8,%rcx
778 sbbq %r14,%r13
779 sbbq $0,%r8
780 movq %r9,%r10
781 sbbq %r15,%r9
782 testq %r11,%r11
783
784 cmovzq %rax,%r12
785 cmovzq %rbp,%r13
786 movq %r12,0(%rdi)
787 cmovzq %rcx,%r8
788 movq %r13,8(%rdi)
789 cmovzq %r10,%r9
790 movq %r8,16(%rdi)
791 movq %r9,24(%rdi)
792
793 .byte 0xf3,0xc3
794 .size __ecp_nistz256_add_toq,.-__ecp_nistz256_add_toq
795
796 .type __ecp_nistz256_sub_fromq,@function
797 .align 32
798 __ecp_nistz256_sub_fromq:
799 subq 0(%rbx),%r12
800 sbbq 8(%rbx),%r13
801 movq %r12,%rax
802 sbbq 16(%rbx),%r8
803 sbbq 24(%rbx),%r9
804 movq %r13,%rbp
805 sbbq %r11,%r11
806
807 addq $-1,%r12
808 movq %r8,%rcx
809 adcq %r14,%r13
810 adcq $0,%r8
811 movq %r9,%r10
812 adcq %r15,%r9
813 testq %r11,%r11
814
815 cmovzq %rax,%r12
816 cmovzq %rbp,%r13
817 movq %r12,0(%rdi)
818 cmovzq %rcx,%r8
819 movq %r13,8(%rdi)
820 cmovzq %r10,%r9
821 movq %r8,16(%rdi)
822 movq %r9,24(%rdi)
823
824 .byte 0xf3,0xc3
825 .size __ecp_nistz256_sub_fromq,.-__ecp_nistz256_sub_fromq
826
827 .type __ecp_nistz256_subq,@function
828 .align 32
829 __ecp_nistz256_subq:
830 subq %r12,%rax
831 sbbq %r13,%rbp
832 movq %rax,%r12
833 sbbq %r8,%rcx
834 sbbq %r9,%r10
835 movq %rbp,%r13
836 sbbq %r11,%r11
837
838 addq $-1,%rax
839 movq %rcx,%r8
840 adcq %r14,%rbp
841 adcq $0,%rcx
842 movq %r10,%r9
843 adcq %r15,%r10
844 testq %r11,%r11
845
846 cmovnzq %rax,%r12
847 cmovnzq %rbp,%r13
848 cmovnzq %rcx,%r8
849 cmovnzq %r10,%r9
850
851 .byte 0xf3,0xc3
852 .size __ecp_nistz256_subq,.-__ecp_nistz256_subq
853
854 .type __ecp_nistz256_mul_by_2q,@function
855 .align 32
856 __ecp_nistz256_mul_by_2q:
857 addq %r12,%r12
858 adcq %r13,%r13
859 movq %r12,%rax
860 adcq %r8,%r8
861 adcq %r9,%r9
862 movq %r13,%rbp
863 sbbq %r11,%r11
864
865 subq $-1,%r12
866 movq %r8,%rcx
867 sbbq %r14,%r13
868 sbbq $0,%r8
869 movq %r9,%r10
870 sbbq %r15,%r9
871 testq %r11,%r11
872
873 cmovzq %rax,%r12
874 cmovzq %rbp,%r13
875 movq %r12,0(%rdi)
876 cmovzq %rcx,%r8
877 movq %r13,8(%rdi)
878 cmovzq %r10,%r9
879 movq %r8,16(%rdi)
880 movq %r9,24(%rdi)
881
882 .byte 0xf3,0xc3
883 .size __ecp_nistz256_mul_by_2q,.-__ecp_nistz256_mul_by_2q
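
The four __ecp_nistz256_*q helpers above (add_toq, sub_fromq, subq, mul_by_2q) work entirely in registers so that the point routines below can chain them between Montgomery multiplications: each performs a 256-bit add, subtract or double followed by one conditional correction by p. A compact Python model of the add/sub pair, with the same caveat as earlier that the real code drives the correction off carry/borrow flags rather than a comparison:

    P256 = 2**256 - 2**224 + 2**192 + 2**96 - 1

    def add_mod(a, b):
        t = a + b
        return t - P256 if t >= P256 else t

    def sub_mod(a, b):
        t = a - b
        return t + P256 if t < 0 else t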
884 .globl ecp_nistz256_point_double
885 .hidden ecp_nistz256_point_double
886 .type ecp_nistz256_point_double,@function
887 .align 32
888 ecp_nistz256_point_double:
889 pushq %rbp
890 pushq %rbx
891 pushq %r12
892 pushq %r13
893 pushq %r14
894 pushq %r15
895 subq $160+8,%rsp
896
897 movdqu 0(%rsi),%xmm0
898 movq %rsi,%rbx
899 movdqu 16(%rsi),%xmm1
900 movq 32+0(%rsi),%r12
901 movq 32+8(%rsi),%r13
902 movq 32+16(%rsi),%r8
903 movq 32+24(%rsi),%r9
904 movq .Lpoly+8(%rip),%r14
905 movq .Lpoly+24(%rip),%r15
906 movdqa %xmm0,96(%rsp)
907 movdqa %xmm1,96+16(%rsp)
908 leaq 32(%rdi),%r10
909 leaq 64(%rdi),%r11
910 .byte 102,72,15,110,199
911 .byte 102,73,15,110,202
912 .byte 102,73,15,110,211
913
914 leaq 0(%rsp),%rdi
915 call __ecp_nistz256_mul_by_2q
916
917 movq 64+0(%rsi),%rax
918 movq 64+8(%rsi),%r14
919 movq 64+16(%rsi),%r15
920 movq 64+24(%rsi),%r8
921 leaq 64-0(%rsi),%rsi
922 leaq 64(%rsp),%rdi
923 call __ecp_nistz256_sqr_montq
924
925 movq 0+0(%rsp),%rax
926 movq 8+0(%rsp),%r14
927 leaq 0+0(%rsp),%rsi
928 movq 16+0(%rsp),%r15
929 movq 24+0(%rsp),%r8
930 leaq 0(%rsp),%rdi
931 call __ecp_nistz256_sqr_montq
932
933 movq 32(%rbx),%rax
934 movq 64+0(%rbx),%r9
935 movq 64+8(%rbx),%r10
936 movq 64+16(%rbx),%r11
937 movq 64+24(%rbx),%r12
938 leaq 64-0(%rbx),%rsi
939 leaq 32(%rbx),%rbx
940 .byte 102,72,15,126,215
941 call __ecp_nistz256_mul_montq
942 call __ecp_nistz256_mul_by_2q
943
944 movq 96+0(%rsp),%r12
945 movq 96+8(%rsp),%r13
946 leaq 64(%rsp),%rbx
947 movq 96+16(%rsp),%r8
948 movq 96+24(%rsp),%r9
949 leaq 32(%rsp),%rdi
950 call __ecp_nistz256_add_toq
951
952 movq 96+0(%rsp),%r12
953 movq 96+8(%rsp),%r13
954 leaq 64(%rsp),%rbx
955 movq 96+16(%rsp),%r8
956 movq 96+24(%rsp),%r9
957 leaq 64(%rsp),%rdi
958 call __ecp_nistz256_sub_fromq
959
960 movq 0+0(%rsp),%rax
961 movq 8+0(%rsp),%r14
962 leaq 0+0(%rsp),%rsi
963 movq 16+0(%rsp),%r15
964 movq 24+0(%rsp),%r8
965 .byte 102,72,15,126,207
966 call __ecp_nistz256_sqr_montq
967 xorq %r9,%r9
968 movq %r12,%rax
969 addq $-1,%r12
970 movq %r13,%r10
971 adcq %rsi,%r13
972 movq %r14,%rcx
973 adcq $0,%r14
974 movq %r15,%r8
975 adcq %rbp,%r15
976 adcq $0,%r9
977 xorq %rsi,%rsi
978 testq $1,%rax
979
980 cmovzq %rax,%r12
981 cmovzq %r10,%r13
982 cmovzq %rcx,%r14
983 cmovzq %r8,%r15
984 cmovzq %rsi,%r9
985
986 movq %r13,%rax
987 shrq $1,%r12
988 shlq $63,%rax
989 movq %r14,%r10
990 shrq $1,%r13
991 orq %rax,%r12
992 shlq $63,%r10
993 movq %r15,%rcx
994 shrq $1,%r14
995 orq %r10,%r13
996 shlq $63,%rcx
997 movq %r12,0(%rdi)
998 shrq $1,%r15
999 movq %r13,8(%rdi)
1000 shlq $63,%r9
1001 orq %rcx,%r14
1002 orq %r9,%r15
1003 movq %r14,16(%rdi)
1004 movq %r15,24(%rdi)
1005 movq 64(%rsp),%rax
1006 leaq 64(%rsp),%rbx
1007 movq 0+32(%rsp),%r9
1008 movq 8+32(%rsp),%r10
1009 leaq 0+32(%rsp),%rsi
1010 movq 16+32(%rsp),%r11
1011 movq 24+32(%rsp),%r12
1012 leaq 32(%rsp),%rdi
1013 call __ecp_nistz256_mul_montq
1014
1015 leaq 128(%rsp),%rdi
1016 call __ecp_nistz256_mul_by_2q
1017
1018 leaq 32(%rsp),%rbx
1019 leaq 32(%rsp),%rdi
1020 call __ecp_nistz256_add_toq
1021
1022 movq 96(%rsp),%rax
1023 leaq 96(%rsp),%rbx
1024 movq 0+0(%rsp),%r9
1025 movq 8+0(%rsp),%r10
1026 leaq 0+0(%rsp),%rsi
1027 movq 16+0(%rsp),%r11
1028 movq 24+0(%rsp),%r12
1029 leaq 0(%rsp),%rdi
1030 call __ecp_nistz256_mul_montq
1031
1032 leaq 128(%rsp),%rdi
1033 call __ecp_nistz256_mul_by_2q
1034
1035 movq 0+32(%rsp),%rax
1036 movq 8+32(%rsp),%r14
1037 leaq 0+32(%rsp),%rsi
1038 movq 16+32(%rsp),%r15
1039 movq 24+32(%rsp),%r8
1040 .byte 102,72,15,126,199
1041 call __ecp_nistz256_sqr_montq
1042
1043 leaq 128(%rsp),%rbx
1044 movq %r14,%r8
1045 movq %r15,%r9
1046 movq %rsi,%r14
1047 movq %rbp,%r15
1048 call __ecp_nistz256_sub_fromq
1049
1050 movq 0+0(%rsp),%rax
1051 movq 0+8(%rsp),%rbp
1052 movq 0+16(%rsp),%rcx
1053 movq 0+24(%rsp),%r10
1054 leaq 0(%rsp),%rdi
1055 call __ecp_nistz256_subq
1056
1057 movq 32(%rsp),%rax
1058 leaq 32(%rsp),%rbx
1059 movq %r12,%r14
1060 xorl %ecx,%ecx
1061 movq %r12,0+0(%rsp)
1062 movq %r13,%r10
1063 movq %r13,0+8(%rsp)
1064 cmovzq %r8,%r11
1065 movq %r8,0+16(%rsp)
1066 leaq 0-0(%rsp),%rsi
1067 cmovzq %r9,%r12
1068 movq %r9,0+24(%rsp)
1069 movq %r14,%r9
1070 leaq 0(%rsp),%rdi
1071 call __ecp_nistz256_mul_montq
1072
1073 .byte 102,72,15,126,203
1074 .byte 102,72,15,126,207
1075 call __ecp_nistz256_sub_fromq
1076
1077 addq $160+8,%rsp
1078 popq %r15
1079 popq %r14
1080 popq %r13
1081 popq %r12
1082 popq %rbx
1083 popq %rbp
1084 .byte 0xf3,0xc3
1085 .size ecp_nistz256_point_double,.-ecp_nistz256_point_double
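
ecp_nistz256_point_double appears to follow the usual Jacobian doubling formulas for an a = -3 curve: M = 3(X - Z^2)(X + Z^2), S = 4XY^2, X' = M^2 - 2S, Y' = M(S - X') - 8Y^4, Z' = 2YZ, with 8Y^4 obtained by halving (4Y^2)^2 mod p (the shrq/shlq $63 run in the middle of the routine). A field-level sketch built on the helper sketches above (mul_mont, sqr_mont, add_mod, sub_mod), all coordinates in Montgomery form:

    def div2_mod(a, p=P256):
        # Halve modulo p: add p first when a is odd.
        return (a + p) >> 1 if a & 1 else a >> 1

    def point_double(X, Y, Z):
        Zsqr = sqr_mont(Z)
        M = mul_mont(sub_mod(X, Zsqr), add_mod(X, Zsqr))
        M = add_mod(add_mod(M, M), M)                    # M = 3(X - Z^2)(X + Z^2)
        Ysqr4 = sqr_mont(add_mod(Y, Y))                  # 4Y^2
        S = mul_mont(X, Ysqr4)                           # S = 4XY^2
        Zout = mul_mont(add_mod(Y, Y), Z)                # Z' = 2YZ
        Xout = sub_mod(sqr_mont(M), add_mod(S, S))       # X' = M^2 - 2S
        Yout = sub_mod(mul_mont(M, sub_mod(S, Xout)),
                       div2_mod(sqr_mont(Ysqr4)))        # Y' = M(S - X') - 8Y^4
        return Xout, Yout, Zout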
1086 .globl ecp_nistz256_point_add
1087 .hidden ecp_nistz256_point_add
1088 .type ecp_nistz256_point_add,@function
1089 .align 32
1090 ecp_nistz256_point_add:
1091 pushq %rbp
1092 pushq %rbx
1093 pushq %r12
1094 pushq %r13
1095 pushq %r14
1096 pushq %r15
1097 subq $576+8,%rsp
1098
1099 movdqu 0(%rsi),%xmm0
1100 movdqu 16(%rsi),%xmm1
1101 movdqu 32(%rsi),%xmm2
1102 movdqu 48(%rsi),%xmm3
1103 movdqu 64(%rsi),%xmm4
1104 movdqu 80(%rsi),%xmm5
1105 movq %rsi,%rbx
1106 movq %rdx,%rsi
1107 movdqa %xmm0,384(%rsp)
1108 movdqa %xmm1,384+16(%rsp)
1109 por %xmm0,%xmm1
1110 movdqa %xmm2,416(%rsp)
1111 movdqa %xmm3,416+16(%rsp)
1112 por %xmm2,%xmm3
1113 movdqa %xmm4,448(%rsp)
1114 movdqa %xmm5,448+16(%rsp)
1115 por %xmm1,%xmm3
1116
1117 movdqu 0(%rsi),%xmm0
1118 pshufd $177,%xmm3,%xmm5
1119 movdqu 16(%rsi),%xmm1
1120 movdqu 32(%rsi),%xmm2
1121 por %xmm3,%xmm5
1122 movdqu 48(%rsi),%xmm3
1123 movq 64+0(%rsi),%rax
1124 movq 64+8(%rsi),%r14
1125 movq 64+16(%rsi),%r15
1126 movq 64+24(%rsi),%r8
1127 movdqa %xmm0,480(%rsp)
1128 pshufd $30,%xmm5,%xmm4
1129 movdqa %xmm1,480+16(%rsp)
1130 por %xmm0,%xmm1
1131 .byte 102,72,15,110,199
1132 movdqa %xmm2,512(%rsp)
1133 movdqa %xmm3,512+16(%rsp)
1134 por %xmm2,%xmm3
1135 por %xmm4,%xmm5
1136 pxor %xmm4,%xmm4
1137 por %xmm1,%xmm3
1138
1139 leaq 64-0(%rsi),%rsi
1140 movq %rax,544+0(%rsp)
1141 movq %r14,544+8(%rsp)
1142 movq %r15,544+16(%rsp)
1143 movq %r8,544+24(%rsp)
1144 leaq 96(%rsp),%rdi
1145 call __ecp_nistz256_sqr_montq
1146
1147 pcmpeqd %xmm4,%xmm5
1148 pshufd $177,%xmm3,%xmm4
1149 por %xmm3,%xmm4
1150 pshufd $0,%xmm5,%xmm5
1151 pshufd $30,%xmm4,%xmm3
1152 por %xmm3,%xmm4
1153 pxor %xmm3,%xmm3
1154 pcmpeqd %xmm3,%xmm4
1155 pshufd $0,%xmm4,%xmm4
1156 movq 64+0(%rbx),%rax
1157 movq 64+8(%rbx),%r14
1158 movq 64+16(%rbx),%r15
1159 movq 64+24(%rbx),%r8
1160
1161 leaq 64-0(%rbx),%rsi
1162 leaq 32(%rsp),%rdi
1163 call __ecp_nistz256_sqr_montq
1164
1165 movq 544(%rsp),%rax
1166 leaq 544(%rsp),%rbx
1167 movq 0+96(%rsp),%r9
1168 movq 8+96(%rsp),%r10
1169 leaq 0+96(%rsp),%rsi
1170 movq 16+96(%rsp),%r11
1171 movq 24+96(%rsp),%r12
1172 leaq 224(%rsp),%rdi
1173 call __ecp_nistz256_mul_montq
1174
1175 movq 448(%rsp),%rax
1176 leaq 448(%rsp),%rbx
1177 movq 0+32(%rsp),%r9
1178 movq 8+32(%rsp),%r10
1179 leaq 0+32(%rsp),%rsi
1180 movq 16+32(%rsp),%r11
1181 movq 24+32(%rsp),%r12
1182 leaq 256(%rsp),%rdi
1183 call __ecp_nistz256_mul_montq
1184
1185 movq 416(%rsp),%rax
1186 leaq 416(%rsp),%rbx
1187 movq 0+224(%rsp),%r9
1188 movq 8+224(%rsp),%r10
1189 leaq 0+224(%rsp),%rsi
1190 movq 16+224(%rsp),%r11
1191 movq 24+224(%rsp),%r12
1192 leaq 224(%rsp),%rdi
1193 call __ecp_nistz256_mul_montq
1194
1195 movq 512(%rsp),%rax
1196 leaq 512(%rsp),%rbx
1197 movq 0+256(%rsp),%r9
1198 movq 8+256(%rsp),%r10
1199 leaq 0+256(%rsp),%rsi
1200 movq 16+256(%rsp),%r11
1201 movq 24+256(%rsp),%r12
1202 leaq 256(%rsp),%rdi
1203 call __ecp_nistz256_mul_montq
1204
1205 leaq 224(%rsp),%rbx
1206 leaq 64(%rsp),%rdi
1207 call __ecp_nistz256_sub_fromq
1208
1209 orq %r13,%r12
1210 movdqa %xmm4,%xmm2
1211 orq %r8,%r12
1212 orq %r9,%r12
1213 por %xmm5,%xmm2
1214 .byte 102,73,15,110,220
1215
1216 movq 384(%rsp),%rax
1217 leaq 384(%rsp),%rbx
1218 movq 0+96(%rsp),%r9
1219 movq 8+96(%rsp),%r10
1220 leaq 0+96(%rsp),%rsi
1221 movq 16+96(%rsp),%r11
1222 movq 24+96(%rsp),%r12
1223 leaq 160(%rsp),%rdi
1224 call __ecp_nistz256_mul_montq
1225
1226 movq 480(%rsp),%rax
1227 leaq 480(%rsp),%rbx
1228 movq 0+32(%rsp),%r9
1229 movq 8+32(%rsp),%r10
1230 leaq 0+32(%rsp),%rsi
1231 movq 16+32(%rsp),%r11
1232 movq 24+32(%rsp),%r12
1233 leaq 192(%rsp),%rdi
1234 call __ecp_nistz256_mul_montq
1235
1236 leaq 160(%rsp),%rbx
1237 leaq 0(%rsp),%rdi
1238 call __ecp_nistz256_sub_fromq
1239
1240 orq %r13,%r12
1241 orq %r8,%r12
1242 orq %r9,%r12
1243
1244 .byte 0x3e
1245 jnz .Ladd_proceedq
1246 .byte 102,73,15,126,208
1247 .byte 102,73,15,126,217
1248 testq %r8,%r8
1249 jnz .Ladd_proceedq
1250 testq %r9,%r9
1251 jz .Ladd_proceedq
1252
1253 .byte 102,72,15,126,199
1254 pxor %xmm0,%xmm0
1255 movdqu %xmm0,0(%rdi)
1256 movdqu %xmm0,16(%rdi)
1257 movdqu %xmm0,32(%rdi)
1258 movdqu %xmm0,48(%rdi)
1259 movdqu %xmm0,64(%rdi)
1260 movdqu %xmm0,80(%rdi)
1261 jmp .Ladd_doneq
1262
1263 .align 32
1264 .Ladd_proceedq:
1265 movq 0+64(%rsp),%rax
1266 movq 8+64(%rsp),%r14
1267 leaq 0+64(%rsp),%rsi
1268 movq 16+64(%rsp),%r15
1269 movq 24+64(%rsp),%r8
1270 leaq 96(%rsp),%rdi
1271 call __ecp_nistz256_sqr_montq
1272
1273 movq 448(%rsp),%rax
1274 leaq 448(%rsp),%rbx
1275 movq 0+0(%rsp),%r9
1276 movq 8+0(%rsp),%r10
1277 leaq 0+0(%rsp),%rsi
1278 movq 16+0(%rsp),%r11
1279 movq 24+0(%rsp),%r12
1280 leaq 352(%rsp),%rdi
1281 call __ecp_nistz256_mul_montq
1282
1283 movq 0+0(%rsp),%rax
1284 movq 8+0(%rsp),%r14
1285 leaq 0+0(%rsp),%rsi
1286 movq 16+0(%rsp),%r15
1287 movq 24+0(%rsp),%r8
1288 leaq 32(%rsp),%rdi
1289 call __ecp_nistz256_sqr_montq
1290
1291 movq 544(%rsp),%rax
1292 leaq 544(%rsp),%rbx
1293 movq 0+352(%rsp),%r9
1294 movq 8+352(%rsp),%r10
1295 leaq 0+352(%rsp),%rsi
1296 movq 16+352(%rsp),%r11
1297 movq 24+352(%rsp),%r12
1298 leaq 352(%rsp),%rdi
1299 call __ecp_nistz256_mul_montq
1300
1301 movq 0(%rsp),%rax
1302 leaq 0(%rsp),%rbx
1303 movq 0+32(%rsp),%r9
1304 movq 8+32(%rsp),%r10
1305 leaq 0+32(%rsp),%rsi
1306 movq 16+32(%rsp),%r11
1307 movq 24+32(%rsp),%r12
1308 leaq 128(%rsp),%rdi
1309 call __ecp_nistz256_mul_montq
1310
1311 movq 160(%rsp),%rax
1312 leaq 160(%rsp),%rbx
1313 movq 0+32(%rsp),%r9
1314 movq 8+32(%rsp),%r10
1315 leaq 0+32(%rsp),%rsi
1316 movq 16+32(%rsp),%r11
1317 movq 24+32(%rsp),%r12
1318 leaq 192(%rsp),%rdi
1319 call __ecp_nistz256_mul_montq
1320
1321
1322
1323
1324 addq %r12,%r12
1325 leaq 96(%rsp),%rsi
1326 adcq %r13,%r13
1327 movq %r12,%rax
1328 adcq %r8,%r8
1329 adcq %r9,%r9
1330 movq %r13,%rbp
1331 sbbq %r11,%r11
1332
1333 subq $-1,%r12
1334 movq %r8,%rcx
1335 sbbq %r14,%r13
1336 sbbq $0,%r8
1337 movq %r9,%r10
1338 sbbq %r15,%r9
1339 testq %r11,%r11
1340
1341 cmovzq %rax,%r12
1342 movq 0(%rsi),%rax
1343 cmovzq %rbp,%r13
1344 movq 8(%rsi),%rbp
1345 cmovzq %rcx,%r8
1346 movq 16(%rsi),%rcx
1347 cmovzq %r10,%r9
1348 movq 24(%rsi),%r10
1349
1350 call __ecp_nistz256_subq
1351
1352 leaq 128(%rsp),%rbx
1353 leaq 288(%rsp),%rdi
1354 call __ecp_nistz256_sub_fromq
1355
1356 movq 192+0(%rsp),%rax
1357 movq 192+8(%rsp),%rbp
1358 movq 192+16(%rsp),%rcx
1359 movq 192+24(%rsp),%r10
1360 leaq 320(%rsp),%rdi
1361
1362 call __ecp_nistz256_subq
1363
1364 movq %r12,0(%rdi)
1365 movq %r13,8(%rdi)
1366 movq %r8,16(%rdi)
1367 movq %r9,24(%rdi)
1368 movq 128(%rsp),%rax
1369 leaq 128(%rsp),%rbx
1370 movq 0+224(%rsp),%r9
1371 movq 8+224(%rsp),%r10
1372 leaq 0+224(%rsp),%rsi
1373 movq 16+224(%rsp),%r11
1374 movq 24+224(%rsp),%r12
1375 leaq 256(%rsp),%rdi
1376 call __ecp_nistz256_mul_montq
1377
1378 movq 320(%rsp),%rax
1379 leaq 320(%rsp),%rbx
1380 movq 0+64(%rsp),%r9
1381 movq 8+64(%rsp),%r10
1382 leaq 0+64(%rsp),%rsi
1383 movq 16+64(%rsp),%r11
1384 movq 24+64(%rsp),%r12
1385 leaq 320(%rsp),%rdi
1386 call __ecp_nistz256_mul_montq
1387
1388 leaq 256(%rsp),%rbx
1389 leaq 320(%rsp),%rdi
1390 call __ecp_nistz256_sub_fromq
1391
1392 .byte 102,72,15,126,199
1393
1394 movdqa %xmm5,%xmm0
1395 movdqa %xmm5,%xmm1
1396 pandn 352(%rsp),%xmm0
1397 movdqa %xmm5,%xmm2
1398 pandn 352+16(%rsp),%xmm1
1399 movdqa %xmm5,%xmm3
1400 pand 544(%rsp),%xmm2
1401 pand 544+16(%rsp),%xmm3
1402 por %xmm0,%xmm2
1403 por %xmm1,%xmm3
1404
1405 movdqa %xmm4,%xmm0
1406 movdqa %xmm4,%xmm1
1407 pandn %xmm2,%xmm0
1408 movdqa %xmm4,%xmm2
1409 pandn %xmm3,%xmm1
1410 movdqa %xmm4,%xmm3
1411 pand 448(%rsp),%xmm2
1412 pand 448+16(%rsp),%xmm3
1413 por %xmm0,%xmm2
1414 por %xmm1,%xmm3
1415 movdqu %xmm2,64(%rdi)
1416 movdqu %xmm3,80(%rdi)
1417
1418 movdqa %xmm5,%xmm0
1419 movdqa %xmm5,%xmm1
1420 pandn 288(%rsp),%xmm0
1421 movdqa %xmm5,%xmm2
1422 pandn 288+16(%rsp),%xmm1
1423 movdqa %xmm5,%xmm3
1424 pand 480(%rsp),%xmm2
1425 pand 480+16(%rsp),%xmm3
1426 por %xmm0,%xmm2
1427 por %xmm1,%xmm3
1428
1429 movdqa %xmm4,%xmm0
1430 movdqa %xmm4,%xmm1
1431 pandn %xmm2,%xmm0
1432 movdqa %xmm4,%xmm2
1433 pandn %xmm3,%xmm1
1434 movdqa %xmm4,%xmm3
1435 pand 384(%rsp),%xmm2
1436 pand 384+16(%rsp),%xmm3
1437 por %xmm0,%xmm2
1438 por %xmm1,%xmm3
1439 movdqu %xmm2,0(%rdi)
1440 movdqu %xmm3,16(%rdi)
1441
1442 movdqa %xmm5,%xmm0
1443 movdqa %xmm5,%xmm1
1444 pandn 320(%rsp),%xmm0
1445 movdqa %xmm5,%xmm2
1446 pandn 320+16(%rsp),%xmm1
1447 movdqa %xmm5,%xmm3
1448 pand 512(%rsp),%xmm2
1449 pand 512+16(%rsp),%xmm3
1450 por %xmm0,%xmm2
1451 por %xmm1,%xmm3
1452
1453 movdqa %xmm4,%xmm0
1454 movdqa %xmm4,%xmm1
1455 pandn %xmm2,%xmm0
1456 movdqa %xmm4,%xmm2
1457 pandn %xmm3,%xmm1
1458 movdqa %xmm4,%xmm3
1459 pand 416(%rsp),%xmm2
1460 pand 416+16(%rsp),%xmm3
1461 por %xmm0,%xmm2
1462 por %xmm1,%xmm3
1463 movdqu %xmm2,32(%rdi)
1464 movdqu %xmm3,48(%rdi)
1465
1466 .Ladd_doneq:
1467 addq $576+8,%rsp
1468 popq %r15
1469 popq %r14
1470 popq %r13
1471 popq %r12
1472 popq %rbx
1473 popq %rbp
1474 .byte 0xf3,0xc3
1475 .size ecp_nistz256_point_add,.-ecp_nistz256_point_add
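
ecp_nistz256_point_add is full Jacobian addition: U1 = X1*Z2^2, U2 = X2*Z1^2, S1 = Y1*Z2^3, S2 = Y2*Z1^3, H = U2 - U1, R = S2 - S1, then X3 = R^2 - H^3 - 2*U1*H^2, Y3 = R*(U1*H^2 - X3) - S1*H^3, Z3 = Z1*Z2*H. The or-together tests around .Ladd_proceedq and the pand/pandn blend at the end divert the special cases (an input at infinity, or H = R = 0) away from these generic formulas. A field-level sketch of the generic path only, reusing the helper sketches above:

    def point_add(P, Q):
        X1, Y1, Z1 = P
        X2, Y2, Z2 = Q
        Z1sq, Z2sq = sqr_mont(Z1), sqr_mont(Z2)
        U1, U2 = mul_mont(X1, Z2sq), mul_mont(X2, Z1sq)
        S1 = mul_mont(Y1, mul_mont(Z2, Z2sq))
        S2 = mul_mont(Y2, mul_mont(Z1, Z1sq))
        H, R = sub_mod(U2, U1), sub_mod(S2, S1)
        Hsq = sqr_mont(H)
        Hcube = mul_mont(H, Hsq)
        V = mul_mont(U1, Hsq)
        X3 = sub_mod(sub_mod(sqr_mont(R), Hcube), add_mod(V, V))
        Y3 = sub_mod(mul_mont(R, sub_mod(V, X3)), mul_mont(S1, Hcube))
        Z3 = mul_mont(H, mul_mont(Z1, Z2))
        return X3, Y3, Z3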
1476 .globl ecp_nistz256_point_add_affine
1477 .hidden ecp_nistz256_point_add_affine
1478 .type ecp_nistz256_point_add_affine,@function
1479 .align 32
1480 ecp_nistz256_point_add_affine:
1481 pushq %rbp
1482 pushq %rbx
1483 pushq %r12
1484 pushq %r13
1485 pushq %r14
1486 pushq %r15
1487 subq $480+8,%rsp
1488
1489 movdqu 0(%rsi),%xmm0
1490 movq %rdx,%rbx
1491 movdqu 16(%rsi),%xmm1
1492 movdqu 32(%rsi),%xmm2
1493 movdqu 48(%rsi),%xmm3
1494 movdqu 64(%rsi),%xmm4
1495 movdqu 80(%rsi),%xmm5
1496 movq 64+0(%rsi),%rax
1497 movq 64+8(%rsi),%r14
1498 movq 64+16(%rsi),%r15
1499 movq 64+24(%rsi),%r8
1500 movdqa %xmm0,320(%rsp)
1501 movdqa %xmm1,320+16(%rsp)
1502 por %xmm0,%xmm1
1503 movdqa %xmm2,352(%rsp)
1504 movdqa %xmm3,352+16(%rsp)
1505 por %xmm2,%xmm3
1506 movdqa %xmm4,384(%rsp)
1507 movdqa %xmm5,384+16(%rsp)
1508 por %xmm1,%xmm3
1509
1510 movdqu 0(%rbx),%xmm0
1511 pshufd $177,%xmm3,%xmm5
1512 movdqu 16(%rbx),%xmm1
1513 movdqu 32(%rbx),%xmm2
1514 por %xmm3,%xmm5
1515 movdqu 48(%rbx),%xmm3
1516 movdqa %xmm0,416(%rsp)
1517 pshufd $30,%xmm5,%xmm4
1518 movdqa %xmm1,416+16(%rsp)
1519 por %xmm0,%xmm1
1520 .byte 102,72,15,110,199
1521 movdqa %xmm2,448(%rsp)
1522 movdqa %xmm3,448+16(%rsp)
1523 por %xmm2,%xmm3
1524 por %xmm4,%xmm5
1525 pxor %xmm4,%xmm4
1526 por %xmm1,%xmm3
1527
1528 leaq 64-0(%rsi),%rsi
1529 leaq 32(%rsp),%rdi
1530 call __ecp_nistz256_sqr_montq
1531
1532 pcmpeqd %xmm4,%xmm5
1533 pshufd $177,%xmm3,%xmm4
1534 movq 0(%rbx),%rax
1535
1536 movq %r12,%r9
1537 por %xmm3,%xmm4
1538 pshufd $0,%xmm5,%xmm5
1539 pshufd $30,%xmm4,%xmm3
1540 movq %r13,%r10
1541 por %xmm3,%xmm4
1542 pxor %xmm3,%xmm3
1543 movq %r14,%r11
1544 pcmpeqd %xmm3,%xmm4
1545 pshufd $0,%xmm4,%xmm4
1546
1547 leaq 32-0(%rsp),%rsi
1548 movq %r15,%r12
1549 leaq 0(%rsp),%rdi
1550 call __ecp_nistz256_mul_montq
1551
1552 leaq 320(%rsp),%rbx
1553 leaq 64(%rsp),%rdi
1554 call __ecp_nistz256_sub_fromq
1555
1556 movq 384(%rsp),%rax
1557 leaq 384(%rsp),%rbx
1558 movq 0+32(%rsp),%r9
1559 movq 8+32(%rsp),%r10
1560 leaq 0+32(%rsp),%rsi
1561 movq 16+32(%rsp),%r11
1562 movq 24+32(%rsp),%r12
1563 leaq 32(%rsp),%rdi
1564 call __ecp_nistz256_mul_montq
1565
1566 movq 384(%rsp),%rax
1567 leaq 384(%rsp),%rbx
1568 movq 0+64(%rsp),%r9
1569 movq 8+64(%rsp),%r10
1570 leaq 0+64(%rsp),%rsi
1571 movq 16+64(%rsp),%r11
1572 movq 24+64(%rsp),%r12
1573 leaq 288(%rsp),%rdi
1574 call __ecp_nistz256_mul_montq
1575
1576 movq 448(%rsp),%rax
1577 leaq 448(%rsp),%rbx
1578 movq 0+32(%rsp),%r9
1579 movq 8+32(%rsp),%r10
1580 leaq 0+32(%rsp),%rsi
1581 movq 16+32(%rsp),%r11
1582 movq 24+32(%rsp),%r12
1583 leaq 32(%rsp),%rdi
1584 call __ecp_nistz256_mul_montq
1585
1586 leaq 352(%rsp),%rbx
1587 leaq 96(%rsp),%rdi
1588 call __ecp_nistz256_sub_fromq
1589
1590 movq 0+64(%rsp),%rax
1591 movq 8+64(%rsp),%r14
1592 leaq 0+64(%rsp),%rsi
1593 movq 16+64(%rsp),%r15
1594 movq 24+64(%rsp),%r8
1595 leaq 128(%rsp),%rdi
1596 call __ecp_nistz256_sqr_montq
1597
1598 movq 0+96(%rsp),%rax
1599 movq 8+96(%rsp),%r14
1600 leaq 0+96(%rsp),%rsi
1601 movq 16+96(%rsp),%r15
1602 movq 24+96(%rsp),%r8
1603 leaq 192(%rsp),%rdi
1604 call __ecp_nistz256_sqr_montq
1605
1606 movq 128(%rsp),%rax
1607 leaq 128(%rsp),%rbx
1608 movq 0+64(%rsp),%r9
1609 movq 8+64(%rsp),%r10
1610 leaq 0+64(%rsp),%rsi
1611 movq 16+64(%rsp),%r11
1612 movq 24+64(%rsp),%r12
1613 leaq 160(%rsp),%rdi
1614 call __ecp_nistz256_mul_montq
1615
1616 movq 320(%rsp),%rax
1617 leaq 320(%rsp),%rbx
1618 movq 0+128(%rsp),%r9
1619 movq 8+128(%rsp),%r10
1620 leaq 0+128(%rsp),%rsi
1621 movq 16+128(%rsp),%r11
1622 movq 24+128(%rsp),%r12
1623 leaq 0(%rsp),%rdi
1624 call __ecp_nistz256_mul_montq
1625
1626
1627
1628
1629 addq %r12,%r12
1630 leaq 192(%rsp),%rsi
1631 adcq %r13,%r13
1632 movq %r12,%rax
1633 adcq %r8,%r8
1634 adcq %r9,%r9
1635 movq %r13,%rbp
1636 sbbq %r11,%r11
1637
1638 subq $-1,%r12
1639 movq %r8,%rcx
1640 sbbq %r14,%r13
1641 sbbq $0,%r8
1642 movq %r9,%r10
1643 sbbq %r15,%r9
1644 testq %r11,%r11
1645
1646 cmovzq %rax,%r12
1647 movq 0(%rsi),%rax
1648 cmovzq %rbp,%r13
1649 movq 8(%rsi),%rbp
1650 cmovzq %rcx,%r8
1651 movq 16(%rsi),%rcx
1652 cmovzq %r10,%r9
1653 movq 24(%rsi),%r10
1654
1655 call __ecp_nistz256_subq
1656
1657 leaq 160(%rsp),%rbx
1658 leaq 224(%rsp),%rdi
1659 call __ecp_nistz256_sub_fromq
1660
1661 movq 0+0(%rsp),%rax
1662 movq 0+8(%rsp),%rbp
1663 movq 0+16(%rsp),%rcx
1664 movq 0+24(%rsp),%r10
1665 leaq 64(%rsp),%rdi
1666
1667 call __ecp_nistz256_subq
1668
1669 movq %r12,0(%rdi)
1670 movq %r13,8(%rdi)
1671 movq %r8,16(%rdi)
1672 movq %r9,24(%rdi)
1673 movq 352(%rsp),%rax
1674 leaq 352(%rsp),%rbx
1675 movq 0+160(%rsp),%r9
1676 movq 8+160(%rsp),%r10
1677 leaq 0+160(%rsp),%rsi
1678 movq 16+160(%rsp),%r11
1679 movq 24+160(%rsp),%r12
1680 leaq 32(%rsp),%rdi
1681 call __ecp_nistz256_mul_montq
1682
1683 movq 96(%rsp),%rax
1684 leaq 96(%rsp),%rbx
1685 movq 0+64(%rsp),%r9
1686 movq 8+64(%rsp),%r10
1687 leaq 0+64(%rsp),%rsi
1688 movq 16+64(%rsp),%r11
1689 movq 24+64(%rsp),%r12
1690 leaq 64(%rsp),%rdi
1691 call __ecp_nistz256_mul_montq
1692
1693 leaq 32(%rsp),%rbx
1694 leaq 256(%rsp),%rdi
1695 call __ecp_nistz256_sub_fromq
1696
1697 .byte 102,72,15,126,199
1698
1699 movdqa %xmm5,%xmm0
1700 movdqa %xmm5,%xmm1
1701 pandn 288(%rsp),%xmm0
1702 movdqa %xmm5,%xmm2
1703 pandn 288+16(%rsp),%xmm1
1704 movdqa %xmm5,%xmm3
1705 pand .LONE_mont(%rip),%xmm2
1706 pand .LONE_mont+16(%rip),%xmm3
1707 por %xmm0,%xmm2
1708 por %xmm1,%xmm3
1709
1710 movdqa %xmm4,%xmm0
1711 movdqa %xmm4,%xmm1
1712 pandn %xmm2,%xmm0
1713 movdqa %xmm4,%xmm2
1714 pandn %xmm3,%xmm1
1715 movdqa %xmm4,%xmm3
1716 pand 384(%rsp),%xmm2
1717 pand 384+16(%rsp),%xmm3
1718 por %xmm0,%xmm2
1719 por %xmm1,%xmm3
1720 movdqu %xmm2,64(%rdi)
1721 movdqu %xmm3,80(%rdi)
1722
1723 movdqa %xmm5,%xmm0
1724 movdqa %xmm5,%xmm1
1725 pandn 224(%rsp),%xmm0
1726 movdqa %xmm5,%xmm2
1727 pandn 224+16(%rsp),%xmm1
1728 movdqa %xmm5,%xmm3
1729 pand 416(%rsp),%xmm2
1730 pand 416+16(%rsp),%xmm3
1731 por %xmm0,%xmm2
1732 por %xmm1,%xmm3
1733
1734 movdqa %xmm4,%xmm0
1735 movdqa %xmm4,%xmm1
1736 pandn %xmm2,%xmm0
1737 movdqa %xmm4,%xmm2
1738 pandn %xmm3,%xmm1
1739 movdqa %xmm4,%xmm3
1740 pand 320(%rsp),%xmm2
1741 pand 320+16(%rsp),%xmm3
1742 por %xmm0,%xmm2
1743 por %xmm1,%xmm3
1744 movdqu %xmm2,0(%rdi)
1745 movdqu %xmm3,16(%rdi)
1746
1747 movdqa %xmm5,%xmm0
1748 movdqa %xmm5,%xmm1
1749 pandn 256(%rsp),%xmm0
1750 movdqa %xmm5,%xmm2
1751 pandn 256+16(%rsp),%xmm1
1752 movdqa %xmm5,%xmm3
1753 pand 448(%rsp),%xmm2
1754 pand 448+16(%rsp),%xmm3
1755 por %xmm0,%xmm2
1756 por %xmm1,%xmm3
1757
1758 movdqa %xmm4,%xmm0
1759 movdqa %xmm4,%xmm1
1760 pandn %xmm2,%xmm0
1761 movdqa %xmm4,%xmm2
1762 pandn %xmm3,%xmm1
1763 movdqa %xmm4,%xmm3
1764 pand 352(%rsp),%xmm2
1765 pand 352+16(%rsp),%xmm3
1766 por %xmm0,%xmm2
1767 por %xmm1,%xmm3
1768 movdqu %xmm2,32(%rdi)
1769 movdqu %xmm3,48(%rdi)
1770
1771 addq $480+8,%rsp
1772 popq %r15
1773 popq %r14
1774 popq %r13
1775 popq %r12
1776 popq %rbx
1777 popq %rbp
1778 .byte 0xf3,0xc3
1779 .size ecp_nistz256_point_add_affine,.-ecp_nistz256_point_add_affine
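
ecp_nistz256_point_add_affine is the mixed-addition variant used with precomputed tables: the second operand is an affine point (implicitly Z2 = 1, coordinates stored in Montgomery form), so U1 = X1 and S1 = Y1 come for free and the Z2-related squarings and multiplications drop out; the final masked blend substitutes the affine input, together with .LONE_mont as its Z coordinate, when the Jacobian operand is the point at infinity. A sketch of the generic path under the same assumptions as the previous block:

    def point_add_affine(P, x2, y2):
        X1, Y1, Z1 = P
        Z1sq = sqr_mont(Z1)
        U2 = mul_mont(x2, Z1sq)
        S2 = mul_mont(y2, mul_mont(Z1, Z1sq))
        H, R = sub_mod(U2, X1), sub_mod(S2, Y1)
        Hsq = sqr_mont(H)
        Hcube = mul_mont(H, Hsq)
        V = mul_mont(X1, Hsq)
        X3 = sub_mod(sub_mod(sqr_mont(R), Hcube), add_mod(V, V))
        Y3 = sub_mod(mul_mont(R, sub_mod(V, X3)), mul_mont(Y1, Hcube))
        Z3 = mul_mont(H, Z1)
        return X3, Y3, Z3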
1780 #endif