Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(160)

Side by Side Diff: third_party/boringssl/linux-x86_64/crypto/ec/p256-x86_64-asm.S

Issue 2354623003: Pull boringssl generated source from boringssl_gen (Closed)
Patch Set: . Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 #if defined(__x86_64__)
2 .text
3 .extern OPENSSL_ia32cap_P
4 .hidden OPENSSL_ia32cap_P
5
6
/*
 * Read-only constants used by the P-256 field arithmetic in this file.
 *
 * .Lpoly is the field prime p = 2^256 - 2^224 + 2^192 + 2^96 - 1 stored as
 * four 64-bit limbs, least-significant limb first.  Every limb must be one
 * contiguous hex literal; the last limb had been split by a stray space
 * ("0xffffffff00 000001"), which is not a valid .quad operand.
 */
7 .align 64
8 .Lpoly:
9 .quad 0xffffffffffffffff, 0x00000000ffffffff, 0x0000000000000000, 0xffffffff00000001
10
/* Eight dwords of 1: loaded by ecp_nistz256_select_w5 / ecp_nistz256_select_w7
 * as the starting value and per-iteration increment of their entry counter. */
11 .LOne:
12 .long 1,1,1,1,1,1,1,1
/* Eight dwords of 2 and eight dwords of 3. */
13 .LTwo:
14 .long 2,2,2,2,2,2,2,2
15 .LThree:
16 .long 3,3,3,3,3,3,3,3
/*
 * 2^256 mod p (= 2^224 - 2^192 - 2^96 + 1), four little-endian 64-bit limbs.
 * The last limb had the same stray-space split ("0x00000000ff fffffe").
 */
17 .LONE_mont:
18 .quad 0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe
19
/*
 * ecp_nistz256_mul_by_2
 *   In:  rdi = destination (4 x 64-bit limbs, little-endian)
 *        rsi = source a (4 limbs)
 *   Out: 0..24(%rdi) = 2*a mod p, fully reduced for a < p
 *   Uses rax, rcx, rdx, rsi, r8-r11; r12/r13 are saved and restored.
 *
 * The doubled value is 257 bits wide: r13 holds the carry-out (as 0 or -1)
 * and r8..r11 the low 256 bits.  p is subtracted from the low 256 bits and
 * the resulting borrow is then subtracted from r13, so the final carry flag
 * is set exactly when the 257-bit value is smaller than p.  Only in that
 * case are the pre-subtraction limbs restored.
 *
 * Previously the choice was made on the carry-out alone (testq/cmovz), which
 * left results in the range [p, 2^256) unreduced.
 */
20 .type ecp_nistz256_mul_by_2,@function
21 .align 64
22 ecp_nistz256_mul_by_2:
23 pushq %r12
24 pushq %r13
25
/* r8..r11 = 2*a (low 256 bits); rax, rdx keep copies of the two low limbs. */
26 movq 0(%rsi),%r8
27 movq 8(%rsi),%r9
28 addq %r8,%r8
29 movq 16(%rsi),%r10
30 adcq %r9,%r9
31 movq 24(%rsi),%r11
32 leaq .Lpoly(%rip),%rsi
33 movq %r8,%rax
34 adcq %r10,%r10
35 adcq %r11,%r11
36 movq %r9,%rdx
/* r13 = 0 if the doubling did not carry out of 256 bits, -1 if it did. */
37 sbbq %r13,%r13
38
/* Subtract p; rcx, r12 keep copies of the two high limbs before subtraction. */
39 subq 0(%rsi),%r8
40 movq %r10,%rcx
41 sbbq 8(%rsi),%r9
42 sbbq 16(%rsi),%r10
43 movq %r11,%r12
44 sbbq 24(%rsi),%r11
/* Fold the borrow into the carry word: CF=1 only if there was no carry-out
 * (r13 == 0) and the subtraction borrowed, i.e. 2*a < p. */
45 sbbq $0,%r13
46
/* 2*a < p: keep the un-subtracted limbs. */
47 cmovcq %rax,%r8
48 cmovcq %rdx,%r9
49 movq %r8,0(%rdi)
50 cmovcq %rcx,%r10
51 movq %r9,8(%rdi)
52 cmovcq %r12,%r11
53 movq %r10,16(%rdi)
54 movq %r11,24(%rdi)
55
56 popq %r13
57 popq %r12
/* 0xf3,0xc3 encodes "rep ret". */
58 .byte 0xf3,0xc3
59 .size ecp_nistz256_mul_by_2,.-ecp_nistz256_mul_by_2
60
61
62
/*
 * ecp_nistz256_neg
 *   In:  rdi = destination (4 x 64-bit limbs, little-endian)
 *        rsi = source a (4 limbs)
 *   Out: 0..24(%rdi) = 0 when a == 0, otherwise p - a
 *   Uses rax, rcx, rdx, rsi, r8-r11; r12/r13 are saved and restored.
 *
 * Computes 0 - a, records the borrow in r13, adds .Lpoly to the difference
 * and keeps the un-adjusted difference only when there was no borrow.
 */
63 .globl ecp_nistz256_neg
64 .hidden ecp_nistz256_neg
65 .type ecp_nistz256_neg,@function
66 .align 32
67 ecp_nistz256_neg:
68 pushq %r12
69 pushq %r13
70
71 xorq %r8,%r8
72 xorq %r9,%r9
73 xorq %r10,%r10
74 xorq %r11,%r11
75 xorq %r13,%r13
76
/* r8..r11 = 0 - a; rax, rdx keep copies of the two low limbs. */
77 subq 0(%rsi),%r8
78 sbbq 8(%rsi),%r9
79 sbbq 16(%rsi),%r10
80 movq %r8,%rax
81 sbbq 24(%rsi),%r11
82 leaq .Lpoly(%rip),%rsi
83 movq %r9,%rdx
/* r13 = 0 - borrow: zero exactly when the subtraction did not borrow. */
84 sbbq $0,%r13
85
/* Add p; rcx, r12 keep copies of the two high limbs before the addition. */
86 addq 0(%rsi),%r8
87 movq %r10,%rcx
88 adcq 8(%rsi),%r9
89 adcq 16(%rsi),%r10
90 movq %r11,%r12
91 adcq 24(%rsi),%r11
92 testq %r13,%r13
93
/* No borrow (r13 == 0): keep the difference without p added. */
94 cmovzq %rax,%r8
95 cmovzq %rdx,%r9
96 movq %r8,0(%rdi)
97 cmovzq %rcx,%r10
98 movq %r9,8(%rdi)
99 cmovzq %r12,%r11
100 movq %r10,16(%rdi)
101 movq %r11,24(%rdi)
102
103 popq %r13
104 popq %r12
/* 0xf3,0xc3 encodes "rep ret". */
105 .byte 0xf3,0xc3
106 .size ecp_nistz256_neg,.-ecp_nistz256_neg
107
108
109
110
111
112
/*
 * ecp_nistz256_mul_mont
 *   In:  rdi = destination (4 x 64-bit limbs)
 *        rsi = pointer to operand a (4 limbs)
 *        rdx = pointer to operand b (4 limbs)
 *   Saves rbp, rbx, r12-r15, loads the register arguments expected by
 *   __ecp_nistz256_mul_montq (rbx = b, rax = b[0], r9..r12 = a[0..3]),
 *   calls it, and restores the saved registers.
 */
113 .globl ecp_nistz256_mul_mont
114 .hidden ecp_nistz256_mul_mont
115 .type ecp_nistz256_mul_mont,@function
116 .align 32
117 ecp_nistz256_mul_mont:
118 .Lmul_mont:
119 pushq %rbp
120 pushq %rbx
121 pushq %r12
122 pushq %r13
123 pushq %r14
124 pushq %r15
/* rbx = b, rax = b[0], r9..r12 = a[0..3]. */
125 movq %rdx,%rbx
126 movq 0(%rdx),%rax
127 movq 0(%rsi),%r9
128 movq 8(%rsi),%r10
129 movq 16(%rsi),%r11
130 movq 24(%rsi),%r12
131
132 call __ecp_nistz256_mul_montq
133 .Lmul_mont_done:
134 popq %r15
135 popq %r14
136 popq %r13
137 popq %r12
138 popq %rbx
139 popq %rbp
/* 0xf3,0xc3 encodes "rep ret". */
140 .byte 0xf3,0xc3
141 .size ecp_nistz256_mul_mont,.-ecp_nistz256_mul_mont
142
/*
 * __ecp_nistz256_mul_montq  (internal, register-based calling convention)
 *   In:  rax      = b[0]
 *        rbx      = pointer to b (limbs 1..3 are read from memory)
 *        rsi      = pointer to a (re-read from memory for b[1..3])
 *        r9..r12  = a[0..3]
 *        rdi      = destination (4 x 64-bit limbs)
 *   Out: r12, r13, r8, r9 = result limbs (least-significant first),
 *        also stored at 0..24(%rdi)
 *        r14 = .Lpoly limb 1, r15 = .Lpoly limb 3 (loaded here, never
 *        overwritten afterwards)
 *   Clobbers rax, rbx, rcx, rdx, rbp, r10, r11 and the flags.
 *
 * Structure: four rounds, each "accumulate a * b[i]" followed by one
 * reduction step that folds the lowest accumulator limb into the higher
 * limbs, then a final conditional subtraction of p.
 * NOTE(review): this is presumably word-by-word Montgomery multiplication
 * modulo .Lpoly - confirm against the generator script.
 */
143 .type __ecp_nistz256_mul_montq,@function
144 .align 32
145 __ecp_nistz256_mul_montq:
146
147
/* Round 0 multiply: (r8, r9, r10, r11, r12) = a * b[0]; rbp keeps b[0]. */
148 movq %rax,%rbp
149 mulq %r9
150 movq .Lpoly+8(%rip),%r14
151 movq %rax,%r8
152 movq %rbp,%rax
153 movq %rdx,%r9
154
155 mulq %r10
156 movq .Lpoly+24(%rip),%r15
157 addq %rax,%r9
158 movq %rbp,%rax
159 adcq $0,%rdx
160 movq %rdx,%r10
161
162 mulq %r11
163 addq %rax,%r10
164 movq %rbp,%rax
165 adcq $0,%rdx
166 movq %rdx,%r11
167
168 mulq %r12
169 addq %rax,%r11
170 movq %r8,%rax
171 adcq $0,%rdx
172 xorq %r13,%r13
173 movq %rdx,%r12
174
175
176
177
178
179
180
181
182
183
/* Round 0 reduction: fold r8 into r9..r13 using r8<<32, r8>>32 and
 * r8 * .Lpoly[3] (rdx:rax); also fetch b[1]. r8 is then cleared for reuse
 * as the next top limb. */
184 movq %r8,%rbp
185 shlq $32,%r8
186 mulq %r15
187 shrq $32,%rbp
188 addq %r8,%r9
189 adcq %rbp,%r10
190 adcq %rax,%r11
191 movq 8(%rbx),%rax
192 adcq %rdx,%r12
193 adcq $0,%r13
194 xorq %r8,%r8
195
196
197
/* Round 1 multiply: (r9..r13, r8) += a * b[1]; rbp keeps b[1], rcx carries
 * the high half of each partial product into the next limb. */
198 movq %rax,%rbp
199 mulq 0(%rsi)
200 addq %rax,%r9
201 movq %rbp,%rax
202 adcq $0,%rdx
203 movq %rdx,%rcx
204
205 mulq 8(%rsi)
206 addq %rcx,%r10
207 adcq $0,%rdx
208 addq %rax,%r10
209 movq %rbp,%rax
210 adcq $0,%rdx
211 movq %rdx,%rcx
212
213 mulq 16(%rsi)
214 addq %rcx,%r11
215 adcq $0,%rdx
216 addq %rax,%r11
217 movq %rbp,%rax
218 adcq $0,%rdx
219 movq %rdx,%rcx
220
221 mulq 24(%rsi)
222 addq %rcx,%r12
223 adcq $0,%rdx
224 addq %rax,%r12
225 movq %r9,%rax
226 adcq %rdx,%r13
227 adcq $0,%r8
228
229
230
/* Round 1 reduction: fold r9 into r10..r8; fetch b[2]; clear r9. */
231 movq %r9,%rbp
232 shlq $32,%r9
233 mulq %r15
234 shrq $32,%rbp
235 addq %r9,%r10
236 adcq %rbp,%r11
237 adcq %rax,%r12
238 movq 16(%rbx),%rax
239 adcq %rdx,%r13
240 adcq $0,%r8
241 xorq %r9,%r9
242
243
244
/* Round 2 multiply: (r10..r13, r8, r9) += a * b[2]. */
245 movq %rax,%rbp
246 mulq 0(%rsi)
247 addq %rax,%r10
248 movq %rbp,%rax
249 adcq $0,%rdx
250 movq %rdx,%rcx
251
252 mulq 8(%rsi)
253 addq %rcx,%r11
254 adcq $0,%rdx
255 addq %rax,%r11
256 movq %rbp,%rax
257 adcq $0,%rdx
258 movq %rdx,%rcx
259
260 mulq 16(%rsi)
261 addq %rcx,%r12
262 adcq $0,%rdx
263 addq %rax,%r12
264 movq %rbp,%rax
265 adcq $0,%rdx
266 movq %rdx,%rcx
267
268 mulq 24(%rsi)
269 addq %rcx,%r13
270 adcq $0,%rdx
271 addq %rax,%r13
272 movq %r10,%rax
273 adcq %rdx,%r8
274 adcq $0,%r9
275
276
277
/* Round 2 reduction: fold r10 into r11..r9; fetch b[3]; clear r10. */
278 movq %r10,%rbp
279 shlq $32,%r10
280 mulq %r15
281 shrq $32,%rbp
282 addq %r10,%r11
283 adcq %rbp,%r12
284 adcq %rax,%r13
285 movq 24(%rbx),%rax
286 adcq %rdx,%r8
287 adcq $0,%r9
288 xorq %r10,%r10
289
290
291
/* Round 3 multiply: (r11..r13, r8, r9, r10) += a * b[3]. */
292 movq %rax,%rbp
293 mulq 0(%rsi)
294 addq %rax,%r11
295 movq %rbp,%rax
296 adcq $0,%rdx
297 movq %rdx,%rcx
298
299 mulq 8(%rsi)
300 addq %rcx,%r12
301 adcq $0,%rdx
302 addq %rax,%r12
303 movq %rbp,%rax
304 adcq $0,%rdx
305 movq %rdx,%rcx
306
307 mulq 16(%rsi)
308 addq %rcx,%r13
309 adcq $0,%rdx
310 addq %rax,%r13
311 movq %rbp,%rax
312 adcq $0,%rdx
313 movq %rdx,%rcx
314
315 mulq 24(%rsi)
316 addq %rcx,%r8
317 adcq $0,%rdx
318 addq %rax,%r8
319 movq %r11,%rax
320 adcq %rdx,%r9
321 adcq $0,%r10
322
323
324
/* Round 3 reduction: fold r11 into r12, r13, r8, r9 with carry into r10.
 * rcx, rbp take copies of the two low result limbs as they become final. */
325 movq %r11,%rbp
326 shlq $32,%r11
327 mulq %r15
328 shrq $32,%rbp
329 addq %r11,%r12
330 adcq %rbp,%r13
331 movq %r12,%rcx
332 adcq %rax,%r8
333 adcq %rdx,%r9
334 movq %r13,%rbp
335 adcq $0,%r10
336
337
338
/* Final step: subtract p from (r12, r13, r8, r9, r10).  "subq $-1" subtracts
 * the low limb 0xffffffffffffffff; limb 2 of p is zero.  rbx and rdx take
 * copies of the two high limbs (rbx no longer points at b after this). */
339 subq $-1,%r12
340 movq %r8,%rbx
341 sbbq %r14,%r13
342 sbbq $0,%r8
343 movq %r9,%rdx
344 sbbq %r15,%r9
345 sbbq $0,%r10
346
/* Borrow out of the 5-limb subtraction: value was below p, restore it. */
347 cmovcq %rcx,%r12
348 cmovcq %rbp,%r13
349 movq %r12,0(%rdi)
350 cmovcq %rbx,%r8
351 movq %r13,8(%rdi)
352 cmovcq %rdx,%r9
353 movq %r8,16(%rdi)
354 movq %r9,24(%rdi)
355
/* 0xf3,0xc3 encodes "rep ret". */
356 .byte 0xf3,0xc3
357 .size __ecp_nistz256_mul_montq,.-__ecp_nistz256_mul_montq
358
359
360
361
362
363
364
365
/*
 * ecp_nistz256_sqr_mont
 *   In:  rdi = destination (4 x 64-bit limbs)
 *        rsi = pointer to operand a (4 limbs)
 *   Saves rbp, rbx, r12-r15, loads the register arguments expected by
 *   __ecp_nistz256_sqr_montq (rax = a[0], r14 = a[1], r15 = a[2],
 *   r8 = a[3]), calls it, and restores the saved registers.
 */
366 .globl ecp_nistz256_sqr_mont
367 .hidden ecp_nistz256_sqr_mont
368 .type ecp_nistz256_sqr_mont,@function
369 .align 32
370 ecp_nistz256_sqr_mont:
371 pushq %rbp
372 pushq %rbx
373 pushq %r12
374 pushq %r13
375 pushq %r14
376 pushq %r15
377 movq 0(%rsi),%rax
378 movq 8(%rsi),%r14
379 movq 16(%rsi),%r15
380 movq 24(%rsi),%r8
381
382 call __ecp_nistz256_sqr_montq
383 .Lsqr_mont_done:
384 popq %r15
385 popq %r14
386 popq %r13
387 popq %r12
388 popq %rbx
389 popq %rbp
/* 0xf3,0xc3 encodes "rep ret". */
390 .byte 0xf3,0xc3
391 .size ecp_nistz256_sqr_mont,.-ecp_nistz256_sqr_mont
392
/*
 * __ecp_nistz256_sqr_montq  (internal, register-based calling convention)
 *   In:  rax = a[0], r14 = a[1], r15 = a[2], r8 = a[3]
 *        rsi = pointer to a (limbs are re-read from memory for the squares)
 *        rdi = destination (4 x 64-bit limbs)
 *   Out: r12, r13, r14, r15 = result limbs (least-significant first),
 *        also stored at 0..24(%rdi)
 *        rsi = .Lpoly limb 1, rbp = .Lpoly limb 3 (callers in this file
 *        read them back after the call)
 *   Clobbers rax, rcx, rdx, r8-r11 and the flags; rbx is not touched.
 *
 * Structure: cross products a[i]*a[j] (i < j), doubled, plus the squares
 * a[i]^2 give the 512-bit square in r8..r15; four reduction steps fold the
 * low four limbs into the high four; a final conditional subtraction of p
 * follows.
 */
393 .type __ecp_nistz256_sqr_montq,@function
394 .align 32
395 __ecp_nistz256_sqr_montq:
/* Cross products with a[0] (kept in r13): a0*a1, a0*a2, a0*a3 -> r9..r12. */
396 movq %rax,%r13
397 mulq %r14
398 movq %rax,%r9
399 movq %r15,%rax
400 movq %rdx,%r10
401
402 mulq %r13
403 addq %rax,%r10
404 movq %r8,%rax
405 adcq $0,%rdx
406 movq %rdx,%r11
407
408 mulq %r13
409 addq %rax,%r11
410 movq %r15,%rax
411 adcq $0,%rdx
412 movq %rdx,%r12
413
414
/* Cross products with a[1]: a1*a2 and a1*a3 (rbp carries between them). */
415 mulq %r14
416 addq %rax,%r11
417 movq %r8,%rax
418 adcq $0,%rdx
419 movq %rdx,%rbp
420
421 mulq %r14
422 addq %rax,%r12
423 movq %r8,%rax
424 adcq $0,%rdx
425 addq %rbp,%r12
426 movq %rdx,%r13
427 adcq $0,%r13
428
429
/* Cross product a2*a3 -> r13, r14; r15 cleared to receive the doubling
 * carry; rax reloaded with a[0] for the squares. */
430 mulq %r15
431 xorq %r15,%r15
432 addq %rax,%r13
433 movq 0(%rsi),%rax
434 movq %rdx,%r14
435 adcq $0,%r14
436
/* Double the cross-product sum held in r9..r14; carry-out goes to r15. */
437 addq %r9,%r9
438 adcq %r10,%r10
439 adcq %r11,%r11
440 adcq %r12,%r12
441 adcq %r13,%r13
442 adcq %r14,%r14
443 adcq $0,%r15
444
/* Add the squares a[i]^2: a0^2 -> r8 and into r9; a1^2 into r10, r11;
 * a2^2 into r12, r13; a3^2 into r14, r15. */
445 mulq %rax
446 movq %rax,%r8
447 movq 8(%rsi),%rax
448 movq %rdx,%rcx
449
450 mulq %rax
451 addq %rcx,%r9
452 adcq %rax,%r10
453 movq 16(%rsi),%rax
454 adcq $0,%rdx
455 movq %rdx,%rcx
456
457 mulq %rax
458 addq %rcx,%r11
459 adcq %rax,%r12
460 movq 24(%rsi),%rax
461 adcq $0,%rdx
462 movq %rdx,%rcx
463
464 mulq %rax
465 addq %rcx,%r13
466 adcq %rax,%r14
467 movq %r8,%rax
468 adcq %rdx,%r15
469
/* rsi = .Lpoly limb 1, rbp = .Lpoly limb 3 (rsi no longer points at a). */
470 movq .Lpoly+8(%rip),%rsi
471 movq .Lpoly+24(%rip),%rbp
472
473
474
475
/* Reduction step 1: fold r8 into r9, r10, r11 and rdx using r8<<32,
 * r8>>32 and r8 * .Lpoly[3]. */
476 movq %r8,%rcx
477 shlq $32,%r8
478 mulq %rbp
479 shrq $32,%rcx
480 addq %r8,%r9
481 adcq %rcx,%r10
482 adcq %rax,%r11
483 movq %r9,%rax
484 adcq $0,%rdx
485
486
487
/* Reduction step 2: fold r9; the previous high word (rdx) moves to r8. */
488 movq %r9,%rcx
489 shlq $32,%r9
490 movq %rdx,%r8
491 mulq %rbp
492 shrq $32,%rcx
493 addq %r9,%r10
494 adcq %rcx,%r11
495 adcq %rax,%r8
496 movq %r10,%rax
497 adcq $0,%rdx
498
499
500
/* Reduction step 3: fold r10; previous high word moves to r9. */
501 movq %r10,%rcx
502 shlq $32,%r10
503 movq %rdx,%r9
504 mulq %rbp
505 shrq $32,%rcx
506 addq %r10,%r11
507 adcq %rcx,%r8
508 adcq %rax,%r9
509 movq %r11,%rax
510 adcq $0,%rdx
511
512
513
/* Reduction step 4: fold r11; previous high word moves to r10; r11 is then
 * cleared to collect the final carry. */
514 movq %r11,%rcx
515 shlq $32,%r11
516 movq %rdx,%r10
517 mulq %rbp
518 shrq $32,%rcx
519 addq %r11,%r8
520 adcq %rcx,%r9
521 adcq %rax,%r10
522 adcq $0,%rdx
523 xorq %r11,%r11
524
525
526
/* Add the folded low half (r8, r9, r10, rdx) to the high half r12..r15,
 * carry into r11; r8, r9 take copies of the two low sum limbs. */
527 addq %r8,%r12
528 adcq %r9,%r13
529 movq %r12,%r8
530 adcq %r10,%r14
531 adcq %rdx,%r15
532 movq %r13,%r9
533 adcq $0,%r11
534
/* Subtract p from (r12..r15, r11); r10, rcx take copies of the high limbs. */
535 subq $-1,%r12
536 movq %r14,%r10
537 sbbq %rsi,%r13
538 sbbq $0,%r14
539 movq %r15,%rcx
540 sbbq %rbp,%r15
541 sbbq $0,%r11
542
/* Borrow: the value was below p, restore the un-subtracted limbs. */
543 cmovcq %r8,%r12
544 cmovcq %r9,%r13
545 movq %r12,0(%rdi)
546 cmovcq %r10,%r14
547 movq %r13,8(%rdi)
548 cmovcq %rcx,%r15
549 movq %r14,16(%rdi)
550 movq %r15,24(%rdi)
551
/* 0xf3,0xc3 encodes "rep ret". */
552 .byte 0xf3,0xc3
553 .size __ecp_nistz256_sqr_montq,.-__ecp_nistz256_sqr_montq
554
555
556
557
558
559
/*
 * ecp_nistz256_from_mont
 *   In:  rdi = destination (4 x 64-bit limbs)
 *        rsi = pointer to operand a (4 limbs)
 *   Applies four reduction steps to a (the same fold used between the
 *   multiplication rounds of __ecp_nistz256_mul_montq, with no multiplicand
 *   added), then subtracts p once if the result is not below p, and stores
 *   the four limbs at (%rdi).
 *   NOTE(review): by its name this converts a value out of Montgomery form;
 *   confirm against the C caller.
 *   Uses rax, rcx, rdx, rsi, r8-r11; r12/r13 are saved and restored.
 */
560 .globl ecp_nistz256_from_mont
561 .hidden ecp_nistz256_from_mont
562 .type ecp_nistz256_from_mont,@function
563 .align 32
564 ecp_nistz256_from_mont:
565 pushq %r12
566 pushq %r13
567
/* rax = r8 = a[0], r9..r11 = a[1..3]; r13 = .Lpoly limb 3, r12 = limb 1. */
568 movq 0(%rsi),%rax
569 movq .Lpoly+24(%rip),%r13
570 movq 8(%rsi),%r9
571 movq 16(%rsi),%r10
572 movq 24(%rsi),%r11
573 movq %rax,%r8
574 movq .Lpoly+8(%rip),%r12
575
576
577
/* Reduction step 1: fold r8 into r9, r10, r11 and rdx. */
578 movq %rax,%rcx
579 shlq $32,%r8
580 mulq %r13
581 shrq $32,%rcx
582 addq %r8,%r9
583 adcq %rcx,%r10
584 adcq %rax,%r11
585 movq %r9,%rax
586 adcq $0,%rdx
587
588
589
/* Reduction step 2: fold r9; previous high word (rdx) moves to r8. */
590 movq %r9,%rcx
591 shlq $32,%r9
592 movq %rdx,%r8
593 mulq %r13
594 shrq $32,%rcx
595 addq %r9,%r10
596 adcq %rcx,%r11
597 adcq %rax,%r8
598 movq %r10,%rax
599 adcq $0,%rdx
600
601
602
/* Reduction step 3: fold r10; previous high word moves to r9. */
603 movq %r10,%rcx
604 shlq $32,%r10
605 movq %rdx,%r9
606 mulq %r13
607 shrq $32,%rcx
608 addq %r10,%r11
609 adcq %rcx,%r8
610 adcq %rax,%r9
611 movq %r11,%rax
612 adcq $0,%rdx
613
614
615
/* Reduction step 4: fold r11; result limbs are now r8, r9, r10, rdx.
 * rcx and rsi take copies of the two low limbs. */
616 movq %r11,%rcx
617 shlq $32,%r11
618 movq %rdx,%r10
619 mulq %r13
620 shrq $32,%rcx
621 addq %r11,%r8
622 adcq %rcx,%r9
623 movq %r8,%rcx
624 adcq %rax,%r10
625 movq %r9,%rsi
626 adcq $0,%rdx
627
/* Subtract p from (r8, r9, r10, rdx); rax and r11 keep the un-subtracted
 * third and fourth limbs.  The closing "sbbq %r13,%r13" turns the borrow
 * into r13 = 0 / -1 and sets ZF when there was no borrow. */
628 subq $-1,%r8
629 movq %r10,%rax
630 sbbq %r12,%r9
631 sbbq $0,%r10
632 movq %rdx,%r11
633 sbbq %r13,%rdx
634 sbbq %r13,%r13
635
/* Borrow (ZF clear): restore limbs 0..2 from their copies.  Limb 3 lives in
 * r11 (un-subtracted) and is replaced by the subtracted rdx when there was
 * no borrow (ZF set). */
636 cmovnzq %rcx,%r8
637 cmovnzq %rsi,%r9
638 movq %r8,0(%rdi)
639 cmovnzq %rax,%r10
640 movq %r9,8(%rdi)
641 cmovzq %rdx,%r11
642 movq %r10,16(%rdi)
643 movq %r11,24(%rdi)
644
645 popq %r13
646 popq %r12
/* 0xf3,0xc3 encodes "rep ret". */
647 .byte 0xf3,0xc3
648 .size ecp_nistz256_from_mont,.-ecp_nistz256_from_mont
649
650
/*
 * ecp_nistz256_select_w5
 *   In:  rdi = destination (96 bytes, written with unaligned stores)
 *        rsi = table of 16 entries, 96 bytes each (read with movdqa, so
 *              the table must be 16-byte aligned)
 *        edx = index
 *   Out: entry number (index - 1) copied to (%rdi); all zero bytes when the
 *        index matches none of the counter values 1..16.
 *   Every entry is loaded on every call; selection is done with a compare
 *   mask, not with an index-dependent address or branch.
 *   Uses rax, rsi and xmm0-xmm15.
 */
651 .globl ecp_nistz256_select_w5
652 .hidden ecp_nistz256_select_w5
653 .type ecp_nistz256_select_w5,@function
654 .align 32
655 ecp_nistz256_select_w5:
/* xmm0 = increment (1 in each dword); xmm1 = index in its low dword. */
656 movdqa .LOne(%rip),%xmm0
657 movd %edx,%xmm1
658
/* xmm2..xmm7 accumulate the selected 96-byte entry. */
659 pxor %xmm2,%xmm2
660 pxor %xmm3,%xmm3
661 pxor %xmm4,%xmm4
662 pxor %xmm5,%xmm5
663 pxor %xmm6,%xmm6
664 pxor %xmm7,%xmm7
665
/* xmm8 = running counter (starts at 1); broadcast the index to all dwords. */
666 movdqa %xmm0,%xmm8
667 pshufd $0,%xmm1,%xmm1
668
669 movq $16,%rax
670 .Lselect_loop_sse_w5:
671
/* xmm15 = all-ones where counter == index, else zero; then bump counter. */
672 movdqa %xmm8,%xmm15
673 paddd %xmm0,%xmm8
674 pcmpeqd %xmm1,%xmm15
675
676 movdqa 0(%rsi),%xmm9
677 movdqa 16(%rsi),%xmm10
678 movdqa 32(%rsi),%xmm11
679 movdqa 48(%rsi),%xmm12
680 movdqa 64(%rsi),%xmm13
681 movdqa 80(%rsi),%xmm14
682 leaq 96(%rsi),%rsi
683
/* Mask the entry and OR it into the accumulators. */
684 pand %xmm15,%xmm9
685 pand %xmm15,%xmm10
686 por %xmm9,%xmm2
687 pand %xmm15,%xmm11
688 por %xmm10,%xmm3
689 pand %xmm15,%xmm12
690 por %xmm11,%xmm4
691 pand %xmm15,%xmm13
692 por %xmm12,%xmm5
693 pand %xmm15,%xmm14
694 por %xmm13,%xmm6
695 por %xmm14,%xmm7
696
697 decq %rax
698 jnz .Lselect_loop_sse_w5
699
700 movdqu %xmm2,0(%rdi)
701 movdqu %xmm3,16(%rdi)
702 movdqu %xmm4,32(%rdi)
703 movdqu %xmm5,48(%rdi)
704 movdqu %xmm6,64(%rdi)
705 movdqu %xmm7,80(%rdi)
/* 0xf3,0xc3 encodes "rep ret". */
706 .byte 0xf3,0xc3
707 .size ecp_nistz256_select_w5,.-ecp_nistz256_select_w5
708
709
710
/*
 * ecp_nistz256_select_w7
 *   In:  rdi = destination (64 bytes, written with unaligned stores)
 *        rsi = table of 64 entries, 64 bytes each (read with movdqa, so
 *              the table must be 16-byte aligned)
 *        edx = index
 *   Out: entry number (index - 1) copied to (%rdi); all zero bytes when the
 *        index matches none of the counter values 1..64.
 *   Every entry is loaded on every call; selection is done with a compare
 *   mask, not with an index-dependent address or branch.
 *   Uses rax, rsi, xmm0-xmm5, xmm8-xmm12 and xmm15.
 */
711 .globl ecp_nistz256_select_w7
712 .hidden ecp_nistz256_select_w7
713 .type ecp_nistz256_select_w7,@function
714 .align 32
715 ecp_nistz256_select_w7:
/* xmm8 = running counter (starts at 1); xmm1 = index in its low dword. */
716 movdqa .LOne(%rip),%xmm8
717 movd %edx,%xmm1
718
/* xmm2..xmm5 accumulate the selected 64-byte entry. */
719 pxor %xmm2,%xmm2
720 pxor %xmm3,%xmm3
721 pxor %xmm4,%xmm4
722 pxor %xmm5,%xmm5
723
/* xmm0 = increment (1 in each dword); broadcast the index to all dwords. */
724 movdqa %xmm8,%xmm0
725 pshufd $0,%xmm1,%xmm1
726 movq $64,%rax
727
728 .Lselect_loop_sse_w7:
/* xmm15 = all-ones where counter == index, else zero; then bump counter. */
729 movdqa %xmm8,%xmm15
730 paddd %xmm0,%xmm8
731 movdqa 0(%rsi),%xmm9
732 movdqa 16(%rsi),%xmm10
733 pcmpeqd %xmm1,%xmm15
734 movdqa 32(%rsi),%xmm11
735 movdqa 48(%rsi),%xmm12
736 leaq 64(%rsi),%rsi
737
/* Mask the entry and OR it into the accumulators; the prefetch touches
 * table memory ahead of the (already advanced) read pointer. */
738 pand %xmm15,%xmm9
739 pand %xmm15,%xmm10
740 por %xmm9,%xmm2
741 pand %xmm15,%xmm11
742 por %xmm10,%xmm3
743 pand %xmm15,%xmm12
744 por %xmm11,%xmm4
745 prefetcht0 255(%rsi)
746 por %xmm12,%xmm5
747
748 decq %rax
749 jnz .Lselect_loop_sse_w7
750
751 movdqu %xmm2,0(%rdi)
752 movdqu %xmm3,16(%rdi)
753 movdqu %xmm4,32(%rdi)
754 movdqu %xmm5,48(%rdi)
/* 0xf3,0xc3 encodes "rep ret". */
755 .byte 0xf3,0xc3
756 .size ecp_nistz256_select_w7,.-ecp_nistz256_select_w7
/*
 * ecp_nistz256_avx2_select_w7
 *   Stub: the body is only the two bytes 0x0f,0x0b (the encoding of "ud2",
 *   which raises an invalid-opcode exception) followed by "rep ret".
 *   NOTE(review): presumably the AVX2 variant is not built in this
 *   configuration and must never be called - confirm against the caller.
 */
757 .globl ecp_nistz256_avx2_select_w7
758 .hidden ecp_nistz256_avx2_select_w7
759 .type ecp_nistz256_avx2_select_w7,@function
760 .align 32
761 ecp_nistz256_avx2_select_w7:
/* ud2 */
762 .byte 0x0f,0x0b
/* rep ret */
763 .byte 0xf3,0xc3
764 .size ecp_nistz256_avx2_select_w7,.-ecp_nistz256_avx2_select_w7
/*
 * __ecp_nistz256_add_toq  (internal, register-based calling convention)
 *   In:  r12, r13, r8, r9 = a (least-significant limb first)
 *        rbx = pointer to b (4 x 64-bit limbs)
 *        r14 = .Lpoly limb 1, r15 = .Lpoly limb 3
 *        rdi = destination (4 limbs)
 *   Out: r12, r13, r8, r9 = (a + b) mod p, also stored at 0..24(%rdi);
 *        fully reduced when a and b are below p
 *   Clobbers rax, rbp, rcx, r10, r11 and the flags.
 *
 * The sum is 257 bits wide: r11 holds the carry-out (as 0 or -1).  After p
 * is subtracted from the low 256 bits, the borrow is subtracted from r11,
 * so the final carry flag is set exactly when the 257-bit sum is below p;
 * only then are the un-subtracted limbs restored.
 *
 * Previously the choice was made on the carry-out alone (testq/cmovz), which
 * left sums in the range [p, 2^256) unreduced.
 */
765 .type __ecp_nistz256_add_toq,@function
766 .align 32
767 __ecp_nistz256_add_toq:
/* (r12, r13, r8, r9) += b; rax, rbp keep copies of the two low sum limbs. */
768 addq 0(%rbx),%r12
769 adcq 8(%rbx),%r13
770 movq %r12,%rax
771 adcq 16(%rbx),%r8
772 adcq 24(%rbx),%r9
773 movq %r13,%rbp
/* r11 = 0 if the addition did not carry out of 256 bits, -1 if it did. */
774 sbbq %r11,%r11
775
/* Subtract p ("subq $-1" subtracts the low limb 0xffffffffffffffff; limb 2
 * of p is zero); rcx, r10 keep copies of the two high sum limbs. */
776 subq $-1,%r12
777 movq %r8,%rcx
778 sbbq %r14,%r13
779 sbbq $0,%r8
780 movq %r9,%r10
781 sbbq %r15,%r9
/* CF=1 only if there was no carry-out and the subtraction borrowed. */
782 sbbq $0,%r11
783
/* Sum was below p: keep the un-subtracted limbs. */
784 cmovcq %rax,%r12
785 cmovcq %rbp,%r13
786 movq %r12,0(%rdi)
787 cmovcq %rcx,%r8
788 movq %r13,8(%rdi)
789 cmovcq %r10,%r9
790 movq %r8,16(%rdi)
791 movq %r9,24(%rdi)
792
/* 0xf3,0xc3 encodes "rep ret". */
793 .byte 0xf3,0xc3
794 .size __ecp_nistz256_add_toq,.-__ecp_nistz256_add_toq
795
/*
 * __ecp_nistz256_sub_fromq  (internal, register-based calling convention)
 *   In:  r12, r13, r8, r9 = a (least-significant limb first)
 *        rbx = pointer to b (4 x 64-bit limbs)
 *        r14 = .Lpoly limb 1, r15 = .Lpoly limb 3
 *        rdi = destination (4 limbs)
 *   Out: r12, r13, r8, r9 = a - b, with p added back when the subtraction
 *        borrowed; also stored at 0..24(%rdi)
 *   Clobbers rax, rbp, rcx, r10, r11 and the flags.
 */
796 .type __ecp_nistz256_sub_fromq,@function
797 .align 32
798 __ecp_nistz256_sub_fromq:
/* (r12, r13, r8, r9) -= b; rax, rbp keep copies of the two low limbs. */
799 subq 0(%rbx),%r12
800 sbbq 8(%rbx),%r13
801 movq %r12,%rax
802 sbbq 16(%rbx),%r8
803 sbbq 24(%rbx),%r9
804 movq %r13,%rbp
/* r11 = 0 if there was no borrow, -1 if there was. */
805 sbbq %r11,%r11
806
/* Add p ("addq $-1" adds the low limb 0xffffffffffffffff; limb 2 of p is
 * zero); rcx, r10 keep copies of the two high limbs before the addition. */
807 addq $-1,%r12
808 movq %r8,%rcx
809 adcq %r14,%r13
810 adcq $0,%r8
811 movq %r9,%r10
812 adcq %r15,%r9
813 testq %r11,%r11
814
/* No borrow (r11 == 0): keep the difference without p added. */
815 cmovzq %rax,%r12
816 cmovzq %rbp,%r13
817 movq %r12,0(%rdi)
818 cmovzq %rcx,%r8
819 movq %r13,8(%rdi)
820 cmovzq %r10,%r9
821 movq %r8,16(%rdi)
822 movq %r9,24(%rdi)
823
/* 0xf3,0xc3 encodes "rep ret". */
824 .byte 0xf3,0xc3
825 .size __ecp_nistz256_sub_fromq,.-__ecp_nistz256_sub_fromq
826
/*
 * __ecp_nistz256_subq  (internal, register-based calling convention)
 *   In:  rax, rbp, rcx, r10 = a (least-significant limb first)
 *        r12, r13, r8, r9   = b
 *        r14 = .Lpoly limb 1, r15 = .Lpoly limb 3
 *   Out: r12, r13, r8, r9 = a - b, with p added back when the subtraction
 *        borrowed.  Nothing is written to memory; rdi is not used.
 *   Clobbers rax, rbp, rcx, r10, r11 and the flags.
 */
827 .type __ecp_nistz256_subq,@function
828 .align 32
829 __ecp_nistz256_subq:
/* (rax, rbp, rcx, r10) = a - b; r12, r13 receive the plain difference. */
830 subq %r12,%rax
831 sbbq %r13,%rbp
832 movq %rax,%r12
833 sbbq %r8,%rcx
834 sbbq %r9,%r10
835 movq %rbp,%r13
/* r11 = 0 if there was no borrow, -1 if there was. */
836 sbbq %r11,%r11
837
/* Add p to the working copy; r8, r9 receive the plain high limbs first. */
838 addq $-1,%rax
839 movq %rcx,%r8
840 adcq %r14,%rbp
841 adcq $0,%rcx
842 movq %r10,%r9
843 adcq %r15,%r10
844 testq %r11,%r11
845
/* Borrow (r11 != 0): take the copy that had p added. */
846 cmovnzq %rax,%r12
847 cmovnzq %rbp,%r13
848 cmovnzq %rcx,%r8
849 cmovnzq %r10,%r9
850
/* 0xf3,0xc3 encodes "rep ret". */
851 .byte 0xf3,0xc3
852 .size __ecp_nistz256_subq,.-__ecp_nistz256_subq
853
/*
 * __ecp_nistz256_mul_by_2q  (internal, register-based calling convention)
 *   In:  r12, r13, r8, r9 = a (least-significant limb first)
 *        r14 = .Lpoly limb 1, r15 = .Lpoly limb 3
 *        rdi = destination (4 x 64-bit limbs)
 *   Out: r12, r13, r8, r9 = 2*a mod p, also stored at 0..24(%rdi);
 *        fully reduced when a is below p
 *   Clobbers rax, rbp, rcx, r10, r11 and the flags.
 *
 * The doubled value is 257 bits wide: r11 holds the carry-out (as 0 or -1).
 * After p is subtracted from the low 256 bits, the borrow is subtracted
 * from r11, so the final carry flag is set exactly when the 257-bit value
 * is below p; only then are the un-subtracted limbs restored.
 *
 * Previously the choice was made on the carry-out alone (testq/cmovz), which
 * left results in the range [p, 2^256) unreduced.
 */
854 .type __ecp_nistz256_mul_by_2q,@function
855 .align 32
856 __ecp_nistz256_mul_by_2q:
/* (r12, r13, r8, r9) *= 2; rax, rbp keep copies of the two low limbs. */
857 addq %r12,%r12
858 adcq %r13,%r13
859 movq %r12,%rax
860 adcq %r8,%r8
861 adcq %r9,%r9
862 movq %r13,%rbp
/* r11 = 0 if the doubling did not carry out of 256 bits, -1 if it did. */
863 sbbq %r11,%r11
864
/* Subtract p; rcx, r10 keep copies of the two high limbs. */
865 subq $-1,%r12
866 movq %r8,%rcx
867 sbbq %r14,%r13
868 sbbq $0,%r8
869 movq %r9,%r10
870 sbbq %r15,%r9
/* CF=1 only if there was no carry-out and the subtraction borrowed. */
871 sbbq $0,%r11
872
/* 2*a was below p: keep the un-subtracted limbs. */
873 cmovcq %rax,%r12
874 cmovcq %rbp,%r13
875 movq %r12,0(%rdi)
876 cmovcq %rcx,%r8
877 movq %r13,8(%rdi)
878 cmovcq %r10,%r9
879 movq %r8,16(%rdi)
880 movq %r9,24(%rdi)
881
/* 0xf3,0xc3 encodes "rep ret". */
882 .byte 0xf3,0xc3
883 .size __ecp_nistz256_mul_by_2q,.-__ecp_nistz256_mul_by_2q
/*
 * ecp_nistz256_point_double
 *   In:  rdi = result point, rsi = input point.  Each point is three
 *        32-byte field elements at offsets 0, 32 and 64 (called x, y, z in
 *        the comments below).
 *   Saves rbp, rbx, r12-r15 and reserves 160+8 bytes of stack:
 *        0(%rsp)   S    (starts as 2*y)
 *        32(%rsp)  M
 *        64(%rsp)  Zsqr (z^2, later x - z^2)
 *        96(%rsp)  copy of input x (stored with movdqa)
 *        128(%rsp) scratch
 *   .Lpoint_double_shortcutq is also entered from ecp_nistz256_point_add
 *   with the same stack layout already in place and rsi/rdi set up.
 *   The ".byte 102,72|73,15,110|126,..." sequences are hand-encoded 64-bit
 *   movq transfers between general registers and xmm registers; each is
 *   decoded in the comment next to it.
 */
884 .globl ecp_nistz256_point_double
885 .hidden ecp_nistz256_point_double
886 .type ecp_nistz256_point_double,@function
887 .align 32
888 ecp_nistz256_point_double:
889 pushq %rbp
890 pushq %rbx
891 pushq %r12
892 pushq %r13
893 pushq %r14
894 pushq %r15
895 subq $160+8,%rsp
896
897 .Lpoint_double_shortcutq:
/* Copy input x to 96(%rsp); rbx = input point; (r12, r13, r8, r9) = input y;
 * r14, r15 = .Lpoly limbs 1 and 3 for the add/sub helpers. */
898 movdqu 0(%rsi),%xmm0
899 movq %rsi,%rbx
900 movdqu 16(%rsi),%xmm1
901 movq 32+0(%rsi),%r12
902 movq 32+8(%rsi),%r13
903 movq 32+16(%rsi),%r8
904 movq 32+24(%rsi),%r9
905 movq .Lpoly+8(%rip),%r14
906 movq .Lpoly+24(%rip),%r15
907 movdqa %xmm0,96(%rsp)
908 movdqa %xmm1,96+16(%rsp)
/* Park the three result addresses in xmm registers:
 * xmm0 = &res.x, xmm1 = &res.y, xmm2 = &res.z. */
909 leaq 32(%rdi),%r10
910 leaq 64(%rdi),%r11
/* movq %rdi,%xmm0 */
911 .byte 102,72,15,110,199
/* movq %r10,%xmm1 */
912 .byte 102,73,15,110,202
/* movq %r11,%xmm2 */
913 .byte 102,73,15,110,211
914
/* S = 2 * y */
915 leaq 0(%rsp),%rdi
916 call __ecp_nistz256_mul_by_2q
917
/* Zsqr = z^2 */
918 movq 64+0(%rsi),%rax
919 movq 64+8(%rsi),%r14
920 movq 64+16(%rsi),%r15
921 movq 64+24(%rsi),%r8
922 leaq 64-0(%rsi),%rsi
923 leaq 64(%rsp),%rdi
924 call __ecp_nistz256_sqr_montq
925
/* S = S^2 */
926 movq 0+0(%rsp),%rax
927 movq 8+0(%rsp),%r14
928 leaq 0+0(%rsp),%rsi
929 movq 16+0(%rsp),%r15
930 movq 24+0(%rsp),%r8
931 leaq 0(%rsp),%rdi
932 call __ecp_nistz256_sqr_montq
933
/* res.z = 2 * (z * y): multiply with rdi = &res.z, then double the product
 * still held in registers (mul_montq reloads r14/r15 with the .Lpoly limbs). */
934 movq 32(%rbx),%rax
935 movq 64+0(%rbx),%r9
936 movq 64+8(%rbx),%r10
937 movq 64+16(%rbx),%r11
938 movq 64+24(%rbx),%r12
939 leaq 64-0(%rbx),%rsi
940 leaq 32(%rbx),%rbx
/* movq %xmm2,%rdi */
941 .byte 102,72,15,126,215
942 call __ecp_nistz256_mul_montq
943 call __ecp_nistz256_mul_by_2q
944
/* M = x + Zsqr */
945 movq 96+0(%rsp),%r12
946 movq 96+8(%rsp),%r13
947 leaq 64(%rsp),%rbx
948 movq 96+16(%rsp),%r8
949 movq 96+24(%rsp),%r9
950 leaq 32(%rsp),%rdi
951 call __ecp_nistz256_add_toq
952
/* Zsqr = x - Zsqr */
953 movq 96+0(%rsp),%r12
954 movq 96+8(%rsp),%r13
955 leaq 64(%rsp),%rbx
956 movq 96+16(%rsp),%r8
957 movq 96+24(%rsp),%r9
958 leaq 64(%rsp),%rdi
959 call __ecp_nistz256_sub_fromq
960
/* res.y = S^2 (result also left in r12..r15) */
961 movq 0+0(%rsp),%rax
962 movq 8+0(%rsp),%r14
963 leaq 0+0(%rsp),%rsi
964 movq 16+0(%rsp),%r15
965 movq 24+0(%rsp),%r8
/* movq %xmm1,%rdi */
966 .byte 102,72,15,126,207
967 call __ecp_nistz256_sqr_montq
/* Halve (r12..r15) modulo p: add p (sqr_montq left limb 1 in rsi and limb 3
 * in rbp) with carry into r9, discard the addition when the original low
 * bit was 0, then shift the 257-bit value right by one and store it at
 * res.y (rdi). */
968 xorq %r9,%r9
969 movq %r12,%rax
970 addq $-1,%r12
971 movq %r13,%r10
972 adcq %rsi,%r13
973 movq %r14,%rcx
974 adcq $0,%r14
975 movq %r15,%r8
976 adcq %rbp,%r15
977 adcq $0,%r9
978 xorq %rsi,%rsi
979 testq $1,%rax
980
981 cmovzq %rax,%r12
982 cmovzq %r10,%r13
983 cmovzq %rcx,%r14
984 cmovzq %r8,%r15
985 cmovzq %rsi,%r9
986
987 movq %r13,%rax
988 shrq $1,%r12
989 shlq $63,%rax
990 movq %r14,%r10
991 shrq $1,%r13
992 orq %rax,%r12
993 shlq $63,%r10
994 movq %r15,%rcx
995 shrq $1,%r14
996 orq %r10,%r13
997 shlq $63,%rcx
998 movq %r12,0(%rdi)
999 shrq $1,%r15
1000 movq %r13,8(%rdi)
1001 shlq $63,%r9
1002 orq %rcx,%r14
1003 orq %r9,%r15
1004 movq %r14,16(%rdi)
1005 movq %r15,24(%rdi)
/* M = M * Zsqr */
1006 movq 64(%rsp),%rax
1007 leaq 64(%rsp),%rbx
1008 movq 0+32(%rsp),%r9
1009 movq 8+32(%rsp),%r10
1010 leaq 0+32(%rsp),%rsi
1011 movq 16+32(%rsp),%r11
1012 movq 24+32(%rsp),%r12
1013 leaq 32(%rsp),%rdi
1014 call __ecp_nistz256_mul_montq
1015
/* M = 3 * M: scratch = 2*M (product still in registers), then add M. */
1016 leaq 128(%rsp),%rdi
1017 call __ecp_nistz256_mul_by_2q
1018
1019 leaq 32(%rsp),%rbx
1020 leaq 32(%rsp),%rdi
1021 call __ecp_nistz256_add_toq
1022
/* S = S * x */
1023 movq 96(%rsp),%rax
1024 leaq 96(%rsp),%rbx
1025 movq 0+0(%rsp),%r9
1026 movq 8+0(%rsp),%r10
1027 leaq 0+0(%rsp),%rsi
1028 movq 16+0(%rsp),%r11
1029 movq 24+0(%rsp),%r12
1030 leaq 0(%rsp),%rdi
1031 call __ecp_nistz256_mul_montq
1032
/* scratch (128(%rsp)) = 2 * S */
1033 leaq 128(%rsp),%rdi
1034 call __ecp_nistz256_mul_by_2q
1035
/* res.x = M^2 */
1036 movq 0+32(%rsp),%rax
1037 movq 8+32(%rsp),%r14
1038 leaq 0+32(%rsp),%rsi
1039 movq 16+32(%rsp),%r15
1040 movq 24+32(%rsp),%r8
/* movq %xmm0,%rdi */
1041 .byte 102,72,15,126,199
1042 call __ecp_nistz256_sqr_montq
1043
/* res.x = res.x - 2*S.  sqr_montq returns its result in r12..r15, so the
 * two high limbs are moved to r8/r9 and r14/r15 are reloaded with the
 * .Lpoly limbs that sqr_montq left in rsi/rbp. */
1044 leaq 128(%rsp),%rbx
1045 movq %r14,%r8
1046 movq %r15,%r9
1047 movq %rsi,%r14
1048 movq %rbp,%r15
1049 call __ecp_nistz256_sub_fromq
1050
/* (r12, r13, r8, r9) = S - res.x  (subq does not store to memory) */
1051 movq 0+0(%rsp),%rax
1052 movq 0+8(%rsp),%rbp
1053 movq 0+16(%rsp),%rcx
1054 movq 0+24(%rsp),%r10
1055 leaq 0(%rsp),%rdi
1056 call __ecp_nistz256_subq
1057
/* S = (S - res.x) * M.  The difference is stored to 0(%rsp) and shuffled
 * into r9..r12 for mul_montq.  "xorl %ecx,%ecx" sets ZF, so the two cmovz
 * instructions below always perform their move. */
1058 movq 32(%rsp),%rax
1059 leaq 32(%rsp),%rbx
1060 movq %r12,%r14
1061 xorl %ecx,%ecx
1062 movq %r12,0+0(%rsp)
1063 movq %r13,%r10
1064 movq %r13,0+8(%rsp)
1065 cmovzq %r8,%r11
1066 movq %r8,0+16(%rsp)
1067 leaq 0-0(%rsp),%rsi
1068 cmovzq %r9,%r12
1069 movq %r9,0+24(%rsp)
1070 movq %r14,%r9
1071 leaq 0(%rsp),%rdi
1072 call __ecp_nistz256_mul_montq
1073
/* res.y = S - res.y */
/* movq %xmm1,%rbx */
1074 .byte 102,72,15,126,203
/* movq %xmm1,%rdi */
1075 .byte 102,72,15,126,207
1076 call __ecp_nistz256_sub_fromq
1077
1078 addq $160+8,%rsp
1079 popq %r15
1080 popq %r14
1081 popq %r13
1082 popq %r12
1083 popq %rbx
1084 popq %rbp
/* 0xf3,0xc3 encodes "rep ret". */
1085 .byte 0xf3,0xc3
1086 .size ecp_nistz256_point_double,.-ecp_nistz256_point_double
/*
 * ecp_nistz256_point_add
 *   In:  rdi = result point, rsi = first input point (in1),
 *        rdx = second input point (in2).  Each point is three 32-byte field
 *        elements at offsets 0, 32 and 64 (called x, y, z below).
 *   Saves rbp, rbx, r12-r15 and reserves 576+8 bytes of stack:
 *        0    H        32   Z1sqr, later Hsqr     64   R
 *        96   Z2sqr, later Rsqr                   128  Hcub
 *        160  U1       192  U2                    224  S1
 *        256  S2       288  res_x   320  res_y    352  res_z
 *        384  in1.x    416  in1.y   448  in1.z
 *        480  in2.x    512  in2.y   544  in2.z
 *   xmm5 / xmm4 hold all-ones masks when every bit of in1's / in2's x and y
 *   is zero (treated below as "that input is the point at infinity").
 *   Special cases: H == 0 with neither input at infinity either jumps into
 *   ecp_nistz256_point_double (R == 0 as well) or writes an all-zero result.
 *   The ".byte 102,72|73,15,110|126,..." sequences are hand-encoded 64-bit
 *   movq transfers between general registers and xmm registers; each is
 *   decoded in the comment next to it.
 *
 *   Fix in this version: the inline doubling of U1*Hsqr chose between the
 *   raw and the p-subtracted value on the carry-out alone, leaving values
 *   in [p, 2^256) unreduced before they were fed to __ecp_nistz256_subq
 *   (which adds p back at most once).  The borrow is now folded into the
 *   carry word so the doubled value is always below p.
 */
1087 .globl ecp_nistz256_point_add
1088 .hidden ecp_nistz256_point_add
1089 .type ecp_nistz256_point_add,@function
1090 .align 32
1091 ecp_nistz256_point_add:
1092 pushq %rbp
1093 pushq %rbx
1094 pushq %r12
1095 pushq %r13
1096 pushq %r14
1097 pushq %r15
1098 subq $576+8,%rsp
1099
/* Copy in1 to 384/416/448(%rsp); rbx = in1, rsi = in2; OR all of in1's x and
 * y bits together in xmm3. */
1100 movdqu 0(%rsi),%xmm0
1101 movdqu 16(%rsi),%xmm1
1102 movdqu 32(%rsi),%xmm2
1103 movdqu 48(%rsi),%xmm3
1104 movdqu 64(%rsi),%xmm4
1105 movdqu 80(%rsi),%xmm5
1106 movq %rsi,%rbx
1107 movq %rdx,%rsi
1108 movdqa %xmm0,384(%rsp)
1109 movdqa %xmm1,384+16(%rsp)
1110 por %xmm0,%xmm1
1111 movdqa %xmm2,416(%rsp)
1112 movdqa %xmm3,416+16(%rsp)
1113 por %xmm2,%xmm3
1114 movdqa %xmm4,448(%rsp)
1115 movdqa %xmm5,448+16(%rsp)
1116 por %xmm1,%xmm3
1117
/* Copy in2.x / in2.y to 480/512(%rsp), load in2.z into rax, r14, r15, r8;
 * fold in1's OR down to one dword in xmm5 and start the same for in2. */
1118 movdqu 0(%rsi),%xmm0
1119 pshufd $0xb1,%xmm3,%xmm5
1120 movdqu 16(%rsi),%xmm1
1121 movdqu 32(%rsi),%xmm2
1122 por %xmm3,%xmm5
1123 movdqu 48(%rsi),%xmm3
1124 movq 64+0(%rsi),%rax
1125 movq 64+8(%rsi),%r14
1126 movq 64+16(%rsi),%r15
1127 movq 64+24(%rsi),%r8
1128 movdqa %xmm0,480(%rsp)
1129 pshufd $0x1e,%xmm5,%xmm4
1130 movdqa %xmm1,480+16(%rsp)
1131 por %xmm0,%xmm1
/* movq %rdi,%xmm0  (park the result pointer) */
1132 .byte 102,72,15,110,199
1133 movdqa %xmm2,512(%rsp)
1134 movdqa %xmm3,512+16(%rsp)
1135 por %xmm2,%xmm3
1136 por %xmm4,%xmm5
1137 pxor %xmm4,%xmm4
1138 por %xmm1,%xmm3
1139
/* Z2sqr = in2.z^2; in2.z is also saved at 544(%rsp). */
1140 leaq 64-0(%rsi),%rsi
1141 movq %rax,544+0(%rsp)
1142 movq %r14,544+8(%rsp)
1143 movq %r15,544+16(%rsp)
1144 movq %r8,544+24(%rsp)
1145 leaq 96(%rsp),%rdi
1146 call __ecp_nistz256_sqr_montq
1147
/* xmm5 = in1-at-infinity mask, xmm4 = in2-at-infinity mask (broadcast). */
1148 pcmpeqd %xmm4,%xmm5
1149 pshufd $0xb1,%xmm3,%xmm4
1150 por %xmm3,%xmm4
1151 pshufd $0,%xmm5,%xmm5
1152 pshufd $0x1e,%xmm4,%xmm3
1153 por %xmm3,%xmm4
1154 pxor %xmm3,%xmm3
1155 pcmpeqd %xmm3,%xmm4
1156 pshufd $0,%xmm4,%xmm4
/* Z1sqr = in1.z^2 */
1157 movq 64+0(%rbx),%rax
1158 movq 64+8(%rbx),%r14
1159 movq 64+16(%rbx),%r15
1160 movq 64+24(%rbx),%r8
/* movq %rbx,%xmm1  (park the in1 pointer) */
1161 .byte 102,72,15,110,203
1162
1163 leaq 64-0(%rbx),%rsi
1164 leaq 32(%rsp),%rdi
1165 call __ecp_nistz256_sqr_montq
1166
/* S1 = Z2sqr * in2.z */
1167 movq 544(%rsp),%rax
1168 leaq 544(%rsp),%rbx
1169 movq 0+96(%rsp),%r9
1170 movq 8+96(%rsp),%r10
1171 leaq 0+96(%rsp),%rsi
1172 movq 16+96(%rsp),%r11
1173 movq 24+96(%rsp),%r12
1174 leaq 224(%rsp),%rdi
1175 call __ecp_nistz256_mul_montq
1176
/* S2 = Z1sqr * in1.z */
1177 movq 448(%rsp),%rax
1178 leaq 448(%rsp),%rbx
1179 movq 0+32(%rsp),%r9
1180 movq 8+32(%rsp),%r10
1181 leaq 0+32(%rsp),%rsi
1182 movq 16+32(%rsp),%r11
1183 movq 24+32(%rsp),%r12
1184 leaq 256(%rsp),%rdi
1185 call __ecp_nistz256_mul_montq
1186
/* S1 = S1 * in1.y */
1187 movq 416(%rsp),%rax
1188 leaq 416(%rsp),%rbx
1189 movq 0+224(%rsp),%r9
1190 movq 8+224(%rsp),%r10
1191 leaq 0+224(%rsp),%rsi
1192 movq 16+224(%rsp),%r11
1193 movq 24+224(%rsp),%r12
1194 leaq 224(%rsp),%rdi
1195 call __ecp_nistz256_mul_montq
1196
/* S2 = S2 * in2.y */
1197 movq 512(%rsp),%rax
1198 leaq 512(%rsp),%rbx
1199 movq 0+256(%rsp),%r9
1200 movq 8+256(%rsp),%r10
1201 leaq 0+256(%rsp),%rsi
1202 movq 16+256(%rsp),%r11
1203 movq 24+256(%rsp),%r12
1204 leaq 256(%rsp),%rdi
1205 call __ecp_nistz256_mul_montq
1206
/* R = S2 - S1 */
1207 leaq 224(%rsp),%rbx
1208 leaq 64(%rsp),%rdi
1209 call __ecp_nistz256_sub_fromq
1210
/* r12 = OR of R's limbs (zero iff R == 0) -> xmm3;
 * xmm2 = in1-at-infinity | in2-at-infinity. */
1211 orq %r13,%r12
1212 movdqa %xmm4,%xmm2
1213 orq %r8,%r12
1214 orq %r9,%r12
1215 por %xmm5,%xmm2
/* movq %r12,%xmm3 */
1216 .byte 102,73,15,110,220
1217
/* U1 = in1.x * Z2sqr */
1218 movq 384(%rsp),%rax
1219 leaq 384(%rsp),%rbx
1220 movq 0+96(%rsp),%r9
1221 movq 8+96(%rsp),%r10
1222 leaq 0+96(%rsp),%rsi
1223 movq 16+96(%rsp),%r11
1224 movq 24+96(%rsp),%r12
1225 leaq 160(%rsp),%rdi
1226 call __ecp_nistz256_mul_montq
1227
/* U2 = in2.x * Z1sqr */
1228 movq 480(%rsp),%rax
1229 leaq 480(%rsp),%rbx
1230 movq 0+32(%rsp),%r9
1231 movq 8+32(%rsp),%r10
1232 leaq 0+32(%rsp),%rsi
1233 movq 16+32(%rsp),%r11
1234 movq 24+32(%rsp),%r12
1235 leaq 192(%rsp),%rdi
1236 call __ecp_nistz256_mul_montq
1237
/* H = U2 - U1 */
1238 leaq 160(%rsp),%rbx
1239 leaq 0(%rsp),%rdi
1240 call __ecp_nistz256_sub_fromq
1241
/* r12 = OR of H's limbs (zero iff H == 0). */
1242 orq %r13,%r12
1243 orq %r8,%r12
1244 orq %r9,%r12
1245
/* 0x3e is a prefix byte placed in front of the jnz below.
 * H != 0: ordinary addition. */
1246 .byte 0x3e
1247 jnz .Ladd_proceedq
/* movq %xmm2,%r8  (either-input-at-infinity mask) */
1248 .byte 102,73,15,126,208
/* movq %xmm3,%r9  (OR of R's limbs) */
1249 .byte 102,73,15,126,217
/* H == 0 but an input is at infinity: the general path's final selects
 * return the other input. */
1250 testq %r8,%r8
1251 jnz .Ladd_proceedq
/* H == 0 and R == 0: hand over to the doubling code. */
1252 testq %r9,%r9
1253 jz .Ladd_doubleq
1254
/* H == 0, R != 0: write 96 zero bytes as the result. */
/* movq %xmm0,%rdi */
1255 .byte 102,72,15,126,199
1256 pxor %xmm0,%xmm0
1257 movdqu %xmm0,0(%rdi)
1258 movdqu %xmm0,16(%rdi)
1259 movdqu %xmm0,32(%rdi)
1260 movdqu %xmm0,48(%rdi)
1261 movdqu %xmm0,64(%rdi)
1262 movdqu %xmm0,80(%rdi)
1263 jmp .Ladd_doneq
1264
1265 .align 32
1266 .Ladd_doubleq:
/* rsi = in1, rdi = result; shrink the frame from 576+8 to 160+8 bytes so it
 * matches ecp_nistz256_point_double's, then continue there. */
/* movq %xmm1,%rsi */
1267 .byte 102,72,15,126,206
/* movq %xmm0,%rdi */
1268 .byte 102,72,15,126,199
1269 addq $416,%rsp
1270 jmp .Lpoint_double_shortcutq
1271
1272 .align 32
1273 .Ladd_proceedq:
/* Rsqr = R^2 */
1274 movq 0+64(%rsp),%rax
1275 movq 8+64(%rsp),%r14
1276 leaq 0+64(%rsp),%rsi
1277 movq 16+64(%rsp),%r15
1278 movq 24+64(%rsp),%r8
1279 leaq 96(%rsp),%rdi
1280 call __ecp_nistz256_sqr_montq
1281
/* res_z = H * in1.z */
1282 movq 448(%rsp),%rax
1283 leaq 448(%rsp),%rbx
1284 movq 0+0(%rsp),%r9
1285 movq 8+0(%rsp),%r10
1286 leaq 0+0(%rsp),%rsi
1287 movq 16+0(%rsp),%r11
1288 movq 24+0(%rsp),%r12
1289 leaq 352(%rsp),%rdi
1290 call __ecp_nistz256_mul_montq
1291
/* Hsqr = H^2 */
1292 movq 0+0(%rsp),%rax
1293 movq 8+0(%rsp),%r14
1294 leaq 0+0(%rsp),%rsi
1295 movq 16+0(%rsp),%r15
1296 movq 24+0(%rsp),%r8
1297 leaq 32(%rsp),%rdi
1298 call __ecp_nistz256_sqr_montq
1299
/* res_z = res_z * in2.z */
1300 movq 544(%rsp),%rax
1301 leaq 544(%rsp),%rbx
1302 movq 0+352(%rsp),%r9
1303 movq 8+352(%rsp),%r10
1304 leaq 0+352(%rsp),%rsi
1305 movq 16+352(%rsp),%r11
1306 movq 24+352(%rsp),%r12
1307 leaq 352(%rsp),%rdi
1308 call __ecp_nistz256_mul_montq
1309
/* Hcub = Hsqr * H */
1310 movq 0(%rsp),%rax
1311 leaq 0(%rsp),%rbx
1312 movq 0+32(%rsp),%r9
1313 movq 8+32(%rsp),%r10
1314 leaq 0+32(%rsp),%rsi
1315 movq 16+32(%rsp),%r11
1316 movq 24+32(%rsp),%r12
1317 leaq 128(%rsp),%rdi
1318 call __ecp_nistz256_mul_montq
1319
/* U2 = U1 * Hsqr (result also in r12, r13, r8, r9) */
1320 movq 160(%rsp),%rax
1321 leaq 160(%rsp),%rbx
1322 movq 0+32(%rsp),%r9
1323 movq 8+32(%rsp),%r10
1324 leaq 0+32(%rsp),%rsi
1325 movq 16+32(%rsp),%r11
1326 movq 24+32(%rsp),%r12
1327 leaq 192(%rsp),%rdi
1328 call __ecp_nistz256_mul_montq
1329
1330
1331
1332
/* Inline doubling: (r12, r13, r8, r9) = 2 * U2 mod p.  r14/r15 still hold
 * the .Lpoly limbs loaded by mul_montq.  rsi is pointed at Rsqr so its
 * limbs can be loaded between the conditional moves. */
1333 addq %r12,%r12
1334 leaq 96(%rsp),%rsi
1335 adcq %r13,%r13
1336 movq %r12,%rax
1337 adcq %r8,%r8
1338 adcq %r9,%r9
1339 movq %r13,%rbp
/* r11 = 0 if the doubling did not carry out of 256 bits, -1 if it did. */
1340 sbbq %r11,%r11
1341
1342 subq $-1,%r12
1343 movq %r8,%rcx
1344 sbbq %r14,%r13
1345 sbbq $0,%r8
1346 movq %r9,%r10
1347 sbbq %r15,%r9
/* Fold the borrow into the carry word: CF=1 only when there was no
 * carry-out and the subtraction of p borrowed, i.e. 2*U2 < p. */
1348 sbbq $0,%r11
1349
/* 2*U2 < p: keep the un-subtracted limbs.  The interleaved loads put Rsqr
 * into rax, rbp, rcx, r10 (plain movq does not change the flags). */
1350 cmovcq %rax,%r12
1351 movq 0(%rsi),%rax
1352 cmovcq %rbp,%r13
1353 movq 8(%rsi),%rbp
1354 cmovcq %rcx,%r8
1355 movq 16(%rsi),%rcx
1356 cmovcq %r10,%r9
1357 movq 24(%rsi),%r10
1358
/* (r12, r13, r8, r9) = Rsqr - 2*U2 */
1359 call __ecp_nistz256_subq
1360
/* res_x = Rsqr - 2*U2 - Hcub */
1361 leaq 128(%rsp),%rbx
1362 leaq 288(%rsp),%rdi
1363 call __ecp_nistz256_sub_fromq
1364
/* res_y = U2 - res_x (subq leaves the result in registers; stored below) */
1365 movq 192+0(%rsp),%rax
1366 movq 192+8(%rsp),%rbp
1367 movq 192+16(%rsp),%rcx
1368 movq 192+24(%rsp),%r10
1369 leaq 320(%rsp),%rdi
1370
1371 call __ecp_nistz256_subq
1372
1373 movq %r12,0(%rdi)
1374 movq %r13,8(%rdi)
1375 movq %r8,16(%rdi)
1376 movq %r9,24(%rdi)
/* S2 = S1 * Hcub */
1377 movq 128(%rsp),%rax
1378 leaq 128(%rsp),%rbx
1379 movq 0+224(%rsp),%r9
1380 movq 8+224(%rsp),%r10
1381 leaq 0+224(%rsp),%rsi
1382 movq 16+224(%rsp),%r11
1383 movq 24+224(%rsp),%r12
1384 leaq 256(%rsp),%rdi
1385 call __ecp_nistz256_mul_montq
1386
/* res_y = R * res_y */
1387 movq 320(%rsp),%rax
1388 leaq 320(%rsp),%rbx
1389 movq 0+64(%rsp),%r9
1390 movq 8+64(%rsp),%r10
1391 leaq 0+64(%rsp),%rsi
1392 movq 16+64(%rsp),%r11
1393 movq 24+64(%rsp),%r12
1394 leaq 320(%rsp),%rdi
1395 call __ecp_nistz256_mul_montq
1396
/* res_y = res_y - S2 */
1397 leaq 256(%rsp),%rbx
1398 leaq 320(%rsp),%rdi
1399 call __ecp_nistz256_sub_fromq
1400
/* movq %xmm0,%rdi  (result pointer) */
1401 .byte 102,72,15,126,199
1402
/* z: t = in1-at-infinity ? in2.z : res_z */
1403 movdqa %xmm5,%xmm0
1404 movdqa %xmm5,%xmm1
1405 pandn 352(%rsp),%xmm0
1406 movdqa %xmm5,%xmm2
1407 pandn 352+16(%rsp),%xmm1
1408 movdqa %xmm5,%xmm3
1409 pand 544(%rsp),%xmm2
1410 pand 544+16(%rsp),%xmm3
1411 por %xmm0,%xmm2
1412 por %xmm1,%xmm3
1413
/* result.z = in2-at-infinity ? in1.z : t */
1414 movdqa %xmm4,%xmm0
1415 movdqa %xmm4,%xmm1
1416 pandn %xmm2,%xmm0
1417 movdqa %xmm4,%xmm2
1418 pandn %xmm3,%xmm1
1419 movdqa %xmm4,%xmm3
1420 pand 448(%rsp),%xmm2
1421 pand 448+16(%rsp),%xmm3
1422 por %xmm0,%xmm2
1423 por %xmm1,%xmm3
1424 movdqu %xmm2,64(%rdi)
1425 movdqu %xmm3,80(%rdi)
1426
/* x: t = in1-at-infinity ? in2.x : res_x */
1427 movdqa %xmm5,%xmm0
1428 movdqa %xmm5,%xmm1
1429 pandn 288(%rsp),%xmm0
1430 movdqa %xmm5,%xmm2
1431 pandn 288+16(%rsp),%xmm1
1432 movdqa %xmm5,%xmm3
1433 pand 480(%rsp),%xmm2
1434 pand 480+16(%rsp),%xmm3
1435 por %xmm0,%xmm2
1436 por %xmm1,%xmm3
1437
/* result.x = in2-at-infinity ? in1.x : t */
1438 movdqa %xmm4,%xmm0
1439 movdqa %xmm4,%xmm1
1440 pandn %xmm2,%xmm0
1441 movdqa %xmm4,%xmm2
1442 pandn %xmm3,%xmm1
1443 movdqa %xmm4,%xmm3
1444 pand 384(%rsp),%xmm2
1445 pand 384+16(%rsp),%xmm3
1446 por %xmm0,%xmm2
1447 por %xmm1,%xmm3
1448 movdqu %xmm2,0(%rdi)
1449 movdqu %xmm3,16(%rdi)
1450
/* y: t = in1-at-infinity ? in2.y : res_y */
1451 movdqa %xmm5,%xmm0
1452 movdqa %xmm5,%xmm1
1453 pandn 320(%rsp),%xmm0
1454 movdqa %xmm5,%xmm2
1455 pandn 320+16(%rsp),%xmm1
1456 movdqa %xmm5,%xmm3
1457 pand 512(%rsp),%xmm2
1458 pand 512+16(%rsp),%xmm3
1459 por %xmm0,%xmm2
1460 por %xmm1,%xmm3
1461
/* result.y = in2-at-infinity ? in1.y : t */
1462 movdqa %xmm4,%xmm0
1463 movdqa %xmm4,%xmm1
1464 pandn %xmm2,%xmm0
1465 movdqa %xmm4,%xmm2
1466 pandn %xmm3,%xmm1
1467 movdqa %xmm4,%xmm3
1468 pand 416(%rsp),%xmm2
1469 pand 416+16(%rsp),%xmm3
1470 por %xmm0,%xmm2
1471 por %xmm1,%xmm3
1472 movdqu %xmm2,32(%rdi)
1473 movdqu %xmm3,48(%rdi)
1474
1475 .Ladd_doneq:
1476 addq $576+8,%rsp
1477 popq %r15
1478 popq %r14
1479 popq %r13
1480 popq %r12
1481 popq %rbx
1482 popq %rbp
/* 0xf3,0xc3 encodes "rep ret". */
1483 .byte 0xf3,0xc3
1484 .size ecp_nistz256_point_add,.-ecp_nistz256_point_add
1485 .globl ecp_nistz256_point_add_affine
1486 .hidden ecp_nistz256_point_add_affine
/*
 * ecp_nistz256_point_add_affine
 *
 * Syntax: GNU as, AT&T operand order.  ABI: System V AMD64.
 *
 * In:    %rdi = result buffer; 96 bytes are written (32-byte fields at
 *               offsets 0, 32 and 64)
 *        %rsi = first input; 96 bytes are read (fields at 0, 32, 64,
 *               called X1, Y1, Z1 in the comments below)
 *        %rdx = second input; 64 bytes are read (fields at 0 and 32,
 *               called X2, Y2 below)
 * Saved: %rbp, %rbx, %r12-%r15 are pushed on entry and popped on exit.
 * Stack: 480+8 bytes of locals.  Together with the six pushes and the
 *        return address this keeps %rsp 16-byte aligned (given the
 *        usual SysV entry alignment), which the movdqa/pand/pandn
 *        accesses to the frame rely on.
 *
 * Frame slots, 32 bytes each (names are used only in these comments):
 *      0 U2      32 S2 (first holds Z1sqr)   64 H      96 R
 *    128 Hsqr   160 Hcub                    192 Rsqr
 *    224 res_x  256 res_y                   288 res_z
 *    320 X1     352 Y1                      384 Z1
 *    416 X2     448 Y2
 *
 * Masks built on entry and consumed by the final selection:
 *    %xmm5 = all-ones when the 64 bytes X1|Y1 are all zero
 *    %xmm4 = all-ones when the 64 bytes X2|Y2 are all zero
 * The result is then chosen per coordinate as
 *    %xmm4 set -> copy of the first input (X1, Y1, Z1)
 *    %xmm5 set -> (X2, Y2, .LONE_mont)
 *    otherwise -> (res_x, res_y, res_z)
 *
 * NOTE(review): the __ecp_nistz256_* helpers are defined elsewhere in
 * this file.  The operation comments below assume what their names and
 * their use here suggest -- confirm against the helper definitions:
 *    mul_montq : a at (%rsi) with limbs in %r9..%r12, b at (%rbx) with
 *                b[0] in %rax; product stored at (%rdi) and left in
 *                %r12,%r13,%r8,%r9; %r14/%r15 left holding the second
 *                and fourth .Lpoly limbs
 *    sqr_montq : operand at (%rsi) with limbs in %rax,%r14,%r15,%r8;
 *                square stored at (%rdi) and left in %r12..%r15
 *    sub_fromq : (%r12,%r13,%r8,%r9) - [%rbx], stored at (%rdi) and
 *                left in the same registers
 *    subq      : (%rax,%rbp,%rcx,%r10) - (%r12,%r13,%r8,%r9), result in
 *                %r12,%r13,%r8,%r9 only (not stored)
 * and that none of them touches %xmm0, %xmm4 or %xmm5.
 */
.type	ecp_nistz256_point_add_affine,@function
.align	32
ecp_nistz256_point_add_affine:
	pushq	%rbp
	pushq	%rbx
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15
	subq	$480+8,%rsp

	/* Copy the first input into the frame; OR X1 and Y1 together. */
	movdqu	0(%rsi),%xmm0
	movq	%rdx,%rbx			/* rbx = second input */
	movdqu	16(%rsi),%xmm1
	movdqu	32(%rsi),%xmm2
	movdqu	48(%rsi),%xmm3
	movdqu	64(%rsi),%xmm4
	movdqu	80(%rsi),%xmm5
	movq	64+0(%rsi),%rax			/* Z1 limbs for the squaring */
	movq	64+8(%rsi),%r14
	movq	64+16(%rsi),%r15
	movq	64+24(%rsi),%r8
	movdqa	%xmm0,320(%rsp)			/* X1 */
	movdqa	%xmm1,320+16(%rsp)
	por	%xmm0,%xmm1
	movdqa	%xmm2,352(%rsp)			/* Y1 */
	movdqa	%xmm3,352+16(%rsp)
	por	%xmm2,%xmm3
	movdqa	%xmm4,384(%rsp)			/* Z1 */
	movdqa	%xmm5,384+16(%rsp)
	por	%xmm1,%xmm3			/* xmm3 = OR of all X1|Y1 bytes */

	/* Copy the second input; fold the X1|Y1 OR down to one dword. */
	movdqu	0(%rbx),%xmm0
	pshufd	$0xb1,%xmm3,%xmm5
	movdqu	16(%rbx),%xmm1
	movdqu	32(%rbx),%xmm2
	por	%xmm3,%xmm5
	movdqu	48(%rbx),%xmm3
	movdqa	%xmm0,416(%rsp)			/* X2 */
	pshufd	$0x1e,%xmm5,%xmm4
	movdqa	%xmm1,416+16(%rsp)
	por	%xmm0,%xmm1
.byte	102,72,15,110,199			/* movq %rdi,%xmm0: park result pointer */
	movdqa	%xmm2,448(%rsp)			/* Y2 */
	movdqa	%xmm3,448+16(%rsp)
	por	%xmm2,%xmm3
	por	%xmm4,%xmm5			/* dword 0 of xmm5 = OR of X1|Y1 */
	pxor	%xmm4,%xmm4
	por	%xmm1,%xmm3			/* xmm3 = OR of all X2|Y2 bytes */

	leaq	64-0(%rsi),%rsi			/* rsi = Z1 inside the first input */
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_sqr_montq	/* Z1sqr = sqr(Z1) */

	/*
	 * Finish both zero masks while moving the squaring result
	 * (%r12..%r15) into the multiplier input registers (%r9..%r12).
	 */
	pcmpeqd	%xmm4,%xmm5
	pshufd	$0xb1,%xmm3,%xmm4
	movq	0(%rbx),%rax			/* first limb of X2 */

	movq	%r12,%r9
	por	%xmm3,%xmm4
	pshufd	$0,%xmm5,%xmm5			/* xmm5 = "X1|Y1 is zero" mask */
	pshufd	$0x1e,%xmm4,%xmm3
	movq	%r13,%r10
	por	%xmm3,%xmm4
	pxor	%xmm3,%xmm3
	movq	%r14,%r11
	pcmpeqd	%xmm3,%xmm4
	pshufd	$0,%xmm4,%xmm4			/* xmm4 = "X2|Y2 is zero" mask */

	leaq	32-0(%rsp),%rsi
	movq	%r15,%r12
	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_mul_montq	/* U2 = Z1sqr * X2 */

	leaq	320(%rsp),%rbx
	leaq	64(%rsp),%rdi
	call	__ecp_nistz256_sub_fromq	/* H = U2 - X1 */

	movq	384(%rsp),%rax
	leaq	384(%rsp),%rbx
	movq	0+32(%rsp),%r9
	movq	8+32(%rsp),%r10
	leaq	0+32(%rsp),%rsi
	movq	16+32(%rsp),%r11
	movq	24+32(%rsp),%r12
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_mul_montq	/* S2 = Z1sqr * Z1 */

	movq	384(%rsp),%rax
	leaq	384(%rsp),%rbx
	movq	0+64(%rsp),%r9
	movq	8+64(%rsp),%r10
	leaq	0+64(%rsp),%rsi
	movq	16+64(%rsp),%r11
	movq	24+64(%rsp),%r12
	leaq	288(%rsp),%rdi
	call	__ecp_nistz256_mul_montq	/* res_z = H * Z1 */

	movq	448(%rsp),%rax
	leaq	448(%rsp),%rbx
	movq	0+32(%rsp),%r9
	movq	8+32(%rsp),%r10
	leaq	0+32(%rsp),%rsi
	movq	16+32(%rsp),%r11
	movq	24+32(%rsp),%r12
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_mul_montq	/* S2 = S2 * Y2 */

	leaq	352(%rsp),%rbx
	leaq	96(%rsp),%rdi
	call	__ecp_nistz256_sub_fromq	/* R = S2 - Y1 */

	movq	0+64(%rsp),%rax
	movq	8+64(%rsp),%r14
	leaq	0+64(%rsp),%rsi
	movq	16+64(%rsp),%r15
	movq	24+64(%rsp),%r8
	leaq	128(%rsp),%rdi
	call	__ecp_nistz256_sqr_montq	/* Hsqr = sqr(H) */

	movq	0+96(%rsp),%rax
	movq	8+96(%rsp),%r14
	leaq	0+96(%rsp),%rsi
	movq	16+96(%rsp),%r15
	movq	24+96(%rsp),%r8
	leaq	192(%rsp),%rdi
	call	__ecp_nistz256_sqr_montq	/* Rsqr = sqr(R) */

	movq	128(%rsp),%rax
	leaq	128(%rsp),%rbx
	movq	0+64(%rsp),%r9
	movq	8+64(%rsp),%r10
	leaq	0+64(%rsp),%rsi
	movq	16+64(%rsp),%r11
	movq	24+64(%rsp),%r12
	leaq	160(%rsp),%rdi
	call	__ecp_nistz256_mul_montq	/* Hcub = H * Hsqr */

	movq	320(%rsp),%rax
	leaq	320(%rsp),%rbx
	movq	0+128(%rsp),%r9
	movq	8+128(%rsp),%r10
	leaq	0+128(%rsp),%rsi
	movq	16+128(%rsp),%r11
	movq	24+128(%rsp),%r12
	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_mul_montq	/* U2 = Hsqr * X1 */

	/*
	 * Double the product still held in %r12,%r13,%r8,%r9 and reduce it
	 * modulo the prime in .Lpoly.
	 *
	 * The doubled value is 257 bits wide: the carry out of the top limb
	 * is collected in %r11 and the prime is then subtracted across all
	 * five limbs.  Only if that subtraction borrows (value < p) are the
	 * saved pre-subtraction limbs restored, so a fully reduced input
	 * always yields a fully reduced output.
	 *
	 * The previous form (sbbq %r11,%r11 / testq / cmovzq) kept the
	 * subtracted value only when the doubling itself carried out of
	 * 256 bits, which left results in [p, 2^256) unreduced.
	 *
	 * subq $-1 and sbbq $0 apply the first and third .Lpoly limbs as
	 * immediates; %r14 and %r15 are expected to hold the second and
	 * fourth limbs (see the helper assumptions in the header).
	 */
	xorq	%r11,%r11
	addq	%r12,%r12
	leaq	192(%rsp),%rsi			/* rsi = Rsqr, loaded below */
	adcq	%r13,%r13
	movq	%r12,%rax			/* keep unreduced limb 0 */
	adcq	%r8,%r8
	adcq	%r9,%r9
	movq	%r13,%rbp			/* keep unreduced limb 1 */
	adcq	$0,%r11				/* r11 = bit 256 of the doubled value */

	subq	$-1,%r12
	movq	%r8,%rcx			/* keep unreduced limb 2 */
	sbbq	%r14,%r13
	sbbq	$0,%r8
	movq	%r9,%r10			/* keep unreduced limb 3 */
	sbbq	%r15,%r9
	sbbq	$0,%r11				/* CF = 1 iff doubled value < p */

	/* Undo the subtraction on borrow; interleave the Rsqr loads. */
	cmovcq	%rax,%r12
	movq	0(%rsi),%rax
	cmovcq	%rbp,%r13
	movq	8(%rsi),%rbp
	cmovcq	%rcx,%r8
	movq	16(%rsi),%rcx
	cmovcq	%r10,%r9
	movq	24(%rsi),%r10

	call	__ecp_nistz256_subq		/* regs = Rsqr - 2*U2 */

	leaq	160(%rsp),%rbx
	leaq	224(%rsp),%rdi
	call	__ecp_nistz256_sub_fromq	/* res_x = regs - Hcub */

	movq	0+0(%rsp),%rax
	movq	0+8(%rsp),%rbp
	movq	0+16(%rsp),%rcx
	movq	0+24(%rsp),%r10
	leaq	64(%rsp),%rdi

	call	__ecp_nistz256_subq		/* regs = U2 - res_x */

	/* subq leaves its result in registers only; store it into H. */
	movq	%r12,0(%rdi)
	movq	%r13,8(%rdi)
	movq	%r8,16(%rdi)
	movq	%r9,24(%rdi)
	movq	352(%rsp),%rax
	leaq	352(%rsp),%rbx
	movq	0+160(%rsp),%r9
	movq	8+160(%rsp),%r10
	leaq	0+160(%rsp),%rsi
	movq	16+160(%rsp),%r11
	movq	24+160(%rsp),%r12
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_mul_montq	/* S2 = Hcub * Y1 */

	movq	96(%rsp),%rax
	leaq	96(%rsp),%rbx
	movq	0+64(%rsp),%r9
	movq	8+64(%rsp),%r10
	leaq	0+64(%rsp),%rsi
	movq	16+64(%rsp),%r11
	movq	24+64(%rsp),%r12
	leaq	64(%rsp),%rdi
	call	__ecp_nistz256_mul_montq	/* H = H * R */

	leaq	32(%rsp),%rbx
	leaq	256(%rsp),%rdi
	call	__ecp_nistz256_sub_fromq	/* res_y = H - S2 */

.byte	102,72,15,126,199			/* movq %xmm0,%rdi: restore result pointer */

	/*
	 * Branch-free selection of the third output field:
	 *   xmm5 set -> .LONE_mont, else res_z; then xmm4 set -> Z1.
	 */
	movdqa	%xmm5,%xmm0
	movdqa	%xmm5,%xmm1
	pandn	288(%rsp),%xmm0
	movdqa	%xmm5,%xmm2
	pandn	288+16(%rsp),%xmm1
	movdqa	%xmm5,%xmm3
	pand	.LONE_mont(%rip),%xmm2
	pand	.LONE_mont+16(%rip),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3

	movdqa	%xmm4,%xmm0
	movdqa	%xmm4,%xmm1
	pandn	%xmm2,%xmm0
	movdqa	%xmm4,%xmm2
	pandn	%xmm3,%xmm1
	movdqa	%xmm4,%xmm3
	pand	384(%rsp),%xmm2
	pand	384+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3
	movdqu	%xmm2,64(%rdi)
	movdqu	%xmm3,80(%rdi)

	/* First output field: xmm5 set -> X2, else res_x; xmm4 set -> X1. */
	movdqa	%xmm5,%xmm0
	movdqa	%xmm5,%xmm1
	pandn	224(%rsp),%xmm0
	movdqa	%xmm5,%xmm2
	pandn	224+16(%rsp),%xmm1
	movdqa	%xmm5,%xmm3
	pand	416(%rsp),%xmm2
	pand	416+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3

	movdqa	%xmm4,%xmm0
	movdqa	%xmm4,%xmm1
	pandn	%xmm2,%xmm0
	movdqa	%xmm4,%xmm2
	pandn	%xmm3,%xmm1
	movdqa	%xmm4,%xmm3
	pand	320(%rsp),%xmm2
	pand	320+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3
	movdqu	%xmm2,0(%rdi)
	movdqu	%xmm3,16(%rdi)

	/* Second output field: xmm5 set -> Y2, else res_y; xmm4 set -> Y1. */
	movdqa	%xmm5,%xmm0
	movdqa	%xmm5,%xmm1
	pandn	256(%rsp),%xmm0
	movdqa	%xmm5,%xmm2
	pandn	256+16(%rsp),%xmm1
	movdqa	%xmm5,%xmm3
	pand	448(%rsp),%xmm2
	pand	448+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3

	movdqa	%xmm4,%xmm0
	movdqa	%xmm4,%xmm1
	pandn	%xmm2,%xmm0
	movdqa	%xmm4,%xmm2
	pandn	%xmm3,%xmm1
	movdqa	%xmm4,%xmm3
	pand	352(%rsp),%xmm2
	pand	352+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3
	movdqu	%xmm2,32(%rdi)
	movdqu	%xmm3,48(%rdi)

	addq	$480+8,%rsp
	popq	%r15
	popq	%r14
	popq	%r13
	popq	%r12
	popq	%rbx
	popq	%rbp
.byte	0xf3,0xc3				/* rep ret */
.size	ecp_nistz256_point_add_affine,.-ecp_nistz256_point_add_affine
1789 #endif
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698