Chromium Code Reviews

Side by Side Diff: third_party/boringssl/linux-x86_64/crypto/ec/p256-x86_64-asm.S

Issue 2869243005: Roll src/third_party/boringssl/src ddfcc6a60..1e5cb820d (Closed)
Patch Set: Created 3 years, 7 months ago
1 #if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
2 .text
3 .extern OPENSSL_ia32cap_P
4 .hidden OPENSSL_ia32cap_P
5
6
7 .align 64
8 .Lpoly:
9 .quad 0xffffffffffffffff, 0x00000000ffffffff, 0x0000000000000000, 0xffffffff00000001
10
11 .LOne:
12 .long 1,1,1,1,1,1,1,1
13 .LTwo:
14 .long 2,2,2,2,2,2,2,2
15 .LThree:
16 .long 3,3,3,3,3,3,3,3
17 .LONE_mont:
18 .quad 0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe
19
20
21
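The constant block above defines the field constants the rest of the file relies on: .Lpoly is the P-256 prime p = 2^256 - 2^224 + 2^192 + 2^96 - 1 stored as four little-endian 64-bit limbs, .LONE_mont is the value 1 in Montgomery form (1·R mod p with R = 2^256), and .LOne/.LTwo/.LThree are broadcast 32-bit counters used by the SSE select loops near the end of the file. A minimal check of the .LONE_mont encoding, as a sketch only (R mod p is simply 2^256 - p because p < 2^256 < 2p; assumes a compiler with unsigned __int128, used only for carry handling):

```c
#include <assert.h>
#include <stdint.h>

int main(void) {
  /* .Lpoly and .LONE_mont, copied limb for limb from the constants above. */
  const uint64_t p[4]        = {0xffffffffffffffffu, 0x00000000ffffffffu,
                                0x0000000000000000u, 0xffffffff00000001u};
  const uint64_t one_mont[4] = {0x0000000000000001u, 0xffffffff00000000u,
                                0xffffffffffffffffu, 0x00000000fffffffeu};

  /* 2^256 - p, computed as the 256-bit two's complement of p. */
  uint64_t r[4], carry = 1;
  for (int i = 0; i < 4; i++) {
    unsigned __int128 v = (unsigned __int128)(~p[i]) + carry;
    r[i] = (uint64_t)v;
    carry = (uint64_t)(v >> 64);
  }
  for (int i = 0; i < 4; i++) assert(r[i] == one_mont[i]);
  return 0;
}
```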
22 .globl ecp_nistz256_neg
23 .hidden ecp_nistz256_neg
24 .type ecp_nistz256_neg,@function
25 .align 32
26 ecp_nistz256_neg:
27 pushq %r12
28 pushq %r13
29
30 xorq %r8,%r8
31 xorq %r9,%r9
32 xorq %r10,%r10
33 xorq %r11,%r11
34 xorq %r13,%r13
35
36 subq 0(%rsi),%r8
37 sbbq 8(%rsi),%r9
38 sbbq 16(%rsi),%r10
39 movq %r8,%rax
40 sbbq 24(%rsi),%r11
41 leaq .Lpoly(%rip),%rsi
42 movq %r9,%rdx
43 sbbq $0,%r13
44
45 addq 0(%rsi),%r8
46 movq %r10,%rcx
47 adcq 8(%rsi),%r9
48 adcq 16(%rsi),%r10
49 movq %r11,%r12
50 adcq 24(%rsi),%r11
51 testq %r13,%r13
52
53 cmovzq %rax,%r8
54 cmovzq %rdx,%r9
55 movq %r8,0(%rdi)
56 cmovzq %rcx,%r10
57 movq %r9,8(%rdi)
58 cmovzq %r12,%r11
59 movq %r10,16(%rdi)
60 movq %r11,24(%rdi)
61
62 popq %r13
63 popq %r12
64 .byte 0xf3,0xc3
65 .size ecp_nistz256_neg,.-ecp_nistz256_neg
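ecp_nistz256_neg computes out = (-a) mod p for a fully reduced input: it subtracts a from zero, unconditionally adds p back, and the cmovz instructions keep the raw (zero) result when no borrow occurred, i.e. when a itself was zero. A rough C restatement, a sketch assuming the input is already in [0, p); it applies the correction through a mask instead of conditional moves:

```c
#include <stdint.h>

static const uint64_t P[4] = {0xffffffffffffffffu, 0x00000000ffffffffu,
                              0x0000000000000000u, 0xffffffff00000001u};

/* out = (p - a) mod p, with a == 0 mapping to 0. */
static void neg_mod_p(uint64_t out[4], const uint64_t a[4]) {
  uint64_t d[4], borrow = 0;
  for (int i = 0; i < 4; i++) {          /* d = 0 - a, tracking the borrow */
    unsigned __int128 v = (unsigned __int128)0 - a[i] - borrow;
    d[i] = (uint64_t)v;
    borrow = (uint64_t)(v >> 64) & 1;
  }
  /* A nonzero a always borrows; add p back in that case only. */
  uint64_t mask = 0 - borrow, carry = 0;
  for (int i = 0; i < 4; i++) {
    unsigned __int128 v = (unsigned __int128)d[i] + (P[i] & mask) + carry;
    out[i] = (uint64_t)v;
    carry = (uint64_t)(v >> 64);
  }
}
```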
66
67
68
69
70
71
72 .globl ecp_nistz256_mul_mont
73 .hidden ecp_nistz256_mul_mont
74 .type ecp_nistz256_mul_mont,@function
75 .align 32
76 ecp_nistz256_mul_mont:
77 .Lmul_mont:
78 pushq %rbp
79 pushq %rbx
80 pushq %r12
81 pushq %r13
82 pushq %r14
83 pushq %r15
84 movq %rdx,%rbx
85 movq 0(%rdx),%rax
86 movq 0(%rsi),%r9
87 movq 8(%rsi),%r10
88 movq 16(%rsi),%r11
89 movq 24(%rsi),%r12
90
91 call __ecp_nistz256_mul_montq
92 .Lmul_mont_done:
93 popq %r15
94 popq %r14
95 popq %r13
96 popq %r12
97 popq %rbx
98 popq %rbp
99 .byte 0xf3,0xc3
100 .size ecp_nistz256_mul_mont,.-ecp_nistz256_mul_mont
101
102 .type __ecp_nistz256_mul_montq,@function
103 .align 32
104 __ecp_nistz256_mul_montq:
105
106
107 movq %rax,%rbp
108 mulq %r9
109 movq .Lpoly+8(%rip),%r14
110 movq %rax,%r8
111 movq %rbp,%rax
112 movq %rdx,%r9
113
114 mulq %r10
115 movq .Lpoly+24(%rip),%r15
116 addq %rax,%r9
117 movq %rbp,%rax
118 adcq $0,%rdx
119 movq %rdx,%r10
120
121 mulq %r11
122 addq %rax,%r10
123 movq %rbp,%rax
124 adcq $0,%rdx
125 movq %rdx,%r11
126
127 mulq %r12
128 addq %rax,%r11
129 movq %r8,%rax
130 adcq $0,%rdx
131 xorq %r13,%r13
132 movq %rdx,%r12
133
134
135
136
137
138
139
140
141
142
143 movq %r8,%rbp
144 shlq $32,%r8
145 mulq %r15
146 shrq $32,%rbp
147 addq %r8,%r9
148 adcq %rbp,%r10
149 adcq %rax,%r11
150 movq 8(%rbx),%rax
151 adcq %rdx,%r12
152 adcq $0,%r13
153 xorq %r8,%r8
154
155
156
157 movq %rax,%rbp
158 mulq 0(%rsi)
159 addq %rax,%r9
160 movq %rbp,%rax
161 adcq $0,%rdx
162 movq %rdx,%rcx
163
164 mulq 8(%rsi)
165 addq %rcx,%r10
166 adcq $0,%rdx
167 addq %rax,%r10
168 movq %rbp,%rax
169 adcq $0,%rdx
170 movq %rdx,%rcx
171
172 mulq 16(%rsi)
173 addq %rcx,%r11
174 adcq $0,%rdx
175 addq %rax,%r11
176 movq %rbp,%rax
177 adcq $0,%rdx
178 movq %rdx,%rcx
179
180 mulq 24(%rsi)
181 addq %rcx,%r12
182 adcq $0,%rdx
183 addq %rax,%r12
184 movq %r9,%rax
185 adcq %rdx,%r13
186 adcq $0,%r8
187
188
189
190 movq %r9,%rbp
191 shlq $32,%r9
192 mulq %r15
193 shrq $32,%rbp
194 addq %r9,%r10
195 adcq %rbp,%r11
196 adcq %rax,%r12
197 movq 16(%rbx),%rax
198 adcq %rdx,%r13
199 adcq $0,%r8
200 xorq %r9,%r9
201
202
203
204 movq %rax,%rbp
205 mulq 0(%rsi)
206 addq %rax,%r10
207 movq %rbp,%rax
208 adcq $0,%rdx
209 movq %rdx,%rcx
210
211 mulq 8(%rsi)
212 addq %rcx,%r11
213 adcq $0,%rdx
214 addq %rax,%r11
215 movq %rbp,%rax
216 adcq $0,%rdx
217 movq %rdx,%rcx
218
219 mulq 16(%rsi)
220 addq %rcx,%r12
221 adcq $0,%rdx
222 addq %rax,%r12
223 movq %rbp,%rax
224 adcq $0,%rdx
225 movq %rdx,%rcx
226
227 mulq 24(%rsi)
228 addq %rcx,%r13
229 adcq $0,%rdx
230 addq %rax,%r13
231 movq %r10,%rax
232 adcq %rdx,%r8
233 adcq $0,%r9
234
235
236
237 movq %r10,%rbp
238 shlq $32,%r10
239 mulq %r15
240 shrq $32,%rbp
241 addq %r10,%r11
242 adcq %rbp,%r12
243 adcq %rax,%r13
244 movq 24(%rbx),%rax
245 adcq %rdx,%r8
246 adcq $0,%r9
247 xorq %r10,%r10
248
249
250
251 movq %rax,%rbp
252 mulq 0(%rsi)
253 addq %rax,%r11
254 movq %rbp,%rax
255 adcq $0,%rdx
256 movq %rdx,%rcx
257
258 mulq 8(%rsi)
259 addq %rcx,%r12
260 adcq $0,%rdx
261 addq %rax,%r12
262 movq %rbp,%rax
263 adcq $0,%rdx
264 movq %rdx,%rcx
265
266 mulq 16(%rsi)
267 addq %rcx,%r13
268 adcq $0,%rdx
269 addq %rax,%r13
270 movq %rbp,%rax
271 adcq $0,%rdx
272 movq %rdx,%rcx
273
274 mulq 24(%rsi)
275 addq %rcx,%r8
276 adcq $0,%rdx
277 addq %rax,%r8
278 movq %r11,%rax
279 adcq %rdx,%r9
280 adcq $0,%r10
281
282
283
284 movq %r11,%rbp
285 shlq $32,%r11
286 mulq %r15
287 shrq $32,%rbp
288 addq %r11,%r12
289 adcq %rbp,%r13
290 movq %r12,%rcx
291 adcq %rax,%r8
292 adcq %rdx,%r9
293 movq %r13,%rbp
294 adcq $0,%r10
295
296
297
298 subq $-1,%r12
299 movq %r8,%rbx
300 sbbq %r14,%r13
301 sbbq $0,%r8
302 movq %r9,%rdx
303 sbbq %r15,%r9
304 sbbq $0,%r10
305
306 cmovcq %rcx,%r12
307 cmovcq %rbp,%r13
308 movq %r12,0(%rdi)
309 cmovcq %rbx,%r8
310 movq %r13,8(%rdi)
311 cmovcq %rdx,%r9
312 movq %r8,16(%rdi)
313 movq %r9,24(%rdi)
314
315 .byte 0xf3,0xc3
316 .size __ecp_nistz256_mul_montq,.-__ecp_nistz256_mul_montq
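__ecp_nistz256_mul_montq computes the Montgomery product a·b·R^-1 mod p with R = 2^256, interleaving one reduction step after each of the four 64-bit multiplication passes. The reduction leans on the shape of the prime: p ≡ -1 (mod 2^64), so -p^-1 mod 2^64 = 1 and the per-step multiplier is simply the accumulator's low limb; and because p's limbs are 2^64-1, 2^32-1, 0 and 2^64-2^32+1, adding that multiple of p reduces to the shl/shr-by-32 additions plus a single mulq by .Lpoly+24, with the low limb cancelling to zero. A portable CIOS-style sketch of the same operation, not the assembly's schedule (assumes unsigned __int128):

```c
#include <stdint.h>

typedef unsigned __int128 u128;

static const uint64_t P[4] = {0xffffffffffffffffu, 0x00000000ffffffffu,
                              0x0000000000000000u, 0xffffffff00000001u};

/* r = a * b * 2^-256 mod p; a, b in [0, p). */
static void mul_mont_ref(uint64_t r[4], const uint64_t a[4], const uint64_t b[4]) {
  uint64_t t[5] = {0, 0, 0, 0, 0};       /* accumulator, < 2p at loop boundaries */
  for (int i = 0; i < 4; i++) {
    uint64_t carry = 0, top = 0;
    for (int j = 0; j < 4; j++) {        /* t += a * b[i] */
      u128 v = (u128)a[j] * b[i] + t[j] + carry;
      t[j] = (uint64_t)v;
      carry = (uint64_t)(v >> 64);
    }
    u128 v = (u128)t[4] + carry;
    t[4] = (uint64_t)v;
    top = (uint64_t)(v >> 64);

    /* Since -p^-1 mod 2^64 == 1, the Montgomery multiplier is just t[0];
       adding t[0] * p makes the low limb zero. */
    uint64_t m = t[0];
    carry = 0;
    for (int j = 0; j < 4; j++) {
      u128 w = (u128)m * P[j] + t[j] + carry;
      t[j] = (uint64_t)w;
      carry = (uint64_t)(w >> 64);
    }
    v = (u128)t[4] + carry;
    t[4] = (uint64_t)v;
    top += (uint64_t)(v >> 64);

    /* Divide by 2^64: t[0] is zero now, so shift the limbs down. */
    t[0] = t[1]; t[1] = t[2]; t[2] = t[3]; t[3] = t[4]; t[4] = top;
  }

  /* Final conditional subtraction of p (the accumulator is < 2p here). */
  uint64_t d[4], borrow = 0;
  for (int j = 0; j < 4; j++) {
    u128 v = (u128)t[j] - P[j] - borrow;
    d[j] = (uint64_t)v;
    borrow = (uint64_t)(v >> 64) & 1;
  }
  int keep_t = borrow > t[4];            /* t < p: nothing to subtract */
  for (int j = 0; j < 4; j++) r[j] = keep_t ? t[j] : d[j];
}
```

__ecp_nistz256_sqr_montq below follows the same pattern, with the cross products computed once and doubled before the identical reduction.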
317
318
319
320
321
322
323
324
325 .globl ecp_nistz256_sqr_mont
326 .hidden ecp_nistz256_sqr_mont
327 .type ecp_nistz256_sqr_mont,@function
328 .align 32
329 ecp_nistz256_sqr_mont:
330 pushq %rbp
331 pushq %rbx
332 pushq %r12
333 pushq %r13
334 pushq %r14
335 pushq %r15
336 movq 0(%rsi),%rax
337 movq 8(%rsi),%r14
338 movq 16(%rsi),%r15
339 movq 24(%rsi),%r8
340
341 call __ecp_nistz256_sqr_montq
342 .Lsqr_mont_done:
343 popq %r15
344 popq %r14
345 popq %r13
346 popq %r12
347 popq %rbx
348 popq %rbp
349 .byte 0xf3,0xc3
350 .size ecp_nistz256_sqr_mont,.-ecp_nistz256_sqr_mont
351
352 .type __ecp_nistz256_sqr_montq,@function
353 .align 32
354 __ecp_nistz256_sqr_montq:
355 movq %rax,%r13
356 mulq %r14
357 movq %rax,%r9
358 movq %r15,%rax
359 movq %rdx,%r10
360
361 mulq %r13
362 addq %rax,%r10
363 movq %r8,%rax
364 adcq $0,%rdx
365 movq %rdx,%r11
366
367 mulq %r13
368 addq %rax,%r11
369 movq %r15,%rax
370 adcq $0,%rdx
371 movq %rdx,%r12
372
373
374 mulq %r14
375 addq %rax,%r11
376 movq %r8,%rax
377 adcq $0,%rdx
378 movq %rdx,%rbp
379
380 mulq %r14
381 addq %rax,%r12
382 movq %r8,%rax
383 adcq $0,%rdx
384 addq %rbp,%r12
385 movq %rdx,%r13
386 adcq $0,%r13
387
388
389 mulq %r15
390 xorq %r15,%r15
391 addq %rax,%r13
392 movq 0(%rsi),%rax
393 movq %rdx,%r14
394 adcq $0,%r14
395
396 addq %r9,%r9
397 adcq %r10,%r10
398 adcq %r11,%r11
399 adcq %r12,%r12
400 adcq %r13,%r13
401 adcq %r14,%r14
402 adcq $0,%r15
403
404 mulq %rax
405 movq %rax,%r8
406 movq 8(%rsi),%rax
407 movq %rdx,%rcx
408
409 mulq %rax
410 addq %rcx,%r9
411 adcq %rax,%r10
412 movq 16(%rsi),%rax
413 adcq $0,%rdx
414 movq %rdx,%rcx
415
416 mulq %rax
417 addq %rcx,%r11
418 adcq %rax,%r12
419 movq 24(%rsi),%rax
420 adcq $0,%rdx
421 movq %rdx,%rcx
422
423 mulq %rax
424 addq %rcx,%r13
425 adcq %rax,%r14
426 movq %r8,%rax
427 adcq %rdx,%r15
428
429 movq .Lpoly+8(%rip),%rsi
430 movq .Lpoly+24(%rip),%rbp
431
432
433
434
435 movq %r8,%rcx
436 shlq $32,%r8
437 mulq %rbp
438 shrq $32,%rcx
439 addq %r8,%r9
440 adcq %rcx,%r10
441 adcq %rax,%r11
442 movq %r9,%rax
443 adcq $0,%rdx
444
445
446
447 movq %r9,%rcx
448 shlq $32,%r9
449 movq %rdx,%r8
450 mulq %rbp
451 shrq $32,%rcx
452 addq %r9,%r10
453 adcq %rcx,%r11
454 adcq %rax,%r8
455 movq %r10,%rax
456 adcq $0,%rdx
457
458
459
460 movq %r10,%rcx
461 shlq $32,%r10
462 movq %rdx,%r9
463 mulq %rbp
464 shrq $32,%rcx
465 addq %r10,%r11
466 adcq %rcx,%r8
467 adcq %rax,%r9
468 movq %r11,%rax
469 adcq $0,%rdx
470
471
472
473 movq %r11,%rcx
474 shlq $32,%r11
475 movq %rdx,%r10
476 mulq %rbp
477 shrq $32,%rcx
478 addq %r11,%r8
479 adcq %rcx,%r9
480 adcq %rax,%r10
481 adcq $0,%rdx
482 xorq %r11,%r11
483
484
485
486 addq %r8,%r12
487 adcq %r9,%r13
488 movq %r12,%r8
489 adcq %r10,%r14
490 adcq %rdx,%r15
491 movq %r13,%r9
492 adcq $0,%r11
493
494 subq $-1,%r12
495 movq %r14,%r10
496 sbbq %rsi,%r13
497 sbbq $0,%r14
498 movq %r15,%rcx
499 sbbq %rbp,%r15
500 sbbq $0,%r11
501
502 cmovcq %r8,%r12
503 cmovcq %r9,%r13
504 movq %r12,0(%rdi)
505 cmovcq %r10,%r14
506 movq %r13,8(%rdi)
507 cmovcq %rcx,%r15
508 movq %r14,16(%rdi)
509 movq %r15,24(%rdi)
510
511 .byte 0xf3,0xc3
512 .size __ecp_nistz256_sqr_montq,.-__ecp_nistz256_sqr_montq
513
514
515 .globl ecp_nistz256_select_w5
516 .hidden ecp_nistz256_select_w5
517 .type ecp_nistz256_select_w5,@function
518 .align 32
519 ecp_nistz256_select_w5:
520 movdqa .LOne(%rip),%xmm0
521 movd %edx,%xmm1
522
523 pxor %xmm2,%xmm2
524 pxor %xmm3,%xmm3
525 pxor %xmm4,%xmm4
526 pxor %xmm5,%xmm5
527 pxor %xmm6,%xmm6
528 pxor %xmm7,%xmm7
529
530 movdqa %xmm0,%xmm8
531 pshufd $0,%xmm1,%xmm1
532
533 movq $16,%rax
534 .Lselect_loop_sse_w5:
535
536 movdqa %xmm8,%xmm15
537 paddd %xmm0,%xmm8
538 pcmpeqd %xmm1,%xmm15
539
540 movdqa 0(%rsi),%xmm9
541 movdqa 16(%rsi),%xmm10
542 movdqa 32(%rsi),%xmm11
543 movdqa 48(%rsi),%xmm12
544 movdqa 64(%rsi),%xmm13
545 movdqa 80(%rsi),%xmm14
546 leaq 96(%rsi),%rsi
547
548 pand %xmm15,%xmm9
549 pand %xmm15,%xmm10
550 por %xmm9,%xmm2
551 pand %xmm15,%xmm11
552 por %xmm10,%xmm3
553 pand %xmm15,%xmm12
554 por %xmm11,%xmm4
555 pand %xmm15,%xmm13
556 por %xmm12,%xmm5
557 pand %xmm15,%xmm14
558 por %xmm13,%xmm6
559 por %xmm14,%xmm7
560
561 decq %rax
562 jnz .Lselect_loop_sse_w5
563
564 movdqu %xmm2,0(%rdi)
565 movdqu %xmm3,16(%rdi)
566 movdqu %xmm4,32(%rdi)
567 movdqu %xmm5,48(%rdi)
568 movdqu %xmm6,64(%rdi)
569 movdqu %xmm7,80(%rdi)
570 .byte 0xf3,0xc3
571 .size ecp_nistz256_select_w5,.-ecp_nistz256_select_w5
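ecp_nistz256_select_w5 is a constant-time window-table lookup: it walks all 16 table entries (96 bytes each, three 32-byte field elements), increments a counter with paddd .LOne, compares it against the requested index with pcmpeqd, and ORs every entry into the accumulator under the resulting all-ones/all-zero mask. Every entry is read on every call, so the access pattern does not depend on the secret index, and index 0 selects nothing and returns zeros. ecp_nistz256_select_w7 below does the same scan over 64 entries of 64 bytes. A rough C sketch of the w5 variant (the layout is assumed, and a production version has to keep the mask computation branch-free):

```c
#include <stdint.h>
#include <string.h>

/* out = table[index - 1] for index in 1..16, all zeros for index 0. */
static void select_w5_ref(uint8_t out[96], const uint8_t table[16][96],
                          uint32_t index) {
  memset(out, 0, 96);
  for (uint32_t i = 0; i < 16; i++) {
    uint32_t diff = (i + 1) ^ index;
    /* mask = 0xff exactly when diff == 0; valid because diff stays small here. */
    uint8_t mask = (uint8_t)(0 - ((diff - 1) >> 31));
    for (size_t j = 0; j < 96; j++) {
      out[j] |= table[i][j] & mask;      /* every entry is touched every time */
    }
  }
}
```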
572
573
574
575 .globl ecp_nistz256_select_w7
576 .hidden ecp_nistz256_select_w7
577 .type ecp_nistz256_select_w7,@function
578 .align 32
579 ecp_nistz256_select_w7:
580 movdqa .LOne(%rip),%xmm8
581 movd %edx,%xmm1
582
583 pxor %xmm2,%xmm2
584 pxor %xmm3,%xmm3
585 pxor %xmm4,%xmm4
586 pxor %xmm5,%xmm5
587
588 movdqa %xmm8,%xmm0
589 pshufd $0,%xmm1,%xmm1
590 movq $64,%rax
591
592 .Lselect_loop_sse_w7:
593 movdqa %xmm8,%xmm15
594 paddd %xmm0,%xmm8
595 movdqa 0(%rsi),%xmm9
596 movdqa 16(%rsi),%xmm10
597 pcmpeqd %xmm1,%xmm15
598 movdqa 32(%rsi),%xmm11
599 movdqa 48(%rsi),%xmm12
600 leaq 64(%rsi),%rsi
601
602 pand %xmm15,%xmm9
603 pand %xmm15,%xmm10
604 por %xmm9,%xmm2
605 pand %xmm15,%xmm11
606 por %xmm10,%xmm3
607 pand %xmm15,%xmm12
608 por %xmm11,%xmm4
609 prefetcht0 255(%rsi)
610 por %xmm12,%xmm5
611
612 decq %rax
613 jnz .Lselect_loop_sse_w7
614
615 movdqu %xmm2,0(%rdi)
616 movdqu %xmm3,16(%rdi)
617 movdqu %xmm4,32(%rdi)
618 movdqu %xmm5,48(%rdi)
619 .byte 0xf3,0xc3
620 .size ecp_nistz256_select_w7,.-ecp_nistz256_select_w7
621 .globl ecp_nistz256_avx2_select_w7
622 .hidden ecp_nistz256_avx2_select_w7
623 .type ecp_nistz256_avx2_select_w7,@function
624 .align 32
625 ecp_nistz256_avx2_select_w7:
626 .byte 0x0f,0x0b
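# the two bytes above encode ud2: this AVX2 entry point is a stub that traps if it is ever called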
627 .byte 0xf3,0xc3
628 .size ecp_nistz256_avx2_select_w7,.-ecp_nistz256_avx2_select_w7
629 .type __ecp_nistz256_add_toq,@function
630 .align 32
631 __ecp_nistz256_add_toq:
632 xorq %r11,%r11
633 addq 0(%rbx),%r12
634 adcq 8(%rbx),%r13
635 movq %r12,%rax
636 adcq 16(%rbx),%r8
637 adcq 24(%rbx),%r9
638 movq %r13,%rbp
639 adcq $0,%r11
640
641 subq $-1,%r12
642 movq %r8,%rcx
643 sbbq %r14,%r13
644 sbbq $0,%r8
645 movq %r9,%r10
646 sbbq %r15,%r9
647 sbbq $0,%r11
648
649 cmovcq %rax,%r12
650 cmovcq %rbp,%r13
651 movq %r12,0(%rdi)
652 cmovcq %rcx,%r8
653 movq %r13,8(%rdi)
654 cmovcq %r10,%r9
655 movq %r8,16(%rdi)
656 movq %r9,24(%rdi)
657
658 .byte 0xf3,0xc3
659 .size __ecp_nistz256_add_toq,.-__ecp_nistz256_add_toq
660
661 .type __ecp_nistz256_sub_fromq,@function
662 .align 32
663 __ecp_nistz256_sub_fromq:
664 subq 0(%rbx),%r12
665 sbbq 8(%rbx),%r13
666 movq %r12,%rax
667 sbbq 16(%rbx),%r8
668 sbbq 24(%rbx),%r9
669 movq %r13,%rbp
670 sbbq %r11,%r11
671
672 addq $-1,%r12
673 movq %r8,%rcx
674 adcq %r14,%r13
675 adcq $0,%r8
676 movq %r9,%r10
677 adcq %r15,%r9
678 testq %r11,%r11
679
680 cmovzq %rax,%r12
681 cmovzq %rbp,%r13
682 movq %r12,0(%rdi)
683 cmovzq %rcx,%r8
684 movq %r13,8(%rdi)
685 cmovzq %r10,%r9
686 movq %r8,16(%rdi)
687 movq %r9,24(%rdi)
688
689 .byte 0xf3,0xc3
690 .size __ecp_nistz256_sub_fromq,.-__ecp_nistz256_sub_fromq
691
692 .type __ecp_nistz256_subq,@function
693 .align 32
694 __ecp_nistz256_subq:
695 subq %r12,%rax
696 sbbq %r13,%rbp
697 movq %rax,%r12
698 sbbq %r8,%rcx
699 sbbq %r9,%r10
700 movq %rbp,%r13
701 sbbq %r11,%r11
702
703 addq $-1,%rax
704 movq %rcx,%r8
705 adcq %r14,%rbp
706 adcq $0,%rcx
707 movq %r10,%r9
708 adcq %r15,%r10
709 testq %r11,%r11
710
711 cmovnzq %rax,%r12
712 cmovnzq %rbp,%r13
713 cmovnzq %rcx,%r8
714 cmovnzq %r10,%r9
715
716 .byte 0xf3,0xc3
717 .size __ecp_nistz256_subq,.-__ecp_nistz256_subq
718
719 .type __ecp_nistz256_mul_by_2q,@function
720 .align 32
721 __ecp_nistz256_mul_by_2q:
722 xorq %r11,%r11
723 addq %r12,%r12
724 adcq %r13,%r13
725 movq %r12,%rax
726 adcq %r8,%r8
727 adcq %r9,%r9
728 movq %r13,%rbp
729 adcq $0,%r11
730
731 subq $-1,%r12
732 movq %r8,%rcx
733 sbbq %r14,%r13
734 sbbq $0,%r8
735 movq %r9,%r10
736 sbbq %r15,%r9
737 sbbq $0,%r11
738
739 cmovcq %rax,%r12
740 cmovcq %rbp,%r13
741 movq %r12,0(%rdi)
742 cmovcq %rcx,%r8
743 movq %r13,8(%rdi)
744 cmovcq %r10,%r9
745 movq %r8,16(%rdi)
746 movq %r9,24(%rdi)
747
748 .byte 0xf3,0xc3
749 .size __ecp_nistz256_mul_by_2q,.-__ecp_nistz256_mul_by_2q
750 .globl ecp_nistz256_point_double
751 .hidden ecp_nistz256_point_double
752 .type ecp_nistz256_point_double,@function
753 .align 32
754 ecp_nistz256_point_double:
755 pushq %rbp
756 pushq %rbx
757 pushq %r12
758 pushq %r13
759 pushq %r14
760 pushq %r15
761 subq $160+8,%rsp
762
763 .Lpoint_double_shortcutq:
764 movdqu 0(%rsi),%xmm0
765 movq %rsi,%rbx
766 movdqu 16(%rsi),%xmm1
767 movq 32+0(%rsi),%r12
768 movq 32+8(%rsi),%r13
769 movq 32+16(%rsi),%r8
770 movq 32+24(%rsi),%r9
771 movq .Lpoly+8(%rip),%r14
772 movq .Lpoly+24(%rip),%r15
773 movdqa %xmm0,96(%rsp)
774 movdqa %xmm1,96+16(%rsp)
775 leaq 32(%rdi),%r10
776 leaq 64(%rdi),%r11
777 .byte 102,72,15,110,199
778 .byte 102,73,15,110,202
779 .byte 102,73,15,110,211
780
781 leaq 0(%rsp),%rdi
782 call __ecp_nistz256_mul_by_2q
783
784 movq 64+0(%rsi),%rax
785 movq 64+8(%rsi),%r14
786 movq 64+16(%rsi),%r15
787 movq 64+24(%rsi),%r8
788 leaq 64-0(%rsi),%rsi
789 leaq 64(%rsp),%rdi
790 call __ecp_nistz256_sqr_montq
791
792 movq 0+0(%rsp),%rax
793 movq 8+0(%rsp),%r14
794 leaq 0+0(%rsp),%rsi
795 movq 16+0(%rsp),%r15
796 movq 24+0(%rsp),%r8
797 leaq 0(%rsp),%rdi
798 call __ecp_nistz256_sqr_montq
799
800 movq 32(%rbx),%rax
801 movq 64+0(%rbx),%r9
802 movq 64+8(%rbx),%r10
803 movq 64+16(%rbx),%r11
804 movq 64+24(%rbx),%r12
805 leaq 64-0(%rbx),%rsi
806 leaq 32(%rbx),%rbx
807 .byte 102,72,15,126,215
808 call __ecp_nistz256_mul_montq
809 call __ecp_nistz256_mul_by_2q
810
811 movq 96+0(%rsp),%r12
812 movq 96+8(%rsp),%r13
813 leaq 64(%rsp),%rbx
814 movq 96+16(%rsp),%r8
815 movq 96+24(%rsp),%r9
816 leaq 32(%rsp),%rdi
817 call __ecp_nistz256_add_toq
818
819 movq 96+0(%rsp),%r12
820 movq 96+8(%rsp),%r13
821 leaq 64(%rsp),%rbx
822 movq 96+16(%rsp),%r8
823 movq 96+24(%rsp),%r9
824 leaq 64(%rsp),%rdi
825 call __ecp_nistz256_sub_fromq
826
827 movq 0+0(%rsp),%rax
828 movq 8+0(%rsp),%r14
829 leaq 0+0(%rsp),%rsi
830 movq 16+0(%rsp),%r15
831 movq 24+0(%rsp),%r8
832 .byte 102,72,15,126,207
833 call __ecp_nistz256_sqr_montq
834 xorq %r9,%r9
835 movq %r12,%rax
836 addq $-1,%r12
837 movq %r13,%r10
838 adcq %rsi,%r13
839 movq %r14,%rcx
840 adcq $0,%r14
841 movq %r15,%r8
842 adcq %rbp,%r15
843 adcq $0,%r9
844 xorq %rsi,%rsi
845 testq $1,%rax
846
847 cmovzq %rax,%r12
848 cmovzq %r10,%r13
849 cmovzq %rcx,%r14
850 cmovzq %r8,%r15
851 cmovzq %rsi,%r9
852
853 movq %r13,%rax
854 shrq $1,%r12
855 shlq $63,%rax
856 movq %r14,%r10
857 shrq $1,%r13
858 orq %rax,%r12
859 shlq $63,%r10
860 movq %r15,%rcx
861 shrq $1,%r14
862 orq %r10,%r13
863 shlq $63,%rcx
864 movq %r12,0(%rdi)
865 shrq $1,%r15
866 movq %r13,8(%rdi)
867 shlq $63,%r9
868 orq %rcx,%r14
869 orq %r9,%r15
870 movq %r14,16(%rdi)
871 movq %r15,24(%rdi)
872 movq 64(%rsp),%rax
873 leaq 64(%rsp),%rbx
874 movq 0+32(%rsp),%r9
875 movq 8+32(%rsp),%r10
876 leaq 0+32(%rsp),%rsi
877 movq 16+32(%rsp),%r11
878 movq 24+32(%rsp),%r12
879 leaq 32(%rsp),%rdi
880 call __ecp_nistz256_mul_montq
881
882 leaq 128(%rsp),%rdi
883 call __ecp_nistz256_mul_by_2q
884
885 leaq 32(%rsp),%rbx
886 leaq 32(%rsp),%rdi
887 call __ecp_nistz256_add_toq
888
889 movq 96(%rsp),%rax
890 leaq 96(%rsp),%rbx
891 movq 0+0(%rsp),%r9
892 movq 8+0(%rsp),%r10
893 leaq 0+0(%rsp),%rsi
894 movq 16+0(%rsp),%r11
895 movq 24+0(%rsp),%r12
896 leaq 0(%rsp),%rdi
897 call __ecp_nistz256_mul_montq
898
899 leaq 128(%rsp),%rdi
900 call __ecp_nistz256_mul_by_2q
901
902 movq 0+32(%rsp),%rax
903 movq 8+32(%rsp),%r14
904 leaq 0+32(%rsp),%rsi
905 movq 16+32(%rsp),%r15
906 movq 24+32(%rsp),%r8
907 .byte 102,72,15,126,199
908 call __ecp_nistz256_sqr_montq
909
910 leaq 128(%rsp),%rbx
911 movq %r14,%r8
912 movq %r15,%r9
913 movq %rsi,%r14
914 movq %rbp,%r15
915 call __ecp_nistz256_sub_fromq
916
917 movq 0+0(%rsp),%rax
918 movq 0+8(%rsp),%rbp
919 movq 0+16(%rsp),%rcx
920 movq 0+24(%rsp),%r10
921 leaq 0(%rsp),%rdi
922 call __ecp_nistz256_subq
923
924 movq 32(%rsp),%rax
925 leaq 32(%rsp),%rbx
926 movq %r12,%r14
927 xorl %ecx,%ecx
928 movq %r12,0+0(%rsp)
929 movq %r13,%r10
930 movq %r13,0+8(%rsp)
931 cmovzq %r8,%r11
932 movq %r8,0+16(%rsp)
933 leaq 0-0(%rsp),%rsi
934 cmovzq %r9,%r12
935 movq %r9,0+24(%rsp)
936 movq %r14,%r9
937 leaq 0(%rsp),%rdi
938 call __ecp_nistz256_mul_montq
939
940 .byte 102,72,15,126,203
941 .byte 102,72,15,126,207
942 call __ecp_nistz256_sub_fromq
943
944 addq $160+8,%rsp
945 popq %r15
946 popq %r14
947 popq %r13
948 popq %r12
949 popq %rbx
950 popq %rbp
951 .byte 0xf3,0xc3
952 .size ecp_nistz256_point_double,.-ecp_nistz256_point_double
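ecp_nistz256_point_double implements Jacobian point doubling on P-256 (a = -3), with every field operation routed through the Montgomery helpers above and an inline halving (add p when the value is odd, then shift right across the limbs) in the middle of the routine. For orientation, one standard set of a = -3 doubling formulas; the assembly computes the same point but with its own operation schedule, on Montgomery-form coordinates:

```latex
\begin{aligned}
\delta &= Z_1^2, \qquad \gamma = Y_1^2, \qquad \beta = X_1\gamma,\\
\alpha &= 3\,(X_1-\delta)(X_1+\delta),\\
X_3 &= \alpha^2 - 8\beta,\\
Z_3 &= (Y_1+Z_1)^2 - \gamma - \delta,\\
Y_3 &= \alpha\,(4\beta - X_3) - 8\gamma^2.
\end{aligned}
```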
953 .globl ecp_nistz256_point_add
954 .hidden ecp_nistz256_point_add
955 .type ecp_nistz256_point_add,@function
956 .align 32
957 ecp_nistz256_point_add:
958 pushq %rbp
959 pushq %rbx
960 pushq %r12
961 pushq %r13
962 pushq %r14
963 pushq %r15
964 subq $576+8,%rsp
965
966 movdqu 0(%rsi),%xmm0
967 movdqu 16(%rsi),%xmm1
968 movdqu 32(%rsi),%xmm2
969 movdqu 48(%rsi),%xmm3
970 movdqu 64(%rsi),%xmm4
971 movdqu 80(%rsi),%xmm5
972 movq %rsi,%rbx
973 movq %rdx,%rsi
974 movdqa %xmm0,384(%rsp)
975 movdqa %xmm1,384+16(%rsp)
976 movdqa %xmm2,416(%rsp)
977 movdqa %xmm3,416+16(%rsp)
978 movdqa %xmm4,448(%rsp)
979 movdqa %xmm5,448+16(%rsp)
980 por %xmm4,%xmm5
981
982 movdqu 0(%rsi),%xmm0
983 pshufd $0xb1,%xmm5,%xmm3
984 movdqu 16(%rsi),%xmm1
985 movdqu 32(%rsi),%xmm2
986 por %xmm3,%xmm5
987 movdqu 48(%rsi),%xmm3
988 movq 64+0(%rsi),%rax
989 movq 64+8(%rsi),%r14
990 movq 64+16(%rsi),%r15
991 movq 64+24(%rsi),%r8
992 movdqa %xmm0,480(%rsp)
993 pshufd $0x1e,%xmm5,%xmm4
994 movdqa %xmm1,480+16(%rsp)
995 movdqu 64(%rsi),%xmm0
996 movdqu 80(%rsi),%xmm1
997 movdqa %xmm2,512(%rsp)
998 movdqa %xmm3,512+16(%rsp)
999 por %xmm4,%xmm5
1000 pxor %xmm4,%xmm4
1001 por %xmm0,%xmm1
1002 .byte 102,72,15,110,199
1003
1004 leaq 64-0(%rsi),%rsi
1005 movq %rax,544+0(%rsp)
1006 movq %r14,544+8(%rsp)
1007 movq %r15,544+16(%rsp)
1008 movq %r8,544+24(%rsp)
1009 leaq 96(%rsp),%rdi
1010 call __ecp_nistz256_sqr_montq
1011
1012 pcmpeqd %xmm4,%xmm5
1013 pshufd $0xb1,%xmm1,%xmm4
1014 por %xmm1,%xmm4
1015 pshufd $0,%xmm5,%xmm5
1016 pshufd $0x1e,%xmm4,%xmm3
1017 por %xmm3,%xmm4
1018 pxor %xmm3,%xmm3
1019 pcmpeqd %xmm3,%xmm4
1020 pshufd $0,%xmm4,%xmm4
1021 movq 64+0(%rbx),%rax
1022 movq 64+8(%rbx),%r14
1023 movq 64+16(%rbx),%r15
1024 movq 64+24(%rbx),%r8
1025 .byte 102,72,15,110,203
1026
1027 leaq 64-0(%rbx),%rsi
1028 leaq 32(%rsp),%rdi
1029 call __ecp_nistz256_sqr_montq
1030
1031 movq 544(%rsp),%rax
1032 leaq 544(%rsp),%rbx
1033 movq 0+96(%rsp),%r9
1034 movq 8+96(%rsp),%r10
1035 leaq 0+96(%rsp),%rsi
1036 movq 16+96(%rsp),%r11
1037 movq 24+96(%rsp),%r12
1038 leaq 224(%rsp),%rdi
1039 call __ecp_nistz256_mul_montq
1040
1041 movq 448(%rsp),%rax
1042 leaq 448(%rsp),%rbx
1043 movq 0+32(%rsp),%r9
1044 movq 8+32(%rsp),%r10
1045 leaq 0+32(%rsp),%rsi
1046 movq 16+32(%rsp),%r11
1047 movq 24+32(%rsp),%r12
1048 leaq 256(%rsp),%rdi
1049 call __ecp_nistz256_mul_montq
1050
1051 movq 416(%rsp),%rax
1052 leaq 416(%rsp),%rbx
1053 movq 0+224(%rsp),%r9
1054 movq 8+224(%rsp),%r10
1055 leaq 0+224(%rsp),%rsi
1056 movq 16+224(%rsp),%r11
1057 movq 24+224(%rsp),%r12
1058 leaq 224(%rsp),%rdi
1059 call __ecp_nistz256_mul_montq
1060
1061 movq 512(%rsp),%rax
1062 leaq 512(%rsp),%rbx
1063 movq 0+256(%rsp),%r9
1064 movq 8+256(%rsp),%r10
1065 leaq 0+256(%rsp),%rsi
1066 movq 16+256(%rsp),%r11
1067 movq 24+256(%rsp),%r12
1068 leaq 256(%rsp),%rdi
1069 call __ecp_nistz256_mul_montq
1070
1071 leaq 224(%rsp),%rbx
1072 leaq 64(%rsp),%rdi
1073 call __ecp_nistz256_sub_fromq
1074
1075 orq %r13,%r12
1076 movdqa %xmm4,%xmm2
1077 orq %r8,%r12
1078 orq %r9,%r12
1079 por %xmm5,%xmm2
1080 .byte 102,73,15,110,220
1081
1082 movq 384(%rsp),%rax
1083 leaq 384(%rsp),%rbx
1084 movq 0+96(%rsp),%r9
1085 movq 8+96(%rsp),%r10
1086 leaq 0+96(%rsp),%rsi
1087 movq 16+96(%rsp),%r11
1088 movq 24+96(%rsp),%r12
1089 leaq 160(%rsp),%rdi
1090 call __ecp_nistz256_mul_montq
1091
1092 movq 480(%rsp),%rax
1093 leaq 480(%rsp),%rbx
1094 movq 0+32(%rsp),%r9
1095 movq 8+32(%rsp),%r10
1096 leaq 0+32(%rsp),%rsi
1097 movq 16+32(%rsp),%r11
1098 movq 24+32(%rsp),%r12
1099 leaq 192(%rsp),%rdi
1100 call __ecp_nistz256_mul_montq
1101
1102 leaq 160(%rsp),%rbx
1103 leaq 0(%rsp),%rdi
1104 call __ecp_nistz256_sub_fromq
1105
1106 orq %r13,%r12
1107 orq %r8,%r12
1108 orq %r9,%r12
1109
1110 .byte 0x3e
1111 jnz .Ladd_proceedq
1112 .byte 102,73,15,126,208
1113 .byte 102,73,15,126,217
1114 testq %r8,%r8
1115 jnz .Ladd_proceedq
1116 testq %r9,%r9
1117 jz .Ladd_doubleq
1118
1119 .byte 102,72,15,126,199
1120 pxor %xmm0,%xmm0
1121 movdqu %xmm0,0(%rdi)
1122 movdqu %xmm0,16(%rdi)
1123 movdqu %xmm0,32(%rdi)
1124 movdqu %xmm0,48(%rdi)
1125 movdqu %xmm0,64(%rdi)
1126 movdqu %xmm0,80(%rdi)
1127 jmp .Ladd_doneq
1128
1129 .align 32
1130 .Ladd_doubleq:
1131 .byte 102,72,15,126,206
1132 .byte 102,72,15,126,199
1133 addq $416,%rsp
1134 jmp .Lpoint_double_shortcutq
1135
1136 .align 32
1137 .Ladd_proceedq:
1138 movq 0+64(%rsp),%rax
1139 movq 8+64(%rsp),%r14
1140 leaq 0+64(%rsp),%rsi
1141 movq 16+64(%rsp),%r15
1142 movq 24+64(%rsp),%r8
1143 leaq 96(%rsp),%rdi
1144 call __ecp_nistz256_sqr_montq
1145
1146 movq 448(%rsp),%rax
1147 leaq 448(%rsp),%rbx
1148 movq 0+0(%rsp),%r9
1149 movq 8+0(%rsp),%r10
1150 leaq 0+0(%rsp),%rsi
1151 movq 16+0(%rsp),%r11
1152 movq 24+0(%rsp),%r12
1153 leaq 352(%rsp),%rdi
1154 call __ecp_nistz256_mul_montq
1155
1156 movq 0+0(%rsp),%rax
1157 movq 8+0(%rsp),%r14
1158 leaq 0+0(%rsp),%rsi
1159 movq 16+0(%rsp),%r15
1160 movq 24+0(%rsp),%r8
1161 leaq 32(%rsp),%rdi
1162 call __ecp_nistz256_sqr_montq
1163
1164 movq 544(%rsp),%rax
1165 leaq 544(%rsp),%rbx
1166 movq 0+352(%rsp),%r9
1167 movq 8+352(%rsp),%r10
1168 leaq 0+352(%rsp),%rsi
1169 movq 16+352(%rsp),%r11
1170 movq 24+352(%rsp),%r12
1171 leaq 352(%rsp),%rdi
1172 call __ecp_nistz256_mul_montq
1173
1174 movq 0(%rsp),%rax
1175 leaq 0(%rsp),%rbx
1176 movq 0+32(%rsp),%r9
1177 movq 8+32(%rsp),%r10
1178 leaq 0+32(%rsp),%rsi
1179 movq 16+32(%rsp),%r11
1180 movq 24+32(%rsp),%r12
1181 leaq 128(%rsp),%rdi
1182 call __ecp_nistz256_mul_montq
1183
1184 movq 160(%rsp),%rax
1185 leaq 160(%rsp),%rbx
1186 movq 0+32(%rsp),%r9
1187 movq 8+32(%rsp),%r10
1188 leaq 0+32(%rsp),%rsi
1189 movq 16+32(%rsp),%r11
1190 movq 24+32(%rsp),%r12
1191 leaq 192(%rsp),%rdi
1192 call __ecp_nistz256_mul_montq
1193
1194
1195
1196
1197 xorq %r11,%r11
1198 addq %r12,%r12
1199 leaq 96(%rsp),%rsi
1200 adcq %r13,%r13
1201 movq %r12,%rax
1202 adcq %r8,%r8
1203 adcq %r9,%r9
1204 movq %r13,%rbp
1205 adcq $0,%r11
1206
1207 subq $-1,%r12
1208 movq %r8,%rcx
1209 sbbq %r14,%r13
1210 sbbq $0,%r8
1211 movq %r9,%r10
1212 sbbq %r15,%r9
1213 sbbq $0,%r11
1214
1215 cmovcq %rax,%r12
1216 movq 0(%rsi),%rax
1217 cmovcq %rbp,%r13
1218 movq 8(%rsi),%rbp
1219 cmovcq %rcx,%r8
1220 movq 16(%rsi),%rcx
1221 cmovcq %r10,%r9
1222 movq 24(%rsi),%r10
1223
1224 call __ecp_nistz256_subq
1225
1226 leaq 128(%rsp),%rbx
1227 leaq 288(%rsp),%rdi
1228 call __ecp_nistz256_sub_fromq
1229
1230 movq 192+0(%rsp),%rax
1231 movq 192+8(%rsp),%rbp
1232 movq 192+16(%rsp),%rcx
1233 movq 192+24(%rsp),%r10
1234 leaq 320(%rsp),%rdi
1235
1236 call __ecp_nistz256_subq
1237
1238 movq %r12,0(%rdi)
1239 movq %r13,8(%rdi)
1240 movq %r8,16(%rdi)
1241 movq %r9,24(%rdi)
1242 movq 128(%rsp),%rax
1243 leaq 128(%rsp),%rbx
1244 movq 0+224(%rsp),%r9
1245 movq 8+224(%rsp),%r10
1246 leaq 0+224(%rsp),%rsi
1247 movq 16+224(%rsp),%r11
1248 movq 24+224(%rsp),%r12
1249 leaq 256(%rsp),%rdi
1250 call __ecp_nistz256_mul_montq
1251
1252 movq 320(%rsp),%rax
1253 leaq 320(%rsp),%rbx
1254 movq 0+64(%rsp),%r9
1255 movq 8+64(%rsp),%r10
1256 leaq 0+64(%rsp),%rsi
1257 movq 16+64(%rsp),%r11
1258 movq 24+64(%rsp),%r12
1259 leaq 320(%rsp),%rdi
1260 call __ecp_nistz256_mul_montq
1261
1262 leaq 256(%rsp),%rbx
1263 leaq 320(%rsp),%rdi
1264 call __ecp_nistz256_sub_fromq
1265
1266 .byte 102,72,15,126,199
1267
1268 movdqa %xmm5,%xmm0
1269 movdqa %xmm5,%xmm1
1270 pandn 352(%rsp),%xmm0
1271 movdqa %xmm5,%xmm2
1272 pandn 352+16(%rsp),%xmm1
1273 movdqa %xmm5,%xmm3
1274 pand 544(%rsp),%xmm2
1275 pand 544+16(%rsp),%xmm3
1276 por %xmm0,%xmm2
1277 por %xmm1,%xmm3
1278
1279 movdqa %xmm4,%xmm0
1280 movdqa %xmm4,%xmm1
1281 pandn %xmm2,%xmm0
1282 movdqa %xmm4,%xmm2
1283 pandn %xmm3,%xmm1
1284 movdqa %xmm4,%xmm3
1285 pand 448(%rsp),%xmm2
1286 pand 448+16(%rsp),%xmm3
1287 por %xmm0,%xmm2
1288 por %xmm1,%xmm3
1289 movdqu %xmm2,64(%rdi)
1290 movdqu %xmm3,80(%rdi)
1291
1292 movdqa %xmm5,%xmm0
1293 movdqa %xmm5,%xmm1
1294 pandn 288(%rsp),%xmm0
1295 movdqa %xmm5,%xmm2
1296 pandn 288+16(%rsp),%xmm1
1297 movdqa %xmm5,%xmm3
1298 pand 480(%rsp),%xmm2
1299 pand 480+16(%rsp),%xmm3
1300 por %xmm0,%xmm2
1301 por %xmm1,%xmm3
1302
1303 movdqa %xmm4,%xmm0
1304 movdqa %xmm4,%xmm1
1305 pandn %xmm2,%xmm0
1306 movdqa %xmm4,%xmm2
1307 pandn %xmm3,%xmm1
1308 movdqa %xmm4,%xmm3
1309 pand 384(%rsp),%xmm2
1310 pand 384+16(%rsp),%xmm3
1311 por %xmm0,%xmm2
1312 por %xmm1,%xmm3
1313 movdqu %xmm2,0(%rdi)
1314 movdqu %xmm3,16(%rdi)
1315
1316 movdqa %xmm5,%xmm0
1317 movdqa %xmm5,%xmm1
1318 pandn 320(%rsp),%xmm0
1319 movdqa %xmm5,%xmm2
1320 pandn 320+16(%rsp),%xmm1
1321 movdqa %xmm5,%xmm3
1322 pand 512(%rsp),%xmm2
1323 pand 512+16(%rsp),%xmm3
1324 por %xmm0,%xmm2
1325 por %xmm1,%xmm3
1326
1327 movdqa %xmm4,%xmm0
1328 movdqa %xmm4,%xmm1
1329 pandn %xmm2,%xmm0
1330 movdqa %xmm4,%xmm2
1331 pandn %xmm3,%xmm1
1332 movdqa %xmm4,%xmm3
1333 pand 416(%rsp),%xmm2
1334 pand 416+16(%rsp),%xmm3
1335 por %xmm0,%xmm2
1336 por %xmm1,%xmm3
1337 movdqu %xmm2,32(%rdi)
1338 movdqu %xmm3,48(%rdi)
1339
1340 .Ladd_doneq:
1341 addq $576+8,%rsp
1342 popq %r15
1343 popq %r14
1344 popq %r13
1345 popq %r12
1346 popq %rbx
1347 popq %rbp
1348 .byte 0xf3,0xc3
1349 .size ecp_nistz256_point_add,.-ecp_nistz256_point_add
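ecp_nistz256_point_add is full Jacobian addition: it rescales both inputs via the Z-coordinate squares and cubes, detects the degenerate cases (branching to the doubling shortcut when the two points coincide, and using the pand/pandn selects at the end to copy the other operand through when either input is the point at infinity), and writes the result with constant-time selects. ecp_nistz256_point_add_affine, which follows, is the mixed-addition variant that assumes the second point is affine (Z2 = 1), which removes several multiplications. The textbook formulas for the generic case, for reference only; the routine's schedule and Montgomery-domain values differ:

```latex
\begin{aligned}
U_1 &= X_1 Z_2^2, & U_2 &= X_2 Z_1^2, & S_1 &= Y_1 Z_2^3, & S_2 &= Y_2 Z_1^3,\\
H &= U_2 - U_1, & r &= S_2 - S_1,\\
X_3 &= r^2 - H^3 - 2U_1H^2,\\
Y_3 &= r\,(U_1H^2 - X_3) - S_1H^3,\\
Z_3 &= Z_1 Z_2 H.
\end{aligned}
```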
1350 .globl ecp_nistz256_point_add_affine
1351 .hidden ecp_nistz256_point_add_affine
1352 .type ecp_nistz256_point_add_affine,@function
1353 .align 32
1354 ecp_nistz256_point_add_affine:
1355 pushq %rbp
1356 pushq %rbx
1357 pushq %r12
1358 pushq %r13
1359 pushq %r14
1360 pushq %r15
1361 subq $480+8,%rsp
1362
1363 movdqu 0(%rsi),%xmm0
1364 movq %rdx,%rbx
1365 movdqu 16(%rsi),%xmm1
1366 movdqu 32(%rsi),%xmm2
1367 movdqu 48(%rsi),%xmm3
1368 movdqu 64(%rsi),%xmm4
1369 movdqu 80(%rsi),%xmm5
1370 movq 64+0(%rsi),%rax
1371 movq 64+8(%rsi),%r14
1372 movq 64+16(%rsi),%r15
1373 movq 64+24(%rsi),%r8
1374 movdqa %xmm0,320(%rsp)
1375 movdqa %xmm1,320+16(%rsp)
1376 movdqa %xmm2,352(%rsp)
1377 movdqa %xmm3,352+16(%rsp)
1378 movdqa %xmm4,384(%rsp)
1379 movdqa %xmm5,384+16(%rsp)
1380 por %xmm4,%xmm5
1381
1382 movdqu 0(%rbx),%xmm0
1383 pshufd $0xb1,%xmm5,%xmm3
1384 movdqu 16(%rbx),%xmm1
1385 movdqu 32(%rbx),%xmm2
1386 por %xmm3,%xmm5
1387 movdqu 48(%rbx),%xmm3
1388 movdqa %xmm0,416(%rsp)
1389 pshufd $0x1e,%xmm5,%xmm4
1390 movdqa %xmm1,416+16(%rsp)
1391 por %xmm0,%xmm1
1392 .byte 102,72,15,110,199
1393 movdqa %xmm2,448(%rsp)
1394 movdqa %xmm3,448+16(%rsp)
1395 por %xmm2,%xmm3
1396 por %xmm4,%xmm5
1397 pxor %xmm4,%xmm4
1398 por %xmm1,%xmm3
1399
1400 leaq 64-0(%rsi),%rsi
1401 leaq 32(%rsp),%rdi
1402 call __ecp_nistz256_sqr_montq
1403
1404 pcmpeqd %xmm4,%xmm5
1405 pshufd $0xb1,%xmm3,%xmm4
1406 movq 0(%rbx),%rax
1407
1408 movq %r12,%r9
1409 por %xmm3,%xmm4
1410 pshufd $0,%xmm5,%xmm5
1411 pshufd $0x1e,%xmm4,%xmm3
1412 movq %r13,%r10
1413 por %xmm3,%xmm4
1414 pxor %xmm3,%xmm3
1415 movq %r14,%r11
1416 pcmpeqd %xmm3,%xmm4
1417 pshufd $0,%xmm4,%xmm4
1418
1419 leaq 32-0(%rsp),%rsi
1420 movq %r15,%r12
1421 leaq 0(%rsp),%rdi
1422 call __ecp_nistz256_mul_montq
1423
1424 leaq 320(%rsp),%rbx
1425 leaq 64(%rsp),%rdi
1426 call __ecp_nistz256_sub_fromq
1427
1428 movq 384(%rsp),%rax
1429 leaq 384(%rsp),%rbx
1430 movq 0+32(%rsp),%r9
1431 movq 8+32(%rsp),%r10
1432 leaq 0+32(%rsp),%rsi
1433 movq 16+32(%rsp),%r11
1434 movq 24+32(%rsp),%r12
1435 leaq 32(%rsp),%rdi
1436 call __ecp_nistz256_mul_montq
1437
1438 movq 384(%rsp),%rax
1439 leaq 384(%rsp),%rbx
1440 movq 0+64(%rsp),%r9
1441 movq 8+64(%rsp),%r10
1442 leaq 0+64(%rsp),%rsi
1443 movq 16+64(%rsp),%r11
1444 movq 24+64(%rsp),%r12
1445 leaq 288(%rsp),%rdi
1446 call __ecp_nistz256_mul_montq
1447
1448 movq 448(%rsp),%rax
1449 leaq 448(%rsp),%rbx
1450 movq 0+32(%rsp),%r9
1451 movq 8+32(%rsp),%r10
1452 leaq 0+32(%rsp),%rsi
1453 movq 16+32(%rsp),%r11
1454 movq 24+32(%rsp),%r12
1455 leaq 32(%rsp),%rdi
1456 call __ecp_nistz256_mul_montq
1457
1458 leaq 352(%rsp),%rbx
1459 leaq 96(%rsp),%rdi
1460 call __ecp_nistz256_sub_fromq
1461
1462 movq 0+64(%rsp),%rax
1463 movq 8+64(%rsp),%r14
1464 leaq 0+64(%rsp),%rsi
1465 movq 16+64(%rsp),%r15
1466 movq 24+64(%rsp),%r8
1467 leaq 128(%rsp),%rdi
1468 call __ecp_nistz256_sqr_montq
1469
1470 movq 0+96(%rsp),%rax
1471 movq 8+96(%rsp),%r14
1472 leaq 0+96(%rsp),%rsi
1473 movq 16+96(%rsp),%r15
1474 movq 24+96(%rsp),%r8
1475 leaq 192(%rsp),%rdi
1476 call __ecp_nistz256_sqr_montq
1477
1478 movq 128(%rsp),%rax
1479 leaq 128(%rsp),%rbx
1480 movq 0+64(%rsp),%r9
1481 movq 8+64(%rsp),%r10
1482 leaq 0+64(%rsp),%rsi
1483 movq 16+64(%rsp),%r11
1484 movq 24+64(%rsp),%r12
1485 leaq 160(%rsp),%rdi
1486 call __ecp_nistz256_mul_montq
1487
1488 movq 320(%rsp),%rax
1489 leaq 320(%rsp),%rbx
1490 movq 0+128(%rsp),%r9
1491 movq 8+128(%rsp),%r10
1492 leaq 0+128(%rsp),%rsi
1493 movq 16+128(%rsp),%r11
1494 movq 24+128(%rsp),%r12
1495 leaq 0(%rsp),%rdi
1496 call __ecp_nistz256_mul_montq
1497
1498
1499
1500
1501 xorq %r11,%r11
1502 addq %r12,%r12
1503 leaq 192(%rsp),%rsi
1504 adcq %r13,%r13
1505 movq %r12,%rax
1506 adcq %r8,%r8
1507 adcq %r9,%r9
1508 movq %r13,%rbp
1509 adcq $0,%r11
1510
1511 subq $-1,%r12
1512 movq %r8,%rcx
1513 sbbq %r14,%r13
1514 sbbq $0,%r8
1515 movq %r9,%r10
1516 sbbq %r15,%r9
1517 sbbq $0,%r11
1518
1519 cmovcq %rax,%r12
1520 movq 0(%rsi),%rax
1521 cmovcq %rbp,%r13
1522 movq 8(%rsi),%rbp
1523 cmovcq %rcx,%r8
1524 movq 16(%rsi),%rcx
1525 cmovcq %r10,%r9
1526 movq 24(%rsi),%r10
1527
1528 call __ecp_nistz256_subq
1529
1530 leaq 160(%rsp),%rbx
1531 leaq 224(%rsp),%rdi
1532 call __ecp_nistz256_sub_fromq
1533
1534 movq 0+0(%rsp),%rax
1535 movq 0+8(%rsp),%rbp
1536 movq 0+16(%rsp),%rcx
1537 movq 0+24(%rsp),%r10
1538 leaq 64(%rsp),%rdi
1539
1540 call __ecp_nistz256_subq
1541
1542 movq %r12,0(%rdi)
1543 movq %r13,8(%rdi)
1544 movq %r8,16(%rdi)
1545 movq %r9,24(%rdi)
1546 movq 352(%rsp),%rax
1547 leaq 352(%rsp),%rbx
1548 movq 0+160(%rsp),%r9
1549 movq 8+160(%rsp),%r10
1550 leaq 0+160(%rsp),%rsi
1551 movq 16+160(%rsp),%r11
1552 movq 24+160(%rsp),%r12
1553 leaq 32(%rsp),%rdi
1554 call __ecp_nistz256_mul_montq
1555
1556 movq 96(%rsp),%rax
1557 leaq 96(%rsp),%rbx
1558 movq 0+64(%rsp),%r9
1559 movq 8+64(%rsp),%r10
1560 leaq 0+64(%rsp),%rsi
1561 movq 16+64(%rsp),%r11
1562 movq 24+64(%rsp),%r12
1563 leaq 64(%rsp),%rdi
1564 call __ecp_nistz256_mul_montq
1565
1566 leaq 32(%rsp),%rbx
1567 leaq 256(%rsp),%rdi
1568 call __ecp_nistz256_sub_fromq
1569
1570 .byte 102,72,15,126,199
1571
1572 movdqa %xmm5,%xmm0
1573 movdqa %xmm5,%xmm1
1574 pandn 288(%rsp),%xmm0
1575 movdqa %xmm5,%xmm2
1576 pandn 288+16(%rsp),%xmm1
1577 movdqa %xmm5,%xmm3
1578 pand .LONE_mont(%rip),%xmm2
1579 pand .LONE_mont+16(%rip),%xmm3
1580 por %xmm0,%xmm2
1581 por %xmm1,%xmm3
1582
1583 movdqa %xmm4,%xmm0
1584 movdqa %xmm4,%xmm1
1585 pandn %xmm2,%xmm0
1586 movdqa %xmm4,%xmm2
1587 pandn %xmm3,%xmm1
1588 movdqa %xmm4,%xmm3
1589 pand 384(%rsp),%xmm2
1590 pand 384+16(%rsp),%xmm3
1591 por %xmm0,%xmm2
1592 por %xmm1,%xmm3
1593 movdqu %xmm2,64(%rdi)
1594 movdqu %xmm3,80(%rdi)
1595
1596 movdqa %xmm5,%xmm0
1597 movdqa %xmm5,%xmm1
1598 pandn 224(%rsp),%xmm0
1599 movdqa %xmm5,%xmm2
1600 pandn 224+16(%rsp),%xmm1
1601 movdqa %xmm5,%xmm3
1602 pand 416(%rsp),%xmm2
1603 pand 416+16(%rsp),%xmm3
1604 por %xmm0,%xmm2
1605 por %xmm1,%xmm3
1606
1607 movdqa %xmm4,%xmm0
1608 movdqa %xmm4,%xmm1
1609 pandn %xmm2,%xmm0
1610 movdqa %xmm4,%xmm2
1611 pandn %xmm3,%xmm1
1612 movdqa %xmm4,%xmm3
1613 pand 320(%rsp),%xmm2
1614 pand 320+16(%rsp),%xmm3
1615 por %xmm0,%xmm2
1616 por %xmm1,%xmm3
1617 movdqu %xmm2,0(%rdi)
1618 movdqu %xmm3,16(%rdi)
1619
1620 movdqa %xmm5,%xmm0
1621 movdqa %xmm5,%xmm1
1622 pandn 256(%rsp),%xmm0
1623 movdqa %xmm5,%xmm2
1624 pandn 256+16(%rsp),%xmm1
1625 movdqa %xmm5,%xmm3
1626 pand 448(%rsp),%xmm2
1627 pand 448+16(%rsp),%xmm3
1628 por %xmm0,%xmm2
1629 por %xmm1,%xmm3
1630
1631 movdqa %xmm4,%xmm0
1632 movdqa %xmm4,%xmm1
1633 pandn %xmm2,%xmm0
1634 movdqa %xmm4,%xmm2
1635 pandn %xmm3,%xmm1
1636 movdqa %xmm4,%xmm3
1637 pand 352(%rsp),%xmm2
1638 pand 352+16(%rsp),%xmm3
1639 por %xmm0,%xmm2
1640 por %xmm1,%xmm3
1641 movdqu %xmm2,32(%rdi)
1642 movdqu %xmm3,48(%rdi)
1643
1644 addq $480+8,%rsp
1645 popq %r15
1646 popq %r14
1647 popq %r13
1648 popq %r12
1649 popq %rbx
1650 popq %rbp
1651 .byte 0xf3,0xc3
1652 .size ecp_nistz256_point_add_affine,.-ecp_nistz256_point_add_affine
1653 #endif