Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(293)

Side by Side Diff: third_party/boringssl/win-x86_64/crypto/ec/p256-x86_64-asm.asm

Issue 2219933002: Land BoringSSL roll on master (Closed) Base URL: git@github.com:dart-lang/sdk.git@master
Patch Set: Created 4 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 default rel
2 %define XMMWORD
3 %define YMMWORD
4 %define ZMMWORD
5 section .text code align=64
6
7 EXTERN OPENSSL_ia32cap_P
8
9
10 ALIGN 64
; Field modulus p = 2^256 - 2^224 + 2^192 + 2^96 - 1 as four 64-bit limbs,
; least-significant limb first. The last literal had been split in two by a
; stray space; it is a single 64-bit value.
11 $L$poly:
12 DQ 0xffffffffffffffff,0x00000000ffffffff,0x0000000000000000,0xffffffff00000001
13
; Eight-lane dword constants. $L$One seeds and steps the entry counter in the
; table-select loops; $L$Two and $L$Three are not referenced in this part of
; the file.
14 $L$One:
15 DD 1,1,1,1,1,1,1,1
16 $L$Two:
17 DD 2,2,2,2,2,2,2,2
18 $L$Three:
19 DD 3,3,3,3,3,3,3,3
; 2^256 mod p (p + this value == 2^256), least-significant limb first.
; The last literal had likewise been split by a stray space.
20 $L$ONE_mont:
21 DQ 0x0000000000000001,0xffffffff00000000,0xffffffffffffffff,0x00000000fffffffe
22
23
24 ALIGN 64
; ecp_nistz256_mul_by_2: result = 2*a mod p.
; Win64 entry: rcx = result pointer, rdx = pointer to a (4 x 64-bit limbs,
; least-significant first). rdi/rsi are callee-saved on Win64, so they are
; parked in the caller's shadow space and restored before returning.
;
; Fix: the reduced value used to be selected only when the doubling carried
; out of 2^256, so a result in [p, 2^256) was stored unreduced. The selection
; now also takes the borrow of the trial subtraction into account.
25 ecp_nistz256_mul_by_2:
26 mov QWORD[8+rsp],rdi ;WIN64 prologue
27 mov QWORD[16+rsp],rsi
28 mov rax,rsp
29 $L$SEH_begin_ecp_nistz256_mul_by_2:
30 mov rdi,rcx
31 mov rsi,rdx
32
33
34 push r12
35 push r13
36
; Double a with an add/adc chain; rax,rdx,rcx,r12 keep a copy of 2*a.
37 mov r8,QWORD[rsi]
38 mov r9,QWORD[8+rsi]
39 add r8,r8
40 mov r10,QWORD[16+rsi]
41 adc r9,r9
42 mov r11,QWORD[24+rsi]
43 lea rsi,[$L$poly]
44 mov rax,r8
45 adc r10,r10
46 adc r11,r11
47 mov rdx,r9
; r13 = 0 if the doubling did not carry, all-ones if it did.
48 sbb r13,r13
49
; Trial subtraction of p (mov does not disturb the borrow chain).
50 sub r8,QWORD[rsi]
51 mov rcx,r10
52 sbb r9,QWORD[8+rsi]
53 sbb r10,QWORD[16+rsi]
54 mov r12,r11
55 sbb r11,QWORD[24+rsi]
; Extend the borrow into the carry word: CF ends up set only when the
; subtraction borrowed AND the doubling did not carry, i.e. 2*a < p.
56 sbb r13,0
57
; 2*a < p: put the unsubtracted copy back; otherwise keep 2*a - p.
58 cmovc r8,rax
59 cmovc r9,rdx
60 mov QWORD[rdi],r8
61 cmovc r10,rcx
62 mov QWORD[8+rdi],r9
63 cmovc r11,r12
64 mov QWORD[16+rdi],r10
65 mov QWORD[24+rdi],r11
66
67 pop r13
68 pop r12
69 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
70 mov rsi,QWORD[16+rsp]
71 DB 0F3h,0C3h ;repret
72 $L$SEH_end_ecp_nistz256_mul_by_2:
73
74
75
; ecp_nistz256_neg: result = (0 - a) mod p.
; Win64 entry: rcx = result pointer, rdx = pointer to a (4 x 64-bit limbs).
; rdi/rsi are saved in the caller's shadow space and restored on exit.
76 global ecp_nistz256_neg
77
78 ALIGN 32
79 ecp_nistz256_neg:
80 mov QWORD[8+rsp],rdi ;WIN64 prologue
81 mov QWORD[16+rsp],rsi
82 mov rax,rsp
83 $L$SEH_begin_ecp_nistz256_neg:
84 mov rdi,rcx
85 mov rsi,rdx
86
87
88 push r12
89 push r13
90
91 xor r8,r8
92 xor r9,r9
93 xor r10,r10
94 xor r11,r11
95 xor r13,r13
96
; 0 - a across four limbs; rax,rdx,rcx,r12 keep a copy of the raw difference.
97 sub r8,QWORD[rsi]
98 sbb r9,QWORD[8+rsi]
99 sbb r10,QWORD[16+rsi]
100 mov rax,r8
101 sbb r11,QWORD[24+rsi]
102 lea rsi,[$L$poly]
103 mov rdx,r9
; r13 = 0 when nothing was borrowed (a == 0), all-ones otherwise.
104 sbb r13,0
105
; Add p to the difference.
106 add r8,QWORD[rsi]
107 mov rcx,r10
108 adc r9,QWORD[8+rsi]
109 adc r10,QWORD[16+rsi]
110 mov r12,r11
111 adc r11,QWORD[24+rsi]
112 test r13,r13
113
; No borrow: keep the raw difference instead of difference + p.
114 cmovz r8,rax
115 cmovz r9,rdx
116 mov QWORD[rdi],r8
117 cmovz r10,rcx
118 mov QWORD[8+rdi],r9
119 cmovz r11,r12
120 mov QWORD[16+rdi],r10
121 mov QWORD[24+rdi],r11
122
123 pop r13
124 pop r12
125 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
126 mov rsi,QWORD[16+rsp]
127 DB 0F3h,0C3h ;repret
128 $L$SEH_end_ecp_nistz256_neg:
129
130
131
132
133
134
; ecp_nistz256_mul_mont: public wrapper around __ecp_nistz256_mul_montq.
; Win64 entry: rcx = result pointer, rdx = pointer to a, r8 = pointer to b.
; The arguments are moved into rdi/rsi/rdx, then b's pointer goes to rbx,
; b[0] to rax and a[0..3] to r9..r12, which is what the helper expects.
135 global ecp_nistz256_mul_mont
136
137 ALIGN 32
138 ecp_nistz256_mul_mont:
139 mov QWORD[8+rsp],rdi ;WIN64 prologue
140 mov QWORD[16+rsp],rsi
141 mov rax,rsp
142 $L$SEH_begin_ecp_nistz256_mul_mont:
143 mov rdi,rcx
144 mov rsi,rdx
145 mov rdx,r8
146
147
148 $L$mul_mont:
; Save every callee-saved register the helper overwrites.
149 push rbp
150 push rbx
151 push r12
152 push r13
153 push r14
154 push r15
155 mov rbx,rdx
156 mov rax,QWORD[rdx]
157 mov r9,QWORD[rsi]
158 mov r10,QWORD[8+rsi]
159 mov r11,QWORD[16+rsi]
160 mov r12,QWORD[24+rsi]
161
; The helper stores the product through rdi itself.
162 call __ecp_nistz256_mul_montq
163 $L$mul_mont_done:
164 pop r15
165 pop r14
166 pop r13
167 pop r12
168 pop rbx
169 pop rbp
170 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
171 mov rsi,QWORD[16+rsp]
172 DB 0F3h,0C3h ;repret
173 $L$SEH_end_ecp_nistz256_mul_mont:
174
175
176 ALIGN 32
; __ecp_nistz256_mul_montq: internal 4-limb multiply with a reduction round
; interleaved after each limb of b, followed by one conditional subtraction
; of p.
; In:   rsi = pointer to a, r9..r12 = a[0..3]
;       rbx = pointer to b, rax = b[0]
;       rdi = result pointer
; Out:  result in r12,r13,r8,r9 (low to high) and stored at [rdi];
;       r14 = $L$poly[1], r15 = $L$poly[3]
; Also overwrites rax, rcx, rdx, rbx, rbp, r10, r11 and the flags.
177 __ecp_nistz256_mul_montq:
178
179
; a * b[0] -> r8..r12, with r13 as the top carry word.
180 mov rbp,rax
181 mul r9
182 mov r14,QWORD[(($L$poly+8))]
183 mov r8,rax
184 mov rax,rbp
185 mov r9,rdx
186
187 mul r10
188 mov r15,QWORD[(($L$poly+24))]
189 add r9,rax
190 mov rax,rbp
191 adc rdx,0
192 mov r10,rdx
193
194 mul r11
195 add r10,rax
196 mov rax,rbp
197 adc rdx,0
198 mov r11,rdx
199
200 mul r12
201 add r11,rax
202 mov rax,r8
203 adc rdx,0
204 xor r13,r13
205 mov r12,rdx
206
207
208
209
210
211
212
213
214
215
; Reduction round 1: fold the lowest limb (r8) into the limbs above it using
; r8<<32, r8>>32 and r8*$L$poly[3]; r8 is then cleared for reuse as the next
; top word.
216 mov rbp,r8
217 shl r8,32
218 mul r15
219 shr rbp,32
220 add r9,r8
221 adc r10,rbp
222 adc r11,rax
223 mov rax,QWORD[8+rbx]
224 adc r12,rdx
225 adc r13,0
226 xor r8,r8
227
228
229
; Accumulate a * b[1].
230 mov rbp,rax
231 mul QWORD[rsi]
232 add r9,rax
233 mov rax,rbp
234 adc rdx,0
235 mov rcx,rdx
236
237 mul QWORD[8+rsi]
238 add r10,rcx
239 adc rdx,0
240 add r10,rax
241 mov rax,rbp
242 adc rdx,0
243 mov rcx,rdx
244
245 mul QWORD[16+rsi]
246 add r11,rcx
247 adc rdx,0
248 add r11,rax
249 mov rax,rbp
250 adc rdx,0
251 mov rcx,rdx
252
253 mul QWORD[24+rsi]
254 add r12,rcx
255 adc rdx,0
256 add r12,rax
257 mov rax,r9
258 adc r13,rdx
259 adc r8,0
260
261
262
; Reduction round 2: fold r9.
263 mov rbp,r9
264 shl r9,32
265 mul r15
266 shr rbp,32
267 add r10,r9
268 adc r11,rbp
269 adc r12,rax
270 mov rax,QWORD[16+rbx]
271 adc r13,rdx
272 adc r8,0
273 xor r9,r9
274
275
276
; Accumulate a * b[2].
277 mov rbp,rax
278 mul QWORD[rsi]
279 add r10,rax
280 mov rax,rbp
281 adc rdx,0
282 mov rcx,rdx
283
284 mul QWORD[8+rsi]
285 add r11,rcx
286 adc rdx,0
287 add r11,rax
288 mov rax,rbp
289 adc rdx,0
290 mov rcx,rdx
291
292 mul QWORD[16+rsi]
293 add r12,rcx
294 adc rdx,0
295 add r12,rax
296 mov rax,rbp
297 adc rdx,0
298 mov rcx,rdx
299
300 mul QWORD[24+rsi]
301 add r13,rcx
302 adc rdx,0
303 add r13,rax
304 mov rax,r10
305 adc r8,rdx
306 adc r9,0
307
308
309
; Reduction round 3: fold r10.
310 mov rbp,r10
311 shl r10,32
312 mul r15
313 shr rbp,32
314 add r11,r10
315 adc r12,rbp
316 adc r13,rax
317 mov rax,QWORD[24+rbx]
318 adc r8,rdx
319 adc r9,0
320 xor r10,r10
321
322
323
; Accumulate a * b[3].
324 mov rbp,rax
325 mul QWORD[rsi]
326 add r11,rax
327 mov rax,rbp
328 adc rdx,0
329 mov rcx,rdx
330
331 mul QWORD[8+rsi]
332 add r12,rcx
333 adc rdx,0
334 add r12,rax
335 mov rax,rbp
336 adc rdx,0
337 mov rcx,rdx
338
339 mul QWORD[16+rsi]
340 add r13,rcx
341 adc rdx,0
342 add r13,rax
343 mov rax,rbp
344 adc rdx,0
345 mov rcx,rdx
346
347 mul QWORD[24+rsi]
348 add r8,rcx
349 adc rdx,0
350 add r8,rax
351 mov rax,r11
352 adc r9,rdx
353 adc r10,0
354
355
356
; Reduction round 4: fold r11. rcx and rbp take copies of the two low result
; limbs on the way.
357 mov rbp,r11
358 shl r11,32
359 mul r15
360 shr rbp,32
361 add r12,r11
362 adc r13,rbp
363 mov rcx,r12
364 adc r8,rax
365 adc r9,rdx
366 mov rbp,r13
367 adc r10,0
368
369
370
; Trial subtraction of p from (r10 : r9,r8,r13,r12). "sub r12,-1" subtracts
; the all-ones low limb of p; rbx and rdx take copies of the two high limbs.
371 sub r12,-1
372 mov rbx,r8
373 sbb r13,r14
374 sbb r8,0
375 mov rdx,r9
376 sbb r9,r15
377 sbb r10,0
378
; A final borrow means the value was already below p: restore the copies.
379 cmovc r12,rcx
380 cmovc r13,rbp
381 mov QWORD[rdi],r12
382 cmovc r8,rbx
383 mov QWORD[8+rdi],r13
384 cmovc r9,rdx
385 mov QWORD[16+rdi],r8
386 mov QWORD[24+rdi],r9
387
388 DB 0F3h,0C3h ;repret
389
390
391
392
393
394
395
396
397
; ecp_nistz256_sqr_mont: public wrapper around __ecp_nistz256_sqr_montq.
; Win64 entry: rcx = result pointer, rdx = pointer to a (4 x 64-bit limbs).
; a[0..3] are preloaded into rax, r14, r15, r8 as the helper expects.
398 global ecp_nistz256_sqr_mont
399
400 ALIGN 32
401 ecp_nistz256_sqr_mont:
402 mov QWORD[8+rsp],rdi ;WIN64 prologue
403 mov QWORD[16+rsp],rsi
404 mov rax,rsp
405 $L$SEH_begin_ecp_nistz256_sqr_mont:
406 mov rdi,rcx
407 mov rsi,rdx
408
409
; Save every callee-saved register the helper overwrites.
410 push rbp
411 push rbx
412 push r12
413 push r13
414 push r14
415 push r15
416 mov rax,QWORD[rsi]
417 mov r14,QWORD[8+rsi]
418 mov r15,QWORD[16+rsi]
419 mov r8,QWORD[24+rsi]
420
; The helper stores the square through rdi itself.
421 call __ecp_nistz256_sqr_montq
422 $L$sqr_mont_done:
423 pop r15
424 pop r14
425 pop r13
426 pop r12
427 pop rbx
428 pop rbp
429 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
430 mov rsi,QWORD[16+rsp]
431 DB 0F3h,0C3h ;repret
432 $L$SEH_end_ecp_nistz256_sqr_mont:
433
434
435 ALIGN 32
; __ecp_nistz256_sqr_montq: internal 4-limb squaring followed by four
; reduction rounds and one conditional subtraction of p.
; In:   rsi = pointer to a, rax = a[0], r14 = a[1], r15 = a[2], r8 = a[3]
;       rdi = result pointer
; Out:  result in r12,r13,r14,r15 (low to high) and stored at [rdi];
;       rsi = $L$poly[1], rbp = $L$poly[3]  (rsi no longer points at a)
; Also overwrites rax, rcx, rdx, r8..r11 and the flags.
436 __ecp_nistz256_sqr_montq:
; Off-diagonal products a[i]*a[j], i < j, accumulated in r9..r14.
437 mov r13,rax
438 mul r14
439 mov r9,rax
440 mov rax,r15
441 mov r10,rdx
442
443 mul r13
444 add r10,rax
445 mov rax,r8
446 adc rdx,0
447 mov r11,rdx
448
449 mul r13
450 add r11,rax
451 mov rax,r15
452 adc rdx,0
453 mov r12,rdx
454
455
456 mul r14
457 add r11,rax
458 mov rax,r8
459 adc rdx,0
460 mov rbp,rdx
461
462 mul r14
463 add r12,rax
464 mov rax,r8
465 adc rdx,0
466 add r12,rbp
467 mov r13,rdx
468 adc r13,0
469
470
471 mul r15
472 xor r15,r15
473 add r13,rax
474 mov rax,QWORD[rsi]
475 mov r14,rdx
476 adc r14,0
477
; Double the off-diagonal sum; r15 catches the carry out.
478 add r9,r9
479 adc r10,r10
480 adc r11,r11
481 adc r12,r12
482 adc r13,r13
483 adc r14,r14
484 adc r15,0
485
; Add the diagonal squares a[i]^2, reloading each a[i] from [rsi].
486 mul rax
487 mov r8,rax
488 mov rax,QWORD[8+rsi]
489 mov rcx,rdx
490
491 mul rax
492 add r9,rcx
493 adc r10,rax
494 mov rax,QWORD[16+rsi]
495 adc rdx,0
496 mov rcx,rdx
497
498 mul rax
499 add r11,rcx
500 adc r12,rax
501 mov rax,QWORD[24+rsi]
502 adc rdx,0
503 mov rcx,rdx
504
505 mul rax
506 add r13,rcx
507 adc r14,rax
508 mov rax,r8
509 adc r15,rdx
510
; a is no longer needed: rsi and rbp now hold limbs 1 and 3 of p.
511 mov rsi,QWORD[(($L$poly+8))]
512 mov rbp,QWORD[(($L$poly+24))]
513
514
515
516
; Reduction round 1: fold r8 into the limbs above it.
517 mov rcx,r8
518 shl r8,32
519 mul rbp
520 shr rcx,32
521 add r9,r8
522 adc r10,rcx
523 adc r11,rax
524 mov rax,r9
525 adc rdx,0
526
527
528
; Reduction round 2: fold r9.
529 mov rcx,r9
530 shl r9,32
531 mov r8,rdx
532 mul rbp
533 shr rcx,32
534 add r10,r9
535 adc r11,rcx
536 adc r8,rax
537 mov rax,r10
538 adc rdx,0
539
540
541
; Reduction round 3: fold r10.
542 mov rcx,r10
543 shl r10,32
544 mov r9,rdx
545 mul rbp
546 shr rcx,32
547 add r11,r10
548 adc r8,rcx
549 adc r9,rax
550 mov rax,r11
551 adc rdx,0
552
553
554
; Reduction round 4: fold r11.
555 mov rcx,r11
556 shl r11,32
557 mov r10,rdx
558 mul rbp
559 shr rcx,32
560 add r8,r11
561 adc r9,rcx
562 adc r10,rax
563 adc rdx,0
564 xor r11,r11
565
566
567
; Add the reduced low half (r8,r9,r10,rdx) to the high half of the square;
; r11 catches the carry, and r8,r9 take copies of the two low result limbs.
568 add r12,r8
569 adc r13,r9
570 mov r8,r12
571 adc r14,r10
572 adc r15,rdx
573 mov r9,r13
574 adc r11,0
575
; Trial subtraction of p ("sub r12,-1" subtracts the all-ones low limb);
; r10 and rcx take copies of the two high limbs.
576 sub r12,-1
577 mov r10,r14
578 sbb r13,rsi
579 sbb r14,0
580 mov rcx,r15
581 sbb r15,rbp
582 sbb r11,0
583
; A final borrow means the value was already below p: restore the copies.
584 cmovc r12,r8
585 cmovc r13,r9
586 mov QWORD[rdi],r12
587 cmovc r14,r10
588 mov QWORD[8+rdi],r13
589 cmovc r15,rcx
590 mov QWORD[16+rdi],r14
591 mov QWORD[24+rdi],r15
592
593 DB 0F3h,0C3h ;repret
594
595
596
597
598
599
600
; ecp_nistz256_from_mont: runs four reduction rounds on the input with no
; multiplication, then one conditional subtraction of p.
; NOTE(review): this looks like the conversion out of Montgomery form
; (a * 2^-256 mod p) that the name suggests - confirm against the caller.
; Win64 entry: rcx = result pointer, rdx = pointer to a (4 x 64-bit limbs).
601 global ecp_nistz256_from_mont
602
603 ALIGN 32
604 ecp_nistz256_from_mont:
605 mov QWORD[8+rsp],rdi ;WIN64 prologue
606 mov QWORD[16+rsp],rsi
607 mov rax,rsp
608 $L$SEH_begin_ecp_nistz256_from_mont:
609 mov rdi,rcx
610 mov rsi,rdx
611
612
613 push r12
614 push r13
615
; a[0..3] -> rax/r8, r9, r10, r11; r12 and r13 hold limbs 1 and 3 of p.
616 mov rax,QWORD[rsi]
617 mov r13,QWORD[(($L$poly+24))]
618 mov r9,QWORD[8+rsi]
619 mov r10,QWORD[16+rsi]
620 mov r11,QWORD[24+rsi]
621 mov r8,rax
622 mov r12,QWORD[(($L$poly+8))]
623
624
625
; Reduction round 1: fold the lowest limb (copies in rax, rcx, r8).
626 mov rcx,rax
627 shl r8,32
628 mul r13
629 shr rcx,32
630 add r9,r8
631 adc r10,rcx
632 adc r11,rax
633 mov rax,r9
634 adc rdx,0
635
636
637
; Reduction round 2: fold r9.
638 mov rcx,r9
639 shl r9,32
640 mov r8,rdx
641 mul r13
642 shr rcx,32
643 add r10,r9
644 adc r11,rcx
645 adc r8,rax
646 mov rax,r10
647 adc rdx,0
648
649
650
; Reduction round 3: fold r10.
651 mov rcx,r10
652 shl r10,32
653 mov r9,rdx
654 mul r13
655 shr rcx,32
656 add r11,r10
657 adc r8,rcx
658 adc r9,rax
659 mov rax,r11
660 adc rdx,0
661
662
663
; Reduction round 4: fold r11. rcx and rsi take copies of the two low result
; limbs on the way.
664 mov rcx,r11
665 shl r11,32
666 mov r10,rdx
667 mul r13
668 shr rcx,32
669 add r8,r11
670 adc r9,rcx
671 mov rcx,r8
672 adc r10,rax
673 mov rsi,r9
674 adc rdx,0
675
; Trial subtraction of p from (rdx,r10,r9,r8). rax keeps a copy of limb 2 and
; r11 a copy of limb 3; r13 ends as 0 (no borrow) or all-ones (borrow).
676 sub r8,-1
677 mov rax,r10
678 sbb r9,r12
679 sbb r10,0
680 mov r11,rdx
681 sbb rdx,r13
682 sbb r13,r13
683
; Borrow (ZF clear): restore the three low limbs from their copies.
; The top limb works the other way round: r11 already holds the unsubtracted
; value and takes the subtracted one only when there was no borrow.
684 cmovnz r8,rcx
685 cmovnz r9,rsi
686 mov QWORD[rdi],r8
687 cmovnz r10,rax
688 mov QWORD[8+rdi],r9
689 cmovz r11,rdx
690 mov QWORD[16+rdi],r10
691 mov QWORD[24+rdi],r11
692
693 pop r13
694 pop r12
695 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
696 mov rsi,QWORD[16+rsp]
697 DB 0F3h,0C3h ;repret
698 $L$SEH_end_ecp_nistz256_from_mont:
699
700
; ecp_nistz256_select_w5: copy one 96-byte entry out of a 16-entry table.
; Win64 entry: rcx = output pointer, rdx = table pointer, r8d = index.
; Every entry is read on every call; each is ANDed with a mask that is
; all-ones only when the running counter equals the index, then ORed into the
; accumulators. The counter starts at 1, so index 0 matches nothing and the
; output is all zero.
; The table is read with movdqa, so rdx must be 16-byte aligned; the output
; is written with movdqu.
701 global ecp_nistz256_select_w5
702
703 ALIGN 32
704 ecp_nistz256_select_w5:
705 lea rax,[((-136))+rsp]
706 $L$SEH_begin_ecp_nistz256_select_w5:
; Hand-encoded prologue: lea rsp,[rax-0x20] (168 bytes below the entry rsp),
; then movaps of xmm6..xmm15 to [rax-0x20] .. [rax+0x70]. These registers are
; callee-saved on Win64.
707 DB 0x48,0x8d,0x60,0xe0
708 DB 0x0f,0x29,0x70,0xe0
709 DB 0x0f,0x29,0x78,0xf0
710 DB 0x44,0x0f,0x29,0x00
711 DB 0x44,0x0f,0x29,0x48,0x10
712 DB 0x44,0x0f,0x29,0x50,0x20
713 DB 0x44,0x0f,0x29,0x58,0x30
714 DB 0x44,0x0f,0x29,0x60,0x40
715 DB 0x44,0x0f,0x29,0x68,0x50
716 DB 0x44,0x0f,0x29,0x70,0x60
717 DB 0x44,0x0f,0x29,0x78,0x70
; xmm0 = counter step (1 per lane), xmm1 = index (broadcast below).
718 movdqa xmm0,XMMWORD[$L$One]
719 movd xmm1,r8d
720
; xmm2..xmm7 accumulate the selected entry.
721 pxor xmm2,xmm2
722 pxor xmm3,xmm3
723 pxor xmm4,xmm4
724 pxor xmm5,xmm5
725 pxor xmm6,xmm6
726 pxor xmm7,xmm7
727
; xmm8 = running counter, starting at 1.
728 movdqa xmm8,xmm0
729 pshufd xmm1,xmm1,0
730
731 mov rax,16
732 $L$select_loop_sse_w5:
733
; xmm15 = all-ones when counter == index, else zero; then advance the counter.
734 movdqa xmm15,xmm8
735 paddd xmm8,xmm0
736 pcmpeqd xmm15,xmm1
737
738 movdqa xmm9,XMMWORD[rdx]
739 movdqa xmm10,XMMWORD[16+rdx]
740 movdqa xmm11,XMMWORD[32+rdx]
741 movdqa xmm12,XMMWORD[48+rdx]
742 movdqa xmm13,XMMWORD[64+rdx]
743 movdqa xmm14,XMMWORD[80+rdx]
744 lea rdx,[96+rdx]
745
746 pand xmm9,xmm15
747 pand xmm10,xmm15
748 por xmm2,xmm9
749 pand xmm11,xmm15
750 por xmm3,xmm10
751 pand xmm12,xmm15
752 por xmm4,xmm11
753 pand xmm13,xmm15
754 por xmm5,xmm12
755 pand xmm14,xmm15
756 por xmm6,xmm13
757 por xmm7,xmm14
758
759 dec rax
760 jnz NEAR $L$select_loop_sse_w5
761
762 movdqu XMMWORD[rcx],xmm2
763 movdqu XMMWORD[16+rcx],xmm3
764 movdqu XMMWORD[32+rcx],xmm4
765 movdqu XMMWORD[48+rcx],xmm5
766 movdqu XMMWORD[64+rcx],xmm6
767 movdqu XMMWORD[80+rcx],xmm7
; Restore xmm6..xmm15 and release the 168-byte save area.
768 movaps xmm6,XMMWORD[rsp]
769 movaps xmm7,XMMWORD[16+rsp]
770 movaps xmm8,XMMWORD[32+rsp]
771 movaps xmm9,XMMWORD[48+rsp]
772 movaps xmm10,XMMWORD[64+rsp]
773 movaps xmm11,XMMWORD[80+rsp]
774 movaps xmm12,XMMWORD[96+rsp]
775 movaps xmm13,XMMWORD[112+rsp]
776 movaps xmm14,XMMWORD[128+rsp]
777 movaps xmm15,XMMWORD[144+rsp]
778 lea rsp,[168+rsp]
779 $L$SEH_end_ecp_nistz256_select_w5:
780 DB 0F3h,0C3h ;repret
781
782
783
784
; ecp_nistz256_select_w7: copy one 64-byte entry out of a 64-entry table.
; Win64 entry: rcx = output pointer, rdx = table pointer, r8d = index.
; Same masked read-everything scheme as select_w5: the counter starts at 1,
; so index 0 matches nothing and the output is all zero.
; The table is read with movdqa, so rdx must be 16-byte aligned.
785 global ecp_nistz256_select_w7
786
787 ALIGN 32
788 ecp_nistz256_select_w7:
789 lea rax,[((-136))+rsp]
790 $L$SEH_begin_ecp_nistz256_select_w7:
; Hand-encoded prologue: lea rsp,[rax-0x20] (168 bytes below the entry rsp),
; then movaps of xmm6..xmm15 to [rax-0x20] .. [rax+0x70]. These registers are
; callee-saved on Win64.
791 DB 0x48,0x8d,0x60,0xe0
792 DB 0x0f,0x29,0x70,0xe0
793 DB 0x0f,0x29,0x78,0xf0
794 DB 0x44,0x0f,0x29,0x00
795 DB 0x44,0x0f,0x29,0x48,0x10
796 DB 0x44,0x0f,0x29,0x50,0x20
797 DB 0x44,0x0f,0x29,0x58,0x30
798 DB 0x44,0x0f,0x29,0x60,0x40
799 DB 0x44,0x0f,0x29,0x68,0x50
800 DB 0x44,0x0f,0x29,0x70,0x60
801 DB 0x44,0x0f,0x29,0x78,0x70
; xmm8 = running counter (starts at 1), xmm1 = index (broadcast below).
802 movdqa xmm8,XMMWORD[$L$One]
803 movd xmm1,r8d
804
; xmm2..xmm5 accumulate the selected entry.
805 pxor xmm2,xmm2
806 pxor xmm3,xmm3
807 pxor xmm4,xmm4
808 pxor xmm5,xmm5
809
; xmm0 = counter step (1 per lane).
810 movdqa xmm0,xmm8
811 pshufd xmm1,xmm1,0
812 mov rax,64
813
814 $L$select_loop_sse_w7:
; xmm15 = all-ones when counter == index, else zero; then advance the counter.
815 movdqa xmm15,xmm8
816 paddd xmm8,xmm0
817 movdqa xmm9,XMMWORD[rdx]
818 movdqa xmm10,XMMWORD[16+rdx]
819 pcmpeqd xmm15,xmm1
820 movdqa xmm11,XMMWORD[32+rdx]
821 movdqa xmm12,XMMWORD[48+rdx]
822 lea rdx,[64+rdx]
823
824 pand xmm9,xmm15
825 pand xmm10,xmm15
826 por xmm2,xmm9
827 pand xmm11,xmm15
828 por xmm3,xmm10
829 pand xmm12,xmm15
830 por xmm4,xmm11
; Prefetch further into the table (rdx has already been advanced).
831 prefetcht0 [255+rdx]
832 por xmm5,xmm12
833
834 dec rax
835 jnz NEAR $L$select_loop_sse_w7
836
837 movdqu XMMWORD[rcx],xmm2
838 movdqu XMMWORD[16+rcx],xmm3
839 movdqu XMMWORD[32+rcx],xmm4
840 movdqu XMMWORD[48+rcx],xmm5
; Restore xmm6..xmm15 and release the 168-byte save area.
841 movaps xmm6,XMMWORD[rsp]
842 movaps xmm7,XMMWORD[16+rsp]
843 movaps xmm8,XMMWORD[32+rsp]
844 movaps xmm9,XMMWORD[48+rsp]
845 movaps xmm10,XMMWORD[64+rsp]
846 movaps xmm11,XMMWORD[80+rsp]
847 movaps xmm12,XMMWORD[96+rsp]
848 movaps xmm13,XMMWORD[112+rsp]
849 movaps xmm14,XMMWORD[128+rsp]
850 movaps xmm15,XMMWORD[144+rsp]
851 lea rsp,[168+rsp]
852 $L$SEH_end_ecp_nistz256_select_w7:
853 DB 0F3h,0C3h ;repret
854
; ecp_nistz256_avx2_select_w7: stub. After the usual Win64 argument shuffle
; the body is the two bytes 0F 0B, which encode ud2, so a call raises an
; invalid-opcode exception and the epilogue after it is never reached.
855 global ecp_nistz256_avx2_select_w7
856
857 ALIGN 32
858 ecp_nistz256_avx2_select_w7:
859 mov QWORD[8+rsp],rdi ;WIN64 prologue
860 mov QWORD[16+rsp],rsi
861 mov rax,rsp
862 $L$SEH_begin_ecp_nistz256_avx2_select_w7:
863 mov rdi,rcx
864 mov rsi,rdx
865 mov rdx,r8
866
867
; ud2
868 DB 0x0f,0x0b
869 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
870 mov rsi,QWORD[16+rsp]
871 DB 0F3h,0C3h ;repret
872 $L$SEH_end_ecp_nistz256_avx2_select_w7:
873
874 ALIGN 32
; __ecp_nistz256_add_toq: result = (a + b) mod p.
; In:   r12,r13,r8,r9 = a (low to high), rbx = pointer to b,
;       r14 = limb 1 of p, r15 = limb 3 of p, rdi = result pointer
; Out:  result in r12,r13,r8,r9 and stored at [rdi]
; Also overwrites rax, rbp, rcx, r10, r11 and the flags.
;
; Fix: the reduced value used to be selected only when the addition carried
; out of 2^256, so a sum in [p, 2^256) was stored unreduced. The selection now
; also takes the borrow of the trial subtraction into account.
875 __ecp_nistz256_add_toq:
876 add r12,QWORD[rbx]
877 adc r13,QWORD[8+rbx]
878 mov rax,r12
879 adc r8,QWORD[16+rbx]
880 adc r9,QWORD[24+rbx]
881 mov rbp,r13
; r11 = 0 if the addition did not carry, all-ones if it did.
882 sbb r11,r11
883
; Trial subtraction of p ("sub r12,-1" subtracts the all-ones low limb);
; rax,rbp,rcx,r10 hold the unsubtracted sum.
884 sub r12,-1
885 mov rcx,r8
886 sbb r13,r14
887 sbb r8,0
888 mov r10,r9
889 sbb r9,r15
; Extend the borrow into the carry word: CF ends up set only when the
; subtraction borrowed AND the addition did not carry, i.e. a + b < p.
890 sbb r11,0
891
; a + b < p: put the unsubtracted sum back; otherwise keep a + b - p.
892 cmovc r12,rax
893 cmovc r13,rbp
894 mov QWORD[rdi],r12
895 cmovc r8,rcx
896 mov QWORD[8+rdi],r13
897 cmovc r9,r10
898 mov QWORD[16+rdi],r8
899 mov QWORD[24+rdi],r9
900
901 DB 0F3h,0C3h ;repret
902
903
904
905 ALIGN 32
; __ecp_nistz256_sub_fromq: result = (a - b) mod p.
; In:   r12,r13,r8,r9 = a (low to high), rbx = pointer to b,
;       r14 = limb 1 of p, r15 = limb 3 of p, rdi = result pointer
; Out:  result in r12,r13,r8,r9 and stored at [rdi]
; Also overwrites rax, rbp, rcx, r10, r11 and the flags.
906 __ecp_nistz256_sub_fromq:
907 sub r12,QWORD[rbx]
908 sbb r13,QWORD[8+rbx]
909 mov rax,r12
910 sbb r8,QWORD[16+rbx]
911 sbb r9,QWORD[24+rbx]
912 mov rbp,r13
; r11 = 0 if a >= b, all-ones if the subtraction borrowed.
913 sbb r11,r11
914
; Add p to the difference ("add r12,-1" adds the all-ones low limb);
; rax,rbp,rcx,r10 hold the raw difference.
915 add r12,-1
916 mov rcx,r8
917 adc r13,r14
918 adc r8,0
919 mov r10,r9
920 adc r9,r15
921 test r11,r11
922
; No borrow: keep the raw difference instead of difference + p.
923 cmovz r12,rax
924 cmovz r13,rbp
925 mov QWORD[rdi],r12
926 cmovz r8,rcx
927 mov QWORD[8+rdi],r13
928 cmovz r9,r10
929 mov QWORD[16+rdi],r8
930 mov QWORD[24+rdi],r9
931
932 DB 0F3h,0C3h ;repret
933
934
935
936 ALIGN 32
; __ecp_nistz256_subq: result = (a - b) mod p, entirely in registers.
; In:   rax,rbp,rcx,r10 = a (low to high), r12,r13,r8,r9 = b,
;       r14 = limb 1 of p, r15 = limb 3 of p
; Out:  result in r12,r13,r8,r9. Nothing is stored to memory; callers write
;       the result out themselves.
; Also overwrites rax, rbp, rcx, r10, r11 and the flags.
937 __ecp_nistz256_subq:
938 sub rax,r12
939 sbb rbp,r13
940 mov r12,rax
941 sbb rcx,r8
942 sbb r10,r9
943 mov r13,rbp
; r11 = 0 if a >= b, all-ones if the subtraction borrowed.
944 sbb r11,r11
945
; Add p to the difference; r12,r13,r8,r9 hold the raw difference.
946 add rax,-1
947 mov r8,rcx
948 adc rbp,r14
949 adc rcx,0
950 mov r9,r10
951 adc r10,r15
952 test r11,r11
953
; Borrow: take difference + p; otherwise the raw difference stays.
954 cmovnz r12,rax
955 cmovnz r13,rbp
956 cmovnz r8,rcx
957 cmovnz r9,r10
958
959 DB 0F3h,0C3h ;repret
960
961
962
963 ALIGN 32
; __ecp_nistz256_mul_by_2q: result = 2*a mod p.
; In:   r12,r13,r8,r9 = a (low to high),
;       r14 = limb 1 of p, r15 = limb 3 of p, rdi = result pointer
; Out:  result in r12,r13,r8,r9 and stored at [rdi]
; Also overwrites rax, rbp, rcx, r10, r11 and the flags.
;
; Fix: the reduced value used to be selected only when the doubling carried
; out of 2^256, so a result in [p, 2^256) was stored unreduced. The selection
; now also takes the borrow of the trial subtraction into account.
964 __ecp_nistz256_mul_by_2q:
965 add r12,r12
966 adc r13,r13
967 mov rax,r12
968 adc r8,r8
969 adc r9,r9
970 mov rbp,r13
; r11 = 0 if the doubling did not carry, all-ones if it did.
971 sbb r11,r11
972
; Trial subtraction of p ("sub r12,-1" subtracts the all-ones low limb);
; rax,rbp,rcx,r10 hold the unsubtracted value.
973 sub r12,-1
974 mov rcx,r8
975 sbb r13,r14
976 sbb r8,0
977 mov r10,r9
978 sbb r9,r15
; Extend the borrow into the carry word: CF ends up set only when the
; subtraction borrowed AND the doubling did not carry, i.e. 2*a < p.
979 sbb r11,0
980
; 2*a < p: put the unsubtracted value back; otherwise keep 2*a - p.
981 cmovc r12,rax
982 cmovc r13,rbp
983 mov QWORD[rdi],r12
984 cmovc r8,rcx
985 mov QWORD[8+rdi],r13
986 cmovc r9,r10
987 mov QWORD[16+rdi],r8
988 mov QWORD[24+rdi],r9
989
990 DB 0F3h,0C3h ;repret
991
; ecp_nistz256_point_double: point doubling.
; Win64 entry: rcx = result pointer, rdx = input point pointer. Both are
; accessed as three 32-byte field elements at offsets 0, 32 and 64.
; Frame: 32*5+8 bytes after six pushes; the +8 leaves rsp 16-byte aligned,
; which the movdqa stores to [96+rsp] need. Five 32-byte temporaries live at
; rsp+0, +32, +64, +96 and +128.
; Helper conventions used throughout: mul_montq takes rsi/r9..r12 = a,
; rbx/rax = b; sqr_montq takes rsi/rax,r14,r15,r8 = a; both store to [rdi].
; ecp_nistz256_point_add jumps to $L$point_double_shortcutq with an identical
; frame already in place and rsi/rdi/xmm0 set up.
992 global ecp_nistz256_point_double
993
994 ALIGN 32
995 ecp_nistz256_point_double:
996 mov QWORD[8+rsp],rdi ;WIN64 prologue
997 mov QWORD[16+rsp],rsi
998 mov rax,rsp
999 $L$SEH_begin_ecp_nistz256_point_double:
1000 mov rdi,rcx
1001 mov rsi,rdx
1002
1003
1004 push rbp
1005 push rbx
1006 push r12
1007 push r13
1008 push r14
1009 push r15
1010 sub rsp,32*5+8
1011
1012 $L$point_double_shortcutq:
; Copy the first input coordinate to [96+rsp], load the second into
; r12,r13,r8,r9 and the p limbs into r14/r15. rbx keeps the input pointer.
1013 movdqu xmm0,XMMWORD[rsi]
1014 mov rbx,rsi
1015 movdqu xmm1,XMMWORD[16+rsi]
1016 mov r12,QWORD[((32+0))+rsi]
1017 mov r13,QWORD[((32+8))+rsi]
1018 mov r8,QWORD[((32+16))+rsi]
1019 mov r9,QWORD[((32+24))+rsi]
1020 mov r14,QWORD[(($L$poly+8))]
1021 mov r15,QWORD[(($L$poly+24))]
1022 movdqa XMMWORD[96+rsp],xmm0
1023 movdqa XMMWORD[(96+16)+rsp],xmm1
1024 lea r10,[32+rdi]
1025 lea r11,[64+rdi]
; Park the three output pointers in xmm registers:
; movq xmm0,rdi / movq xmm1,r10 (result+32) / movq xmm2,r11 (result+64).
1026 DB 102,72,15,110,199
1027 DB 102,73,15,110,202
1028 DB 102,73,15,110,211
1029
; [0] = 2 * in[32]
1030 lea rdi,[rsp]
1031 call __ecp_nistz256_mul_by_2q
1032
; [64] = in[64]^2
1033 mov rax,QWORD[((64+0))+rsi]
1034 mov r14,QWORD[((64+8))+rsi]
1035 mov r15,QWORD[((64+16))+rsi]
1036 mov r8,QWORD[((64+24))+rsi]
1037 lea rsi,[((64-0))+rsi]
1038 lea rdi,[64+rsp]
1039 call __ecp_nistz256_sqr_montq
1040
; [0] = [0]^2
1041 mov rax,QWORD[((0+0))+rsp]
1042 mov r14,QWORD[((8+0))+rsp]
1043 lea rsi,[((0+0))+rsp]
1044 mov r15,QWORD[((16+0))+rsp]
1045 mov r8,QWORD[((24+0))+rsp]
1046 lea rdi,[rsp]
1047 call __ecp_nistz256_sqr_montq
1048
; result[64] = 2 * (in[64] * in[32]); rdi comes from movq rdi,xmm2.
1049 mov rax,QWORD[32+rbx]
1050 mov r9,QWORD[((64+0))+rbx]
1051 mov r10,QWORD[((64+8))+rbx]
1052 mov r11,QWORD[((64+16))+rbx]
1053 mov r12,QWORD[((64+24))+rbx]
1054 lea rsi,[((64-0))+rbx]
1055 lea rbx,[32+rbx]
1056 DB 102,72,15,126,215
1057 call __ecp_nistz256_mul_montq
1058 call __ecp_nistz256_mul_by_2q
1059
; [32] = [96] + [64]
1060 mov r12,QWORD[((96+0))+rsp]
1061 mov r13,QWORD[((96+8))+rsp]
1062 lea rbx,[64+rsp]
1063 mov r8,QWORD[((96+16))+rsp]
1064 mov r9,QWORD[((96+24))+rsp]
1065 lea rdi,[32+rsp]
1066 call __ecp_nistz256_add_toq
1067
; [64] = [96] - [64]
1068 mov r12,QWORD[((96+0))+rsp]
1069 mov r13,QWORD[((96+8))+rsp]
1070 lea rbx,[64+rsp]
1071 mov r8,QWORD[((96+16))+rsp]
1072 mov r9,QWORD[((96+24))+rsp]
1073 lea rdi,[64+rsp]
1074 call __ecp_nistz256_sub_fromq
1075
; Square [0] (movq rdi,xmm1 -> result+32), then halve the square mod p in
; place: sqr_montq returns it in r12..r15 with rsi/rbp = limbs 1 and 3 of p.
1076 mov rax,QWORD[((0+0))+rsp]
1077 mov r14,QWORD[((8+0))+rsp]
1078 lea rsi,[((0+0))+rsp]
1079 mov r15,QWORD[((16+0))+rsp]
1080 mov r8,QWORD[((24+0))+rsp]
1081 DB 102,72,15,126,207
1082 call __ecp_nistz256_sqr_montq
; value + p goes into r12..r15 with the carry in r9; the original is kept in
; rax,r10,rcx,r8. If the original was even (bit 0 clear) it is restored.
1083 xor r9,r9
1084 mov rax,r12
1085 add r12,-1
1086 mov r10,r13
1087 adc r13,rsi
1088 mov rcx,r14
1089 adc r14,0
1090 mov r8,r15
1091 adc r15,rbp
1092 adc r9,0
1093 xor rsi,rsi
1094 test rax,1
1095
1096 cmovz r12,rax
1097 cmovz r13,r10
1098 cmovz r14,rcx
1099 cmovz r15,r8
1100 cmovz r9,rsi
1101
; Shift the 257-bit value (r9 : r15..r12) right by one bit and store it to
; result+32.
1102 mov rax,r13
1103 shr r12,1
1104 shl rax,63
1105 mov r10,r14
1106 shr r13,1
1107 or r12,rax
1108 shl r10,63
1109 mov rcx,r15
1110 shr r14,1
1111 or r13,r10
1112 shl rcx,63
1113 mov QWORD[rdi],r12
1114 shr r15,1
1115 mov QWORD[8+rdi],r13
1116 shl r9,63
1117 or r14,rcx
1118 or r15,r9
1119 mov QWORD[16+rdi],r14
1120 mov QWORD[24+rdi],r15
; [32] = [32] * [64]
1121 mov rax,QWORD[64+rsp]
1122 lea rbx,[64+rsp]
1123 mov r9,QWORD[((0+32))+rsp]
1124 mov r10,QWORD[((8+32))+rsp]
1125 lea rsi,[((0+32))+rsp]
1126 mov r11,QWORD[((16+32))+rsp]
1127 mov r12,QWORD[((24+32))+rsp]
1128 lea rdi,[32+rsp]
1129 call __ecp_nistz256_mul_montq
1130
; [128] = 2 * [32], then [32] = [128] + [32]  (three times the product).
1131 lea rdi,[128+rsp]
1132 call __ecp_nistz256_mul_by_2q
1133
1134 lea rbx,[32+rsp]
1135 lea rdi,[32+rsp]
1136 call __ecp_nistz256_add_toq
1137
; [0] = [0] * [96]
1138 mov rax,QWORD[96+rsp]
1139 lea rbx,[96+rsp]
1140 mov r9,QWORD[((0+0))+rsp]
1141 mov r10,QWORD[((8+0))+rsp]
1142 lea rsi,[((0+0))+rsp]
1143 mov r11,QWORD[((16+0))+rsp]
1144 mov r12,QWORD[((24+0))+rsp]
1145 lea rdi,[rsp]
1146 call __ecp_nistz256_mul_montq
1147
; [128] = 2 * [0]
1148 lea rdi,[128+rsp]
1149 call __ecp_nistz256_mul_by_2q
1150
; result[0] = [32]^2 - [128]; rdi comes from movq rdi,xmm0.
1151 mov rax,QWORD[((0+32))+rsp]
1152 mov r14,QWORD[((8+32))+rsp]
1153 lea rsi,[((0+32))+rsp]
1154 mov r15,QWORD[((16+32))+rsp]
1155 mov r8,QWORD[((24+32))+rsp]
1156 DB 102,72,15,126,199
1157 call __ecp_nistz256_sqr_montq
1158
; Move the square's high limbs and the p limbs from sqr_montq's output
; registers into the ones sub_fromq expects.
1159 lea rbx,[128+rsp]
1160 mov r8,r14
1161 mov r9,r15
1162 mov r14,rsi
1163 mov r15,rbp
1164 call __ecp_nistz256_sub_fromq
1165
; r12,r13,r8,r9 = [0] - result[0]
1166 mov rax,QWORD[((0+0))+rsp]
1167 mov rbp,QWORD[((0+8))+rsp]
1168 mov rcx,QWORD[((0+16))+rsp]
1169 mov r10,QWORD[((0+24))+rsp]
1170 lea rdi,[rsp]
1171 call __ecp_nistz256_subq
1172
; Store that difference to [0] and multiply it by [32]. "xor ecx,ecx" sets
; ZF, so the two cmovz below always move; they are plain register copies.
1173 mov rax,QWORD[32+rsp]
1174 lea rbx,[32+rsp]
1175 mov r14,r12
1176 xor ecx,ecx
1177 mov QWORD[((0+0))+rsp],r12
1178 mov r10,r13
1179 mov QWORD[((0+8))+rsp],r13
1180 cmovz r11,r8
1181 mov QWORD[((0+16))+rsp],r8
1182 lea rsi,[((0-0))+rsp]
1183 cmovz r12,r9
1184 mov QWORD[((0+24))+rsp],r9
1185 mov r9,r14
1186 lea rdi,[rsp]
1187 call __ecp_nistz256_mul_montq
1188
; movq rbx,xmm1 / movq rdi,xmm1: result[32] = product - result[32].
1189 DB 102,72,15,126,203
1190 DB 102,72,15,126,207
1191 call __ecp_nistz256_sub_fromq
1192
1193 add rsp,32*5+8
1194 pop r15
1195 pop r14
1196 pop r13
1197 pop r12
1198 pop rbx
1199 pop rbp
1200 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
1201 mov rsi,QWORD[16+rsp]
1202 DB 0F3h,0C3h ;repret
1203 $L$SEH_end_ecp_nistz256_point_double:
; ecp_nistz256_point_add: point addition of two 96-byte points.
; Win64 entry: rcx = result pointer, rdx = in1 pointer, r8 = in2 pointer.
; Each point is accessed as three 32-byte field elements at offsets 0/32/64.
; Frame: 32*18+8 bytes after six pushes (the +8 keeps rsp 16-byte aligned for
; the movdqa accesses). in1 is copied to [384..479+rsp] and in2 to
; [480..575+rsp]; the lower slots are temporaries.
; Masks: xmm5 is all-ones when the first 64 bytes of in1 are all zero, xmm4
; likewise for in2. They steer the final selection: in1 "zero" -> output in2,
; in2 "zero" -> output in1, otherwise the computed sum.
;
; Fix: the inline doubling before the first subq call selected the reduced
; value only when the doubling carried out of 2^256, leaving a result in
; [p, 2^256) unreduced. It now also accounts for the borrow of the trial
; subtraction (sbb r11,0 / cmovc).
1204 global ecp_nistz256_point_add
1205
1206 ALIGN 32
1207 ecp_nistz256_point_add:
1208 mov QWORD[8+rsp],rdi ;WIN64 prologue
1209 mov QWORD[16+rsp],rsi
1210 mov rax,rsp
1211 $L$SEH_begin_ecp_nistz256_point_add:
1212 mov rdi,rcx
1213 mov rsi,rdx
1214 mov rdx,r8
1215
1216
1217 push rbp
1218 push rbx
1219 push r12
1220 push r13
1221 push r14
1222 push r15
1223 sub rsp,32*18+8
1224
; Copy in1 to the frame; OR its first 64 bytes together in xmm3.
1225 movdqu xmm0,XMMWORD[rsi]
1226 movdqu xmm1,XMMWORD[16+rsi]
1227 movdqu xmm2,XMMWORD[32+rsi]
1228 movdqu xmm3,XMMWORD[48+rsi]
1229 movdqu xmm4,XMMWORD[64+rsi]
1230 movdqu xmm5,XMMWORD[80+rsi]
1231 mov rbx,rsi
1232 mov rsi,rdx
1233 movdqa XMMWORD[384+rsp],xmm0
1234 movdqa XMMWORD[(384+16)+rsp],xmm1
1235 por xmm1,xmm0
1236 movdqa XMMWORD[416+rsp],xmm2
1237 movdqa XMMWORD[(416+16)+rsp],xmm3
1238 por xmm3,xmm2
1239 movdqa XMMWORD[448+rsp],xmm4
1240 movdqa XMMWORD[(448+16)+rsp],xmm5
1241 por xmm3,xmm1
1242
; Copy in2 to the frame the same way while folding in1's OR across lanes.
1243 movdqu xmm0,XMMWORD[rsi]
1244 pshufd xmm5,xmm3,0xb1
1245 movdqu xmm1,XMMWORD[16+rsi]
1246 movdqu xmm2,XMMWORD[32+rsi]
1247 por xmm5,xmm3
1248 movdqu xmm3,XMMWORD[48+rsi]
1249 mov rax,QWORD[((64+0))+rsi]
1250 mov r14,QWORD[((64+8))+rsi]
1251 mov r15,QWORD[((64+16))+rsi]
1252 mov r8,QWORD[((64+24))+rsi]
1253 movdqa XMMWORD[480+rsp],xmm0
1254 pshufd xmm4,xmm5,0x1e
1255 movdqa XMMWORD[(480+16)+rsp],xmm1
1256 por xmm1,xmm0
; movq xmm0,rdi: park the result pointer.
1257 DB 102,72,15,110,199
1258 movdqa XMMWORD[512+rsp],xmm2
1259 movdqa XMMWORD[(512+16)+rsp],xmm3
1260 por xmm3,xmm2
1261 por xmm5,xmm4
1262 pxor xmm4,xmm4
1263 por xmm3,xmm1
1264
; [544] = in2[64]; [96] = in2[64]^2
1265 lea rsi,[((64-0))+rsi]
1266 mov QWORD[((544+0))+rsp],rax
1267 mov QWORD[((544+8))+rsp],r14
1268 mov QWORD[((544+16))+rsp],r15
1269 mov QWORD[((544+24))+rsp],r8
1270 lea rdi,[96+rsp]
1271 call __ecp_nistz256_sqr_montq
1272
; Finish the masks: xmm5 = (in1 first 64 bytes == 0), xmm4 = same for in2.
1273 pcmpeqd xmm5,xmm4
1274 pshufd xmm4,xmm3,0xb1
1275 por xmm4,xmm3
1276 pshufd xmm5,xmm5,0
1277 pshufd xmm3,xmm4,0x1e
1278 por xmm4,xmm3
1279 pxor xmm3,xmm3
1280 pcmpeqd xmm4,xmm3
1281 pshufd xmm4,xmm4,0
1282 mov rax,QWORD[((64+0))+rbx]
1283 mov r14,QWORD[((64+8))+rbx]
1284 mov r15,QWORD[((64+16))+rbx]
1285 mov r8,QWORD[((64+24))+rbx]
; movq xmm1,rbx: park the in1 pointer (used by the doubling shortcut).
1286 DB 102,72,15,110,203
1287
; [32] = in1[64]^2
1288 lea rsi,[((64-0))+rbx]
1289 lea rdi,[32+rsp]
1290 call __ecp_nistz256_sqr_montq
1291
; [224] = [96] * [544]
1292 mov rax,QWORD[544+rsp]
1293 lea rbx,[544+rsp]
1294 mov r9,QWORD[((0+96))+rsp]
1295 mov r10,QWORD[((8+96))+rsp]
1296 lea rsi,[((0+96))+rsp]
1297 mov r11,QWORD[((16+96))+rsp]
1298 mov r12,QWORD[((24+96))+rsp]
1299 lea rdi,[224+rsp]
1300 call __ecp_nistz256_mul_montq
1301
; [256] = [32] * [448]
1302 mov rax,QWORD[448+rsp]
1303 lea rbx,[448+rsp]
1304 mov r9,QWORD[((0+32))+rsp]
1305 mov r10,QWORD[((8+32))+rsp]
1306 lea rsi,[((0+32))+rsp]
1307 mov r11,QWORD[((16+32))+rsp]
1308 mov r12,QWORD[((24+32))+rsp]
1309 lea rdi,[256+rsp]
1310 call __ecp_nistz256_mul_montq
1311
; [224] = [224] * [416]
1312 mov rax,QWORD[416+rsp]
1313 lea rbx,[416+rsp]
1314 mov r9,QWORD[((0+224))+rsp]
1315 mov r10,QWORD[((8+224))+rsp]
1316 lea rsi,[((0+224))+rsp]
1317 mov r11,QWORD[((16+224))+rsp]
1318 mov r12,QWORD[((24+224))+rsp]
1319 lea rdi,[224+rsp]
1320 call __ecp_nistz256_mul_montq
1321
; [256] = [256] * [512]
1322 mov rax,QWORD[512+rsp]
1323 lea rbx,[512+rsp]
1324 mov r9,QWORD[((0+256))+rsp]
1325 mov r10,QWORD[((8+256))+rsp]
1326 lea rsi,[((0+256))+rsp]
1327 mov r11,QWORD[((16+256))+rsp]
1328 mov r12,QWORD[((24+256))+rsp]
1329 lea rdi,[256+rsp]
1330 call __ecp_nistz256_mul_montq
1331
; [64] = [256] - [224]
1332 lea rbx,[224+rsp]
1333 lea rdi,[64+rsp]
1334 call __ecp_nistz256_sub_fromq
1335
; r12 != 0 iff that difference is nonzero; saved with movq xmm3,r12.
; xmm2 = xmm4 | xmm5 (either input flagged as zero).
1336 or r12,r13
1337 movdqa xmm2,xmm4
1338 or r12,r8
1339 or r12,r9
1340 por xmm2,xmm5
1341 DB 102,73,15,110,220
1342
; [160] = [96] * [384]
1343 mov rax,QWORD[384+rsp]
1344 lea rbx,[384+rsp]
1345 mov r9,QWORD[((0+96))+rsp]
1346 mov r10,QWORD[((8+96))+rsp]
1347 lea rsi,[((0+96))+rsp]
1348 mov r11,QWORD[((16+96))+rsp]
1349 mov r12,QWORD[((24+96))+rsp]
1350 lea rdi,[160+rsp]
1351 call __ecp_nistz256_mul_montq
1352
; [192] = [32] * [480]
1353 mov rax,QWORD[480+rsp]
1354 lea rbx,[480+rsp]
1355 mov r9,QWORD[((0+32))+rsp]
1356 mov r10,QWORD[((8+32))+rsp]
1357 lea rsi,[((0+32))+rsp]
1358 mov r11,QWORD[((16+32))+rsp]
1359 mov r12,QWORD[((24+32))+rsp]
1360 lea rdi,[192+rsp]
1361 call __ecp_nistz256_mul_montq
1362
; [0] = [192] - [160]
1363 lea rbx,[160+rsp]
1364 lea rdi,[rsp]
1365 call __ecp_nistz256_sub_fromq
1366
; Nonzero difference: the general addition path.
1367 or r12,r13
1368 or r12,r8
1369 or r12,r9
1370
; 0x3E is a segment-override prefix placed in front of the jnz.
1371 DB 0x3e
1372 jnz NEAR $L$add_proceedq
; movq r8,xmm2 (either-input-zero mask) / movq r9,xmm3 (nonzero flag of the
; [64] difference computed above).
1373 DB 102,73,15,126,208
1374 DB 102,73,15,126,217
1375 test r8,r8
1376 jnz NEAR $L$add_proceedq
1377 test r9,r9
1378 jz NEAR $L$add_doubleq
1379
; Both differences disagree this way round: write an all-zero result.
; movq rdi,xmm0 restores the result pointer.
1380 DB 102,72,15,126,199
1381 pxor xmm0,xmm0
1382 movdqu XMMWORD[rdi],xmm0
1383 movdqu XMMWORD[16+rdi],xmm0
1384 movdqu XMMWORD[32+rdi],xmm0
1385 movdqu XMMWORD[48+rdi],xmm0
1386 movdqu XMMWORD[64+rdi],xmm0
1387 movdqu XMMWORD[80+rdi],xmm0
1388 jmp NEAR $L$add_doneq
1389
1390 ALIGN 32
1391 $L$add_doubleq:
; Both differences are zero: hand over to point_double. movq rsi,xmm1 (in1),
; movq rdi,xmm0 (result); 416 = (32*18+8) - (32*5+8) shrinks this frame to
; point_double's size, whose epilogue then unwinds it.
1392 DB 102,72,15,126,206
1393 DB 102,72,15,126,199
1394 add rsp,416
1395 jmp NEAR $L$point_double_shortcutq
1396
1397 ALIGN 32
1398 $L$add_proceedq:
; [96] = [64]^2
1399 mov rax,QWORD[((0+64))+rsp]
1400 mov r14,QWORD[((8+64))+rsp]
1401 lea rsi,[((0+64))+rsp]
1402 mov r15,QWORD[((16+64))+rsp]
1403 mov r8,QWORD[((24+64))+rsp]
1404 lea rdi,[96+rsp]
1405 call __ecp_nistz256_sqr_montq
1406
; [352] = [0] * [448]
1407 mov rax,QWORD[448+rsp]
1408 lea rbx,[448+rsp]
1409 mov r9,QWORD[((0+0))+rsp]
1410 mov r10,QWORD[((8+0))+rsp]
1411 lea rsi,[((0+0))+rsp]
1412 mov r11,QWORD[((16+0))+rsp]
1413 mov r12,QWORD[((24+0))+rsp]
1414 lea rdi,[352+rsp]
1415 call __ecp_nistz256_mul_montq
1416
; [32] = [0]^2
1417 mov rax,QWORD[((0+0))+rsp]
1418 mov r14,QWORD[((8+0))+rsp]
1419 lea rsi,[((0+0))+rsp]
1420 mov r15,QWORD[((16+0))+rsp]
1421 mov r8,QWORD[((24+0))+rsp]
1422 lea rdi,[32+rsp]
1423 call __ecp_nistz256_sqr_montq
1424
; [352] = [352] * [544]
1425 mov rax,QWORD[544+rsp]
1426 lea rbx,[544+rsp]
1427 mov r9,QWORD[((0+352))+rsp]
1428 mov r10,QWORD[((8+352))+rsp]
1429 lea rsi,[((0+352))+rsp]
1430 mov r11,QWORD[((16+352))+rsp]
1431 mov r12,QWORD[((24+352))+rsp]
1432 lea rdi,[352+rsp]
1433 call __ecp_nistz256_mul_montq
1434
; [128] = [32] * [0]
1435 mov rax,QWORD[rsp]
1436 lea rbx,[rsp]
1437 mov r9,QWORD[((0+32))+rsp]
1438 mov r10,QWORD[((8+32))+rsp]
1439 lea rsi,[((0+32))+rsp]
1440 mov r11,QWORD[((16+32))+rsp]
1441 mov r12,QWORD[((24+32))+rsp]
1442 lea rdi,[128+rsp]
1443 call __ecp_nistz256_mul_montq
1444
; [192] = [32] * [160]
1445 mov rax,QWORD[160+rsp]
1446 lea rbx,[160+rsp]
1447 mov r9,QWORD[((0+32))+rsp]
1448 mov r10,QWORD[((8+32))+rsp]
1449 lea rsi,[((0+32))+rsp]
1450 mov r11,QWORD[((16+32))+rsp]
1451 mov r12,QWORD[((24+32))+rsp]
1452 lea rdi,[192+rsp]
1453 call __ecp_nistz256_mul_montq
1454
1455
1456
1457
; Inline doubling of the product just returned in r12,r13,r8,r9 (r14/r15
; still hold limbs 1 and 3 of p from mul_montq). r11 = -carry.
1458 add r12,r12
1459 lea rsi,[96+rsp]
1460 adc r13,r13
1461 mov rax,r12
1462 adc r8,r8
1463 adc r9,r9
1464 mov rbp,r13
1465 sbb r11,r11
1466
1467 sub r12,-1
1468 mov rcx,r8
1469 sbb r13,r14
1470 sbb r8,0
1471 mov r10,r9
1472 sbb r9,r15
; CF ends up set only when the subtraction borrowed and the doubling did not
; carry, i.e. the doubled value is below p and must be restored.
1473 sbb r11,0
1474
; The restores are interleaved with loading [96] as the minuend for subq
; (mov leaves the flags alone).
1475 cmovc r12,rax
1476 mov rax,QWORD[rsi]
1477 cmovc r13,rbp
1478 mov rbp,QWORD[8+rsi]
1479 cmovc r8,rcx
1480 mov rcx,QWORD[16+rsi]
1481 cmovc r9,r10
1482 mov r10,QWORD[24+rsi]
1483
; r12.. = [96] - 2*[192]
1484 call __ecp_nistz256_subq
1485
; [288] = that - [128]
1486 lea rbx,[128+rsp]
1487 lea rdi,[288+rsp]
1488 call __ecp_nistz256_sub_fromq
1489
; [320] = [192] - [288]  (subq leaves the result in registers; stored below)
1490 mov rax,QWORD[((192+0))+rsp]
1491 mov rbp,QWORD[((192+8))+rsp]
1492 mov rcx,QWORD[((192+16))+rsp]
1493 mov r10,QWORD[((192+24))+rsp]
1494 lea rdi,[320+rsp]
1495
1496 call __ecp_nistz256_subq
1497
1498 mov QWORD[rdi],r12
1499 mov QWORD[8+rdi],r13
1500 mov QWORD[16+rdi],r8
1501 mov QWORD[24+rdi],r9
; [256] = [224] * [128]
1502 mov rax,QWORD[128+rsp]
1503 lea rbx,[128+rsp]
1504 mov r9,QWORD[((0+224))+rsp]
1505 mov r10,QWORD[((8+224))+rsp]
1506 lea rsi,[((0+224))+rsp]
1507 mov r11,QWORD[((16+224))+rsp]
1508 mov r12,QWORD[((24+224))+rsp]
1509 lea rdi,[256+rsp]
1510 call __ecp_nistz256_mul_montq
1511
; [320] = [64] * [320]
1512 mov rax,QWORD[320+rsp]
1513 lea rbx,[320+rsp]
1514 mov r9,QWORD[((0+64))+rsp]
1515 mov r10,QWORD[((8+64))+rsp]
1516 lea rsi,[((0+64))+rsp]
1517 mov r11,QWORD[((16+64))+rsp]
1518 mov r12,QWORD[((24+64))+rsp]
1519 lea rdi,[320+rsp]
1520 call __ecp_nistz256_mul_montq
1521
; [320] = [320] - [256]
1522 lea rbx,[256+rsp]
1523 lea rdi,[320+rsp]
1524 call __ecp_nistz256_sub_fromq
1525
; movq rdi,xmm0: restore the result pointer.
1526 DB 102,72,15,126,199
1527
; result[64..95]: computed [352], or in2's copy [544] when xmm5 is set,
; overridden by in1's copy [448] when xmm4 is set.
1528 movdqa xmm0,xmm5
1529 movdqa xmm1,xmm5
1530 pandn xmm0,XMMWORD[352+rsp]
1531 movdqa xmm2,xmm5
1532 pandn xmm1,XMMWORD[((352+16))+rsp]
1533 movdqa xmm3,xmm5
1534 pand xmm2,XMMWORD[544+rsp]
1535 pand xmm3,XMMWORD[((544+16))+rsp]
1536 por xmm2,xmm0
1537 por xmm3,xmm1
1538
1539 movdqa xmm0,xmm4
1540 movdqa xmm1,xmm4
1541 pandn xmm0,xmm2
1542 movdqa xmm2,xmm4
1543 pandn xmm1,xmm3
1544 movdqa xmm3,xmm4
1545 pand xmm2,XMMWORD[448+rsp]
1546 pand xmm3,XMMWORD[((448+16))+rsp]
1547 por xmm2,xmm0
1548 por xmm3,xmm1
1549 movdqu XMMWORD[64+rdi],xmm2
1550 movdqu XMMWORD[80+rdi],xmm3
1551
; result[0..31]: computed [288], or [480] (in2) / [384] (in1) per the masks.
1552 movdqa xmm0,xmm5
1553 movdqa xmm1,xmm5
1554 pandn xmm0,XMMWORD[288+rsp]
1555 movdqa xmm2,xmm5
1556 pandn xmm1,XMMWORD[((288+16))+rsp]
1557 movdqa xmm3,xmm5
1558 pand xmm2,XMMWORD[480+rsp]
1559 pand xmm3,XMMWORD[((480+16))+rsp]
1560 por xmm2,xmm0
1561 por xmm3,xmm1
1562
1563 movdqa xmm0,xmm4
1564 movdqa xmm1,xmm4
1565 pandn xmm0,xmm2
1566 movdqa xmm2,xmm4
1567 pandn xmm1,xmm3
1568 movdqa xmm3,xmm4
1569 pand xmm2,XMMWORD[384+rsp]
1570 pand xmm3,XMMWORD[((384+16))+rsp]
1571 por xmm2,xmm0
1572 por xmm3,xmm1
1573 movdqu XMMWORD[rdi],xmm2
1574 movdqu XMMWORD[16+rdi],xmm3
1575
; result[32..63]: computed [320], or [512] (in2) / [416] (in1) per the masks.
1576 movdqa xmm0,xmm5
1577 movdqa xmm1,xmm5
1578 pandn xmm0,XMMWORD[320+rsp]
1579 movdqa xmm2,xmm5
1580 pandn xmm1,XMMWORD[((320+16))+rsp]
1581 movdqa xmm3,xmm5
1582 pand xmm2,XMMWORD[512+rsp]
1583 pand xmm3,XMMWORD[((512+16))+rsp]
1584 por xmm2,xmm0
1585 por xmm3,xmm1
1586
1587 movdqa xmm0,xmm4
1588 movdqa xmm1,xmm4
1589 pandn xmm0,xmm2
1590 movdqa xmm2,xmm4
1591 pandn xmm1,xmm3
1592 movdqa xmm3,xmm4
1593 pand xmm2,XMMWORD[416+rsp]
1594 pand xmm3,XMMWORD[((416+16))+rsp]
1595 por xmm2,xmm0
1596 por xmm3,xmm1
1597 movdqu XMMWORD[32+rdi],xmm2
1598 movdqu XMMWORD[48+rdi],xmm3
1599
1600 $L$add_doneq:
1601 add rsp,32*18+8
1602 pop r15
1603 pop r14
1604 pop r13
1605 pop r12
1606 pop rbx
1607 pop rbp
1608 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
1609 mov rsi,QWORD[16+rsp]
1610 DB 0F3h,0C3h ;repret
1611 $L$SEH_end_ecp_nistz256_point_add:
;-----------------------------------------------------------------------
; ecp_nistz256_point_add_affine
;
; ABI:  Microsoft x64 on entry (args in rcx, rdx, r8). The prologue
;       remaps them to rdi, rsi, rdx, which is what the body uses.
; In:   rcx = output buffer; 96 bytes are written (offsets 0, 32, 64).
;       rdx = first input;  96 bytes are read (offsets 0, 32, 64).
;       r8  = second input; 64 bytes are read (offsets 0, 32).
;       NOTE(review): presumably a Jacobian point (X,Y,Z) and an affine
;       point (X,Y), 32 bytes per coordinate -- confirm against the C
;       prototype, which is not visible in this file section.
; Out:  nothing in registers; result is stored through the output pointer.
; Saves/restores: rdi, rsi (in the caller's shadow space), rbp, rbx,
;       r12-r15 (pushed). Only xmm0-xmm5 are used in this routine.
; Stack: 32*15+8 bytes of locals. Offsets used below:
;         0, 32, 64, 96, 128, 160, 192  temporaries
;         224, 256, 288                 computed result (x, y, z slots)
;         320, 352, 384                 copy of first input  (3 x 32 bytes)
;         416, 448                      copy of second input (2 x 32 bytes)
;
; The "->" annotations on helper calls show the data flow as inferred
; from the helper names and the register setup at each call site; the
; helper bodies (__ecp_nistz256_*q) are outside this view, so treat those
; annotations as presumptions. Judging from the call sites, the helpers
; take: rdi = destination, rsi = pointer to one operand (its limbs also
; preloaded in registers), rbx = pointer to the other operand.
;
; This routine also relies on the helpers leaving xmm0, xmm4 and xmm5
; untouched across calls (not verifiable from here -- TODO confirm).
;-----------------------------------------------------------------------
1612 global ecp_nistz256_point_add_affine
1613
1614 ALIGN 32
1615 ecp_nistz256_point_add_affine:
; Stash rdi/rsi (callee-saved on Win64) in the caller-provided shadow space.
1616 mov QWORD[8+rsp],rdi ;WIN64 prologue
1617 mov QWORD[16+rsp],rsi
; rax = entry rsp; rax is overwritten below before it is read again here.
1618 mov rax,rsp
1619 $L$SEH_begin_ecp_nistz256_point_add_affine:
; Remap Win64 argument registers: rdi = output, rsi = input 1, rdx = input 2.
1620 mov rdi,rcx
1621 mov rsi,rdx
1622 mov rdx,r8
1623
1624
1625 push rbp
1626 push rbx
1627 push r12
1628 push r13
1629 push r14
1630 push r15
; 6 pushes (48 bytes) + 488 bytes of locals: assuming the caller's rsp was
; 16-byte aligned at the call, rsp is 16-byte aligned from here on, which
; the movdqa/pand/pandn stack accesses below require.
1631 sub rsp,32*15+8
1632
; Load all 96 bytes of input 1 into xmm0-xmm5; rbx takes over as the
; input 2 pointer.
1633 movdqu xmm0,XMMWORD[rsi]
1634 mov rbx,rdx
1635 movdqu xmm1,XMMWORD[16+rsi]
1636 movdqu xmm2,XMMWORD[32+rsi]
1637 movdqu xmm3,XMMWORD[48+rsi]
1638 movdqu xmm4,XMMWORD[64+rsi]
1639 movdqu xmm5,XMMWORD[80+rsi]
; Preload the four limbs at offset 64 of input 1 for the squaring call below.
1640 mov rax,QWORD[((64+0))+rsi]
1641 mov r14,QWORD[((64+8))+rsi]
1642 mov r15,QWORD[((64+16))+rsi]
1643 mov r8,QWORD[((64+24))+rsi]
; Copy input 1 to the stack (320/352/384) while OR-ing its first 64 bytes
; together into xmm3.
1644 movdqa XMMWORD[320+rsp],xmm0
1645 movdqa XMMWORD[(320+16)+rsp],xmm1
1646 por xmm1,xmm0
1647 movdqa XMMWORD[352+rsp],xmm2
1648 movdqa XMMWORD[(352+16)+rsp],xmm3
1649 por xmm3,xmm2
1650 movdqa XMMWORD[384+rsp],xmm4
1651 movdqa XMMWORD[(384+16)+rsp],xmm5
1652 por xmm3,xmm1
1653
; Load input 2 (64 bytes), copy it to the stack (416/448) and OR its bytes
; together into xmm3. Interleaved with that, the pshufd/por pairs fold the
; OR of input 1's first 64 bytes down so that dword 0 of xmm5 holds the OR
; of all sixteen dwords.
1654 movdqu xmm0,XMMWORD[rbx]
1655 pshufd xmm5,xmm3,0xb1
1656 movdqu xmm1,XMMWORD[16+rbx]
1657 movdqu xmm2,XMMWORD[32+rbx]
1658 por xmm5,xmm3
1659 movdqu xmm3,XMMWORD[48+rbx]
1660 movdqa XMMWORD[416+rsp],xmm0
1661 pshufd xmm4,xmm5,0x1e
1662 movdqa XMMWORD[(416+16)+rsp],xmm1
1663 por xmm1,xmm0
; Encoded "movq xmm0,rdi" (66 48 0F 6E C7): park the output pointer in xmm0
; so rdi can be reused as the helpers' destination pointer.
1664 DB 102,72,15,110,199
1665 movdqa XMMWORD[448+rsp],xmm2
1666 movdqa XMMWORD[(448+16)+rsp],xmm3
1667 por xmm3,xmm2
1668 por xmm5,xmm4
1669 pxor xmm4,xmm4
1670 por xmm3,xmm1
1671
; [32+rsp] <- square of the 32 bytes at offset 64 of input 1 (presumed).
1672 lea rsi,[((64-0))+rsi]
1673 lea rdi,[32+rsp]
1674 call __ecp_nistz256_sqr_montq
1675
; xmm5 = all-ones if the first 64 bytes of input 1 are all zero, else zero
; (compare the folded OR against zero, then broadcast dword 0).
; xmm4 = the same mask for the 64 bytes of input 2.
; NOTE(review): all-zero coordinates presumably encode the point at
; infinity -- confirm against the C side.
; Interleaved with that: r9..r12 receive r12..r15 (apparently the squaring
; result) and rax the first limb of input 2, as operands for the multiply.
1676 pcmpeqd xmm5,xmm4
1677 pshufd xmm4,xmm3,0xb1
1678 mov rax,QWORD[rbx]
1679
1680 mov r9,r12
1681 por xmm4,xmm3
1682 pshufd xmm5,xmm5,0
1683 pshufd xmm3,xmm4,0x1e
1684 mov r10,r13
1685 por xmm4,xmm3
1686 pxor xmm3,xmm3
1687 mov r11,r14
1688 pcmpeqd xmm4,xmm3
1689 pshufd xmm4,xmm4,0
1690
; [0+rsp] <- [32+rsp] * [rbx] (first 32 bytes of input 2) (presumed).
1691 lea rsi,[((32-0))+rsp]
1692 mov r12,r15
1693 lea rdi,[rsp]
1694 call __ecp_nistz256_mul_montq
1695
; [64+rsp] <- product just computed - [320+rsp] (input 1, offset 0) (presumed).
1696 lea rbx,[320+rsp]
1697 lea rdi,[64+rsp]
1698 call __ecp_nistz256_sub_fromq
1699
; [32+rsp] <- [32+rsp] * [384+rsp] (input 1, offset 64) (presumed).
1700 mov rax,QWORD[384+rsp]
1701 lea rbx,[384+rsp]
1702 mov r9,QWORD[((0+32))+rsp]
1703 mov r10,QWORD[((8+32))+rsp]
1704 lea rsi,[((0+32))+rsp]
1705 mov r11,QWORD[((16+32))+rsp]
1706 mov r12,QWORD[((24+32))+rsp]
1707 lea rdi,[32+rsp]
1708 call __ecp_nistz256_mul_montq
1709
; [288+rsp] (result z slot) <- [64+rsp] * [384+rsp] (presumed).
1710 mov rax,QWORD[384+rsp]
1711 lea rbx,[384+rsp]
1712 mov r9,QWORD[((0+64))+rsp]
1713 mov r10,QWORD[((8+64))+rsp]
1714 lea rsi,[((0+64))+rsp]
1715 mov r11,QWORD[((16+64))+rsp]
1716 mov r12,QWORD[((24+64))+rsp]
1717 lea rdi,[288+rsp]
1718 call __ecp_nistz256_mul_montq
1719
; [32+rsp] <- [32+rsp] * [448+rsp] (input 2, offset 32) (presumed).
1720 mov rax,QWORD[448+rsp]
1721 lea rbx,[448+rsp]
1722 mov r9,QWORD[((0+32))+rsp]
1723 mov r10,QWORD[((8+32))+rsp]
1724 lea rsi,[((0+32))+rsp]
1725 mov r11,QWORD[((16+32))+rsp]
1726 mov r12,QWORD[((24+32))+rsp]
1727 lea rdi,[32+rsp]
1728 call __ecp_nistz256_mul_montq
1729
; [96+rsp] <- product just computed - [352+rsp] (input 1, offset 32) (presumed).
1730 lea rbx,[352+rsp]
1731 lea rdi,[96+rsp]
1732 call __ecp_nistz256_sub_fromq
1733
; [128+rsp] <- square of [64+rsp] (presumed).
1734 mov rax,QWORD[((0+64))+rsp]
1735 mov r14,QWORD[((8+64))+rsp]
1736 lea rsi,[((0+64))+rsp]
1737 mov r15,QWORD[((16+64))+rsp]
1738 mov r8,QWORD[((24+64))+rsp]
1739 lea rdi,[128+rsp]
1740 call __ecp_nistz256_sqr_montq
1741
; [192+rsp] <- square of [96+rsp] (presumed).
1742 mov rax,QWORD[((0+96))+rsp]
1743 mov r14,QWORD[((8+96))+rsp]
1744 lea rsi,[((0+96))+rsp]
1745 mov r15,QWORD[((16+96))+rsp]
1746 mov r8,QWORD[((24+96))+rsp]
1747 lea rdi,[192+rsp]
1748 call __ecp_nistz256_sqr_montq
1749
; [160+rsp] <- [64+rsp] * [128+rsp] (presumed).
1750 mov rax,QWORD[128+rsp]
1751 lea rbx,[128+rsp]
1752 mov r9,QWORD[((0+64))+rsp]
1753 mov r10,QWORD[((8+64))+rsp]
1754 lea rsi,[((0+64))+rsp]
1755 mov r11,QWORD[((16+64))+rsp]
1756 mov r12,QWORD[((24+64))+rsp]
1757 lea rdi,[160+rsp]
1758 call __ecp_nistz256_mul_montq
1759
; [0+rsp] <- [128+rsp] * [320+rsp] (input 1, offset 0) (presumed).
1760 mov rax,QWORD[320+rsp]
1761 lea rbx,[320+rsp]
1762 mov r9,QWORD[((0+128))+rsp]
1763 mov r10,QWORD[((8+128))+rsp]
1764 lea rsi,[((0+128))+rsp]
1765 mov r11,QWORD[((16+128))+rsp]
1766 mov r12,QWORD[((24+128))+rsp]
1767 lea rdi,[rsp]
1768 call __ecp_nistz256_mul_montq
1769
1770
1771
1772
; Inline doubling of the 4-limb value in r12:r13:r8:r9 (apparently the
; product just returned by the multiply). The unreduced sum is kept in
; rax:rbp:rcx:r10 and r11 becomes -1 if the addition carried out of the
; top limb, 0 otherwise.
1773 add r12,r12
1774 lea rsi,[192+rsp]
1775 adc r13,r13
1776 mov rax,r12
1777 adc r8,r8
1778 adc r9,r9
1779 mov rbp,r13
1780 sbb r11,r11
1781
; Subtract the 4-limb constant (-1, r14, 0, r15).
; NOTE(review): r14/r15 are presumably left by the multiply helper holding
; limbs 1 and 3 of $L$poly -- confirm in __ecp_nistz256_mul_montq.
1782 sub r12,-1
1783 mov rcx,r8
1784 sbb r13,r14
1785 sbb r8,0
1786 mov r10,r9
1787 sbb r9,r15
1788 test r11,r11
1789
; Branch-free select: if the doubling did not carry (r11 == 0), put the
; unreduced sum back. Each scratch register is reloaded with a limb of
; [192+rsp] as soon as it is free (mov leaves the flags intact), giving
; the first operand for the subtraction call below.
1790 cmovz r12,rax
1791 mov rax,QWORD[rsi]
1792 cmovz r13,rbp
1793 mov rbp,QWORD[8+rsi]
1794 cmovz r8,rcx
1795 mov rcx,QWORD[16+rsi]
1796 cmovz r9,r10
1797 mov r10,QWORD[24+rsi]
1798
; Register-only subtract of the doubled value and [192+rsp]; result
; apparently comes back in r12:r13:r8:r9 (operand order not visible here).
1799 call __ecp_nistz256_subq
1800
; [224+rsp] (result x slot) <- previous result - [160+rsp] (presumed).
1801 lea rbx,[160+rsp]
1802 lea rdi,[224+rsp]
1803 call __ecp_nistz256_sub_fromq
1804
; Reload [0+rsp] as the other operand and subtract again in registers;
; rdi is set up front because the result is stored through it below.
1805 mov rax,QWORD[((0+0))+rsp]
1806 mov rbp,QWORD[((0+8))+rsp]
1807 mov rcx,QWORD[((0+16))+rsp]
1808 mov r10,QWORD[((0+24))+rsp]
1809 lea rdi,[64+rsp]
1810
1811 call __ecp_nistz256_subq
1812
; Store the register result r12:r13:r8:r9 to [64+rsp].
1813 mov QWORD[rdi],r12
1814 mov QWORD[8+rdi],r13
1815 mov QWORD[16+rdi],r8
1816 mov QWORD[24+rdi],r9
; [32+rsp] <- [160+rsp] * [352+rsp] (input 1, offset 32) (presumed).
1817 mov rax,QWORD[352+rsp]
1818 lea rbx,[352+rsp]
1819 mov r9,QWORD[((0+160))+rsp]
1820 mov r10,QWORD[((8+160))+rsp]
1821 lea rsi,[((0+160))+rsp]
1822 mov r11,QWORD[((16+160))+rsp]
1823 mov r12,QWORD[((24+160))+rsp]
1824 lea rdi,[32+rsp]
1825 call __ecp_nistz256_mul_montq
1826
; [64+rsp] <- [64+rsp] * [96+rsp] (presumed).
1827 mov rax,QWORD[96+rsp]
1828 lea rbx,[96+rsp]
1829 mov r9,QWORD[((0+64))+rsp]
1830 mov r10,QWORD[((8+64))+rsp]
1831 lea rsi,[((0+64))+rsp]
1832 mov r11,QWORD[((16+64))+rsp]
1833 mov r12,QWORD[((24+64))+rsp]
1834 lea rdi,[64+rsp]
1835 call __ecp_nistz256_mul_montq
1836
; [256+rsp] (result y slot) <- product just computed - [32+rsp] (presumed).
1837 lea rbx,[32+rsp]
1838 lea rdi,[256+rsp]
1839 call __ecp_nistz256_sub_fromq
1840
; Encoded "movq rdi,xmm0" (66 48 0F 7E C7): recover the output pointer
; parked in xmm0 earlier.
1841 DB 102,72,15,126,199
1842
; Branch-free output selection, 16 bytes at a time, using the masks built
; earlier (xmm5: input 1 all-zero, xmm4: input 2 all-zero):
;   out = xmm4 ? input-1 value : (xmm5 ? replacement : computed value)
;
; Bytes 64..95: computed [288+rsp]; replacement is the constant $L$ONE_mont
; (input 2 supplies only 64 bytes, so there is nothing to copy from it).
1843 movdqa xmm0,xmm5
1844 movdqa xmm1,xmm5
1845 pandn xmm0,XMMWORD[288+rsp]
1846 movdqa xmm2,xmm5
1847 pandn xmm1,XMMWORD[((288+16))+rsp]
1848 movdqa xmm3,xmm5
1849 pand xmm2,XMMWORD[$L$ONE_mont]
1850 pand xmm3,XMMWORD[(($L$ONE_mont+16))]
1851 por xmm2,xmm0
1852 por xmm3,xmm1
1853
; If input 2 was all-zero, take input 1's bytes 64..95 ([384+rsp]) instead.
1854 movdqa xmm0,xmm4
1855 movdqa xmm1,xmm4
1856 pandn xmm0,xmm2
1857 movdqa xmm2,xmm4
1858 pandn xmm1,xmm3
1859 movdqa xmm3,xmm4
1860 pand xmm2,XMMWORD[384+rsp]
1861 pand xmm3,XMMWORD[((384+16))+rsp]
1862 por xmm2,xmm0
1863 por xmm3,xmm1
1864 movdqu XMMWORD[64+rdi],xmm2
1865 movdqu XMMWORD[80+rdi],xmm3
1866
; Bytes 0..31: computed [224+rsp]; replacement is input 2's bytes 0..31
; ([416+rsp]).
1867 movdqa xmm0,xmm5
1868 movdqa xmm1,xmm5
1869 pandn xmm0,XMMWORD[224+rsp]
1870 movdqa xmm2,xmm5
1871 pandn xmm1,XMMWORD[((224+16))+rsp]
1872 movdqa xmm3,xmm5
1873 pand xmm2,XMMWORD[416+rsp]
1874 pand xmm3,XMMWORD[((416+16))+rsp]
1875 por xmm2,xmm0
1876 por xmm3,xmm1
1877
; If input 2 was all-zero, take input 1's bytes 0..31 ([320+rsp]) instead.
1878 movdqa xmm0,xmm4
1879 movdqa xmm1,xmm4
1880 pandn xmm0,xmm2
1881 movdqa xmm2,xmm4
1882 pandn xmm1,xmm3
1883 movdqa xmm3,xmm4
1884 pand xmm2,XMMWORD[320+rsp]
1885 pand xmm3,XMMWORD[((320+16))+rsp]
1886 por xmm2,xmm0
1887 por xmm3,xmm1
1888 movdqu XMMWORD[rdi],xmm2
1889 movdqu XMMWORD[16+rdi],xmm3
1890
; Bytes 32..63: computed [256+rsp]; replacement is input 2's bytes 32..63
; ([448+rsp]).
1891 movdqa xmm0,xmm5
1892 movdqa xmm1,xmm5
1893 pandn xmm0,XMMWORD[256+rsp]
1894 movdqa xmm2,xmm5
1895 pandn xmm1,XMMWORD[((256+16))+rsp]
1896 movdqa xmm3,xmm5
1897 pand xmm2,XMMWORD[448+rsp]
1898 pand xmm3,XMMWORD[((448+16))+rsp]
1899 por xmm2,xmm0
1900 por xmm3,xmm1
1901
; If input 2 was all-zero, take input 1's bytes 32..63 ([352+rsp]) instead.
1902 movdqa xmm0,xmm4
1903 movdqa xmm1,xmm4
1904 pandn xmm0,xmm2
1905 movdqa xmm2,xmm4
1906 pandn xmm1,xmm3
1907 movdqa xmm3,xmm4
1908 pand xmm2,XMMWORD[352+rsp]
1909 pand xmm3,XMMWORD[((352+16))+rsp]
1910 por xmm2,xmm0
1911 por xmm3,xmm1
1912 movdqu XMMWORD[32+rdi],xmm2
1913 movdqu XMMWORD[48+rdi],xmm3
1914
; Epilogue: release the locals, pop the saved registers in reverse push
; order, then restore rdi/rsi from the caller's shadow space.
1915 add rsp,32*15+8
1916 pop r15
1917 pop r14
1918 pop r13
1919 pop r12
1920 pop rbx
1921 pop rbp
1922 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
1923 mov rsi,QWORD[16+rsp]
; F3 C3 = "rep ret", emitted as raw bytes.
1924 DB 0F3h,0C3h ;repret
1925 $L$SEH_end_ecp_nistz256_point_add_affine:
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698