Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(243)

Side by Side Diff: third_party/boringssl/win-x86_64/crypto/ec/p256-x86_64-asm.asm

Issue 1930203003: Try BoringSSL roll again (Closed) Base URL: git@github.com:dart-lang/sdk.git@master
Patch Set: Created 4 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 default rel
2 %define XMMWORD
3 %define YMMWORD
4 %define ZMMWORD
5 section .text code align=64
6
7 EXTERN OPENSSL_ia32cap_P
8
9
; Constant pool shared by the routines in this file. Each DQ row is four
; 64-bit words, least-significant word first. The two long hex literals had
; been split by a stray space; they are restored to single 64-bit values.
10 ALIGN 64
11 $L$poly: ; modulus words; code below reads +8 and +24 into registers
12 DQ 0xffffffffffffffff,0x00000000ffffffff,0x0000000000000000,0xffffffff00000001
13
14 $L$One: ; eight dwords of 1 (increment for the select loops)
15 DD 1,1,1,1,1,1,1,1
16 $L$Two:
17 DD 2,2,2,2,2,2,2,2
18 $L$Three:
19 DD 3,3,3,3,3,3,3,3
20 $L$ONE_mont: ; presumably the value 1 in Montgomery form, going by the label
21 DQ 0x0000000000000001,0xffffffff00000000,0xffffffffffffffff,0x00000000fffffffe
22
23
; ecp_nistz256_mul_by_2
; Win64 entry: rcx = result pointer, rdx = input pointer (4 x 64-bit words).
; Stores (2 * input) reduced by $L$poly to [result].
; rdi/rsi are saved in the caller's home space and restored on exit;
; r12/r13 are pushed/popped. Branch-free.
;
; Fix: the original decided whether to keep the subtracted value from the
; doubling carry alone, so a doubled value in [poly, 2^256) was stored
; unreduced. The borrow of the trial subtraction is now folded into the
; decision.
24 ALIGN 64
25 ecp_nistz256_mul_by_2:
26 mov QWORD[8+rsp],rdi ;WIN64 prologue
27 mov QWORD[16+rsp],rsi
28 mov rax,rsp
29 $L$SEH_begin_ecp_nistz256_mul_by_2:
30 mov rdi,rcx
31 mov rsi,rdx
32
33
34 push r12
35 push r13
36
37 mov r8,QWORD[rsi]
38 mov r9,QWORD[8+rsi]
39 add r8,r8 ; start of the 256-bit doubling carry chain
40 mov r10,QWORD[16+rsi]
41 adc r9,r9
42 mov r11,QWORD[24+rsi]
43 lea rsi,[$L$poly]
44 mov rax,r8 ; rax,rdx,rcx,r12 keep the un-subtracted doubled value
45 adc r10,r10
46 adc r11,r11
47 mov rdx,r9
48 sbb r13,r13 ; r13 = -1 if the doubling carried out, else 0
49
50 sub r8,QWORD[rsi] ; trial subtraction of the modulus
51 mov rcx,r10
52 sbb r9,QWORD[8+rsi]
53 sbb r10,QWORD[16+rsi]
54 mov r12,r11
55 sbb r11,QWORD[24+rsi]
56 sbb r13,0 ; CF=1 only if there was no carry-out AND the subtraction borrowed
57
58 cmovc r8,rax ; doubled value < modulus: keep it as is
59 cmovc r9,rdx
60 mov QWORD[rdi],r8
61 cmovc r10,rcx
62 mov QWORD[8+rdi],r9
63 cmovc r11,r12
64 mov QWORD[16+rdi],r10
65 mov QWORD[24+rdi],r11
66
67 pop r13
68 pop r12
69 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
70 mov rsi,QWORD[16+rsp]
71 DB 0F3h,0C3h ;repret
72 $L$SEH_end_ecp_nistz256_mul_by_2:
73
74
75
; ecp_nistz256_neg
; Win64 entry: rcx = result pointer, rdx = input pointer (4 x 64-bit words).
; Computes 0 - input; if that borrows, the modulus at $L$poly is added back.
; rdi/rsi are saved in the caller's home space; r12/r13 are pushed/popped.
76 global ecp_nistz256_neg
77
78 ALIGN 32
79 ecp_nistz256_neg:
80 mov QWORD[8+rsp],rdi ;WIN64 prologue
81 mov QWORD[16+rsp],rsi
82 mov rax,rsp
83 $L$SEH_begin_ecp_nistz256_neg:
84 mov rdi,rcx
85 mov rsi,rdx
86
87
88 push r12
89 push r13
90
91 xor r8,r8
92 xor r9,r9
93 xor r10,r10
94 xor r11,r11
95 xor r13,r13
96
97 sub r8,QWORD[rsi] ; r8..r11 = 0 - input
98 sbb r9,QWORD[8+rsi]
99 sbb r10,QWORD[16+rsi]
100 mov rax,r8 ; rax,rdx,rcx,r12 keep the raw difference
101 sbb r11,QWORD[24+rsi]
102 lea rsi,[$L$poly]
103 mov rdx,r9
104 sbb r13,0 ; r13 = -1 if the subtraction borrowed, else 0
105
106 add r8,QWORD[rsi] ; trial add-back of the modulus
107 mov rcx,r10
108 adc r9,QWORD[8+rsi]
109 adc r10,QWORD[16+rsi]
110 mov r12,r11
111 adc r11,QWORD[24+rsi]
112 test r13,r13
113
114 cmovz r8,rax ; no borrow (input was zero): keep the raw difference
115 cmovz r9,rdx
116 mov QWORD[rdi],r8
117 cmovz r10,rcx
118 mov QWORD[8+rdi],r9
119 cmovz r11,r12
120 mov QWORD[16+rdi],r10
121 mov QWORD[24+rdi],r11
122
123 pop r13
124 pop r12
125 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
126 mov rsi,QWORD[16+rsp]
127 DB 0F3h,0C3h ;repret
128 $L$SEH_end_ecp_nistz256_neg:
129
130
131
132
133
134
; ecp_nistz256_mul_mont
; Win64 entry: rcx = result pointer, rdx = first operand, r8 = second operand.
; Thin wrapper: remaps the arguments to rdi/rsi/rdx, saves the callee-saved
; registers used by the helper, preloads the helper's register inputs and
; calls __ecp_nistz256_mul_montq.
135 global ecp_nistz256_mul_mont
136
137 ALIGN 32
138 ecp_nistz256_mul_mont:
139 mov QWORD[8+rsp],rdi ;WIN64 prologue
140 mov QWORD[16+rsp],rsi
141 mov rax,rsp
142 $L$SEH_begin_ecp_nistz256_mul_mont:
143 mov rdi,rcx
144 mov rsi,rdx
145 mov rdx,r8
146
147
148 $L$mul_mont:
149 push rbp
150 push rbx
151 push r12
152 push r13
153 push r14
154 push r15
155 mov rbx,rdx ; rbx = pointer to second operand (mul clobbers rdx)
156 mov rax,QWORD[rdx] ; rax = word 0 of second operand
157 mov r9,QWORD[rsi] ; r9..r12 = the four words of the first operand
158 mov r10,QWORD[8+rsi]
159 mov r11,QWORD[16+rsi]
160 mov r12,QWORD[24+rsi]
161
162 call __ecp_nistz256_mul_montq
163 $L$mul_mont_done:
164 pop r15
165 pop r14
166 pop r13
167 pop r12
168 pop rbx
169 pop rbp
170 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
171 mov rsi,QWORD[16+rsp]
172 DB 0F3h,0C3h ;repret
173 $L$SEH_end_ecp_nistz256_mul_mont:
174
175
; __ecp_nistz256_mul_montq (internal helper, register calling convention)
; In:  rdi = result pointer
;      rsi = pointer to operand A (words 0..3 re-read from memory)
;      rbx = pointer to operand B (words 1..3 read from memory)
;      rax = B[0], r9,r10,r11,r12 = A[0..3]
; Out: result stored to [rdi] and left in r12,r13,r8,r9;
;      r14 = [$L$poly+8], r15 = [$L$poly+24]
; Clobbers rax,rbx,rcx,rdx,rbp,r8-r15 and flags.
; Structure: four rounds, each a "multiply A by one word of B and accumulate"
; step followed by a reduction step (presumably Montgomery reduction, going
; by the function name), then one conditional subtraction of the modulus.
176 ALIGN 32
177 __ecp_nistz256_mul_montq:
178
179
180 mov rbp,rax ; rbp = B[0]; rax/rdx are consumed by every mul
181 mul r9
182 mov r14,QWORD[(($L$poly+8))]
183 mov r8,rax
184 mov rax,rbp
185 mov r9,rdx
186
187 mul r10
188 mov r15,QWORD[(($L$poly+24))]
189 add r9,rax
190 mov rax,rbp
191 adc rdx,0
192 mov r10,rdx
193
194 mul r11
195 add r10,rax
196 mov rax,rbp
197 adc rdx,0
198 mov r11,rdx
199
200 mul r12
201 add r11,rax
202 mov rax,r8
203 adc rdx,0
204 xor r13,r13
205 mov r12,rdx ; r8..r12 now hold A * B[0]
206
207
208
209
210
211
212
213
214
215
216 mov rbp,r8 ; reduction step 1: fold the lowest word r8 into r9..r13
217 shl r8,32
218 mul r15 ; rdx:rax = low word * [$L$poly+24]
219 shr rbp,32
220 add r9,r8
221 adc r10,rbp
222 adc r11,rax
223 mov rax,QWORD[8+rbx] ; rax = B[1] for the next round
224 adc r12,rdx
225 adc r13,0
226 xor r8,r8
227
228
229
230 mov rbp,rax ; round 2: accumulate A * B[1] into r9..r13,r8
231 mul QWORD[rsi]
232 add r9,rax
233 mov rax,rbp
234 adc rdx,0
235 mov rcx,rdx
236
237 mul QWORD[8+rsi]
238 add r10,rcx
239 adc rdx,0
240 add r10,rax
241 mov rax,rbp
242 adc rdx,0
243 mov rcx,rdx
244
245 mul QWORD[16+rsi]
246 add r11,rcx
247 adc rdx,0
248 add r11,rax
249 mov rax,rbp
250 adc rdx,0
251 mov rcx,rdx
252
253 mul QWORD[24+rsi]
254 add r12,rcx
255 adc rdx,0
256 add r12,rax
257 mov rax,r9
258 adc r13,rdx
259 adc r8,0
260
261
262
263 mov rbp,r9 ; reduction step 2: fold r9
264 shl r9,32
265 mul r15
266 shr rbp,32
267 add r10,r9
268 adc r11,rbp
269 adc r12,rax
270 mov rax,QWORD[16+rbx] ; rax = B[2]
271 adc r13,rdx
272 adc r8,0
273 xor r9,r9
274
275
276
277 mov rbp,rax ; round 3: accumulate A * B[2]
278 mul QWORD[rsi]
279 add r10,rax
280 mov rax,rbp
281 adc rdx,0
282 mov rcx,rdx
283
284 mul QWORD[8+rsi]
285 add r11,rcx
286 adc rdx,0
287 add r11,rax
288 mov rax,rbp
289 adc rdx,0
290 mov rcx,rdx
291
292 mul QWORD[16+rsi]
293 add r12,rcx
294 adc rdx,0
295 add r12,rax
296 mov rax,rbp
297 adc rdx,0
298 mov rcx,rdx
299
300 mul QWORD[24+rsi]
301 add r13,rcx
302 adc rdx,0
303 add r13,rax
304 mov rax,r10
305 adc r8,rdx
306 adc r9,0
307
308
309
310 mov rbp,r10 ; reduction step 3: fold r10
311 shl r10,32
312 mul r15
313 shr rbp,32
314 add r11,r10
315 adc r12,rbp
316 adc r13,rax
317 mov rax,QWORD[24+rbx] ; rax = B[3]
318 adc r8,rdx
319 adc r9,0
320 xor r10,r10
321
322
323
324 mov rbp,rax ; round 4: accumulate A * B[3]
325 mul QWORD[rsi]
326 add r11,rax
327 mov rax,rbp
328 adc rdx,0
329 mov rcx,rdx
330
331 mul QWORD[8+rsi]
332 add r12,rcx
333 adc rdx,0
334 add r12,rax
335 mov rax,rbp
336 adc rdx,0
337 mov rcx,rdx
338
339 mul QWORD[16+rsi]
340 add r13,rcx
341 adc rdx,0
342 add r13,rax
343 mov rax,rbp
344 adc rdx,0
345 mov rcx,rdx
346
347 mul QWORD[24+rsi]
348 add r8,rcx
349 adc rdx,0
350 add r8,rax
351 mov rax,r11
352 adc r9,rdx
353 adc r10,0
354
355
356
357 mov rbp,r11 ; reduction step 4: fold r11
358 shl r11,32
359 mul r15
360 shr rbp,32
361 add r12,r11
362 adc r13,rbp
363 mov rcx,r12 ; rcx,rbp,rbx,rdx keep the un-subtracted result
364 adc r8,rax
365 adc r9,rdx
366 mov rbp,r13
367 adc r10,0 ; r10 = top carry word
368
369
370
371 sub r12,-1 ; trial subtraction of the modulus (word 0 is all ones)
372 mov rbx,r8
373 sbb r13,r14
374 sbb r8,0
375 mov rdx,r9
376 sbb r9,r15
377 sbb r10,0 ; CF=1 if the 5-word value was below the modulus
378
379 cmovc r12,rcx ; borrow: keep the un-subtracted value
380 cmovc r13,rbp
381 mov QWORD[rdi],r12
382 cmovc r8,rbx
383 mov QWORD[8+rdi],r13
384 cmovc r9,rdx
385 mov QWORD[16+rdi],r8
386 mov QWORD[24+rdi],r9
387
388 DB 0F3h,0C3h ;repret
389
390
391
392
393
394
395
396
397
; ecp_nistz256_sqr_mont
; Win64 entry: rcx = result pointer, rdx = input pointer.
; Thin wrapper: remaps the arguments to rdi/rsi, saves callee-saved registers,
; preloads the four input words and calls __ecp_nistz256_sqr_montq.
398 global ecp_nistz256_sqr_mont
399
400 ALIGN 32
401 ecp_nistz256_sqr_mont:
402 mov QWORD[8+rsp],rdi ;WIN64 prologue
403 mov QWORD[16+rsp],rsi
404 mov rax,rsp
405 $L$SEH_begin_ecp_nistz256_sqr_mont:
406 mov rdi,rcx
407 mov rsi,rdx
408
409
410 push rbp
411 push rbx
412 push r12
413 push r13
414 push r14
415 push r15
416 mov rax,QWORD[rsi] ; rax,r14,r15,r8 = input words 0..3
417 mov r14,QWORD[8+rsi]
418 mov r15,QWORD[16+rsi]
419 mov r8,QWORD[24+rsi]
420
421 call __ecp_nistz256_sqr_montq
422 $L$sqr_mont_done:
423 pop r15
424 pop r14
425 pop r13
426 pop r12
427 pop rbx
428 pop rbp
429 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
430 mov rsi,QWORD[16+rsp]
431 DB 0F3h,0C3h ;repret
432 $L$SEH_end_ecp_nistz256_sqr_mont:
433
434
; __ecp_nistz256_sqr_montq (internal helper, register calling convention)
; In:  rdi = result pointer
;      rsi = pointer to the input (words re-read from memory for the squares)
;      rax,r14,r15,r8 = input words 0..3
; Out: result stored to [rdi] and left in r12,r13,r14,r15;
;      rsi = [$L$poly+8], rbp = [$L$poly+24] (callers rely on this)
; Clobbers rax,rcx,rdx,rbp,rsi,r8-r15 and flags.
; Structure: off-diagonal products, doubled, plus the four squares, then
; four reduction steps and one conditional subtraction of the modulus.
435 ALIGN 32
436 __ecp_nistz256_sqr_montq:
437 mov r13,rax ; r13 = a[0]
438 mul r14 ; a[0]*a[1]
439 mov r9,rax
440 mov rax,r15
441 mov r10,rdx
442
443 mul r13 ; a[2]*a[0]
444 add r10,rax
445 mov rax,r8
446 adc rdx,0
447 mov r11,rdx
448
449 mul r13 ; a[3]*a[0]
450 add r11,rax
451 mov rax,r15
452 adc rdx,0
453 mov r12,rdx
454
455
456 mul r14 ; a[2]*a[1]
457 add r11,rax
458 mov rax,r8
459 adc rdx,0
460 mov rbp,rdx
461
462 mul r14 ; a[3]*a[1]
463 add r12,rax
464 mov rax,r8
465 adc rdx,0
466 add r12,rbp
467 mov r13,rdx
468 adc r13,0
469
470
471 mul r15 ; a[3]*a[2]
472 xor r15,r15
473 add r13,rax
474 mov rax,QWORD[rsi]
475 mov r14,rdx
476 adc r14,0
477
478 add r9,r9 ; double the off-diagonal sum
479 adc r10,r10
480 adc r11,r11
481 adc r12,r12
482 adc r13,r13
483 adc r14,r14
484 adc r15,0
485
486 mul rax ; a[0]^2
487 mov r8,rax
488 mov rax,QWORD[8+rsi]
489 mov rcx,rdx
490
491 mul rax ; a[1]^2
492 add r9,rcx
493 adc r10,rax
494 mov rax,QWORD[16+rsi]
495 adc rdx,0
496 mov rcx,rdx
497
498 mul rax ; a[2]^2
499 add r11,rcx
500 adc r12,rax
501 mov rax,QWORD[24+rsi]
502 adc rdx,0
503 mov rcx,rdx
504
505 mul rax ; a[3]^2
506 add r13,rcx
507 adc r14,rax
508 mov rax,r8
509 adc r15,rdx ; r8..r15 = full 512-bit square
510
511 mov rsi,QWORD[(($L$poly+8))] ; input pointer no longer needed
512 mov rbp,QWORD[(($L$poly+24))]
513
514
515
516
517 mov rcx,r8 ; reduction step 1: fold r8
518 shl r8,32
519 mul rbp
520 shr rcx,32
521 add r9,r8
522 adc r10,rcx
523 adc r11,rax
524 mov rax,r9
525 adc rdx,0
526
527
528
529 mov rcx,r9 ; reduction step 2: fold r9
530 shl r9,32
531 mov r8,rdx
532 mul rbp
533 shr rcx,32
534 add r10,r9
535 adc r11,rcx
536 adc r8,rax
537 mov rax,r10
538 adc rdx,0
539
540
541
542 mov rcx,r10 ; reduction step 3: fold r10
543 shl r10,32
544 mov r9,rdx
545 mul rbp
546 shr rcx,32
547 add r11,r10
548 adc r8,rcx
549 adc r9,rax
550 mov rax,r11
551 adc rdx,0
552
553
554
555 mov rcx,r11 ; reduction step 4: fold r11
556 shl r11,32
557 mov r10,rdx
558 mul rbp
559 shr rcx,32
560 add r8,r11
561 adc r9,rcx
562 adc r10,rax
563 adc rdx,0
564 xor r11,r11
565
566
567
568 add r12,r8 ; add the folded low half to the high half
569 adc r13,r9
570 mov r8,r12 ; r8,r9,r10,rcx keep the un-subtracted result
571 adc r14,r10
572 adc r15,rdx
573 mov r9,r13
574 adc r11,0 ; r11 = top carry word
575
576 sub r12,-1 ; trial subtraction of the modulus
577 mov r10,r14
578 sbb r13,rsi
579 sbb r14,0
580 mov rcx,r15
581 sbb r15,rbp
582 sbb r11,0 ; CF=1 if the 5-word value was below the modulus
583
584 cmovc r12,r8 ; borrow: keep the un-subtracted value
585 cmovc r13,r9
586 mov QWORD[rdi],r12
587 cmovc r14,r10
588 mov QWORD[8+rdi],r13
589 cmovc r15,rcx
590 mov QWORD[16+rdi],r14
591 mov QWORD[24+rdi],r15
592
593 DB 0F3h,0C3h ;repret
594
595
596
597
598
599
600
; ecp_nistz256_from_mont
; Win64 entry: rcx = result pointer, rdx = input pointer (4 x 64-bit words).
; Runs four reduction steps on the input with no multiplication by a second
; operand, then conditionally subtracts the modulus. Going by the name this
; converts out of Montgomery form.
; rdi/rsi are saved in the caller's home space; r12/r13 are pushed/popped.
601 global ecp_nistz256_from_mont
602
603 ALIGN 32
604 ecp_nistz256_from_mont:
605 mov QWORD[8+rsp],rdi ;WIN64 prologue
606 mov QWORD[16+rsp],rsi
607 mov rax,rsp
608 $L$SEH_begin_ecp_nistz256_from_mont:
609 mov rdi,rcx
610 mov rsi,rdx
611
612
613 push r12
614 push r13
615
616 mov rax,QWORD[rsi]
617 mov r13,QWORD[(($L$poly+24))]
618 mov r9,QWORD[8+rsi]
619 mov r10,QWORD[16+rsi]
620 mov r11,QWORD[24+rsi]
621 mov r8,rax
622 mov r12,QWORD[(($L$poly+8))]
623
624
625
626 mov rcx,rax ; reduction step 1: fold word 0
627 shl r8,32
628 mul r13
629 shr rcx,32
630 add r9,r8
631 adc r10,rcx
632 adc r11,rax
633 mov rax,r9
634 adc rdx,0
635
636
637
638 mov rcx,r9 ; reduction step 2: fold r9
639 shl r9,32
640 mov r8,rdx
641 mul r13
642 shr rcx,32
643 add r10,r9
644 adc r11,rcx
645 adc r8,rax
646 mov rax,r10
647 adc rdx,0
648
649
650
651 mov rcx,r10 ; reduction step 3: fold r10
652 shl r10,32
653 mov r9,rdx
654 mul r13
655 shr rcx,32
656 add r11,r10
657 adc r8,rcx
658 adc r9,rax
659 mov rax,r11
660 adc rdx,0
661
662
663
664 mov rcx,r11 ; reduction step 4: fold r11
665 shl r11,32
666 mov r10,rdx
667 mul r13
668 shr rcx,32
669 add r8,r11
670 adc r9,rcx
671 mov rcx,r8 ; rcx,rsi,rax,r11 keep the un-subtracted words
672 adc r10,rax
673 mov rsi,r9
674 adc rdx,0
675
676 sub r8,-1 ; trial subtraction of the modulus from r8,r9,r10,rdx
677 mov rax,r10
678 sbb r9,r12
679 sbb r10,0
680 mov r11,rdx
681 sbb rdx,r13
682 sbb r13,r13 ; r13 = -1 and ZF=0 on borrow; r13 = 0 and ZF=1 otherwise
683
684 cmovnz r8,rcx ; borrow: restore the un-subtracted words
685 cmovnz r9,rsi
686 mov QWORD[rdi],r8
687 cmovnz r10,rax
688 mov QWORD[8+rdi],r9
689 cmovz r11,rdx ; no borrow: take the subtracted top word
690 mov QWORD[16+rdi],r10
691 mov QWORD[24+rdi],r11
692
693 pop r13
694 pop r12
695 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
696 mov rsi,QWORD[16+rsp]
697 DB 0F3h,0C3h ;repret
698 $L$SEH_end_ecp_nistz256_from_mont:
699
700
; ecp_nistz256_select_w5
; Win64 entry: rcx = output pointer (96 bytes), rdx = table pointer,
; r8d = index.
; Reads all 16 table entries of 96 bytes each and ORs together the one whose
; 1-based position equals the index, using a compare mask instead of an
; indexed load. Index 0 matches no entry, so the output is all zero.
; The table is read with movdqa, so it must be 16-byte aligned.
; xmm6-xmm15 are saved to the stack by the DB-encoded prologue and restored.
701 global ecp_nistz256_select_w5
702
703 ALIGN 32
704 ecp_nistz256_select_w5:
705 lea rax,[((-136))+rsp]
706 $L$SEH_begin_ecp_nistz256_select_w5:
707 DB 0x48,0x8d,0x60,0xe0 ; lea rsp,[rax-0x20]  (rsp -= 168)
708 DB 0x0f,0x29,0x70,0xe0 ; movaps [rax-0x20],xmm6
709 DB 0x0f,0x29,0x78,0xf0 ; movaps [rax-0x10],xmm7
710 DB 0x44,0x0f,0x29,0x00 ; movaps [rax],xmm8
711 DB 0x44,0x0f,0x29,0x48,0x10 ; movaps [rax+0x10],xmm9
712 DB 0x44,0x0f,0x29,0x50,0x20 ; movaps [rax+0x20],xmm10
713 DB 0x44,0x0f,0x29,0x58,0x30 ; movaps [rax+0x30],xmm11
714 DB 0x44,0x0f,0x29,0x60,0x40 ; movaps [rax+0x40],xmm12
715 DB 0x44,0x0f,0x29,0x68,0x50 ; movaps [rax+0x50],xmm13
716 DB 0x44,0x0f,0x29,0x70,0x60 ; movaps [rax+0x60],xmm14
717 DB 0x44,0x0f,0x29,0x78,0x70 ; movaps [rax+0x70],xmm15
718 movdqa xmm0,XMMWORD[$L$One]
719 movd xmm1,r8d
720
721 pxor xmm2,xmm2 ; xmm2..xmm7 = 96-byte accumulator
722 pxor xmm3,xmm3
723 pxor xmm4,xmm4
724 pxor xmm5,xmm5
725 pxor xmm6,xmm6
726 pxor xmm7,xmm7
727
728 movdqa xmm8,xmm0 ; xmm8 = running position counter, starts at 1
729 pshufd xmm1,xmm1,0 ; broadcast the index to all four dwords
730
731 mov rax,16 ; 16 table entries
732 $L$select_loop_sse_w5:
733
734 movdqa xmm15,xmm8
735 paddd xmm8,xmm0
736 pcmpeqd xmm15,xmm1 ; xmm15 = all ones if position == index, else 0
737
738 movdqa xmm9,XMMWORD[rdx]
739 movdqa xmm10,XMMWORD[16+rdx]
740 movdqa xmm11,XMMWORD[32+rdx]
741 movdqa xmm12,XMMWORD[48+rdx]
742 movdqa xmm13,XMMWORD[64+rdx]
743 movdqa xmm14,XMMWORD[80+rdx]
744 lea rdx,[96+rdx]
745
746 pand xmm9,xmm15
747 pand xmm10,xmm15
748 por xmm2,xmm9
749 pand xmm11,xmm15
750 por xmm3,xmm10
751 pand xmm12,xmm15
752 por xmm4,xmm11
753 pand xmm13,xmm15
754 por xmm5,xmm12
755 pand xmm14,xmm15
756 por xmm6,xmm13
757 por xmm7,xmm14
758
759 dec rax
760 jnz NEAR $L$select_loop_sse_w5
761
762 movdqu XMMWORD[rcx],xmm2
763 movdqu XMMWORD[16+rcx],xmm3
764 movdqu XMMWORD[32+rcx],xmm4
765 movdqu XMMWORD[48+rcx],xmm5
766 movdqu XMMWORD[64+rcx],xmm6
767 movdqu XMMWORD[80+rcx],xmm7
768 movaps xmm6,XMMWORD[rsp] ; restore the non-volatile xmm registers
769 movaps xmm7,XMMWORD[16+rsp]
770 movaps xmm8,XMMWORD[32+rsp]
771 movaps xmm9,XMMWORD[48+rsp]
772 movaps xmm10,XMMWORD[64+rsp]
773 movaps xmm11,XMMWORD[80+rsp]
774 movaps xmm12,XMMWORD[96+rsp]
775 movaps xmm13,XMMWORD[112+rsp]
776 movaps xmm14,XMMWORD[128+rsp]
777 movaps xmm15,XMMWORD[144+rsp]
778 lea rsp,[168+rsp]
779 $L$SEH_end_ecp_nistz256_select_w5:
780 DB 0F3h,0C3h ;repret
781
782
783
784
; ecp_nistz256_select_w7
; Win64 entry: rcx = output pointer (64 bytes), rdx = table pointer,
; r8d = index.
; Same masked-scan scheme as the w5 variant but over 64 entries of 64 bytes
; each. Index 0 matches no entry, so the output is all zero.
; The table is read with movdqa, so it must be 16-byte aligned.
; xmm6-xmm15 are saved to the stack by the DB-encoded prologue and restored.
785 global ecp_nistz256_select_w7
786
787 ALIGN 32
788 ecp_nistz256_select_w7:
789 lea rax,[((-136))+rsp]
790 $L$SEH_begin_ecp_nistz256_select_w7:
791 DB 0x48,0x8d,0x60,0xe0 ; lea rsp,[rax-0x20]  (rsp -= 168)
792 DB 0x0f,0x29,0x70,0xe0 ; movaps [rax-0x20],xmm6
793 DB 0x0f,0x29,0x78,0xf0 ; movaps [rax-0x10],xmm7
794 DB 0x44,0x0f,0x29,0x00 ; movaps [rax],xmm8
795 DB 0x44,0x0f,0x29,0x48,0x10 ; movaps [rax+0x10],xmm9
796 DB 0x44,0x0f,0x29,0x50,0x20 ; movaps [rax+0x20],xmm10
797 DB 0x44,0x0f,0x29,0x58,0x30 ; movaps [rax+0x30],xmm11
798 DB 0x44,0x0f,0x29,0x60,0x40 ; movaps [rax+0x40],xmm12
799 DB 0x44,0x0f,0x29,0x68,0x50 ; movaps [rax+0x50],xmm13
800 DB 0x44,0x0f,0x29,0x70,0x60 ; movaps [rax+0x60],xmm14
801 DB 0x44,0x0f,0x29,0x78,0x70 ; movaps [rax+0x70],xmm15
802 movdqa xmm8,XMMWORD[$L$One] ; xmm8 = running position counter, starts at 1
803 movd xmm1,r8d
804
805 pxor xmm2,xmm2 ; xmm2..xmm5 = 64-byte accumulator
806 pxor xmm3,xmm3
807 pxor xmm4,xmm4
808 pxor xmm5,xmm5
809
810 movdqa xmm0,xmm8 ; xmm0 = increment (all ones per dword)
811 pshufd xmm1,xmm1,0 ; broadcast the index to all four dwords
812 mov rax,64 ; 64 table entries
813
814 $L$select_loop_sse_w7:
815 movdqa xmm15,xmm8
816 paddd xmm8,xmm0
817 movdqa xmm9,XMMWORD[rdx]
818 movdqa xmm10,XMMWORD[16+rdx]
819 pcmpeqd xmm15,xmm1 ; xmm15 = all ones if position == index, else 0
820 movdqa xmm11,XMMWORD[32+rdx]
821 movdqa xmm12,XMMWORD[48+rdx]
822 lea rdx,[64+rdx]
823
824 pand xmm9,xmm15
825 pand xmm10,xmm15
826 por xmm2,xmm9
827 pand xmm11,xmm15
828 por xmm3,xmm10
829 pand xmm12,xmm15
830 por xmm4,xmm11
831 prefetcht0 [255+rdx]
832 por xmm5,xmm12
833
834 dec rax
835 jnz NEAR $L$select_loop_sse_w7
836
837 movdqu XMMWORD[rcx],xmm2
838 movdqu XMMWORD[16+rcx],xmm3
839 movdqu XMMWORD[32+rcx],xmm4
840 movdqu XMMWORD[48+rcx],xmm5
841 movaps xmm6,XMMWORD[rsp] ; restore the non-volatile xmm registers
842 movaps xmm7,XMMWORD[16+rsp]
843 movaps xmm8,XMMWORD[32+rsp]
844 movaps xmm9,XMMWORD[48+rsp]
845 movaps xmm10,XMMWORD[64+rsp]
846 movaps xmm11,XMMWORD[80+rsp]
847 movaps xmm12,XMMWORD[96+rsp]
848 movaps xmm13,XMMWORD[112+rsp]
849 movaps xmm14,XMMWORD[128+rsp]
850 movaps xmm15,XMMWORD[144+rsp]
851 lea rsp,[168+rsp]
852 $L$SEH_end_ecp_nistz256_select_w7:
853 DB 0F3h,0C3h ;repret
854
; ecp_nistz256_avx2_select_w7
; Stub: after the standard Win64 argument remap the body is a single UD2
; instruction, so calling this raises an invalid-opcode exception. The
; epilogue after it is only reached if execution somehow continues.
; NOTE(review): presumably a placeholder for builds without AVX2 code;
; confirm no caller can dispatch here.
855 global ecp_nistz256_avx2_select_w7
856
857 ALIGN 32
858 ecp_nistz256_avx2_select_w7:
859 mov QWORD[8+rsp],rdi ;WIN64 prologue
860 mov QWORD[16+rsp],rsi
861 mov rax,rsp
862 $L$SEH_begin_ecp_nistz256_avx2_select_w7:
863 mov rdi,rcx
864 mov rsi,rdx
865 mov rdx,r8
866
867
868 DB 0x0f,0x0b ; ud2
869 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
870 mov rsi,QWORD[16+rsp]
871 DB 0F3h,0C3h ;repret
872 $L$SEH_end_ecp_nistz256_avx2_select_w7:
873
; __ecp_nistz256_add_toq (internal helper, register calling convention)
; In:  r12,r13,r8,r9 = operand A (words 0..3)
;      rbx = pointer to operand B, rdi = result pointer
;      r14 = modulus word 1, r15 = modulus word 3 (words 0 and 2 are the
;      immediates -1 and 0)
; Out: (A + B) reduced by the modulus, stored to [rdi] and left in
;      r12,r13,r8,r9.
; Clobbers rax,rbp,rcx,r10,r11 and flags. Branch-free.
;
; Fix: the original kept the subtracted value only when the addition carried
; out of 256 bits, so a sum in [modulus, 2^256) was returned unreduced. The
; borrow of the trial subtraction is now folded into the decision.
874 ALIGN 32
875 __ecp_nistz256_add_toq:
876 add r12,QWORD[rbx]
877 adc r13,QWORD[8+rbx]
878 mov rax,r12 ; rax,rbp,rcx,r10 keep the un-subtracted sum
879 adc r8,QWORD[16+rbx]
880 adc r9,QWORD[24+rbx]
881 mov rbp,r13
882 sbb r11,r11 ; r11 = -1 if the addition carried out, else 0
883
884 sub r12,-1 ; trial subtraction of the modulus
885 mov rcx,r8
886 sbb r13,r14
887 sbb r8,0
888 mov r10,r9
889 sbb r9,r15
890 sbb r11,0 ; CF=1 only if there was no carry-out AND the subtraction borrowed
891
892 cmovc r12,rax ; sum < modulus: keep it as is
893 cmovc r13,rbp
894 mov QWORD[rdi],r12
895 cmovc r8,rcx
896 mov QWORD[8+rdi],r13
897 cmovc r9,r10
898 mov QWORD[16+rdi],r8
899 mov QWORD[24+rdi],r9
900
901 DB 0F3h,0C3h ;repret
902
903
904
; __ecp_nistz256_sub_fromq (internal helper, register calling convention)
; In:  r12,r13,r8,r9 = operand A (words 0..3)
;      rbx = pointer to operand B, rdi = result pointer
;      r14 = modulus word 1, r15 = modulus word 3
; Out: A - B, with the modulus added back if the subtraction borrowed;
;      stored to [rdi] and left in r12,r13,r8,r9.
; Clobbers rax,rbp,rcx,r10,r11 and flags.
905 ALIGN 32
906 __ecp_nistz256_sub_fromq:
907 sub r12,QWORD[rbx]
908 sbb r13,QWORD[8+rbx]
909 mov rax,r12 ; rax,rbp,rcx,r10 keep the raw difference
910 sbb r8,QWORD[16+rbx]
911 sbb r9,QWORD[24+rbx]
912 mov rbp,r13
913 sbb r11,r11 ; r11 = -1 if the subtraction borrowed, else 0
914
915 add r12,-1 ; trial add-back of the modulus
916 mov rcx,r8
917 adc r13,r14
918 adc r8,0
919 mov r10,r9
920 adc r9,r15
921 test r11,r11
922
923 cmovz r12,rax ; no borrow: keep the raw difference
924 cmovz r13,rbp
925 mov QWORD[rdi],r12
926 cmovz r8,rcx
927 mov QWORD[8+rdi],r13
928 cmovz r9,r10
929 mov QWORD[16+rdi],r8
930 mov QWORD[24+rdi],r9
931
932 DB 0F3h,0C3h ;repret
933
934
935
; __ecp_nistz256_subq (internal helper, register calling convention)
; In:  rax,rbp,rcx,r10 = operand A (words 0..3)
;      r12,r13,r8,r9   = operand B (words 0..3)
;      r14 = modulus word 1, r15 = modulus word 3
; Out: r12,r13,r8,r9 = A - B, with the modulus added back if the subtraction
;      borrowed. Nothing is written to memory; callers store the result.
; Clobbers rax,rbp,rcx,r10,r11 and flags.
936 ALIGN 32
937 __ecp_nistz256_subq:
938 sub rax,r12
939 sbb rbp,r13
940 mov r12,rax ; r12,r13,r8,r9 take the raw difference
941 sbb rcx,r8
942 sbb r10,r9
943 mov r13,rbp
944 sbb r11,r11 ; r11 = -1 if the subtraction borrowed, else 0
945
946 add rax,-1 ; trial add-back of the modulus
947 mov r8,rcx
948 adc rbp,r14
949 adc rcx,0
950 mov r9,r10
951 adc r10,r15
952 test r11,r11
953
954 cmovnz r12,rax ; borrow: take the value with the modulus added back
955 cmovnz r13,rbp
956 cmovnz r8,rcx
957 cmovnz r9,r10
958
959 DB 0F3h,0C3h ;repret
960
961
962
; __ecp_nistz256_mul_by_2q (internal helper, register calling convention)
; In:  r12,r13,r8,r9 = operand A (words 0..3), rdi = result pointer
;      r14 = modulus word 1, r15 = modulus word 3
; Out: (2 * A) reduced by the modulus, stored to [rdi] and left in
;      r12,r13,r8,r9.
; Clobbers rax,rbp,rcx,r10,r11 and flags. Branch-free.
;
; Fix: the original kept the subtracted value only when the doubling carried
; out of 256 bits, so a doubled value in [modulus, 2^256) was returned
; unreduced. The borrow of the trial subtraction is now folded into the
; decision.
963 ALIGN 32
964 __ecp_nistz256_mul_by_2q:
965 add r12,r12
966 adc r13,r13
967 mov rax,r12 ; rax,rbp,rcx,r10 keep the un-subtracted doubled value
968 adc r8,r8
969 adc r9,r9
970 mov rbp,r13
971 sbb r11,r11 ; r11 = -1 if the doubling carried out, else 0
972
973 sub r12,-1 ; trial subtraction of the modulus
974 mov rcx,r8
975 sbb r13,r14
976 sbb r8,0
977 mov r10,r9
978 sbb r9,r15
979 sbb r11,0 ; CF=1 only if there was no carry-out AND the subtraction borrowed
980
981 cmovc r12,rax ; doubled value < modulus: keep it as is
982 cmovc r13,rbp
983 mov QWORD[rdi],r12
984 cmovc r8,rcx
985 mov QWORD[8+rdi],r13
986 cmovc r9,r10
987 mov QWORD[16+rdi],r8
988 mov QWORD[24+rdi],r9
989
990 DB 0F3h,0C3h ;repret
991
; ecp_nistz256_point_double
; Win64 entry: rcx = output point, rdx = input point. A point is three
; 32-byte field elements at byte offsets 0, 32 and 64 (presumably X, Y, Z).
; Uses 32*5+8 bytes of stack as five 32-byte temporaries at 0,32,64,96,128.
; The output pointers are parked in xmm registers because rdi is reused as
; the helpers' destination: xmm0 = out, xmm1 = out+32, xmm2 = out+64.
; r14/r15 hold modulus words 1 and 3 for the add/sub helpers; the squaring
; helper overwrites them and leaves those words in rsi/rbp instead.
992 global ecp_nistz256_point_double
993
994 ALIGN 32
995 ecp_nistz256_point_double:
996 mov QWORD[8+rsp],rdi ;WIN64 prologue
997 mov QWORD[16+rsp],rsi
998 mov rax,rsp
999 $L$SEH_begin_ecp_nistz256_point_double:
1000 mov rdi,rcx
1001 mov rsi,rdx
1002
1003
1004 push rbp
1005 push rbx
1006 push r12
1007 push r13
1008 push r14
1009 push r15
1010 sub rsp,32*5+8
1011
1012 movdqu xmm0,XMMWORD[rsi]
1013 mov rbx,rsi ; rbx = input point for later reads
1014 movdqu xmm1,XMMWORD[16+rsi]
1015 mov r12,QWORD[((32+0))+rsi]
1016 mov r13,QWORD[((32+8))+rsi]
1017 mov r8,QWORD[((32+16))+rsi]
1018 mov r9,QWORD[((32+24))+rsi]
1019 mov r14,QWORD[(($L$poly+8))]
1020 mov r15,QWORD[(($L$poly+24))]
1021 movdqa XMMWORD[96+rsp],xmm0 ; [96+rsp] = copy of in[0..31]
1022 movdqa XMMWORD[(96+16)+rsp],xmm1
1023 lea r10,[32+rdi]
1024 lea r11,[64+rdi]
1025 DB 102,72,15,110,199 ; movq xmm0,rdi  (out)
1026 DB 102,73,15,110,202 ; movq xmm1,r10  (out+32)
1027 DB 102,73,15,110,211 ; movq xmm2,r11  (out+64)
1028
1029 lea rdi,[rsp]
1030 call __ecp_nistz256_mul_by_2q ; [0+rsp] = 2 * in[32..63]
1031
1032 mov rax,QWORD[((64+0))+rsi]
1033 mov r14,QWORD[((64+8))+rsi]
1034 mov r15,QWORD[((64+16))+rsi]
1035 mov r8,QWORD[((64+24))+rsi]
1036 lea rsi,[((64-0))+rsi]
1037 lea rdi,[64+rsp]
1038 call __ecp_nistz256_sqr_montq ; [64+rsp] = in[64..95]^2
1039
1040 mov rax,QWORD[((0+0))+rsp]
1041 mov r14,QWORD[((8+0))+rsp]
1042 lea rsi,[((0+0))+rsp]
1043 mov r15,QWORD[((16+0))+rsp]
1044 mov r8,QWORD[((24+0))+rsp]
1045 lea rdi,[rsp]
1046 call __ecp_nistz256_sqr_montq ; [0+rsp] = [0+rsp]^2
1047
1048 mov rax,QWORD[32+rbx]
1049 mov r9,QWORD[((64+0))+rbx]
1050 mov r10,QWORD[((64+8))+rbx]
1051 mov r11,QWORD[((64+16))+rbx]
1052 mov r12,QWORD[((64+24))+rbx]
1053 lea rsi,[((64-0))+rbx]
1054 lea rbx,[32+rbx]
1055 DB 102,72,15,126,215 ; movq rdi,xmm2  (out+64)
1056 call __ecp_nistz256_mul_montq ; out[64..95] = in[64..95] * in[32..63]
1057 call __ecp_nistz256_mul_by_2q ; out[64..95] doubled in place
1058
1059 mov r12,QWORD[((96+0))+rsp]
1060 mov r13,QWORD[((96+8))+rsp]
1061 lea rbx,[64+rsp]
1062 mov r8,QWORD[((96+16))+rsp]
1063 mov r9,QWORD[((96+24))+rsp]
1064 lea rdi,[32+rsp]
1065 call __ecp_nistz256_add_toq ; [32+rsp] = [96+rsp] + [64+rsp]
1066
1067 mov r12,QWORD[((96+0))+rsp]
1068 mov r13,QWORD[((96+8))+rsp]
1069 lea rbx,[64+rsp]
1070 mov r8,QWORD[((96+16))+rsp]
1071 mov r9,QWORD[((96+24))+rsp]
1072 lea rdi,[64+rsp]
1073 call __ecp_nistz256_sub_fromq ; [64+rsp] = [96+rsp] - [64+rsp]
1074
1075 mov rax,QWORD[((0+0))+rsp]
1076 mov r14,QWORD[((8+0))+rsp]
1077 lea rsi,[((0+0))+rsp]
1078 mov r15,QWORD[((16+0))+rsp]
1079 mov r8,QWORD[((24+0))+rsp]
1080 DB 102,72,15,126,207 ; movq rdi,xmm1  (out+32)
1081 call __ecp_nistz256_sqr_montq ; r12..r15 = [0+rsp]^2 (also stored to out+32)
1082 xor r9,r9 ; inline halving of r12..r15: add the modulus if odd, then shift right by 1
1083 mov rax,r12
1084 add r12,-1
1085 mov r10,r13
1086 adc r13,rsi ; rsi = modulus word 1 (left by the squaring helper)
1087 mov rcx,r14
1088 adc r14,0
1089 mov r8,r15
1090 adc r15,rbp ; rbp = modulus word 3
1091 adc r9,0
1092 xor rsi,rsi
1093 test rax,1
1094
1095 cmovz r12,rax ; value was even: discard the added modulus
1096 cmovz r13,r10
1097 cmovz r14,rcx
1098 cmovz r15,r8
1099 cmovz r9,rsi
1100
1101 mov rax,r13
1102 shr r12,1
1103 shl rax,63
1104 mov r10,r14
1105 shr r13,1
1106 or r12,rax
1107 shl r10,63
1108 mov rcx,r15
1109 shr r14,1
1110 or r13,r10
1111 shl rcx,63
1112 mov QWORD[rdi],r12
1113 shr r15,1
1114 mov QWORD[8+rdi],r13
1115 shl r9,63
1116 or r14,rcx
1117 or r15,r9
1118 mov QWORD[16+rdi],r14
1119 mov QWORD[24+rdi],r15
1120 mov rax,QWORD[64+rsp]
1121 lea rbx,[64+rsp]
1122 mov r9,QWORD[((0+32))+rsp]
1123 mov r10,QWORD[((8+32))+rsp]
1124 lea rsi,[((0+32))+rsp]
1125 mov r11,QWORD[((16+32))+rsp]
1126 mov r12,QWORD[((24+32))+rsp]
1127 lea rdi,[32+rsp]
1128 call __ecp_nistz256_mul_montq ; [32+rsp] = [32+rsp] * [64+rsp]
1129
1130 lea rdi,[128+rsp]
1131 call __ecp_nistz256_mul_by_2q ; [128+rsp] = 2 * [32+rsp]
1132
1133 lea rbx,[32+rsp]
1134 lea rdi,[32+rsp]
1135 call __ecp_nistz256_add_toq ; [32+rsp] = [128+rsp] + [32+rsp]  (3x the product)
1136
1137 mov rax,QWORD[96+rsp]
1138 lea rbx,[96+rsp]
1139 mov r9,QWORD[((0+0))+rsp]
1140 mov r10,QWORD[((8+0))+rsp]
1141 lea rsi,[((0+0))+rsp]
1142 mov r11,QWORD[((16+0))+rsp]
1143 mov r12,QWORD[((24+0))+rsp]
1144 lea rdi,[rsp]
1145 call __ecp_nistz256_mul_montq ; [0+rsp] = [0+rsp] * [96+rsp]
1146
1147 lea rdi,[128+rsp]
1148 call __ecp_nistz256_mul_by_2q ; [128+rsp] = 2 * [0+rsp]
1149
1150 mov rax,QWORD[((0+32))+rsp]
1151 mov r14,QWORD[((8+32))+rsp]
1152 lea rsi,[((0+32))+rsp]
1153 mov r15,QWORD[((16+32))+rsp]
1154 mov r8,QWORD[((24+32))+rsp]
1155 DB 102,72,15,126,199 ; movq rdi,xmm0  (out)
1156 call __ecp_nistz256_sqr_montq ; r12..r15 = [32+rsp]^2
1157
1158 lea rbx,[128+rsp]
1159 mov r8,r14 ; move the squaring result into the add/sub helper's register layout
1160 mov r9,r15
1161 mov r14,rsi ; reload modulus words 1 and 3 into r14/r15
1162 mov r15,rbp
1163 call __ecp_nistz256_sub_fromq ; out[0..31] = [32+rsp]^2 - [128+rsp]
1164
1165 mov rax,QWORD[((0+0))+rsp]
1166 mov rbp,QWORD[((0+8))+rsp]
1167 mov rcx,QWORD[((0+16))+rsp]
1168 mov r10,QWORD[((0+24))+rsp]
1169 lea rdi,[rsp]
1170 call __ecp_nistz256_subq ; r12,r13,r8,r9 = [0+rsp] - out[0..31]
1171
1172 mov rax,QWORD[32+rsp]
1173 lea rbx,[32+rsp]
1174 mov r14,r12
1175 xor ecx,ecx ; sets ZF=1, so both cmovz below always move
1176 mov QWORD[((0+0))+rsp],r12
1177 mov r10,r13
1178 mov QWORD[((0+8))+rsp],r13
1179 cmovz r11,r8
1180 mov QWORD[((0+16))+rsp],r8
1181 lea rsi,[((0-0))+rsp]
1182 cmovz r12,r9
1183 mov QWORD[((0+24))+rsp],r9
1184 mov r9,r14
1185 lea rdi,[rsp]
1186 call __ecp_nistz256_mul_montq ; [0+rsp] = [0+rsp] * [32+rsp]
1187
1188 DB 102,72,15,126,203 ; movq rbx,xmm1  (out+32)
1189 DB 102,72,15,126,207 ; movq rdi,xmm1  (out+32)
1190 call __ecp_nistz256_sub_fromq ; out[32..63] = [0+rsp] - out[32..63]
1191
1192 add rsp,32*5+8
1193 pop r15
1194 pop r14
1195 pop r13
1196 pop r12
1197 pop rbx
1198 pop rbp
1199 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
1200 mov rsi,QWORD[16+rsp]
1201 DB 0F3h,0C3h ;repret
1202 $L$SEH_end_ecp_nistz256_point_double:
; ecp_nistz256_point_add
; Win64 entry: rcx = output point, rdx = input point 1, r8 = input point 2.
; A point is three 32-byte field elements at byte offsets 0, 32 and 64
; (presumably X, Y, Z). Uses 32*18+8 bytes of stack as 32-byte slots.
; Input copies: in1 at 384/416/448+rsp, in2 at 480/512/544+rsp.
; xmm0 parks the output pointer. xmm5 / xmm4 become all-ones masks when the
; first 64 bytes of in1 / in2 are all zero; the final stores use them to
; pick in2 (if xmm5) or in1 (if xmm4) instead of the computed sum.
; NOTE(review): when the two difference values computed below are both zero
; and neither mask is set, the code still takes the generic-add path (the
; "jz $L$add_proceedq" branch) — confirm callers handle equal inputs.
; NOTE(review): the inline doubling before the first __ecp_nistz256_subq
; call subtracts the modulus only on carry-out, so a doubled value in
; [modulus, 2^256) is used unreduced — confirm this is acceptable.
1203 global ecp_nistz256_point_add
1204
1205 ALIGN 32
1206 ecp_nistz256_point_add:
1207 mov QWORD[8+rsp],rdi ;WIN64 prologue
1208 mov QWORD[16+rsp],rsi
1209 mov rax,rsp
1210 $L$SEH_begin_ecp_nistz256_point_add:
1211 mov rdi,rcx
1212 mov rsi,rdx
1213 mov rdx,r8
1214
1215
1216 push rbp
1217 push rbx
1218 push r12
1219 push r13
1220 push r14
1221 push r15
1222 sub rsp,32*18+8
1223
1224 movdqu xmm0,XMMWORD[rsi]
1225 movdqu xmm1,XMMWORD[16+rsi]
1226 movdqu xmm2,XMMWORD[32+rsi]
1227 movdqu xmm3,XMMWORD[48+rsi]
1228 movdqu xmm4,XMMWORD[64+rsi]
1229 movdqu xmm5,XMMWORD[80+rsi]
1230 mov rbx,rsi ; rbx = in1
1231 mov rsi,rdx ; rsi = in2
1232 movdqa XMMWORD[384+rsp],xmm0
1233 movdqa XMMWORD[(384+16)+rsp],xmm1
1234 por xmm1,xmm0
1235 movdqa XMMWORD[416+rsp],xmm2
1236 movdqa XMMWORD[(416+16)+rsp],xmm3
1237 por xmm3,xmm2
1238 movdqa XMMWORD[448+rsp],xmm4
1239 movdqa XMMWORD[(448+16)+rsp],xmm5
1240 por xmm3,xmm1 ; xmm3 = OR of in1 bytes 0..63
1241
1242 movdqu xmm0,XMMWORD[rsi]
1243 pshufd xmm5,xmm3,0xb1
1244 movdqu xmm1,XMMWORD[16+rsi]
1245 movdqu xmm2,XMMWORD[32+rsi]
1246 por xmm5,xmm3
1247 movdqu xmm3,XMMWORD[48+rsi]
1248 mov rax,QWORD[((64+0))+rsi]
1249 mov r14,QWORD[((64+8))+rsi]
1250 mov r15,QWORD[((64+16))+rsi]
1251 mov r8,QWORD[((64+24))+rsi]
1252 movdqa XMMWORD[480+rsp],xmm0
1253 pshufd xmm4,xmm5,0x1e
1254 movdqa XMMWORD[(480+16)+rsp],xmm1
1255 por xmm1,xmm0
1256 DB 102,72,15,110,199 ; movq xmm0,rdi  (park the output pointer)
1257 movdqa XMMWORD[512+rsp],xmm2
1258 movdqa XMMWORD[(512+16)+rsp],xmm3
1259 por xmm3,xmm2
1260 por xmm5,xmm4
1261 pxor xmm4,xmm4
1262 por xmm3,xmm1 ; xmm3 = OR of in2 bytes 0..63
1263
1264 lea rsi,[((64-0))+rsi]
1265 mov QWORD[((544+0))+rsp],rax
1266 mov QWORD[((544+8))+rsp],r14
1267 mov QWORD[((544+16))+rsp],r15
1268 mov QWORD[((544+24))+rsp],r8
1269 lea rdi,[96+rsp]
1270 call __ecp_nistz256_sqr_montq ; [96+rsp] = in2[64..95]^2
1271
1272 pcmpeqd xmm5,xmm4
1273 pshufd xmm4,xmm3,0xb1
1274 por xmm4,xmm3
1275 pshufd xmm5,xmm5,0 ; xmm5 = all ones if in1 bytes 0..63 are all zero
1276 pshufd xmm3,xmm4,0x1e
1277 por xmm4,xmm3
1278 pxor xmm3,xmm3
1279 pcmpeqd xmm4,xmm3
1280 pshufd xmm4,xmm4,0 ; xmm4 = all ones if in2 bytes 0..63 are all zero
1281 mov rax,QWORD[((64+0))+rbx]
1282 mov r14,QWORD[((64+8))+rbx]
1283 mov r15,QWORD[((64+16))+rbx]
1284 mov r8,QWORD[((64+24))+rbx]
1285
1286 lea rsi,[((64-0))+rbx]
1287 lea rdi,[32+rsp]
1288 call __ecp_nistz256_sqr_montq ; [32+rsp] = in1[64..95]^2
1289
1290 mov rax,QWORD[544+rsp]
1291 lea rbx,[544+rsp]
1292 mov r9,QWORD[((0+96))+rsp]
1293 mov r10,QWORD[((8+96))+rsp]
1294 lea rsi,[((0+96))+rsp]
1295 mov r11,QWORD[((16+96))+rsp]
1296 mov r12,QWORD[((24+96))+rsp]
1297 lea rdi,[224+rsp]
1298 call __ecp_nistz256_mul_montq ; [224+rsp] = [96+rsp] * [544+rsp]
1299
1300 mov rax,QWORD[448+rsp]
1301 lea rbx,[448+rsp]
1302 mov r9,QWORD[((0+32))+rsp]
1303 mov r10,QWORD[((8+32))+rsp]
1304 lea rsi,[((0+32))+rsp]
1305 mov r11,QWORD[((16+32))+rsp]
1306 mov r12,QWORD[((24+32))+rsp]
1307 lea rdi,[256+rsp]
1308 call __ecp_nistz256_mul_montq ; [256+rsp] = [32+rsp] * [448+rsp]
1309
1310 mov rax,QWORD[416+rsp]
1311 lea rbx,[416+rsp]
1312 mov r9,QWORD[((0+224))+rsp]
1313 mov r10,QWORD[((8+224))+rsp]
1314 lea rsi,[((0+224))+rsp]
1315 mov r11,QWORD[((16+224))+rsp]
1316 mov r12,QWORD[((24+224))+rsp]
1317 lea rdi,[224+rsp]
1318 call __ecp_nistz256_mul_montq ; [224+rsp] = [224+rsp] * [416+rsp]
1319
1320 mov rax,QWORD[512+rsp]
1321 lea rbx,[512+rsp]
1322 mov r9,QWORD[((0+256))+rsp]
1323 mov r10,QWORD[((8+256))+rsp]
1324 lea rsi,[((0+256))+rsp]
1325 mov r11,QWORD[((16+256))+rsp]
1326 mov r12,QWORD[((24+256))+rsp]
1327 lea rdi,[256+rsp]
1328 call __ecp_nistz256_mul_montq ; [256+rsp] = [256+rsp] * [512+rsp]
1329
1330 lea rbx,[224+rsp]
1331 lea rdi,[64+rsp]
1332 call __ecp_nistz256_sub_fromq ; [64+rsp] = [256+rsp] - [224+rsp]
1333
1334 or r12,r13 ; r12 = OR of the four difference words (zero iff equal)
1335 movdqa xmm2,xmm4
1336 or r12,r8
1337 or r12,r9
1338 por xmm2,xmm5 ; xmm2 = nonzero if either input mask is set
1339 DB 102,73,15,110,220 ; movq xmm3,r12  (park the "difference is zero" test value)
1340
1341 mov rax,QWORD[384+rsp]
1342 lea rbx,[384+rsp]
1343 mov r9,QWORD[((0+96))+rsp]
1344 mov r10,QWORD[((8+96))+rsp]
1345 lea rsi,[((0+96))+rsp]
1346 mov r11,QWORD[((16+96))+rsp]
1347 mov r12,QWORD[((24+96))+rsp]
1348 lea rdi,[160+rsp]
1349 call __ecp_nistz256_mul_montq ; [160+rsp] = [96+rsp] * [384+rsp]
1350
1351 mov rax,QWORD[480+rsp]
1352 lea rbx,[480+rsp]
1353 mov r9,QWORD[((0+32))+rsp]
1354 mov r10,QWORD[((8+32))+rsp]
1355 lea rsi,[((0+32))+rsp]
1356 mov r11,QWORD[((16+32))+rsp]
1357 mov r12,QWORD[((24+32))+rsp]
1358 lea rdi,[192+rsp]
1359 call __ecp_nistz256_mul_montq ; [192+rsp] = [32+rsp] * [480+rsp]
1360
1361 lea rbx,[160+rsp]
1362 lea rdi,[rsp]
1363 call __ecp_nistz256_sub_fromq ; [0+rsp] = [192+rsp] - [160+rsp]
1364
1365 or r12,r13
1366 or r12,r8
1367 or r12,r9 ; ZF=1 iff this second difference is zero
1368
1369 DB 0x3e ; DS segment prefix on the following jnz (legacy branch hint)
1370 jnz NEAR $L$add_proceedq
1371 DB 102,73,15,126,208 ; movq r8,xmm2  (input masks)
1372 DB 102,73,15,126,217 ; movq r9,xmm3  (first difference test value)
1373 test r8,r8
1374 jnz NEAR $L$add_proceedq
1375 test r9,r9
1376 jz NEAR $L$add_proceedq
1377
1378 DB 102,72,15,126,199 ; movq rdi,xmm0  (output pointer)
1379 pxor xmm0,xmm0 ; write an all-zero 96-byte point
1380 movdqu XMMWORD[rdi],xmm0
1381 movdqu XMMWORD[16+rdi],xmm0
1382 movdqu XMMWORD[32+rdi],xmm0
1383 movdqu XMMWORD[48+rdi],xmm0
1384 movdqu XMMWORD[64+rdi],xmm0
1385 movdqu XMMWORD[80+rdi],xmm0
1386 jmp NEAR $L$add_doneq
1387
1388 ALIGN 32
1389 $L$add_proceedq:
1390 mov rax,QWORD[((0+64))+rsp]
1391 mov r14,QWORD[((8+64))+rsp]
1392 lea rsi,[((0+64))+rsp]
1393 mov r15,QWORD[((16+64))+rsp]
1394 mov r8,QWORD[((24+64))+rsp]
1395 lea rdi,[96+rsp]
1396 call __ecp_nistz256_sqr_montq ; [96+rsp] = [64+rsp]^2
1397
1398 mov rax,QWORD[448+rsp]
1399 lea rbx,[448+rsp]
1400 mov r9,QWORD[((0+0))+rsp]
1401 mov r10,QWORD[((8+0))+rsp]
1402 lea rsi,[((0+0))+rsp]
1403 mov r11,QWORD[((16+0))+rsp]
1404 mov r12,QWORD[((24+0))+rsp]
1405 lea rdi,[352+rsp]
1406 call __ecp_nistz256_mul_montq ; [352+rsp] = [0+rsp] * [448+rsp]
1407
1408 mov rax,QWORD[((0+0))+rsp]
1409 mov r14,QWORD[((8+0))+rsp]
1410 lea rsi,[((0+0))+rsp]
1411 mov r15,QWORD[((16+0))+rsp]
1412 mov r8,QWORD[((24+0))+rsp]
1413 lea rdi,[32+rsp]
1414 call __ecp_nistz256_sqr_montq ; [32+rsp] = [0+rsp]^2
1415
1416 mov rax,QWORD[544+rsp]
1417 lea rbx,[544+rsp]
1418 mov r9,QWORD[((0+352))+rsp]
1419 mov r10,QWORD[((8+352))+rsp]
1420 lea rsi,[((0+352))+rsp]
1421 mov r11,QWORD[((16+352))+rsp]
1422 mov r12,QWORD[((24+352))+rsp]
1423 lea rdi,[352+rsp]
1424 call __ecp_nistz256_mul_montq ; [352+rsp] = [352+rsp] * [544+rsp]
1425
1426 mov rax,QWORD[rsp]
1427 lea rbx,[rsp]
1428 mov r9,QWORD[((0+32))+rsp]
1429 mov r10,QWORD[((8+32))+rsp]
1430 lea rsi,[((0+32))+rsp]
1431 mov r11,QWORD[((16+32))+rsp]
1432 mov r12,QWORD[((24+32))+rsp]
1433 lea rdi,[128+rsp]
1434 call __ecp_nistz256_mul_montq ; [128+rsp] = [32+rsp] * [0+rsp]
1435
1436 mov rax,QWORD[160+rsp]
1437 lea rbx,[160+rsp]
1438 mov r9,QWORD[((0+32))+rsp]
1439 mov r10,QWORD[((8+32))+rsp]
1440 lea rsi,[((0+32))+rsp]
1441 mov r11,QWORD[((16+32))+rsp]
1442 mov r12,QWORD[((24+32))+rsp]
1443 lea rdi,[192+rsp]
1444 call __ecp_nistz256_mul_montq ; [192+rsp] = [32+rsp] * [160+rsp]
1445
1446
1447
1448
1449 add r12,r12 ; inline doubling of the product just computed (r12,r13,r8,r9)
1450 lea rsi,[96+rsp]
1451 adc r13,r13
1452 mov rax,r12
1453 adc r8,r8
1454 adc r9,r9
1455 mov rbp,r13
1456 sbb r11,r11 ; r11 = -1 if the doubling carried out, else 0
1457
1458 sub r12,-1 ; trial subtraction of the modulus
1459 mov rcx,r8
1460 sbb r13,r14
1461 sbb r8,0
1462 mov r10,r9
1463 sbb r9,r15
1464 test r11,r11
1465
1466 cmovz r12,rax ; no carry-out: keep the un-subtracted value
1467 mov rax,QWORD[rsi] ; interleaved: load [96+rsp] as operand A for subq
1468 cmovz r13,rbp
1469 mov rbp,QWORD[8+rsi]
1470 cmovz r8,rcx
1471 mov rcx,QWORD[16+rsi]
1472 cmovz r9,r10
1473 mov r10,QWORD[24+rsi]
1474
1475 call __ecp_nistz256_subq ; r12,r13,r8,r9 = [96+rsp] - 2*[192+rsp]
1476
1477 lea rbx,[128+rsp]
1478 lea rdi,[288+rsp]
1479 call __ecp_nistz256_sub_fromq ; [288+rsp] = previous result - [128+rsp]
1480
1481 mov rax,QWORD[((192+0))+rsp]
1482 mov rbp,QWORD[((192+8))+rsp]
1483 mov rcx,QWORD[((192+16))+rsp]
1484 mov r10,QWORD[((192+24))+rsp]
1485 lea rdi,[320+rsp]
1486
1487 call __ecp_nistz256_subq ; r12,r13,r8,r9 = [192+rsp] - [288+rsp]
1488
1489 mov QWORD[rdi],r12 ; store it to [320+rsp]
1490 mov QWORD[8+rdi],r13
1491 mov QWORD[16+rdi],r8
1492 mov QWORD[24+rdi],r9
1493 mov rax,QWORD[128+rsp]
1494 lea rbx,[128+rsp]
1495 mov r9,QWORD[((0+224))+rsp]
1496 mov r10,QWORD[((8+224))+rsp]
1497 lea rsi,[((0+224))+rsp]
1498 mov r11,QWORD[((16+224))+rsp]
1499 mov r12,QWORD[((24+224))+rsp]
1500 lea rdi,[256+rsp]
1501 call __ecp_nistz256_mul_montq ; [256+rsp] = [224+rsp] * [128+rsp]
1502
1503 mov rax,QWORD[320+rsp]
1504 lea rbx,[320+rsp]
1505 mov r9,QWORD[((0+64))+rsp]
1506 mov r10,QWORD[((8+64))+rsp]
1507 lea rsi,[((0+64))+rsp]
1508 mov r11,QWORD[((16+64))+rsp]
1509 mov r12,QWORD[((24+64))+rsp]
1510 lea rdi,[320+rsp]
1511 call __ecp_nistz256_mul_montq ; [320+rsp] = [64+rsp] * [320+rsp]
1512
1513 lea rbx,[256+rsp]
1514 lea rdi,[320+rsp]
1515 call __ecp_nistz256_sub_fromq ; [320+rsp] = [320+rsp] - [256+rsp]
1516
1517 DB 102,72,15,126,199 ; movq rdi,xmm0  (output pointer)
1518
1519 movdqa xmm0,xmm5 ; out[64..95]: computed [352+rsp], or in2 if xmm5, or in1 if xmm4
1520 movdqa xmm1,xmm5
1521 pandn xmm0,XMMWORD[352+rsp]
1522 movdqa xmm2,xmm5
1523 pandn xmm1,XMMWORD[((352+16))+rsp]
1524 movdqa xmm3,xmm5
1525 pand xmm2,XMMWORD[544+rsp]
1526 pand xmm3,XMMWORD[((544+16))+rsp]
1527 por xmm2,xmm0
1528 por xmm3,xmm1
1529
1530 movdqa xmm0,xmm4
1531 movdqa xmm1,xmm4
1532 pandn xmm0,xmm2
1533 movdqa xmm2,xmm4
1534 pandn xmm1,xmm3
1535 movdqa xmm3,xmm4
1536 pand xmm2,XMMWORD[448+rsp]
1537 pand xmm3,XMMWORD[((448+16))+rsp]
1538 por xmm2,xmm0
1539 por xmm3,xmm1
1540 movdqu XMMWORD[64+rdi],xmm2
1541 movdqu XMMWORD[80+rdi],xmm3
1542
1543 movdqa xmm0,xmm5 ; out[0..31]: computed [288+rsp], or in2 if xmm5, or in1 if xmm4
1544 movdqa xmm1,xmm5
1545 pandn xmm0,XMMWORD[288+rsp]
1546 movdqa xmm2,xmm5
1547 pandn xmm1,XMMWORD[((288+16))+rsp]
1548 movdqa xmm3,xmm5
1549 pand xmm2,XMMWORD[480+rsp]
1550 pand xmm3,XMMWORD[((480+16))+rsp]
1551 por xmm2,xmm0
1552 por xmm3,xmm1
1553
1554 movdqa xmm0,xmm4
1555 movdqa xmm1,xmm4
1556 pandn xmm0,xmm2
1557 movdqa xmm2,xmm4
1558 pandn xmm1,xmm3
1559 movdqa xmm3,xmm4
1560 pand xmm2,XMMWORD[384+rsp]
1561 pand xmm3,XMMWORD[((384+16))+rsp]
1562 por xmm2,xmm0
1563 por xmm3,xmm1
1564 movdqu XMMWORD[rdi],xmm2
1565 movdqu XMMWORD[16+rdi],xmm3
1566
1567 movdqa xmm0,xmm5 ; out[32..63]: computed [320+rsp], or in2 if xmm5, or in1 if xmm4
1568 movdqa xmm1,xmm5
1569 pandn xmm0,XMMWORD[320+rsp]
1570 movdqa xmm2,xmm5
1571 pandn xmm1,XMMWORD[((320+16))+rsp]
1572 movdqa xmm3,xmm5
1573 pand xmm2,XMMWORD[512+rsp]
1574 pand xmm3,XMMWORD[((512+16))+rsp]
1575 por xmm2,xmm0
1576 por xmm3,xmm1
1577
1578 movdqa xmm0,xmm4
1579 movdqa xmm1,xmm4
1580 pandn xmm0,xmm2
1581 movdqa xmm2,xmm4
1582 pandn xmm1,xmm3
1583 movdqa xmm3,xmm4
1584 pand xmm2,XMMWORD[416+rsp]
1585 pand xmm3,XMMWORD[((416+16))+rsp]
1586 por xmm2,xmm0
1587 por xmm3,xmm1
1588 movdqu XMMWORD[32+rdi],xmm2
1589 movdqu XMMWORD[48+rdi],xmm3
1590
1591 $L$add_doneq:
1592 add rsp,32*18+8
1593 pop r15
1594 pop r14
1595 pop r13
1596 pop r12
1597 pop rbx
1598 pop rbp
1599 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
1600 mov rsi,QWORD[16+rsp]
1601 DB 0F3h,0C3h ;repret
1602 $L$SEH_end_ecp_nistz256_point_add:
1603 global ecp_nistz256_point_add_affine
;-----------------------------------------------------------------------
; ecp_nistz256_point_add_affine -- Win64 entry point.
;
; In (Microsoft x64 argument registers, remapped below to rdi/rsi/rdx):
;   rcx -> output buffer; 96 bytes (offsets 0..95) are written at the end
;   rdx -> first input;  96 bytes (offsets 0..95) are read
;   r8  -> second input; 64 bytes (offsets 0..63) are read
;
; Saved/restored here: rdi, rsi (entry slots [8+rsp]/[16+rsp]) and
; rbp, rbx, r12-r15 (push/pop).
;
; Stack frame: 32*15+8 bytes, used as fifteen 32-byte slots:
;   [0..288+rsp]    ten scratch slots for intermediate values
;   [320+rsp]       copy of first input bytes  0..31
;   [352+rsp]       copy of first input bytes 32..63
;   [384+rsp]       copy of first input bytes 64..95
;   [416+rsp]       copy of second input bytes  0..31
;   [448+rsp]       copy of second input bytes 32..63
;
; Masks built from the inputs and used by the final selection:
;   xmm5 = all-ones iff the first 64 bytes of the first input are zero
;   xmm4 = all-ones iff the first 64 bytes of the second input are zero
;
; NOTE(review): judging by the name this adds a projective P-256 point
; and an affine one, with an all-zero (x,y) presumably encoding the point
; at infinity -- confirm. The __ecp_nistz256_* helpers are defined
; outside this excerpt; every "presumably" below is inferred from their
; names and from how registers are set up around the call.
; NOTE(review): the code relies on those helpers leaving xmm0, xmm4 and
; xmm5 untouched -- confirm against their definitions.
;-----------------------------------------------------------------------
1604
1605 ALIGN 32
1606 ecp_nistz256_point_add_affine:
; Stash rdi/rsi in the two slots just above the return address (the
; caller-provided home area in the Microsoft x64 convention).
1607 mov QWORD[8+rsp],rdi ;WIN64 prologue
1608 mov QWORD[16+rsp],rsi
; rax = entry rsp; it is overwritten below before any read in this body.
1609 mov rax,rsp
1610 $L$SEH_begin_ecp_nistz256_point_add_affine:
; Remap the three arguments: rdi = output, rsi = first input,
; rdx = second input.
1611 mov rdi,rcx
1612 mov rsi,rdx
1613 mov rdx,r8
1614
1615
1616 push rbp
1617 push rbx
1618 push r12
1619 push r13
1620 push r14
1621 push r15
; Six pushes (48 bytes) plus 488 bytes: with rsp%16 == 8 at entry this
; leaves rsp 16-byte aligned, which the movdqa stack accesses require.
1622 sub rsp,32*15+8
1623
; Load all 96 bytes of the first input into xmm0..xmm5; rbx keeps the
; second-input pointer.
1624 movdqu xmm0,XMMWORD[rsi]
1625 mov rbx,rdx
1626 movdqu xmm1,XMMWORD[16+rsi]
1627 movdqu xmm2,XMMWORD[32+rsi]
1628 movdqu xmm3,XMMWORD[48+rsi]
1629 movdqu xmm4,XMMWORD[64+rsi]
1630 movdqu xmm5,XMMWORD[80+rsi]
; The four qwords at bytes 64..95 of the first input go to rax, r14, r15,
; r8 -- the register set loaded before every __ecp_nistz256_sqr_montq
; call in this function.
1631 mov rax,QWORD[((64+0))+rsi]
1632 mov r14,QWORD[((64+8))+rsi]
1633 mov r15,QWORD[((64+16))+rsi]
1634 mov r8,QWORD[((64+24))+rsi]
; Copy the first input to [320..415+rsp] while OR-ing its first 64 bytes
; together into xmm3.
1635 movdqa XMMWORD[320+rsp],xmm0
1636 movdqa XMMWORD[(320+16)+rsp],xmm1
1637 por xmm1,xmm0
1638 movdqa XMMWORD[352+rsp],xmm2
1639 movdqa XMMWORD[(352+16)+rsp],xmm3
1640 por xmm3,xmm2
1641 movdqa XMMWORD[384+rsp],xmm4
1642 movdqa XMMWORD[(384+16)+rsp],xmm5
1643 por xmm3,xmm1
1644
; Load the 64 bytes of the second input, copy them to [416..479+rsp] and
; OR them together into xmm3. Interleaved with that, the OR of the first
; input is folded across its four dwords (pshufd 0xb1 swaps dwords within
; each qword, 0x1e brings the upper qword down) so that dword 0 of xmm5
; ends up holding the OR of all 64 bytes.
1645 movdqu xmm0,XMMWORD[rbx]
1646 pshufd xmm5,xmm3,0xb1
1647 movdqu xmm1,XMMWORD[16+rbx]
1648 movdqu xmm2,XMMWORD[32+rbx]
1649 por xmm5,xmm3
1650 movdqu xmm3,XMMWORD[48+rbx]
1651 movdqa XMMWORD[416+rsp],xmm0
1652 pshufd xmm4,xmm5,0x1e
1653 movdqa XMMWORD[(416+16)+rsp],xmm1
1654 por xmm1,xmm0
; Bytes 66 48 0F 6E C7 = movq xmm0,rdi: park the output pointer in xmm0,
; because rdi is reused as a scratch pointer for the helper calls below.
1655 DB 102,72,15,110,199
1656 movdqa XMMWORD[448+rsp],xmm2
1657 movdqa XMMWORD[(448+16)+rsp],xmm3
1658 por xmm3,xmm2
1659 por xmm5,xmm4
1660 pxor xmm4,xmm4
1661 por xmm3,xmm1
1662
; sqr_montq: rsi -> bytes 64..95 of the first input (same qwords already
; in rax/r14/r15/r8), rdi -> [32+rsp].
; Presumably [32+rsp] = (first input bytes 64..95)^2.
1663 lea rsi,[((64-0))+rsi]
1664 lea rdi,[32+rsp]
1665 call __ecp_nistz256_sqr_montq
1666
; Finish both masks: compare the folded OR with zero and broadcast
; dword 0, giving xmm5 (first input) and xmm4 (second input) as
; all-ones/all-zero masks. Interleaved: rax = first qword of the second
; input, and the four qwords left in r12..r15 by the call above are
; moved into r9..r12 for the next multiply.
1667 pcmpeqd xmm5,xmm4
1668 pshufd xmm4,xmm3,0xb1
1669 mov rax,QWORD[rbx]
1670
1671 mov r9,r12
1672 por xmm4,xmm3
1673 pshufd xmm5,xmm5,0
1674 pshufd xmm3,xmm4,0x1e
1675 mov r10,r13
1676 por xmm4,xmm3
1677 pxor xmm3,xmm3
1678 mov r11,r14
1679 pcmpeqd xmm4,xmm3
1680 pshufd xmm4,xmm4,0
1681
; mul_montq: rbx -> second input bytes 0..31, rsi -> [32+rsp],
; rdi -> [0+rsp]. Presumably [0+rsp] = [32+rsp] * (second input 0..31).
1682 lea rsi,[((32-0))+rsp]
1683 mov r12,r15
1684 lea rdi,[rsp]
1685 call __ecp_nistz256_mul_montq
1686
; sub_fromq: rbx -> [320+rsp], rdi -> [64+rsp]. Presumably
; [64+rsp] = (value left in registers by the multiply) - [320+rsp].
1687 lea rbx,[320+rsp]
1688 lea rdi,[64+rsp]
1689 call __ecp_nistz256_sub_fromq
1690
; Pattern used for every mul_montq call from here on: rax = first qword
; of the operand at rbx, r9..r12 = the four qwords of the operand at rsi,
; rdi -> the slot that is presumably the destination.
; Here: presumably [32+rsp] = [32+rsp] * [384+rsp].
1691 mov rax,QWORD[384+rsp]
1692 lea rbx,[384+rsp]
1693 mov r9,QWORD[((0+32))+rsp]
1694 mov r10,QWORD[((8+32))+rsp]
1695 lea rsi,[((0+32))+rsp]
1696 mov r11,QWORD[((16+32))+rsp]
1697 mov r12,QWORD[((24+32))+rsp]
1698 lea rdi,[32+rsp]
1699 call __ecp_nistz256_mul_montq
1700
; Presumably [288+rsp] = [64+rsp] * [384+rsp]. Slot 288 is the computed
; candidate for output bytes 64..95 in the selection at the end.
1701 mov rax,QWORD[384+rsp]
1702 lea rbx,[384+rsp]
1703 mov r9,QWORD[((0+64))+rsp]
1704 mov r10,QWORD[((8+64))+rsp]
1705 lea rsi,[((0+64))+rsp]
1706 mov r11,QWORD[((16+64))+rsp]
1707 mov r12,QWORD[((24+64))+rsp]
1708 lea rdi,[288+rsp]
1709 call __ecp_nistz256_mul_montq
1710
; Presumably [32+rsp] = [32+rsp] * [448+rsp].
1711 mov rax,QWORD[448+rsp]
1712 lea rbx,[448+rsp]
1713 mov r9,QWORD[((0+32))+rsp]
1714 mov r10,QWORD[((8+32))+rsp]
1715 lea rsi,[((0+32))+rsp]
1716 mov r11,QWORD[((16+32))+rsp]
1717 mov r12,QWORD[((24+32))+rsp]
1718 lea rdi,[32+rsp]
1719 call __ecp_nistz256_mul_montq
1720
; Presumably [96+rsp] = (value left in registers) - [352+rsp].
1721 lea rbx,[352+rsp]
1722 lea rdi,[96+rsp]
1723 call __ecp_nistz256_sub_fromq
1724
; sqr_montq on [64+rsp]; presumably [128+rsp] = [64+rsp]^2.
1725 mov rax,QWORD[((0+64))+rsp]
1726 mov r14,QWORD[((8+64))+rsp]
1727 lea rsi,[((0+64))+rsp]
1728 mov r15,QWORD[((16+64))+rsp]
1729 mov r8,QWORD[((24+64))+rsp]
1730 lea rdi,[128+rsp]
1731 call __ecp_nistz256_sqr_montq
1732
; sqr_montq on [96+rsp]; presumably [192+rsp] = [96+rsp]^2.
1733 mov rax,QWORD[((0+96))+rsp]
1734 mov r14,QWORD[((8+96))+rsp]
1735 lea rsi,[((0+96))+rsp]
1736 mov r15,QWORD[((16+96))+rsp]
1737 mov r8,QWORD[((24+96))+rsp]
1738 lea rdi,[192+rsp]
1739 call __ecp_nistz256_sqr_montq
1740
; Presumably [160+rsp] = [64+rsp] * [128+rsp].
1741 mov rax,QWORD[128+rsp]
1742 lea rbx,[128+rsp]
1743 mov r9,QWORD[((0+64))+rsp]
1744 mov r10,QWORD[((8+64))+rsp]
1745 lea rsi,[((0+64))+rsp]
1746 mov r11,QWORD[((16+64))+rsp]
1747 mov r12,QWORD[((24+64))+rsp]
1748 lea rdi,[160+rsp]
1749 call __ecp_nistz256_mul_montq
1750
; Presumably [0+rsp] = [128+rsp] * [320+rsp].
1751 mov rax,QWORD[320+rsp]
1752 lea rbx,[320+rsp]
1753 mov r9,QWORD[((0+128))+rsp]
1754 mov r10,QWORD[((8+128))+rsp]
1755 lea rsi,[((0+128))+rsp]
1756 mov r11,QWORD[((16+128))+rsp]
1757 mov r12,QWORD[((24+128))+rsp]
1758 lea rdi,[rsp]
1759 call __ecp_nistz256_mul_montq
1760
1761
1762
1763
; Double the four-limb value in r12,r13,r8,r9 (presumably the product
; still held in registers after the call above). The carry out of the
; doubling becomes r11 = 0 or -1; rax,rbp,rcx,r10 keep the doubled,
; not-yet-subtracted limbs.
1764 add r12,r12
1765 lea rsi,[192+rsp]
1766 adc r13,r13
1767 mov rax,r12
1768 adc r8,r8
1769 adc r9,r9
1770 mov rbp,r13
1771 sbb r11,r11
1772
; Trial subtraction of the four-limb constant (-1, r14, 0, r15).
; r14/r15 are presumably limbs 1 and 3 of $L$poly left there by the
; helper -- confirm. The borrow of this subtraction is discarded by the
; test: the choice below depends only on the carry out of the doubling.
; NOTE(review): a doubled value that did not carry but is >= the
; constant is therefore kept unreduced -- confirm the callees tolerate
; such inputs.
1773 sub r12,-1
1774 mov rcx,r8
1775 sbb r13,r14
1776 sbb r8,0
1777 mov r10,r9
1778 sbb r9,r15
1779 test r11,r11
1780
; No carry (r11 == 0): restore the doubled limbs. Each restore is paired
; with loading one qword of [192+rsp] into rax,rbp,rcx,r10 for subq.
1781 cmovz r12,rax
1782 mov rax,QWORD[rsi]
1783 cmovz r13,rbp
1784 mov rbp,QWORD[8+rsi]
1785 cmovz r8,rcx
1786 mov rcx,QWORD[16+rsi]
1787 cmovz r9,r10
1788 mov r10,QWORD[24+rsi]
1789
; subq: operands are the limbs in rax,rbp,rcx,r10 (= [192+rsp]) and in
; r12,r13,r8,r9 (the doubled value). Presumably the register-only
; difference [192+rsp] - doubled value -- confirm operand order.
1790 call __ecp_nistz256_subq
1791
; Presumably [224+rsp] = (value left in registers) - [160+rsp].
; Slot 224 is the computed candidate for output bytes 0..31.
1792 lea rbx,[160+rsp]
1793 lea rdi,[224+rsp]
1794 call __ecp_nistz256_sub_fromq
1795
; subq again with rax,rbp,rcx,r10 = [0+rsp]; the other operand is
; whatever the previous call left in r12,r13,r8,r9.
1796 mov rax,QWORD[((0+0))+rsp]
1797 mov rbp,QWORD[((0+8))+rsp]
1798 mov rcx,QWORD[((0+16))+rsp]
1799 mov r10,QWORD[((0+24))+rsp]
1800 lea rdi,[64+rsp]
1801
1802 call __ecp_nistz256_subq
1803
; Store the four limbs now in r12,r13,r8,r9 to [64+rsp] (rdi was set
; above); the store is done here rather than by the helper.
1804 mov QWORD[rdi],r12
1805 mov QWORD[8+rdi],r13
1806 mov QWORD[16+rdi],r8
1807 mov QWORD[24+rdi],r9
; Presumably [32+rsp] = [160+rsp] * [352+rsp].
1808 mov rax,QWORD[352+rsp]
1809 lea rbx,[352+rsp]
1810 mov r9,QWORD[((0+160))+rsp]
1811 mov r10,QWORD[((8+160))+rsp]
1812 lea rsi,[((0+160))+rsp]
1813 mov r11,QWORD[((16+160))+rsp]
1814 mov r12,QWORD[((24+160))+rsp]
1815 lea rdi,[32+rsp]
1816 call __ecp_nistz256_mul_montq
1817
; Presumably [64+rsp] = [64+rsp] * [96+rsp].
1818 mov rax,QWORD[96+rsp]
1819 lea rbx,[96+rsp]
1820 mov r9,QWORD[((0+64))+rsp]
1821 mov r10,QWORD[((8+64))+rsp]
1822 lea rsi,[((0+64))+rsp]
1823 mov r11,QWORD[((16+64))+rsp]
1824 mov r12,QWORD[((24+64))+rsp]
1825 lea rdi,[64+rsp]
1826 call __ecp_nistz256_mul_montq
1827
; Presumably [256+rsp] = (value left in registers) - [32+rsp].
; Slot 256 is the computed candidate for output bytes 32..63.
1828 lea rbx,[32+rsp]
1829 lea rdi,[256+rsp]
1830 call __ecp_nistz256_sub_fromq
1831
; Bytes 66 48 0F 7E C7 = movq rdi,xmm0: recover the output pointer
; parked in xmm0 earlier.
1832 DB 102,72,15,126,199
1833
; Branch-free selection of the result, 32 bytes at a time. For each
; piece: t = xmm5 ? alt : computed, then out = xmm4 ? first-input : t
; (pandn gives ~mask & value, pand gives mask & value, por merges them).
;
; Output bytes 64..95: computed = [288+rsp], alt = $L$ONE_mont,
; first-input copy = [384+rsp].
1834 movdqa xmm0,xmm5
1835 movdqa xmm1,xmm5
1836 pandn xmm0,XMMWORD[288+rsp]
1837 movdqa xmm2,xmm5
1838 pandn xmm1,XMMWORD[((288+16))+rsp]
1839 movdqa xmm3,xmm5
1840 pand xmm2,XMMWORD[$L$ONE_mont]
1841 pand xmm3,XMMWORD[(($L$ONE_mont+16))]
1842 por xmm2,xmm0
1843 por xmm3,xmm1
1844
1845 movdqa xmm0,xmm4
1846 movdqa xmm1,xmm4
1847 pandn xmm0,xmm2
1848 movdqa xmm2,xmm4
1849 pandn xmm1,xmm3
1850 movdqa xmm3,xmm4
1851 pand xmm2,XMMWORD[384+rsp]
1852 pand xmm3,XMMWORD[((384+16))+rsp]
1853 por xmm2,xmm0
1854 por xmm3,xmm1
1855 movdqu XMMWORD[64+rdi],xmm2
1856 movdqu XMMWORD[80+rdi],xmm3
1857
; Output bytes 0..31: computed = [224+rsp], alt = [416+rsp] (second
; input bytes 0..31), first-input copy = [320+rsp].
1858 movdqa xmm0,xmm5
1859 movdqa xmm1,xmm5
1860 pandn xmm0,XMMWORD[224+rsp]
1861 movdqa xmm2,xmm5
1862 pandn xmm1,XMMWORD[((224+16))+rsp]
1863 movdqa xmm3,xmm5
1864 pand xmm2,XMMWORD[416+rsp]
1865 pand xmm3,XMMWORD[((416+16))+rsp]
1866 por xmm2,xmm0
1867 por xmm3,xmm1
1868
1869 movdqa xmm0,xmm4
1870 movdqa xmm1,xmm4
1871 pandn xmm0,xmm2
1872 movdqa xmm2,xmm4
1873 pandn xmm1,xmm3
1874 movdqa xmm3,xmm4
1875 pand xmm2,XMMWORD[320+rsp]
1876 pand xmm3,XMMWORD[((320+16))+rsp]
1877 por xmm2,xmm0
1878 por xmm3,xmm1
1879 movdqu XMMWORD[rdi],xmm2
1880 movdqu XMMWORD[16+rdi],xmm3
1881
; Output bytes 32..63: computed = [256+rsp], alt = [448+rsp] (second
; input bytes 32..63), first-input copy = [352+rsp].
1882 movdqa xmm0,xmm5
1883 movdqa xmm1,xmm5
1884 pandn xmm0,XMMWORD[256+rsp]
1885 movdqa xmm2,xmm5
1886 pandn xmm1,XMMWORD[((256+16))+rsp]
1887 movdqa xmm3,xmm5
1888 pand xmm2,XMMWORD[448+rsp]
1889 pand xmm3,XMMWORD[((448+16))+rsp]
1890 por xmm2,xmm0
1891 por xmm3,xmm1
1892
1893 movdqa xmm0,xmm4
1894 movdqa xmm1,xmm4
1895 pandn xmm0,xmm2
1896 movdqa xmm2,xmm4
1897 pandn xmm1,xmm3
1898 movdqa xmm3,xmm4
1899 pand xmm2,XMMWORD[352+rsp]
1900 pand xmm3,XMMWORD[((352+16))+rsp]
1901 por xmm2,xmm0
1902 por xmm3,xmm1
1903 movdqu XMMWORD[32+rdi],xmm2
1904 movdqu XMMWORD[48+rdi],xmm3
1905
; Release the frame and restore the pushed registers in reverse order.
1906 add rsp,32*15+8
1907 pop r15
1908 pop r14
1909 pop r13
1910 pop r12
1911 pop rbx
1912 pop rbp
; Reload the caller's rdi/rsi from the slots written in the prologue.
1913 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
1914 mov rsi,QWORD[16+rsp]
1915 DB 0F3h,0C3h ;repret
1916 $L$SEH_end_ecp_nistz256_point_add_affine:
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698