Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(107)

Side by Side Diff: third_party/boringssl/win-x86_64/crypto/ec/p256-x86_64-asm.asm

Issue 2869243005: Roll src/third_party/boringssl/src ddfcc6a60..1e5cb820d (Closed)
Patch Set: Created 3 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 default rel
2 %define XMMWORD
3 %define YMMWORD
4 %define ZMMWORD
5 section .text code align=64
6
7 EXTERN OPENSSL_ia32cap_P
8
9
; Read-only constants shared by the routines in this file.
; NOTE: the last limb of $L$poly and of $L$ONE_mont had been split in two by
; stray whitespace; the 64-bit literals are rejoined here.
10 ALIGN 64
; $L$poly: modulus as four 64-bit limbs, least significant limb first.
; Value = 2^256 - 2^224 + 2^192 + 2^96 - 1. The code below reads limb 1 via
; [$L$poly+8] and limb 3 via [$L$poly+24]; limbs 0 (all ones) and 2 (zero) are
; folded into immediates (-1 and 0).
11 $L$poly:
12 DQ 0xffffffffffffffff,0x00000000ffffffff,0x0000000000000000,0xffffffff00000001
13
; $L$One: eight dwords of 1; the select routines use the first 16 bytes as the
; per-iteration increment / initial value of their index counter.
14 $L$One:
15 DD 1,1,1,1,1,1,1,1
; $L$Two / $L$Three: eight dwords of 2 / 3 (not referenced in the visible code).
16 $L$Two:
17 DD 2,2,2,2,2,2,2,2
18 $L$Three:
19 DD 3,3,3,3,3,3,3,3
; $L$ONE_mont: 2^256 - $L$poly, i.e. 2^256 reduced modulo the modulus above
; (little-endian limbs). Not referenced in the visible code.
20 $L$ONE_mont:
21 DQ 0x0000000000000001,0xffffffff00000000,0xffffffffffffffff,0x00000000fffffffe
22
23
24
;-----------------------------------------------------------------------
; ecp_nistz256_neg
; ABI:   Win64 entry (arguments in rcx, rdx); they are moved into rdi/rsi.
; In:    rcx = pointer to 4-limb result, rdx = pointer to 4-limb input a
; Out:   result = 0 - a, with $L$poly added back when the subtraction
;        borrowed (i.e. result = 0 when a == 0, otherwise $L$poly - a).
; Saves: rdi/rsi in the caller's home space at [rsp+8]/[rsp+16]; r12, r13
;        on the stack.
; Clobb: rax, rcx, rdx, r8-r11, flags
; The selection is done with cmov, not with a branch.
;-----------------------------------------------------------------------
25 global ecp_nistz256_neg
26
27 ALIGN 32
28 ecp_nistz256_neg:
29 mov QWORD[8+rsp],rdi ;WIN64 prologue
30 mov QWORD[16+rsp],rsi
31 mov rax,rsp
32 $L$SEH_begin_ecp_nistz256_neg:
33 mov rdi,rcx
34 mov rsi,rdx
35
36
37 push r12
38 push r13
39
; r8..r11 = 0 (minuend), r13 = 0 (will capture the final borrow).
40 xor r8,r8
41 xor r9,r9
42 xor r10,r10
43 xor r11,r11
44 xor r13,r13
45
; r8..r11 = 0 - a; copies of the raw difference are kept in rax, rdx, rcx, r12.
; r13 becomes all-ones if the subtraction borrowed, 0 otherwise.
46 sub r8,QWORD[rsi]
47 sbb r9,QWORD[8+rsi]
48 sbb r10,QWORD[16+rsi]
49 mov rax,r8
50 sbb r11,QWORD[24+rsi]
51 lea rsi,[$L$poly]
52 mov rdx,r9
53 sbb r13,0
54
; r8..r11 += $L$poly (speculatively); ZF then reflects "no borrow happened".
55 add r8,QWORD[rsi]
56 mov rcx,r10
57 adc r9,QWORD[8+rsi]
58 adc r10,QWORD[16+rsi]
59 mov r12,r11
60 adc r11,QWORD[24+rsi]
61 test r13,r13
62
; No borrow (r13 == 0): discard the addition and keep the raw difference.
63 cmovz r8,rax
64 cmovz r9,rdx
65 mov QWORD[rdi],r8
66 cmovz r10,rcx
67 mov QWORD[8+rdi],r9
68 cmovz r11,r12
69 mov QWORD[16+rdi],r10
70 mov QWORD[24+rdi],r11
71
72 pop r13
73 pop r12
74 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
75 mov rsi,QWORD[16+rsp]
76 DB 0F3h,0C3h ;repret
77 $L$SEH_end_ecp_nistz256_neg:
78
79
80
81
82
83
;-----------------------------------------------------------------------
; ecp_nistz256_mul_mont
; ABI:   Win64 entry (arguments in rcx, rdx, r8).
; In:    rcx = pointer to 4-limb result
;        rdx = pointer to 4-limb operand a
;        r8  = pointer to 4-limb operand b
; Out:   result written by __ecp_nistz256_mul_montq
; Saves: rdi/rsi in the caller's home space; rbp, rbx, r12-r15 on the stack.
; This is a thin wrapper: it converts the argument registers, loads the
; register operands that __ecp_nistz256_mul_montq expects and calls it.
;-----------------------------------------------------------------------
84 global ecp_nistz256_mul_mont
85
86 ALIGN 32
87 ecp_nistz256_mul_mont:
88 mov QWORD[8+rsp],rdi ;WIN64 prologue
89 mov QWORD[16+rsp],rsi
90 mov rax,rsp
91 $L$SEH_begin_ecp_nistz256_mul_mont:
92 mov rdi,rcx
93 mov rsi,rdx
94 mov rdx,r8
95
96
97 $L$mul_mont:
98 push rbp
99 push rbx
100 push r12
101 push r13
102 push r14
103 push r15
; rbx = b pointer, rax = b[0], r9..r12 = a[0..3] (rsi still points at a).
104 mov rbx,rdx
105 mov rax,QWORD[rdx]
106 mov r9,QWORD[rsi]
107 mov r10,QWORD[8+rsi]
108 mov r11,QWORD[16+rsi]
109 mov r12,QWORD[24+rsi]
110
111 call __ecp_nistz256_mul_montq
112 $L$mul_mont_done:
113 pop r15
114 pop r14
115 pop r13
116 pop r12
117 pop rbx
118 pop rbp
119 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
120 mov rsi,QWORD[16+rsp]
121 DB 0F3h,0C3h ;repret
122 $L$SEH_end_ecp_nistz256_mul_mont:
123
124
;-----------------------------------------------------------------------
; __ecp_nistz256_mul_montq  (internal helper, register calling convention)
; In:    rax        = b[0]
;        rbx        = pointer to b (limbs 1..3 are read from [8+rbx]..)
;        rsi        = pointer to a (re-read from memory for b[1..3])
;        r9..r12    = a[0..3]
;        rdi        = pointer to 4-limb result
; Out:   [rdi] and (r12, r13, r8, r9) = reduced 4-limb product
;        r14 = [$L$poly+8], r15 = [$L$poly+24] (left loaded for callers)
; Clobb: rax, rbx, rcx, rdx, rbp, r8-r13, flags
; Structure: four rounds, each "accumulate a * b[i]" followed by one
; reduction step. A reduction step takes the lowest accumulator limb t and
; adds t*2^96 (via shl/shr 32) and t*[$L$poly+24]*2^192 (via mul r15) to
; the limbs above it; the lowest limb is then retired and its register
; reused as the new top limb. A final subtraction of $L$poly is undone with
; cmovc when it borrows.
;-----------------------------------------------------------------------
125 ALIGN 32
126 __ecp_nistz256_mul_montq:
127
128
; Round 0: (r8, r9, r10, r11, r12) = a * b[0]; rbp keeps b[0] across the muls.
129 mov rbp,rax
130 mul r9
131 mov r14,QWORD[(($L$poly+8))]
132 mov r8,rax
133 mov rax,rbp
134 mov r9,rdx
135
136 mul r10
137 mov r15,QWORD[(($L$poly+24))]
138 add r9,rax
139 mov rax,rbp
140 adc rdx,0
141 mov r10,rdx
142
143 mul r11
144 add r10,rax
145 mov rax,rbp
146 adc rdx,0
147 mov r11,rdx
148
149 mul r12
150 add r11,rax
151 mov rax,r8
152 adc rdx,0
153 xor r13,r13
154 mov r12,rdx
155
156
157
158
159
160
161
162
163
164
; Reduction step 0: retire r8; r13 receives the carry, r8 becomes the new top limb.
165 mov rbp,r8
166 shl r8,32
167 mul r15
168 shr rbp,32
169 add r9,r8
170 adc r10,rbp
171 adc r11,rax
172 mov rax,QWORD[8+rbx]
173 adc r12,rdx
174 adc r13,0
175 xor r8,r8
176
177
178
; Round 1: (r9, r10, r11, r12, r13, r8) += a * b[1]; rcx carries the high half forward.
179 mov rbp,rax
180 mul QWORD[rsi]
181 add r9,rax
182 mov rax,rbp
183 adc rdx,0
184 mov rcx,rdx
185
186 mul QWORD[8+rsi]
187 add r10,rcx
188 adc rdx,0
189 add r10,rax
190 mov rax,rbp
191 adc rdx,0
192 mov rcx,rdx
193
194 mul QWORD[16+rsi]
195 add r11,rcx
196 adc rdx,0
197 add r11,rax
198 mov rax,rbp
199 adc rdx,0
200 mov rcx,rdx
201
202 mul QWORD[24+rsi]
203 add r12,rcx
204 adc rdx,0
205 add r12,rax
206 mov rax,r9
207 adc r13,rdx
208 adc r8,0
209
210
211
; Reduction step 1: retire r9; r9 becomes the new top limb.
212 mov rbp,r9
213 shl r9,32
214 mul r15
215 shr rbp,32
216 add r10,r9
217 adc r11,rbp
218 adc r12,rax
219 mov rax,QWORD[16+rbx]
220 adc r13,rdx
221 adc r8,0
222 xor r9,r9
223
224
225
; Round 2: (r10, r11, r12, r13, r8, r9) += a * b[2].
226 mov rbp,rax
227 mul QWORD[rsi]
228 add r10,rax
229 mov rax,rbp
230 adc rdx,0
231 mov rcx,rdx
232
233 mul QWORD[8+rsi]
234 add r11,rcx
235 adc rdx,0
236 add r11,rax
237 mov rax,rbp
238 adc rdx,0
239 mov rcx,rdx
240
241 mul QWORD[16+rsi]
242 add r12,rcx
243 adc rdx,0
244 add r12,rax
245 mov rax,rbp
246 adc rdx,0
247 mov rcx,rdx
248
249 mul QWORD[24+rsi]
250 add r13,rcx
251 adc rdx,0
252 add r13,rax
253 mov rax,r10
254 adc r8,rdx
255 adc r9,0
256
257
258
; Reduction step 2: retire r10; r10 becomes the new top limb.
259 mov rbp,r10
260 shl r10,32
261 mul r15
262 shr rbp,32
263 add r11,r10
264 adc r12,rbp
265 adc r13,rax
266 mov rax,QWORD[24+rbx]
267 adc r8,rdx
268 adc r9,0
269 xor r10,r10
270
271
272
; Round 3: (r11, r12, r13, r8, r9, r10) += a * b[3].
273 mov rbp,rax
274 mul QWORD[rsi]
275 add r11,rax
276 mov rax,rbp
277 adc rdx,0
278 mov rcx,rdx
279
280 mul QWORD[8+rsi]
281 add r12,rcx
282 adc rdx,0
283 add r12,rax
284 mov rax,rbp
285 adc rdx,0
286 mov rcx,rdx
287
288 mul QWORD[16+rsi]
289 add r13,rcx
290 adc rdx,0
291 add r13,rax
292 mov rax,rbp
293 adc rdx,0
294 mov rcx,rdx
295
296 mul QWORD[24+rsi]
297 add r8,rcx
298 adc rdx,0
299 add r8,rax
300 mov rax,r11
301 adc r9,rdx
302 adc r10,0
303
304
305
; Reduction step 3: retire r11. The value is now (r12, r13, r8, r9) plus a
; carry limb in r10; unreduced copies go to rcx, rbp (and rbx, rdx below).
306 mov rbp,r11
307 shl r11,32
308 mul r15
309 shr rbp,32
310 add r12,r11
311 adc r13,rbp
312 mov rcx,r12
313 adc r8,rax
314 adc r9,rdx
315 mov rbp,r13
316 adc r10,0
317
318
319
; Trial subtraction of $L$poly = (-1, r14, 0, r15), borrow propagated into r10.
320 sub r12,-1
321 mov rbx,r8
322 sbb r13,r14
323 sbb r8,0
324 mov rdx,r9
325 sbb r9,r15
326 sbb r10,0
327
; Borrow set => value was already below $L$poly: restore the unreduced copies.
328 cmovc r12,rcx
329 cmovc r13,rbp
330 mov QWORD[rdi],r12
331 cmovc r8,rbx
332 mov QWORD[8+rdi],r13
333 cmovc r9,rdx
334 mov QWORD[16+rdi],r8
335 mov QWORD[24+rdi],r9
336
337 DB 0F3h,0C3h ;repret
338
339
340
341
342
343
344
345
346
;-----------------------------------------------------------------------
; ecp_nistz256_sqr_mont
; ABI:   Win64 entry (arguments in rcx, rdx).
; In:    rcx = pointer to 4-limb result, rdx = pointer to 4-limb operand a
; Out:   result written by __ecp_nistz256_sqr_montq
; Saves: rdi/rsi in the caller's home space; rbp, rbx, r12-r15 on the stack.
; Thin wrapper: converts the argument registers, loads a[0..3] into
; rax, r14, r15, r8 as __ecp_nistz256_sqr_montq expects, and calls it.
;-----------------------------------------------------------------------
347 global ecp_nistz256_sqr_mont
348
349 ALIGN 32
350 ecp_nistz256_sqr_mont:
351 mov QWORD[8+rsp],rdi ;WIN64 prologue
352 mov QWORD[16+rsp],rsi
353 mov rax,rsp
354 $L$SEH_begin_ecp_nistz256_sqr_mont:
355 mov rdi,rcx
356 mov rsi,rdx
357
358
359 push rbp
360 push rbx
361 push r12
362 push r13
363 push r14
364 push r15
365 mov rax,QWORD[rsi]
366 mov r14,QWORD[8+rsi]
367 mov r15,QWORD[16+rsi]
368 mov r8,QWORD[24+rsi]
369
370 call __ecp_nistz256_sqr_montq
371 $L$sqr_mont_done:
372 pop r15
373 pop r14
374 pop r13
375 pop r12
376 pop rbx
377 pop rbp
378 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
379 mov rsi,QWORD[16+rsp]
380 DB 0F3h,0C3h ;repret
381 $L$SEH_end_ecp_nistz256_sqr_mont:
382
383
;-----------------------------------------------------------------------
; __ecp_nistz256_sqr_montq  (internal helper, register calling convention)
; In:    rax = a[0], r14 = a[1], r15 = a[2], r8 = a[3]
;        rsi = pointer to a (limbs are re-read for the diagonal squares)
;        rdi = pointer to 4-limb result
; Out:   [rdi] and (r12, r13, r14, r15) = reduced 4-limb result
;        rsi = [$L$poly+8], rbp = [$L$poly+24]  (note: rsi no longer points
;        at a; the point routines in this file read rsi/rbp after the call)
; Clobb: rax, rcx, rdx, rbp, rsi, r8-r15, flags
; Structure: off-diagonal products a[i]*a[j] (i<j) are accumulated once and
; doubled, the squares a[i]^2 are added, then four reduction steps retire
; the low four limbs and a final trial subtraction of $L$poly is undone
; with cmovc on borrow.
;-----------------------------------------------------------------------
384 ALIGN 32
385 __ecp_nistz256_sqr_montq:
; Off-diagonal products with a[0]: (r9, r10, r11, r12) = a[0] * (a[1], a[2], a[3]).
386 mov r13,rax
387 mul r14
388 mov r9,rax
389 mov rax,r15
390 mov r10,rdx
391
392 mul r13
393 add r10,rax
394 mov rax,r8
395 adc rdx,0
396 mov r11,rdx
397
398 mul r13
399 add r11,rax
400 mov rax,r15
401 adc rdx,0
402 mov r12,rdx
403
404
; Off-diagonal products with a[1]: a[1]*a[2] and a[1]*a[3].
405 mul r14
406 add r11,rax
407 mov rax,r8
408 adc rdx,0
409 mov rbp,rdx
410
411 mul r14
412 add r12,rax
413 mov rax,r8
414 adc rdx,0
415 add r12,rbp
416 mov r13,rdx
417 adc r13,0
418
419
; Last off-diagonal product a[2]*a[3]; r15 is zeroed to collect the doubling carry.
420 mul r15
421 xor r15,r15
422 add r13,rax
423 mov rax,QWORD[rsi]
424 mov r14,rdx
425 adc r14,0
426
; Double the off-diagonal sum: (r9..r14, r15) = 2 * (r9..r14).
427 add r9,r9
428 adc r10,r10
429 adc r11,r11
430 adc r12,r12
431 adc r13,r13
432 adc r14,r14
433 adc r15,0
434
; Add the diagonal squares a[0]^2 .. a[3]^2; r8 receives the lowest limb.
435 mul rax
436 mov r8,rax
437 mov rax,QWORD[8+rsi]
438 mov rcx,rdx
439
440 mul rax
441 add r9,rcx
442 adc r10,rax
443 mov rax,QWORD[16+rsi]
444 adc rdx,0
445 mov rcx,rdx
446
447 mul rax
448 add r11,rcx
449 adc r12,rax
450 mov rax,QWORD[24+rsi]
451 adc rdx,0
452 mov rcx,rdx
453
454 mul rax
455 add r13,rcx
456 adc r14,rax
457 mov rax,r8
458 adc r15,rdx
459
; From here rsi/rbp hold modulus limbs 1 and 3 instead of pointers.
460 mov rsi,QWORD[(($L$poly+8))]
461 mov rbp,QWORD[(($L$poly+24))]
462
463
464
465
; Reduction step 0: retire r8 (adds r8*2^96 and r8*rbp*2^192 to the limbs above).
466 mov rcx,r8
467 shl r8,32
468 mul rbp
469 shr rcx,32
470 add r9,r8
471 adc r10,rcx
472 adc r11,rax
473 mov rax,r9
474 adc rdx,0
475
476
477
; Reduction step 1: retire r9; r8 is reused for the carried high part.
478 mov rcx,r9
479 shl r9,32
480 mov r8,rdx
481 mul rbp
482 shr rcx,32
483 add r10,r9
484 adc r11,rcx
485 adc r8,rax
486 mov rax,r10
487 adc rdx,0
488
489
490
; Reduction step 2: retire r10; r9 is reused.
491 mov rcx,r10
492 shl r10,32
493 mov r9,rdx
494 mul rbp
495 shr rcx,32
496 add r11,r10
497 adc r8,rcx
498 adc r9,rax
499 mov rax,r11
500 adc rdx,0
501
502
503
; Reduction step 3: retire r11; r10 is reused, r11 is zeroed for the final carry.
504 mov rcx,r11
505 shl r11,32
506 mov r10,rdx
507 mul rbp
508 shr rcx,32
509 add r8,r11
510 adc r9,rcx
511 adc r10,rax
512 adc rdx,0
513 xor r11,r11
514
515
516
; Fold the reduction output (r8, r9, r10, rdx) into the upper half
; (r12..r15); r11 takes the carry. Unreduced copies go to r8, r9 (r10, rcx below).
517 add r12,r8
518 adc r13,r9
519 mov r8,r12
520 adc r14,r10
521 adc r15,rdx
522 mov r9,r13
523 adc r11,0
524
; Trial subtraction of $L$poly = (-1, rsi, 0, rbp) with borrow into r11.
525 sub r12,-1
526 mov r10,r14
527 sbb r13,rsi
528 sbb r14,0
529 mov rcx,r15
530 sbb r15,rbp
531 sbb r11,0
532
; Borrow set => restore the unreduced value.
533 cmovc r12,r8
534 cmovc r13,r9
535 mov QWORD[rdi],r12
536 cmovc r14,r10
537 mov QWORD[8+rdi],r13
538 cmovc r15,rcx
539 mov QWORD[16+rdi],r14
540 mov QWORD[24+rdi],r15
541
542 DB 0F3h,0C3h ;repret
543
544
545
;-----------------------------------------------------------------------
; ecp_nistz256_select_w5
; ABI:   Win64 (arguments used directly from rcx, rdx, r8d).
; In:    rcx = pointer to 96-byte output
;        rdx = pointer to a table of 16 entries of 96 bytes each; read with
;              movdqa, so it must be 16-byte aligned
;        r8d = index
; Out:   output = OR of every table entry whose 1-based position equals the
;        index; an index that matches no position (e.g. 0) yields all zero.
; All 16 entries are read on every call regardless of the index; the
; selection is done with compare masks (pcmpeqd/pand/por), not branches.
; Saves: xmm6-xmm15 in a 168-byte stack area (the save instructions are
;        emitted as raw bytes; decoded in the comments below).
; Clobb: rax, rdx, xmm0-xmm5, flags
;-----------------------------------------------------------------------
546 global ecp_nistz256_select_w5
547
548 ALIGN 32
549 ecp_nistz256_select_w5:
550 lea rax,[((-136))+rsp]
551 $L$SEH_begin_ecp_nistz256_select_w5:
552 DB 0x48,0x8d,0x60,0xe0 ; lea rsp,[rax-0x20]  (rsp = entry rsp - 168)
553 DB 0x0f,0x29,0x70,0xe0 ; movaps [rax-0x20],xmm6
554 DB 0x0f,0x29,0x78,0xf0 ; movaps [rax-0x10],xmm7
555 DB 0x44,0x0f,0x29,0x00 ; movaps [rax],xmm8
556 DB 0x44,0x0f,0x29,0x48,0x10 ; movaps [rax+0x10],xmm9
557 DB 0x44,0x0f,0x29,0x50,0x20 ; movaps [rax+0x20],xmm10
558 DB 0x44,0x0f,0x29,0x58,0x30 ; movaps [rax+0x30],xmm11
559 DB 0x44,0x0f,0x29,0x60,0x40 ; movaps [rax+0x40],xmm12
560 DB 0x44,0x0f,0x29,0x68,0x50 ; movaps [rax+0x50],xmm13
561 DB 0x44,0x0f,0x29,0x70,0x60 ; movaps [rax+0x60],xmm14
562 DB 0x44,0x0f,0x29,0x78,0x70 ; movaps [rax+0x70],xmm15
; xmm0 = {1,1,1,1} (increment), xmm1 = index in the low dword.
563 movdqa xmm0,XMMWORD[$L$One]
564 movd xmm1,r8d
565
; xmm2..xmm7 accumulate the selected 96 bytes.
566 pxor xmm2,xmm2
567 pxor xmm3,xmm3
568 pxor xmm4,xmm4
569 pxor xmm5,xmm5
570 pxor xmm6,xmm6
571 pxor xmm7,xmm7
572
; xmm8 = running position counter (starts at 1); xmm1 = index broadcast to all dwords.
573 movdqa xmm8,xmm0
574 pshufd xmm1,xmm1,0
575
576 mov rax,16
577 $L$select_loop_sse_w5:
578
; xmm15 = all-ones if the current position equals the index, else zero.
579 movdqa xmm15,xmm8
580 paddd xmm8,xmm0
581 pcmpeqd xmm15,xmm1
582
583 movdqa xmm9,XMMWORD[rdx]
584 movdqa xmm10,XMMWORD[16+rdx]
585 movdqa xmm11,XMMWORD[32+rdx]
586 movdqa xmm12,XMMWORD[48+rdx]
587 movdqa xmm13,XMMWORD[64+rdx]
588 movdqa xmm14,XMMWORD[80+rdx]
589 lea rdx,[96+rdx]
590
591 pand xmm9,xmm15
592 pand xmm10,xmm15
593 por xmm2,xmm9
594 pand xmm11,xmm15
595 por xmm3,xmm10
596 pand xmm12,xmm15
597 por xmm4,xmm11
598 pand xmm13,xmm15
599 por xmm5,xmm12
600 pand xmm14,xmm15
601 por xmm6,xmm13
602 por xmm7,xmm14
603
604 dec rax
605 jnz NEAR $L$select_loop_sse_w5
606
; Unaligned stores: the output pointer carries no alignment requirement.
607 movdqu XMMWORD[rcx],xmm2
608 movdqu XMMWORD[16+rcx],xmm3
609 movdqu XMMWORD[32+rcx],xmm4
610 movdqu XMMWORD[48+rcx],xmm5
611 movdqu XMMWORD[64+rcx],xmm6
612 movdqu XMMWORD[80+rcx],xmm7
; Restore xmm6-xmm15 and release the 168-byte save area.
613 movaps xmm6,XMMWORD[rsp]
614 movaps xmm7,XMMWORD[16+rsp]
615 movaps xmm8,XMMWORD[32+rsp]
616 movaps xmm9,XMMWORD[48+rsp]
617 movaps xmm10,XMMWORD[64+rsp]
618 movaps xmm11,XMMWORD[80+rsp]
619 movaps xmm12,XMMWORD[96+rsp]
620 movaps xmm13,XMMWORD[112+rsp]
621 movaps xmm14,XMMWORD[128+rsp]
622 movaps xmm15,XMMWORD[144+rsp]
623 lea rsp,[168+rsp]
624 $L$SEH_end_ecp_nistz256_select_w5:
625 DB 0F3h,0C3h ;repret
626
627
628
629
;-----------------------------------------------------------------------
; ecp_nistz256_select_w7
; ABI:   Win64 (arguments used directly from rcx, rdx, r8d).
; In:    rcx = pointer to 64-byte output
;        rdx = pointer to a table of 64 entries of 64 bytes each; read with
;              movdqa, so it must be 16-byte aligned
;        r8d = index
; Out:   output = OR of every table entry whose 1-based position equals the
;        index; an index that matches no position (e.g. 0) yields all zero.
; All 64 entries are read on every call; selection uses compare masks.
; Saves: xmm6-xmm15 in a 168-byte stack area (raw-byte encoded, decoded in
;        the comments below).
; Clobb: rax, rdx, xmm0-xmm5, flags
;-----------------------------------------------------------------------
630 global ecp_nistz256_select_w7
631
632 ALIGN 32
633 ecp_nistz256_select_w7:
634 lea rax,[((-136))+rsp]
635 $L$SEH_begin_ecp_nistz256_select_w7:
636 DB 0x48,0x8d,0x60,0xe0 ; lea rsp,[rax-0x20]  (rsp = entry rsp - 168)
637 DB 0x0f,0x29,0x70,0xe0 ; movaps [rax-0x20],xmm6
638 DB 0x0f,0x29,0x78,0xf0 ; movaps [rax-0x10],xmm7
639 DB 0x44,0x0f,0x29,0x00 ; movaps [rax],xmm8
640 DB 0x44,0x0f,0x29,0x48,0x10 ; movaps [rax+0x10],xmm9
641 DB 0x44,0x0f,0x29,0x50,0x20 ; movaps [rax+0x20],xmm10
642 DB 0x44,0x0f,0x29,0x58,0x30 ; movaps [rax+0x30],xmm11
643 DB 0x44,0x0f,0x29,0x60,0x40 ; movaps [rax+0x40],xmm12
644 DB 0x44,0x0f,0x29,0x68,0x50 ; movaps [rax+0x50],xmm13
645 DB 0x44,0x0f,0x29,0x70,0x60 ; movaps [rax+0x60],xmm14
646 DB 0x44,0x0f,0x29,0x78,0x70 ; movaps [rax+0x70],xmm15
; xmm8 = position counter (starts at 1), xmm1 = index in the low dword.
647 movdqa xmm8,XMMWORD[$L$One]
648 movd xmm1,r8d
649
; xmm2..xmm5 accumulate the selected 64 bytes.
650 pxor xmm2,xmm2
651 pxor xmm3,xmm3
652 pxor xmm4,xmm4
653 pxor xmm5,xmm5
654
; xmm0 = {1,1,1,1} increment; xmm1 = index broadcast to all dwords.
655 movdqa xmm0,xmm8
656 pshufd xmm1,xmm1,0
657 mov rax,64
658
659 $L$select_loop_sse_w7:
; xmm15 = all-ones if the current position equals the index, else zero.
660 movdqa xmm15,xmm8
661 paddd xmm8,xmm0
662 movdqa xmm9,XMMWORD[rdx]
663 movdqa xmm10,XMMWORD[16+rdx]
664 pcmpeqd xmm15,xmm1
665 movdqa xmm11,XMMWORD[32+rdx]
666 movdqa xmm12,XMMWORD[48+rdx]
667 lea rdx,[64+rdx]
668
669 pand xmm9,xmm15
670 pand xmm10,xmm15
671 por xmm2,xmm9
672 pand xmm11,xmm15
673 por xmm3,xmm10
674 pand xmm12,xmm15
675 por xmm4,xmm11
; Prefetch hint for table data ahead of the (already advanced) pointer.
676 prefetcht0 [255+rdx]
677 por xmm5,xmm12
678
679 dec rax
680 jnz NEAR $L$select_loop_sse_w7
681
682 movdqu XMMWORD[rcx],xmm2
683 movdqu XMMWORD[16+rcx],xmm3
684 movdqu XMMWORD[32+rcx],xmm4
685 movdqu XMMWORD[48+rcx],xmm5
; Restore xmm6-xmm15 and release the 168-byte save area.
686 movaps xmm6,XMMWORD[rsp]
687 movaps xmm7,XMMWORD[16+rsp]
688 movaps xmm8,XMMWORD[32+rsp]
689 movaps xmm9,XMMWORD[48+rsp]
690 movaps xmm10,XMMWORD[64+rsp]
691 movaps xmm11,XMMWORD[80+rsp]
692 movaps xmm12,XMMWORD[96+rsp]
693 movaps xmm13,XMMWORD[112+rsp]
694 movaps xmm14,XMMWORD[128+rsp]
695 movaps xmm15,XMMWORD[144+rsp]
696 lea rsp,[168+rsp]
697 $L$SEH_end_ecp_nistz256_select_w7:
698 DB 0F3h,0C3h ;repret
699
;-----------------------------------------------------------------------
; ecp_nistz256_avx2_select_w7
; Stub: after the standard Win64 argument shuffle the body is the two bytes
; 0F 0B, which encode ud2 (raises an invalid-opcode exception). No AVX2
; implementation is present in this build; the epilogue after the ud2 is
; only reached if execution is resumed past the trap.
; NOTE(review): presumably callers are expected never to dispatch here -
; confirm against the C-side CPU-capability check.
;-----------------------------------------------------------------------
700 global ecp_nistz256_avx2_select_w7
701
702 ALIGN 32
703 ecp_nistz256_avx2_select_w7:
704 mov QWORD[8+rsp],rdi ;WIN64 prologue
705 mov QWORD[16+rsp],rsi
706 mov rax,rsp
707 $L$SEH_begin_ecp_nistz256_avx2_select_w7:
708 mov rdi,rcx
709 mov rsi,rdx
710 mov rdx,r8
711
712
713 DB 0x0f,0x0b ; ud2
714 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
715 mov rsi,QWORD[16+rsp]
716 DB 0F3h,0C3h ;repret
717 $L$SEH_end_ecp_nistz256_avx2_select_w7:
718
;-----------------------------------------------------------------------
; __ecp_nistz256_add_toq  (internal helper, register calling convention)
; In:    (r12, r13, r8, r9) = a, rbx = pointer to b, rdi = result pointer
;        r14, r15 = modulus limbs 1 and 3 (must already be loaded by the
;        caller; limbs 0 and 2 are the immediates -1 and 0)
; Out:   [rdi] and (r12, r13, r8, r9) = a + b, minus the modulus unless that
;        subtraction borrows
; Clobb: rax, rbp, rcx, r10, r11, flags
;-----------------------------------------------------------------------
719 ALIGN 32
720 __ecp_nistz256_add_toq:
; a += b with the carry-out collected in r11; rax/rbp keep the unreduced sum.
721 xor r11,r11
722 add r12,QWORD[rbx]
723 adc r13,QWORD[8+rbx]
724 mov rax,r12
725 adc r8,QWORD[16+rbx]
726 adc r9,QWORD[24+rbx]
727 mov rbp,r13
728 adc r11,0
729
; Trial subtraction of the modulus; rcx/r10 keep the remaining unreduced limbs.
730 sub r12,-1
731 mov rcx,r8
732 sbb r13,r14
733 sbb r8,0
734 mov r10,r9
735 sbb r9,r15
736 sbb r11,0
737
; Borrow set => the sum was already below the modulus: restore it.
738 cmovc r12,rax
739 cmovc r13,rbp
740 mov QWORD[rdi],r12
741 cmovc r8,rcx
742 mov QWORD[8+rdi],r13
743 cmovc r9,r10
744 mov QWORD[16+rdi],r8
745 mov QWORD[24+rdi],r9
746
747 DB 0F3h,0C3h ;repret
748
749
750
;-----------------------------------------------------------------------
; __ecp_nistz256_sub_fromq  (internal helper, register calling convention)
; In:    (r12, r13, r8, r9) = a, rbx = pointer to b, rdi = result pointer
;        r14, r15 = modulus limbs 1 and 3 (loaded by the caller)
; Out:   [rdi] and (r12, r13, r8, r9) = a - b, plus the modulus when the
;        subtraction borrowed
; Clobb: rax, rbp, rcx, r10, r11, flags
;-----------------------------------------------------------------------
751 ALIGN 32
752 __ecp_nistz256_sub_fromq:
; a -= b; r11 = all-ones if the subtraction borrowed, 0 otherwise.
753 sub r12,QWORD[rbx]
754 sbb r13,QWORD[8+rbx]
755 mov rax,r12
756 sbb r8,QWORD[16+rbx]
757 sbb r9,QWORD[24+rbx]
758 mov rbp,r13
759 sbb r11,r11
760
; Speculatively add the modulus (-1, r14, 0, r15); raw difference kept in
; rax, rbp, rcx, r10.
761 add r12,-1
762 mov rcx,r8
763 adc r13,r14
764 adc r8,0
765 mov r10,r9
766 adc r9,r15
767 test r11,r11
768
; No borrow => keep the raw difference.
769 cmovz r12,rax
770 cmovz r13,rbp
771 mov QWORD[rdi],r12
772 cmovz r8,rcx
773 mov QWORD[8+rdi],r13
774 cmovz r9,r10
775 mov QWORD[16+rdi],r8
776 mov QWORD[24+rdi],r9
777
778 DB 0F3h,0C3h ;repret
779
780
781
;-----------------------------------------------------------------------
; __ecp_nistz256_subq  (internal helper, register-only: nothing is stored)
; In:    (rax, rbp, rcx, r10) = a, (r12, r13, r8, r9) = b
;        r14, r15 = modulus limbs 1 and 3 (loaded by the caller)
; Out:   (r12, r13, r8, r9) = a - b, plus the modulus when the subtraction
;        borrowed. rdi is not used; callers store the result themselves.
; Clobb: rax, rbp, rcx, r10, r11, flags
;-----------------------------------------------------------------------
782 ALIGN 32
783 __ecp_nistz256_subq:
; a -= b in (rax, rbp, rcx, r10); the raw difference is copied into the
; output registers; r11 = all-ones on borrow.
784 sub rax,r12
785 sbb rbp,r13
786 mov r12,rax
787 sbb rcx,r8
788 sbb r10,r9
789 mov r13,rbp
790 sbb r11,r11
791
; Speculatively add the modulus to the working copy.
792 add rax,-1
793 mov r8,rcx
794 adc rbp,r14
795 adc rcx,0
796 mov r9,r10
797 adc r10,r15
798 test r11,r11
799
; Borrow happened => take the modulus-corrected value.
800 cmovnz r12,rax
801 cmovnz r13,rbp
802 cmovnz r8,rcx
803 cmovnz r9,r10
804
805 DB 0F3h,0C3h ;repret
806
807
808
;-----------------------------------------------------------------------
; __ecp_nistz256_mul_by_2q  (internal helper, register calling convention)
; In:    (r12, r13, r8, r9) = a, rdi = result pointer
;        r14, r15 = modulus limbs 1 and 3 (loaded by the caller)
; Out:   [rdi] and (r12, r13, r8, r9) = 2*a, minus the modulus unless that
;        subtraction borrows
; Clobb: rax, rbp, rcx, r10, r11, flags
;-----------------------------------------------------------------------
809 ALIGN 32
810 __ecp_nistz256_mul_by_2q:
; a += a with the carry-out collected in r11; rax/rbp keep the unreduced value.
811 xor r11,r11
812 add r12,r12
813 adc r13,r13
814 mov rax,r12
815 adc r8,r8
816 adc r9,r9
817 mov rbp,r13
818 adc r11,0
819
; Trial subtraction of the modulus (-1, r14, 0, r15).
820 sub r12,-1
821 mov rcx,r8
822 sbb r13,r14
823 sbb r8,0
824 mov r10,r9
825 sbb r9,r15
826 sbb r11,0
827
; Borrow set => restore the unreduced value.
828 cmovc r12,rax
829 cmovc r13,rbp
830 mov QWORD[rdi],r12
831 cmovc r8,rcx
832 mov QWORD[8+rdi],r13
833 cmovc r9,r10
834 mov QWORD[16+rdi],r8
835 mov QWORD[24+rdi],r9
836
837 DB 0F3h,0C3h ;repret
838
;-----------------------------------------------------------------------
; ecp_nistz256_point_double
; ABI:   Win64 entry (arguments in rcx, rdx).
; In:    rcx = pointer to 96-byte output, rdx = pointer to 96-byte input.
;        Both are treated as three consecutive 4-limb fields at offsets
;        0, 32 and 64, called X, Y, Z in the comments below
;        (NOTE(review): presumably Jacobian coordinates in Montgomery
;        form - confirm against the C caller).
; Out:   out.Z = 2*Y*Z
;        with M = 3*(X+Z^2)*(X-Z^2) and S = X*(2Y)^2:
;        out.X = M^2 - 2*S
;        out.Y = M*(S - out.X) - (2Y)^4/2
; Stack: 32*5+8 bytes of locals; slots at rsp+0, +32, +64, +96, +128.
;        The movdqa stores to rsp+96 need rsp 16-byte aligned, which holds
;        given the usual entry alignment (rsp % 16 == 8), six pushes and
;        the 168-byte adjustment.
; Regs:  the output pointers out, out+32, out+64 are parked in the low
;        qwords of xmm0, xmm1, xmm2 (movq emitted as raw DB bytes).
; Saves: rdi/rsi in the caller's home space; rbp, rbx, r12-r15 pushed.
; $L$point_double_shortcutq is also a jump target for
; ecp_nistz256_point_add, which arrives with rsi = input, rdi = output and
; the same stack frame size.
;-----------------------------------------------------------------------
839 global ecp_nistz256_point_double
840
841 ALIGN 32
842 ecp_nistz256_point_double:
843 mov QWORD[8+rsp],rdi ;WIN64 prologue
844 mov QWORD[16+rsp],rsi
845 mov rax,rsp
846 $L$SEH_begin_ecp_nistz256_point_double:
847 mov rdi,rcx
848 mov rsi,rdx
849
850
851 push rbp
852 push rbx
853 push r12
854 push r13
855 push r14
856 push r15
857 sub rsp,32*5+8
858
859 $L$point_double_shortcutq:
; Load X into xmm0:xmm1 and Y into (r12, r13, r8, r9); rbx = input pointer;
; r14/r15 = modulus limbs 1 and 3; copy X to [rsp+96].
860 movdqu xmm0,XMMWORD[rsi]
861 mov rbx,rsi
862 movdqu xmm1,XMMWORD[16+rsi]
863 mov r12,QWORD[((32+0))+rsi]
864 mov r13,QWORD[((32+8))+rsi]
865 mov r8,QWORD[((32+16))+rsi]
866 mov r9,QWORD[((32+24))+rsi]
867 mov r14,QWORD[(($L$poly+8))]
868 mov r15,QWORD[(($L$poly+24))]
869 movdqa XMMWORD[96+rsp],xmm0
870 movdqa XMMWORD[(96+16)+rsp],xmm1
871 lea r10,[32+rdi]
872 lea r11,[64+rdi]
873 DB 102,72,15,110,199 ; movq xmm0,rdi  (out)
874 DB 102,73,15,110,202 ; movq xmm1,r10  (out+32)
875 DB 102,73,15,110,211 ; movq xmm2,r11  (out+64)
876
; [rsp+0] = 2*Y
877 lea rdi,[rsp]
878 call __ecp_nistz256_mul_by_2q
879
; [rsp+64] = Z^2
880 mov rax,QWORD[((64+0))+rsi]
881 mov r14,QWORD[((64+8))+rsi]
882 mov r15,QWORD[((64+16))+rsi]
883 mov r8,QWORD[((64+24))+rsi]
884 lea rsi,[((64-0))+rsi]
885 lea rdi,[64+rsp]
886 call __ecp_nistz256_sqr_montq
887
; [rsp+0] = (2Y)^2
888 mov rax,QWORD[((0+0))+rsp]
889 mov r14,QWORD[((8+0))+rsp]
890 lea rsi,[((0+0))+rsp]
891 mov r15,QWORD[((16+0))+rsp]
892 mov r8,QWORD[((24+0))+rsp]
893 lea rdi,[rsp]
894 call __ecp_nistz256_sqr_montq
895
; out.Z = Z*Y, then doubled in place: out.Z = 2*Y*Z
896 mov rax,QWORD[32+rbx]
897 mov r9,QWORD[((64+0))+rbx]
898 mov r10,QWORD[((64+8))+rbx]
899 mov r11,QWORD[((64+16))+rbx]
900 mov r12,QWORD[((64+24))+rbx]
901 lea rsi,[((64-0))+rbx]
902 lea rbx,[32+rbx]
903 DB 102,72,15,126,215 ; movq rdi,xmm2  (out+64)
904 call __ecp_nistz256_mul_montq
905 call __ecp_nistz256_mul_by_2q
906
; [rsp+32] = X + Z^2
907 mov r12,QWORD[((96+0))+rsp]
908 mov r13,QWORD[((96+8))+rsp]
909 lea rbx,[64+rsp]
910 mov r8,QWORD[((96+16))+rsp]
911 mov r9,QWORD[((96+24))+rsp]
912 lea rdi,[32+rsp]
913 call __ecp_nistz256_add_toq
914
; [rsp+64] = X - Z^2
915 mov r12,QWORD[((96+0))+rsp]
916 mov r13,QWORD[((96+8))+rsp]
917 lea rbx,[64+rsp]
918 mov r8,QWORD[((96+16))+rsp]
919 mov r9,QWORD[((96+24))+rsp]
920 lea rdi,[64+rsp]
921 call __ecp_nistz256_sub_fromq
922
; (r12..r15) = ((2Y)^2)^2, also stored at out+32 (overwritten below).
923 mov rax,QWORD[((0+0))+rsp]
924 mov r14,QWORD[((8+0))+rsp]
925 lea rsi,[((0+0))+rsp]
926 mov r15,QWORD[((16+0))+rsp]
927 mov r8,QWORD[((24+0))+rsp]
928 DB 102,72,15,126,207 ; movq rdi,xmm1  (out+32)
929 call __ecp_nistz256_sqr_montq
; Halve modulo the modulus: add the modulus (rsi/rbp hold limbs 1 and 3
; after the sqr call) when the value is odd, keeping the carry in r9 ...
930 xor r9,r9
931 mov rax,r12
932 add r12,-1
933 mov r10,r13
934 adc r13,rsi
935 mov rcx,r14
936 adc r14,0
937 mov r8,r15
938 adc r15,rbp
939 adc r9,0
940 xor rsi,rsi
941 test rax,1
942
; ... value was even: undo the addition (and clear the carry limb).
943 cmovz r12,rax
944 cmovz r13,r10
945 cmovz r14,rcx
946 cmovz r15,r8
947 cmovz r9,rsi
948
; Shift the 5-limb value right by one bit and store it: out.Y = (2Y)^4 / 2.
949 mov rax,r13
950 shr r12,1
951 shl rax,63
952 mov r10,r14
953 shr r13,1
954 or r12,rax
955 shl r10,63
956 mov rcx,r15
957 shr r14,1
958 or r13,r10
959 shl rcx,63
960 mov QWORD[rdi],r12
961 shr r15,1
962 mov QWORD[8+rdi],r13
963 shl r9,63
964 or r14,rcx
965 or r15,r9
966 mov QWORD[16+rdi],r14
967 mov QWORD[24+rdi],r15
; [rsp+32] = (X + Z^2) * (X - Z^2)
968 mov rax,QWORD[64+rsp]
969 lea rbx,[64+rsp]
970 mov r9,QWORD[((0+32))+rsp]
971 mov r10,QWORD[((8+32))+rsp]
972 lea rsi,[((0+32))+rsp]
973 mov r11,QWORD[((16+32))+rsp]
974 mov r12,QWORD[((24+32))+rsp]
975 lea rdi,[32+rsp]
976 call __ecp_nistz256_mul_montq
977
; [rsp+128] = 2 * [rsp+32]  (operates on the registers left by the multiply)
978 lea rdi,[128+rsp]
979 call __ecp_nistz256_mul_by_2q
980
; [rsp+32] = 2*[rsp+32] + [rsp+32] = M = 3*(X+Z^2)*(X-Z^2)
981 lea rbx,[32+rsp]
982 lea rdi,[32+rsp]
983 call __ecp_nistz256_add_toq
984
; [rsp+0] = S = X * (2Y)^2
985 mov rax,QWORD[96+rsp]
986 lea rbx,[96+rsp]
987 mov r9,QWORD[((0+0))+rsp]
988 mov r10,QWORD[((8+0))+rsp]
989 lea rsi,[((0+0))+rsp]
990 mov r11,QWORD[((16+0))+rsp]
991 mov r12,QWORD[((24+0))+rsp]
992 lea rdi,[rsp]
993 call __ecp_nistz256_mul_montq
994
; [rsp+128] = 2*S
995 lea rdi,[128+rsp]
996 call __ecp_nistz256_mul_by_2q
997
; out.X = M^2 (result also left in r12..r15)
998 mov rax,QWORD[((0+32))+rsp]
999 mov r14,QWORD[((8+32))+rsp]
1000 lea rsi,[((0+32))+rsp]
1001 mov r15,QWORD[((16+32))+rsp]
1002 mov r8,QWORD[((24+32))+rsp]
1003 DB 102,72,15,126,199 ; movq rdi,xmm0  (out)
1004 call __ecp_nistz256_sqr_montq
1005
; Re-map the sqr output registers to the add/sub helper convention
; (value in r12, r13, r8, r9; modulus limbs in r14, r15), then
; out.X = M^2 - 2*S.
1006 lea rbx,[128+rsp]
1007 mov r8,r14
1008 mov r9,r15
1009 mov r14,rsi
1010 mov r15,rbp
1011 call __ecp_nistz256_sub_fromq
1012
; (r12, r13, r8, r9) = S - out.X  (register-only; rdi is not used by subq)
1013 mov rax,QWORD[((0+0))+rsp]
1014 mov rbp,QWORD[((0+8))+rsp]
1015 mov rcx,QWORD[((0+16))+rsp]
1016 mov r10,QWORD[((0+24))+rsp]
1017 lea rdi,[rsp]
1018 call __ecp_nistz256_subq
1019
; Store S - out.X at [rsp+0] while moving it into the multiply input
; registers (r9, r10, r11, r12). "xor ecx,ecx" sets ZF and only mov/lea
; follow, so both cmovz instructions below always move.
1020 mov rax,QWORD[32+rsp]
1021 lea rbx,[32+rsp]
1022 mov r14,r12
1023 xor ecx,ecx
1024 mov QWORD[((0+0))+rsp],r12
1025 mov r10,r13
1026 mov QWORD[((0+8))+rsp],r13
1027 cmovz r11,r8
1028 mov QWORD[((0+16))+rsp],r8
1029 lea rsi,[((0-0))+rsp]
1030 cmovz r12,r9
1031 mov QWORD[((0+24))+rsp],r9
1032 mov r9,r14
1033 lea rdi,[rsp]
; [rsp+0] = M * (S - out.X)
1034 call __ecp_nistz256_mul_montq
1035
; out.Y = M*(S - out.X) - out.Y  (out.Y currently holds (2Y)^4 / 2)
1036 DB 102,72,15,126,203 ; movq rbx,xmm1  (out+32)
1037 DB 102,72,15,126,207 ; movq rdi,xmm1  (out+32)
1038 call __ecp_nistz256_sub_fromq
1039
1040 add rsp,32*5+8
1041 pop r15
1042 pop r14
1043 pop r13
1044 pop r12
1045 pop rbx
1046 pop rbp
1047 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
1048 mov rsi,QWORD[16+rsp]
1049 DB 0F3h,0C3h ;repret
1050 $L$SEH_end_ecp_nistz256_point_double:
;-----------------------------------------------------------------------
; ecp_nistz256_point_add
; ABI:   Win64 entry (arguments in rcx, rdx, r8).
; In:    rcx = pointer to 96-byte output
;        rdx = pointer to 96-byte input "in1"
;        r8  = pointer to 96-byte input "in2"
;        Each is treated as three 4-limb fields X, Y, Z at offsets 0, 32, 64
;        (NOTE(review): presumably Jacobian coordinates in Montgomery
;        form - confirm against the C caller).
; Stack: 32*18+8 bytes. Slots used (offsets from rsp):
;          0  H = U2 - U1            32  in1.Z^2, later H^2
;         64  R = S2 - S1            96  in2.Z^2, later R^2
;        128  H^3                   160  U1 = in1.X * in2.Z^2
;        192  U2 = in2.X * in1.Z^2, later U1*H^2
;        224  in2.Z^3, later S1 = in1.Y * in2.Z^3
;        256  in1.Z^3, later S2 = in2.Y * in1.Z^3, later S1*H^3
;        288  res_x   320  res_y   352  res_z
;        384/416/448  copy of in1.X / in1.Y / in1.Z
;        480/512/544  copy of in2.X / in2.Y / in2.Z
; Masks: xmm5 = all-ones if in1.Z == 0, xmm4 = all-ones if in2.Z == 0.
; Out:   if in1.Z == 0 -> in2; else if in2.Z == 0 -> in1 (applied with
;        masks at the end); if H == 0, R == 0 and neither Z is zero ->
;        jumps into ecp_nistz256_point_double on in1; if H == 0, R != 0 and
;        neither Z is zero -> 96 zero bytes; otherwise
;          res_z = H * in1.Z * in2.Z
;          res_x = R^2 - 2*U1*H^2 - H^3
;          res_y = R*(U1*H^2 - res_x) - S1*H^3
; Note:  unlike the field helpers, this routine branches on H, R and the
;        zero-Z masks.
; Saves: rdi/rsi in the caller's home space; rbp, rbx, r12-r15 pushed.
;-----------------------------------------------------------------------
1051 global ecp_nistz256_point_add
1052
1053 ALIGN 32
1054 ecp_nistz256_point_add:
1055 mov QWORD[8+rsp],rdi ;WIN64 prologue
1056 mov QWORD[16+rsp],rsi
1057 mov rax,rsp
1058 $L$SEH_begin_ecp_nistz256_point_add:
1059 mov rdi,rcx
1060 mov rsi,rdx
1061 mov rdx,r8
1062
1063
1064 push rbp
1065 push rbx
1066 push r12
1067 push r13
1068 push r14
1069 push r15
1070 sub rsp,32*18+8
1071
; Copy in1 to rsp+384..479; rbx = in1, rsi = in2; start OR-folding in1.Z in xmm5.
1072 movdqu xmm0,XMMWORD[rsi]
1073 movdqu xmm1,XMMWORD[16+rsi]
1074 movdqu xmm2,XMMWORD[32+rsi]
1075 movdqu xmm3,XMMWORD[48+rsi]
1076 movdqu xmm4,XMMWORD[64+rsi]
1077 movdqu xmm5,XMMWORD[80+rsi]
1078 mov rbx,rsi
1079 mov rsi,rdx
1080 movdqa XMMWORD[384+rsp],xmm0
1081 movdqa XMMWORD[(384+16)+rsp],xmm1
1082 movdqa XMMWORD[416+rsp],xmm2
1083 movdqa XMMWORD[(416+16)+rsp],xmm3
1084 movdqa XMMWORD[448+rsp],xmm4
1085 movdqa XMMWORD[(448+16)+rsp],xmm5
1086 por xmm5,xmm4
1087
; Copy in2 to rsp+480..575 (Z via rax, r14, r15, r8 which also feed the
; first squaring); xmm1 = OR of the two halves of in2.Z.
1088 movdqu xmm0,XMMWORD[rsi]
1089 pshufd xmm3,xmm5,0xb1
1090 movdqu xmm1,XMMWORD[16+rsi]
1091 movdqu xmm2,XMMWORD[32+rsi]
1092 por xmm5,xmm3
1093 movdqu xmm3,XMMWORD[48+rsi]
1094 mov rax,QWORD[((64+0))+rsi]
1095 mov r14,QWORD[((64+8))+rsi]
1096 mov r15,QWORD[((64+16))+rsi]
1097 mov r8,QWORD[((64+24))+rsi]
1098 movdqa XMMWORD[480+rsp],xmm0
1099 pshufd xmm4,xmm5,0x1e
1100 movdqa XMMWORD[(480+16)+rsp],xmm1
1101 movdqu xmm0,XMMWORD[64+rsi]
1102 movdqu xmm1,XMMWORD[80+rsi]
1103 movdqa XMMWORD[512+rsp],xmm2
1104 movdqa XMMWORD[(512+16)+rsp],xmm3
1105 por xmm5,xmm4
1106 pxor xmm4,xmm4
1107 por xmm1,xmm0
1108 DB 102,72,15,110,199 ; movq xmm0,rdi  (park the output pointer)
1109
; [rsp+96] = in2.Z^2
1110 lea rsi,[((64-0))+rsi]
1111 mov QWORD[((544+0))+rsp],rax
1112 mov QWORD[((544+8))+rsp],r14
1113 mov QWORD[((544+16))+rsp],r15
1114 mov QWORD[((544+24))+rsp],r8
1115 lea rdi,[96+rsp]
1116 call __ecp_nistz256_sqr_montq
1117
; xmm5 = all-ones if in1.Z == 0; xmm4 = all-ones if in2.Z == 0.
1118 pcmpeqd xmm5,xmm4
1119 pshufd xmm4,xmm1,0xb1
1120 por xmm4,xmm1
1121 pshufd xmm5,xmm5,0
1122 pshufd xmm3,xmm4,0x1e
1123 por xmm4,xmm3
1124 pxor xmm3,xmm3
1125 pcmpeqd xmm4,xmm3
1126 pshufd xmm4,xmm4,0
1127 mov rax,QWORD[((64+0))+rbx]
1128 mov r14,QWORD[((64+8))+rbx]
1129 mov r15,QWORD[((64+16))+rbx]
1130 mov r8,QWORD[((64+24))+rbx]
1131 DB 102,72,15,110,203 ; movq xmm1,rbx  (park the in1 pointer)
1132
; [rsp+32] = in1.Z^2
1133 lea rsi,[((64-0))+rbx]
1134 lea rdi,[32+rsp]
1135 call __ecp_nistz256_sqr_montq
1136
; [rsp+224] = in2.Z^3
1137 mov rax,QWORD[544+rsp]
1138 lea rbx,[544+rsp]
1139 mov r9,QWORD[((0+96))+rsp]
1140 mov r10,QWORD[((8+96))+rsp]
1141 lea rsi,[((0+96))+rsp]
1142 mov r11,QWORD[((16+96))+rsp]
1143 mov r12,QWORD[((24+96))+rsp]
1144 lea rdi,[224+rsp]
1145 call __ecp_nistz256_mul_montq
1146
; [rsp+256] = in1.Z^3
1147 mov rax,QWORD[448+rsp]
1148 lea rbx,[448+rsp]
1149 mov r9,QWORD[((0+32))+rsp]
1150 mov r10,QWORD[((8+32))+rsp]
1151 lea rsi,[((0+32))+rsp]
1152 mov r11,QWORD[((16+32))+rsp]
1153 mov r12,QWORD[((24+32))+rsp]
1154 lea rdi,[256+rsp]
1155 call __ecp_nistz256_mul_montq
1156
; [rsp+224] = S1 = in1.Y * in2.Z^3
1157 mov rax,QWORD[416+rsp]
1158 lea rbx,[416+rsp]
1159 mov r9,QWORD[((0+224))+rsp]
1160 mov r10,QWORD[((8+224))+rsp]
1161 lea rsi,[((0+224))+rsp]
1162 mov r11,QWORD[((16+224))+rsp]
1163 mov r12,QWORD[((24+224))+rsp]
1164 lea rdi,[224+rsp]
1165 call __ecp_nistz256_mul_montq
1166
; [rsp+256] = S2 = in2.Y * in1.Z^3
1167 mov rax,QWORD[512+rsp]
1168 lea rbx,[512+rsp]
1169 mov r9,QWORD[((0+256))+rsp]
1170 mov r10,QWORD[((8+256))+rsp]
1171 lea rsi,[((0+256))+rsp]
1172 mov r11,QWORD[((16+256))+rsp]
1173 mov r12,QWORD[((24+256))+rsp]
1174 lea rdi,[256+rsp]
1175 call __ecp_nistz256_mul_montq
1176
; [rsp+64] = R = S2 - S1
1177 lea rbx,[224+rsp]
1178 lea rdi,[64+rsp]
1179 call __ecp_nistz256_sub_fromq
1180
; r12 = OR of R's limbs (non-zero iff R != 0), parked in xmm3;
; xmm2 = xmm4 | xmm5 (non-zero iff either input has Z == 0).
1181 or r12,r13
1182 movdqa xmm2,xmm4
1183 or r12,r8
1184 or r12,r9
1185 por xmm2,xmm5
1186 DB 102,73,15,110,220 ; movq xmm3,r12
1187
; [rsp+160] = U1 = in1.X * in2.Z^2
1188 mov rax,QWORD[384+rsp]
1189 lea rbx,[384+rsp]
1190 mov r9,QWORD[((0+96))+rsp]
1191 mov r10,QWORD[((8+96))+rsp]
1192 lea rsi,[((0+96))+rsp]
1193 mov r11,QWORD[((16+96))+rsp]
1194 mov r12,QWORD[((24+96))+rsp]
1195 lea rdi,[160+rsp]
1196 call __ecp_nistz256_mul_montq
1197
; [rsp+192] = U2 = in2.X * in1.Z^2
1198 mov rax,QWORD[480+rsp]
1199 lea rbx,[480+rsp]
1200 mov r9,QWORD[((0+32))+rsp]
1201 mov r10,QWORD[((8+32))+rsp]
1202 lea rsi,[((0+32))+rsp]
1203 mov r11,QWORD[((16+32))+rsp]
1204 mov r12,QWORD[((24+32))+rsp]
1205 lea rdi,[192+rsp]
1206 call __ecp_nistz256_mul_montq
1207
; [rsp+0] = H = U2 - U1
1208 lea rbx,[160+rsp]
1209 lea rdi,[rsp]
1210 call __ecp_nistz256_sub_fromq
1211
; ZF = (H == 0)
1212 or r12,r13
1213 or r12,r8
1214 or r12,r9
1215
; Dispatch: H != 0 -> general case. Otherwise, either Z == 0 -> general
; case (the final masks pick the other input); R == 0 -> doubling;
; else write 96 zero bytes.
1216 DB 0x3e ; DS segment-override prefix on the following jnz
1217 jnz NEAR $L$add_proceedq
1218 DB 102,73,15,126,208 ; movq r8,xmm2  (either-Z-is-zero mask)
1219 DB 102,73,15,126,217 ; movq r9,xmm3  (OR of R's limbs)
1220 test r8,r8
1221 jnz NEAR $L$add_proceedq
1222 test r9,r9
1223 jz NEAR $L$add_doubleq
1224
1225 DB 102,72,15,126,199 ; movq rdi,xmm0  (output pointer)
1226 pxor xmm0,xmm0
1227 movdqu XMMWORD[rdi],xmm0
1228 movdqu XMMWORD[16+rdi],xmm0
1229 movdqu XMMWORD[32+rdi],xmm0
1230 movdqu XMMWORD[48+rdi],xmm0
1231 movdqu XMMWORD[64+rdi],xmm0
1232 movdqu XMMWORD[80+rdi],xmm0
1233 jmp NEAR $L$add_doneq
1234
; Doubling path: restore rsi = in1 and rdi = out, shrink the frame from
; 32*18+8 to point_double's 32*5+8 bytes (difference 416) and continue there;
; point_double's epilogue then unwinds the shared pushes.
1235 ALIGN 32
1236 $L$add_doubleq:
1237 DB 102,72,15,126,206 ; movq rsi,xmm1  (in1 pointer)
1238 DB 102,72,15,126,199 ; movq rdi,xmm0  (output pointer)
1239 add rsp,416
1240 jmp NEAR $L$point_double_shortcutq
1241
1242 ALIGN 32
1243 $L$add_proceedq:
; [rsp+96] = R^2
1244 mov rax,QWORD[((0+64))+rsp]
1245 mov r14,QWORD[((8+64))+rsp]
1246 lea rsi,[((0+64))+rsp]
1247 mov r15,QWORD[((16+64))+rsp]
1248 mov r8,QWORD[((24+64))+rsp]
1249 lea rdi,[96+rsp]
1250 call __ecp_nistz256_sqr_montq
1251
; [rsp+352] = H * in1.Z
1252 mov rax,QWORD[448+rsp]
1253 lea rbx,[448+rsp]
1254 mov r9,QWORD[((0+0))+rsp]
1255 mov r10,QWORD[((8+0))+rsp]
1256 lea rsi,[((0+0))+rsp]
1257 mov r11,QWORD[((16+0))+rsp]
1258 mov r12,QWORD[((24+0))+rsp]
1259 lea rdi,[352+rsp]
1260 call __ecp_nistz256_mul_montq
1261
; [rsp+32] = H^2
1262 mov rax,QWORD[((0+0))+rsp]
1263 mov r14,QWORD[((8+0))+rsp]
1264 lea rsi,[((0+0))+rsp]
1265 mov r15,QWORD[((16+0))+rsp]
1266 mov r8,QWORD[((24+0))+rsp]
1267 lea rdi,[32+rsp]
1268 call __ecp_nistz256_sqr_montq
1269
; [rsp+352] = res_z = H * in1.Z * in2.Z
1270 mov rax,QWORD[544+rsp]
1271 lea rbx,[544+rsp]
1272 mov r9,QWORD[((0+352))+rsp]
1273 mov r10,QWORD[((8+352))+rsp]
1274 lea rsi,[((0+352))+rsp]
1275 mov r11,QWORD[((16+352))+rsp]
1276 mov r12,QWORD[((24+352))+rsp]
1277 lea rdi,[352+rsp]
1278 call __ecp_nistz256_mul_montq
1279
; [rsp+128] = H^3
1280 mov rax,QWORD[rsp]
1281 lea rbx,[rsp]
1282 mov r9,QWORD[((0+32))+rsp]
1283 mov r10,QWORD[((8+32))+rsp]
1284 lea rsi,[((0+32))+rsp]
1285 mov r11,QWORD[((16+32))+rsp]
1286 mov r12,QWORD[((24+32))+rsp]
1287 lea rdi,[128+rsp]
1288 call __ecp_nistz256_mul_montq
1289
; [rsp+192] = U1 * H^2 (result also left in r12, r13, r8, r9)
1290 mov rax,QWORD[160+rsp]
1291 lea rbx,[160+rsp]
1292 mov r9,QWORD[((0+32))+rsp]
1293 mov r10,QWORD[((8+32))+rsp]
1294 lea rsi,[((0+32))+rsp]
1295 mov r11,QWORD[((16+32))+rsp]
1296 mov r12,QWORD[((24+32))+rsp]
1297 lea rdi,[192+rsp]
1298 call __ecp_nistz256_mul_montq
1299
1300
1301
1302
; Inline doubling (same sequence as __ecp_nistz256_mul_by_2q without the
; stores): (r12, r13, r8, r9) = 2*U1*H^2, interleaved with loading
; R^2 from [rsp+96] into (rax, rbp, rcx, r10) for the following subq.
1303 xor r11,r11
1304 add r12,r12
1305 lea rsi,[96+rsp]
1306 adc r13,r13
1307 mov rax,r12
1308 adc r8,r8
1309 adc r9,r9
1310 mov rbp,r13
1311 adc r11,0
1312
1313 sub r12,-1
1314 mov rcx,r8
1315 sbb r13,r14
1316 sbb r8,0
1317 mov r10,r9
1318 sbb r9,r15
1319 sbb r11,0
1320
1321 cmovc r12,rax
1322 mov rax,QWORD[rsi]
1323 cmovc r13,rbp
1324 mov rbp,QWORD[8+rsi]
1325 cmovc r8,rcx
1326 mov rcx,QWORD[16+rsi]
1327 cmovc r9,r10
1328 mov r10,QWORD[24+rsi]
1329
; (r12, r13, r8, r9) = R^2 - 2*U1*H^2
1330 call __ecp_nistz256_subq
1331
; [rsp+288] = res_x = R^2 - 2*U1*H^2 - H^3
1332 lea rbx,[128+rsp]
1333 lea rdi,[288+rsp]
1334 call __ecp_nistz256_sub_fromq
1335
; [rsp+320] = U1*H^2 - res_x  (subq is register-only, so store explicitly)
1336 mov rax,QWORD[((192+0))+rsp]
1337 mov rbp,QWORD[((192+8))+rsp]
1338 mov rcx,QWORD[((192+16))+rsp]
1339 mov r10,QWORD[((192+24))+rsp]
1340 lea rdi,[320+rsp]
1341
1342 call __ecp_nistz256_subq
1343
1344 mov QWORD[rdi],r12
1345 mov QWORD[8+rdi],r13
1346 mov QWORD[16+rdi],r8
1347 mov QWORD[24+rdi],r9
; [rsp+256] = S1 * H^3
1348 mov rax,QWORD[128+rsp]
1349 lea rbx,[128+rsp]
1350 mov r9,QWORD[((0+224))+rsp]
1351 mov r10,QWORD[((8+224))+rsp]
1352 lea rsi,[((0+224))+rsp]
1353 mov r11,QWORD[((16+224))+rsp]
1354 mov r12,QWORD[((24+224))+rsp]
1355 lea rdi,[256+rsp]
1356 call __ecp_nistz256_mul_montq
1357
; [rsp+320] = R * (U1*H^2 - res_x)
1358 mov rax,QWORD[320+rsp]
1359 lea rbx,[320+rsp]
1360 mov r9,QWORD[((0+64))+rsp]
1361 mov r10,QWORD[((8+64))+rsp]
1362 lea rsi,[((0+64))+rsp]
1363 mov r11,QWORD[((16+64))+rsp]
1364 mov r12,QWORD[((24+64))+rsp]
1365 lea rdi,[320+rsp]
1366 call __ecp_nistz256_mul_montq
1367
; [rsp+320] = res_y = R*(U1*H^2 - res_x) - S1*H^3
1368 lea rbx,[256+rsp]
1369 lea rdi,[320+rsp]
1370 call __ecp_nistz256_sub_fromq
1371
1372 DB 102,72,15,126,199 ; movq rdi,xmm0  (output pointer)
1373
; out.Z: res_z, replaced by in2.Z when in1.Z == 0 (xmm5), then by in1.Z
; when in2.Z == 0 (xmm4).
1374 movdqa xmm0,xmm5
1375 movdqa xmm1,xmm5
1376 pandn xmm0,XMMWORD[352+rsp]
1377 movdqa xmm2,xmm5
1378 pandn xmm1,XMMWORD[((352+16))+rsp]
1379 movdqa xmm3,xmm5
1380 pand xmm2,XMMWORD[544+rsp]
1381 pand xmm3,XMMWORD[((544+16))+rsp]
1382 por xmm2,xmm0
1383 por xmm3,xmm1
1384
1385 movdqa xmm0,xmm4
1386 movdqa xmm1,xmm4
1387 pandn xmm0,xmm2
1388 movdqa xmm2,xmm4
1389 pandn xmm1,xmm3
1390 movdqa xmm3,xmm4
1391 pand xmm2,XMMWORD[448+rsp]
1392 pand xmm3,XMMWORD[((448+16))+rsp]
1393 por xmm2,xmm0
1394 por xmm3,xmm1
1395 movdqu XMMWORD[64+rdi],xmm2
1396 movdqu XMMWORD[80+rdi],xmm3
1397
; out.X: res_x / in2.X / in1.X with the same two-level selection.
1398 movdqa xmm0,xmm5
1399 movdqa xmm1,xmm5
1400 pandn xmm0,XMMWORD[288+rsp]
1401 movdqa xmm2,xmm5
1402 pandn xmm1,XMMWORD[((288+16))+rsp]
1403 movdqa xmm3,xmm5
1404 pand xmm2,XMMWORD[480+rsp]
1405 pand xmm3,XMMWORD[((480+16))+rsp]
1406 por xmm2,xmm0
1407 por xmm3,xmm1
1408
1409 movdqa xmm0,xmm4
1410 movdqa xmm1,xmm4
1411 pandn xmm0,xmm2
1412 movdqa xmm2,xmm4
1413 pandn xmm1,xmm3
1414 movdqa xmm3,xmm4
1415 pand xmm2,XMMWORD[384+rsp]
1416 pand xmm3,XMMWORD[((384+16))+rsp]
1417 por xmm2,xmm0
1418 por xmm3,xmm1
1419 movdqu XMMWORD[rdi],xmm2
1420 movdqu XMMWORD[16+rdi],xmm3
1421
; out.Y: res_y / in2.Y / in1.Y with the same two-level selection.
1422 movdqa xmm0,xmm5
1423 movdqa xmm1,xmm5
1424 pandn xmm0,XMMWORD[320+rsp]
1425 movdqa xmm2,xmm5
1426 pandn xmm1,XMMWORD[((320+16))+rsp]
1427 movdqa xmm3,xmm5
1428 pand xmm2,XMMWORD[512+rsp]
1429 pand xmm3,XMMWORD[((512+16))+rsp]
1430 por xmm2,xmm0
1431 por xmm3,xmm1
1432
1433 movdqa xmm0,xmm4
1434 movdqa xmm1,xmm4
1435 pandn xmm0,xmm2
1436 movdqa xmm2,xmm4
1437 pandn xmm1,xmm3
1438 movdqa xmm3,xmm4
1439 pand xmm2,XMMWORD[416+rsp]
1440 pand xmm3,XMMWORD[((416+16))+rsp]
1441 por xmm2,xmm0
1442 por xmm3,xmm1
1443 movdqu XMMWORD[32+rdi],xmm2
1444 movdqu XMMWORD[48+rdi],xmm3
1445
1446 $L$add_doneq:
1447 add rsp,32*18+8
1448 pop r15
1449 pop r14
1450 pop r13
1451 pop r12
1452 pop rbx
1453 pop rbp
1454 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
1455 mov rsi,QWORD[16+rsp]
1456 DB 0F3h,0C3h ;repret
1457 $L$SEH_end_ecp_nistz256_point_add:
1458 global ecp_nistz256_point_add_affine
1459
1460 ALIGN 32
1461 ecp_nistz256_point_add_affine:
1462 mov QWORD[8+rsp],rdi ;WIN64 prologue
1463 mov QWORD[16+rsp],rsi
1464 mov rax,rsp
1465 $L$SEH_begin_ecp_nistz256_point_add_affine:
1466 mov rdi,rcx
1467 mov rsi,rdx
1468 mov rdx,r8
1469
1470
1471 push rbp
1472 push rbx
1473 push r12
1474 push r13
1475 push r14
1476 push r15
1477 sub rsp,32*15+8
1478
1479 movdqu xmm0,XMMWORD[rsi]
1480 mov rbx,rdx
1481 movdqu xmm1,XMMWORD[16+rsi]
1482 movdqu xmm2,XMMWORD[32+rsi]
1483 movdqu xmm3,XMMWORD[48+rsi]
1484 movdqu xmm4,XMMWORD[64+rsi]
1485 movdqu xmm5,XMMWORD[80+rsi]
1486 mov rax,QWORD[((64+0))+rsi]
1487 mov r14,QWORD[((64+8))+rsi]
1488 mov r15,QWORD[((64+16))+rsi]
1489 mov r8,QWORD[((64+24))+rsi]
1490 movdqa XMMWORD[320+rsp],xmm0
1491 movdqa XMMWORD[(320+16)+rsp],xmm1
1492 movdqa XMMWORD[352+rsp],xmm2
1493 movdqa XMMWORD[(352+16)+rsp],xmm3
1494 movdqa XMMWORD[384+rsp],xmm4
1495 movdqa XMMWORD[(384+16)+rsp],xmm5
1496 por xmm5,xmm4
1497
1498 movdqu xmm0,XMMWORD[rbx]
1499 pshufd xmm3,xmm5,0xb1
1500 movdqu xmm1,XMMWORD[16+rbx]
1501 movdqu xmm2,XMMWORD[32+rbx]
1502 por xmm5,xmm3
1503 movdqu xmm3,XMMWORD[48+rbx]
1504 movdqa XMMWORD[416+rsp],xmm0
1505 pshufd xmm4,xmm5,0x1e
1506 movdqa XMMWORD[(416+16)+rsp],xmm1
1507 por xmm1,xmm0
1508 DB 102,72,15,110,199
1509 movdqa XMMWORD[448+rsp],xmm2
1510 movdqa XMMWORD[(448+16)+rsp],xmm3
1511 por xmm3,xmm2
1512 por xmm5,xmm4
1513 pxor xmm4,xmm4
1514 por xmm3,xmm1
1515
1516 lea rsi,[((64-0))+rsi]
1517 lea rdi,[32+rsp]
1518 call __ecp_nistz256_sqr_montq
1519
1520 pcmpeqd xmm5,xmm4
1521 pshufd xmm4,xmm3,0xb1
1522 mov rax,QWORD[rbx]
1523
1524 mov r9,r12
1525 por xmm4,xmm3
1526 pshufd xmm5,xmm5,0
1527 pshufd xmm3,xmm4,0x1e
1528 mov r10,r13
1529 por xmm4,xmm3
1530 pxor xmm3,xmm3
1531 mov r11,r14
1532 pcmpeqd xmm4,xmm3
1533 pshufd xmm4,xmm4,0
1534
1535 lea rsi,[((32-0))+rsp]
1536 mov r12,r15
1537 lea rdi,[rsp]
1538 call __ecp_nistz256_mul_montq
1539
1540 lea rbx,[320+rsp]
1541 lea rdi,[64+rsp]
1542 call __ecp_nistz256_sub_fromq
1543
1544 mov rax,QWORD[384+rsp]
1545 lea rbx,[384+rsp]
1546 mov r9,QWORD[((0+32))+rsp]
1547 mov r10,QWORD[((8+32))+rsp]
1548 lea rsi,[((0+32))+rsp]
1549 mov r11,QWORD[((16+32))+rsp]
1550 mov r12,QWORD[((24+32))+rsp]
1551 lea rdi,[32+rsp]
1552 call __ecp_nistz256_mul_montq
1553
1554 mov rax,QWORD[384+rsp]
1555 lea rbx,[384+rsp]
1556 mov r9,QWORD[((0+64))+rsp]
1557 mov r10,QWORD[((8+64))+rsp]
1558 lea rsi,[((0+64))+rsp]
1559 mov r11,QWORD[((16+64))+rsp]
1560 mov r12,QWORD[((24+64))+rsp]
1561 lea rdi,[288+rsp]
1562 call __ecp_nistz256_mul_montq
1563
1564 mov rax,QWORD[448+rsp]
1565 lea rbx,[448+rsp]
1566 mov r9,QWORD[((0+32))+rsp]
1567 mov r10,QWORD[((8+32))+rsp]
1568 lea rsi,[((0+32))+rsp]
1569 mov r11,QWORD[((16+32))+rsp]
1570 mov r12,QWORD[((24+32))+rsp]
1571 lea rdi,[32+rsp]
1572 call __ecp_nistz256_mul_montq
1573
1574 lea rbx,[352+rsp]
1575 lea rdi,[96+rsp]
1576 call __ecp_nistz256_sub_fromq
1577
1578 mov rax,QWORD[((0+64))+rsp]
1579 mov r14,QWORD[((8+64))+rsp]
1580 lea rsi,[((0+64))+rsp]
1581 mov r15,QWORD[((16+64))+rsp]
1582 mov r8,QWORD[((24+64))+rsp]
1583 lea rdi,[128+rsp]
1584 call __ecp_nistz256_sqr_montq
1585
1586 mov rax,QWORD[((0+96))+rsp]
1587 mov r14,QWORD[((8+96))+rsp]
1588 lea rsi,[((0+96))+rsp]
1589 mov r15,QWORD[((16+96))+rsp]
1590 mov r8,QWORD[((24+96))+rsp]
1591 lea rdi,[192+rsp]
1592 call __ecp_nistz256_sqr_montq
1593
1594 mov rax,QWORD[128+rsp]
1595 lea rbx,[128+rsp]
1596 mov r9,QWORD[((0+64))+rsp]
1597 mov r10,QWORD[((8+64))+rsp]
1598 lea rsi,[((0+64))+rsp]
1599 mov r11,QWORD[((16+64))+rsp]
1600 mov r12,QWORD[((24+64))+rsp]
1601 lea rdi,[160+rsp]
1602 call __ecp_nistz256_mul_montq
1603
1604 mov rax,QWORD[320+rsp]
1605 lea rbx,[320+rsp]
1606 mov r9,QWORD[((0+128))+rsp]
1607 mov r10,QWORD[((8+128))+rsp]
1608 lea rsi,[((0+128))+rsp]
1609 mov r11,QWORD[((16+128))+rsp]
1610 mov r12,QWORD[((24+128))+rsp]
1611 lea rdi,[rsp]
1612 call __ecp_nistz256_mul_montq
1613
1614
1615
1616
1617 xor r11,r11
1618 add r12,r12
1619 lea rsi,[192+rsp]
1620 adc r13,r13
1621 mov rax,r12
1622 adc r8,r8
1623 adc r9,r9
1624 mov rbp,r13
1625 adc r11,0
1626
1627 sub r12,-1
1628 mov rcx,r8
1629 sbb r13,r14
1630 sbb r8,0
1631 mov r10,r9
1632 sbb r9,r15
1633 sbb r11,0
1634
1635 cmovc r12,rax
1636 mov rax,QWORD[rsi]
1637 cmovc r13,rbp
1638 mov rbp,QWORD[8+rsi]
1639 cmovc r8,rcx
1640 mov rcx,QWORD[16+rsi]
1641 cmovc r9,r10
1642 mov r10,QWORD[24+rsi]
1643
1644 call __ecp_nistz256_subq
1645
1646 lea rbx,[160+rsp]
1647 lea rdi,[224+rsp]
1648 call __ecp_nistz256_sub_fromq
1649
1650 mov rax,QWORD[((0+0))+rsp]
1651 mov rbp,QWORD[((0+8))+rsp]
1652 mov rcx,QWORD[((0+16))+rsp]
1653 mov r10,QWORD[((0+24))+rsp]
1654 lea rdi,[64+rsp]
1655
1656 call __ecp_nistz256_subq
1657
1658 mov QWORD[rdi],r12
1659 mov QWORD[8+rdi],r13
1660 mov QWORD[16+rdi],r8
1661 mov QWORD[24+rdi],r9
1662 mov rax,QWORD[352+rsp]
1663 lea rbx,[352+rsp]
1664 mov r9,QWORD[((0+160))+rsp]
1665 mov r10,QWORD[((8+160))+rsp]
1666 lea rsi,[((0+160))+rsp]
1667 mov r11,QWORD[((16+160))+rsp]
1668 mov r12,QWORD[((24+160))+rsp]
1669 lea rdi,[32+rsp]
1670 call __ecp_nistz256_mul_montq
1671
1672 mov rax,QWORD[96+rsp]
1673 lea rbx,[96+rsp]
1674 mov r9,QWORD[((0+64))+rsp]
1675 mov r10,QWORD[((8+64))+rsp]
1676 lea rsi,[((0+64))+rsp]
1677 mov r11,QWORD[((16+64))+rsp]
1678 mov r12,QWORD[((24+64))+rsp]
1679 lea rdi,[64+rsp]
1680 call __ecp_nistz256_mul_montq
1681
1682 lea rbx,[32+rsp]
1683 lea rdi,[256+rsp]
1684 call __ecp_nistz256_sub_fromq
1685
1686 DB 102,72,15,126,199
1687
1688 movdqa xmm0,xmm5
1689 movdqa xmm1,xmm5
1690 pandn xmm0,XMMWORD[288+rsp]
1691 movdqa xmm2,xmm5
1692 pandn xmm1,XMMWORD[((288+16))+rsp]
1693 movdqa xmm3,xmm5
1694 pand xmm2,XMMWORD[$L$ONE_mont]
1695 pand xmm3,XMMWORD[(($L$ONE_mont+16))]
1696 por xmm2,xmm0
1697 por xmm3,xmm1
1698
1699 movdqa xmm0,xmm4
1700 movdqa xmm1,xmm4
1701 pandn xmm0,xmm2
1702 movdqa xmm2,xmm4
1703 pandn xmm1,xmm3
1704 movdqa xmm3,xmm4
1705 pand xmm2,XMMWORD[384+rsp]
1706 pand xmm3,XMMWORD[((384+16))+rsp]
1707 por xmm2,xmm0
1708 por xmm3,xmm1
1709 movdqu XMMWORD[64+rdi],xmm2
1710 movdqu XMMWORD[80+rdi],xmm3
1711
1712 movdqa xmm0,xmm5
1713 movdqa xmm1,xmm5
1714 pandn xmm0,XMMWORD[224+rsp]
1715 movdqa xmm2,xmm5
1716 pandn xmm1,XMMWORD[((224+16))+rsp]
1717 movdqa xmm3,xmm5
1718 pand xmm2,XMMWORD[416+rsp]
1719 pand xmm3,XMMWORD[((416+16))+rsp]
1720 por xmm2,xmm0
1721 por xmm3,xmm1
1722
1723 movdqa xmm0,xmm4
1724 movdqa xmm1,xmm4
1725 pandn xmm0,xmm2
1726 movdqa xmm2,xmm4
1727 pandn xmm1,xmm3
1728 movdqa xmm3,xmm4
1729 pand xmm2,XMMWORD[320+rsp]
1730 pand xmm3,XMMWORD[((320+16))+rsp]
1731 por xmm2,xmm0
1732 por xmm3,xmm1
1733 movdqu XMMWORD[rdi],xmm2
1734 movdqu XMMWORD[16+rdi],xmm3
1735
1736 movdqa xmm0,xmm5
1737 movdqa xmm1,xmm5
1738 pandn xmm0,XMMWORD[256+rsp]
1739 movdqa xmm2,xmm5
1740 pandn xmm1,XMMWORD[((256+16))+rsp]
1741 movdqa xmm3,xmm5
1742 pand xmm2,XMMWORD[448+rsp]
1743 pand xmm3,XMMWORD[((448+16))+rsp]
1744 por xmm2,xmm0
1745 por xmm3,xmm1
1746
1747 movdqa xmm0,xmm4
1748 movdqa xmm1,xmm4
1749 pandn xmm0,xmm2
1750 movdqa xmm2,xmm4
1751 pandn xmm1,xmm3
1752 movdqa xmm3,xmm4
1753 pand xmm2,XMMWORD[352+rsp]
1754 pand xmm3,XMMWORD[((352+16))+rsp]
1755 por xmm2,xmm0
1756 por xmm3,xmm1
1757 movdqu XMMWORD[32+rdi],xmm2
1758 movdqu XMMWORD[48+rdi],xmm3
1759
1760 add rsp,32*15+8
1761 pop r15
1762 pop r14
1763 pop r13
1764 pop r12
1765 pop rbx
1766 pop rbp
1767 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
1768 mov rsi,QWORD[16+rsp]
1769 DB 0F3h,0C3h ;repret
1770 $L$SEH_end_ecp_nistz256_point_add_affine:
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698