OLD | NEW |
| (Empty) |
1 default rel | |
2 %define XMMWORD | |
3 %define YMMWORD | |
4 %define ZMMWORD | |
5 section .text code align=64 | |
6 | |
7 EXTERN OPENSSL_ia32cap_P | |
8 | |
9 | |
10 ALIGN 64 | |
; Field modulus, stored as four little-endian 64-bit limbs:
;   p = 2^256 - 2^224 + 2^192 + 2^96 - 1
; Code in this file reads limb 1 as [$L$poly+8] and limb 3 as [$L$poly+24].
11 $L$poly: | |
12 DQ 0xffffffffffffffff,0x00000000ffffffff,0x0000000000000000,0xffffffff00000001 | |
13 | |
; Eight-dword broadcast constants. $L$One is the per-lane counter seed and
; increment used by the select_w5/select_w7 table scans.
14 $L$One: | |
15 DD 1,1,1,1,1,1,1,1 | |
16 $L$Two: | |
17 DD 2,2,2,2,2,2,2,2 | |
18 $L$Three: | |
19 DD 3,3,3,3,3,3,3,3 | |
; 2^256 mod p as four little-endian limbs (the value 1 in a Montgomery
; representation with R = 2^256).
20 $L$ONE_mont: | |
21 DQ 0x0000000000000001,0xffffffff00000000,0xffffffffffffffff,0x00000000fffffffe | |
22 | |
23 | |
24 ALIGN 64 | |
; ecp_nistz256_mul_by_2 -- res = 2*a mod p on four 64-bit limbs.
; ABI:  Win64 entry (rcx = res, rdx = a); arguments are moved into rdi/rsi
;       and the caller's rdi/rsi are spilled to the home slots above the
;       return address and reloaded on exit.
; In:   rsi -> a[0..3]
; Out:  [rdi] = four result limbs, always fully reduced (< p) for a < p
; Uses: rax, rcx, rdx, r8-r11 as scratch; r12/r13 are pushed and popped.
; NOTE(review): no `global` directive for this label is visible here.
25 ecp_nistz256_mul_by_2: | |
26 mov QWORD[8+rsp],rdi ;WIN64 prologue | |
27 mov QWORD[16+rsp],rsi | |
28 mov rax,rsp | |
29 $L$SEH_begin_ecp_nistz256_mul_by_2: | |
30 mov rdi,rcx | |
31 mov rsi,rdx | |
32 | |
33 | |
34 push r12 | |
35 push r13 | |
36 | |
; r8..r11 = a + a, with the loads interleaved into the carry chain.
; rax/rdx (and rcx/r12 below) keep the unreduced sum for the final select.
37 mov r8,QWORD[rsi] | |
38 mov r9,QWORD[8+rsi] | |
39 add r8,r8 | |
40 mov r10,QWORD[16+rsi] | |
41 adc r9,r9 | |
42 mov r11,QWORD[24+rsi] | |
43 lea rsi,[$L$poly] | |
44 mov rax,r8 | |
45 adc r10,r10 | |
46 adc r11,r11 | |
47 mov rdx,r9 | |
; r13 = 0 if the doubling did not carry out of bit 255, all-ones if it did.
48 sbb r13,r13 | |
49 | |
; Tentatively subtract p.
50 sub r8,QWORD[rsi] | |
51 mov rcx,r10 | |
52 sbb r9,QWORD[8+rsi] | |
53 sbb r10,QWORD[16+rsi] | |
54 mov r12,r11 | |
55 sbb r11,QWORD[24+rsi] | |
; Fold the borrow into the carry mask. All-ones minus the borrow can never
; borrow, so CF ends up set only when the doubling did NOT carry and the
; subtraction DID borrow, i.e. exactly when 2a < p. Previously the sum was
; reduced only on carry, leaving results in [p, 2^256) unreduced.
56 sbb r13,0 | |
57 | |
; CF set -> restore the unreduced sum; otherwise keep sum - p.
58 cmovc r8,rax | |
59 cmovc r9,rdx | |
60 mov QWORD[rdi],r8 | |
61 cmovc r10,rcx | |
62 mov QWORD[8+rdi],r9 | |
63 cmovc r11,r12 | |
64 mov QWORD[16+rdi],r10 | |
65 mov QWORD[24+rdi],r11 | |
66 | |
67 pop r13 | |
68 pop r12 | |
69 mov rdi,QWORD[8+rsp] ;WIN64 epilogue | |
70 mov rsi,QWORD[16+rsp] | |
71 DB 0F3h,0C3h ;repret | |
72 $L$SEH_end_ecp_nistz256_mul_by_2: | |
73 | |
74 | |
75 | |
; ecp_nistz256_neg -- res = (0 - a), corrected by +p when the subtraction
; borrows.
; ABI:  Win64 entry (rcx = res, rdx = a), moved into rdi/rsi; the caller's
;       rdi/rsi are spilled to the home slots and reloaded on exit.
; In:   rsi -> a[0..3]
; Out:  [rdi] = four result limbs
; Uses: rax, rcx, rdx, r8-r11 as scratch; r12/r13 are pushed and popped.
76 global ecp_nistz256_neg | |
77 | |
78 ALIGN 32 | |
79 ecp_nistz256_neg: | |
80 mov QWORD[8+rsp],rdi ;WIN64 prologue | |
81 mov QWORD[16+rsp],rsi | |
82 mov rax,rsp | |
83 $L$SEH_begin_ecp_nistz256_neg: | |
84 mov rdi,rcx | |
85 mov rsi,rdx | |
86 | |
87 | |
88 push r12 | |
89 push r13 | |
90 | |
; Start from zero in r8..r11 (value) and r13 (borrow word).
91 xor r8,r8 | |
92 xor r9,r9 | |
93 xor r10,r10 | |
94 xor r11,r11 | |
95 xor r13,r13 | |
96 | |
; r8..r11 = 0 - a; rax/rdx (and rcx/r12 below) keep this raw difference.
97 sub r8,QWORD[rsi] | |
98 sbb r9,QWORD[8+rsi] | |
99 sbb r10,QWORD[16+rsi] | |
100 mov rax,r8 | |
101 sbb r11,QWORD[24+rsi] | |
102 lea rsi,[$L$poly] | |
103 mov rdx,r9 | |
; r13 becomes all-ones if the subtraction borrowed (a != 0), else stays 0.
104 sbb r13,0 | |
105 | |
; Tentatively add p back.
106 add r8,QWORD[rsi] | |
107 mov rcx,r10 | |
108 adc r9,QWORD[8+rsi] | |
109 adc r10,QWORD[16+rsi] | |
110 mov r12,r11 | |
111 adc r11,QWORD[24+rsi] | |
112 test r13,r13 | |
113 | |
; No borrow (r13 == 0) -> keep the raw difference; otherwise keep diff + p.
114 cmovz r8,rax | |
115 cmovz r9,rdx | |
116 mov QWORD[rdi],r8 | |
117 cmovz r10,rcx | |
118 mov QWORD[8+rdi],r9 | |
119 cmovz r11,r12 | |
120 mov QWORD[16+rdi],r10 | |
121 mov QWORD[24+rdi],r11 | |
122 | |
123 pop r13 | |
124 pop r12 | |
125 mov rdi,QWORD[8+rsp] ;WIN64 epilogue | |
126 mov rsi,QWORD[16+rsp] | |
127 DB 0F3h,0C3h ;repret | |
128 $L$SEH_end_ecp_nistz256_neg: | |
129 | |
130 | |
131 | |
132 | |
133 | |
134 | |
; ecp_nistz256_mul_mont -- exported wrapper around __ecp_nistz256_mul_montq.
; ABI:  Win64 entry (rcx = res, rdx = a, r8 = b), moved into rdi/rsi/rdx;
;       the caller's rdi/rsi are spilled to the home slots and reloaded on exit.
; Saves rbp, rbx, r12-r15 around the call.
; Sets up the helper's inputs: rbx -> b, rax = b[0], r9..r12 = a[0..3],
; rsi -> a, rdi -> res.
135 global ecp_nistz256_mul_mont | |
136 | |
137 ALIGN 32 | |
138 ecp_nistz256_mul_mont: | |
139 mov QWORD[8+rsp],rdi ;WIN64 prologue | |
140 mov QWORD[16+rsp],rsi | |
141 mov rax,rsp | |
142 $L$SEH_begin_ecp_nistz256_mul_mont: | |
143 mov rdi,rcx | |
144 mov rsi,rdx | |
145 mov rdx,r8 | |
146 | |
147 | |
148 $L$mul_mont: | |
149 push rbp | |
150 push rbx | |
151 push r12 | |
152 push r13 | |
153 push r14 | |
154 push r15 | |
; rdx is clobbered by `mul` inside the helper, so b is kept in rbx.
155 mov rbx,rdx | |
156 mov rax,QWORD[rdx] | |
157 mov r9,QWORD[rsi] | |
158 mov r10,QWORD[8+rsi] | |
159 mov r11,QWORD[16+rsi] | |
160 mov r12,QWORD[24+rsi] | |
161 | |
162 call __ecp_nistz256_mul_montq | |
163 $L$mul_mont_done: | |
164 pop r15 | |
165 pop r14 | |
166 pop r13 | |
167 pop r12 | |
168 pop rbx | |
169 pop rbp | |
170 mov rdi,QWORD[8+rsp] ;WIN64 epilogue | |
171 mov rsi,QWORD[16+rsp] | |
172 DB 0F3h,0C3h ;repret | |
173 $L$SEH_end_ecp_nistz256_mul_mont: | |
174 | |
175 | |
176 ALIGN 32 | |
; __ecp_nistz256_mul_montq -- internal 4x4-limb multiply with interleaved
; reduction modulo $L$poly (Montgomery multiplication, per the routine name).
; Register-based internal convention (not a C ABI):
; In:   rax = b[0], rbx -> b[0..3]
;       r9,r10,r11,r12 = a[0..3], rsi -> a[0..3]
;       rdi -> result
; Out:  r12,r13,r8,r9 = result limbs 0..3, also stored at [rdi]
;       r14 = [$L$poly+8], r15 = [$L$poly+24]
; Clobbers: rax, rbx, rcx, rdx, rbp, r8-r13, flags.
;
; Structure: four rounds, one per limb of b. Each round adds a*b[i] into a
; rolling accumulator, then a reduction step adds (low limb)*p so that the
; low limb cancels and the accumulator shifts down one limb. With
; p = 2^256 - 2^224 + 2^192 + 2^96 - 1, adding low*p amounts to adding
; low<<32 and low>>32 to the next two limbs and low*[$L$poly+24] two limbs up.
177 __ecp_nistz256_mul_montq: | |
178 | |
179 | |
; Round 0: accumulator r8..r12 = a * b[0].
180 mov rbp,rax | |
181 mul r9 | |
182 mov r14,QWORD[(($L$poly+8))] | |
183 mov r8,rax | |
184 mov rax,rbp | |
185 mov r9,rdx | |
186 | |
187 mul r10 | |
188 mov r15,QWORD[(($L$poly+24))] | |
189 add r9,rax | |
190 mov rax,rbp | |
191 adc rdx,0 | |
192 mov r10,rdx | |
193 | |
194 mul r11 | |
195 add r10,rax | |
196 mov rax,rbp | |
197 adc rdx,0 | |
198 mov r11,rdx | |
199 | |
200 mul r12 | |
201 add r11,rax | |
202 mov rax,r8 | |
203 adc rdx,0 | |
204 xor r13,r13 | |
205 mov r12,rdx | |
206 | |
207 | |
208 | |
209 | |
210 | |
211 | |
212 | |
213 | |
214 | |
215 | |
; Reduction step 0: fold low limb r8 into r9..r13; r8 is then recycled as
; the new top (carry) limb.
216 mov rbp,r8 | |
217 shl r8,32 | |
218 mul r15 | |
219 shr rbp,32 | |
220 add r9,r8 | |
221 adc r10,rbp | |
222 adc r11,rax | |
223 mov rax,QWORD[8+rbx] | |
224 adc r12,rdx | |
225 adc r13,0 | |
226 xor r8,r8 | |
227 | |
228 | |
229 | |
; Round 1: accumulator r9..r13,r8 += a * b[1].
230 mov rbp,rax | |
231 mul QWORD[rsi] | |
232 add r9,rax | |
233 mov rax,rbp | |
234 adc rdx,0 | |
235 mov rcx,rdx | |
236 | |
237 mul QWORD[8+rsi] | |
238 add r10,rcx | |
239 adc rdx,0 | |
240 add r10,rax | |
241 mov rax,rbp | |
242 adc rdx,0 | |
243 mov rcx,rdx | |
244 | |
245 mul QWORD[16+rsi] | |
246 add r11,rcx | |
247 adc rdx,0 | |
248 add r11,rax | |
249 mov rax,rbp | |
250 adc rdx,0 | |
251 mov rcx,rdx | |
252 | |
253 mul QWORD[24+rsi] | |
254 add r12,rcx | |
255 adc rdx,0 | |
256 add r12,rax | |
257 mov rax,r9 | |
258 adc r13,rdx | |
259 adc r8,0 | |
260 | |
261 | |
262 | |
; Reduction step 1: fold low limb r9; r9 becomes the new top limb.
263 mov rbp,r9 | |
264 shl r9,32 | |
265 mul r15 | |
266 shr rbp,32 | |
267 add r10,r9 | |
268 adc r11,rbp | |
269 adc r12,rax | |
270 mov rax,QWORD[16+rbx] | |
271 adc r13,rdx | |
272 adc r8,0 | |
273 xor r9,r9 | |
274 | |
275 | |
276 | |
; Round 2: accumulator r10..r13,r8,r9 += a * b[2].
277 mov rbp,rax | |
278 mul QWORD[rsi] | |
279 add r10,rax | |
280 mov rax,rbp | |
281 adc rdx,0 | |
282 mov rcx,rdx | |
283 | |
284 mul QWORD[8+rsi] | |
285 add r11,rcx | |
286 adc rdx,0 | |
287 add r11,rax | |
288 mov rax,rbp | |
289 adc rdx,0 | |
290 mov rcx,rdx | |
291 | |
292 mul QWORD[16+rsi] | |
293 add r12,rcx | |
294 adc rdx,0 | |
295 add r12,rax | |
296 mov rax,rbp | |
297 adc rdx,0 | |
298 mov rcx,rdx | |
299 | |
300 mul QWORD[24+rsi] | |
301 add r13,rcx | |
302 adc rdx,0 | |
303 add r13,rax | |
304 mov rax,r10 | |
305 adc r8,rdx | |
306 adc r9,0 | |
307 | |
308 | |
309 | |
; Reduction step 2: fold low limb r10; r10 becomes the new top limb.
310 mov rbp,r10 | |
311 shl r10,32 | |
312 mul r15 | |
313 shr rbp,32 | |
314 add r11,r10 | |
315 adc r12,rbp | |
316 adc r13,rax | |
317 mov rax,QWORD[24+rbx] | |
318 adc r8,rdx | |
319 adc r9,0 | |
320 xor r10,r10 | |
321 | |
322 | |
323 | |
; Round 3: accumulator r11..r13,r8,r9,r10 += a * b[3].
324 mov rbp,rax | |
325 mul QWORD[rsi] | |
326 add r11,rax | |
327 mov rax,rbp | |
328 adc rdx,0 | |
329 mov rcx,rdx | |
330 | |
331 mul QWORD[8+rsi] | |
332 add r12,rcx | |
333 adc rdx,0 | |
334 add r12,rax | |
335 mov rax,rbp | |
336 adc rdx,0 | |
337 mov rcx,rdx | |
338 | |
339 mul QWORD[16+rsi] | |
340 add r13,rcx | |
341 adc rdx,0 | |
342 add r13,rax | |
343 mov rax,rbp | |
344 adc rdx,0 | |
345 mov rcx,rdx | |
346 | |
347 mul QWORD[24+rsi] | |
348 add r8,rcx | |
349 adc rdx,0 | |
350 add r8,rax | |
351 mov rax,r11 | |
352 adc r9,rdx | |
353 adc r10,0 | |
354 | |
355 | |
356 | |
; Reduction step 3: fold low limb r11. The value is now r12,r13,r8,r9 with
; an extra top word in r10; rcx/rbp (and rbx/rdx below) keep copies for the
; final select.
357 mov rbp,r11 | |
358 shl r11,32 | |
359 mul r15 | |
360 shr rbp,32 | |
361 add r12,r11 | |
362 adc r13,rbp | |
363 mov rcx,r12 | |
364 adc r8,rax | |
365 adc r9,rdx | |
366 mov rbp,r13 | |
367 adc r10,0 | |
368 | |
369 | |
370 | |
; Final conditional subtraction of p across all five words
; (`sub r12,-1` subtracts p[0] = 0xffffffffffffffff; p[2] = 0).
371 sub r12,-1 | |
372 mov rbx,r8 | |
373 sbb r13,r14 | |
374 sbb r8,0 | |
375 mov rdx,r9 | |
376 sbb r9,r15 | |
377 sbb r10,0 | |
378 | |
; Borrow out of the top word -> the value was already < p; restore it.
379 cmovc r12,rcx | |
380 cmovc r13,rbp | |
381 mov QWORD[rdi],r12 | |
382 cmovc r8,rbx | |
383 mov QWORD[8+rdi],r13 | |
384 cmovc r9,rdx | |
385 mov QWORD[16+rdi],r8 | |
386 mov QWORD[24+rdi],r9 | |
387 | |
388 DB 0F3h,0C3h ;repret | |
389 | |
390 | |
391 | |
392 | |
393 | |
394 | |
395 | |
396 | |
397 | |
; ecp_nistz256_sqr_mont -- exported wrapper around __ecp_nistz256_sqr_montq.
; ABI:  Win64 entry (rcx = res, rdx = a), moved into rdi/rsi; the caller's
;       rdi/rsi are spilled to the home slots and reloaded on exit.
; Saves rbp, rbx, r12-r15 around the call.
; Sets up the helper's inputs: rax = a[0], r14 = a[1], r15 = a[2],
; r8 = a[3], rsi -> a, rdi -> res.
398 global ecp_nistz256_sqr_mont | |
399 | |
400 ALIGN 32 | |
401 ecp_nistz256_sqr_mont: | |
402 mov QWORD[8+rsp],rdi ;WIN64 prologue | |
403 mov QWORD[16+rsp],rsi | |
404 mov rax,rsp | |
405 $L$SEH_begin_ecp_nistz256_sqr_mont: | |
406 mov rdi,rcx | |
407 mov rsi,rdx | |
408 | |
409 | |
410 push rbp | |
411 push rbx | |
412 push r12 | |
413 push r13 | |
414 push r14 | |
415 push r15 | |
416 mov rax,QWORD[rsi] | |
417 mov r14,QWORD[8+rsi] | |
418 mov r15,QWORD[16+rsi] | |
419 mov r8,QWORD[24+rsi] | |
420 | |
421 call __ecp_nistz256_sqr_montq | |
422 $L$sqr_mont_done: | |
423 pop r15 | |
424 pop r14 | |
425 pop r13 | |
426 pop r12 | |
427 pop rbx | |
428 pop rbp | |
429 mov rdi,QWORD[8+rsp] ;WIN64 epilogue | |
430 mov rsi,QWORD[16+rsp] | |
431 DB 0F3h,0C3h ;repret | |
432 $L$SEH_end_ecp_nistz256_sqr_mont: | |
433 | |
434 | |
435 ALIGN 32 | |
; __ecp_nistz256_sqr_montq -- internal 4-limb squaring followed by reduction
; modulo $L$poly (Montgomery squaring, per the routine name).
; Register-based internal convention (not a C ABI):
; In:   rax = a[0], r14 = a[1], r15 = a[2], r8 = a[3]
;       rsi -> a[0..3] (limbs are re-read from memory for the squares)
;       rdi -> result
; Out:  r12,r13,r14,r15 = result limbs 0..3, also stored at [rdi]
;       rsi = [$L$poly+8], rbp = [$L$poly+24]  (rsi no longer points at a)
; Clobbers: rax, rcx, rdx, rbp, rsi, r8-r15, flags.
436 __ecp_nistz256_sqr_montq: | |
; Off-diagonal products a[i]*a[j], i < j, accumulated into r9..r14.
437 mov r13,rax | |
438 mul r14 | |
439 mov r9,rax | |
440 mov rax,r15 | |
441 mov r10,rdx | |
442 | |
443 mul r13 | |
444 add r10,rax | |
445 mov rax,r8 | |
446 adc rdx,0 | |
447 mov r11,rdx | |
448 | |
449 mul r13 | |
450 add r11,rax | |
451 mov rax,r15 | |
452 adc rdx,0 | |
453 mov r12,rdx | |
454 | |
455 | |
456 mul r14 | |
457 add r11,rax | |
458 mov rax,r8 | |
459 adc rdx,0 | |
460 mov rbp,rdx | |
461 | |
462 mul r14 | |
463 add r12,rax | |
464 mov rax,r8 | |
465 adc rdx,0 | |
466 add r12,rbp | |
467 mov r13,rdx | |
468 adc r13,0 | |
469 | |
470 | |
471 mul r15 | |
472 xor r15,r15 | |
473 add r13,rax | |
474 mov rax,QWORD[rsi] | |
475 mov r14,rdx | |
476 adc r14,0 | |
477 | |
; Double the off-diagonal sum; r15 catches the bit shifted out of r14.
478 add r9,r9 | |
479 adc r10,r10 | |
480 adc r11,r11 | |
481 adc r12,r12 | |
482 adc r13,r13 | |
483 adc r14,r14 | |
484 adc r15,0 | |
485 
; Add the diagonal squares a[i]^2; the full product is r8..r15.
486 mul rax | |
487 mov r8,rax | |
488 mov rax,QWORD[8+rsi] | |
489 mov rcx,rdx | |
490 | |
491 mul rax | |
492 add r9,rcx | |
493 adc r10,rax | |
494 mov rax,QWORD[16+rsi] | |
495 adc rdx,0 | |
496 mov rcx,rdx | |
497 | |
498 mul rax | |
499 add r11,rcx | |
500 adc r12,rax | |
501 mov rax,QWORD[24+rsi] | |
502 adc rdx,0 | |
503 mov rcx,rdx | |
504 | |
505 mul rax | |
506 add r13,rcx | |
507 adc r14,rax | |
508 mov rax,r8 | |
509 adc r15,rdx | |
510 | |
; From here on rsi/rbp hold modulus limbs 1 and 3.
511 mov rsi,QWORD[(($L$poly+8))] | |
512 mov rbp,QWORD[(($L$poly+24))] | |
513 | |
514 | |
515 | |
516 | |
; Four reduction steps on the low half r8..r11. Each step folds the current
; low limb into the limbs above it (limb<<32, limb>>32, limb*rbp) and
; recycles the freed register as the new top limb.
517 mov rcx,r8 | |
518 shl r8,32 | |
519 mul rbp | |
520 shr rcx,32 | |
521 add r9,r8 | |
522 adc r10,rcx | |
523 adc r11,rax | |
524 mov rax,r9 | |
525 adc rdx,0 | |
526 | |
527 | |
528 | |
529 mov rcx,r9 | |
530 shl r9,32 | |
531 mov r8,rdx | |
532 mul rbp | |
533 shr rcx,32 | |
534 add r10,r9 | |
535 adc r11,rcx | |
536 adc r8,rax | |
537 mov rax,r10 | |
538 adc rdx,0 | |
539 | |
540 | |
541 | |
542 mov rcx,r10 | |
543 shl r10,32 | |
544 mov r9,rdx | |
545 mul rbp | |
546 shr rcx,32 | |
547 add r11,r10 | |
548 adc r8,rcx | |
549 adc r9,rax | |
550 mov rax,r11 | |
551 adc rdx,0 | |
552 | |
553 | |
554 | |
555 mov rcx,r11 | |
556 shl r11,32 | |
557 mov r10,rdx | |
558 mul rbp | |
559 shr rcx,32 | |
560 add r8,r11 | |
561 adc r9,rcx | |
562 adc r10,rax | |
563 adc rdx,0 | |
564 xor r11,r11 | |
565 | |
566 | |
567 | |
; Add the reduced low half (r8,r9,r10,rdx) to the high half r12..r15;
; r11 holds the carry word. r8,r9 (and r10,rcx below) keep copies for the
; final select.
568 add r12,r8 | |
569 adc r13,r9 | |
570 mov r8,r12 | |
571 adc r14,r10 | |
572 adc r15,rdx | |
573 mov r9,r13 | |
574 adc r11,0 | |
575 | |
; Final conditional subtraction of p across all five words.
576 sub r12,-1 | |
577 mov r10,r14 | |
578 sbb r13,rsi | |
579 sbb r14,0 | |
580 mov rcx,r15 | |
581 sbb r15,rbp | |
582 sbb r11,0 | |
583 | |
; Borrow out of the top word -> the value was already < p; restore it.
584 cmovc r12,r8 | |
585 cmovc r13,r9 | |
586 mov QWORD[rdi],r12 | |
587 cmovc r14,r10 | |
588 mov QWORD[8+rdi],r13 | |
589 cmovc r15,rcx | |
590 mov QWORD[16+rdi],r14 | |
591 mov QWORD[24+rdi],r15 | |
592 | |
593 DB 0F3h,0C3h ;repret | |
594 | |
595 | |
596 | |
597 | |
598 | |
599 | |
600 | |
; ecp_nistz256_from_mont -- applies four reduction steps (no multiplication)
; to the four input limbs, then one conditional subtraction of p. Per the
; name this converts a value out of Montgomery representation.
; ABI:  Win64 entry (rcx = res, rdx = in), moved into rdi/rsi; the caller's
;       rdi/rsi are spilled to the home slots and reloaded on exit.
; In:   rsi -> in[0..3]
; Out:  [rdi] = four result limbs
; Uses: rax, rcx, rdx, rsi, r8-r11 as scratch; r12/r13 are pushed and popped.
601 global ecp_nistz256_from_mont | |
602 | |
603 ALIGN 32 | |
604 ecp_nistz256_from_mont: | |
605 mov QWORD[8+rsp],rdi ;WIN64 prologue | |
606 mov QWORD[16+rsp],rsi | |
607 mov rax,rsp | |
608 $L$SEH_begin_ecp_nistz256_from_mont: | |
609 mov rdi,rcx | |
610 mov rsi,rdx | |
611 | |
612 | |
613 push r12 | |
614 push r13 | |
615 | |
; rax/r8 = in[0], r9..r11 = in[1..3]; r12/r13 = modulus limbs 1 and 3.
616 mov rax,QWORD[rsi] | |
617 mov r13,QWORD[(($L$poly+24))] | |
618 mov r9,QWORD[8+rsi] | |
619 mov r10,QWORD[16+rsi] | |
620 mov r11,QWORD[24+rsi] | |
621 mov r8,rax | |
622 mov r12,QWORD[(($L$poly+8))] | |
623 | |
624 | |
625 | |
; Reduction steps 1-4: fold the current low limb into the limbs above it
; (limb<<32, limb>>32, limb*r13); the freed register becomes the new top limb.
626 mov rcx,rax | |
627 shl r8,32 | |
628 mul r13 | |
629 shr rcx,32 | |
630 add r9,r8 | |
631 adc r10,rcx | |
632 adc r11,rax | |
633 mov rax,r9 | |
634 adc rdx,0 | |
635 | |
636 | |
637 | |
638 mov rcx,r9 | |
639 shl r9,32 | |
640 mov r8,rdx | |
641 mul r13 | |
642 shr rcx,32 | |
643 add r10,r9 | |
644 adc r11,rcx | |
645 adc r8,rax | |
646 mov rax,r10 | |
647 adc rdx,0 | |
648 | |
649 | |
650 | |
651 mov rcx,r10 | |
652 shl r10,32 | |
653 mov r9,rdx | |
654 mul r13 | |
655 shr rcx,32 | |
656 add r11,r10 | |
657 adc r8,rcx | |
658 adc r9,rax | |
659 mov rax,r11 | |
660 adc rdx,0 | |
661 | |
662 | |
663 | |
; Last step; the result is r8,r9,r10,rdx. rcx/rsi (and rax/r11 below) keep
; the unsubtracted limbs.
664 mov rcx,r11 | |
665 shl r11,32 | |
666 mov r10,rdx | |
667 mul r13 | |
668 shr rcx,32 | |
669 add r8,r11 | |
670 adc r9,rcx | |
671 mov rcx,r8 | |
672 adc r10,rax | |
673 mov rsi,r9 | |
674 adc rdx,0 | |
675 | |
; Tentatively subtract p. `sbb r13,r13` leaves ZF set when there was no
; borrow and clear when there was one.
676 sub r8,-1 | |
677 mov rax,r10 | |
678 sbb r9,r12 | |
679 sbb r10,0 | |
680 mov r11,rdx | |
681 sbb rdx,r13 | |
682 sbb r13,r13 | |
683 | |
; Borrow (ZF clear) -> restore limbs 0..2. The top limb uses the opposite
; sense: r11 already holds the unsubtracted limb and takes the subtracted
; one (rdx) only when there was no borrow.
684 cmovnz r8,rcx | |
685 cmovnz r9,rsi | |
686 mov QWORD[rdi],r8 | |
687 cmovnz r10,rax | |
688 mov QWORD[8+rdi],r9 | |
689 cmovz r11,rdx | |
690 mov QWORD[16+rdi],r10 | |
691 mov QWORD[24+rdi],r11 | |
692 | |
693 pop r13 | |
694 pop r12 | |
695 mov rdi,QWORD[8+rsp] ;WIN64 epilogue | |
696 mov rsi,QWORD[16+rsp] | |
697 DB 0F3h,0C3h ;repret | |
698 $L$SEH_end_ecp_nistz256_from_mont: | |
699 | |
700 | |
; ecp_nistz256_select_w5 -- branch-free table lookup of one 96-byte entry.
; ABI:  Win64, arguments used directly: rcx -> output (96 bytes),
;       rdx -> table, r8d = index.
; Scans 16 entries of 96 bytes each. Entry k (k = 1..16) is ANDed with an
; all-ones mask when k == index and with zero otherwise, and ORed into
; xmm2..xmm7. Every entry is loaded regardless of index; an index outside
; 1..16 matches nothing and the output is all zero.
; The table is read with movdqa and so must be 16-byte aligned; the output
; is written with movdqu.
; Saves and restores xmm6-xmm15 in a 168-byte stack area.
701 global ecp_nistz256_select_w5 | |
702 | |
703 ALIGN 32 | |
704 ecp_nistz256_select_w5: | |
705 lea rax,[((-136))+rsp] | |
706 $L$SEH_begin_ecp_nistz256_select_w5: | |
; Hand-encoded prologue: lea rsp,[rax-0x20], then movaps stores of
; xmm6..xmm15 to [rax-0x20] .. [rax+0x70] (i.e. [rsp] .. [rsp+144]).
707 DB 0x48,0x8d,0x60,0xe0 | |
708 DB 0x0f,0x29,0x70,0xe0 | |
709 DB 0x0f,0x29,0x78,0xf0 | |
710 DB 0x44,0x0f,0x29,0x00 | |
711 DB 0x44,0x0f,0x29,0x48,0x10 | |
712 DB 0x44,0x0f,0x29,0x50,0x20 | |
713 DB 0x44,0x0f,0x29,0x58,0x30 | |
714 DB 0x44,0x0f,0x29,0x60,0x40 | |
715 DB 0x44,0x0f,0x29,0x68,0x50 | |
716 DB 0x44,0x0f,0x29,0x70,0x60 | |
717 DB 0x44,0x0f,0x29,0x78,0x70 | |
; xmm0 = increment (1 in every dword), xmm1 = index broadcast to every dword,
; xmm8 = running entry counter starting at 1, xmm2..xmm7 = zeroed accumulators.
718 movdqa xmm0,XMMWORD[$L$One] | |
719 movd xmm1,r8d | |
720 | |
721 pxor xmm2,xmm2 | |
722 pxor xmm3,xmm3 | |
723 pxor xmm4,xmm4 | |
724 pxor xmm5,xmm5 | |
725 pxor xmm6,xmm6 | |
726 pxor xmm7,xmm7 | |
727 | |
728 movdqa xmm8,xmm0 | |
729 pshufd xmm1,xmm1,0 | |
730 | |
731 mov rax,16 | |
732 $L$select_loop_sse_w5: | |
733 | |
; xmm15 = mask for this entry (counter == index), then advance the counter.
734 movdqa xmm15,xmm8 | |
735 paddd xmm8,xmm0 | |
736 pcmpeqd xmm15,xmm1 | |
737 | |
738 movdqa xmm9,XMMWORD[rdx] | |
739 movdqa xmm10,XMMWORD[16+rdx] | |
740 movdqa xmm11,XMMWORD[32+rdx] | |
741 movdqa xmm12,XMMWORD[48+rdx] | |
742 movdqa xmm13,XMMWORD[64+rdx] | |
743 movdqa xmm14,XMMWORD[80+rdx] | |
744 lea rdx,[96+rdx] | |
745 | |
746 pand xmm9,xmm15 | |
747 pand xmm10,xmm15 | |
748 por xmm2,xmm9 | |
749 pand xmm11,xmm15 | |
750 por xmm3,xmm10 | |
751 pand xmm12,xmm15 | |
752 por xmm4,xmm11 | |
753 pand xmm13,xmm15 | |
754 por xmm5,xmm12 | |
755 pand xmm14,xmm15 | |
756 por xmm6,xmm13 | |
757 por xmm7,xmm14 | |
758 | |
759 dec rax | |
760 jnz NEAR $L$select_loop_sse_w5 | |
761 | |
762 movdqu XMMWORD[rcx],xmm2 | |
763 movdqu XMMWORD[16+rcx],xmm3 | |
764 movdqu XMMWORD[32+rcx],xmm4 | |
765 movdqu XMMWORD[48+rcx],xmm5 | |
766 movdqu XMMWORD[64+rcx],xmm6 | |
767 movdqu XMMWORD[80+rcx],xmm7 | |
; Restore xmm6-xmm15 and release the 168-byte save area.
768 movaps xmm6,XMMWORD[rsp] | |
769 movaps xmm7,XMMWORD[16+rsp] | |
770 movaps xmm8,XMMWORD[32+rsp] | |
771 movaps xmm9,XMMWORD[48+rsp] | |
772 movaps xmm10,XMMWORD[64+rsp] | |
773 movaps xmm11,XMMWORD[80+rsp] | |
774 movaps xmm12,XMMWORD[96+rsp] | |
775 movaps xmm13,XMMWORD[112+rsp] | |
776 movaps xmm14,XMMWORD[128+rsp] | |
777 movaps xmm15,XMMWORD[144+rsp] | |
778 lea rsp,[168+rsp] | |
779 $L$SEH_end_ecp_nistz256_select_w5: | |
780 DB 0F3h,0C3h ;repret | |
781 | |
782 | |
783 | |
784 | |
; ecp_nistz256_select_w7 -- branch-free table lookup of one 64-byte entry.
; ABI:  Win64, arguments used directly: rcx -> output (64 bytes),
;       rdx -> table, r8d = index.
; Scans 64 entries of 64 bytes each. Entry k (k = 1..64) is ANDed with an
; all-ones mask when k == index and with zero otherwise, and ORed into
; xmm2..xmm5. Every entry is loaded regardless of index; an index outside
; 1..64 matches nothing and the output is all zero.
; The table is read with movdqa and so must be 16-byte aligned; the output
; is written with movdqu.
; Saves and restores xmm6-xmm15 in a 168-byte stack area.
785 global ecp_nistz256_select_w7 | |
786 | |
787 ALIGN 32 | |
788 ecp_nistz256_select_w7: | |
789 lea rax,[((-136))+rsp] | |
790 $L$SEH_begin_ecp_nistz256_select_w7: | |
; Hand-encoded prologue: lea rsp,[rax-0x20], then movaps stores of
; xmm6..xmm15 to [rax-0x20] .. [rax+0x70] (i.e. [rsp] .. [rsp+144]).
791 DB 0x48,0x8d,0x60,0xe0 | |
792 DB 0x0f,0x29,0x70,0xe0 | |
793 DB 0x0f,0x29,0x78,0xf0 | |
794 DB 0x44,0x0f,0x29,0x00 | |
795 DB 0x44,0x0f,0x29,0x48,0x10 | |
796 DB 0x44,0x0f,0x29,0x50,0x20 | |
797 DB 0x44,0x0f,0x29,0x58,0x30 | |
798 DB 0x44,0x0f,0x29,0x60,0x40 | |
799 DB 0x44,0x0f,0x29,0x68,0x50 | |
800 DB 0x44,0x0f,0x29,0x70,0x60 | |
801 DB 0x44,0x0f,0x29,0x78,0x70 | |
; xmm8 = running entry counter starting at 1, xmm0 = increment (copy of the
; initial counter), xmm1 = index broadcast to every dword,
; xmm2..xmm5 = zeroed accumulators.
802 movdqa xmm8,XMMWORD[$L$One] | |
803 movd xmm1,r8d | |
804 | |
805 pxor xmm2,xmm2 | |
806 pxor xmm3,xmm3 | |
807 pxor xmm4,xmm4 | |
808 pxor xmm5,xmm5 | |
809 | |
810 movdqa xmm0,xmm8 | |
811 pshufd xmm1,xmm1,0 | |
812 mov rax,64 | |
813 | |
814 $L$select_loop_sse_w7: | |
; xmm15 = mask for this entry (counter == index), then advance the counter.
815 movdqa xmm15,xmm8 | |
816 paddd xmm8,xmm0 | |
817 movdqa xmm9,XMMWORD[rdx] | |
818 movdqa xmm10,XMMWORD[16+rdx] | |
819 pcmpeqd xmm15,xmm1 | |
820 movdqa xmm11,XMMWORD[32+rdx] | |
821 movdqa xmm12,XMMWORD[48+rdx] | |
822 lea rdx,[64+rdx] | |
823 | |
824 pand xmm9,xmm15 | |
825 pand xmm10,xmm15 | |
826 por xmm2,xmm9 | |
827 pand xmm11,xmm15 | |
828 por xmm3,xmm10 | |
829 pand xmm12,xmm15 | |
830 por xmm4,xmm11 | |
; Prefetch ahead in the table (rdx already points at the next entry).
831 prefetcht0 [255+rdx] | |
832 por xmm5,xmm12 | |
833 | |
834 dec rax | |
835 jnz NEAR $L$select_loop_sse_w7 | |
836 | |
837 movdqu XMMWORD[rcx],xmm2 | |
838 movdqu XMMWORD[16+rcx],xmm3 | |
839 movdqu XMMWORD[32+rcx],xmm4 | |
840 movdqu XMMWORD[48+rcx],xmm5 | |
; Restore xmm6-xmm15 and release the 168-byte save area.
841 movaps xmm6,XMMWORD[rsp] | |
842 movaps xmm7,XMMWORD[16+rsp] | |
843 movaps xmm8,XMMWORD[32+rsp] | |
844 movaps xmm9,XMMWORD[48+rsp] | |
845 movaps xmm10,XMMWORD[64+rsp] | |
846 movaps xmm11,XMMWORD[80+rsp] | |
847 movaps xmm12,XMMWORD[96+rsp] | |
848 movaps xmm13,XMMWORD[112+rsp] | |
849 movaps xmm14,XMMWORD[128+rsp] | |
850 movaps xmm15,XMMWORD[144+rsp] | |
851 lea rsp,[168+rsp] | |
852 $L$SEH_end_ecp_nistz256_select_w7: | |
853 DB 0F3h,0C3h ;repret | |
854 | |
; ecp_nistz256_avx2_select_w7 -- stub. After the Win64 argument shuffle the
; body is the two bytes 0F 0B (the UD2 opcode), so calling this symbol raises
; an invalid-opcode exception; the epilogue after it is never reached.
; NOTE(review): presumably a placeholder for a build without the AVX2 code
; path -- confirm against the generator script.
855 global ecp_nistz256_avx2_select_w7 | |
856 | |
857 ALIGN 32 | |
858 ecp_nistz256_avx2_select_w7: | |
859 mov QWORD[8+rsp],rdi ;WIN64 prologue | |
860 mov QWORD[16+rsp],rsi | |
861 mov rax,rsp | |
862 $L$SEH_begin_ecp_nistz256_avx2_select_w7: | |
863 mov rdi,rcx | |
864 mov rsi,rdx | |
865 mov rdx,r8 | |
866 | |
867 | |
; ud2
868 DB 0x0f,0x0b | |
869 mov rdi,QWORD[8+rsp] ;WIN64 epilogue | |
870 mov rsi,QWORD[16+rsp] | |
871 DB 0F3h,0C3h ;repret | |
872 $L$SEH_end_ecp_nistz256_avx2_select_w7: | |
873 | |
874 ALIGN 32 | |
; __ecp_nistz256_add_toq -- internal modular addition.
; Register-based internal convention (not a C ABI):
; In:   r12,r13,r8,r9 = a[0..3]
;       rbx -> b[0..3]
;       r14, r15 = modulus limbs 1 and 3 ([$L$poly+8], [$L$poly+24])
;       rdi -> result
; Out:  r12,r13,r8,r9 = (a + b) mod p, also stored at [rdi]; the result is
;       fully reduced (< p) whenever a + b < 2p.
; Clobbers: rax, rbp, rcx, r10, r11, flags.
875 __ecp_nistz256_add_toq: | |
; Sum; rax/rbp (and rcx/r10 below) keep the unreduced limbs.
876 add r12,QWORD[rbx] | |
877 adc r13,QWORD[8+rbx] | |
878 mov rax,r12 | |
879 adc r8,QWORD[16+rbx] | |
880 adc r9,QWORD[24+rbx] | |
881 mov rbp,r13 | |
; r11 = 0 if the addition did not carry out of bit 255, all-ones if it did.
882 sbb r11,r11 | |
883 | |
; Tentatively subtract p (`sub r12,-1` subtracts p[0]; p[2] = 0).
884 sub r12,-1 | |
885 mov rcx,r8 | |
886 sbb r13,r14 | |
887 sbb r8,0 | |
888 mov r10,r9 | |
889 sbb r9,r15 | |
; Fold the borrow into the carry mask. All-ones minus the borrow can never
; borrow, so CF ends up set only when the addition did NOT carry and the
; subtraction DID borrow, i.e. exactly when a + b < p. Previously the sum
; was reduced only on carry, leaving results in [p, 2^256) unreduced.
890 sbb r11,0 | |
891 | |
; CF set -> restore the unreduced sum; otherwise keep sum - p.
892 cmovc r12,rax | |
893 cmovc r13,rbp | |
894 mov QWORD[rdi],r12 | |
895 cmovc r8,rcx | |
896 mov QWORD[8+rdi],r13 | |
897 cmovc r9,r10 | |
898 mov QWORD[16+rdi],r8 | |
899 mov QWORD[24+rdi],r9 | |
900 | |
901 DB 0F3h,0C3h ;repret | |
902 | |
903 | |
904 | |
905 ALIGN 32 | |
; __ecp_nistz256_sub_fromq -- internal modular subtraction, memory subtrahend.
; Register-based internal convention (not a C ABI):
; In:   r12,r13,r8,r9 = a[0..3]
;       rbx -> b[0..3]
;       r14, r15 = modulus limbs 1 and 3 ([$L$poly+8], [$L$poly+24])
;       rdi -> result
; Out:  r12,r13,r8,r9 = a - b, plus p if the subtraction borrowed;
;       also stored at [rdi]
; Clobbers: rax, rbp, rcx, r10, r11, flags.
906 __ecp_nistz256_sub_fromq: | |
; Difference; rax/rbp (and rcx/r10 below) keep the raw limbs.
907 sub r12,QWORD[rbx] | |
908 sbb r13,QWORD[8+rbx] | |
909 mov rax,r12 | |
910 sbb r8,QWORD[16+rbx] | |
911 sbb r9,QWORD[24+rbx] | |
912 mov rbp,r13 | |
; r11 = all-ones if the subtraction borrowed, else 0.
913 sbb r11,r11 | |
914 | |
; Tentatively add p (`add r12,-1` adds p[0]; p[2] = 0).
915 add r12,-1 | |
916 mov rcx,r8 | |
917 adc r13,r14 | |
918 adc r8,0 | |
919 mov r10,r9 | |
920 adc r9,r15 | |
921 test r11,r11 | |
922 | |
; No borrow -> keep the raw difference; otherwise keep difference + p.
923 cmovz r12,rax | |
924 cmovz r13,rbp | |
925 mov QWORD[rdi],r12 | |
926 cmovz r8,rcx | |
927 mov QWORD[8+rdi],r13 | |
928 cmovz r9,r10 | |
929 mov QWORD[16+rdi],r8 | |
930 mov QWORD[24+rdi],r9 | |
931 | |
932 DB 0F3h,0C3h ;repret | |
933 | |
934 | |
935 | |
936 ALIGN 32 | |
; __ecp_nistz256_subq -- internal modular subtraction, register operands,
; result left in registers only (nothing is stored to memory).
; Register-based internal convention (not a C ABI):
; In:   rax,rbp,rcx,r10 = a[0..3]      (minuend)
;       r12,r13,r8,r9   = b[0..3]      (subtrahend)
;       r14, r15 = modulus limbs 1 and 3 ([$L$poly+8], [$L$poly+24])
; Out:  r12,r13,r8,r9 = a - b, plus p if the subtraction borrowed
; Clobbers: rax, rbp, rcx, r10, r11, flags.
937 __ecp_nistz256_subq: | |
; Raw difference is copied into the output registers as it is produced.
938 sub rax,r12 | |
939 sbb rbp,r13 | |
940 mov r12,rax | |
941 sbb rcx,r8 | |
942 sbb r10,r9 | |
943 mov r13,rbp | |
; r11 = all-ones if the subtraction borrowed, else 0.
944 sbb r11,r11 | |
945 | |
; Tentatively add p to the working copy.
946 add rax,-1 | |
947 mov r8,rcx | |
948 adc rbp,r14 | |
949 adc rcx,0 | |
950 mov r9,r10 | |
951 adc r10,r15 | |
952 test r11,r11 | |
953 | |
; Borrow -> replace the raw difference with difference + p.
954 cmovnz r12,rax | |
955 cmovnz r13,rbp | |
956 cmovnz r8,rcx | |
957 cmovnz r9,r10 | |
958 | |
959 DB 0F3h,0C3h ;repret | |
960 | |
961 | |
962 | |
963 ALIGN 32 | |
; __ecp_nistz256_mul_by_2q -- internal modular doubling.
; Register-based internal convention (not a C ABI):
; In:   r12,r13,r8,r9 = a[0..3]
;       r14, r15 = modulus limbs 1 and 3 ([$L$poly+8], [$L$poly+24])
;       rdi -> result
; Out:  r12,r13,r8,r9 = 2*a mod p, also stored at [rdi]; the result is
;       fully reduced (< p) whenever 2*a < 2p.
; Clobbers: rax, rbp, rcx, r10, r11, flags.
964 __ecp_nistz256_mul_by_2q: | |
; Doubling; rax/rbp (and rcx/r10 below) keep the unreduced limbs.
965 add r12,r12 | |
966 adc r13,r13 | |
967 mov rax,r12 | |
968 adc r8,r8 | |
969 adc r9,r9 | |
970 mov rbp,r13 | |
; r11 = 0 if the doubling did not carry out of bit 255, all-ones if it did.
971 sbb r11,r11 | |
972 | |
; Tentatively subtract p (`sub r12,-1` subtracts p[0]; p[2] = 0).
973 sub r12,-1 | |
974 mov rcx,r8 | |
975 sbb r13,r14 | |
976 sbb r8,0 | |
977 mov r10,r9 | |
978 sbb r9,r15 | |
; Fold the borrow into the carry mask. All-ones minus the borrow can never
; borrow, so CF ends up set only when the doubling did NOT carry and the
; subtraction DID borrow, i.e. exactly when 2*a < p. Previously the value
; was reduced only on carry, leaving results in [p, 2^256) unreduced.
979 sbb r11,0 | |
980 | |
; CF set -> restore the unreduced value; otherwise keep value - p.
981 cmovc r12,rax | |
982 cmovc r13,rbp | |
983 mov QWORD[rdi],r12 | |
984 cmovc r8,rcx | |
985 mov QWORD[8+rdi],r13 | |
986 cmovc r9,r10 | |
987 mov QWORD[16+rdi],r8 | |
988 mov QWORD[24+rdi],r9 | |
989 | |
990 DB 0F3h,0C3h ;repret | |
991 | |
; ecp_nistz256_point_double -- point doubling on a 96-byte point made of
; three 32-byte field elements at offsets 0, 32 and 64.
; NOTE(review): presumably Jacobian X, Y, Z in Montgomery form -- confirm
; against the C prototype.
; ABI:  Win64 entry (rcx = out, rdx = in), moved into rdi/rsi; the caller's
;       rdi/rsi are spilled to the home slots and reloaded on exit.
; Saves rbp, rbx, r12-r15. Clobbers xmm0-xmm2, which carry the three output
; pointers (out, out+32, out+64) across the helper calls.
; Stack frame: 32*5+8 bytes = five 32-byte temporaries at rsp+0, +32, +64,
; +96, +128. The movdqa stores to [rsp+96] need the frame 16-byte aligned.
; $L$point_double_shortcutq is also a jump target from
; ecp_nistz256_point_add, which first shrinks its larger frame to this size.
992 global ecp_nistz256_point_double | |
993 | |
994 ALIGN 32 | |
995 ecp_nistz256_point_double: | |
996 mov QWORD[8+rsp],rdi ;WIN64 prologue | |
997 mov QWORD[16+rsp],rsi | |
998 mov rax,rsp | |
999 $L$SEH_begin_ecp_nistz256_point_double: | |
1000 mov rdi,rcx | |
1001 mov rsi,rdx | |
1002 | |
1003 | |
1004 push rbp | |
1005 push rbx | |
1006 push r12 | |
1007 push r13 | |
1008 push r14 | |
1009 push r15 | |
1010 sub rsp,32*5+8 | |
1011 | |
1012 $L$point_double_shortcutq: | |
; Copy in[0..31] to [rsp+96]; load in[32..63] into r12,r13,r8,r9;
; r14/r15 = modulus limbs 1 and 3 for the add/sub/double helpers.
1013 movdqu xmm0,XMMWORD[rsi] | |
1014 mov rbx,rsi | |
1015 movdqu xmm1,XMMWORD[16+rsi] | |
1016 mov r12,QWORD[((32+0))+rsi] | |
1017 mov r13,QWORD[((32+8))+rsi] | |
1018 mov r8,QWORD[((32+16))+rsi] | |
1019 mov r9,QWORD[((32+24))+rsi] | |
1020 mov r14,QWORD[(($L$poly+8))] | |
1021 mov r15,QWORD[(($L$poly+24))] | |
1022 movdqa XMMWORD[96+rsp],xmm0 | |
1023 movdqa XMMWORD[(96+16)+rsp],xmm1 | |
1024 lea r10,[32+rdi] | |
1025 lea r11,[64+rdi] | |
; movq xmm0,rdi / movq xmm1,r10 / movq xmm2,r11:
; stash out, out+32, out+64.
1026 DB 102,72,15,110,199 | |
1027 DB 102,73,15,110,202 | |
1028 DB 102,73,15,110,211 | |
1029 | |
; [rsp+0] = 2 * in[32..63]
1030 lea rdi,[rsp] | |
1031 call __ecp_nistz256_mul_by_2q | |
1032 | |
; [rsp+64] = in[64..95]^2
1033 mov rax,QWORD[((64+0))+rsi] | |
1034 mov r14,QWORD[((64+8))+rsi] | |
1035 mov r15,QWORD[((64+16))+rsi] | |
1036 mov r8,QWORD[((64+24))+rsi] | |
1037 lea rsi,[((64-0))+rsi] | |
1038 lea rdi,[64+rsp] | |
1039 call __ecp_nistz256_sqr_montq | |
1040 | |
; [rsp+0] = [rsp+0]^2
1041 mov rax,QWORD[((0+0))+rsp] | |
1042 mov r14,QWORD[((8+0))+rsp] | |
1043 lea rsi,[((0+0))+rsp] | |
1044 mov r15,QWORD[((16+0))+rsp] | |
1045 mov r8,QWORD[((24+0))+rsp] | |
1046 lea rdi,[rsp] | |
1047 call __ecp_nistz256_sqr_montq | |
1048 | |
; out[64..95] = 2 * (in[64..95] * in[32..63]). mul_montq stores the product
; at rdi and leaves it in r12,r13,r8,r9 with r14/r15 reloaded, so mul_by_2q
; can run straight after it and overwrite the same destination.
1049 mov rax,QWORD[32+rbx] | |
1050 mov r9,QWORD[((64+0))+rbx] | |
1051 mov r10,QWORD[((64+8))+rbx] | |
1052 mov r11,QWORD[((64+16))+rbx] | |
1053 mov r12,QWORD[((64+24))+rbx] | |
1054 lea rsi,[((64-0))+rbx] | |
1055 lea rbx,[32+rbx] | |
; movq rdi,xmm2  (rdi = out+64)
1056 DB 102,72,15,126,215 | |
1057 call __ecp_nistz256_mul_montq | |
1058 call __ecp_nistz256_mul_by_2q | |
1059 | |
; [rsp+32] = [rsp+96] + [rsp+64]
1060 mov r12,QWORD[((96+0))+rsp] | |
1061 mov r13,QWORD[((96+8))+rsp] | |
1062 lea rbx,[64+rsp] | |
1063 mov r8,QWORD[((96+16))+rsp] | |
1064 mov r9,QWORD[((96+24))+rsp] | |
1065 lea rdi,[32+rsp] | |
1066 call __ecp_nistz256_add_toq | |
1067 | |
; [rsp+64] = [rsp+96] - [rsp+64]
1068 mov r12,QWORD[((96+0))+rsp] | |
1069 mov r13,QWORD[((96+8))+rsp] | |
1070 lea rbx,[64+rsp] | |
1071 mov r8,QWORD[((96+16))+rsp] | |
1072 mov r9,QWORD[((96+24))+rsp] | |
1073 lea rdi,[64+rsp] | |
1074 call __ecp_nistz256_sub_fromq | |
1075 | |
; out[32..63] = ([rsp+0]^2) / 2 mod p. The square comes back in
; r12,r13,r14,r15 with rsi/rbp = modulus limbs 1 and 3.
1076 mov rax,QWORD[((0+0))+rsp] | |
1077 mov r14,QWORD[((8+0))+rsp] | |
1078 lea rsi,[((0+0))+rsp] | |
1079 mov r15,QWORD[((16+0))+rsp] | |
1080 mov r8,QWORD[((24+0))+rsp] | |
; movq rdi,xmm1  (rdi = out+32)
1081 DB 102,72,15,126,207 | |
1082 call __ecp_nistz256_sqr_montq | |
; Inline halving: compute value + p (carry word in r9), keep the sum only
; if the original low bit was set (making the value even), then shift the
; 257-bit value right by one.
1083 xor r9,r9 | |
1084 mov rax,r12 | |
1085 add r12,-1 | |
1086 mov r10,r13 | |
1087 adc r13,rsi | |
1088 mov rcx,r14 | |
1089 adc r14,0 | |
1090 mov r8,r15 | |
1091 adc r15,rbp | |
1092 adc r9,0 | |
1093 xor rsi,rsi | |
1094 test rax,1 | |
1095 | |
; Low bit clear -> restore the original value and a zero carry word.
1096 cmovz r12,rax | |
1097 cmovz r13,r10 | |
1098 cmovz r14,rcx | |
1099 cmovz r15,r8 | |
1100 cmovz r9,rsi | |
1101 | |
; Right shift by one bit across r12,r13,r14,r15 with r9 feeding the top bit.
1102 mov rax,r13 | |
1103 shr r12,1 | |
1104 shl rax,63 | |
1105 mov r10,r14 | |
1106 shr r13,1 | |
1107 or r12,rax | |
1108 shl r10,63 | |
1109 mov rcx,r15 | |
1110 shr r14,1 | |
1111 or r13,r10 | |
1112 shl rcx,63 | |
1113 mov QWORD[rdi],r12 | |
1114 shr r15,1 | |
1115 mov QWORD[8+rdi],r13 | |
1116 shl r9,63 | |
1117 or r14,rcx | |
1118 or r15,r9 | |
1119 mov QWORD[16+rdi],r14 | |
1120 mov QWORD[24+rdi],r15 | |
; [rsp+32] = [rsp+32] * [rsp+64]
1121 mov rax,QWORD[64+rsp] | |
1122 lea rbx,[64+rsp] | |
1123 mov r9,QWORD[((0+32))+rsp] | |
1124 mov r10,QWORD[((8+32))+rsp] | |
1125 lea rsi,[((0+32))+rsp] | |
1126 mov r11,QWORD[((16+32))+rsp] | |
1127 mov r12,QWORD[((24+32))+rsp] | |
1128 lea rdi,[32+rsp] | |
1129 call __ecp_nistz256_mul_montq | |
1130 | |
; [rsp+128] = 2 * [rsp+32] (operand still in registers from the multiply)
1131 lea rdi,[128+rsp] | |
1132 call __ecp_nistz256_mul_by_2q | |
1133 | |
; [rsp+32] = 2*[rsp+32] + [rsp+32], i.e. three times the product
1134 lea rbx,[32+rsp] | |
1135 lea rdi,[32+rsp] | |
1136 call __ecp_nistz256_add_toq | |
1137 | |
; [rsp+0] = [rsp+0] * [rsp+96]
1138 mov rax,QWORD[96+rsp] | |
1139 lea rbx,[96+rsp] | |
1140 mov r9,QWORD[((0+0))+rsp] | |
1141 mov r10,QWORD[((8+0))+rsp] | |
1142 lea rsi,[((0+0))+rsp] | |
1143 mov r11,QWORD[((16+0))+rsp] | |
1144 mov r12,QWORD[((24+0))+rsp] | |
1145 lea rdi,[rsp] | |
1146 call __ecp_nistz256_mul_montq | |
1147 | |
; [rsp+128] = 2 * [rsp+0]
1148 lea rdi,[128+rsp] | |
1149 call __ecp_nistz256_mul_by_2q | |
1150 | |
; out[0..31] = [rsp+32]^2 - [rsp+128]
1151 mov rax,QWORD[((0+32))+rsp] | |
1152 mov r14,QWORD[((8+32))+rsp] | |
1153 lea rsi,[((0+32))+rsp] | |
1154 mov r15,QWORD[((16+32))+rsp] | |
1155 mov r8,QWORD[((24+32))+rsp] | |
; movq rdi,xmm0  (rdi = out)
1156 DB 102,72,15,126,199 | |
1157 call __ecp_nistz256_sqr_montq | |
1158 | |
; Move the square from sqr_montq's output registers (r12,r13,r14,r15) into
; sub_fromq's input registers (r12,r13,r8,r9) and restore r14/r15 to the
; modulus limbs that sqr_montq left in rsi/rbp.
1159 lea rbx,[128+rsp] | |
1160 mov r8,r14 | |
1161 mov r9,r15 | |
1162 mov r14,rsi | |
1163 mov r15,rbp | |
1164 call __ecp_nistz256_sub_fromq | |
1165 | |
; r12,r13,r8,r9 = [rsp+0] - out[0..31] (registers only)
1166 mov rax,QWORD[((0+0))+rsp] | |
1167 mov rbp,QWORD[((0+8))+rsp] | |
1168 mov rcx,QWORD[((0+16))+rsp] | |
1169 mov r10,QWORD[((0+24))+rsp] | |
1170 lea rdi,[rsp] | |
1171 call __ecp_nistz256_subq | |
1172 | |
; [rsp+0] = difference * [rsp+32]. The difference is saved to [rsp+0] and
; shuffled from subq's output registers into mul_montq's a-operand registers
; (r9,r10,r11,r12). `xor ecx,ecx` sets ZF, so both cmovz below always move.
1173 mov rax,QWORD[32+rsp] | |
1174 lea rbx,[32+rsp] | |
1175 mov r14,r12 | |
1176 xor ecx,ecx | |
1177 mov QWORD[((0+0))+rsp],r12 | |
1178 mov r10,r13 | |
1179 mov QWORD[((0+8))+rsp],r13 | |
1180 cmovz r11,r8 | |
1181 mov QWORD[((0+16))+rsp],r8 | |
1182 lea rsi,[((0-0))+rsp] | |
1183 cmovz r12,r9 | |
1184 mov QWORD[((0+24))+rsp],r9 | |
1185 mov r9,r14 | |
1186 lea rdi,[rsp] | |
1187 call __ecp_nistz256_mul_montq | |
1188 | |
; movq rbx,xmm1 / movq rdi,xmm1:
; out[32..63] = product - out[32..63]
1189 DB 102,72,15,126,203 | |
1190 DB 102,72,15,126,207 | |
1191 call __ecp_nistz256_sub_fromq | |
1192 | |
1193 add rsp,32*5+8 | |
1194 pop r15 | |
1195 pop r14 | |
1196 pop r13 | |
1197 pop r12 | |
1198 pop rbx | |
1199 pop rbp | |
1200 mov rdi,QWORD[8+rsp] ;WIN64 epilogue | |
1201 mov rsi,QWORD[16+rsp] | |
1202 DB 0F3h,0C3h ;repret | |
1203 $L$SEH_end_ecp_nistz256_point_double: | |
1204 global ecp_nistz256_point_add | |
1205 | |
1206 ALIGN 32 | |
1207 ecp_nistz256_point_add: | |
1208 mov QWORD[8+rsp],rdi ;WIN64 prologue | |
1209 mov QWORD[16+rsp],rsi | |
1210 mov rax,rsp | |
1211 $L$SEH_begin_ecp_nistz256_point_add: | |
1212 mov rdi,rcx | |
1213 mov rsi,rdx | |
1214 mov rdx,r8 | |
1215 | |
1216 | |
1217 push rbp | |
1218 push rbx | |
1219 push r12 | |
1220 push r13 | |
1221 push r14 | |
1222 push r15 | |
1223 sub rsp,32*18+8 | |
1224 | |
1225 movdqu xmm0,XMMWORD[rsi] | |
1226 movdqu xmm1,XMMWORD[16+rsi] | |
1227 movdqu xmm2,XMMWORD[32+rsi] | |
1228 movdqu xmm3,XMMWORD[48+rsi] | |
1229 movdqu xmm4,XMMWORD[64+rsi] | |
1230 movdqu xmm5,XMMWORD[80+rsi] | |
1231 mov rbx,rsi | |
1232 mov rsi,rdx | |
1233 movdqa XMMWORD[384+rsp],xmm0 | |
1234 movdqa XMMWORD[(384+16)+rsp],xmm1 | |
1235 por xmm1,xmm0 | |
1236 movdqa XMMWORD[416+rsp],xmm2 | |
1237 movdqa XMMWORD[(416+16)+rsp],xmm3 | |
1238 por xmm3,xmm2 | |
1239 movdqa XMMWORD[448+rsp],xmm4 | |
1240 movdqa XMMWORD[(448+16)+rsp],xmm5 | |
1241 por xmm3,xmm1 | |
1242 | |
1243 movdqu xmm0,XMMWORD[rsi] | |
1244 pshufd xmm5,xmm3,0xb1 | |
1245 movdqu xmm1,XMMWORD[16+rsi] | |
1246 movdqu xmm2,XMMWORD[32+rsi] | |
1247 por xmm5,xmm3 | |
1248 movdqu xmm3,XMMWORD[48+rsi] | |
1249 mov rax,QWORD[((64+0))+rsi] | |
1250 mov r14,QWORD[((64+8))+rsi] | |
1251 mov r15,QWORD[((64+16))+rsi] | |
1252 mov r8,QWORD[((64+24))+rsi] | |
1253 movdqa XMMWORD[480+rsp],xmm0 | |
1254 pshufd xmm4,xmm5,0x1e | |
1255 movdqa XMMWORD[(480+16)+rsp],xmm1 | |
1256 por xmm1,xmm0 | |
1257 DB 102,72,15,110,199 | |
1258 movdqa XMMWORD[512+rsp],xmm2 | |
1259 movdqa XMMWORD[(512+16)+rsp],xmm3 | |
1260 por xmm3,xmm2 | |
1261 por xmm5,xmm4 | |
1262 pxor xmm4,xmm4 | |
1263 por xmm3,xmm1 | |
1264 | |
1265 lea rsi,[((64-0))+rsi] | |
1266 mov QWORD[((544+0))+rsp],rax | |
1267 mov QWORD[((544+8))+rsp],r14 | |
1268 mov QWORD[((544+16))+rsp],r15 | |
1269 mov QWORD[((544+24))+rsp],r8 | |
1270 lea rdi,[96+rsp] | |
1271 call __ecp_nistz256_sqr_montq | |
1272 | |
1273 pcmpeqd xmm5,xmm4 | |
1274 pshufd xmm4,xmm3,0xb1 | |
1275 por xmm4,xmm3 | |
1276 pshufd xmm5,xmm5,0 | |
1277 pshufd xmm3,xmm4,0x1e | |
1278 por xmm4,xmm3 | |
1279 pxor xmm3,xmm3 | |
1280 pcmpeqd xmm4,xmm3 | |
1281 pshufd xmm4,xmm4,0 | |
1282 mov rax,QWORD[((64+0))+rbx] | |
1283 mov r14,QWORD[((64+8))+rbx] | |
1284 mov r15,QWORD[((64+16))+rbx] | |
1285 mov r8,QWORD[((64+24))+rbx] | |
1286 DB 102,72,15,110,203 | |
1287 | |
1288 lea rsi,[((64-0))+rbx] | |
1289 lea rdi,[32+rsp] | |
1290 call __ecp_nistz256_sqr_montq | |
1291 | |
1292 mov rax,QWORD[544+rsp] | |
1293 lea rbx,[544+rsp] | |
1294 mov r9,QWORD[((0+96))+rsp] | |
1295 mov r10,QWORD[((8+96))+rsp] | |
1296 lea rsi,[((0+96))+rsp] | |
1297 mov r11,QWORD[((16+96))+rsp] | |
1298 mov r12,QWORD[((24+96))+rsp] | |
1299 lea rdi,[224+rsp] | |
1300 call __ecp_nistz256_mul_montq | |
1301 | |
1302 mov rax,QWORD[448+rsp] | |
1303 lea rbx,[448+rsp] | |
1304 mov r9,QWORD[((0+32))+rsp] | |
1305 mov r10,QWORD[((8+32))+rsp] | |
1306 lea rsi,[((0+32))+rsp] | |
1307 mov r11,QWORD[((16+32))+rsp] | |
1308 mov r12,QWORD[((24+32))+rsp] | |
1309 lea rdi,[256+rsp] | |
1310 call __ecp_nistz256_mul_montq | |
1311 | |
1312 mov rax,QWORD[416+rsp] | |
1313 lea rbx,[416+rsp] | |
1314 mov r9,QWORD[((0+224))+rsp] | |
1315 mov r10,QWORD[((8+224))+rsp] | |
1316 lea rsi,[((0+224))+rsp] | |
1317 mov r11,QWORD[((16+224))+rsp] | |
1318 mov r12,QWORD[((24+224))+rsp] | |
1319 lea rdi,[224+rsp] | |
1320 call __ecp_nistz256_mul_montq | |
1321 | |
1322 mov rax,QWORD[512+rsp] | |
1323 lea rbx,[512+rsp] | |
1324 mov r9,QWORD[((0+256))+rsp] | |
1325 mov r10,QWORD[((8+256))+rsp] | |
1326 lea rsi,[((0+256))+rsp] | |
1327 mov r11,QWORD[((16+256))+rsp] | |
1328 mov r12,QWORD[((24+256))+rsp] | |
1329 lea rdi,[256+rsp] | |
1330 call __ecp_nistz256_mul_montq | |
1331 | |
1332 lea rbx,[224+rsp] | |
1333 lea rdi,[64+rsp] | |
1334 call __ecp_nistz256_sub_fromq | |
1335 | |
1336 or r12,r13 | |
1337 movdqa xmm2,xmm4 | |
1338 or r12,r8 | |
1339 or r12,r9 | |
1340 por xmm2,xmm5 | |
1341 DB 102,73,15,110,220 | |
1342 | |
1343 mov rax,QWORD[384+rsp] | |
1344 lea rbx,[384+rsp] | |
1345 mov r9,QWORD[((0+96))+rsp] | |
1346 mov r10,QWORD[((8+96))+rsp] | |
1347 lea rsi,[((0+96))+rsp] | |
1348 mov r11,QWORD[((16+96))+rsp] | |
1349 mov r12,QWORD[((24+96))+rsp] | |
1350 lea rdi,[160+rsp] | |
1351 call __ecp_nistz256_mul_montq | |
1352 | |
1353 mov rax,QWORD[480+rsp] | |
1354 lea rbx,[480+rsp] | |
1355 mov r9,QWORD[((0+32))+rsp] | |
1356 mov r10,QWORD[((8+32))+rsp] | |
1357 lea rsi,[((0+32))+rsp] | |
1358 mov r11,QWORD[((16+32))+rsp] | |
1359 mov r12,QWORD[((24+32))+rsp] | |
1360 lea rdi,[192+rsp] | |
1361 call __ecp_nistz256_mul_montq | |
1362 | |
1363 lea rbx,[160+rsp] | |
1364 lea rdi,[rsp] | |
1365 call __ecp_nistz256_sub_fromq | |
1366 | |
1367 or r12,r13 | |
1368 or r12,r8 | |
1369 or r12,r9 | |
1370 | |
1371 DB 0x3e | |
1372 jnz NEAR $L$add_proceedq | |
1373 DB 102,73,15,126,208 | |
1374 DB 102,73,15,126,217 | |
1375 test r8,r8 | |
1376 jnz NEAR $L$add_proceedq | |
1377 test r9,r9 | |
1378 jz NEAR $L$add_doubleq | |
1379 | |
1380 DB 102,72,15,126,199 | |
1381 pxor xmm0,xmm0 | |
1382 movdqu XMMWORD[rdi],xmm0 | |
1383 movdqu XMMWORD[16+rdi],xmm0 | |
1384 movdqu XMMWORD[32+rdi],xmm0 | |
1385 movdqu XMMWORD[48+rdi],xmm0 | |
1386 movdqu XMMWORD[64+rdi],xmm0 | |
1387 movdqu XMMWORD[80+rdi],xmm0 | |
1388 jmp NEAR $L$add_doneq | |
1389 | |
1390 ALIGN 32 | |
1391 $L$add_doubleq: | |
1392 DB 102,72,15,126,206 | |
1393 DB 102,72,15,126,199 | |
1394 add rsp,416 | |
1395 jmp NEAR $L$point_double_shortcutq | |
1396 | |
1397 ALIGN 32 | |
1398 $L$add_proceedq: | |
1399 mov rax,QWORD[((0+64))+rsp] | |
1400 mov r14,QWORD[((8+64))+rsp] | |
1401 lea rsi,[((0+64))+rsp] | |
1402 mov r15,QWORD[((16+64))+rsp] | |
1403 mov r8,QWORD[((24+64))+rsp] | |
1404 lea rdi,[96+rsp] | |
1405 call __ecp_nistz256_sqr_montq | |
1406 | |
1407 mov rax,QWORD[448+rsp] | |
1408 lea rbx,[448+rsp] | |
1409 mov r9,QWORD[((0+0))+rsp] | |
1410 mov r10,QWORD[((8+0))+rsp] | |
1411 lea rsi,[((0+0))+rsp] | |
1412 mov r11,QWORD[((16+0))+rsp] | |
1413 mov r12,QWORD[((24+0))+rsp] | |
1414 lea rdi,[352+rsp] | |
1415 call __ecp_nistz256_mul_montq | |
1416 | |
1417 mov rax,QWORD[((0+0))+rsp] | |
1418 mov r14,QWORD[((8+0))+rsp] | |
1419 lea rsi,[((0+0))+rsp] | |
1420 mov r15,QWORD[((16+0))+rsp] | |
1421 mov r8,QWORD[((24+0))+rsp] | |
1422 lea rdi,[32+rsp] | |
1423 call __ecp_nistz256_sqr_montq | |
1424 | |
1425 mov rax,QWORD[544+rsp] | |
1426 lea rbx,[544+rsp] | |
1427 mov r9,QWORD[((0+352))+rsp] | |
1428 mov r10,QWORD[((8+352))+rsp] | |
1429 lea rsi,[((0+352))+rsp] | |
1430 mov r11,QWORD[((16+352))+rsp] | |
1431 mov r12,QWORD[((24+352))+rsp] | |
1432 lea rdi,[352+rsp] | |
1433 call __ecp_nistz256_mul_montq | |
1434 | |
1435 mov rax,QWORD[rsp] | |
1436 lea rbx,[rsp] | |
1437 mov r9,QWORD[((0+32))+rsp] | |
1438 mov r10,QWORD[((8+32))+rsp] | |
1439 lea rsi,[((0+32))+rsp] | |
1440 mov r11,QWORD[((16+32))+rsp] | |
1441 mov r12,QWORD[((24+32))+rsp] | |
1442 lea rdi,[128+rsp] | |
1443 call __ecp_nistz256_mul_montq | |
1444 | |
1445 mov rax,QWORD[160+rsp] | |
1446 lea rbx,[160+rsp] | |
1447 mov r9,QWORD[((0+32))+rsp] | |
1448 mov r10,QWORD[((8+32))+rsp] | |
1449 lea rsi,[((0+32))+rsp] | |
1450 mov r11,QWORD[((16+32))+rsp] | |
1451 mov r12,QWORD[((24+32))+rsp] | |
1452 lea rdi,[192+rsp] | |
1453 call __ecp_nistz256_mul_montq | |
1454 | |
1455 | |
1456 | |
1457 | |
1458 add r12,r12 | |
1459 lea rsi,[96+rsp] | |
1460 adc r13,r13 | |
1461 mov rax,r12 | |
1462 adc r8,r8 | |
1463 adc r9,r9 | |
1464 mov rbp,r13 | |
1465 sbb r11,r11 | |
1466 | |
1467 sub r12,-1 | |
1468 mov rcx,r8 | |
1469 sbb r13,r14 | |
1470 sbb r8,0 | |
1471 mov r10,r9 | |
1472 sbb r9,r15 | |
1473 test r11,r11 | |
1474 | |
1475 cmovz r12,rax | |
1476 mov rax,QWORD[rsi] | |
1477 cmovz r13,rbp | |
1478 mov rbp,QWORD[8+rsi] | |
1479 cmovz r8,rcx | |
1480 mov rcx,QWORD[16+rsi] | |
1481 cmovz r9,r10 | |
1482 mov r10,QWORD[24+rsi] | |
1483 | |
1484 call __ecp_nistz256_subq | |
1485 | |
1486 lea rbx,[128+rsp] | |
1487 lea rdi,[288+rsp] | |
1488 call __ecp_nistz256_sub_fromq | |
1489 | |
1490 mov rax,QWORD[((192+0))+rsp] | |
1491 mov rbp,QWORD[((192+8))+rsp] | |
1492 mov rcx,QWORD[((192+16))+rsp] | |
1493 mov r10,QWORD[((192+24))+rsp] | |
1494 lea rdi,[320+rsp] | |
1495 | |
1496 call __ecp_nistz256_subq | |
1497 | |
1498 mov QWORD[rdi],r12 | |
1499 mov QWORD[8+rdi],r13 | |
1500 mov QWORD[16+rdi],r8 | |
1501 mov QWORD[24+rdi],r9 | |
1502 mov rax,QWORD[128+rsp] | |
1503 lea rbx,[128+rsp] | |
1504 mov r9,QWORD[((0+224))+rsp] | |
1505 mov r10,QWORD[((8+224))+rsp] | |
1506 lea rsi,[((0+224))+rsp] | |
1507 mov r11,QWORD[((16+224))+rsp] | |
1508 mov r12,QWORD[((24+224))+rsp] | |
1509 lea rdi,[256+rsp] | |
1510 call __ecp_nistz256_mul_montq | |
1511 | |
1512 mov rax,QWORD[320+rsp] | |
1513 lea rbx,[320+rsp] | |
1514 mov r9,QWORD[((0+64))+rsp] | |
1515 mov r10,QWORD[((8+64))+rsp] | |
1516 lea rsi,[((0+64))+rsp] | |
1517 mov r11,QWORD[((16+64))+rsp] | |
1518 mov r12,QWORD[((24+64))+rsp] | |
1519 lea rdi,[320+rsp] | |
1520 call __ecp_nistz256_mul_montq | |
1521 | |
1522 lea rbx,[256+rsp] | |
1523 lea rdi,[320+rsp] | |
1524 call __ecp_nistz256_sub_fromq | |
1525 | |
1526 DB 102,72,15,126,199 | |
1527 | |
1528 movdqa xmm0,xmm5 | |
1529 movdqa xmm1,xmm5 | |
1530 pandn xmm0,XMMWORD[352+rsp] | |
1531 movdqa xmm2,xmm5 | |
1532 pandn xmm1,XMMWORD[((352+16))+rsp] | |
1533 movdqa xmm3,xmm5 | |
1534 pand xmm2,XMMWORD[544+rsp] | |
1535 pand xmm3,XMMWORD[((544+16))+rsp] | |
1536 por xmm2,xmm0 | |
1537 por xmm3,xmm1 | |
1538 | |
1539 movdqa xmm0,xmm4 | |
1540 movdqa xmm1,xmm4 | |
1541 pandn xmm0,xmm2 | |
1542 movdqa xmm2,xmm4 | |
1543 pandn xmm1,xmm3 | |
1544 movdqa xmm3,xmm4 | |
1545 pand xmm2,XMMWORD[448+rsp] | |
1546 pand xmm3,XMMWORD[((448+16))+rsp] | |
1547 por xmm2,xmm0 | |
1548 por xmm3,xmm1 | |
1549 movdqu XMMWORD[64+rdi],xmm2 | |
1550 movdqu XMMWORD[80+rdi],xmm3 | |
1551 | |
1552 movdqa xmm0,xmm5 | |
1553 movdqa xmm1,xmm5 | |
1554 pandn xmm0,XMMWORD[288+rsp] | |
1555 movdqa xmm2,xmm5 | |
1556 pandn xmm1,XMMWORD[((288+16))+rsp] | |
1557 movdqa xmm3,xmm5 | |
1558 pand xmm2,XMMWORD[480+rsp] | |
1559 pand xmm3,XMMWORD[((480+16))+rsp] | |
1560 por xmm2,xmm0 | |
1561 por xmm3,xmm1 | |
1562 | |
1563 movdqa xmm0,xmm4 | |
1564 movdqa xmm1,xmm4 | |
1565 pandn xmm0,xmm2 | |
1566 movdqa xmm2,xmm4 | |
1567 pandn xmm1,xmm3 | |
1568 movdqa xmm3,xmm4 | |
1569 pand xmm2,XMMWORD[384+rsp] | |
1570 pand xmm3,XMMWORD[((384+16))+rsp] | |
1571 por xmm2,xmm0 | |
1572 por xmm3,xmm1 | |
1573 movdqu XMMWORD[rdi],xmm2 | |
1574 movdqu XMMWORD[16+rdi],xmm3 | |
1575 | |
1576 movdqa xmm0,xmm5 | |
1577 movdqa xmm1,xmm5 | |
1578 pandn xmm0,XMMWORD[320+rsp] | |
1579 movdqa xmm2,xmm5 | |
1580 pandn xmm1,XMMWORD[((320+16))+rsp] | |
1581 movdqa xmm3,xmm5 | |
1582 pand xmm2,XMMWORD[512+rsp] | |
1583 pand xmm3,XMMWORD[((512+16))+rsp] | |
1584 por xmm2,xmm0 | |
1585 por xmm3,xmm1 | |
1586 | |
1587 movdqa xmm0,xmm4 | |
1588 movdqa xmm1,xmm4 | |
1589 pandn xmm0,xmm2 | |
1590 movdqa xmm2,xmm4 | |
1591 pandn xmm1,xmm3 | |
1592 movdqa xmm3,xmm4 | |
1593 pand xmm2,XMMWORD[416+rsp] | |
1594 pand xmm3,XMMWORD[((416+16))+rsp] | |
1595 por xmm2,xmm0 | |
1596 por xmm3,xmm1 | |
1597 movdqu XMMWORD[32+rdi],xmm2 | |
1598 movdqu XMMWORD[48+rdi],xmm3 | |
1599 | |
1600 $L$add_doneq: | |
1601 add rsp,32*18+8 | |
1602 pop r15 | |
1603 pop r14 | |
1604 pop r13 | |
1605 pop r12 | |
1606 pop rbx | |
1607 pop rbp | |
1608 mov rdi,QWORD[8+rsp] ;WIN64 epilogue | |
1609 mov rsi,QWORD[16+rsp] | |
1610 DB 0F3h,0C3h ;repret | |
1611 $L$SEH_end_ecp_nistz256_point_add: | |
; ---------------------------------------------------------------------------
; ecp_nistz256_point_add_affine            (NASM syntax, Win64 entry sequence)
;
; In:    rcx = output buffer; 96 bytes are written (three 32-byte values)
;        rdx = first input  "in1"; 96 bytes are read (three 32-byte values)
;        r8  = second input "in2"; 64 bytes are read (two 32-byte values)
;        Going by the function name these are presumably a Jacobian point
;        (X1,Y1,Z1) and an affine point (x2,y2) -- NOTE(review): confirm
;        against the C prototype.
; Out:   96 bytes at the output buffer (see the selection logic at the end).
; Saves: rdi/rsi are parked in the caller's shadow space ([8+rsp],[16+rsp])
;        and reloaded in the epilogue; rbp, rbx, r12-r15 are pushed/popped.
; Uses:  xmm0-xmm5 only.
;
; Stack frame (32*15+8 bytes).  The "+8" makes rsp 16-byte aligned after the
; return address and six pushes, which the movdqa/pand/pandn memory operands
; below require.  Offsets from rsp, as used by the code:
;     0  in2.x * Z1^2                    (later: in1.x * H^2)
;    32  Z1^2 -> Z1^3 -> in2.y * Z1^3    (later: in1.y * H^3)
;    64  H = [0] - in1.x                 (later reused as a temporary)
;    96  R = [32] - in1.y
;   128  H^2        160  H^3        192  R^2
;   224  result x   256  result y   288  result z = H * Z1
;   320  copy of in1 bytes  0..31   352  in1 bytes 32..63  384  in1 bytes 64..95
;   416  copy of in2 bytes  0..31   448  in2 bytes 32..63
;
; Helper contracts are inferred from how this routine sets registers up and
; consumes results; the helpers themselves are outside this block.
; NOTE(review): confirm each of these against the helper bodies.
;   __ecp_nistz256_sqr_montq : input limbs in rax,r14,r15,r8 and at [rsi],
;                              output at [rdi] (and presumably r12..r15).
;   __ecp_nistz256_mul_montq : a = r9,r10,r11,r12 / [rsi]; b at [rbx] with its
;                              first limb in rax; output at [rdi] and,
;                              presumably, in r12,r13,r8,r9, with r14/r15
;                              holding modulus limbs 1 and 3 on return.
;   __ecp_nistz256_sub_fromq : presumably [rdi] = (r12,r13,r8,r9) - [rbx].
;   __ecp_nistz256_subq      : presumably (r12,r13,r8,r9) =
;                              (rax,rbp,rcx,r10) - (r12,r13,r8,r9).
;   All helpers are assumed to leave rbx and xmm0/xmm3/xmm4/xmm5 untouched.
; ---------------------------------------------------------------------------
1612 global ecp_nistz256_point_add_affine | |
1613 | |
1614 ALIGN 32 | |
1615 ecp_nistz256_point_add_affine: | |
; Win64 prologue: stash the callee-saved rdi/rsi in the caller's shadow
; space, then move the three arguments into rdi/rsi/rdx.  rax = entry rsp is
; part of the prologue template; rax is overwritten below before any read.
1616 mov QWORD[8+rsp],rdi ;WIN64 prologue | |
1617 mov QWORD[16+rsp],rsi | |
1618 mov rax,rsp | |
1619 $L$SEH_begin_ecp_nistz256_point_add_affine: | |
1620 mov rdi,rcx | |
1621 mov rsi,rdx | |
1622 mov rdx,r8 | |
1623 | |
1624 | |
1625 push rbp | |
1626 push rbx | |
1627 push r12 | |
1628 push r13 | |
1629 push r14 | |
1630 push r15 | |
1631 sub rsp,32*15+8 | |
1632 | |
; Copy all 96 bytes of in1 to the frame (320/352/384) and start OR-ing its
; first 64 bytes together in xmm3.  rbx keeps the in2 pointer.  The limbs of
; in1 bytes 64..95 are also loaded into rax,r14,r15,r8 for the first sqr.
1633 movdqu xmm0,XMMWORD[rsi] | |
1634 mov rbx,rdx | |
1635 movdqu xmm1,XMMWORD[16+rsi] | |
1636 movdqu xmm2,XMMWORD[32+rsi] | |
1637 movdqu xmm3,XMMWORD[48+rsi] | |
1638 movdqu xmm4,XMMWORD[64+rsi] | |
1639 movdqu xmm5,XMMWORD[80+rsi] | |
1640 mov rax,QWORD[((64+0))+rsi] | |
1641 mov r14,QWORD[((64+8))+rsi] | |
1642 mov r15,QWORD[((64+16))+rsi] | |
1643 mov r8,QWORD[((64+24))+rsi] | |
1644 movdqa XMMWORD[320+rsp],xmm0 | |
1645 movdqa XMMWORD[(320+16)+rsp],xmm1 | |
1646 por xmm1,xmm0 | |
1647 movdqa XMMWORD[352+rsp],xmm2 | |
1648 movdqa XMMWORD[(352+16)+rsp],xmm3 | |
1649 por xmm3,xmm2 | |
1650 movdqa XMMWORD[384+rsp],xmm4 | |
1651 movdqa XMMWORD[(384+16)+rsp],xmm5 | |
1652 por xmm3,xmm1 | |
1653 | |
; Copy the 64 bytes of in2 to the frame (416/448).  Interleaved with that:
;  * xmm5 is folded (pshufd/por) so every dword holds the OR of all four
;    dwords of "in1 bytes 0..63 OR-ed together";
;  * xmm3 is rebuilt as the OR of all 64 bytes of in2;
;  * the DB sequence is 66 48 0F 6E C7 = movq xmm0,rdi, parking the output
;    pointer in xmm0 because rdi is reused as the helpers' destination.
1654 movdqu xmm0,XMMWORD[rbx] | |
1655 pshufd xmm5,xmm3,0xb1 | |
1656 movdqu xmm1,XMMWORD[16+rbx] | |
1657 movdqu xmm2,XMMWORD[32+rbx] | |
1658 por xmm5,xmm3 | |
1659 movdqu xmm3,XMMWORD[48+rbx] | |
1660 movdqa XMMWORD[416+rsp],xmm0 | |
1661 pshufd xmm4,xmm5,0x1e | |
1662 movdqa XMMWORD[(416+16)+rsp],xmm1 | |
1663 por xmm1,xmm0 | |
1664 DB 102,72,15,110,199 | |
1665 movdqa XMMWORD[448+rsp],xmm2 | |
1666 movdqa XMMWORD[(448+16)+rsp],xmm3 | |
1667 por xmm3,xmm2 | |
1668 por xmm5,xmm4 | |
1669 pxor xmm4,xmm4 | |
1670 por xmm3,xmm1 | |
1671 | |
; [32] = sqr(in1 bytes 64..95)            (Z1^2)
1672 lea rsi,[((64-0))+rsi] | |
1673 lea rdi,[32+rsp] | |
1674 call __ecp_nistz256_sqr_montq | |
1675 | |
; Finish the two selection masks while setting up the next multiply:
;   xmm5 = all ones iff the first 64 bytes of in1 are all zero
;   xmm4 = all ones iff the 64 bytes of in2 are all zero
; The sqr result in r12..r15 becomes operand a (r9..r12) of the multiply and
; rax takes the first limb of in2 (rbx still points at in2).
1676 pcmpeqd xmm5,xmm4 | |
1677 pshufd xmm4,xmm3,0xb1 | |
1678 mov rax,QWORD[rbx] | |
1679 | |
1680 mov r9,r12 | |
1681 por xmm4,xmm3 | |
1682 pshufd xmm5,xmm5,0 | |
1683 pshufd xmm3,xmm4,0x1e | |
1684 mov r10,r13 | |
1685 por xmm4,xmm3 | |
1686 pxor xmm3,xmm3 | |
1687 mov r11,r14 | |
1688 pcmpeqd xmm4,xmm3 | |
1689 pshufd xmm4,xmm4,0 | |
1690 | |
; [0] = [32] * in2.x                      (U2 = Z1^2 * x2)
1691 lea rsi,[((32-0))+rsp] | |
1692 mov r12,r15 | |
1693 lea rdi,[rsp] | |
1694 call __ecp_nistz256_mul_montq | |
1695 | |
; [64] = U2 - in1.x                       (H); U2 is still in registers
1696 lea rbx,[320+rsp] | |
1697 lea rdi,[64+rsp] | |
1698 call __ecp_nistz256_sub_fromq | |
1699 | |
; [32] = [32] * in1.z                     (Z1^3)
1700 mov rax,QWORD[384+rsp] | |
1701 lea rbx,[384+rsp] | |
1702 mov r9,QWORD[((0+32))+rsp] | |
1703 mov r10,QWORD[((8+32))+rsp] | |
1704 lea rsi,[((0+32))+rsp] | |
1705 mov r11,QWORD[((16+32))+rsp] | |
1706 mov r12,QWORD[((24+32))+rsp] | |
1707 lea rdi,[32+rsp] | |
1708 call __ecp_nistz256_mul_montq | |
1709 | |
; [288] = H * in1.z                       (result z)
1710 mov rax,QWORD[384+rsp] | |
1711 lea rbx,[384+rsp] | |
1712 mov r9,QWORD[((0+64))+rsp] | |
1713 mov r10,QWORD[((8+64))+rsp] | |
1714 lea rsi,[((0+64))+rsp] | |
1715 mov r11,QWORD[((16+64))+rsp] | |
1716 mov r12,QWORD[((24+64))+rsp] | |
1717 lea rdi,[288+rsp] | |
1718 call __ecp_nistz256_mul_montq | |
1719 | |
; [32] = [32] * in2.y                     (S2 = Z1^3 * y2)
1720 mov rax,QWORD[448+rsp] | |
1721 lea rbx,[448+rsp] | |
1722 mov r9,QWORD[((0+32))+rsp] | |
1723 mov r10,QWORD[((8+32))+rsp] | |
1724 lea rsi,[((0+32))+rsp] | |
1725 mov r11,QWORD[((16+32))+rsp] | |
1726 mov r12,QWORD[((24+32))+rsp] | |
1727 lea rdi,[32+rsp] | |
1728 call __ecp_nistz256_mul_montq | |
1729 | |
; [96] = S2 - in1.y                       (R)
1730 lea rbx,[352+rsp] | |
1731 lea rdi,[96+rsp] | |
1732 call __ecp_nistz256_sub_fromq | |
1733 | |
; [128] = sqr(H)                          (H^2)
1734 mov rax,QWORD[((0+64))+rsp] | |
1735 mov r14,QWORD[((8+64))+rsp] | |
1736 lea rsi,[((0+64))+rsp] | |
1737 mov r15,QWORD[((16+64))+rsp] | |
1738 mov r8,QWORD[((24+64))+rsp] | |
1739 lea rdi,[128+rsp] | |
1740 call __ecp_nistz256_sqr_montq | |
1741 | |
; [192] = sqr(R)                          (R^2)
1742 mov rax,QWORD[((0+96))+rsp] | |
1743 mov r14,QWORD[((8+96))+rsp] | |
1744 lea rsi,[((0+96))+rsp] | |
1745 mov r15,QWORD[((16+96))+rsp] | |
1746 mov r8,QWORD[((24+96))+rsp] | |
1747 lea rdi,[192+rsp] | |
1748 call __ecp_nistz256_sqr_montq | |
1749 | |
; [160] = H * H^2                         (H^3)
1750 mov rax,QWORD[128+rsp] | |
1751 lea rbx,[128+rsp] | |
1752 mov r9,QWORD[((0+64))+rsp] | |
1753 mov r10,QWORD[((8+64))+rsp] | |
1754 lea rsi,[((0+64))+rsp] | |
1755 mov r11,QWORD[((16+64))+rsp] | |
1756 mov r12,QWORD[((24+64))+rsp] | |
1757 lea rdi,[160+rsp] | |
1758 call __ecp_nistz256_mul_montq | |
1759 | |
; [0] = H^2 * in1.x                       (overwrites U2)
1760 mov rax,QWORD[320+rsp] | |
1761 lea rbx,[320+rsp] | |
1762 mov r9,QWORD[((0+128))+rsp] | |
1763 mov r10,QWORD[((8+128))+rsp] | |
1764 lea rsi,[((0+128))+rsp] | |
1765 mov r11,QWORD[((16+128))+rsp] | |
1766 mov r12,QWORD[((24+128))+rsp] | |
1767 lea rdi,[rsp] | |
1768 call __ecp_nistz256_mul_montq | |
1769 | |
1770 | |
1771 | |
1772 | |
; Inline modular doubling of the product just computed (limbs r12,r13,r8,r9).
; The modulus limbs are -1, r14, 0, r15; limbs 0 and 2 match $L$poly, and
; r14/r15 are presumably limbs 1 and 3 left behind by mul_montq.
;   1. a+a with carry chain; r11 = -carry_out (0 or all ones).
;      rax,rbp,rcx,r10 keep the un-reduced doubled value.
;   2. trial subtraction of the modulus; CF = borrow.
;   3. "sbb r11,0" sets CF only when there was NO carry out of the doubling
;      AND the trial subtraction borrowed, i.e. exactly when 2a < modulus.
;      cmovc then restores the un-reduced value only in that case.
; FIX: the previous "test r11,r11 / cmovz" kept the un-reduced value whenever
; the doubling did not carry, so a doubled value in [modulus, 2^256) was
; passed on without being reduced.
; NOTE(review): r11 now ends as -carry-borrow instead of -carry; this assumes
; __ecp_nistz256_subq does not consume r11 on entry -- confirm.
; The mov/lea instructions interleaved below do not modify flags.
1773 add r12,r12 | |
1774 lea rsi,[192+rsp] | |
1775 adc r13,r13 | |
1776 mov rax,r12 | |
1777 adc r8,r8 | |
1778 adc r9,r9 | |
1779 mov rbp,r13 | |
1780 sbb r11,r11 | |
1781 | |
1782 sub r12,-1 | |
1783 mov rcx,r8 | |
1784 sbb r13,r14 | |
1785 sbb r8,0 | |
1786 mov r10,r9 | |
1787 sbb r9,r15 | |
1788 sbb r11,0 | |
1789 | |
; Select the reduced value and, in the same breath, load R^2 ([192], via rsi)
; into rax,rbp,rcx,r10 as the other operand of the subtraction.
1790 cmovc r12,rax | |
1791 mov rax,QWORD[rsi] | |
1792 cmovc r13,rbp | |
1793 mov rbp,QWORD[8+rsi] | |
1794 cmovc r8,rcx | |
1795 mov rcx,QWORD[16+rsi] | |
1796 cmovc r9,r10 | |
1797 mov r10,QWORD[24+rsi] | |
1798 | |
; registers = R^2 - 2*(in1.x*H^2)         (presumed operand order, see header)
1799 call __ecp_nistz256_subq | |
1800 | |
; [224] = registers - H^3                 (result x)
1801 lea rbx,[160+rsp] | |
1802 lea rdi,[224+rsp] | |
1803 call __ecp_nistz256_sub_fromq | |
1804 | |
; registers = [0] - result x, with [0] = in1.x*H^2 loaded into rax,rbp,rcx,r10
1805 mov rax,QWORD[((0+0))+rsp] | |
1806 mov rbp,QWORD[((0+8))+rsp] | |
1807 mov rcx,QWORD[((0+16))+rsp] | |
1808 mov r10,QWORD[((0+24))+rsp] | |
1809 lea rdi,[64+rsp] | |
1810 | |
1811 call __ecp_nistz256_subq | |
1812 | |
; subq leaves its result in registers only; store it to [64] by hand, then
; [32] = H^3 * in1.y
1813 mov QWORD[rdi],r12 | |
1814 mov QWORD[8+rdi],r13 | |
1815 mov QWORD[16+rdi],r8 | |
1816 mov QWORD[24+rdi],r9 | |
1817 mov rax,QWORD[352+rsp] | |
1818 lea rbx,[352+rsp] | |
1819 mov r9,QWORD[((0+160))+rsp] | |
1820 mov r10,QWORD[((8+160))+rsp] | |
1821 lea rsi,[((0+160))+rsp] | |
1822 mov r11,QWORD[((16+160))+rsp] | |
1823 mov r12,QWORD[((24+160))+rsp] | |
1824 lea rdi,[32+rsp] | |
1825 call __ecp_nistz256_mul_montq | |
1826 | |
; [64] = [64] * R
1827 mov rax,QWORD[96+rsp] | |
1828 lea rbx,[96+rsp] | |
1829 mov r9,QWORD[((0+64))+rsp] | |
1830 mov r10,QWORD[((8+64))+rsp] | |
1831 lea rsi,[((0+64))+rsp] | |
1832 mov r11,QWORD[((16+64))+rsp] | |
1833 mov r12,QWORD[((24+64))+rsp] | |
1834 lea rdi,[64+rsp] | |
1835 call __ecp_nistz256_mul_montq | |
1836 | |
; [256] = [64] - [32]                     (result y)
1837 lea rbx,[32+rsp] | |
1838 lea rdi,[256+rsp] | |
1839 call __ecp_nistz256_sub_fromq | |
1840 | |
; 66 48 0F 7E C7 = movq rdi,xmm0: recover the output pointer parked earlier.
1841 DB 102,72,15,126,199 | |
1842 | |
; Branch-free selection of each 32-byte output value with the two masks:
;   out = xmm4 ? in1 value : (xmm5 ? in2-derived value : computed value)
; Third value (-> [64+rdi]): computed = [288]; in2-derived = $L$ONE_mont;
; in1 = [384].
1843 movdqa xmm0,xmm5 | |
1844 movdqa xmm1,xmm5 | |
1845 pandn xmm0,XMMWORD[288+rsp] | |
1846 movdqa xmm2,xmm5 | |
1847 pandn xmm1,XMMWORD[((288+16))+rsp] | |
1848 movdqa xmm3,xmm5 | |
1849 pand xmm2,XMMWORD[$L$ONE_mont] | |
1850 pand xmm3,XMMWORD[(($L$ONE_mont+16))] | |
1851 por xmm2,xmm0 | |
1852 por xmm3,xmm1 | |
1853 | |
1854 movdqa xmm0,xmm4 | |
1855 movdqa xmm1,xmm4 | |
1856 pandn xmm0,xmm2 | |
1857 movdqa xmm2,xmm4 | |
1858 pandn xmm1,xmm3 | |
1859 movdqa xmm3,xmm4 | |
1860 pand xmm2,XMMWORD[384+rsp] | |
1861 pand xmm3,XMMWORD[((384+16))+rsp] | |
1862 por xmm2,xmm0 | |
1863 por xmm3,xmm1 | |
1864 movdqu XMMWORD[64+rdi],xmm2 | |
1865 movdqu XMMWORD[80+rdi],xmm3 | |
1866 | |
; First value (-> [rdi]): computed = [224]; in2 = [416]; in1 = [320].
1867 movdqa xmm0,xmm5 | |
1868 movdqa xmm1,xmm5 | |
1869 pandn xmm0,XMMWORD[224+rsp] | |
1870 movdqa xmm2,xmm5 | |
1871 pandn xmm1,XMMWORD[((224+16))+rsp] | |
1872 movdqa xmm3,xmm5 | |
1873 pand xmm2,XMMWORD[416+rsp] | |
1874 pand xmm3,XMMWORD[((416+16))+rsp] | |
1875 por xmm2,xmm0 | |
1876 por xmm3,xmm1 | |
1877 | |
1878 movdqa xmm0,xmm4 | |
1879 movdqa xmm1,xmm4 | |
1880 pandn xmm0,xmm2 | |
1881 movdqa xmm2,xmm4 | |
1882 pandn xmm1,xmm3 | |
1883 movdqa xmm3,xmm4 | |
1884 pand xmm2,XMMWORD[320+rsp] | |
1885 pand xmm3,XMMWORD[((320+16))+rsp] | |
1886 por xmm2,xmm0 | |
1887 por xmm3,xmm1 | |
1888 movdqu XMMWORD[rdi],xmm2 | |
1889 movdqu XMMWORD[16+rdi],xmm3 | |
1890 | |
; Second value (-> [32+rdi]): computed = [256]; in2 = [448]; in1 = [352].
1891 movdqa xmm0,xmm5 | |
1892 movdqa xmm1,xmm5 | |
1893 pandn xmm0,XMMWORD[256+rsp] | |
1894 movdqa xmm2,xmm5 | |
1895 pandn xmm1,XMMWORD[((256+16))+rsp] | |
1896 movdqa xmm3,xmm5 | |
1897 pand xmm2,XMMWORD[448+rsp] | |
1898 pand xmm3,XMMWORD[((448+16))+rsp] | |
1899 por xmm2,xmm0 | |
1900 por xmm3,xmm1 | |
1901 | |
1902 movdqa xmm0,xmm4 | |
1903 movdqa xmm1,xmm4 | |
1904 pandn xmm0,xmm2 | |
1905 movdqa xmm2,xmm4 | |
1906 pandn xmm1,xmm3 | |
1907 movdqa xmm3,xmm4 | |
1908 pand xmm2,XMMWORD[352+rsp] | |
1909 pand xmm3,XMMWORD[((352+16))+rsp] | |
1910 por xmm2,xmm0 | |
1911 por xmm3,xmm1 | |
1912 movdqu XMMWORD[32+rdi],xmm2 | |
1913 movdqu XMMWORD[48+rdi],xmm3 | |
1914 | |
; Epilogue: drop the frame, restore the pushed registers in reverse order,
; reload rdi/rsi from the shadow space, and return (F3 C3 = "rep ret").
1915 add rsp,32*15+8 | |
1916 pop r15 | |
1917 pop r14 | |
1918 pop r13 | |
1919 pop r12 | |
1920 pop rbx | |
1921 pop rbp | |
1922 mov rdi,QWORD[8+rsp] ;WIN64 epilogue | |
1923 mov rsi,QWORD[16+rsp] | |
1924 DB 0F3h,0C3h ;repret | |
1925 $L$SEH_end_ecp_nistz256_point_add_affine: | |
OLD | NEW |