OLD | NEW |
| (Empty) |
1 default rel | |
2 %define XMMWORD | |
3 %define YMMWORD | |
4 %define ZMMWORD | |
5 section .text code align=64 | |
6 | |
7 | |
8 | |
9 ALIGN 16 | |
; ---------------------------------------------------------------------------
; MULADD_128x512 -- two chained 64x512-bit multiply-accumulate passes.
;
; Inputs (as read by the code below):
;   rsi      -> eight consecutive qwords (the 512-bit multiplicand)
;   rbp      =  multiplier word for pass 1
;   [rdi+8]  =  multiplier word for pass 2 (the word at [rdi] is never read
;               here; the caller must already have placed it in rbp)
;   r8..r15  =  running accumulator, least-significant word in r8
;   rcx      -> destination for the two retired low words
; Outputs:
;   [rcx], [rcx+8]    = the two lowest accumulator words
;   r10..r15, r8, r9  = the remaining eight words, least-significant first
;   rbp               = the word loaded from [rdi+8]
; Scratch: rax and rdx (written by MUL), rbx (carry word between steps), flags.
; rsi, rdi and rcx are not modified.
;
; Every step has the same shape: rdx:rax = word * rbp, the low half is added
; to the accumulator word, the carry is folded into rdx, and rdx is handed to
; the next step through rbx. rdx cannot wrap because a 64x64-bit product plus
; two 64-bit addends still fits in 128 bits.
; ---------------------------------------------------------------------------
10 MULADD_128x512: | |
; Pass 1, word 0: no incoming carry word; the low result word is final, so it
; is written out to [rcx] immediately.
11 mov rax,QWORD[rsi] | |
12 mul rbp | |
13 add r8,rax | |
14 adc rdx,0 | |
15 mov QWORD[rcx],r8 | |
16 mov rbx,rdx | |
17 | |
; Pass 1, words 1..7: add the product low half, then the carry word in rbx.
18 mov rax,QWORD[8+rsi] | |
19 mul rbp | |
20 add r9,rax | |
21 adc rdx,0 | |
22 add r9,rbx | |
23 adc rdx,0 | |
24 mov rbx,rdx | |
25 | |
26 mov rax,QWORD[16+rsi] | |
27 mul rbp | |
28 add r10,rax | |
29 adc rdx,0 | |
30 add r10,rbx | |
31 adc rdx,0 | |
32 mov rbx,rdx | |
33 | |
34 mov rax,QWORD[24+rsi] | |
35 mul rbp | |
36 add r11,rax | |
37 adc rdx,0 | |
38 add r11,rbx | |
39 adc rdx,0 | |
40 mov rbx,rdx | |
41 | |
42 mov rax,QWORD[32+rsi] | |
43 mul rbp | |
44 add r12,rax | |
45 adc rdx,0 | |
46 add r12,rbx | |
47 adc rdx,0 | |
48 mov rbx,rdx | |
49 | |
50 mov rax,QWORD[40+rsi] | |
51 mul rbp | |
52 add r13,rax | |
53 adc rdx,0 | |
54 add r13,rbx | |
55 adc rdx,0 | |
56 mov rbx,rdx | |
57 | |
58 mov rax,QWORD[48+rsi] | |
59 mul rbp | |
60 add r14,rax | |
61 adc rdx,0 | |
62 add r14,rbx | |
63 adc rdx,0 | |
64 mov rbx,rdx | |
65 | |
66 mov rax,QWORD[56+rsi] | |
67 mul rbp | |
68 add r15,rax | |
69 adc rdx,0 | |
70 add r15,rbx | |
71 adc rdx,0 | |
; r8 was retired to [rcx] above, so it is reused for the new top word; the
; accumulator is now r9..r15, r8 (least-significant first).
72 mov r8,rdx | |
; Pass 2: same chain with the multiplier from [rdi+8], shifted up one word.
73 mov rbp,QWORD[8+rdi] | |
74 mov rax,QWORD[rsi] | |
75 mul rbp | |
76 add r9,rax | |
77 adc rdx,0 | |
78 mov QWORD[8+rcx],r9 | |
79 mov rbx,rdx | |
80 | |
81 mov rax,QWORD[8+rsi] | |
82 mul rbp | |
83 add r10,rax | |
84 adc rdx,0 | |
85 add r10,rbx | |
86 adc rdx,0 | |
87 mov rbx,rdx | |
88 | |
89 mov rax,QWORD[16+rsi] | |
90 mul rbp | |
91 add r11,rax | |
92 adc rdx,0 | |
93 add r11,rbx | |
94 adc rdx,0 | |
95 mov rbx,rdx | |
96 | |
97 mov rax,QWORD[24+rsi] | |
98 mul rbp | |
99 add r12,rax | |
100 adc rdx,0 | |
101 add r12,rbx | |
102 adc rdx,0 | |
103 mov rbx,rdx | |
104 | |
105 mov rax,QWORD[32+rsi] | |
106 mul rbp | |
107 add r13,rax | |
108 adc rdx,0 | |
109 add r13,rbx | |
110 adc rdx,0 | |
111 mov rbx,rdx | |
112 | |
113 mov rax,QWORD[40+rsi] | |
114 mul rbp | |
115 add r14,rax | |
116 adc rdx,0 | |
117 add r14,rbx | |
118 adc rdx,0 | |
119 mov rbx,rdx | |
120 | |
121 mov rax,QWORD[48+rsi] | |
122 mul rbp | |
123 add r15,rax | |
124 adc rdx,0 | |
125 add r15,rbx | |
126 adc rdx,0 | |
127 mov rbx,rdx | |
128 | |
129 mov rax,QWORD[56+rsi] | |
130 mul rbp | |
131 add r8,rax | |
132 adc rdx,0 | |
133 add r8,rbx | |
134 adc rdx,0 | |
; r9 was retired to [rcx+8]; it now receives the final top word.
135 mov r9,rdx | |
; Return, emitted as the raw bytes F3 C3 (the two-byte "rep ret" form).
136 DB 0F3h,0C3h ;repret | |
137 | |
138 | |
139 ALIGN 16 | |
; ---------------------------------------------------------------------------
; mont_reduce -- reduce the 16-qword value at [rsp+520] to 8 qwords and store
; it through the pointer held at [rsp+144].
; NOTE(review): the label suggests Montgomery reduction; the meaning of the
; individual table regions used below is not visible in this part of the
; file -- confirm against the code that builds the table.
;
; All rsp-relative offsets in this routine include the 8-byte return address:
; slot [rsp+N] here is slot [rsp+N-8] in the frame of the top-level caller
; (mod_exp_512 stores the result pointer at [rsp+136]; it is read back here
; from [rsp+144]). The routine is entered by call from mod_exp_512 and by jmp
; from mont_mul_a3b and sqr_reduce.
;
; Inputs (memory):
;   [rsp+520 .. rsp+647]  16-qword value X, least-significant word first
;   [rsp+32]              base pointer "tbl" of a data table; this routine
;                         reads 8 qwords at tbl+512, 8 at tbl+576, 8 at
;                         tbl+640, 2 at tbl+704, and one 8-qword row with a
;                         64-byte stride starting at tbl + 8*index
;   [rsp+144]             pointer to the 8-qword result buffer
; Work areas written: [rsp+192 ..], [rsp+296 ..], [rsp+384] (carry counter).
; Outputs:
;   result qwords 0..7 in r10,r11,r12,r13,r14,r15,r8,r9 and at [result ptr]
;   rsi = result pointer
; Every general-purpose register except rsp is overwritten.
; ---------------------------------------------------------------------------
140 mont_reduce: | |
141 lea rdi,[192+rsp] | |
142 mov rsi,QWORD[32+rsp] | |
143 add rsi,576 | |
144 lea rcx,[520+rsp] | |
145 | |
; Stage 1: four multiply-accumulate passes, X[0..7] + X[12..15] * (tbl+576).
; Pass 1 uses multiplier X[12]; the accumulator words are loaded from X[0..7]
; as each one is first needed. The retired low word goes to [rdi].
146 mov rbp,QWORD[96+rcx] | |
147 mov rax,QWORD[rsi] | |
148 mul rbp | |
149 mov r8,QWORD[rcx] | |
150 add r8,rax | |
151 adc rdx,0 | |
152 mov QWORD[rdi],r8 | |
153 mov rbx,rdx | |
154 | |
155 mov rax,QWORD[8+rsi] | |
156 mul rbp | |
157 mov r9,QWORD[8+rcx] | |
158 add r9,rax | |
159 adc rdx,0 | |
160 add r9,rbx | |
161 adc rdx,0 | |
162 mov rbx,rdx | |
163 | |
164 mov rax,QWORD[16+rsi] | |
165 mul rbp | |
166 mov r10,QWORD[16+rcx] | |
167 add r10,rax | |
168 adc rdx,0 | |
169 add r10,rbx | |
170 adc rdx,0 | |
171 mov rbx,rdx | |
172 | |
173 mov rax,QWORD[24+rsi] | |
174 mul rbp | |
175 mov r11,QWORD[24+rcx] | |
176 add r11,rax | |
177 adc rdx,0 | |
178 add r11,rbx | |
179 adc rdx,0 | |
180 mov rbx,rdx | |
181 | |
182 mov rax,QWORD[32+rsi] | |
183 mul rbp | |
184 mov r12,QWORD[32+rcx] | |
185 add r12,rax | |
186 adc rdx,0 | |
187 add r12,rbx | |
188 adc rdx,0 | |
189 mov rbx,rdx | |
190 | |
191 mov rax,QWORD[40+rsi] | |
192 mul rbp | |
193 mov r13,QWORD[40+rcx] | |
194 add r13,rax | |
195 adc rdx,0 | |
196 add r13,rbx | |
197 adc rdx,0 | |
198 mov rbx,rdx | |
199 | |
200 mov rax,QWORD[48+rsi] | |
201 mul rbp | |
202 mov r14,QWORD[48+rcx] | |
203 add r14,rax | |
204 adc rdx,0 | |
205 add r14,rbx | |
206 adc rdx,0 | |
207 mov rbx,rdx | |
208 | |
209 mov rax,QWORD[56+rsi] | |
210 mul rbp | |
211 mov r15,QWORD[56+rcx] | |
212 add r15,rax | |
213 adc rdx,0 | |
214 add r15,rbx | |
215 adc rdx,0 | |
216 mov r8,rdx | |
; Pass 2: multiplier X[13]; low word retired to [rdi+8].
217 mov rbp,QWORD[104+rcx] | |
218 mov rax,QWORD[rsi] | |
219 mul rbp | |
220 add r9,rax | |
221 adc rdx,0 | |
222 mov QWORD[8+rdi],r9 | |
223 mov rbx,rdx | |
224 | |
225 mov rax,QWORD[8+rsi] | |
226 mul rbp | |
227 add r10,rax | |
228 adc rdx,0 | |
229 add r10,rbx | |
230 adc rdx,0 | |
231 mov rbx,rdx | |
232 | |
233 mov rax,QWORD[16+rsi] | |
234 mul rbp | |
235 add r11,rax | |
236 adc rdx,0 | |
237 add r11,rbx | |
238 adc rdx,0 | |
239 mov rbx,rdx | |
240 | |
241 mov rax,QWORD[24+rsi] | |
242 mul rbp | |
243 add r12,rax | |
244 adc rdx,0 | |
245 add r12,rbx | |
246 adc rdx,0 | |
247 mov rbx,rdx | |
248 | |
249 mov rax,QWORD[32+rsi] | |
250 mul rbp | |
251 add r13,rax | |
252 adc rdx,0 | |
253 add r13,rbx | |
254 adc rdx,0 | |
255 mov rbx,rdx | |
256 | |
257 mov rax,QWORD[40+rsi] | |
258 mul rbp | |
259 add r14,rax | |
260 adc rdx,0 | |
261 add r14,rbx | |
262 adc rdx,0 | |
263 mov rbx,rdx | |
264 | |
265 mov rax,QWORD[48+rsi] | |
266 mul rbp | |
267 add r15,rax | |
268 adc rdx,0 | |
269 add r15,rbx | |
270 adc rdx,0 | |
271 mov rbx,rdx | |
272 | |
273 mov rax,QWORD[56+rsi] | |
274 mul rbp | |
275 add r8,rax | |
276 adc rdx,0 | |
277 add r8,rbx | |
278 adc rdx,0 | |
279 mov r9,rdx | |
; Pass 3: multiplier X[14]; low word retired to [rdi+16].
280 mov rbp,QWORD[112+rcx] | |
281 mov rax,QWORD[rsi] | |
282 mul rbp | |
283 add r10,rax | |
284 adc rdx,0 | |
285 mov QWORD[16+rdi],r10 | |
286 mov rbx,rdx | |
287 | |
288 mov rax,QWORD[8+rsi] | |
289 mul rbp | |
290 add r11,rax | |
291 adc rdx,0 | |
292 add r11,rbx | |
293 adc rdx,0 | |
294 mov rbx,rdx | |
295 | |
296 mov rax,QWORD[16+rsi] | |
297 mul rbp | |
298 add r12,rax | |
299 adc rdx,0 | |
300 add r12,rbx | |
301 adc rdx,0 | |
302 mov rbx,rdx | |
303 | |
304 mov rax,QWORD[24+rsi] | |
305 mul rbp | |
306 add r13,rax | |
307 adc rdx,0 | |
308 add r13,rbx | |
309 adc rdx,0 | |
310 mov rbx,rdx | |
311 | |
312 mov rax,QWORD[32+rsi] | |
313 mul rbp | |
314 add r14,rax | |
315 adc rdx,0 | |
316 add r14,rbx | |
317 adc rdx,0 | |
318 mov rbx,rdx | |
319 | |
320 mov rax,QWORD[40+rsi] | |
321 mul rbp | |
322 add r15,rax | |
323 adc rdx,0 | |
324 add r15,rbx | |
325 adc rdx,0 | |
326 mov rbx,rdx | |
327 | |
328 mov rax,QWORD[48+rsi] | |
329 mul rbp | |
330 add r8,rax | |
331 adc rdx,0 | |
332 add r8,rbx | |
333 adc rdx,0 | |
334 mov rbx,rdx | |
335 | |
336 mov rax,QWORD[56+rsi] | |
337 mul rbp | |
338 add r9,rax | |
339 adc rdx,0 | |
340 add r9,rbx | |
341 adc rdx,0 | |
342 mov r10,rdx | |
; Pass 4: multiplier X[15]; low word retired to [rdi+24].
343 mov rbp,QWORD[120+rcx] | |
344 mov rax,QWORD[rsi] | |
345 mul rbp | |
346 add r11,rax | |
347 adc rdx,0 | |
348 mov QWORD[24+rdi],r11 | |
349 mov rbx,rdx | |
350 | |
351 mov rax,QWORD[8+rsi] | |
352 mul rbp | |
353 add r12,rax | |
354 adc rdx,0 | |
355 add r12,rbx | |
356 adc rdx,0 | |
357 mov rbx,rdx | |
358 | |
359 mov rax,QWORD[16+rsi] | |
360 mul rbp | |
361 add r13,rax | |
362 adc rdx,0 | |
363 add r13,rbx | |
364 adc rdx,0 | |
365 mov rbx,rdx | |
366 | |
367 mov rax,QWORD[24+rsi] | |
368 mul rbp | |
369 add r14,rax | |
370 adc rdx,0 | |
371 add r14,rbx | |
372 adc rdx,0 | |
373 mov rbx,rdx | |
374 | |
375 mov rax,QWORD[32+rsi] | |
376 mul rbp | |
377 add r15,rax | |
378 adc rdx,0 | |
379 add r15,rbx | |
380 adc rdx,0 | |
381 mov rbx,rdx | |
382 | |
383 mov rax,QWORD[40+rsi] | |
384 mul rbp | |
385 add r8,rax | |
386 adc rdx,0 | |
387 add r8,rbx | |
388 adc rdx,0 | |
389 mov rbx,rdx | |
390 | |
391 mov rax,QWORD[48+rsi] | |
392 mul rbp | |
393 add r9,rax | |
394 adc rdx,0 | |
395 add r9,rbx | |
396 adc rdx,0 | |
397 mov rbx,rdx | |
398 | |
399 mov rax,QWORD[56+rsi] | |
400 mul rbp | |
401 add r10,rax | |
402 adc rdx,0 | |
403 add r10,rbx | |
404 adc rdx,0 | |
405 mov r11,rdx | |
; Words 8..11 of the stage-1 sum are in r8..r11; add X[8..11] to them and
; capture the carry-out (0 or 1) in rax.
406 xor rax,rax | |
407 | |
408 add r8,QWORD[64+rcx] | |
409 adc r9,QWORD[72+rcx] | |
410 adc r10,QWORD[80+rcx] | |
411 adc r11,QWORD[88+rcx] | |
412 adc rax,0 | |
413 | |
414 | |
415 | |
416 | |
; Spill words 8, 9 and 11 to the work area; word 10 stays in rbp because it
; is the first multiplier of the next MULADD_128x512 call (word 11, at
; [rdi+88], becomes [rdi+8] once rdi is advanced by 80 below).
417 mov QWORD[64+rdi],r8 | |
418 mov QWORD[72+rdi],r9 | |
419 mov rbp,r10 | |
420 mov QWORD[88+rdi],r11 | |
421 | |
; Save the carry counter.
422 mov QWORD[384+rsp],rax | |
423 | |
; Reload the four low words retired during stage 1; r8..r15 now hold words
; 0..7 of the stage-1 sum.
424 mov r8,QWORD[rdi] | |
425 mov r9,QWORD[8+rdi] | |
426 mov r10,QWORD[16+rdi] | |
427 mov r11,QWORD[24+rdi] | |
428 | |
429 | |
430 | |
431 | |
432 | |
433 | |
434 | |
435 | |
; Stage 2: accumulate (word 10, word 11) * (tbl+640) onto words 0..7.
; rdi -> word 10 slot, rsi -> tbl+640, rcx -> second work area.
436 add rdi,8*10 | |
437 | |
438 add rsi,64 | |
439 lea rcx,[296+rsp] | |
440 | |
441 call MULADD_128x512 | |
442 | |
443 mov rax,QWORD[384+rsp] | |
444 | |
445 | |
; Add words 8 and 9 (at [rdi-16], [rdi-8]) to the top two result words and
; park them at [rcx+64], [rcx+72].
446 add r8,QWORD[((-16))+rdi] | |
447 adc r9,QWORD[((-8))+rdi] | |
448 mov QWORD[64+rcx],r8 | |
449 mov QWORD[72+rcx],r9 | |
450 | |
; carry counter = 2*counter + carry-out of the addition above.
451 adc rax,rax | |
452 mov QWORD[384+rsp],rax | |
453 | |
454 lea rdi,[192+rsp] | |
455 add rsi,64 | |
456 | |
457 | |
458 | |
459 | |
460 | |
; Stage 3: low 128 bits of ([rcx],[rcx+8]) * ([rsi],[rsi+8]) with
; rsi -> tbl+704. Low word is left in rbp, second word stored at [rdi+8],
; which is exactly the multiplier layout MULADD_128x512 expects.
461 mov r8,QWORD[rsi] | |
462 mov rbx,QWORD[8+rsi] | |
463 | |
464 mov rax,QWORD[rcx] | |
465 mul r8 | |
466 mov rbp,rax | |
467 mov r9,rdx | |
468 | |
469 mov rax,QWORD[8+rcx] | |
470 mul r8 | |
471 add r9,rax | |
472 | |
473 mov rax,QWORD[rcx] | |
474 mul rbx | |
475 add r9,rax | |
476 | |
477 mov QWORD[8+rdi],r9 | |
478 | |
479 | |
; Stage 4: rsi -> tbl+512. Accumulate the 128-bit value from stage 3 times the
; eight qwords at tbl+512 onto the stage-2 result (low two words reloaded
; into r8, r9; words 2..7 are still in r10..r15).
480 sub rsi,192 | |
481 | |
482 mov r8,QWORD[rcx] | |
483 mov r9,QWORD[8+rcx] | |
484 | |
485 call MULADD_128x512 | |
486 | |
487 | |
488 | |
489 | |
; Preload the first four qwords at tbl+512 for the conditional subtraction
; further down (rsi and rdi are about to be reused).
490 mov rax,QWORD[rsi] | |
491 mov rbx,QWORD[8+rsi] | |
492 mov rdi,QWORD[16+rsi] | |
493 mov rdx,QWORD[24+rsi] | |
494 | |
495 | |
496 mov rbp,QWORD[384+rsp] | |
497 | |
; Add the two words parked at [rcx+64], [rcx+72] after stage 2.
498 add r8,QWORD[64+rcx] | |
499 adc r9,QWORD[72+rcx] | |
500 | |
501 | |
; carry counter = 2*counter + carry-out; it becomes a table index below.
502 adc rbp,rbp | |
503 | |
504 | |
505 | |
; rbp = tbl + 8*index: address of the first qword of the indexed row.
506 shl rbp,3 | |
507 mov rcx,QWORD[32+rsp] | |
508 add rbp,rcx | |
509 | |
510 | |
511 xor rsi,rsi | |
512 | |
; Add the indexed 8-qword row (consecutive words are 64 bytes apart) to the
; running result r10..r15, r8, r9.
513 add r10,QWORD[rbp] | |
514 adc r11,QWORD[64+rbp] | |
515 adc r12,QWORD[128+rbp] | |
516 adc r13,QWORD[192+rbp] | |
517 adc r14,QWORD[256+rbp] | |
518 adc r15,QWORD[320+rbp] | |
519 adc r8,QWORD[384+rbp] | |
520 adc r9,QWORD[448+rbp] | |
521 | |
522 | |
523 | |
; rsi = all ones if the addition carried out, else zero (branch-free mask).
524 sbb rsi,0 | |
525 | |
526 | |
; Mask the preloaded words: subtract either the qwords at tbl+512 or zero.
527 and rax,rsi | |
528 and rbx,rsi | |
529 and rdi,rsi | |
530 and rdx,rsi | |
531 | |
532 mov rbp,1 | |
533 sub r10,rax | |
534 sbb r11,rbx | |
535 sbb r12,rdi | |
536 sbb r13,rdx | |
537 | |
538 | |
539 | |
540 | |
; Save the borrow across the flag-clobbering code below: rbp = 1 - borrow.
541 sbb rbp,0 | |
542 | |
543 | |
544 | |
; Load and mask the upper four qwords of the same 8-qword value (tbl+512+32..).
545 add rcx,512 | |
546 mov rax,QWORD[32+rcx] | |
547 mov rbx,QWORD[40+rcx] | |
548 mov rdi,QWORD[48+rcx] | |
549 mov rdx,QWORD[56+rcx] | |
550 | |
551 | |
552 | |
553 and rax,rsi | |
554 and rbx,rsi | |
555 and rdi,rsi | |
556 and rdx,rsi | |
557 | |
558 | |
559 | |
; Restore the saved borrow into CF: 0-1 sets CF, 1-1 clears it.
560 sub rbp,1 | |
561 | |
562 sbb r14,rax | |
563 sbb r15,rbx | |
564 sbb r8,rdi | |
565 sbb r9,rdx | |
566 | |
567 | |
568 | |
; Store the 8-qword result through the caller-provided pointer.
569 mov rsi,QWORD[144+rsp] | |
570 mov QWORD[rsi],r10 | |
571 mov QWORD[8+rsi],r11 | |
572 mov QWORD[16+rsi],r12 | |
573 mov QWORD[24+rsi],r13 | |
574 mov QWORD[32+rsi],r14 | |
575 mov QWORD[40+rsi],r15 | |
576 mov QWORD[48+rsi],r8 | |
577 mov QWORD[56+rsi],r9 | |
578 | |
; Return, emitted as the raw bytes F3 C3 (the two-byte "rep ret" form).
579 DB 0F3h,0C3h ;repret | |
580 | |
581 | |
582 ALIGN 16 | |
; ---------------------------------------------------------------------------
; mont_mul_a3b -- 512x512-bit schoolbook multiply into the 16-qword buffer at
; [rsp+520], followed by a tail jump to mont_reduce.
;
; Inputs:
;   rdi -> eight qwords; word k is the multiplier of pass k
;   r10,r11,r12,r13,r14,r15,r8,r9 = multiplicand words 0..7, used by pass 0
;   rsi -> eight qwords used as the multiplicand by passes 1..7
;          NOTE(review): passes 1..7 re-read the multiplicand from memory, so
;          the register copy and [rsi] presumably hold the same value --
;          confirm against the call sites.
; Output:
;   [rsp+520 .. rsp+647] = 16 product qwords, least-significant word first
;   (rsp offsets include this routine's return address).
; Pass k retires its lowest word to [rsp+520+8*k]; the eight accumulator
; registers rotate by one position per pass.
; Scratch: rax, rdx, rbx, rbp, r8..r15, flags. rsi and rdi are not modified
; here. The routine does not return on its own: it leaves through
; jmp mont_reduce, whose return goes straight back to this routine's caller.
; ---------------------------------------------------------------------------
583 mont_mul_a3b: | |
584 | |
585 | |
586 | |
587 | |
; Pass 0: multiplier [rdi]; multiplicand taken from registers, nothing to
; accumulate onto yet, so each step only chains the previous high half.
588 mov rbp,QWORD[rdi] | |
589 | |
590 mov rax,r10 | |
591 mul rbp | |
592 mov QWORD[520+rsp],rax | |
593 mov r10,rdx | |
594 mov rax,r11 | |
595 mul rbp | |
596 add r10,rax | |
597 adc rdx,0 | |
598 mov r11,rdx | |
599 mov rax,r12 | |
600 mul rbp | |
601 add r11,rax | |
602 adc rdx,0 | |
603 mov r12,rdx | |
604 mov rax,r13 | |
605 mul rbp | |
606 add r12,rax | |
607 adc rdx,0 | |
608 mov r13,rdx | |
609 mov rax,r14 | |
610 mul rbp | |
611 add r13,rax | |
612 adc rdx,0 | |
613 mov r14,rdx | |
614 mov rax,r15 | |
615 mul rbp | |
616 add r14,rax | |
617 adc rdx,0 | |
618 mov r15,rdx | |
619 mov rax,r8 | |
620 mul rbp | |
621 add r15,rax | |
622 adc rdx,0 | |
623 mov r8,rdx | |
624 mov rax,r9 | |
625 mul rbp | |
626 add r8,rax | |
627 adc rdx,0 | |
628 mov r9,rdx | |
; Pass 1: multiplier [rdi+8]; accumulator r10..r15, r8, r9 -> top word r10.
629 mov rbp,QWORD[8+rdi] | |
630 mov rax,QWORD[rsi] | |
631 mul rbp | |
632 add r10,rax | |
633 adc rdx,0 | |
634 mov QWORD[528+rsp],r10 | |
635 mov rbx,rdx | |
636 | |
637 mov rax,QWORD[8+rsi] | |
638 mul rbp | |
639 add r11,rax | |
640 adc rdx,0 | |
641 add r11,rbx | |
642 adc rdx,0 | |
643 mov rbx,rdx | |
644 | |
645 mov rax,QWORD[16+rsi] | |
646 mul rbp | |
647 add r12,rax | |
648 adc rdx,0 | |
649 add r12,rbx | |
650 adc rdx,0 | |
651 mov rbx,rdx | |
652 | |
653 mov rax,QWORD[24+rsi] | |
654 mul rbp | |
655 add r13,rax | |
656 adc rdx,0 | |
657 add r13,rbx | |
658 adc rdx,0 | |
659 mov rbx,rdx | |
660 | |
661 mov rax,QWORD[32+rsi] | |
662 mul rbp | |
663 add r14,rax | |
664 adc rdx,0 | |
665 add r14,rbx | |
666 adc rdx,0 | |
667 mov rbx,rdx | |
668 | |
669 mov rax,QWORD[40+rsi] | |
670 mul rbp | |
671 add r15,rax | |
672 adc rdx,0 | |
673 add r15,rbx | |
674 adc rdx,0 | |
675 mov rbx,rdx | |
676 | |
677 mov rax,QWORD[48+rsi] | |
678 mul rbp | |
679 add r8,rax | |
680 adc rdx,0 | |
681 add r8,rbx | |
682 adc rdx,0 | |
683 mov rbx,rdx | |
684 | |
685 mov rax,QWORD[56+rsi] | |
686 mul rbp | |
687 add r9,rax | |
688 adc rdx,0 | |
689 add r9,rbx | |
690 adc rdx,0 | |
691 mov r10,rdx | |
; Pass 2: multiplier [rdi+16]; top word goes to r11.
692 mov rbp,QWORD[16+rdi] | |
693 mov rax,QWORD[rsi] | |
694 mul rbp | |
695 add r11,rax | |
696 adc rdx,0 | |
697 mov QWORD[536+rsp],r11 | |
698 mov rbx,rdx | |
699 | |
700 mov rax,QWORD[8+rsi] | |
701 mul rbp | |
702 add r12,rax | |
703 adc rdx,0 | |
704 add r12,rbx | |
705 adc rdx,0 | |
706 mov rbx,rdx | |
707 | |
708 mov rax,QWORD[16+rsi] | |
709 mul rbp | |
710 add r13,rax | |
711 adc rdx,0 | |
712 add r13,rbx | |
713 adc rdx,0 | |
714 mov rbx,rdx | |
715 | |
716 mov rax,QWORD[24+rsi] | |
717 mul rbp | |
718 add r14,rax | |
719 adc rdx,0 | |
720 add r14,rbx | |
721 adc rdx,0 | |
722 mov rbx,rdx | |
723 | |
724 mov rax,QWORD[32+rsi] | |
725 mul rbp | |
726 add r15,rax | |
727 adc rdx,0 | |
728 add r15,rbx | |
729 adc rdx,0 | |
730 mov rbx,rdx | |
731 | |
732 mov rax,QWORD[40+rsi] | |
733 mul rbp | |
734 add r8,rax | |
735 adc rdx,0 | |
736 add r8,rbx | |
737 adc rdx,0 | |
738 mov rbx,rdx | |
739 | |
740 mov rax,QWORD[48+rsi] | |
741 mul rbp | |
742 add r9,rax | |
743 adc rdx,0 | |
744 add r9,rbx | |
745 adc rdx,0 | |
746 mov rbx,rdx | |
747 | |
748 mov rax,QWORD[56+rsi] | |
749 mul rbp | |
750 add r10,rax | |
751 adc rdx,0 | |
752 add r10,rbx | |
753 adc rdx,0 | |
754 mov r11,rdx | |
; Pass 3: multiplier [rdi+24]; top word goes to r12.
755 mov rbp,QWORD[24+rdi] | |
756 mov rax,QWORD[rsi] | |
757 mul rbp | |
758 add r12,rax | |
759 adc rdx,0 | |
760 mov QWORD[544+rsp],r12 | |
761 mov rbx,rdx | |
762 | |
763 mov rax,QWORD[8+rsi] | |
764 mul rbp | |
765 add r13,rax | |
766 adc rdx,0 | |
767 add r13,rbx | |
768 adc rdx,0 | |
769 mov rbx,rdx | |
770 | |
771 mov rax,QWORD[16+rsi] | |
772 mul rbp | |
773 add r14,rax | |
774 adc rdx,0 | |
775 add r14,rbx | |
776 adc rdx,0 | |
777 mov rbx,rdx | |
778 | |
779 mov rax,QWORD[24+rsi] | |
780 mul rbp | |
781 add r15,rax | |
782 adc rdx,0 | |
783 add r15,rbx | |
784 adc rdx,0 | |
785 mov rbx,rdx | |
786 | |
787 mov rax,QWORD[32+rsi] | |
788 mul rbp | |
789 add r8,rax | |
790 adc rdx,0 | |
791 add r8,rbx | |
792 adc rdx,0 | |
793 mov rbx,rdx | |
794 | |
795 mov rax,QWORD[40+rsi] | |
796 mul rbp | |
797 add r9,rax | |
798 adc rdx,0 | |
799 add r9,rbx | |
800 adc rdx,0 | |
801 mov rbx,rdx | |
802 | |
803 mov rax,QWORD[48+rsi] | |
804 mul rbp | |
805 add r10,rax | |
806 adc rdx,0 | |
807 add r10,rbx | |
808 adc rdx,0 | |
809 mov rbx,rdx | |
810 | |
811 mov rax,QWORD[56+rsi] | |
812 mul rbp | |
813 add r11,rax | |
814 adc rdx,0 | |
815 add r11,rbx | |
816 adc rdx,0 | |
817 mov r12,rdx | |
; Pass 4: multiplier [rdi+32]; top word goes to r13.
818 mov rbp,QWORD[32+rdi] | |
819 mov rax,QWORD[rsi] | |
820 mul rbp | |
821 add r13,rax | |
822 adc rdx,0 | |
823 mov QWORD[552+rsp],r13 | |
824 mov rbx,rdx | |
825 | |
826 mov rax,QWORD[8+rsi] | |
827 mul rbp | |
828 add r14,rax | |
829 adc rdx,0 | |
830 add r14,rbx | |
831 adc rdx,0 | |
832 mov rbx,rdx | |
833 | |
834 mov rax,QWORD[16+rsi] | |
835 mul rbp | |
836 add r15,rax | |
837 adc rdx,0 | |
838 add r15,rbx | |
839 adc rdx,0 | |
840 mov rbx,rdx | |
841 | |
842 mov rax,QWORD[24+rsi] | |
843 mul rbp | |
844 add r8,rax | |
845 adc rdx,0 | |
846 add r8,rbx | |
847 adc rdx,0 | |
848 mov rbx,rdx | |
849 | |
850 mov rax,QWORD[32+rsi] | |
851 mul rbp | |
852 add r9,rax | |
853 adc rdx,0 | |
854 add r9,rbx | |
855 adc rdx,0 | |
856 mov rbx,rdx | |
857 | |
858 mov rax,QWORD[40+rsi] | |
859 mul rbp | |
860 add r10,rax | |
861 adc rdx,0 | |
862 add r10,rbx | |
863 adc rdx,0 | |
864 mov rbx,rdx | |
865 | |
866 mov rax,QWORD[48+rsi] | |
867 mul rbp | |
868 add r11,rax | |
869 adc rdx,0 | |
870 add r11,rbx | |
871 adc rdx,0 | |
872 mov rbx,rdx | |
873 | |
874 mov rax,QWORD[56+rsi] | |
875 mul rbp | |
876 add r12,rax | |
877 adc rdx,0 | |
878 add r12,rbx | |
879 adc rdx,0 | |
880 mov r13,rdx | |
; Pass 5: multiplier [rdi+40]; top word goes to r14.
881 mov rbp,QWORD[40+rdi] | |
882 mov rax,QWORD[rsi] | |
883 mul rbp | |
884 add r14,rax | |
885 adc rdx,0 | |
886 mov QWORD[560+rsp],r14 | |
887 mov rbx,rdx | |
888 | |
889 mov rax,QWORD[8+rsi] | |
890 mul rbp | |
891 add r15,rax | |
892 adc rdx,0 | |
893 add r15,rbx | |
894 adc rdx,0 | |
895 mov rbx,rdx | |
896 | |
897 mov rax,QWORD[16+rsi] | |
898 mul rbp | |
899 add r8,rax | |
900 adc rdx,0 | |
901 add r8,rbx | |
902 adc rdx,0 | |
903 mov rbx,rdx | |
904 | |
905 mov rax,QWORD[24+rsi] | |
906 mul rbp | |
907 add r9,rax | |
908 adc rdx,0 | |
909 add r9,rbx | |
910 adc rdx,0 | |
911 mov rbx,rdx | |
912 | |
913 mov rax,QWORD[32+rsi] | |
914 mul rbp | |
915 add r10,rax | |
916 adc rdx,0 | |
917 add r10,rbx | |
918 adc rdx,0 | |
919 mov rbx,rdx | |
920 | |
921 mov rax,QWORD[40+rsi] | |
922 mul rbp | |
923 add r11,rax | |
924 adc rdx,0 | |
925 add r11,rbx | |
926 adc rdx,0 | |
927 mov rbx,rdx | |
928 | |
929 mov rax,QWORD[48+rsi] | |
930 mul rbp | |
931 add r12,rax | |
932 adc rdx,0 | |
933 add r12,rbx | |
934 adc rdx,0 | |
935 mov rbx,rdx | |
936 | |
937 mov rax,QWORD[56+rsi] | |
938 mul rbp | |
939 add r13,rax | |
940 adc rdx,0 | |
941 add r13,rbx | |
942 adc rdx,0 | |
943 mov r14,rdx | |
; Pass 6: multiplier [rdi+48]; top word goes to r15.
944 mov rbp,QWORD[48+rdi] | |
945 mov rax,QWORD[rsi] | |
946 mul rbp | |
947 add r15,rax | |
948 adc rdx,0 | |
949 mov QWORD[568+rsp],r15 | |
950 mov rbx,rdx | |
951 | |
952 mov rax,QWORD[8+rsi] | |
953 mul rbp | |
954 add r8,rax | |
955 adc rdx,0 | |
956 add r8,rbx | |
957 adc rdx,0 | |
958 mov rbx,rdx | |
959 | |
960 mov rax,QWORD[16+rsi] | |
961 mul rbp | |
962 add r9,rax | |
963 adc rdx,0 | |
964 add r9,rbx | |
965 adc rdx,0 | |
966 mov rbx,rdx | |
967 | |
968 mov rax,QWORD[24+rsi] | |
969 mul rbp | |
970 add r10,rax | |
971 adc rdx,0 | |
972 add r10,rbx | |
973 adc rdx,0 | |
974 mov rbx,rdx | |
975 | |
976 mov rax,QWORD[32+rsi] | |
977 mul rbp | |
978 add r11,rax | |
979 adc rdx,0 | |
980 add r11,rbx | |
981 adc rdx,0 | |
982 mov rbx,rdx | |
983 | |
984 mov rax,QWORD[40+rsi] | |
985 mul rbp | |
986 add r12,rax | |
987 adc rdx,0 | |
988 add r12,rbx | |
989 adc rdx,0 | |
990 mov rbx,rdx | |
991 | |
992 mov rax,QWORD[48+rsi] | |
993 mul rbp | |
994 add r13,rax | |
995 adc rdx,0 | |
996 add r13,rbx | |
997 adc rdx,0 | |
998 mov rbx,rdx | |
999 | |
1000 mov rax,QWORD[56+rsi] | |
1001 mul rbp | |
1002 add r14,rax | |
1003 adc rdx,0 | |
1004 add r14,rbx | |
1005 adc rdx,0 | |
1006 mov r15,rdx | |
; Pass 7: multiplier [rdi+56]; top word goes to r8.
1007 mov rbp,QWORD[56+rdi] | |
1008 mov rax,QWORD[rsi] | |
1009 mul rbp | |
1010 add r8,rax | |
1011 adc rdx,0 | |
1012 mov QWORD[576+rsp],r8 | |
1013 mov rbx,rdx | |
1014 | |
1015 mov rax,QWORD[8+rsi] | |
1016 mul rbp | |
1017 add r9,rax | |
1018 adc rdx,0 | |
1019 add r9,rbx | |
1020 adc rdx,0 | |
1021 mov rbx,rdx | |
1022 | |
1023 mov rax,QWORD[16+rsi] | |
1024 mul rbp | |
1025 add r10,rax | |
1026 adc rdx,0 | |
1027 add r10,rbx | |
1028 adc rdx,0 | |
1029 mov rbx,rdx | |
1030 | |
1031 mov rax,QWORD[24+rsi] | |
1032 mul rbp | |
1033 add r11,rax | |
1034 adc rdx,0 | |
1035 add r11,rbx | |
1036 adc rdx,0 | |
1037 mov rbx,rdx | |
1038 | |
1039 mov rax,QWORD[32+rsi] | |
1040 mul rbp | |
1041 add r12,rax | |
1042 adc rdx,0 | |
1043 add r12,rbx | |
1044 adc rdx,0 | |
1045 mov rbx,rdx | |
1046 | |
1047 mov rax,QWORD[40+rsi] | |
1048 mul rbp | |
1049 add r13,rax | |
1050 adc rdx,0 | |
1051 add r13,rbx | |
1052 adc rdx,0 | |
1053 mov rbx,rdx | |
1054 | |
1055 mov rax,QWORD[48+rsi] | |
1056 mul rbp | |
1057 add r14,rax | |
1058 adc rdx,0 | |
1059 add r14,rbx | |
1060 adc rdx,0 | |
1061 mov rbx,rdx | |
1062 | |
1063 mov rax,QWORD[56+rsi] | |
1064 mul rbp | |
1065 add r15,rax | |
1066 adc rdx,0 | |
1067 add r15,rbx | |
1068 adc rdx,0 | |
1069 mov r8,rdx | |
; Flush the upper eight product words (words 8..15) still held in registers.
1070 mov QWORD[584+rsp],r9 | |
1071 mov QWORD[592+rsp],r10 | |
1072 mov QWORD[600+rsp],r11 | |
1073 mov QWORD[608+rsp],r12 | |
1074 mov QWORD[616+rsp],r13 | |
1075 mov QWORD[624+rsp],r14 | |
1076 mov QWORD[632+rsp],r15 | |
1077 mov QWORD[640+rsp],r8 | |
1078 | |
1079 | |
1080 | |
1081 | |
1082 | |
; Tail jump: mont_reduce reads the product from [rsp+520] at the same stack
; depth and returns directly to this routine's caller.
1083 jmp NEAR mont_reduce | |
1084 | |
1085 | |
1086 | |
1087 | |
1088 ALIGN 16 | |
; ---------------------------------------------------------------------------
; sqr_reduce -- square a 512-bit value into the 16-qword buffer at [rsp+520],
; then tail-jump to mont_reduce.
;
; Inputs:
;   r10,r11,r12,r13,r14,r15,r8,r9 = operand words a0..a7
;   [rsp+16] -> eight qwords from which a0..a6 are re-read as needed
;               NOTE(review): presumably the same operand as the register
;               copy -- confirm against the call sites.
;   (rsp offsets include this routine's return address.)
; Output:
;   [rsp+520 .. rsp+647] = 16 qwords of the square, least-significant first.
;
; Method, as implemented below:
;   1. sum the cross products a_i*a_j for i<j (each computed once); result
;      words 1..14 are stored at [rsp+528 .. rsp+639]
;   2. double words 1..6 and add the squares a0^2, a1^2, a2^2 and the low
;      half of a3^2
;   3. double words 7..14 and add the high half of a3^2 and a4^2..a7^2
; r8 and r9 keep a6 and a7 live until step 2 / step 3 respectively.
; Every general-purpose register except rsp is overwritten. The routine does
; not return on its own: it leaves through jmp mont_reduce.
; ---------------------------------------------------------------------------
1089 sqr_reduce: | |
1090 mov rcx,QWORD[16+rsp] | |
1091 | |
1092 | |
1093 | |
; Step 1, row 0: a0 * (a1..a7), operands taken from registers.
; Word 1 -> [rsp+528], word 2 -> [rsp+536]; words 3..7 stay in r11..r15 and
; word 8 in rsi.
1094 mov rbx,r10 | |
1095 | |
1096 mov rax,r11 | |
1097 mul rbx | |
1098 mov QWORD[528+rsp],rax | |
1099 mov r10,rdx | |
1100 mov rax,r12 | |
1101 mul rbx | |
1102 add r10,rax | |
1103 adc rdx,0 | |
1104 mov r11,rdx | |
1105 mov rax,r13 | |
1106 mul rbx | |
1107 add r11,rax | |
1108 adc rdx,0 | |
1109 mov r12,rdx | |
1110 mov rax,r14 | |
1111 mul rbx | |
1112 add r12,rax | |
1113 adc rdx,0 | |
1114 mov r13,rdx | |
1115 mov rax,r15 | |
1116 mul rbx | |
1117 add r13,rax | |
1118 adc rdx,0 | |
1119 mov r14,rdx | |
1120 mov rax,r8 | |
1121 mul rbx | |
1122 add r14,rax | |
1123 adc rdx,0 | |
1124 mov r15,rdx | |
1125 mov rax,r9 | |
1126 mul rbx | |
1127 add r15,rax | |
1128 adc rdx,0 | |
1129 mov rsi,rdx | |
1130 | |
1131 mov QWORD[536+rsp],r10 | |
1132 | |
1133 | |
1134 | |
1135 | |
1136 | |
; Row 1: a1 * (a2..a7). r10 is free now and carries the high half between
; steps. Words 3 and 4 are retired to [rsp+544], [rsp+552].
1137 mov rbx,QWORD[8+rcx] | |
1138 | |
1139 mov rax,QWORD[16+rcx] | |
1140 mul rbx | |
1141 add r11,rax | |
1142 adc rdx,0 | |
1143 mov QWORD[544+rsp],r11 | |
1144 | |
1145 mov r10,rdx | |
1146 mov rax,QWORD[24+rcx] | |
1147 mul rbx | |
1148 add r12,rax | |
1149 adc rdx,0 | |
1150 add r12,r10 | |
1151 adc rdx,0 | |
1152 mov QWORD[552+rsp],r12 | |
1153 | |
1154 mov r10,rdx | |
1155 mov rax,QWORD[32+rcx] | |
1156 mul rbx | |
1157 add r13,rax | |
1158 adc rdx,0 | |
1159 add r13,r10 | |
1160 adc rdx,0 | |
1161 | |
1162 mov r10,rdx | |
1163 mov rax,QWORD[40+rcx] | |
1164 mul rbx | |
1165 add r14,rax | |
1166 adc rdx,0 | |
1167 add r14,r10 | |
1168 adc rdx,0 | |
1169 | |
1170 mov r10,rdx | |
1171 mov rax,r8 | |
1172 mul rbx | |
1173 add r15,rax | |
1174 adc rdx,0 | |
1175 add r15,r10 | |
1176 adc rdx,0 | |
1177 | |
1178 mov r10,rdx | |
1179 mov rax,r9 | |
1180 mul rbx | |
1181 add rsi,rax | |
1182 adc rdx,0 | |
1183 add rsi,r10 | |
1184 adc rdx,0 | |
1185 | |
1186 mov r11,rdx | |
1187 | |
1188 | |
1189 | |
1190 | |
; Row 2: a2 * (a3..a7). Words 5 and 6 are retired to [rsp+560], [rsp+568].
1191 mov rbx,QWORD[16+rcx] | |
1192 | |
1193 mov rax,QWORD[24+rcx] | |
1194 mul rbx | |
1195 add r13,rax | |
1196 adc rdx,0 | |
1197 mov QWORD[560+rsp],r13 | |
1198 | |
1199 mov r10,rdx | |
1200 mov rax,QWORD[32+rcx] | |
1201 mul rbx | |
1202 add r14,rax | |
1203 adc rdx,0 | |
1204 add r14,r10 | |
1205 adc rdx,0 | |
1206 mov QWORD[568+rsp],r14 | |
1207 | |
1208 mov r10,rdx | |
1209 mov rax,QWORD[40+rcx] | |
1210 mul rbx | |
1211 add r15,rax | |
1212 adc rdx,0 | |
1213 add r15,r10 | |
1214 adc rdx,0 | |
1215 | |
1216 mov r10,rdx | |
1217 mov rax,r8 | |
1218 mul rbx | |
1219 add rsi,rax | |
1220 adc rdx,0 | |
1221 add rsi,r10 | |
1222 adc rdx,0 | |
1223 | |
1224 mov r10,rdx | |
1225 mov rax,r9 | |
1226 mul rbx | |
1227 add r11,rax | |
1228 adc rdx,0 | |
1229 add r11,r10 | |
1230 adc rdx,0 | |
1231 | |
1232 mov r12,rdx | |
1233 | |
1234 | |
1235 | |
1236 | |
1237 | |
; Row 3: a3 * (a4..a7). Words 7 and 8 are retired to [rsp+576], [rsp+584].
1238 mov rbx,QWORD[24+rcx] | |
1239 | |
1240 mov rax,QWORD[32+rcx] | |
1241 mul rbx | |
1242 add r15,rax | |
1243 adc rdx,0 | |
1244 mov QWORD[576+rsp],r15 | |
1245 | |
1246 mov r10,rdx | |
1247 mov rax,QWORD[40+rcx] | |
1248 mul rbx | |
1249 add rsi,rax | |
1250 adc rdx,0 | |
1251 add rsi,r10 | |
1252 adc rdx,0 | |
1253 mov QWORD[584+rsp],rsi | |
1254 | |
1255 mov r10,rdx | |
1256 mov rax,r8 | |
1257 mul rbx | |
1258 add r11,rax | |
1259 adc rdx,0 | |
1260 add r11,r10 | |
1261 adc rdx,0 | |
1262 | |
1263 mov r10,rdx | |
1264 mov rax,r9 | |
1265 mul rbx | |
1266 add r12,rax | |
1267 adc rdx,0 | |
1268 add r12,r10 | |
1269 adc rdx,0 | |
1270 | |
1271 mov r15,rdx | |
1272 | |
1273 | |
1274 | |
1275 | |
; Row 4: a4 * (a5..a7). Words 9 and 10 are retired to [rsp+592], [rsp+600].
1276 mov rbx,QWORD[32+rcx] | |
1277 | |
1278 mov rax,QWORD[40+rcx] | |
1279 mul rbx | |
1280 add r11,rax | |
1281 adc rdx,0 | |
1282 mov QWORD[592+rsp],r11 | |
1283 | |
1284 mov r10,rdx | |
1285 mov rax,r8 | |
1286 mul rbx | |
1287 add r12,rax | |
1288 adc rdx,0 | |
1289 add r12,r10 | |
1290 adc rdx,0 | |
1291 mov QWORD[600+rsp],r12 | |
1292 | |
1293 mov r10,rdx | |
1294 mov rax,r9 | |
1295 mul rbx | |
1296 add r15,rax | |
1297 adc rdx,0 | |
1298 add r15,r10 | |
1299 adc rdx,0 | |
1300 | |
1301 mov r11,rdx | |
1302 | |
1303 | |
1304 | |
1305 | |
; Row 5: a5 * (a6, a7). Words 11 and 12 are retired to [rsp+608], [rsp+616].
1306 mov rbx,QWORD[40+rcx] | |
1307 | |
1308 mov rax,r8 | |
1309 mul rbx | |
1310 add r15,rax | |
1311 adc rdx,0 | |
1312 mov QWORD[608+rsp],r15 | |
1313 | |
1314 mov r10,rdx | |
1315 mov rax,r9 | |
1316 mul rbx | |
1317 add r11,rax | |
1318 adc rdx,0 | |
1319 add r11,r10 | |
1320 adc rdx,0 | |
1321 mov QWORD[616+rsp],r11 | |
1322 | |
1323 mov r12,rdx | |
1324 | |
1325 | |
1326 | |
1327 | |
; Row 6: a6 * a7. Words 13 and 14 are retired to [rsp+624], [rsp+632].
1328 mov rbx,r8 | |
1329 | |
1330 mov rax,r9 | |
1331 mul rbx | |
1332 add r12,rax | |
1333 adc rdx,0 | |
1334 mov QWORD[624+rsp],r12 | |
1335 | |
1336 mov QWORD[632+rsp],rdx | |
1337 | |
1338 | |
; Step 2: reload cross-product words 1..6 for doubling.
1339 mov r10,QWORD[528+rsp] | |
1340 mov r11,QWORD[536+rsp] | |
1341 mov r12,QWORD[544+rsp] | |
1342 mov r13,QWORD[552+rsp] | |
1343 mov r14,QWORD[560+rsp] | |
1344 mov r15,QWORD[568+rsp] | |
1345 | |
; a3^2 is computed first: rdi = low half (added into word 6 below),
; r8 = high half (belongs to word 7; also collects the doubling carry-out).
; r8's old content (a6) is no longer needed in register form.
1346 mov rax,QWORD[24+rcx] | |
1347 mul rax | |
1348 mov rdi,rax | |
1349 mov r8,rdx | |
1350 | |
; Double words 1..6 as one carry chain; the bit shifted out joins r8.
1351 add r10,r10 | |
1352 adc r11,r11 | |
1353 adc r12,r12 | |
1354 adc r13,r13 | |
1355 adc r14,r14 | |
1356 adc r15,r15 | |
1357 adc r8,0 | |
1358 | |
; a0^2: low half is result word 0; high half (rbx) is added into word 1.
1359 mov rax,QWORD[rcx] | |
1360 mul rax | |
1361 mov QWORD[520+rsp],rax | |
1362 mov rbx,rdx | |
1363 | |
; a1^2: low half into word 2, high half (plus carry) forwarded in rbx.
1364 mov rax,QWORD[8+rcx] | |
1365 mul rax | |
1366 | |
1367 add r10,rbx | |
1368 adc r11,rax | |
1369 adc rdx,0 | |
1370 | |
1371 mov rbx,rdx | |
1372 mov QWORD[528+rsp],r10 | |
1373 mov QWORD[536+rsp],r11 | |
1374 | |
; a2^2: low half into word 4, high half (plus carry) forwarded in rbx.
1375 mov rax,QWORD[16+rcx] | |
1376 mul rax | |
1377 | |
1378 add r12,rbx | |
1379 adc r13,rax | |
1380 adc rdx,0 | |
1381 | |
1382 mov rbx,rdx | |
1383 | |
1384 mov QWORD[544+rsp],r12 | |
1385 mov QWORD[552+rsp],r13 | |
1386 | |
; Words 5 and 6: add forwarded high half and the low half of a3^2;
; rbp (0 or 1) keeps the carry out of word 6 for step 3.
1387 xor rbp,rbp | |
1388 add r14,rbx | |
1389 adc r15,rdi | |
1390 adc rbp,0 | |
1391 | |
1392 mov QWORD[560+rsp],r14 | |
1393 mov QWORD[568+rsp],r15 | |
1394 | |
1395 | |
1396 | |
1397 | |
; Step 3: reload cross-product words 7..14 for doubling.
1398 mov r10,QWORD[576+rsp] | |
1399 mov r11,QWORD[584+rsp] | |
1400 mov r12,QWORD[592+rsp] | |
1401 mov r13,QWORD[600+rsp] | |
1402 mov r14,QWORD[608+rsp] | |
1403 mov r15,QWORD[616+rsp] | |
1404 mov rdi,QWORD[624+rsp] | |
1405 mov rsi,QWORD[632+rsp] | |
1406 
; a7^2 from the register copy of a7: r9 = low half (added into word 14),
; rbx = high half (word 15; also collects carries).
1407 mov rax,r9 | |
1408 mul rax | |
1409 mov r9,rax | |
1410 mov rbx,rdx | |
1411 | |
1412 add r10,r10 | |
1413 adc r11,r11 | |
1414 adc r12,r12 | |
1415 adc r13,r13 | |
1416 adc r14,r14 | |
1417 adc r15,r15 | |
1418 adc rdi,rdi | |
1419 adc rsi,rsi | |
1420 adc rbx,0 | |
1421 | |
; Fold in the carry saved from step 2. r10 was just doubled (so it is even)
; and rbp is 0 or 1, hence this addition cannot carry out.
1422 add r10,rbp | |
1423 | |
; a4^2: word 7 also receives r8 (high half of a3^2 plus doubling carry);
; low half of a4^2 into word 8, high half forwarded in rbp.
1424 mov rax,QWORD[32+rcx] | |
1425 mul rax | |
1426 | |
1427 add r10,r8 | |
1428 adc r11,rax | |
1429 adc rdx,0 | |
1430 | |
1431 mov rbp,rdx | |
1432 | |
1433 mov QWORD[576+rsp],r10 | |
1434 mov QWORD[584+rsp],r11 | |
1435 | |
; a5^2: low half into word 10, high half forwarded in rbp.
1436 mov rax,QWORD[40+rcx] | |
1437 mul rax | |
1438 | |
1439 add r12,rbp | |
1440 adc r13,rax | |
1441 adc rdx,0 | |
1442 | |
1443 mov rbp,rdx | |
1444 | |
1445 mov QWORD[592+rsp],r12 | |
1446 mov QWORD[600+rsp],r13 | |
1447 | |
; a6^2 (re-read from memory, r8 no longer holds a6): low half into word 12,
; high half (rdx) into word 13.
1448 mov rax,QWORD[48+rcx] | |
1449 mul rax | |
1450 | |
1451 add r14,rbp | |
1452 adc r15,rax | |
1453 adc rdx,0 | |
1454 | |
1455 mov QWORD[608+rsp],r14 | |
1456 mov QWORD[616+rsp],r15 | |
1457 | |
; Words 13..15: high half of a6^2, low half of a7^2, final carry into word 15.
1458 add rdi,rdx | |
1459 adc rsi,r9 | |
1460 adc rbx,0 | |
1461 | |
1462 mov QWORD[624+rsp],rdi | |
1463 mov QWORD[632+rsp],rsi | |
1464 mov QWORD[640+rsp],rbx | |
1465 | |
; Tail jump: mont_reduce reads the square from [rsp+520] at the same stack
; depth and returns directly to this routine's caller.
1466 jmp NEAR mont_reduce | |
1467 | |
1468 | |
1469 | |
1470 global mod_exp_512 | |
1471 | |
1472 mod_exp_512: | |
1473 mov QWORD[8+rsp],rdi ;WIN64 prologue | |
1474 mov QWORD[16+rsp],rsi | |
1475 mov rax,rsp | |
1476 $L$SEH_begin_mod_exp_512: | |
1477 mov rdi,rcx | |
1478 mov rsi,rdx | |
1479 mov rdx,r8 | |
1480 mov rcx,r9 | |
1481 | |
1482 | |
1483 push rbp | |
1484 push rbx | |
1485 push r12 | |
1486 push r13 | |
1487 push r14 | |
1488 push r15 | |
1489 | |
1490 | |
1491 mov r8,rsp | |
1492 sub rsp,2688 | |
1493 and rsp,-64 | |
1494 | |
1495 | |
1496 mov QWORD[rsp],r8 | |
1497 mov QWORD[8+rsp],rdi | |
1498 mov QWORD[16+rsp],rsi | |
1499 mov QWORD[24+rsp],rcx | |
1500 $L$body: | |
1501 | |
1502 | |
1503 | |
1504 pxor xmm4,xmm4 | |
1505 movdqu xmm0,XMMWORD[rsi] | |
1506 movdqu xmm1,XMMWORD[16+rsi] | |
1507 movdqu xmm2,XMMWORD[32+rsi] | |
1508 movdqu xmm3,XMMWORD[48+rsi] | |
1509 movdqa XMMWORD[512+rsp],xmm4 | |
1510 movdqa XMMWORD[528+rsp],xmm4 | |
1511 movdqa XMMWORD[608+rsp],xmm4 | |
1512 movdqa XMMWORD[624+rsp],xmm4 | |
1513 movdqa XMMWORD[544+rsp],xmm0 | |
1514 movdqa XMMWORD[560+rsp],xmm1 | |
1515 movdqa XMMWORD[576+rsp],xmm2 | |
1516 movdqa XMMWORD[592+rsp],xmm3 | |
1517 | |
1518 | |
1519 movdqu xmm0,XMMWORD[rdx] | |
1520 movdqu xmm1,XMMWORD[16+rdx] | |
1521 movdqu xmm2,XMMWORD[32+rdx] | |
1522 movdqu xmm3,XMMWORD[48+rdx] | |
1523 | |
1524 lea rbx,[384+rsp] | |
1525 mov QWORD[136+rsp],rbx | |
1526 call mont_reduce | |
1527 | |
1528 | |
1529 lea rcx,[448+rsp] | |
1530 xor rax,rax | |
1531 mov QWORD[rcx],rax | |
1532 mov QWORD[8+rcx],rax | |
1533 mov QWORD[24+rcx],rax | |
1534 mov QWORD[32+rcx],rax | |
1535 mov QWORD[40+rcx],rax | |
1536 mov QWORD[48+rcx],rax | |
1537 mov QWORD[56+rcx],rax | |
1538 mov QWORD[128+rsp],rax | |
1539 mov QWORD[16+rcx],1 | |
1540 | |
1541 lea rbp,[640+rsp] | |
1542 mov rsi,rcx | |
1543 mov rdi,rbp | |
1544 mov rax,8 | |
1545 loop_0: | |
1546 mov rbx,QWORD[rcx] | |
1547 mov WORD[rdi],bx | |
1548 shr rbx,16 | |
1549 mov WORD[64+rdi],bx | |
1550 shr rbx,16 | |
1551 mov WORD[128+rdi],bx | |
1552 shr rbx,16 | |
1553 mov WORD[192+rdi],bx | |
1554 lea rcx,[8+rcx] | |
1555 lea rdi,[256+rdi] | |
1556 dec rax | |
1557 jnz NEAR loop_0 | |
1558 mov rax,31 | |
1559 mov QWORD[32+rsp],rax | |
1560 mov QWORD[40+rsp],rbp | |
1561 | |
1562 mov QWORD[136+rsp],rsi | |
1563 mov r10,QWORD[rsi] | |
1564 mov r11,QWORD[8+rsi] | |
1565 mov r12,QWORD[16+rsi] | |
1566 mov r13,QWORD[24+rsi] | |
1567 mov r14,QWORD[32+rsi] | |
1568 mov r15,QWORD[40+rsi] | |
1569 mov r8,QWORD[48+rsi] | |
1570 mov r9,QWORD[56+rsi] | |
1571 init_loop: | |
1572 lea rdi,[384+rsp] | |
1573 call mont_mul_a3b | |
1574 lea rsi,[448+rsp] | |
1575 mov rbp,QWORD[40+rsp] | |
1576 add rbp,2 | |
1577 mov QWORD[40+rsp],rbp | |
1578 mov rcx,rsi | |
1579 mov rax,8 | |
1580 loop_1: | |
1581 mov rbx,QWORD[rcx] | |
1582 mov WORD[rbp],bx | |
1583 shr rbx,16 | |
1584 mov WORD[64+rbp],bx | |
1585 shr rbx,16 | |
1586 mov WORD[128+rbp],bx | |
1587 shr rbx,16 | |
1588 mov WORD[192+rbp],bx | |
1589 lea rcx,[8+rcx] | |
1590 lea rbp,[256+rbp] | |
1591 dec rax | |
1592 jnz NEAR loop_1 | |
1593 mov rax,QWORD[32+rsp] | |
1594 sub rax,1 | |
1595 mov QWORD[32+rsp],rax | |
1596 jne NEAR init_loop | |
1597 | |
1598 | |
1599 | |
1600 movdqa XMMWORD[64+rsp],xmm0 | |
1601 movdqa XMMWORD[80+rsp],xmm1 | |
1602 movdqa XMMWORD[96+rsp],xmm2 | |
1603 movdqa XMMWORD[112+rsp],xmm3 | |
1604 | |
1605 | |
1606 | |
1607 | |
1608 | |
1609 mov eax,DWORD[126+rsp] | |
1610 mov rdx,rax | |
1611 shr rax,11 | |
1612 and edx,0x07FF | |
1613 mov DWORD[126+rsp],edx | |
1614 lea rsi,[640+rax*2+rsp] | |
1615 mov rdx,QWORD[8+rsp] | |
1616 mov rbp,4 | |
1617 loop_2: | |
1618 movzx rbx,WORD[192+rsi] | |
1619 movzx rax,WORD[448+rsi] | |
1620 shl rbx,16 | |
1621 shl rax,16 | |
1622 mov bx,WORD[128+rsi] | |
1623 mov ax,WORD[384+rsi] | |
1624 shl rbx,16 | |
1625 shl rax,16 | |
1626 mov bx,WORD[64+rsi] | |
1627 mov ax,WORD[320+rsi] | |
1628 shl rbx,16 | |
1629 shl rax,16 | |
1630 mov bx,WORD[rsi] | |
1631 mov ax,WORD[256+rsi] | |
1632 mov QWORD[rdx],rbx | |
1633 mov QWORD[8+rdx],rax | |
1634 lea rsi,[512+rsi] | |
1635 lea rdx,[16+rdx] | |
1636 sub rbp,1 | |
1637 jnz NEAR loop_2 | |
1638 mov QWORD[48+rsp],505 | |
1639 | |
1640 mov rcx,QWORD[8+rsp] | |
1641 mov QWORD[136+rsp],rcx | |
1642 mov r10,QWORD[rcx] | |
1643 mov r11,QWORD[8+rcx] | |
1644 mov r12,QWORD[16+rcx] | |
1645 mov r13,QWORD[24+rcx] | |
1646 mov r14,QWORD[32+rcx] | |
1647 mov r15,QWORD[40+rcx] | |
1648 mov r8,QWORD[48+rcx] | |
1649 mov r9,QWORD[56+rcx] | |
1650 jmp NEAR sqr_2 | |
1651 | |
1652 main_loop_a3b: | |
1653 call sqr_reduce | |
1654 call sqr_reduce | |
1655 call sqr_reduce | |
1656 sqr_2: | |
1657 call sqr_reduce | |
1658 call sqr_reduce | |
1659 | |
1660 | |
1661 | |
1662 mov rcx,QWORD[48+rsp] | |
1663 mov rax,rcx | |
1664 shr rax,4 | |
1665 mov edx,DWORD[64+rax*2+rsp] | |
1666 and rcx,15 | |
1667 shr rdx,cl | |
1668 and rdx,0x1F | |
1669 | |
1670 lea rsi,[640+rdx*2+rsp] | |
1671 lea rdx,[448+rsp] | |
1672 mov rdi,rdx | |
1673 mov rbp,4 | |
1674 loop_3: | |
1675 movzx rbx,WORD[192+rsi] | |
1676 movzx rax,WORD[448+rsi] | |
1677 shl rbx,16 | |
1678 shl rax,16 | |
1679 mov bx,WORD[128+rsi] | |
1680 mov ax,WORD[384+rsi] | |
1681 shl rbx,16 | |
1682 shl rax,16 | |
1683 mov bx,WORD[64+rsi] | |
1684 mov ax,WORD[320+rsi] | |
1685 shl rbx,16 | |
1686 shl rax,16 | |
1687 mov bx,WORD[rsi] | |
1688 mov ax,WORD[256+rsi] | |
1689 mov QWORD[rdx],rbx | |
1690 mov QWORD[8+rdx],rax | |
1691 lea rsi,[512+rsi] | |
1692 lea rdx,[16+rdx] | |
1693 sub rbp,1 | |
1694 jnz NEAR loop_3 | |
1695 mov rsi,QWORD[8+rsp] | |
1696 call mont_mul_a3b | |
1697 | |
1698 | |
1699 | |
1700 mov rcx,QWORD[48+rsp] | |
1701 sub rcx,5 | |
1702 mov QWORD[48+rsp],rcx | |
1703 jge NEAR main_loop_a3b | |
1704 | |
1705 | |
1706 | |
1707 end_main_loop_a3b: | |
1708 | |
1709 | |
1710 mov rdx,QWORD[8+rsp] | |
1711 pxor xmm4,xmm4 | |
1712 movdqu xmm0,XMMWORD[rdx] | |
1713 movdqu xmm1,XMMWORD[16+rdx] | |
1714 movdqu xmm2,XMMWORD[32+rdx] | |
1715 movdqu xmm3,XMMWORD[48+rdx] | |
1716 movdqa XMMWORD[576+rsp],xmm4 | |
1717 movdqa XMMWORD[592+rsp],xmm4 | |
1718 movdqa XMMWORD[608+rsp],xmm4 | |
1719 movdqa XMMWORD[624+rsp],xmm4 | |
1720 movdqa XMMWORD[512+rsp],xmm0 | |
1721 movdqa XMMWORD[528+rsp],xmm1 | |
1722 movdqa XMMWORD[544+rsp],xmm2 | |
1723 movdqa XMMWORD[560+rsp],xmm3 | |
1724 call mont_reduce | |
1725 | |
1726 | |
1727 | |
1728 mov rax,QWORD[8+rsp] | |
1729 mov r8,QWORD[rax] | |
1730 mov r9,QWORD[8+rax] | |
1731 mov r10,QWORD[16+rax] | |
1732 mov r11,QWORD[24+rax] | |
1733 mov r12,QWORD[32+rax] | |
1734 mov r13,QWORD[40+rax] | |
1735 mov r14,QWORD[48+rax] | |
1736 mov r15,QWORD[56+rax] | |
1737 | |
1738 | |
1739 mov rbx,QWORD[24+rsp] | |
1740 add rbx,512 | |
1741 | |
1742 sub r8,QWORD[rbx] | |
1743 sbb r9,QWORD[8+rbx] | |
1744 sbb r10,QWORD[16+rbx] | |
1745 sbb r11,QWORD[24+rbx] | |
1746 sbb r12,QWORD[32+rbx] | |
1747 sbb r13,QWORD[40+rbx] | |
1748 sbb r14,QWORD[48+rbx] | |
1749 sbb r15,QWORD[56+rbx] | |
1750 | |
1751 | |
1752 mov rsi,QWORD[rax] | |
1753 mov rdi,QWORD[8+rax] | |
1754 mov rcx,QWORD[16+rax] | |
1755 mov rdx,QWORD[24+rax] | |
1756 cmovnc rsi,r8 | |
1757 cmovnc rdi,r9 | |
1758 cmovnc rcx,r10 | |
1759 cmovnc rdx,r11 | |
1760 mov QWORD[rax],rsi | |
1761 mov QWORD[8+rax],rdi | |
1762 mov QWORD[16+rax],rcx | |
1763 mov QWORD[24+rax],rdx | |
1764 | |
1765 mov rsi,QWORD[32+rax] | |
1766 mov rdi,QWORD[40+rax] | |
1767 mov rcx,QWORD[48+rax] | |
1768 mov rdx,QWORD[56+rax] | |
1769 cmovnc rsi,r12 | |
1770 cmovnc rdi,r13 | |
1771 cmovnc rcx,r14 | |
1772 cmovnc rdx,r15 | |
1773 mov QWORD[32+rax],rsi | |
1774 mov QWORD[40+rax],rdi | |
1775 mov QWORD[48+rax],rcx | |
1776 mov QWORD[56+rax],rdx | |
1777 | |
1778 mov rsi,QWORD[rsp] | |
1779 mov r15,QWORD[rsi] | |
1780 mov r14,QWORD[8+rsi] | |
1781 mov r13,QWORD[16+rsi] | |
1782 mov r12,QWORD[24+rsi] | |
1783 mov rbx,QWORD[32+rsi] | |
1784 mov rbp,QWORD[40+rsi] | |
1785 lea rsp,[48+rsi] | |
1786 $L$epilogue: | |
1787 mov rdi,QWORD[8+rsp] ;WIN64 epilogue | |
1788 mov rsi,QWORD[16+rsp] | |
1789 DB 0F3h,0C3h ;repret | |
1790 $L$SEH_end_mod_exp_512: | |
1791 EXTERN __imp_RtlVirtualUnwind | |
1792 | |
1793 ALIGN 16 | |
; ----------------------------------------------------------------------
; mod_exp_512_se_handler
;
; Win64 exception handler registered for mod_exp_512 through the .xdata
; record $L$SEH_info_mod_exp_512.  It patches the register image that
; r8 points at so that it looks as it did on entry to mod_exp_512,
; copies that image to the pointer stored at [40+r9], calls
; RtlVirtualUnwind and returns 1 in eax.
;
; In:    r8 = pointer to the register image (offsets used below match
;             the Win64 CONTEXT layout: Rax 120, Rbx 144, Rsp 152,
;             Rbp 160, Rsi 168, Rdi 176, R12..R15 216..240, Rip 248 --
;             NOTE(review): confirm against winnt.h)
;        r9 = pointer to the dispatcher record (offsets used below match
;             DISPATCHER_CONTEXT: ControlPc 0, ImageBase 8,
;             FunctionEntry 16, EstablisherFrame 24, ContextRecord 40,
;             HandlerData 56 -- NOTE(review): confirm against winnt.h)
;        rcx, rdx on entry are never read.
; Out:   eax = 1 (presumably ExceptionContinueSearch -- confirm)
; Saves: rsi, rdi, rbx, rbp, r12-r15 and rflags are pushed on entry and
;        popped before returning.
; Stack: 9 pushes (72 bytes) + 64 bytes of locals; the 64-byte area
;        holds the four register-argument home slots [0..31] and the
;        four stack arguments [32..63] for the RtlVirtualUnwind call.
; ----------------------------------------------------------------------
1794 mod_exp_512_se_handler: | |
1795 push rsi | |
1796 push rdi | |
1797 push rbx | |
1798 push rbp | |
1799 push r12 | |
1800 push r13 | |
1801 push r14 | |
1802 push r15 | |
1803 pushfq | |
1804 sub rsp,64 | |
1805 | |
; rax = image value at +120 (Rax), rbx = image value at +248 (Rip).
; If the faulting Rip is below $L$body the frame is not set up yet and
; the +120 value is used as the frame base (presumably the prologue of
; mod_exp_512 keeps the entry rsp in rax -- prologue is not visible
; here, confirm).
1806 mov rax,QWORD[120+r8] | |
1807 mov rbx,QWORD[248+r8] | |
1808 | |
1809 lea r10,[$L$body] | |
1810 cmp rbx,r10 | |
1811 jb NEAR $L$in_prologue | |
1812 | |
; From here on the frame base comes from the image value at +152 (Rsp).
1813 mov rax,QWORD[152+r8] | |
1814 | |
; Rip at or past $L$epilogue: the non-volatile registers and rsp have
; already been restored by the function itself, so skip the reload.
1815 lea r10,[$L$epilogue] | |
1816 cmp rbx,r10 | |
1817 jae NEAR $L$in_prologue | |
1818 | |
; Inside the body: the qword at [Rsp] is the pointer to the save area
; (the function's own epilogue reloads it the same way with
; "mov rsi,[rsp]" and then reads r15..rbp from offsets 0..40).
1819 mov rax,QWORD[rax] | |
1820 | |
; Reload the saved non-volatile registers from the save area, step rax
; past the 48-byte area, and write the values back into the image.
1821 mov rbx,QWORD[32+rax] | |
1822 mov rbp,QWORD[40+rax] | |
1823 mov r12,QWORD[24+rax] | |
1824 mov r13,QWORD[16+rax] | |
1825 mov r14,QWORD[8+rax] | |
1826 mov r15,QWORD[rax] | |
1827 lea rax,[48+rax] | |
1828 mov QWORD[144+r8],rbx | |
1829 mov QWORD[160+r8],rbp | |
1830 mov QWORD[216+r8],r12 | |
1831 mov QWORD[224+r8],r13 | |
1832 mov QWORD[232+r8],r14 | |
1833 mov QWORD[240+r8],r15 | |
1834 | |
; Common tail: rax is the stack pointer value at function entry.
; rdi and rsi were stored at [8+rax] and [16+rax] (the same slots the
; WIN64 epilogue of mod_exp_512 reloads them from); write them and the
; entry rsp into the image at +176, +168 and +152.
1835 $L$in_prologue: | |
1836 mov rdi,QWORD[8+rax] | |
1837 mov rsi,QWORD[16+rax] | |
1838 mov QWORD[152+r8],rax | |
1839 mov QWORD[168+r8],rsi | |
1840 mov QWORD[176+r8],rdi | |
1841 | |
; Copy the patched image (154 qwords = 1232 bytes) from r8 to the
; pointer held at [40+r9].  The DD below is the little-endian encoding
; of the bytes FC F3 48 A5, i.e. "cld" followed by "rep movsq".
1842 mov rdi,QWORD[40+r9] | |
1843 mov rsi,r8 | |
1844 mov ecx,154 | |
1845 DD 0xa548f3fc | |
1846 | |
; Build the eight arguments for RtlVirtualUnwind:
;   rcx = 0, rdx = [8+disp], r8 = [disp], r9 = [16+disp],
;   [32+rsp] = [40+disp], [40+rsp] = disp+56, [48+rsp] = disp+24,
;   [56+rsp] = 0.
; rsi keeps the dispatcher pointer because r9 is overwritten below.
1847 mov rsi,r9 | |
1848 xor rcx,rcx | |
1849 mov rdx,QWORD[8+rsi] | |
1850 mov r8,QWORD[rsi] | |
1851 mov r9,QWORD[16+rsi] | |
1852 mov r10,QWORD[40+rsi] | |
1853 lea r11,[56+rsi] | |
1854 lea r12,[24+rsi] | |
1855 mov QWORD[32+rsp],r10 | |
1856 mov QWORD[40+rsp],r11 | |
1857 mov QWORD[48+rsp],r12 | |
1858 mov QWORD[56+rsp],rcx | |
1859 call QWORD[__imp_RtlVirtualUnwind] | |
1860 | |
; Return 1 and undo the entry pushes in reverse order.
1861 mov eax,1 | |
1862 add rsp,64 | |
1863 popfq | |
1864 pop r15 | |
1865 pop r14 | |
1866 pop r13 | |
1867 pop r12 | |
1868 pop rbp | |
1869 pop rbx | |
1870 pop rdi | |
1871 pop rsi | |
1872 DB 0F3h,0C3h ;repret | |
1873 | |
1874 | |
; .pdata: one three-dword record for mod_exp_512, each field emitted
; image-relative (wrt ..imagebase): the $L$SEH_begin_mod_exp_512 label,
; the $L$SEH_end_mod_exp_512 label, and the $L$SEH_info_mod_exp_512
; record in .xdata.  This matches the Win64 RUNTIME_FUNCTION layout
; (begin address, end address, unwind info) -- NOTE(review): confirm
; against the Microsoft x64 exception-handling documentation.
1875 section .pdata rdata align=4 | |
1876 ALIGN 4 | |
1877 DD $L$SEH_begin_mod_exp_512 wrt ..imagebase | |
1878 DD $L$SEH_end_mod_exp_512 wrt ..imagebase | |
1879 DD $L$SEH_info_mod_exp_512 wrt ..imagebase | |
1880 | |
; .xdata: the info record that the .pdata entry for mod_exp_512 points
; at.  It is a 4-byte header followed by the image-relative address of
; mod_exp_512_se_handler.
; NOTE(review): read as a Win64 UNWIND_INFO header, the first byte 9
; would be version 1 with the "has exception handler" flag set, and the
; three zero bytes would be prolog size, unwind-code count and frame
; register -- i.e. no unwind codes, all unwinding left to the handler.
; Confirm against the Microsoft UNWIND_INFO documentation.
1881 section .xdata rdata align=8 | |
1882 ALIGN 8 | |
1883 $L$SEH_info_mod_exp_512: | |
1884 DB 9,0,0,0 | |
1885 DD mod_exp_512_se_handler wrt ..imagebase | |
OLD | NEW |