OLD | NEW |
| (Empty) |
1 #if defined(__x86_64__) | |
2 .text | |
3 | |
# MULADD_128x512: multiply an 8-word (512-bit) operand by two 64-bit
# multiplier words and accumulate the result onto an 8-word accumulator.
#   In:  rsi      -> eight 64-bit operand words, read at 0..56(%rsi)
#        rbp      =  first multiplier word (already loaded by the caller)
#        8(%rdi)  =  second multiplier word (loaded here into rbp)
#        r8..r15  =  accumulator words, r8 lowest
#        rcx      -> two-word output area
#   Out: 0(%rcx), 8(%rcx)  = the two lowest result words
#        r10..r15, r8, r9  = the remaining eight result words, r10 lowest
#   Clobbers rax, rdx, rbx, rbp and the flags. Uses no stack storage.
4 .type MULADD_128x512,@function | |
5 .align 16 | |
6 MULADD_128x512: | |
# Pass 1: accumulator += operand * rbp. rbx carries the high half of each
# partial product into the next word.
7 movq 0(%rsi),%rax | |
8 mulq %rbp | |
9 addq %rax,%r8 | |
10 adcq $0,%rdx | |
# Lowest word is final after this step, so it is written out immediately.
11 movq %r8,0(%rcx) | |
12 movq %rdx,%rbx | |
13 | |
14 movq 8(%rsi),%rax | |
15 mulq %rbp | |
16 addq %rax,%r9 | |
17 adcq $0,%rdx | |
18 addq %rbx,%r9 | |
19 adcq $0,%rdx | |
20 movq %rdx,%rbx | |
21 | |
22 movq 16(%rsi),%rax | |
23 mulq %rbp | |
24 addq %rax,%r10 | |
25 adcq $0,%rdx | |
26 addq %rbx,%r10 | |
27 adcq $0,%rdx | |
28 movq %rdx,%rbx | |
29 | |
30 movq 24(%rsi),%rax | |
31 mulq %rbp | |
32 addq %rax,%r11 | |
33 adcq $0,%rdx | |
34 addq %rbx,%r11 | |
35 adcq $0,%rdx | |
36 movq %rdx,%rbx | |
37 | |
38 movq 32(%rsi),%rax | |
39 mulq %rbp | |
40 addq %rax,%r12 | |
41 adcq $0,%rdx | |
42 addq %rbx,%r12 | |
43 adcq $0,%rdx | |
44 movq %rdx,%rbx | |
45 | |
46 movq 40(%rsi),%rax | |
47 mulq %rbp | |
48 addq %rax,%r13 | |
49 adcq $0,%rdx | |
50 addq %rbx,%r13 | |
51 adcq $0,%rdx | |
52 movq %rdx,%rbx | |
53 | |
54 movq 48(%rsi),%rax | |
55 mulq %rbp | |
56 addq %rax,%r14 | |
57 adcq $0,%rdx | |
58 addq %rbx,%r14 | |
59 adcq $0,%rdx | |
60 movq %rdx,%rbx | |
61 | |
62 movq 56(%rsi),%rax | |
63 mulq %rbp | |
64 addq %rax,%r15 | |
65 adcq $0,%rdx | |
66 addq %rbx,%r15 | |
67 adcq $0,%rdx | |
# r8 was already written out, so it is reused as the new top word.
68 movq %rdx,%r8 | |
# Pass 2: same procedure with the second multiplier word; the accumulator
# window is now r9..r15, r8.
69 movq 8(%rdi),%rbp | |
70 movq 0(%rsi),%rax | |
71 mulq %rbp | |
72 addq %rax,%r9 | |
73 adcq $0,%rdx | |
74 movq %r9,8(%rcx) | |
75 movq %rdx,%rbx | |
76 | |
77 movq 8(%rsi),%rax | |
78 mulq %rbp | |
79 addq %rax,%r10 | |
80 adcq $0,%rdx | |
81 addq %rbx,%r10 | |
82 adcq $0,%rdx | |
83 movq %rdx,%rbx | |
84 | |
85 movq 16(%rsi),%rax | |
86 mulq %rbp | |
87 addq %rax,%r11 | |
88 adcq $0,%rdx | |
89 addq %rbx,%r11 | |
90 adcq $0,%rdx | |
91 movq %rdx,%rbx | |
92 | |
93 movq 24(%rsi),%rax | |
94 mulq %rbp | |
95 addq %rax,%r12 | |
96 adcq $0,%rdx | |
97 addq %rbx,%r12 | |
98 adcq $0,%rdx | |
99 movq %rdx,%rbx | |
100 | |
101 movq 32(%rsi),%rax | |
102 mulq %rbp | |
103 addq %rax,%r13 | |
104 adcq $0,%rdx | |
105 addq %rbx,%r13 | |
106 adcq $0,%rdx | |
107 movq %rdx,%rbx | |
108 | |
109 movq 40(%rsi),%rax | |
110 mulq %rbp | |
111 addq %rax,%r14 | |
112 adcq $0,%rdx | |
113 addq %rbx,%r14 | |
114 adcq $0,%rdx | |
115 movq %rdx,%rbx | |
116 | |
117 movq 48(%rsi),%rax | |
118 mulq %rbp | |
119 addq %rax,%r15 | |
120 adcq $0,%rdx | |
121 addq %rbx,%r15 | |
122 adcq $0,%rdx | |
123 movq %rdx,%rbx | |
124 | |
125 movq 56(%rsi),%rax | |
126 mulq %rbp | |
127 addq %rax,%r8 | |
128 adcq $0,%rdx | |
129 addq %rbx,%r8 | |
130 adcq $0,%rdx | |
131 movq %rdx,%r9 | |
# Bytes 0xf3,0xc3 encode a rep-prefixed ret.
132 .byte 0xf3,0xc3 | |
133 .size MULADD_128x512,.-MULADD_128x512 | |
# mont_reduce: reduce the 16-word value at 520..640(%rsp) to 8 words and store
# the result through the pointer kept at 144(%rsp).
# NOTE(review): the name suggests a Montgomery reduction; the exact
# mathematical contract is not visible in this listing - confirm against the
# generator script.
#   In (all %rsp offsets are as seen inside this routine):
#        32(%rsp)        pointer to a constant area. Read here: up to eight
#                        8-word entries interleaved with a 64-byte stride
#                        starting at offset 0, 8-word operands at offsets
#                        512, 576 and 640, and a 2-word operand at offset 704
#        520..640(%rsp)  16-word input, lowest word first
#        144(%rsp)       pointer to the 8-word destination
#   Out: eight result words stored at the destination; the same words are
#        left in r10..r15, r8, r9.
#   Scratch: areas starting at 192(%rsp) and 296(%rsp), and the carry word at
#        384(%rsp).
#   Clobbers rax, rbx, rcx, rdx, rsi, rdi, rbp, r8-r15 and the flags.
134 .type mont_reduce,@function | |
135 .align 16 | |
136 mont_reduce: | |
137 leaq 192(%rsp),%rdi | |
138 movq 32(%rsp),%rsi | |
139 addq $576,%rsi | |
140 leaq 520(%rsp),%rcx | |
141 | |
# Stage 1: input words 12..15 (96..120(%rcx)) times the 8 words at rsi,
# accumulated onto input words 0..7. Each pass retires one low word into
# 0..24(%rdi).
142 movq 96(%rcx),%rbp | |
143 movq 0(%rsi),%rax | |
144 mulq %rbp | |
145 movq (%rcx),%r8 | |
146 addq %rax,%r8 | |
147 adcq $0,%rdx | |
148 movq %r8,0(%rdi) | |
149 movq %rdx,%rbx | |
150 | |
151 movq 8(%rsi),%rax | |
152 mulq %rbp | |
153 movq 8(%rcx),%r9 | |
154 addq %rax,%r9 | |
155 adcq $0,%rdx | |
156 addq %rbx,%r9 | |
157 adcq $0,%rdx | |
158 movq %rdx,%rbx | |
159 | |
160 movq 16(%rsi),%rax | |
161 mulq %rbp | |
162 movq 16(%rcx),%r10 | |
163 addq %rax,%r10 | |
164 adcq $0,%rdx | |
165 addq %rbx,%r10 | |
166 adcq $0,%rdx | |
167 movq %rdx,%rbx | |
168 | |
169 movq 24(%rsi),%rax | |
170 mulq %rbp | |
171 movq 24(%rcx),%r11 | |
172 addq %rax,%r11 | |
173 adcq $0,%rdx | |
174 addq %rbx,%r11 | |
175 adcq $0,%rdx | |
176 movq %rdx,%rbx | |
177 | |
178 movq 32(%rsi),%rax | |
179 mulq %rbp | |
180 movq 32(%rcx),%r12 | |
181 addq %rax,%r12 | |
182 adcq $0,%rdx | |
183 addq %rbx,%r12 | |
184 adcq $0,%rdx | |
185 movq %rdx,%rbx | |
186 | |
187 movq 40(%rsi),%rax | |
188 mulq %rbp | |
189 movq 40(%rcx),%r13 | |
190 addq %rax,%r13 | |
191 adcq $0,%rdx | |
192 addq %rbx,%r13 | |
193 adcq $0,%rdx | |
194 movq %rdx,%rbx | |
195 | |
196 movq 48(%rsi),%rax | |
197 mulq %rbp | |
198 movq 48(%rcx),%r14 | |
199 addq %rax,%r14 | |
200 adcq $0,%rdx | |
201 addq %rbx,%r14 | |
202 adcq $0,%rdx | |
203 movq %rdx,%rbx | |
204 | |
205 movq 56(%rsi),%rax | |
206 mulq %rbp | |
207 movq 56(%rcx),%r15 | |
208 addq %rax,%r15 | |
209 adcq $0,%rdx | |
210 addq %rbx,%r15 | |
211 adcq $0,%rdx | |
212 movq %rdx,%r8 | |
# Second multiplier word (input word 13).
213 movq 104(%rcx),%rbp | |
214 movq 0(%rsi),%rax | |
215 mulq %rbp | |
216 addq %rax,%r9 | |
217 adcq $0,%rdx | |
218 movq %r9,8(%rdi) | |
219 movq %rdx,%rbx | |
220 | |
221 movq 8(%rsi),%rax | |
222 mulq %rbp | |
223 addq %rax,%r10 | |
224 adcq $0,%rdx | |
225 addq %rbx,%r10 | |
226 adcq $0,%rdx | |
227 movq %rdx,%rbx | |
228 | |
229 movq 16(%rsi),%rax | |
230 mulq %rbp | |
231 addq %rax,%r11 | |
232 adcq $0,%rdx | |
233 addq %rbx,%r11 | |
234 adcq $0,%rdx | |
235 movq %rdx,%rbx | |
236 | |
237 movq 24(%rsi),%rax | |
238 mulq %rbp | |
239 addq %rax,%r12 | |
240 adcq $0,%rdx | |
241 addq %rbx,%r12 | |
242 adcq $0,%rdx | |
243 movq %rdx,%rbx | |
244 | |
245 movq 32(%rsi),%rax | |
246 mulq %rbp | |
247 addq %rax,%r13 | |
248 adcq $0,%rdx | |
249 addq %rbx,%r13 | |
250 adcq $0,%rdx | |
251 movq %rdx,%rbx | |
252 | |
253 movq 40(%rsi),%rax | |
254 mulq %rbp | |
255 addq %rax,%r14 | |
256 adcq $0,%rdx | |
257 addq %rbx,%r14 | |
258 adcq $0,%rdx | |
259 movq %rdx,%rbx | |
260 | |
261 movq 48(%rsi),%rax | |
262 mulq %rbp | |
263 addq %rax,%r15 | |
264 adcq $0,%rdx | |
265 addq %rbx,%r15 | |
266 adcq $0,%rdx | |
267 movq %rdx,%rbx | |
268 | |
269 movq 56(%rsi),%rax | |
270 mulq %rbp | |
271 addq %rax,%r8 | |
272 adcq $0,%rdx | |
273 addq %rbx,%r8 | |
274 adcq $0,%rdx | |
275 movq %rdx,%r9 | |
# Third multiplier word (input word 14).
276 movq 112(%rcx),%rbp | |
277 movq 0(%rsi),%rax | |
278 mulq %rbp | |
279 addq %rax,%r10 | |
280 adcq $0,%rdx | |
281 movq %r10,16(%rdi) | |
282 movq %rdx,%rbx | |
283 | |
284 movq 8(%rsi),%rax | |
285 mulq %rbp | |
286 addq %rax,%r11 | |
287 adcq $0,%rdx | |
288 addq %rbx,%r11 | |
289 adcq $0,%rdx | |
290 movq %rdx,%rbx | |
291 | |
292 movq 16(%rsi),%rax | |
293 mulq %rbp | |
294 addq %rax,%r12 | |
295 adcq $0,%rdx | |
296 addq %rbx,%r12 | |
297 adcq $0,%rdx | |
298 movq %rdx,%rbx | |
299 | |
300 movq 24(%rsi),%rax | |
301 mulq %rbp | |
302 addq %rax,%r13 | |
303 adcq $0,%rdx | |
304 addq %rbx,%r13 | |
305 adcq $0,%rdx | |
306 movq %rdx,%rbx | |
307 | |
308 movq 32(%rsi),%rax | |
309 mulq %rbp | |
310 addq %rax,%r14 | |
311 adcq $0,%rdx | |
312 addq %rbx,%r14 | |
313 adcq $0,%rdx | |
314 movq %rdx,%rbx | |
315 | |
316 movq 40(%rsi),%rax | |
317 mulq %rbp | |
318 addq %rax,%r15 | |
319 adcq $0,%rdx | |
320 addq %rbx,%r15 | |
321 adcq $0,%rdx | |
322 movq %rdx,%rbx | |
323 | |
324 movq 48(%rsi),%rax | |
325 mulq %rbp | |
326 addq %rax,%r8 | |
327 adcq $0,%rdx | |
328 addq %rbx,%r8 | |
329 adcq $0,%rdx | |
330 movq %rdx,%rbx | |
331 | |
332 movq 56(%rsi),%rax | |
333 mulq %rbp | |
334 addq %rax,%r9 | |
335 adcq $0,%rdx | |
336 addq %rbx,%r9 | |
337 adcq $0,%rdx | |
338 movq %rdx,%r10 | |
# Fourth multiplier word (input word 15).
339 movq 120(%rcx),%rbp | |
340 movq 0(%rsi),%rax | |
341 mulq %rbp | |
342 addq %rax,%r11 | |
343 adcq $0,%rdx | |
344 movq %r11,24(%rdi) | |
345 movq %rdx,%rbx | |
346 | |
347 movq 8(%rsi),%rax | |
348 mulq %rbp | |
349 addq %rax,%r12 | |
350 adcq $0,%rdx | |
351 addq %rbx,%r12 | |
352 adcq $0,%rdx | |
353 movq %rdx,%rbx | |
354 | |
355 movq 16(%rsi),%rax | |
356 mulq %rbp | |
357 addq %rax,%r13 | |
358 adcq $0,%rdx | |
359 addq %rbx,%r13 | |
360 adcq $0,%rdx | |
361 movq %rdx,%rbx | |
362 | |
363 movq 24(%rsi),%rax | |
364 mulq %rbp | |
365 addq %rax,%r14 | |
366 adcq $0,%rdx | |
367 addq %rbx,%r14 | |
368 adcq $0,%rdx | |
369 movq %rdx,%rbx | |
370 | |
371 movq 32(%rsi),%rax | |
372 mulq %rbp | |
373 addq %rax,%r15 | |
374 adcq $0,%rdx | |
375 addq %rbx,%r15 | |
376 adcq $0,%rdx | |
377 movq %rdx,%rbx | |
378 | |
379 movq 40(%rsi),%rax | |
380 mulq %rbp | |
381 addq %rax,%r8 | |
382 adcq $0,%rdx | |
383 addq %rbx,%r8 | |
384 adcq $0,%rdx | |
385 movq %rdx,%rbx | |
386 | |
387 movq 48(%rsi),%rax | |
388 mulq %rbp | |
389 addq %rax,%r9 | |
390 adcq $0,%rdx | |
391 addq %rbx,%r9 | |
392 adcq $0,%rdx | |
393 movq %rdx,%rbx | |
394 | |
395 movq 56(%rsi),%rax | |
396 mulq %rbp | |
397 addq %rax,%r10 | |
398 adcq $0,%rdx | |
399 addq %rbx,%r10 | |
400 adcq $0,%rdx | |
401 movq %rdx,%r11 | |
# Add input words 8..11 to the four top sum words; rax catches the carry.
402 xorq %rax,%rax | |
403 | |
404 addq 64(%rcx),%r8 | |
405 adcq 72(%rcx),%r9 | |
406 adcq 80(%rcx),%r10 | |
407 adcq 88(%rcx),%r11 | |
408 adcq $0,%rax | |
409 | |
410 | |
411 | |
412 | |
# r8 and r9 are parked in scratch. r10 stays in rbp as the first multiplier
# for MULADD_128x512, and r11 is stored at 88(%rdi) so that it is found at
# 8(%rdi) once rdi has been advanced by 80.
413 movq %r8,64(%rdi) | |
414 movq %r9,72(%rdi) | |
415 movq %r10,%rbp | |
416 movq %r11,88(%rdi) | |
417 | |
# Save the carry word across the calls below.
418 movq %rax,384(%rsp) | |
419 | |
# Reload the four low sum words retired during stage 1; together with
# r12..r15 they form the accumulator passed to MULADD_128x512.
420 movq 0(%rdi),%r8 | |
421 movq 8(%rdi),%r9 | |
422 movq 16(%rdi),%r10 | |
423 movq 24(%rdi),%r11 | |
424 | |
425 | |
426 | |
427 | |
428 | |
429 | |
430 | |
431 | |
432 addq $80,%rdi | |
433 | |
# Stage 2: rsi now points at offset 640 of the constant area; the two low
# result words go to the scratch area at 296(%rsp).
434 addq $64,%rsi | |
435 leaq 296(%rsp),%rcx | |
436 | |
437 call MULADD_128x512 | |
438 | |
439 movq 384(%rsp),%rax | |
440 | |
441 | |
# Add the two words parked at 64/72 of the scratch area (now at -16/-8 from
# the advanced rdi) to the two top result words.
442 addq -16(%rdi),%r8 | |
443 adcq -8(%rdi),%r9 | |
444 movq %r8,64(%rcx) | |
445 movq %r9,72(%rcx) | |
446 | |
# Fold the new carry into the saved carry word: rax = 2*rax + CF.
447 adcq %rax,%rax | |
448 movq %rax,384(%rsp) | |
449 | |
450 leaq 192(%rsp),%rdi | |
451 addq $64,%rsi | |
452 | |
453 | |
454 | |
455 | |
456 | |
# Stage 3: low 128 bits of (two words at rcx) * (two words at rsi, offset 704
# of the constant area). The low word ends up in rbp and the second word at
# 8(%rdi), which is exactly where MULADD_128x512 expects its multipliers.
457 movq (%rsi),%r8 | |
458 movq 8(%rsi),%rbx | |
459 | |
460 movq (%rcx),%rax | |
461 mulq %r8 | |
462 movq %rax,%rbp | |
463 movq %rdx,%r9 | |
464 | |
465 movq 8(%rcx),%rax | |
466 mulq %r8 | |
467 addq %rax,%r9 | |
468 | |
469 movq (%rcx),%rax | |
470 mulq %rbx | |
471 addq %rax,%r9 | |
472 | |
473 movq %r9,8(%rdi) | |
474 | |
475 | |
# rsi goes back to offset 512 of the constant area.
476 subq $192,%rsi | |
477 | |
478 movq (%rcx),%r8 | |
479 movq 8(%rcx),%r9 | |
480 | |
481 call MULADD_128x512 | |
482 | |
483 | |
484 | |
485 | |
# Preload the four low words at rsi; they are masked and subtracted below.
486 movq 0(%rsi),%rax | |
487 movq 8(%rsi),%rbx | |
488 movq 16(%rsi),%rdi | |
489 movq 24(%rsi),%rdx | |
490 | |
491 | |
492 movq 384(%rsp),%rbp | |
493 | |
494 addq 64(%rcx),%r8 | |
495 adcq 72(%rcx),%r9 | |
496 | |
497 | |
# rbp = 2*saved carry + CF. This value indexes the interleaved entries.
498 adcq %rbp,%rbp | |
499 | |
500 | |
501 | |
# Scale the index to a byte offset and add the base of the constant area.
502 shlq $3,%rbp | |
503 movq 32(%rsp),%rcx | |
504 addq %rcx,%rbp | |
505 | |
506 | |
507 xorq %rsi,%rsi | |
508 | |
# Add the selected 8-word entry; its words sit 64 bytes apart.
509 addq 0(%rbp),%r10 | |
510 adcq 64(%rbp),%r11 | |
511 adcq 128(%rbp),%r12 | |
512 adcq 192(%rbp),%r13 | |
513 adcq 256(%rbp),%r14 | |
514 adcq 320(%rbp),%r15 | |
515 adcq 384(%rbp),%r8 | |
516 adcq 448(%rbp),%r9 | |
517 | |
518 | |
519 | |
# rsi = 0 when the addition did not carry out, all ones when it did.
520 sbbq $0,%rsi | |
521 | |
522 | |
# Branch-free conditional subtract: the subtrahend is ANDed with the mask.
523 andq %rsi,%rax | |
524 andq %rsi,%rbx | |
525 andq %rsi,%rdi | |
526 andq %rsi,%rdx | |
527 | |
528 movq $1,%rbp | |
529 subq %rax,%r10 | |
530 sbbq %rbx,%r11 | |
531 sbbq %rdi,%r12 | |
532 sbbq %rdx,%r13 | |
533 | |
534 | |
535 | |
536 | |
# rbp = 1 - borrow. The borrow is kept in a register because the addq below
# overwrites the flags.
537 sbbq $0,%rbp | |
538 | |
539 | |
540 | |
# High four words of the same 8-word operand (offset 512 of the constant
# area), masked the same way.
541 addq $512,%rcx | |
542 movq 32(%rcx),%rax | |
543 movq 40(%rcx),%rbx | |
544 movq 48(%rcx),%rdi | |
545 movq 56(%rcx),%rdx | |
546 | |
547 | |
548 | |
549 andq %rsi,%rax | |
550 andq %rsi,%rbx | |
551 andq %rsi,%rdi | |
552 andq %rsi,%rdx | |
553 | |
554 | |
555 | |
# Regenerates the saved borrow in CF: rbp of 0 borrows, rbp of 1 does not.
556 subq $1,%rbp | |
557 | |
558 sbbq %rax,%r14 | |
559 sbbq %rbx,%r15 | |
560 sbbq %rdi,%r8 | |
561 sbbq %rdx,%r9 | |
562 | |
563 | |
564 | |
# Store the eight result words through the destination pointer.
565 movq 144(%rsp),%rsi | |
566 movq %r10,0(%rsi) | |
567 movq %r11,8(%rsi) | |
568 movq %r12,16(%rsi) | |
569 movq %r13,24(%rsi) | |
570 movq %r14,32(%rsi) | |
571 movq %r15,40(%rsi) | |
572 movq %r8,48(%rsi) | |
573 movq %r9,56(%rsi) | |
574 | |
# Bytes 0xf3,0xc3 encode a rep-prefixed ret.
575 .byte 0xf3,0xc3 | |
576 .size mont_reduce,.-mont_reduce | |
# mont_mul_a3b: 8-word by 8-word multiply, followed by a tail jump to
# mont_reduce.
#   In:  rdi               -> eight multiplier words, read at 0..56(%rdi)
#        rsi               -> eight multiplicand words, read at 0..56(%rsi)
#        r10..r15, r8, r9  multiplicand words 0..7 in registers; the first
#                          pass reads these instead of memory
#                          NOTE(review): presumably the same words rsi points
#                          at - confirm with the callers
#   Out: 520..640(%rsp)    16-word product, lowest word first; control then
#                          continues in mont_reduce, which returns to the
#                          caller of this routine
#   Clobbers rax, rbx, rdx, rbp, r8-r15 and the flags before the jump.
577 .type mont_mul_a3b,@function | |
578 .align 16 | |
579 mont_mul_a3b: | |
580 | |
581 | |
582 | |
583 | |
# Pass 0: register operand times multiplier word 0. Product word 0 goes to
# 520(%rsp); r10..r15, r8, r9 then hold partial words 1..8.
584 movq 0(%rdi),%rbp | |
585 | |
586 movq %r10,%rax | |
587 mulq %rbp | |
588 movq %rax,520(%rsp) | |
589 movq %rdx,%r10 | |
590 movq %r11,%rax | |
591 mulq %rbp | |
592 addq %rax,%r10 | |
593 adcq $0,%rdx | |
594 movq %rdx,%r11 | |
595 movq %r12,%rax | |
596 mulq %rbp | |
597 addq %rax,%r11 | |
598 adcq $0,%rdx | |
599 movq %rdx,%r12 | |
600 movq %r13,%rax | |
601 mulq %rbp | |
602 addq %rax,%r12 | |
603 adcq $0,%rdx | |
604 movq %rdx,%r13 | |
605 movq %r14,%rax | |
606 mulq %rbp | |
607 addq %rax,%r13 | |
608 adcq $0,%rdx | |
609 movq %rdx,%r14 | |
610 movq %r15,%rax | |
611 mulq %rbp | |
612 addq %rax,%r14 | |
613 adcq $0,%rdx | |
614 movq %rdx,%r15 | |
615 movq %r8,%rax | |
616 mulq %rbp | |
617 addq %rax,%r15 | |
618 adcq $0,%rdx | |
619 movq %rdx,%r8 | |
620 movq %r9,%rax | |
621 mulq %rbp | |
622 addq %rax,%r8 | |
623 adcq $0,%rdx | |
624 movq %rdx,%r9 | |
# Pass 1: multiplier word 1. From here on the multiplicand is read from
# memory at rsi, rbx carries between steps, and each pass retires one product
# word to the stack while the register window rotates by one.
625 movq 8(%rdi),%rbp | |
626 movq 0(%rsi),%rax | |
627 mulq %rbp | |
628 addq %rax,%r10 | |
629 adcq $0,%rdx | |
630 movq %r10,528(%rsp) | |
631 movq %rdx,%rbx | |
632 | |
633 movq 8(%rsi),%rax | |
634 mulq %rbp | |
635 addq %rax,%r11 | |
636 adcq $0,%rdx | |
637 addq %rbx,%r11 | |
638 adcq $0,%rdx | |
639 movq %rdx,%rbx | |
640 | |
641 movq 16(%rsi),%rax | |
642 mulq %rbp | |
643 addq %rax,%r12 | |
644 adcq $0,%rdx | |
645 addq %rbx,%r12 | |
646 adcq $0,%rdx | |
647 movq %rdx,%rbx | |
648 | |
649 movq 24(%rsi),%rax | |
650 mulq %rbp | |
651 addq %rax,%r13 | |
652 adcq $0,%rdx | |
653 addq %rbx,%r13 | |
654 adcq $0,%rdx | |
655 movq %rdx,%rbx | |
656 | |
657 movq 32(%rsi),%rax | |
658 mulq %rbp | |
659 addq %rax,%r14 | |
660 adcq $0,%rdx | |
661 addq %rbx,%r14 | |
662 adcq $0,%rdx | |
663 movq %rdx,%rbx | |
664 | |
665 movq 40(%rsi),%rax | |
666 mulq %rbp | |
667 addq %rax,%r15 | |
668 adcq $0,%rdx | |
669 addq %rbx,%r15 | |
670 adcq $0,%rdx | |
671 movq %rdx,%rbx | |
672 | |
673 movq 48(%rsi),%rax | |
674 mulq %rbp | |
675 addq %rax,%r8 | |
676 adcq $0,%rdx | |
677 addq %rbx,%r8 | |
678 adcq $0,%rdx | |
679 movq %rdx,%rbx | |
680 | |
681 movq 56(%rsi),%rax | |
682 mulq %rbp | |
683 addq %rax,%r9 | |
684 adcq $0,%rdx | |
685 addq %rbx,%r9 | |
686 adcq $0,%rdx | |
687 movq %rdx,%r10 | |
# Pass 2: multiplier word 2; product word 2 goes to 536(%rsp).
688 movq 16(%rdi),%rbp | |
689 movq 0(%rsi),%rax | |
690 mulq %rbp | |
691 addq %rax,%r11 | |
692 adcq $0,%rdx | |
693 movq %r11,536(%rsp) | |
694 movq %rdx,%rbx | |
695 | |
696 movq 8(%rsi),%rax | |
697 mulq %rbp | |
698 addq %rax,%r12 | |
699 adcq $0,%rdx | |
700 addq %rbx,%r12 | |
701 adcq $0,%rdx | |
702 movq %rdx,%rbx | |
703 | |
704 movq 16(%rsi),%rax | |
705 mulq %rbp | |
706 addq %rax,%r13 | |
707 adcq $0,%rdx | |
708 addq %rbx,%r13 | |
709 adcq $0,%rdx | |
710 movq %rdx,%rbx | |
711 | |
712 movq 24(%rsi),%rax | |
713 mulq %rbp | |
714 addq %rax,%r14 | |
715 adcq $0,%rdx | |
716 addq %rbx,%r14 | |
717 adcq $0,%rdx | |
718 movq %rdx,%rbx | |
719 | |
720 movq 32(%rsi),%rax | |
721 mulq %rbp | |
722 addq %rax,%r15 | |
723 adcq $0,%rdx | |
724 addq %rbx,%r15 | |
725 adcq $0,%rdx | |
726 movq %rdx,%rbx | |
727 | |
728 movq 40(%rsi),%rax | |
729 mulq %rbp | |
730 addq %rax,%r8 | |
731 adcq $0,%rdx | |
732 addq %rbx,%r8 | |
733 adcq $0,%rdx | |
734 movq %rdx,%rbx | |
735 | |
736 movq 48(%rsi),%rax | |
737 mulq %rbp | |
738 addq %rax,%r9 | |
739 adcq $0,%rdx | |
740 addq %rbx,%r9 | |
741 adcq $0,%rdx | |
742 movq %rdx,%rbx | |
743 | |
744 movq 56(%rsi),%rax | |
745 mulq %rbp | |
746 addq %rax,%r10 | |
747 adcq $0,%rdx | |
748 addq %rbx,%r10 | |
749 adcq $0,%rdx | |
750 movq %rdx,%r11 | |
# Pass 3: multiplier word 3; product word 3 goes to 544(%rsp).
751 movq 24(%rdi),%rbp | |
752 movq 0(%rsi),%rax | |
753 mulq %rbp | |
754 addq %rax,%r12 | |
755 adcq $0,%rdx | |
756 movq %r12,544(%rsp) | |
757 movq %rdx,%rbx | |
758 | |
759 movq 8(%rsi),%rax | |
760 mulq %rbp | |
761 addq %rax,%r13 | |
762 adcq $0,%rdx | |
763 addq %rbx,%r13 | |
764 adcq $0,%rdx | |
765 movq %rdx,%rbx | |
766 | |
767 movq 16(%rsi),%rax | |
768 mulq %rbp | |
769 addq %rax,%r14 | |
770 adcq $0,%rdx | |
771 addq %rbx,%r14 | |
772 adcq $0,%rdx | |
773 movq %rdx,%rbx | |
774 | |
775 movq 24(%rsi),%rax | |
776 mulq %rbp | |
777 addq %rax,%r15 | |
778 adcq $0,%rdx | |
779 addq %rbx,%r15 | |
780 adcq $0,%rdx | |
781 movq %rdx,%rbx | |
782 | |
783 movq 32(%rsi),%rax | |
784 mulq %rbp | |
785 addq %rax,%r8 | |
786 adcq $0,%rdx | |
787 addq %rbx,%r8 | |
788 adcq $0,%rdx | |
789 movq %rdx,%rbx | |
790 | |
791 movq 40(%rsi),%rax | |
792 mulq %rbp | |
793 addq %rax,%r9 | |
794 adcq $0,%rdx | |
795 addq %rbx,%r9 | |
796 adcq $0,%rdx | |
797 movq %rdx,%rbx | |
798 | |
799 movq 48(%rsi),%rax | |
800 mulq %rbp | |
801 addq %rax,%r10 | |
802 adcq $0,%rdx | |
803 addq %rbx,%r10 | |
804 adcq $0,%rdx | |
805 movq %rdx,%rbx | |
806 | |
807 movq 56(%rsi),%rax | |
808 mulq %rbp | |
809 addq %rax,%r11 | |
810 adcq $0,%rdx | |
811 addq %rbx,%r11 | |
812 adcq $0,%rdx | |
813 movq %rdx,%r12 | |
# Pass 4: multiplier word 4; product word 4 goes to 552(%rsp).
814 movq 32(%rdi),%rbp | |
815 movq 0(%rsi),%rax | |
816 mulq %rbp | |
817 addq %rax,%r13 | |
818 adcq $0,%rdx | |
819 movq %r13,552(%rsp) | |
820 movq %rdx,%rbx | |
821 | |
822 movq 8(%rsi),%rax | |
823 mulq %rbp | |
824 addq %rax,%r14 | |
825 adcq $0,%rdx | |
826 addq %rbx,%r14 | |
827 adcq $0,%rdx | |
828 movq %rdx,%rbx | |
829 | |
830 movq 16(%rsi),%rax | |
831 mulq %rbp | |
832 addq %rax,%r15 | |
833 adcq $0,%rdx | |
834 addq %rbx,%r15 | |
835 adcq $0,%rdx | |
836 movq %rdx,%rbx | |
837 | |
838 movq 24(%rsi),%rax | |
839 mulq %rbp | |
840 addq %rax,%r8 | |
841 adcq $0,%rdx | |
842 addq %rbx,%r8 | |
843 adcq $0,%rdx | |
844 movq %rdx,%rbx | |
845 | |
846 movq 32(%rsi),%rax | |
847 mulq %rbp | |
848 addq %rax,%r9 | |
849 adcq $0,%rdx | |
850 addq %rbx,%r9 | |
851 adcq $0,%rdx | |
852 movq %rdx,%rbx | |
853 | |
854 movq 40(%rsi),%rax | |
855 mulq %rbp | |
856 addq %rax,%r10 | |
857 adcq $0,%rdx | |
858 addq %rbx,%r10 | |
859 adcq $0,%rdx | |
860 movq %rdx,%rbx | |
861 | |
862 movq 48(%rsi),%rax | |
863 mulq %rbp | |
864 addq %rax,%r11 | |
865 adcq $0,%rdx | |
866 addq %rbx,%r11 | |
867 adcq $0,%rdx | |
868 movq %rdx,%rbx | |
869 | |
870 movq 56(%rsi),%rax | |
871 mulq %rbp | |
872 addq %rax,%r12 | |
873 adcq $0,%rdx | |
874 addq %rbx,%r12 | |
875 adcq $0,%rdx | |
876 movq %rdx,%r13 | |
# Pass 5: multiplier word 5; product word 5 goes to 560(%rsp).
877 movq 40(%rdi),%rbp | |
878 movq 0(%rsi),%rax | |
879 mulq %rbp | |
880 addq %rax,%r14 | |
881 adcq $0,%rdx | |
882 movq %r14,560(%rsp) | |
883 movq %rdx,%rbx | |
884 | |
885 movq 8(%rsi),%rax | |
886 mulq %rbp | |
887 addq %rax,%r15 | |
888 adcq $0,%rdx | |
889 addq %rbx,%r15 | |
890 adcq $0,%rdx | |
891 movq %rdx,%rbx | |
892 | |
893 movq 16(%rsi),%rax | |
894 mulq %rbp | |
895 addq %rax,%r8 | |
896 adcq $0,%rdx | |
897 addq %rbx,%r8 | |
898 adcq $0,%rdx | |
899 movq %rdx,%rbx | |
900 | |
901 movq 24(%rsi),%rax | |
902 mulq %rbp | |
903 addq %rax,%r9 | |
904 adcq $0,%rdx | |
905 addq %rbx,%r9 | |
906 adcq $0,%rdx | |
907 movq %rdx,%rbx | |
908 | |
909 movq 32(%rsi),%rax | |
910 mulq %rbp | |
911 addq %rax,%r10 | |
912 adcq $0,%rdx | |
913 addq %rbx,%r10 | |
914 adcq $0,%rdx | |
915 movq %rdx,%rbx | |
916 | |
917 movq 40(%rsi),%rax | |
918 mulq %rbp | |
919 addq %rax,%r11 | |
920 adcq $0,%rdx | |
921 addq %rbx,%r11 | |
922 adcq $0,%rdx | |
923 movq %rdx,%rbx | |
924 | |
925 movq 48(%rsi),%rax | |
926 mulq %rbp | |
927 addq %rax,%r12 | |
928 adcq $0,%rdx | |
929 addq %rbx,%r12 | |
930 adcq $0,%rdx | |
931 movq %rdx,%rbx | |
932 | |
933 movq 56(%rsi),%rax | |
934 mulq %rbp | |
935 addq %rax,%r13 | |
936 adcq $0,%rdx | |
937 addq %rbx,%r13 | |
938 adcq $0,%rdx | |
939 movq %rdx,%r14 | |
# Pass 6: multiplier word 6; product word 6 goes to 568(%rsp).
940 movq 48(%rdi),%rbp | |
941 movq 0(%rsi),%rax | |
942 mulq %rbp | |
943 addq %rax,%r15 | |
944 adcq $0,%rdx | |
945 movq %r15,568(%rsp) | |
946 movq %rdx,%rbx | |
947 | |
948 movq 8(%rsi),%rax | |
949 mulq %rbp | |
950 addq %rax,%r8 | |
951 adcq $0,%rdx | |
952 addq %rbx,%r8 | |
953 adcq $0,%rdx | |
954 movq %rdx,%rbx | |
955 | |
956 movq 16(%rsi),%rax | |
957 mulq %rbp | |
958 addq %rax,%r9 | |
959 adcq $0,%rdx | |
960 addq %rbx,%r9 | |
961 adcq $0,%rdx | |
962 movq %rdx,%rbx | |
963 | |
964 movq 24(%rsi),%rax | |
965 mulq %rbp | |
966 addq %rax,%r10 | |
967 adcq $0,%rdx | |
968 addq %rbx,%r10 | |
969 adcq $0,%rdx | |
970 movq %rdx,%rbx | |
971 | |
972 movq 32(%rsi),%rax | |
973 mulq %rbp | |
974 addq %rax,%r11 | |
975 adcq $0,%rdx | |
976 addq %rbx,%r11 | |
977 adcq $0,%rdx | |
978 movq %rdx,%rbx | |
979 | |
980 movq 40(%rsi),%rax | |
981 mulq %rbp | |
982 addq %rax,%r12 | |
983 adcq $0,%rdx | |
984 addq %rbx,%r12 | |
985 adcq $0,%rdx | |
986 movq %rdx,%rbx | |
987 | |
988 movq 48(%rsi),%rax | |
989 mulq %rbp | |
990 addq %rax,%r13 | |
991 adcq $0,%rdx | |
992 addq %rbx,%r13 | |
993 adcq $0,%rdx | |
994 movq %rdx,%rbx | |
995 | |
996 movq 56(%rsi),%rax | |
997 mulq %rbp | |
998 addq %rax,%r14 | |
999 adcq $0,%rdx | |
1000 addq %rbx,%r14 | |
1001 adcq $0,%rdx | |
1002 movq %rdx,%r15 | |
# Pass 7: multiplier word 7; product word 7 goes to 576(%rsp).
1003 movq 56(%rdi),%rbp | |
1004 movq 0(%rsi),%rax | |
1005 mulq %rbp | |
1006 addq %rax,%r8 | |
1007 adcq $0,%rdx | |
1008 movq %r8,576(%rsp) | |
1009 movq %rdx,%rbx | |
1010 | |
1011 movq 8(%rsi),%rax | |
1012 mulq %rbp | |
1013 addq %rax,%r9 | |
1014 adcq $0,%rdx | |
1015 addq %rbx,%r9 | |
1016 adcq $0,%rdx | |
1017 movq %rdx,%rbx | |
1018 | |
1019 movq 16(%rsi),%rax | |
1020 mulq %rbp | |
1021 addq %rax,%r10 | |
1022 adcq $0,%rdx | |
1023 addq %rbx,%r10 | |
1024 adcq $0,%rdx | |
1025 movq %rdx,%rbx | |
1026 | |
1027 movq 24(%rsi),%rax | |
1028 mulq %rbp | |
1029 addq %rax,%r11 | |
1030 adcq $0,%rdx | |
1031 addq %rbx,%r11 | |
1032 adcq $0,%rdx | |
1033 movq %rdx,%rbx | |
1034 | |
1035 movq 32(%rsi),%rax | |
1036 mulq %rbp | |
1037 addq %rax,%r12 | |
1038 adcq $0,%rdx | |
1039 addq %rbx,%r12 | |
1040 adcq $0,%rdx | |
1041 movq %rdx,%rbx | |
1042 | |
1043 movq 40(%rsi),%rax | |
1044 mulq %rbp | |
1045 addq %rax,%r13 | |
1046 adcq $0,%rdx | |
1047 addq %rbx,%r13 | |
1048 adcq $0,%rdx | |
1049 movq %rdx,%rbx | |
1050 | |
1051 movq 48(%rsi),%rax | |
1052 mulq %rbp | |
1053 addq %rax,%r14 | |
1054 adcq $0,%rdx | |
1055 addq %rbx,%r14 | |
1056 adcq $0,%rdx | |
1057 movq %rdx,%rbx | |
1058 | |
1059 movq 56(%rsi),%rax | |
1060 mulq %rbp | |
1061 addq %rax,%r15 | |
1062 adcq $0,%rdx | |
1063 addq %rbx,%r15 | |
1064 adcq $0,%rdx | |
1065 movq %rdx,%r8 | |
# Flush the remaining product words 8..15 to 584..640(%rsp).
1066 movq %r9,584(%rsp) | |
1067 movq %r10,592(%rsp) | |
1068 movq %r11,600(%rsp) | |
1069 movq %r12,608(%rsp) | |
1070 movq %r13,616(%rsp) | |
1071 movq %r14,624(%rsp) | |
1072 movq %r15,632(%rsp) | |
1073 movq %r8,640(%rsp) | |
1074 | |
1075 | |
1076 | |
1077 | |
1078 | |
# Tail jump: mont_reduce sees the same stack layout and returns for us.
1079 jmp mont_reduce | |
1080 | |
1081 | |
1082 .size mont_mul_a3b,.-mont_mul_a3b | |
# sqr_reduce: square an 8-word operand, followed by a tail jump to
# mont_reduce.
#   In:  16(%rsp)          pointer to the operand in memory (loaded into rcx)
#        r10..r15, r8, r9  operand words 0..7 in registers
#                          NOTE(review): the code mixes register and memory
#                          reads of the operand, so both are presumably the
#                          same eight words - confirm with the callers
#   Out: 520..640(%rsp)    16-word square, lowest word first; control then
#                          continues in mont_reduce, which returns to the
#                          caller of this routine
#   Method: sum the cross products a[i]*a[j] for i < j, double that sum, then
#   add the squares a[i]*a[i]. Below, w[k] names the result word kept at
#   520+8*k(%rsp).
#   Clobbers rax, rbx, rcx, rdx, rsi, rdi, rbp, r8-r15 and the flags before
#   the jump.
1083 .type sqr_reduce,@function | |
1084 .align 16 | |
1085 sqr_reduce: | |
1086 movq 16(%rsp),%rcx | |
1087 | |
1088 | |
1089 | |
# Row 0: a[0] * a[1..7]. w[1] is stored at once; r10..r15 and rsi then hold
# the partial words w[2]..w[8].
1090 movq %r10,%rbx | |
1091 | |
1092 movq %r11,%rax | |
1093 mulq %rbx | |
1094 movq %rax,528(%rsp) | |
1095 movq %rdx,%r10 | |
1096 movq %r12,%rax | |
1097 mulq %rbx | |
1098 addq %rax,%r10 | |
1099 adcq $0,%rdx | |
1100 movq %rdx,%r11 | |
1101 movq %r13,%rax | |
1102 mulq %rbx | |
1103 addq %rax,%r11 | |
1104 adcq $0,%rdx | |
1105 movq %rdx,%r12 | |
1106 movq %r14,%rax | |
1107 mulq %rbx | |
1108 addq %rax,%r12 | |
1109 adcq $0,%rdx | |
1110 movq %rdx,%r13 | |
1111 movq %r15,%rax | |
1112 mulq %rbx | |
1113 addq %rax,%r13 | |
1114 adcq $0,%rdx | |
1115 movq %rdx,%r14 | |
1116 movq %r8,%rax | |
1117 mulq %rbx | |
1118 addq %rax,%r14 | |
1119 adcq $0,%rdx | |
1120 movq %rdx,%r15 | |
1121 movq %r9,%rax | |
1122 mulq %rbx | |
1123 addq %rax,%r15 | |
1124 adcq $0,%rdx | |
1125 movq %rdx,%rsi | |
1126 | |
1127 movq %r10,536(%rsp) | |
1128 | |
1129 | |
1130 | |
1131 | |
1132 | |
# Row 1: a[1] * a[2..7]. Words 1..5 are reloaded from memory because their
# registers now hold partial sums; a[6] and a[7] are still live in r8 and r9.
# r10 carries between steps.
1133 movq 8(%rcx),%rbx | |
1134 | |
1135 movq 16(%rcx),%rax | |
1136 mulq %rbx | |
1137 addq %rax,%r11 | |
1138 adcq $0,%rdx | |
1139 movq %r11,544(%rsp) | |
1140 | |
1141 movq %rdx,%r10 | |
1142 movq 24(%rcx),%rax | |
1143 mulq %rbx | |
1144 addq %rax,%r12 | |
1145 adcq $0,%rdx | |
1146 addq %r10,%r12 | |
1147 adcq $0,%rdx | |
1148 movq %r12,552(%rsp) | |
1149 | |
1150 movq %rdx,%r10 | |
1151 movq 32(%rcx),%rax | |
1152 mulq %rbx | |
1153 addq %rax,%r13 | |
1154 adcq $0,%rdx | |
1155 addq %r10,%r13 | |
1156 adcq $0,%rdx | |
1157 | |
1158 movq %rdx,%r10 | |
1159 movq 40(%rcx),%rax | |
1160 mulq %rbx | |
1161 addq %rax,%r14 | |
1162 adcq $0,%rdx | |
1163 addq %r10,%r14 | |
1164 adcq $0,%rdx | |
1165 | |
1166 movq %rdx,%r10 | |
1167 movq %r8,%rax | |
1168 mulq %rbx | |
1169 addq %rax,%r15 | |
1170 adcq $0,%rdx | |
1171 addq %r10,%r15 | |
1172 adcq $0,%rdx | |
1173 | |
1174 movq %rdx,%r10 | |
1175 movq %r9,%rax | |
1176 mulq %rbx | |
1177 addq %rax,%rsi | |
1178 adcq $0,%rdx | |
1179 addq %r10,%rsi | |
1180 adcq $0,%rdx | |
1181 | |
1182 movq %rdx,%r11 | |
1183 | |
1184 | |
1185 | |
1186 | |
# Row 2: a[2] * a[3..7]; retires w[5] and w[6].
1187 movq 16(%rcx),%rbx | |
1188 | |
1189 movq 24(%rcx),%rax | |
1190 mulq %rbx | |
1191 addq %rax,%r13 | |
1192 adcq $0,%rdx | |
1193 movq %r13,560(%rsp) | |
1194 | |
1195 movq %rdx,%r10 | |
1196 movq 32(%rcx),%rax | |
1197 mulq %rbx | |
1198 addq %rax,%r14 | |
1199 adcq $0,%rdx | |
1200 addq %r10,%r14 | |
1201 adcq $0,%rdx | |
1202 movq %r14,568(%rsp) | |
1203 | |
1204 movq %rdx,%r10 | |
1205 movq 40(%rcx),%rax | |
1206 mulq %rbx | |
1207 addq %rax,%r15 | |
1208 adcq $0,%rdx | |
1209 addq %r10,%r15 | |
1210 adcq $0,%rdx | |
1211 | |
1212 movq %rdx,%r10 | |
1213 movq %r8,%rax | |
1214 mulq %rbx | |
1215 addq %rax,%rsi | |
1216 adcq $0,%rdx | |
1217 addq %r10,%rsi | |
1218 adcq $0,%rdx | |
1219 | |
1220 movq %rdx,%r10 | |
1221 movq %r9,%rax | |
1222 mulq %rbx | |
1223 addq %rax,%r11 | |
1224 adcq $0,%rdx | |
1225 addq %r10,%r11 | |
1226 adcq $0,%rdx | |
1227 | |
1228 movq %rdx,%r12 | |
1229 | |
1230 | |
1231 | |
1232 | |
1233 | |
# Row 3: a[3] * a[4..7]; retires w[7] and w[8].
1234 movq 24(%rcx),%rbx | |
1235 | |
1236 movq 32(%rcx),%rax | |
1237 mulq %rbx | |
1238 addq %rax,%r15 | |
1239 adcq $0,%rdx | |
1240 movq %r15,576(%rsp) | |
1241 | |
1242 movq %rdx,%r10 | |
1243 movq 40(%rcx),%rax | |
1244 mulq %rbx | |
1245 addq %rax,%rsi | |
1246 adcq $0,%rdx | |
1247 addq %r10,%rsi | |
1248 adcq $0,%rdx | |
1249 movq %rsi,584(%rsp) | |
1250 | |
1251 movq %rdx,%r10 | |
1252 movq %r8,%rax | |
1253 mulq %rbx | |
1254 addq %rax,%r11 | |
1255 adcq $0,%rdx | |
1256 addq %r10,%r11 | |
1257 adcq $0,%rdx | |
1258 | |
1259 movq %rdx,%r10 | |
1260 movq %r9,%rax | |
1261 mulq %rbx | |
1262 addq %rax,%r12 | |
1263 adcq $0,%rdx | |
1264 addq %r10,%r12 | |
1265 adcq $0,%rdx | |
1266 | |
1267 movq %rdx,%r15 | |
1268 | |
1269 | |
1270 | |
1271 | |
# Row 4: a[4] * a[5..7]; retires w[9] and w[10].
1272 movq 32(%rcx),%rbx | |
1273 | |
1274 movq 40(%rcx),%rax | |
1275 mulq %rbx | |
1276 addq %rax,%r11 | |
1277 adcq $0,%rdx | |
1278 movq %r11,592(%rsp) | |
1279 | |
1280 movq %rdx,%r10 | |
1281 movq %r8,%rax | |
1282 mulq %rbx | |
1283 addq %rax,%r12 | |
1284 adcq $0,%rdx | |
1285 addq %r10,%r12 | |
1286 adcq $0,%rdx | |
1287 movq %r12,600(%rsp) | |
1288 | |
1289 movq %rdx,%r10 | |
1290 movq %r9,%rax | |
1291 mulq %rbx | |
1292 addq %rax,%r15 | |
1293 adcq $0,%rdx | |
1294 addq %r10,%r15 | |
1295 adcq $0,%rdx | |
1296 | |
1297 movq %rdx,%r11 | |
1298 | |
1299 | |
1300 | |
1301 | |
# Row 5: a[5] * a[6..7]; retires w[11] and w[12].
1302 movq 40(%rcx),%rbx | |
1303 | |
1304 movq %r8,%rax | |
1305 mulq %rbx | |
1306 addq %rax,%r15 | |
1307 adcq $0,%rdx | |
1308 movq %r15,608(%rsp) | |
1309 | |
1310 movq %rdx,%r10 | |
1311 movq %r9,%rax | |
1312 mulq %rbx | |
1313 addq %rax,%r11 | |
1314 adcq $0,%rdx | |
1315 addq %r10,%r11 | |
1316 adcq $0,%rdx | |
1317 movq %r11,616(%rsp) | |
1318 | |
1319 movq %rdx,%r12 | |
1320 | |
1321 | |
1322 | |
1323 | |
# Row 6: a[6] * a[7]; retires w[13] and w[14].
1324 movq %r8,%rbx | |
1325 | |
1326 movq %r9,%rax | |
1327 mulq %rbx | |
1328 addq %rax,%r12 | |
1329 adcq $0,%rdx | |
1330 movq %r12,624(%rsp) | |
1331 | |
1332 movq %rdx,632(%rsp) | |
1333 | |
1334 | |
# Low half: reload the cross-product words w[1]..w[6] so they can be doubled.
1335 movq 528(%rsp),%r10 | |
1336 movq 536(%rsp),%r11 | |
1337 movq 544(%rsp),%r12 | |
1338 movq 552(%rsp),%r13 | |
1339 movq 560(%rsp),%r14 | |
1340 movq 568(%rsp),%r15 | |
1341 | |
# a[3] squared: low half in rdi (lands in w[6]), high half in r8 (lands in
# w[7]). r8 no longer holds a[6] from here on.
1342 movq 24(%rcx),%rax | |
1343 mulq %rax | |
1344 movq %rax,%rdi | |
1345 movq %rdx,%r8 | |
1346 | |
# Double w[1]..w[6]; the carry out is folded into r8.
1347 addq %r10,%r10 | |
1348 adcq %r11,%r11 | |
1349 adcq %r12,%r12 | |
1350 adcq %r13,%r13 | |
1351 adcq %r14,%r14 | |
1352 adcq %r15,%r15 | |
1353 adcq $0,%r8 | |
1354 | |
# a[0] squared: low half is w[0]; high half (rbx) is added to w[1].
1355 movq 0(%rcx),%rax | |
1356 mulq %rax | |
1357 movq %rax,520(%rsp) | |
1358 movq %rdx,%rbx | |
1359 | |
# a[1] squared contributes to w[2] and w[3].
1360 movq 8(%rcx),%rax | |
1361 mulq %rax | |
1362 | |
1363 addq %rbx,%r10 | |
1364 adcq %rax,%r11 | |
1365 adcq $0,%rdx | |
1366 | |
1367 movq %rdx,%rbx | |
1368 movq %r10,528(%rsp) | |
1369 movq %r11,536(%rsp) | |
1370 | |
# a[2] squared contributes to w[4] and w[5].
1371 movq 16(%rcx),%rax | |
1372 mulq %rax | |
1373 | |
1374 addq %rbx,%r12 | |
1375 adcq %rax,%r13 | |
1376 adcq $0,%rdx | |
1377 | |
1378 movq %rdx,%rbx | |
1379 | |
1380 movq %r12,544(%rsp) | |
1381 movq %r13,552(%rsp) | |
1382 | |
# Low half of a[3] squared goes into w[6]; rbp keeps the carry into w[7].
1383 xorq %rbp,%rbp | |
1384 addq %rbx,%r14 | |
1385 adcq %rdi,%r15 | |
1386 adcq $0,%rbp | |
1387 | |
1388 movq %r14,560(%rsp) | |
1389 movq %r15,568(%rsp) | |
1390 | |
1391 | |
1392 | |
1393 | |
# High half: reload the cross-product words w[7]..w[14].
1394 movq 576(%rsp),%r10 | |
1395 movq 584(%rsp),%r11 | |
1396 movq 592(%rsp),%r12 | |
1397 movq 600(%rsp),%r13 | |
1398 movq 608(%rsp),%r14 | |
1399 movq 616(%rsp),%r15 | |
1400 movq 624(%rsp),%rdi | |
1401 movq 632(%rsp),%rsi | |
1402 | |
# a[7] squared: low half in r9 (lands in w[14]), high half in rbx (w[15]).
1403 movq %r9,%rax | |
1404 mulq %rax | |
1405 movq %rax,%r9 | |
1406 movq %rdx,%rbx | |
1407 | |
# Double w[7]..w[14]; the carry out is folded into rbx.
1408 addq %r10,%r10 | |
1409 adcq %r11,%r11 | |
1410 adcq %r12,%r12 | |
1411 adcq %r13,%r13 | |
1412 adcq %r14,%r14 | |
1413 adcq %r15,%r15 | |
1414 adcq %rdi,%rdi | |
1415 adcq %rsi,%rsi | |
1416 adcq $0,%rbx | |
1417 | |
# Carry left over from the low half. r10 was just doubled, so its low bit is
# clear and this add cannot carry out.
1418 addq %rbp,%r10 | |
1419 | |
# a[4] squared contributes to w[8] and w[9]; r8 (high half of a[3] squared
# plus the doubling carry) is added to w[7].
1420 movq 32(%rcx),%rax | |
1421 mulq %rax | |
1422 | |
1423 addq %r8,%r10 | |
1424 adcq %rax,%r11 | |
1425 adcq $0,%rdx | |
1426 | |
1427 movq %rdx,%rbp | |
1428 | |
1429 movq %r10,576(%rsp) | |
1430 movq %r11,584(%rsp) | |
1431 | |
# a[5] squared contributes to w[10] and w[11].
1432 movq 40(%rcx),%rax | |
1433 mulq %rax | |
1434 | |
1435 addq %rbp,%r12 | |
1436 adcq %rax,%r13 | |
1437 adcq $0,%rdx | |
1438 | |
1439 movq %rdx,%rbp | |
1440 | |
1441 movq %r12,592(%rsp) | |
1442 movq %r13,600(%rsp) | |
1443 | |
# a[6] squared (reloaded from memory) contributes to w[12] and w[13].
1444 movq 48(%rcx),%rax | |
1445 mulq %rax | |
1446 | |
1447 addq %rbp,%r14 | |
1448 adcq %rax,%r15 | |
1449 adcq $0,%rdx | |
1450 | |
1451 movq %r14,608(%rsp) | |
1452 movq %r15,616(%rsp) | |
1453 | |
# Finish w[13]..w[15] with the high half of a[6] squared and a[7] squared.
1454 addq %rdx,%rdi | |
1455 adcq %r9,%rsi | |
1456 adcq $0,%rbx | |
1457 | |
1458 movq %rdi,624(%rsp) | |
1459 movq %rsi,632(%rsp) | |
1460 movq %rbx,640(%rsp) | |
1461 | |
# Tail jump: mont_reduce sees the same stack layout and returns for us.
1462 jmp mont_reduce | |
1463 | |
1464 | |
1465 .size sqr_reduce,.-sqr_reduce | |
1466 .globl mod_exp_512 | |
1467 .hidden mod_exp_512 | |
1468 .type mod_exp_512,@function | |
1469 mod_exp_512: | |
1470 pushq %rbp | |
1471 pushq %rbx | |
1472 pushq %r12 | |
1473 pushq %r13 | |
1474 pushq %r14 | |
1475 pushq %r15 | |
1476 | |
1477 | |
1478 movq %rsp,%r8 | |
1479 subq $2688,%rsp | |
1480 andq $-64,%rsp | |
1481 | |
1482 | |
1483 movq %r8,0(%rsp) | |
1484 movq %rdi,8(%rsp) | |
1485 movq %rsi,16(%rsp) | |
1486 movq %rcx,24(%rsp) | |
1487 .Lbody: | |
1488 | |
1489 | |
1490 | |
1491 pxor %xmm4,%xmm4 | |
1492 movdqu 0(%rsi),%xmm0 | |
1493 movdqu 16(%rsi),%xmm1 | |
1494 movdqu 32(%rsi),%xmm2 | |
1495 movdqu 48(%rsi),%xmm3 | |
1496 movdqa %xmm4,512(%rsp) | |
1497 movdqa %xmm4,528(%rsp) | |
1498 movdqa %xmm4,608(%rsp) | |
1499 movdqa %xmm4,624(%rsp) | |
1500 movdqa %xmm0,544(%rsp) | |
1501 movdqa %xmm1,560(%rsp) | |
1502 movdqa %xmm2,576(%rsp) | |
1503 movdqa %xmm3,592(%rsp) | |
1504 | |
1505 | |
1506 movdqu 0(%rdx),%xmm0 | |
1507 movdqu 16(%rdx),%xmm1 | |
1508 movdqu 32(%rdx),%xmm2 | |
1509 movdqu 48(%rdx),%xmm3 | |
1510 | |
1511 leaq 384(%rsp),%rbx | |
1512 movq %rbx,136(%rsp) | |
1513 call mont_reduce | |
1514 | |
1515 | |
1516 leaq 448(%rsp),%rcx | |
1517 xorq %rax,%rax | |
1518 movq %rax,0(%rcx) | |
1519 movq %rax,8(%rcx) | |
1520 movq %rax,24(%rcx) | |
1521 movq %rax,32(%rcx) | |
1522 movq %rax,40(%rcx) | |
1523 movq %rax,48(%rcx) | |
1524 movq %rax,56(%rcx) | |
1525 movq %rax,128(%rsp) | |
1526 movq $1,16(%rcx) | |
1527 | |
1528 leaq 640(%rsp),%rbp | |
1529 movq %rcx,%rsi | |
1530 movq %rbp,%rdi | |
1531 movq $8,%rax | |
1532 loop_0: | |
1533 movq (%rcx),%rbx | |
1534 movw %bx,(%rdi) | |
1535 shrq $16,%rbx | |
1536 movw %bx,64(%rdi) | |
1537 shrq $16,%rbx | |
1538 movw %bx,128(%rdi) | |
1539 shrq $16,%rbx | |
1540 movw %bx,192(%rdi) | |
1541 leaq 8(%rcx),%rcx | |
1542 leaq 256(%rdi),%rdi | |
1543 decq %rax | |
1544 jnz loop_0 | |
1545 movq $31,%rax | |
1546 movq %rax,32(%rsp) | |
1547 movq %rbp,40(%rsp) | |
1548 | |
1549 movq %rsi,136(%rsp) | |
1550 movq 0(%rsi),%r10 | |
1551 movq 8(%rsi),%r11 | |
1552 movq 16(%rsi),%r12 | |
1553 movq 24(%rsi),%r13 | |
1554 movq 32(%rsi),%r14 | |
1555 movq 40(%rsi),%r15 | |
1556 movq 48(%rsi),%r8 | |
1557 movq 56(%rsi),%r9 | |
1558 init_loop: | |
1559 leaq 384(%rsp),%rdi | |
1560 call mont_mul_a3b | |
1561 leaq 448(%rsp),%rsi | |
1562 movq 40(%rsp),%rbp | |
1563 addq $2,%rbp | |
1564 movq %rbp,40(%rsp) | |
1565 movq %rsi,%rcx | |
1566 movq $8,%rax | |
1567 loop_1: | |
1568 movq (%rcx),%rbx | |
1569 movw %bx,(%rbp) | |
1570 shrq $16,%rbx | |
1571 movw %bx,64(%rbp) | |
1572 shrq $16,%rbx | |
1573 movw %bx,128(%rbp) | |
1574 shrq $16,%rbx | |
1575 movw %bx,192(%rbp) | |
1576 leaq 8(%rcx),%rcx | |
1577 leaq 256(%rbp),%rbp | |
1578 decq %rax | |
1579 jnz loop_1 | |
1580 movq 32(%rsp),%rax | |
1581 subq $1,%rax | |
1582 movq %rax,32(%rsp) | |
1583 jne init_loop | |
1584 | |
1585 | |
1586 | |
1587 movdqa %xmm0,64(%rsp) | |
1588 movdqa %xmm1,80(%rsp) | |
1589 movdqa %xmm2,96(%rsp) | |
1590 movdqa %xmm3,112(%rsp) | |
1591 | |
1592 | |
1593 | |
1594 | |
1595 | |
1596 movl 126(%rsp),%eax | |
1597 movq %rax,%rdx | |
1598 shrq $11,%rax | |
1599 andl $2047,%edx | |
1600 movl %edx,126(%rsp) | |
1601 leaq 640(%rsp,%rax,2),%rsi | |
1602 movq 8(%rsp),%rdx | |
1603 movq $4,%rbp | |
1604 loop_2: | |
1605 movzwq 192(%rsi),%rbx | |
1606 movzwq 448(%rsi),%rax | |
1607 shlq $16,%rbx | |
1608 shlq $16,%rax | |
1609 movw 128(%rsi),%bx | |
1610 movw 384(%rsi),%ax | |
1611 shlq $16,%rbx | |
1612 shlq $16,%rax | |
1613 movw 64(%rsi),%bx | |
1614 movw 320(%rsi),%ax | |
1615 shlq $16,%rbx | |
1616 shlq $16,%rax | |
1617 movw 0(%rsi),%bx | |
1618 movw 256(%rsi),%ax | |
1619 movq %rbx,0(%rdx) | |
1620 movq %rax,8(%rdx) | |
1621 leaq 512(%rsi),%rsi | |
1622 leaq 16(%rdx),%rdx | |
1623 subq $1,%rbp | |
1624 jnz loop_2 | |
1625 movq $505,48(%rsp) | |
1626 | |
1627 movq 8(%rsp),%rcx | |
1628 movq %rcx,136(%rsp) | |
1629 movq 0(%rcx),%r10 | |
1630 movq 8(%rcx),%r11 | |
1631 movq 16(%rcx),%r12 | |
1632 movq 24(%rcx),%r13 | |
1633 movq 32(%rcx),%r14 | |
1634 movq 40(%rcx),%r15 | |
1635 movq 48(%rcx),%r8 | |
1636 movq 56(%rcx),%r9 | |
1637 jmp sqr_2 | |
1638 | |
1639 main_loop_a3b: | |
1640 call sqr_reduce | |
1641 call sqr_reduce | |
1642 call sqr_reduce | |
1643 sqr_2: | |
1644 call sqr_reduce | |
1645 call sqr_reduce | |
1646 | |
1647 | |
1648 | |
1649 movq 48(%rsp),%rcx | |
1650 movq %rcx,%rax | |
1651 shrq $4,%rax | |
1652 movl 64(%rsp,%rax,2),%edx | |
1653 andq $15,%rcx | |
1654 shrq %cl,%rdx | |
1655 andq $31,%rdx | |
1656 | |
1657 leaq 640(%rsp,%rdx,2),%rsi | |
1658 leaq 448(%rsp),%rdx | |
1659 movq %rdx,%rdi | |
1660 movq $4,%rbp | |
1661 loop_3: | |
1662 movzwq 192(%rsi),%rbx | |
1663 movzwq 448(%rsi),%rax | |
1664 shlq $16,%rbx | |
1665 shlq $16,%rax | |
1666 movw 128(%rsi),%bx | |
1667 movw 384(%rsi),%ax | |
1668 shlq $16,%rbx | |
1669 shlq $16,%rax | |
1670 movw 64(%rsi),%bx | |
1671 movw 320(%rsi),%ax | |
1672 shlq $16,%rbx | |
1673 shlq $16,%rax | |
1674 movw 0(%rsi),%bx | |
1675 movw 256(%rsi),%ax | |
1676 movq %rbx,0(%rdx) | |
1677 movq %rax,8(%rdx) | |
1678 leaq 512(%rsi),%rsi | |
1679 leaq 16(%rdx),%rdx | |
1680 subq $1,%rbp | |
1681 jnz loop_3 | |
1682 movq 8(%rsp),%rsi | |
1683 call mont_mul_a3b | |
1684 | |
1685 | |
1686 | |
1687 movq 48(%rsp),%rcx | |
1688 subq $5,%rcx | |
1689 movq %rcx,48(%rsp) | |
1690 jge main_loop_a3b | |
1691 | |
1692 | |
1693 | |
1694 end_main_loop_a3b: | |
1695 | |
1696 | |
1697 movq 8(%rsp),%rdx | |
1698 pxor %xmm4,%xmm4 | |
1699 movdqu 0(%rdx),%xmm0 | |
1700 movdqu 16(%rdx),%xmm1 | |
1701 movdqu 32(%rdx),%xmm2 | |
1702 movdqu 48(%rdx),%xmm3 | |
1703 movdqa %xmm4,576(%rsp) | |
1704 movdqa %xmm4,592(%rsp) | |
1705 movdqa %xmm4,608(%rsp) | |
1706 movdqa %xmm4,624(%rsp) | |
1707 movdqa %xmm0,512(%rsp) | |
1708 movdqa %xmm1,528(%rsp) | |
1709 movdqa %xmm2,544(%rsp) | |
1710 movdqa %xmm3,560(%rsp) | |
1711 call mont_reduce | |
1712 | |
1713 | |
1714 | |
1715 movq 8(%rsp),%rax | |
1716 movq 0(%rax),%r8 | |
1717 movq 8(%rax),%r9 | |
1718 movq 16(%rax),%r10 | |
1719 movq 24(%rax),%r11 | |
1720 movq 32(%rax),%r12 | |
1721 movq 40(%rax),%r13 | |
1722 movq 48(%rax),%r14 | |
1723 movq 56(%rax),%r15 | |
1724 | |
1725 | |
1726 movq 24(%rsp),%rbx | |
1727 addq $512,%rbx | |
1728 | |
1729 subq 0(%rbx),%r8 | |
1730 sbbq 8(%rbx),%r9 | |
1731 sbbq 16(%rbx),%r10 | |
1732 sbbq 24(%rbx),%r11 | |
1733 sbbq 32(%rbx),%r12 | |
1734 sbbq 40(%rbx),%r13 | |
1735 sbbq 48(%rbx),%r14 | |
1736 sbbq 56(%rbx),%r15 | |
1737 | |
1738 | |
1739 movq 0(%rax),%rsi | |
1740 movq 8(%rax),%rdi | |
1741 movq 16(%rax),%rcx | |
1742 movq 24(%rax),%rdx | |
1743 cmovncq %r8,%rsi | |
1744 cmovncq %r9,%rdi | |
1745 cmovncq %r10,%rcx | |
1746 cmovncq %r11,%rdx | |
1747 movq %rsi,0(%rax) | |
1748 movq %rdi,8(%rax) | |
1749 movq %rcx,16(%rax) | |
1750 movq %rdx,24(%rax) | |
1751 | |
1752 movq 32(%rax),%rsi | |
1753 movq 40(%rax),%rdi | |
1754 movq 48(%rax),%rcx | |
1755 movq 56(%rax),%rdx | |
1756 cmovncq %r12,%rsi | |
1757 cmovncq %r13,%rdi | |
1758 cmovncq %r14,%rcx | |
1759 cmovncq %r15,%rdx | |
1760 movq %rsi,32(%rax) | |
1761 movq %rdi,40(%rax) | |
1762 movq %rcx,48(%rax) | |
1763 movq %rdx,56(%rax) | |
1764 | |
1765 movq 0(%rsp),%rsi | |
1766 movq 0(%rsi),%r15 | |
1767 movq 8(%rsi),%r14 | |
1768 movq 16(%rsi),%r13 | |
1769 movq 24(%rsi),%r12 | |
1770 movq 32(%rsi),%rbx | |
1771 movq 40(%rsi),%rbp | |
1772 leaq 48(%rsi),%rsp | |
1773 .Lepilogue: | |
1774 .byte 0xf3,0xc3 | |
1775 .size mod_exp_512, . - mod_exp_512 | |
1776 #endif | |
OLD | NEW |