OLD | NEW |
| (Empty) |
1 .text | |
2 | |
# bn_mul_mont -- word-by-word Montgomery multiplication, generic path.
#
# Register roles as used by the code below (argument names are presumed to
# follow OpenSSL's bn_mul_mont(rp, ap, bp, np, n0, num) -- TODO confirm
# against the C prototype):
#   rdi = rp  result words are stored here (.Lsub / .Lcopy)
#   rsi = ap  first multiplicand, read as (%rsi,j,8)
#   rdx = bp  second multiplicand, moved to r12 and read as (%r12,i,8)
#   rcx = np  words that get multiplied by the per-row factor m (rbp)
#   r8  = n0  pointer, dereferenced once; m = n0[0] * (low word of row sum)
#   r9d = num word count
# Returns 1 in rax.  rbx, rbp, r12-r15 are saved on entry and restored.
#
# Entry dispatch: if num is not a multiple of 4, or num < 8, use the generic
# code at .Lmul_enter.  Otherwise jump to .Lmul4x_enter when rdx != rsi, or
# to .Lsqr4x_enter when both operand pointers are identical.
3 .globl bn_mul_mont | |
4 .type bn_mul_mont,@function | |
5 .align 16 | |
6 bn_mul_mont: | |
7 testl $3,%r9d | |
8 jnz .Lmul_enter | |
9 cmpl $8,%r9d | |
10 jb .Lmul_enter | |
11 cmpq %rsi,%rdx | |
12 jne .Lmul4x_enter | |
13 jmp .Lsqr4x_enter | |
14 | |
15 .align 16 | |
16 .Lmul_enter: | |
17 pushq %rbx | |
18 pushq %rbp | |
19 pushq %r12 | |
20 pushq %r13 | |
21 pushq %r14 | |
22 pushq %r15 | |
23 | |
# Carve a scratch vector tp[] of num+2 words out of the stack:
#   r9  = num zero-extended to 64 bits
#   r11 = stack pointer after the pushes (kept for the epilogue)
#   rsp = (rsp - 8*(num+2)) rounded down to a 1024-byte boundary
# The saved stack pointer is parked in tp[num+1].
24 movl %r9d,%r9d | |
25 leaq 2(%r9),%r10 | |
26 movq %rsp,%r11 | |
27 negq %r10 | |
28 leaq (%rsp,%r10,8),%rsp | |
29 andq $-1024,%rsp | |
30 | |
31 movq %r11,8(%rsp,%r9,8) | |
32 .Lmul_body: | |
# First row (i = 0): r12 = bp, r8 = n0[0], rbx = bp[0], rax = ap[0].
# r14 is the outer index i and r15 the inner index j, both start at 0.
33 movq %rdx,%r12 | |
34 movq (%r8),%r8 | |
35 movq (%r12),%rbx | |
36 movq (%rsi),%rax | |
37 | |
38 xorq %r14,%r14 | |
39 xorq %r15,%r15 | |
40 | |
# r11:r10 = ap[0]*bp[0]; m (rbp) = n0[0] * low word, truncated to 64 bits.
41 movq %r8,%rbp | |
42 mulq %rbx | |
43 movq %rax,%r10 | |
44 movq (%rcx),%rax | |
45 | |
46 imulq %r10,%rbp | |
47 movq %rdx,%r11 | |
48 | |
# np[0]*m is added to the low word; only the carry out of that sum is kept
# (r13 = high word + carry).  rax is preloaded with ap[1] for the loop.
49 mulq %rbp | |
50 addq %rax,%r10 | |
51 movq 8(%rsi),%rax | |
52 adcq $0,%rdx | |
53 movq %rdx,%r13 | |
54 | |
55 leaq 1(%r15),%r15 | |
56 jmp .L1st_enter | |
57 | |
58 .align 16 | |
# .L1st: inner loop of the first row.  On each pass the np[j]*m product
# (in rdx:rax) and the pending ap[j]*bp[0] sum (r11) are folded into r13,
# which is stored two slots behind the already-incremented index
# (-16(%rsp,%r15,8)).
59 .L1st: | |
60 addq %rax,%r13 | |
61 movq (%rsi,%r15,8),%rax | |
62 adcq $0,%rdx | |
63 addq %r11,%r13 | |
64 movq %r10,%r11 | |
65 adcq $0,%rdx | |
66 movq %r13,-16(%rsp,%r15,8) | |
67 movq %rdx,%r13 | |
68 | |
# Loop entry: multiply ap[j] by bp[0], then np[j] by m; j is incremented
# between the two multiplies.  Loop until j == num.
69 .L1st_enter: | |
70 mulq %rbx | |
71 addq %rax,%r11 | |
72 movq (%rcx,%r15,8),%rax | |
73 adcq $0,%rdx | |
74 leaq 1(%r15),%r15 | |
75 movq %rdx,%r10 | |
76 | |
77 mulq %rbp | |
78 cmpq %r9,%r15 | |
79 jne .L1st | |
80 | |
# Tail of the first row: flush the last product, reload rax = ap[0] for the
# next row, then store the two top words into tp[num-1] and tp[num].
81 addq %rax,%r13 | |
82 movq (%rsi),%rax | |
83 adcq $0,%rdx | |
84 addq %r11,%r13 | |
85 adcq $0,%rdx | |
86 movq %r13,-16(%rsp,%r15,8) | |
87 movq %rdx,%r13 | |
88 movq %r10,%r11 | |
89 | |
90 xorq %rdx,%rdx | |
91 addq %r11,%r13 | |
92 adcq $0,%rdx | |
93 movq %r13,-8(%rsp,%r9,8) | |
94 movq %rdx,(%rsp,%r9,8) | |
95 | |
96 leaq 1(%r14),%r14 | |
97 jmp .Louter | |
98 .align 16 | |
# .Louter: rows i = 1 .. num-1.  Same structure as the first row, except
# that the previous contents of tp[] are added in as well (r10 is loaded
# from tp[0], tp[1], tp[j] as the row advances).
99 .Louter: | |
100 movq (%r12,%r14,8),%rbx | |
101 xorq %r15,%r15 | |
102 movq %r8,%rbp | |
103 movq (%rsp),%r10 | |
104 mulq %rbx | |
105 addq %rax,%r10 | |
106 movq (%rcx),%rax | |
107 adcq $0,%rdx | |
108 | |
# m (rbp) = n0[0] * (tp[0] + low word of ap[0]*bp[i]).
109 imulq %r10,%rbp | |
110 movq %rdx,%r11 | |
111 | |
112 mulq %rbp | |
113 addq %rax,%r10 | |
114 movq 8(%rsi),%rax | |
115 adcq $0,%rdx | |
116 movq 8(%rsp),%r10 | |
117 movq %rdx,%r13 | |
118 | |
119 leaq 1(%r15),%r15 | |
120 jmp .Linner_enter | |
121 | |
122 .align 16 | |
# .Linner: fold the np[j]*m product and the running ap[j]*bp[i]+tp[j] sum
# (r10) into r13, store it two slots behind the index, and preload the next
# tp word into r10.
123 .Linner: | |
124 addq %rax,%r13 | |
125 movq (%rsi,%r15,8),%rax | |
126 adcq $0,%rdx | |
127 addq %r10,%r13 | |
128 movq (%rsp,%r15,8),%r10 | |
129 adcq $0,%rdx | |
130 movq %r13,-16(%rsp,%r15,8) | |
131 movq %rdx,%r13 | |
132 | |
133 .Linner_enter: | |
134 mulq %rbx | |
135 addq %rax,%r11 | |
136 movq (%rcx,%r15,8),%rax | |
137 adcq $0,%rdx | |
138 addq %r11,%r10 | |
139 movq %rdx,%r11 | |
140 adcq $0,%r11 | |
141 leaq 1(%r15),%r15 | |
142 | |
143 mulq %rbp | |
144 cmpq %r9,%r15 | |
145 jne .Linner | |
146 | |
# Tail of row i: flush the last product, reload rax = ap[0], add the old
# top word tp[num] (loaded through (%rsp,%r15,8) with r15 == num) and store
# the new tp[num-1] / tp[num].
147 addq %rax,%r13 | |
148 movq (%rsi),%rax | |
149 adcq $0,%rdx | |
150 addq %r10,%r13 | |
151 movq (%rsp,%r15,8),%r10 | |
152 adcq $0,%rdx | |
153 movq %r13,-16(%rsp,%r15,8) | |
154 movq %rdx,%r13 | |
155 | |
156 xorq %rdx,%rdx | |
157 addq %r11,%r13 | |
158 adcq $0,%rdx | |
159 addq %r10,%r13 | |
160 adcq $0,%rdx | |
161 movq %r13,-8(%rsp,%r9,8) | |
162 movq %rdx,(%rsp,%r9,8) | |
163 | |
164 leaq 1(%r14),%r14 | |
165 cmpq %r9,%r14 | |
166 jl .Louter | |
167 | |
# Final subtraction: rp[] = tp[] - np[] with borrow propagation.
# The xorq clears CF before the first sbbq; mov, lea and dec inside the
# loop leave CF untouched, so the borrow chain survives each iteration.
# rsi now points at tp[], r15 counts down from num.
168 xorq %r14,%r14 | |
169 movq (%rsp),%rax | |
170 leaq (%rsp),%rsi | |
171 movq %r9,%r15 | |
172 jmp .Lsub | |
173 .align 16 | |
174 .Lsub: sbbq (%rcx,%r14,8),%rax | |
175 movq %rax,(%rdi,%r14,8) | |
176 movq 8(%rsi,%r14,8),%rax | |
177 leaq 1(%r14),%r14 | |
178 decq %r15 | |
179 jnz .Lsub | |
180 | |
# rax holds tp[num] here; subtracting the final borrow turns it into a
# selection mask (expected to be 0 or all ones -- TODO confirm range of
# tp[num]).  The source pointer for the copy is then chosen without a
# branch:  rsi = (tp & mask) | (rp & ~mask).
181 sbbq $0,%rax | |
182 xorq %r14,%r14 | |
183 andq %rax,%rsi | |
184 notq %rax | |
185 movq %rdi,%rcx | |
186 andq %rax,%rcx | |
187 movq %r9,%r15 | |
188 orq %rcx,%rsi | |
189 .align 16 | |
# .Lcopy: copy num words from the selected source to rp[], and overwrite
# each tp[i] with the index value i so the intermediate data does not stay
# in the scratch area.
190 .Lcopy: | |
191 movq (%rsi,%r14,8),%rax | |
192 movq %r14,(%rsp,%r14,8) | |
193 movq %rax,(%rdi,%r14,8) | |
194 leaq 1(%r14),%r14 | |
195 subq $1,%r15 | |
196 jnz .Lcopy | |
197 | |
# Epilogue: fetch the saved stack pointer from tp[num+1], return 1, reload
# the six saved registers from the original frame and release it.
# The .byte pair 0xf3,0xc3 encodes "rep ret".
198 movq 8(%rsp,%r9,8),%rsi | |
199 movq $1,%rax | |
200 movq (%rsi),%r15 | |
201 movq 8(%rsi),%r14 | |
202 movq 16(%rsi),%r13 | |
203 movq 24(%rsi),%r12 | |
204 movq 32(%rsi),%rbp | |
205 movq 40(%rsi),%rbx | |
206 leaq 48(%rsi),%rsp | |
207 .Lmul_epilogue: | |
208 .byte 0xf3,0xc3 | |
209 .size bn_mul_mont,.-bn_mul_mont | |
# bn_mul4x_mont -- Montgomery multiplication with the inner loops unrolled
# four words per iteration.
#
# Reached through .Lmul4x_enter from bn_mul_mont's dispatcher, which only
# jumps here when num is a multiple of 4, num >= 8 and rdx != rsi.
# Register roles on entry are the same as for bn_mul_mont:
#   rdi = result, rsi / rdx = multiplicands, rcx = modulus words,
#   r8 = pointer to n0, r9d = word count.
# rdi is reused as an accumulator inside the loops, so the result pointer is
# spilled to the scratch frame.  Returns 1 in rax.
210 .type bn_mul4x_mont,@function | |
211 .align 16 | |
212 bn_mul4x_mont: | |
213 .Lmul4x_enter: | |
214 pushq %rbx | |
215 pushq %rbp | |
216 pushq %r12 | |
217 pushq %r13 | |
218 pushq %r14 | |
219 pushq %r15 | |
220 | |
# Scratch frame: num+4 words below the pushes, rounded down to a 1024-byte
# boundary.  tp[num+1] keeps the original stack pointer (r11) and
# tp[num+2] keeps the result pointer (rdi).
221 movl %r9d,%r9d | |
222 leaq 4(%r9),%r10 | |
223 movq %rsp,%r11 | |
224 negq %r10 | |
225 leaq (%rsp,%r10,8),%rsp | |
226 andq $-1024,%rsp | |
227 | |
228 movq %r11,8(%rsp,%r9,8) | |
229 .Lmul4x_body: | |
230 movq %rdi,16(%rsp,%r9,8) | |
231 movq %rdx,%r12 | |
232 movq (%r8),%r8 | |
233 movq (%r12),%rbx | |
234 movq (%rsi),%rax | |
235 | |
# r14 = outer index i, r15 = inner index j.
236 xorq %r14,%r14 | |
237 xorq %r15,%r15 | |
238 | |
# First row: r11:r10 = ap[0]*bp[0]; m (rbp) = n0[0] * low word.
239 movq %r8,%rbp | |
240 mulq %rbx | |
241 movq %rax,%r10 | |
242 movq (%rcx),%rax | |
243 | |
244 imulq %r10,%rbp | |
245 movq %rdx,%r11 | |
246 | |
247 mulq %rbp | |
248 addq %rax,%r10 | |
249 movq 8(%rsi),%rax | |
250 adcq $0,%rdx | |
251 movq %rdx,%rdi | |
252 | |
# Word 1 of the first row is handled here, ahead of the unrolled loop;
# its result goes to tp[0] and j jumps to 4.
253 mulq %rbx | |
254 addq %rax,%r11 | |
255 movq 8(%rcx),%rax | |
256 adcq $0,%rdx | |
257 movq %rdx,%r10 | |
258 | |
259 mulq %rbp | |
260 addq %rax,%rdi | |
261 movq 16(%rsi),%rax | |
262 adcq $0,%rdx | |
263 addq %r11,%rdi | |
264 leaq 4(%r15),%r15 | |
265 adcq $0,%rdx | |
266 movq %rdi,(%rsp) | |
267 movq %rdx,%r13 | |
268 jmp .L1st4x | |
269 .align 16 | |
# .L1st4x: four ap[]*bp[0] and four np[]*m products per iteration.
# The ap-side carries alternate between r10 and r11, the np-side sums
# alternate between r13 and rdi.  Runs while j < num (j steps by 4).
270 .L1st4x: | |
271 mulq %rbx | |
272 addq %rax,%r10 | |
273 movq -16(%rcx,%r15,8),%rax | |
274 adcq $0,%rdx | |
275 movq %rdx,%r11 | |
276 | |
277 mulq %rbp | |
278 addq %rax,%r13 | |
279 movq -8(%rsi,%r15,8),%rax | |
280 adcq $0,%rdx | |
281 addq %r10,%r13 | |
282 adcq $0,%rdx | |
283 movq %r13,-24(%rsp,%r15,8) | |
284 movq %rdx,%rdi | |
285 | |
286 mulq %rbx | |
287 addq %rax,%r11 | |
288 movq -8(%rcx,%r15,8),%rax | |
289 adcq $0,%rdx | |
290 movq %rdx,%r10 | |
291 | |
292 mulq %rbp | |
293 addq %rax,%rdi | |
294 movq (%rsi,%r15,8),%rax | |
295 adcq $0,%rdx | |
296 addq %r11,%rdi | |
297 adcq $0,%rdx | |
298 movq %rdi,-16(%rsp,%r15,8) | |
299 movq %rdx,%r13 | |
300 | |
301 mulq %rbx | |
302 addq %rax,%r10 | |
303 movq (%rcx,%r15,8),%rax | |
304 adcq $0,%rdx | |
305 movq %rdx,%r11 | |
306 | |
307 mulq %rbp | |
308 addq %rax,%r13 | |
309 movq 8(%rsi,%r15,8),%rax | |
310 adcq $0,%rdx | |
311 addq %r10,%r13 | |
312 adcq $0,%rdx | |
313 movq %r13,-8(%rsp,%r15,8) | |
314 movq %rdx,%rdi | |
315 | |
316 mulq %rbx | |
317 addq %rax,%r11 | |
318 movq 8(%rcx,%r15,8),%rax | |
319 adcq $0,%rdx | |
320 leaq 4(%r15),%r15 | |
321 movq %rdx,%r10 | |
322 | |
323 mulq %rbp | |
324 addq %rax,%rdi | |
325 movq -16(%rsi,%r15,8),%rax | |
326 adcq $0,%rdx | |
327 addq %r11,%rdi | |
328 adcq $0,%rdx | |
329 movq %rdi,-32(%rsp,%r15,8) | |
330 movq %rdx,%r13 | |
331 cmpq %r9,%r15 | |
332 jl .L1st4x | |
333 | |
# Tail of the first row: the last two words, then rax is reloaded with
# ap[0] for the next row and the two top words are stored.
334 mulq %rbx | |
335 addq %rax,%r10 | |
336 movq -16(%rcx,%r15,8),%rax | |
337 adcq $0,%rdx | |
338 movq %rdx,%r11 | |
339 | |
340 mulq %rbp | |
341 addq %rax,%r13 | |
342 movq -8(%rsi,%r15,8),%rax | |
343 adcq $0,%rdx | |
344 addq %r10,%r13 | |
345 adcq $0,%rdx | |
346 movq %r13,-24(%rsp,%r15,8) | |
347 movq %rdx,%rdi | |
348 | |
349 mulq %rbx | |
350 addq %rax,%r11 | |
351 movq -8(%rcx,%r15,8),%rax | |
352 adcq $0,%rdx | |
353 movq %rdx,%r10 | |
354 | |
355 mulq %rbp | |
356 addq %rax,%rdi | |
357 movq (%rsi),%rax | |
358 adcq $0,%rdx | |
359 addq %r11,%rdi | |
360 adcq $0,%rdx | |
361 movq %rdi,-16(%rsp,%r15,8) | |
362 movq %rdx,%r13 | |
363 | |
364 xorq %rdi,%rdi | |
365 addq %r10,%r13 | |
366 adcq $0,%rdi | |
367 movq %r13,-8(%rsp,%r15,8) | |
368 movq %rdi,(%rsp,%r15,8) | |
369 | |
370 leaq 1(%r14),%r14 | |
371 .align 4 | |
# .Louter4x: rows i = 1 .. num-1.  Identical structure to the first row,
# with the previous tp[] words added into the ap-side sums.
372 .Louter4x: | |
373 movq (%r12,%r14,8),%rbx | |
374 xorq %r15,%r15 | |
375 movq (%rsp),%r10 | |
376 movq %r8,%rbp | |
377 mulq %rbx | |
378 addq %rax,%r10 | |
379 movq (%rcx),%rax | |
380 adcq $0,%rdx | |
381 | |
# m (rbp) = n0[0] * (tp[0] + low word of ap[0]*bp[i]).
382 imulq %r10,%rbp | |
383 movq %rdx,%r11 | |
384 | |
385 mulq %rbp | |
386 addq %rax,%r10 | |
387 movq 8(%rsi),%rax | |
388 adcq $0,%rdx | |
389 movq %rdx,%rdi | |
390 | |
391 mulq %rbx | |
392 addq %rax,%r11 | |
393 movq 8(%rcx),%rax | |
394 adcq $0,%rdx | |
395 addq 8(%rsp),%r11 | |
396 adcq $0,%rdx | |
397 movq %rdx,%r10 | |
398 | |
399 mulq %rbp | |
400 addq %rax,%rdi | |
401 movq 16(%rsi),%rax | |
402 adcq $0,%rdx | |
403 addq %r11,%rdi | |
404 leaq 4(%r15),%r15 | |
405 adcq $0,%rdx | |
406 movq %rdi,(%rsp) | |
407 movq %rdx,%r13 | |
408 jmp .Linner4x | |
409 .align 16 | |
# .Linner4x: four words per iteration; each ap-side product also picks up
# the matching old tp word before being merged with the np-side product.
410 .Linner4x: | |
411 mulq %rbx | |
412 addq %rax,%r10 | |
413 movq -16(%rcx,%r15,8),%rax | |
414 adcq $0,%rdx | |
415 addq -16(%rsp,%r15,8),%r10 | |
416 adcq $0,%rdx | |
417 movq %rdx,%r11 | |
418 | |
419 mulq %rbp | |
420 addq %rax,%r13 | |
421 movq -8(%rsi,%r15,8),%rax | |
422 adcq $0,%rdx | |
423 addq %r10,%r13 | |
424 adcq $0,%rdx | |
425 movq %r13,-24(%rsp,%r15,8) | |
426 movq %rdx,%rdi | |
427 | |
428 mulq %rbx | |
429 addq %rax,%r11 | |
430 movq -8(%rcx,%r15,8),%rax | |
431 adcq $0,%rdx | |
432 addq -8(%rsp,%r15,8),%r11 | |
433 adcq $0,%rdx | |
434 movq %rdx,%r10 | |
435 | |
436 mulq %rbp | |
437 addq %rax,%rdi | |
438 movq (%rsi,%r15,8),%rax | |
439 adcq $0,%rdx | |
440 addq %r11,%rdi | |
441 adcq $0,%rdx | |
442 movq %rdi,-16(%rsp,%r15,8) | |
443 movq %rdx,%r13 | |
444 | |
445 mulq %rbx | |
446 addq %rax,%r10 | |
447 movq (%rcx,%r15,8),%rax | |
448 adcq $0,%rdx | |
449 addq (%rsp,%r15,8),%r10 | |
450 adcq $0,%rdx | |
451 movq %rdx,%r11 | |
452 | |
453 mulq %rbp | |
454 addq %rax,%r13 | |
455 movq 8(%rsi,%r15,8),%rax | |
456 adcq $0,%rdx | |
457 addq %r10,%r13 | |
458 adcq $0,%rdx | |
459 movq %r13,-8(%rsp,%r15,8) | |
460 movq %rdx,%rdi | |
461 | |
462 mulq %rbx | |
463 addq %rax,%r11 | |
464 movq 8(%rcx,%r15,8),%rax | |
465 adcq $0,%rdx | |
466 addq 8(%rsp,%r15,8),%r11 | |
467 adcq $0,%rdx | |
468 leaq 4(%r15),%r15 | |
469 movq %rdx,%r10 | |
470 | |
471 mulq %rbp | |
472 addq %rax,%rdi | |
473 movq -16(%rsi,%r15,8),%rax | |
474 adcq $0,%rdx | |
475 addq %r11,%rdi | |
476 adcq $0,%rdx | |
477 movq %rdi,-32(%rsp,%r15,8) | |
478 movq %rdx,%r13 | |
479 cmpq %r9,%r15 | |
480 jl .Linner4x | |
481 | |
# Tail of row i: last two words; the outer index is bumped in the middle
# (leaq, flags untouched), rax is reloaded with ap[0], and the old top word
# tp[num] is added before the new top words are stored.
482 mulq %rbx | |
483 addq %rax,%r10 | |
484 movq -16(%rcx,%r15,8),%rax | |
485 adcq $0,%rdx | |
486 addq -16(%rsp,%r15,8),%r10 | |
487 adcq $0,%rdx | |
488 movq %rdx,%r11 | |
489 | |
490 mulq %rbp | |
491 addq %rax,%r13 | |
492 movq -8(%rsi,%r15,8),%rax | |
493 adcq $0,%rdx | |
494 addq %r10,%r13 | |
495 adcq $0,%rdx | |
496 movq %r13,-24(%rsp,%r15,8) | |
497 movq %rdx,%rdi | |
498 | |
499 mulq %rbx | |
500 addq %rax,%r11 | |
501 movq -8(%rcx,%r15,8),%rax | |
502 adcq $0,%rdx | |
503 addq -8(%rsp,%r15,8),%r11 | |
504 adcq $0,%rdx | |
505 leaq 1(%r14),%r14 | |
506 movq %rdx,%r10 | |
507 | |
508 mulq %rbp | |
509 addq %rax,%rdi | |
510 movq (%rsi),%rax | |
511 adcq $0,%rdx | |
512 addq %r11,%rdi | |
513 adcq $0,%rdx | |
514 movq %rdi,-16(%rsp,%r15,8) | |
515 movq %rdx,%r13 | |
516 | |
517 xorq %rdi,%rdi | |
518 addq %r10,%r13 | |
519 adcq $0,%rdi | |
520 addq (%rsp,%r9,8),%r13 | |
521 adcq $0,%rdi | |
522 movq %r13,-8(%rsp,%r15,8) | |
523 movq %rdi,(%rsp,%r15,8) | |
524 | |
525 cmpq %r9,%r14 | |
526 jl .Louter4x | |
# Final subtraction rp[] = tp[] - np[], four words per iteration.
# rdi is reloaded from the frame, r9 becomes num/4, rsi points at tp[],
# xmm0 is zeroed for the wipe below.  subq starts the borrow chain; the
# mov/lea/dec instructions between the sbbq's do not modify CF.
# The loop runs num/4 - 1 times, the last group is finished after it.
527 movq 16(%rsp,%r9,8),%rdi | |
528 movq 0(%rsp),%rax | |
529 pxor %xmm0,%xmm0 | |
530 movq 8(%rsp),%rdx | |
531 shrq $2,%r9 | |
532 leaq (%rsp),%rsi | |
533 xorq %r14,%r14 | |
534 | |
535 subq 0(%rcx),%rax | |
536 movq 16(%rsi),%rbx | |
537 movq 24(%rsi),%rbp | |
538 sbbq 8(%rcx),%rdx | |
539 leaq -1(%r9),%r15 | |
540 jmp .Lsub4x | |
541 .align 16 | |
542 .Lsub4x: | |
543 movq %rax,0(%rdi,%r14,8) | |
544 movq %rdx,8(%rdi,%r14,8) | |
545 sbbq 16(%rcx,%r14,8),%rbx | |
546 movq 32(%rsi,%r14,8),%rax | |
547 movq 40(%rsi,%r14,8),%rdx | |
548 sbbq 24(%rcx,%r14,8),%rbp | |
549 movq %rbx,16(%rdi,%r14,8) | |
550 movq %rbp,24(%rdi,%r14,8) | |
551 sbbq 32(%rcx,%r14,8),%rax | |
552 movq 48(%rsi,%r14,8),%rbx | |
553 movq 56(%rsi,%r14,8),%rbp | |
554 sbbq 40(%rcx,%r14,8),%rdx | |
555 leaq 4(%r14),%r14 | |
556 decq %r15 | |
557 jnz .Lsub4x | |
558 | |
559 movq %rax,0(%rdi,%r14,8) | |
560 movq 32(%rsi,%r14,8),%rax | |
561 sbbq 16(%rcx,%r14,8),%rbx | |
562 movq %rdx,8(%rdi,%r14,8) | |
563 sbbq 24(%rcx,%r14,8),%rbp | |
564 movq %rbx,16(%rdi,%r14,8) | |
565 | |
# rax = tp[num] minus the final borrow becomes the selection mask (expected
# to be 0 or all ones -- TODO confirm).  Branch-free source selection:
#   rsi = (tp & mask) | (rp & ~mask)
566 sbbq $0,%rax | |
567 movq %rbp,24(%rdi,%r14,8) | |
568 xorq %r14,%r14 | |
569 andq %rax,%rsi | |
570 notq %rax | |
571 movq %rdi,%rcx | |
572 andq %rax,%rcx | |
573 leaq -1(%r9),%r15 | |
574 orq %rcx,%rsi | |
575 
# Copy the selected source to rp[] in 16-byte chunks and store zeros
# (xmm0) over tp[] as each chunk has been read.  r14 is a byte offset here
# and advances by 32 per iteration.
576 movdqu (%rsi),%xmm1 | |
577 movdqa %xmm0,(%rsp) | |
578 movdqu %xmm1,(%rdi) | |
579 jmp .Lcopy4x | |
580 .align 16 | |
581 .Lcopy4x: | |
582 movdqu 16(%rsi,%r14,1),%xmm2 | |
583 movdqu 32(%rsi,%r14,1),%xmm1 | |
584 movdqa %xmm0,16(%rsp,%r14,1) | |
585 movdqu %xmm2,16(%rdi,%r14,1) | |
586 movdqa %xmm0,32(%rsp,%r14,1) | |
587 movdqu %xmm1,32(%rdi,%r14,1) | |
588 leaq 32(%r14),%r14 | |
589 decq %r15 | |
590 jnz .Lcopy4x | |
591 | |
# Last 16-byte chunk, then the epilogue: r9 is scaled back to num so the
# saved stack pointer can be fetched from tp[num+1]; return 1, restore the
# saved registers.  The .byte pair 0xf3,0xc3 encodes "rep ret".
592 shlq $2,%r9 | |
593 movdqu 16(%rsi,%r14,1),%xmm2 | |
594 movdqa %xmm0,16(%rsp,%r14,1) | |
595 movdqu %xmm2,16(%rdi,%r14,1) | |
596 movq 8(%rsp,%r9,8),%rsi | |
597 movq $1,%rax | |
598 movq (%rsi),%r15 | |
599 movq 8(%rsi),%r14 | |
600 movq 16(%rsi),%r13 | |
601 movq 24(%rsi),%r12 | |
602 movq 32(%rsi),%rbp | |
603 movq 40(%rsi),%rbx | |
604 leaq 48(%rsi),%rsp | |
605 .Lmul4x_epilogue: | |
606 .byte 0xf3,0xc3 | |
607 .size bn_mul4x_mont,.-bn_mul4x_mont | |
# bn_sqr4x_mont -- Montgomery squaring.
#
# Reached through .Lsqr4x_enter from bn_mul_mont's dispatcher, which only
# jumps here when num is a multiple of 4, num >= 8 and rdx == rsi (both
# operand pointers identical); rdx is not read by this routine.
# Register roles on entry:
#   rdi = result pointer, rsi = operand a[], rcx = modulus words,
#   r8 = pointer to n0, r9d = word count.
# Returns 1 in rax.
#
# Overall structure visible below:
#   1. products of pairs of words of a[] are accumulated into a 2*num-word
#      scratch vector tp[] (presumably the off-diagonal terms a[i]*a[j],
#      i<j -- the doubling in step 2 is consistent with that);
#   2. .Lsqr4x_shift_n_add doubles tp[] and adds the squares a[i]*a[i];
#   3. .Lsqr4x_mont_outer / _inner perform the reduction against the
#      modulus words;
#   4. conditional subtraction, copy to the result and wiping of tp[].
608 .type bn_sqr4x_mont,@function | |
609 .align 16 | |
610 bn_sqr4x_mont: | |
611 .Lsqr4x_enter: | |
612 pushq %rbx | |
613 pushq %rbp | |
614 pushq %r12 | |
615 pushq %r13 | |
616 pushq %r14 | |
617 pushq %r15 | |
618 | |
# r9 = num*8 (length in bytes), r10 = -r9, r11 = stack pointer after the
# pushes, r8 = n0[0].  The frame is 72 bytes plus 2*num words, rounded down
# to a 1024-byte boundary.
619 shll $3,%r9d | |
620 xorq %r10,%r10 | |
621 movq %rsp,%r11 | |
622 subq %r9,%r10 | |
623 movq (%r8),%r8 | |
624 leaq -72(%rsp,%r10,2),%rsp | |
625 andq $-1024,%rsp | |
626 | |
627 | |
628 | |
629 | |
630 | |
631 | |
632 | |
633 | |
634 | |
635 | |
636 | |
# Frame slots used by this routine:
#   0(%rsp)   byte length num*8       (written before the reduction)
#   8(%rsp)   end of tp[], &tp[2*num]  (written before the reduction)
#   32(%rsp)  result pointer (rdi)
#   40(%rsp)  modulus pointer (rcx)
#   48(%rsp)  n0[0] value (r8)
#   56(%rsp)  original stack pointer (r11)
#   64(%rsp)  start of tp[]
637 movq %rdi,32(%rsp) | |
638 movq %rcx,40(%rsp) | |
639 movq %r8,48(%rsp) | |
640 movq %r11,56(%rsp) | |
641 .Lsqr4x_body: | |
642 | |
643 | |
644 | |
645 | |
646 | |
647 | |
648 | |
# rbp = 32 - num*8 is a negative byte offset that is stepped up towards
# zero; rsi is moved to the end of a[], so a[] is addressed with negative
# offsets.  r14 and r15 hold two consecutive words of a[] (the multipliers
# of the current pass), rbx the word being multiplied.
649 leaq 32(%r10),%rbp | |
650 leaq (%rsi,%r9,1),%rsi | |
651 | |
652 movq %r9,%rcx | |
653 | |
654 | |
655 movq -32(%rsi,%rbp,1),%r14 | |
656 leaq 64(%rsp,%r9,2),%rdi | |
657 movq -24(%rsi,%rbp,1),%rax | |
658 leaq -32(%rdi,%rbp,1),%rdi | |
659 movq -16(%rsi,%rbp,1),%rbx | |
660 movq %rax,%r15 | |
661 | |
662 mulq %r14 | |
663 movq %rax,%r10 | |
664 movq %rbx,%rax | |
665 movq %rdx,%r11 | |
666 movq %r10,-24(%rdi,%rbp,1) | |
667 | |
668 xorq %r10,%r10 | |
669 mulq %r14 | |
670 addq %rax,%r11 | |
671 movq %rbx,%rax | |
672 adcq %rdx,%r10 | |
673 movq %r11,-16(%rdi,%rbp,1) | |
674 | |
# rcx becomes the inner byte offset (negative, counts up to zero).
675 leaq -16(%rbp),%rcx | |
676 | |
677 | |
678 movq 8(%rsi,%rcx,1),%rbx | |
679 mulq %r15 | |
680 movq %rax,%r12 | |
681 movq %rbx,%rax | |
682 movq %rdx,%r13 | |
683 | |
684 xorq %r11,%r11 | |
685 addq %r12,%r10 | |
686 leaq 16(%rcx),%rcx | |
687 adcq $0,%r11 | |
688 mulq %r14 | |
689 addq %rax,%r10 | |
690 movq %rbx,%rax | |
691 adcq %rdx,%r11 | |
692 movq %r10,-8(%rdi,%rcx,1) | |
693 jmp .Lsqr4x_1st | |
694 | |
695 .align 16 | |
# .Lsqr4x_1st: first pass, results are stored to tp[] without reading it.
# Each word of a[] is multiplied by r15 and by r14; four words are
# processed per iteration and the loop ends when rcx reaches zero.
696 .Lsqr4x_1st: | |
697 movq (%rsi,%rcx,1),%rbx | |
698 xorq %r12,%r12 | |
699 mulq %r15 | |
700 addq %rax,%r13 | |
701 movq %rbx,%rax | |
702 adcq %rdx,%r12 | |
703 | |
704 xorq %r10,%r10 | |
705 addq %r13,%r11 | |
706 adcq $0,%r10 | |
707 mulq %r14 | |
708 addq %rax,%r11 | |
709 movq %rbx,%rax | |
710 adcq %rdx,%r10 | |
711 movq %r11,(%rdi,%rcx,1) | |
712 | |
713 | |
714 movq 8(%rsi,%rcx,1),%rbx | |
715 xorq %r13,%r13 | |
716 mulq %r15 | |
717 addq %rax,%r12 | |
718 movq %rbx,%rax | |
719 adcq %rdx,%r13 | |
720 | |
721 xorq %r11,%r11 | |
722 addq %r12,%r10 | |
723 adcq $0,%r11 | |
724 mulq %r14 | |
725 addq %rax,%r10 | |
726 movq %rbx,%rax | |
727 adcq %rdx,%r11 | |
728 movq %r10,8(%rdi,%rcx,1) | |
729 | |
730 movq 16(%rsi,%rcx,1),%rbx | |
731 xorq %r12,%r12 | |
732 mulq %r15 | |
733 addq %rax,%r13 | |
734 movq %rbx,%rax | |
735 adcq %rdx,%r12 | |
736 | |
737 xorq %r10,%r10 | |
738 addq %r13,%r11 | |
739 adcq $0,%r10 | |
740 mulq %r14 | |
741 addq %rax,%r11 | |
742 movq %rbx,%rax | |
743 adcq %rdx,%r10 | |
744 movq %r11,16(%rdi,%rcx,1) | |
745 | |
746 | |
747 movq 24(%rsi,%rcx,1),%rbx | |
748 xorq %r13,%r13 | |
749 mulq %r15 | |
750 addq %rax,%r12 | |
751 movq %rbx,%rax | |
752 adcq %rdx,%r13 | |
753 | |
754 xorq %r11,%r11 | |
755 addq %r12,%r10 | |
756 leaq 32(%rcx),%rcx | |
757 adcq $0,%r11 | |
758 mulq %r14 | |
759 addq %rax,%r10 | |
760 movq %rbx,%rax | |
761 adcq %rdx,%r11 | |
762 movq %r10,-8(%rdi,%rcx,1) | |
763 | |
764 cmpq $0,%rcx | |
765 jne .Lsqr4x_1st | |
766 | |
# Tail of the first pass: last product by r15, top two words stored at the
# end of the current window; rbp advances by 16 bytes (two words).
767 xorq %r12,%r12 | |
768 addq %r11,%r13 | |
769 adcq $0,%r12 | |
770 mulq %r15 | |
771 addq %rax,%r13 | |
772 adcq %rdx,%r12 | |
773 | |
774 movq %r13,(%rdi) | |
775 leaq 16(%rbp),%rbp | |
776 movq %r12,8(%rdi) | |
777 jmp .Lsqr4x_outer | |
778 | |
779 .align 16 | |
# .Lsqr4x_outer: subsequent passes.  A new pair of multipliers (r14, r15)
# is loaded, the tp[] window pointer rdi is recomputed, and the products
# are added to what tp[] already holds.  Repeats until rbp reaches zero.
780 .Lsqr4x_outer: | |
781 movq -32(%rsi,%rbp,1),%r14 | |
782 leaq 64(%rsp,%r9,2),%rdi | |
783 movq -24(%rsi,%rbp,1),%rax | |
784 leaq -32(%rdi,%rbp,1),%rdi | |
785 movq -16(%rsi,%rbp,1),%rbx | |
786 movq %rax,%r15 | |
787 | |
788 movq -24(%rdi,%rbp,1),%r10 | |
789 xorq %r11,%r11 | |
790 mulq %r14 | |
791 addq %rax,%r10 | |
792 movq %rbx,%rax | |
793 adcq %rdx,%r11 | |
794 movq %r10,-24(%rdi,%rbp,1) | |
795 | |
796 xorq %r10,%r10 | |
797 addq -16(%rdi,%rbp,1),%r11 | |
798 adcq $0,%r10 | |
799 mulq %r14 | |
800 addq %rax,%r11 | |
801 movq %rbx,%rax | |
802 adcq %rdx,%r10 | |
803 movq %r11,-16(%rdi,%rbp,1) | |
804 | |
805 leaq -16(%rbp),%rcx | |
806 xorq %r12,%r12 | |
807 | |
808 | |
809 movq 8(%rsi,%rcx,1),%rbx | |
810 xorq %r13,%r13 | |
811 addq 8(%rdi,%rcx,1),%r12 | |
812 adcq $0,%r13 | |
813 mulq %r15 | |
814 addq %rax,%r12 | |
815 movq %rbx,%rax | |
816 adcq %rdx,%r13 | |
817 | |
818 xorq %r11,%r11 | |
819 addq %r12,%r10 | |
820 adcq $0,%r11 | |
821 mulq %r14 | |
822 addq %rax,%r10 | |
823 movq %rbx,%rax | |
824 adcq %rdx,%r11 | |
825 movq %r10,8(%rdi,%rcx,1) | |
826 | |
827 leaq 16(%rcx),%rcx | |
828 jmp .Lsqr4x_inner | |
829 | |
830 .align 16 | |
# .Lsqr4x_inner: two words per iteration; each step adds the old tp word,
# the product by r15 and the product by r14.  Ends when rcx reaches zero.
831 .Lsqr4x_inner: | |
832 movq (%rsi,%rcx,1),%rbx | |
833 xorq %r12,%r12 | |
834 addq (%rdi,%rcx,1),%r13 | |
835 adcq $0,%r12 | |
836 mulq %r15 | |
837 addq %rax,%r13 | |
838 movq %rbx,%rax | |
839 adcq %rdx,%r12 | |
840 | |
841 xorq %r10,%r10 | |
842 addq %r13,%r11 | |
843 adcq $0,%r10 | |
844 mulq %r14 | |
845 addq %rax,%r11 | |
846 movq %rbx,%rax | |
847 adcq %rdx,%r10 | |
848 movq %r11,(%rdi,%rcx,1) | |
849 | |
850 movq 8(%rsi,%rcx,1),%rbx | |
851 xorq %r13,%r13 | |
852 addq 8(%rdi,%rcx,1),%r12 | |
853 adcq $0,%r13 | |
854 mulq %r15 | |
855 addq %rax,%r12 | |
856 movq %rbx,%rax | |
857 adcq %rdx,%r13 | |
858 | |
859 xorq %r11,%r11 | |
860 addq %r12,%r10 | |
861 leaq 16(%rcx),%rcx | |
862 adcq $0,%r11 | |
863 mulq %r14 | |
864 addq %rax,%r10 | |
865 movq %rbx,%rax | |
866 adcq %rdx,%r11 | |
867 movq %r10,-8(%rdi,%rcx,1) | |
868 | |
869 cmpq $0,%rcx | |
870 jne .Lsqr4x_inner | |
871 | |
872 xorq %r12,%r12 | |
873 addq %r11,%r13 | |
874 adcq $0,%r12 | |
875 mulq %r15 | |
876 addq %rax,%r13 | |
877 adcq %rdx,%r12 | |
878 | |
879 movq %r13,(%rdi) | |
880 movq %r12,8(%rdi) | |
881 | |
882 addq $16,%rbp | |
883 jnz .Lsqr4x_outer | |
884 | |
885 | |
# Last pass, straight-line: rbp is zero here, so the operands are the final
# four words of a[] (offsets -32 .. -8 from its end).  r10, r12 and r13
# still carry values left by the preceding loop and are consumed without
# being reloaded -- NOTE(review): confirm this register carry-over against
# the generator script.
886 movq -32(%rsi),%r14 | |
887 leaq 64(%rsp,%r9,2),%rdi | |
888 movq -24(%rsi),%rax | |
889 leaq -32(%rdi,%rbp,1),%rdi | |
890 movq -16(%rsi),%rbx | |
891 movq %rax,%r15 | |
892 | |
893 xorq %r11,%r11 | |
894 mulq %r14 | |
895 addq %rax,%r10 | |
896 movq %rbx,%rax | |
897 adcq %rdx,%r11 | |
898 movq %r10,-24(%rdi) | |
899 | |
900 xorq %r10,%r10 | |
901 addq %r13,%r11 | |
902 adcq $0,%r10 | |
903 mulq %r14 | |
904 addq %rax,%r11 | |
905 movq %rbx,%rax | |
906 adcq %rdx,%r10 | |
907 movq %r11,-16(%rdi) | |
908 | |
909 movq -8(%rsi),%rbx | |
910 mulq %r15 | |
911 addq %rax,%r12 | |
912 movq %rbx,%rax | |
913 adcq $0,%rdx | |
914 | |
915 xorq %r11,%r11 | |
916 addq %r12,%r10 | |
917 movq %rdx,%r13 | |
918 adcq $0,%r11 | |
919 mulq %r14 | |
920 addq %rax,%r10 | |
921 movq %rbx,%rax | |
922 adcq %rdx,%r11 | |
923 movq %r10,-8(%rdi) | |
924 | |
925 xorq %r12,%r12 | |
926 addq %r11,%r13 | |
927 adcq $0,%r12 | |
928 mulq %r15 | |
929 addq %rax,%r13 | |
930 movq -16(%rsi),%rax | |
931 adcq %rdx,%r12 | |
932 | |
933 movq %r13,(%rdi) | |
934 movq %r12,8(%rdi) | |
935 
# Product of the last two words of a[] (rax = word at -16, rbx = word at
# -8); rbp is reset to 16 - num*8 for the next phase and r14 / r15 are
# cleared (shift carry-in and saved carry flag).
936 mulq %rbx | |
937 addq $16,%rbp | |
938 xorq %r14,%r14 | |
939 subq %r9,%rbp | |
940 xorq %r15,%r15 | |
941 | |
942 addq %r12,%rax | |
943 adcq $0,%rdx | |
944 movq %rax,8(%rdi) | |
945 movq %rdx,16(%rdi) | |
946 movq %r15,24(%rdi) | |
947 | |
# Shift-and-add phase.  For each pair of tp words (r10, r11):
#   leaq (%r14,%r10,2)  -> 2*t plus the bit shifted out of the previous word
#   shrq $63            -> extracts the bit shifted out of this word
#   mulq %rax           -> square of the current word of a[]
# rcx is zero here (the loops above exit with rcx == 0 and nothing has
# written it since), so (%rcx,%r11,2) is simply 2*r11.
# The carry of the adcq chain is parked in r15 with "sbbq %r15,%r15" and
# put back into CF with "negq %r15" before the next adcq.
# tp[] is addressed as (%rdi,%rbp,2) and a[] as (%rsi,%rbp,1), so one
# a-word corresponds to two tp-words.
948 movq -16(%rsi,%rbp,1),%rax | |
949 leaq 64(%rsp,%r9,2),%rdi | |
950 xorq %r10,%r10 | |
951 movq -24(%rdi,%rbp,2),%r11 | |
952 | |
953 leaq (%r14,%r10,2),%r12 | |
954 shrq $63,%r10 | |
955 leaq (%rcx,%r11,2),%r13 | |
956 shrq $63,%r11 | |
957 orq %r10,%r13 | |
958 movq -16(%rdi,%rbp,2),%r10 | |
959 movq %r11,%r14 | |
960 mulq %rax | |
961 negq %r15 | |
962 movq -8(%rdi,%rbp,2),%r11 | |
963 adcq %rax,%r12 | |
964 movq -8(%rsi,%rbp,1),%rax | |
965 movq %r12,-32(%rdi,%rbp,2) | |
966 adcq %rdx,%r13 | |
967 | |
968 leaq (%r14,%r10,2),%rbx | |
969 movq %r13,-24(%rdi,%rbp,2) | |
970 sbbq %r15,%r15 | |
971 shrq $63,%r10 | |
972 leaq (%rcx,%r11,2),%r8 | |
973 shrq $63,%r11 | |
974 orq %r10,%r8 | |
975 movq 0(%rdi,%rbp,2),%r10 | |
976 movq %r11,%r14 | |
977 mulq %rax | |
978 negq %r15 | |
979 movq 8(%rdi,%rbp,2),%r11 | |
980 adcq %rax,%rbx | |
981 movq 0(%rsi,%rbp,1),%rax | |
982 movq %rbx,-16(%rdi,%rbp,2) | |
983 adcq %rdx,%r8 | |
984 leaq 16(%rbp),%rbp | |
985 movq %r8,-40(%rdi,%rbp,2) | |
986 sbbq %r15,%r15 | |
987 jmp .Lsqr4x_shift_n_add | |
988 | |
989 .align 16 | |
# .Lsqr4x_shift_n_add: four words of a[] (eight words of tp[]) per
# iteration; ends when rbp reaches zero.
990 .Lsqr4x_shift_n_add: | |
991 leaq (%r14,%r10,2),%r12 | |
992 shrq $63,%r10 | |
993 leaq (%rcx,%r11,2),%r13 | |
994 shrq $63,%r11 | |
995 orq %r10,%r13 | |
996 movq -16(%rdi,%rbp,2),%r10 | |
997 movq %r11,%r14 | |
998 mulq %rax | |
999 negq %r15 | |
1000 movq -8(%rdi,%rbp,2),%r11 | |
1001 adcq %rax,%r12 | |
1002 movq -8(%rsi,%rbp,1),%rax | |
1003 movq %r12,-32(%rdi,%rbp,2) | |
1004 adcq %rdx,%r13 | |
1005 | |
1006 leaq (%r14,%r10,2),%rbx | |
1007 movq %r13,-24(%rdi,%rbp,2) | |
1008 sbbq %r15,%r15 | |
1009 shrq $63,%r10 | |
1010 leaq (%rcx,%r11,2),%r8 | |
1011 shrq $63,%r11 | |
1012 orq %r10,%r8 | |
1013 movq 0(%rdi,%rbp,2),%r10 | |
1014 movq %r11,%r14 | |
1015 mulq %rax | |
1016 negq %r15 | |
1017 movq 8(%rdi,%rbp,2),%r11 | |
1018 adcq %rax,%rbx | |
1019 movq 0(%rsi,%rbp,1),%rax | |
1020 movq %rbx,-16(%rdi,%rbp,2) | |
1021 adcq %rdx,%r8 | |
1022 | |
1023 leaq (%r14,%r10,2),%r12 | |
1024 movq %r8,-8(%rdi,%rbp,2) | |
1025 sbbq %r15,%r15 | |
1026 shrq $63,%r10 | |
1027 leaq (%rcx,%r11,2),%r13 | |
1028 shrq $63,%r11 | |
1029 orq %r10,%r13 | |
1030 movq 16(%rdi,%rbp,2),%r10 | |
1031 movq %r11,%r14 | |
1032 mulq %rax | |
1033 negq %r15 | |
1034 movq 24(%rdi,%rbp,2),%r11 | |
1035 adcq %rax,%r12 | |
1036 movq 8(%rsi,%rbp,1),%rax | |
1037 movq %r12,0(%rdi,%rbp,2) | |
1038 adcq %rdx,%r13 | |
1039 | |
1040 leaq (%r14,%r10,2),%rbx | |
1041 movq %r13,8(%rdi,%rbp,2) | |
1042 sbbq %r15,%r15 | |
1043 shrq $63,%r10 | |
1044 leaq (%rcx,%r11,2),%r8 | |
1045 shrq $63,%r11 | |
1046 orq %r10,%r8 | |
1047 movq 32(%rdi,%rbp,2),%r10 | |
1048 movq %r11,%r14 | |
1049 mulq %rax | |
1050 negq %r15 | |
1051 movq 40(%rdi,%rbp,2),%r11 | |
1052 adcq %rax,%rbx | |
1053 movq 16(%rsi,%rbp,1),%rax | |
1054 movq %rbx,16(%rdi,%rbp,2) | |
1055 adcq %rdx,%r8 | |
1056 movq %r8,24(%rdi,%rbp,2) | |
1057 sbbq %r15,%r15 | |
1058 addq $32,%rbp | |
1059 jnz .Lsqr4x_shift_n_add | |
1060 | |
# Tail of the shift-and-add phase: the last two words of a[] and the top
# four words of tp[].
1061 leaq (%r14,%r10,2),%r12 | |
1062 shrq $63,%r10 | |
1063 leaq (%rcx,%r11,2),%r13 | |
1064 shrq $63,%r11 | |
1065 orq %r10,%r13 | |
1066 movq -16(%rdi),%r10 | |
1067 movq %r11,%r14 | |
1068 mulq %rax | |
1069 negq %r15 | |
1070 movq -8(%rdi),%r11 | |
1071 adcq %rax,%r12 | |
1072 movq -8(%rsi),%rax | |
1073 movq %r12,-32(%rdi) | |
1074 adcq %rdx,%r13 | |
1075 | |
1076 leaq (%r14,%r10,2),%rbx | |
1077 movq %r13,-24(%rdi) | |
1078 sbbq %r15,%r15 | |
1079 shrq $63,%r10 | |
1080 leaq (%rcx,%r11,2),%r8 | |
1081 shrq $63,%r11 | |
1082 orq %r10,%r8 | |
1083 mulq %rax | |
1084 negq %r15 | |
1085 adcq %rax,%rbx | |
1086 adcq %rdx,%r8 | |
1087 movq %rbx,-16(%rdi) | |
1088 movq %r8,-8(%rdi) | |
# Reduction set-up:
#   rsi = end of the modulus words (modulus pointer + num*8)
#   r8  = n0[0] (reloaded from the frame), rcx = -num*8
#   0(%rsp) = num*8, 8(%rsp) = &tp[2*num] (loop limit)
#   rdi = &tp[num], r10 = tp[0], r14 = n0[0]*tp[0]
#   rbp = 0 (carry word passed between outer passes)
#   rax / rbx = first modulus word, r9 = second modulus word
1089 movq 40(%rsp),%rsi | |
1090 movq 48(%rsp),%r8 | |
1091 xorq %rcx,%rcx | |
1092 movq %r9,0(%rsp) | |
1093 subq %r9,%rcx | |
1094 movq 64(%rsp),%r10 | |
1095 movq %r8,%r14 | |
1096 leaq 64(%rsp,%r9,2),%rax | |
1097 leaq 64(%rsp,%r9,1),%rdi | |
1098 movq %rax,8(%rsp) | |
1099 leaq (%rsi,%r9,1),%rsi | |
1100 xorq %rbp,%rbp | |
1101 | |
1102 movq 0(%rsi,%rcx,1),%rax | |
1103 movq 8(%rsi,%rcx,1),%r9 | |
1104 imulq %r10,%r14 | |
1105 movq %rax,%rbx | |
1106 jmp .Lsqr4x_mont_outer | |
1107 | |
1108 .align 16 | |
# .Lsqr4x_mont_outer: each pass uses two multipliers, r14 and r15 (r15 is
# n0[0] times the updated second word, computed by the imulq below), and
# advances rdi by 16 bytes, i.e. two words of tp[] per pass.
1109 .Lsqr4x_mont_outer: | |
1110 xorq %r11,%r11 | |
1111 mulq %r14 | |
1112 addq %rax,%r10 | |
1113 movq %r9,%rax | |
1114 adcq %rdx,%r11 | |
1115 movq %r8,%r15 | |
1116 | |
1117 xorq %r10,%r10 | |
1118 addq 8(%rdi,%rcx,1),%r11 | |
1119 adcq $0,%r10 | |
1120 mulq %r14 | |
1121 addq %rax,%r11 | |
1122 movq %rbx,%rax | |
1123 adcq %rdx,%r10 | |
1124 | |
1125 imulq %r11,%r15 | |
1126 | |
1127 movq 16(%rsi,%rcx,1),%rbx | |
1128 xorq %r13,%r13 | |
1129 addq %r11,%r12 | |
1130 adcq $0,%r13 | |
1131 mulq %r15 | |
1132 addq %rax,%r12 | |
1133 movq %rbx,%rax | |
1134 adcq %rdx,%r13 | |
1135 movq %r12,8(%rdi,%rcx,1) | |
1136 | |
1137 xorq %r11,%r11 | |
1138 addq 16(%rdi,%rcx,1),%r10 | |
1139 adcq $0,%r11 | |
1140 mulq %r14 | |
1141 addq %rax,%r10 | |
1142 movq %r9,%rax | |
1143 adcq %rdx,%r11 | |
1144 | |
1145 movq 24(%rsi,%rcx,1),%r9 | |
1146 xorq %r12,%r12 | |
1147 addq %r10,%r13 | |
1148 adcq $0,%r12 | |
1149 mulq %r15 | |
1150 addq %rax,%r13 | |
1151 movq %r9,%rax | |
1152 adcq %rdx,%r12 | |
1153 movq %r13,16(%rdi,%rcx,1) | |
1154 | |
1155 xorq %r10,%r10 | |
1156 addq 24(%rdi,%rcx,1),%r11 | |
1157 leaq 32(%rcx),%rcx | |
1158 adcq $0,%r10 | |
1159 mulq %r14 | |
1160 addq %rax,%r11 | |
1161 movq %rbx,%rax | |
1162 adcq %rdx,%r10 | |
1163 jmp .Lsqr4x_mont_inner | |
1164 | |
1165 .align 16 | |
# .Lsqr4x_mont_inner: four modulus words per iteration, each multiplied by
# both r15 and r14 and accumulated into tp[]; ends when rcx reaches zero.
1166 .Lsqr4x_mont_inner: | |
1167 movq (%rsi,%rcx,1),%rbx | |
1168 xorq %r13,%r13 | |
1169 addq %r11,%r12 | |
1170 adcq $0,%r13 | |
1171 mulq %r15 | |
1172 addq %rax,%r12 | |
1173 movq %rbx,%rax | |
1174 adcq %rdx,%r13 | |
1175 movq %r12,-8(%rdi,%rcx,1) | |
1176 | |
1177 xorq %r11,%r11 | |
1178 addq (%rdi,%rcx,1),%r10 | |
1179 adcq $0,%r11 | |
1180 mulq %r14 | |
1181 addq %rax,%r10 | |
1182 movq %r9,%rax | |
1183 adcq %rdx,%r11 | |
1184 | |
1185 movq 8(%rsi,%rcx,1),%r9 | |
1186 xorq %r12,%r12 | |
1187 addq %r10,%r13 | |
1188 adcq $0,%r12 | |
1189 mulq %r15 | |
1190 addq %rax,%r13 | |
1191 movq %r9,%rax | |
1192 adcq %rdx,%r12 | |
1193 movq %r13,(%rdi,%rcx,1) | |
1194 | |
1195 xorq %r10,%r10 | |
1196 addq 8(%rdi,%rcx,1),%r11 | |
1197 adcq $0,%r10 | |
1198 mulq %r14 | |
1199 addq %rax,%r11 | |
1200 movq %rbx,%rax | |
1201 adcq %rdx,%r10 | |
1202 | |
1203 | |
1204 movq 16(%rsi,%rcx,1),%rbx | |
1205 xorq %r13,%r13 | |
1206 addq %r11,%r12 | |
1207 adcq $0,%r13 | |
1208 mulq %r15 | |
1209 addq %rax,%r12 | |
1210 movq %rbx,%rax | |
1211 adcq %rdx,%r13 | |
1212 movq %r12,8(%rdi,%rcx,1) | |
1213 | |
1214 xorq %r11,%r11 | |
1215 addq 16(%rdi,%rcx,1),%r10 | |
1216 adcq $0,%r11 | |
1217 mulq %r14 | |
1218 addq %rax,%r10 | |
1219 movq %r9,%rax | |
1220 adcq %rdx,%r11 | |
1221 | |
1222 movq 24(%rsi,%rcx,1),%r9 | |
1223 xorq %r12,%r12 | |
1224 addq %r10,%r13 | |
1225 adcq $0,%r12 | |
1226 mulq %r15 | |
1227 addq %rax,%r13 | |
1228 movq %r9,%rax | |
1229 adcq %rdx,%r12 | |
1230 movq %r13,16(%rdi,%rcx,1) | |
1231 | |
1232 xorq %r10,%r10 | |
1233 addq 24(%rdi,%rcx,1),%r11 | |
1234 leaq 32(%rcx),%rcx | |
1235 adcq $0,%r10 | |
1236 mulq %r14 | |
1237 addq %rax,%r11 | |
1238 movq %rbx,%rax | |
1239 adcq %rdx,%r10 | |
1240 cmpq $0,%rcx | |
1241 jne .Lsqr4x_mont_inner | |
1242 | |
# End of one outer pass: rcx is reset to -num*8, r14 is re-seeded with
# n0[0] and then multiplied by the next tp word for the following pass, the
# carry word rbp from the previous pass is added in, and a new carry word
# is produced in rbp.  rdi advances by 16 bytes; loop while rdi is below
# the limit stored in 8(%rsp).
1243 subq 0(%rsp),%rcx | |
1244 movq %r8,%r14 | |
1245 | |
1246 xorq %r13,%r13 | |
1247 addq %r11,%r12 | |
1248 adcq $0,%r13 | |
1249 mulq %r15 | |
1250 addq %rax,%r12 | |
1251 movq %r9,%rax | |
1252 adcq %rdx,%r13 | |
1253 movq %r12,-8(%rdi) | |
1254 | |
1255 xorq %r11,%r11 | |
1256 addq (%rdi),%r10 | |
1257 adcq $0,%r11 | |
1258 movq 0(%rsi,%rcx,1),%rbx | |
1259 addq %rbp,%r10 | |
1260 adcq $0,%r11 | |
1261 | |
1262 imulq 16(%rdi,%rcx,1),%r14 | |
1263 xorq %r12,%r12 | |
1264 movq 8(%rsi,%rcx,1),%r9 | |
1265 addq %r10,%r13 | |
1266 movq 16(%rdi,%rcx,1),%r10 | |
1267 adcq $0,%r12 | |
1268 mulq %r15 | |
1269 addq %rax,%r13 | |
1270 movq %rbx,%rax | |
1271 adcq %rdx,%r12 | |
1272 movq %r13,(%rdi) | |
1273 | |
1274 xorq %rbp,%rbp | |
1275 addq 8(%rdi),%r12 | |
1276 adcq %rbp,%rbp | |
1277 addq %r11,%r12 | |
1278 leaq 16(%rdi),%rdi | |
1279 adcq $0,%rbp | |
1280 movq %r12,-8(%rdi) | |
1281 cmpq 8(%rsp),%rdi | |
1282 jb .Lsqr4x_mont_outer | |
1283 | |
# Final subtraction.  r9 is restored to num*8; the last carry word is
# stored at (rdi); rbx = &tp[num], the part of tp[] the subtraction reads
# from; rsi = modulus pointer; rdi = result pointer; r9 becomes num/4 and
# rcx = num/4 - 1 loop iterations.  subq starts the borrow chain and the
# mov/lea/dec instructions in between leave CF untouched.
1284 movq 0(%rsp),%r9 | |
1285 movq %rbp,(%rdi) | |
1286 movq 64(%rsp,%r9,1),%rax | |
1287 leaq 64(%rsp,%r9,1),%rbx | |
1288 movq 40(%rsp),%rsi | |
1289 shrq $5,%r9 | |
1290 movq 8(%rbx),%rdx | |
1291 xorq %rbp,%rbp | |
1292 | |
1293 movq 32(%rsp),%rdi | |
1294 subq 0(%rsi),%rax | |
1295 movq 16(%rbx),%r10 | |
1296 movq 24(%rbx),%r11 | |
1297 sbbq 8(%rsi),%rdx | |
1298 leaq -1(%r9),%rcx | |
1299 jmp .Lsqr4x_sub | |
1300 .align 16 | |
1301 .Lsqr4x_sub: | |
1302 movq %rax,0(%rdi,%rbp,8) | |
1303 movq %rdx,8(%rdi,%rbp,8) | |
1304 sbbq 16(%rsi,%rbp,8),%r10 | |
1305 movq 32(%rbx,%rbp,8),%rax | |
1306 movq 40(%rbx,%rbp,8),%rdx | |
1307 sbbq 24(%rsi,%rbp,8),%r11 | |
1308 movq %r10,16(%rdi,%rbp,8) | |
1309 movq %r11,24(%rdi,%rbp,8) | |
1310 sbbq 32(%rsi,%rbp,8),%rax | |
1311 movq 48(%rbx,%rbp,8),%r10 | |
1312 movq 56(%rbx,%rbp,8),%r11 | |
1313 sbbq 40(%rsi,%rbp,8),%rdx | |
1314 leaq 4(%rbp),%rbp | |
1315 decq %rcx | |
1316 jnz .Lsqr4x_sub | |
1317 | |
1318 movq %rax,0(%rdi,%rbp,8) | |
1319 movq 32(%rbx,%rbp,8),%rax | |
1320 sbbq 16(%rsi,%rbp,8),%r10 | |
1321 movq %rdx,8(%rdi,%rbp,8) | |
1322 sbbq 24(%rsi,%rbp,8),%r11 | |
1323 movq %r10,16(%rdi,%rbp,8) | |
1324 | |
# rax (the word following the result window) minus the final borrow is the
# selection mask (expected to be 0 or all ones -- TODO confirm).
# Branch-free source selection: rbx = (&tp[num] & mask) | (rp & ~mask).
1325 sbbq $0,%rax | |
1326 movq %r11,24(%rdi,%rbp,8) | |
1327 xorq %rbp,%rbp | |
1328 andq %rax,%rbx | |
1329 notq %rax | |
1330 movq %rdi,%rsi | |
1331 andq %rax,%rsi | |
1332 leaq -1(%r9),%rcx | |
1333 orq %rsi,%rbx | |
1334 | |
# Copy the selected source to the result in 16-byte chunks and store zeros
# over tp[] through two pointers: 64(%rsp) (start of tp[]) and rsi.
# NOTE(review): r9 is num/4 here, so rsi = rsp + 64 + 2*(num/4)*8
# = rsp + 64 + 4*num bytes, i.e. &tp[num/2], not &tp[num].  Each zeroing
# stream covers num words, so by this arithmetic tp[0 .. 3*num/2-1] is
# cleared and the top quarter of tp[] is left as is -- confirm whether
# that is intended.
1335 pxor %xmm0,%xmm0 | |
1336 leaq 64(%rsp,%r9,8),%rsi | |
1337 movdqu (%rbx),%xmm1 | |
1338 leaq (%rsi,%r9,8),%rsi | |
1339 movdqa %xmm0,64(%rsp) | |
1340 movdqa %xmm0,(%rsi) | |
1341 movdqu %xmm1,(%rdi) | |
1342 jmp .Lsqr4x_copy | |
1343 .align 16 | |
# .Lsqr4x_copy: rbp is a byte offset advancing by 32 per iteration; both
# source chunks are loaded before any zero is stored in that iteration.
1344 .Lsqr4x_copy: | |
1345 movdqu 16(%rbx,%rbp,1),%xmm2 | |
1346 movdqu 32(%rbx,%rbp,1),%xmm1 | |
1347 movdqa %xmm0,80(%rsp,%rbp,1) | |
1348 movdqa %xmm0,96(%rsp,%rbp,1) | |
1349 movdqa %xmm0,16(%rsi,%rbp,1) | |
1350 movdqa %xmm0,32(%rsi,%rbp,1) | |
1351 movdqu %xmm2,16(%rdi,%rbp,1) | |
1352 movdqu %xmm1,32(%rdi,%rbp,1) | |
1353 leaq 32(%rbp),%rbp | |
1354 decq %rcx | |
1355 jnz .Lsqr4x_copy | |
1356 | |
# Last 16-byte chunk, then the epilogue: reload the original stack pointer
# from 56(%rsp), return 1, restore the six saved registers.
# The .byte pair 0xf3,0xc3 encodes "rep ret".
1357 movdqu 16(%rbx,%rbp,1),%xmm2 | |
1358 movdqa %xmm0,80(%rsp,%rbp,1) | |
1359 movdqa %xmm0,16(%rsi,%rbp,1) | |
1360 movdqu %xmm2,16(%rdi,%rbp,1) | |
1361 movq 56(%rsp),%rsi | |
1362 movq $1,%rax | |
1363 movq 0(%rsi),%r15 | |
1364 movq 8(%rsi),%r14 | |
1365 movq 16(%rsi),%r13 | |
1366 movq 24(%rsi),%r12 | |
1367 movq 32(%rsi),%rbp | |
1368 movq 40(%rsi),%rbx | |
1369 leaq 48(%rsi),%rsp | |
1370 .Lsqr4x_epilogue: | |
1371 .byte 0xf3,0xc3 | |
1372 .size bn_sqr4x_mont,.-bn_sqr4x_mont | |
# NUL-terminated ASCII identification string embedded in the object:
#   "Montgomery Multiplication for x86_64, CRYPTOGAMS by <appro@openssl.org>"
# (the byte list below is hard-wrapped by the listing; the value 108 is
# split across two of the wrapped lines)
1373 .byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105
,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84
,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,10
8,46,111,114,103,62,0 | |
1374 .align 16 | |
OLD | NEW |