Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(327)

Side by Side Diff: openssl/crypto/bn/asm/x86_64-mont.S

Issue 2072073002: Delete bundled copy of OpenSSL and replace with README. (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/openssl@master
Patch Set: Delete bundled copy of OpenSSL and replace with README. Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « openssl/crypto/bn/asm/x86_64-gf2m.pl ('k') | openssl/crypto/bn/asm/x86_64-mont.pl » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
# bn_mul_mont -- generic (one word per inner-loop step) multiply-and-reduce
# routine.  The identification string at the end of this file names the code
# "Montgomery Multiplication for x86_64".
#
# NOTE(review): the number at the start of every line is a diff-viewer line
# number captured together with the listing; it is not assembler input.
#
# Registers on entry, as they are consumed below (SysV AMD64 argument order
# is rdi, rsi, rdx, rcx, r8, r9):
#   rdi  result vector (written by .Lsub and .Lcopy)
#   rsi  first multiplicand vector
#   rdx  second multiplicand vector (copied to r12)
#   rcx  vector multiplied by the per-word reduction factor and subtracted
#        at the end -- presumably the modulus; confirm against the caller
#   r8   pointer to one word that is loaded and used as the reduction
#        multiplier -- presumably n0 = -1/modulus mod 2^64; confirm
#   r9d  number of 64-bit words in each vector
# Returns 1 in rax.  rbx, rbp and r12-r15 are saved and restored.
1 .text
2
3 .globl bn_mul_mont
4 .type bn_mul_mont,@function
5 .align 16
6 bn_mul_mont:
# Dispatch: word counts that are not a multiple of 4, or are below 8, use the
# generic code at .Lmul_enter.  Otherwise distinct multiplicand pointers go to
# .Lmul4x_enter and identical pointers (a squaring) go to .Lsqr4x_enter.
7 testl $3,%r9d
8 jnz .Lmul_enter
9 cmpl $8,%r9d
10 jb .Lmul_enter
11 cmpq %rsi,%rdx
12 jne .Lmul4x_enter
13 jmp .Lsqr4x_enter
14
15 .align 16
16 .Lmul_enter:
# Save the callee-saved registers used below.
17 pushq %rbx
18 pushq %rbp
19 pushq %r12
20 pushq %r13
21 pushq %r14
22 pushq %r15
23
# Zero-extend the word count, then carve (count+2) words off the stack and
# round rsp down to a 1024-byte boundary.  The temporary vector lives at
# (%rsp); the pre-allocation rsp (r11) is kept in word count+1 of the frame.
24 movl %r9d,%r9d
25 leaq 2(%r9),%r10
26 movq %rsp,%r11
27 negq %r10
28 leaq (%rsp,%r10,8),%rsp
29 andq $-1024,%rsp
30
31 movq %r11,8(%rsp,%r9,8)
32 .Lmul_body:
# r12 = second multiplicand pointer, r8 = reduction multiplier word,
# rbx = first word of the second multiplicand, rax = first word of the first.
33 movq %rdx,%r12
34 movq (%r8),%r8
35 movq (%r12),%rbx
36 movq (%rsi),%rax
37
# r14 = outer index, r15 = inner index.
38 xorq %r14,%r14
39 xorq %r15,%r15
40
# Form the first product, derive the per-pass factor rbp = low word * r8, and
# start the (rcx) * rbp product whose low word is added to r10 only to
# produce the carry.
41 movq %r8,%rbp
42 mulq %rbx
43 movq %rax,%r10
44 movq (%rcx),%rax
45
46 imulq %r10,%rbp
47 movq %rdx,%r11
48
49 mulq %rbp
50 addq %rax,%r10
51 movq 8(%rsi),%rax
52 adcq $0,%rdx
53 movq %rdx,%r13
54
55 leaq 1(%r15),%r15
56 jmp .L1st_enter
57
58 .align 16
# First pass (outer index 0): there is no earlier temporary to add in, so each
# step stores the sum of the two running products into the temporary vector,
# one word behind the words currently being multiplied.
59 .L1st:
60 addq %rax,%r13
61 movq (%rsi,%r15,8),%rax
62 adcq $0,%rdx
63 addq %r11,%r13
64 movq %r10,%r11
65 adcq $0,%rdx
66 movq %r13,-16(%rsp,%r15,8)
67 movq %rdx,%r13
68
69 .L1st_enter:
70 mulq %rbx
71 addq %rax,%r11
72 movq (%rcx,%r15,8),%rax
73 adcq $0,%rdx
74 leaq 1(%r15),%r15
75 movq %rdx,%r10
76
77 mulq %rbp
78 cmpq %r9,%r15
79 jne .L1st
80
# Drain the last products of the first pass and reload rax with the first
# word of the first multiplicand for the next pass.
81 addq %rax,%r13
82 movq (%rsi),%rax
83 adcq $0,%rdx
84 addq %r11,%r13
85 adcq $0,%rdx
86 movq %r13,-16(%rsp,%r15,8)
87 movq %rdx,%r13
88 movq %r10,%r11
89
# Store the top word and the carry-out word (frame words count-1 and count).
90 xorq %rdx,%rdx
91 addq %r11,%r13
92 adcq $0,%rdx
93 movq %r13,-8(%rsp,%r9,8)
94 movq %rdx,(%rsp,%r9,8)
95
96 leaq 1(%r14),%r14
97 jmp .Louter
98 .align 16
# Remaining passes: rbx = next word of the second multiplicand, r10 = current
# low word of the temporary.  Same structure as the first pass, except the
# previous temporary is added in as the pass proceeds.
99 .Louter:
100 movq (%r12,%r14,8),%rbx
101 xorq %r15,%r15
102 movq %r8,%rbp
103 movq (%rsp),%r10
104 mulq %rbx
105 addq %rax,%r10
106 movq (%rcx),%rax
107 adcq $0,%rdx
108
109 imulq %r10,%rbp
110 movq %rdx,%r11
111
112 mulq %rbp
113 addq %rax,%r10
114 movq 8(%rsi),%rax
115 adcq $0,%rdx
116 movq 8(%rsp),%r10
117 movq %rdx,%r13
118
119 leaq 1(%r15),%r15
120 jmp .Linner_enter
121
122 .align 16
123 .Linner:
124 addq %rax,%r13
125 movq (%rsi,%r15,8),%rax
126 adcq $0,%rdx
127 addq %r10,%r13
128 movq (%rsp,%r15,8),%r10
129 adcq $0,%rdx
130 movq %r13,-16(%rsp,%r15,8)
131 movq %rdx,%r13
132
133 .Linner_enter:
134 mulq %rbx
135 addq %rax,%r11
136 movq (%rcx,%r15,8),%rax
137 adcq $0,%rdx
138 addq %r11,%r10
139 movq %rdx,%r11
140 adcq $0,%r11
141 leaq 1(%r15),%r15
142
143 mulq %rbp
144 cmpq %r9,%r15
145 jne .Linner
146
# Drain the pass; r10 picks up the previous carry-out word of the temporary.
147 addq %rax,%r13
148 movq (%rsi),%rax
149 adcq $0,%rdx
150 addq %r10,%r13
151 movq (%rsp,%r15,8),%r10
152 adcq $0,%rdx
153 movq %r13,-16(%rsp,%r15,8)
154 movq %rdx,%r13
155
156 xorq %rdx,%rdx
157 addq %r11,%r13
158 adcq $0,%rdx
159 addq %r10,%r13
160 adcq $0,%rdx
161 movq %r13,-8(%rsp,%r9,8)
162 movq %rdx,(%rsp,%r9,8)
163
164 leaq 1(%r14),%r14
165 cmpq %r9,%r14
166 jl .Louter
167
# Final subtraction: result[j] = temporary[j] - (rcx)[j] with borrow.  The
# xorq clears CF before the first sbbq; inside the loop only movq, leaq and
# decq sit between the sbbq instructions, none of which modify CF.
168 xorq %r14,%r14
169 movq (%rsp),%rax
170 leaq (%rsp),%rsi
171 movq %r9,%r15
172 jmp .Lsub
173 .align 16
174 .Lsub: sbbq (%rcx,%r14,8),%rax
175 movq %rax,(%rdi,%r14,8)
176 movq 8(%rsi,%r14,8),%rax
177 leaq 1(%r14),%r14
178 decq %r15
179 jnz .Lsub
180
# rax holds the carry-out word of the temporary; subtracting the final borrow
# turns it into a select mask (all ones if the subtraction went negative,
# presuming the carry-out word is 0 or 1).  rsi is then set to the temporary
# when the mask is all ones and to the result vector otherwise.
181 sbbq $0,%rax
182 xorq %r14,%r14
183 andq %rax,%rsi
184 notq %rax
185 movq %rdi,%rcx
186 andq %rax,%rcx
187 movq %r9,%r15
188 orq %rcx,%rsi
189 .align 16
# Copy the selected vector into the result and overwrite each temporary word
# with its own index, so the intermediate value does not remain on the stack.
190 .Lcopy:
191 movq (%rsi,%r14,8),%rax
192 movq %r14,(%rsp,%r14,8)
193 movq %rax,(%rdi,%r14,8)
194 leaq 1(%r14),%r14
195 subq $1,%r15
196 jnz .Lcopy
197
# Epilogue: fetch the saved pre-allocation rsp, return 1, reload the six
# registers pushed at entry and release the frame.  The bytes 0xf3,0xc3
# encode "rep ret".
198 movq 8(%rsp,%r9,8),%rsi
199 movq $1,%rax
200 movq (%rsi),%r15
201 movq 8(%rsi),%r14
202 movq 16(%rsi),%r13
203 movq 24(%rsi),%r12
204 movq 32(%rsi),%rbp
205 movq 40(%rsi),%rbx
206 leaq 48(%rsi),%rsp
207 .Lmul_epilogue:
208 .byte 0xf3,0xc3
209 .size bn_mul_mont,.-bn_mul_mont
# bn_mul4x_mont -- multiply-and-reduce with the inner loops unrolled four
# words per iteration.  Entered through .Lmul4x_enter from the dispatch code
# at the top of bn_mul_mont, i.e. when the word count is a multiple of 4, at
# least 8, and rdx differs from rsi.
#
# NOTE(review): the number at the start of every line is a diff-viewer line
# number captured together with the listing; it is not assembler input.
#
# Registers on entry, as they are consumed below:
#   rdi  result vector (saved in the frame because rdi is reused as an
#        accumulator)
#   rsi  first multiplicand vector
#   rdx  second multiplicand vector (copied to r12)
#   rcx  vector multiplied by the per-pass reduction factor and subtracted
#        at the end -- presumably the modulus; confirm against the caller
#   r8   pointer to the reduction multiplier word
#   r9d  number of 64-bit words
# Returns 1 in rax.  rbx, rbp and r12-r15 are saved and restored.
210 .type bn_mul4x_mont,@function
211 .align 16
212 bn_mul4x_mont:
213 .Lmul4x_enter:
214 pushq %rbx
215 pushq %rbp
216 pushq %r12
217 pushq %r13
218 pushq %r14
219 pushq %r15
220
# Zero-extend the word count, reserve (count+4) words below rsp and round rsp
# down to a 1024-byte boundary.  Frame word count+1 keeps the pre-allocation
# rsp (r11); frame word count+2 keeps the result pointer.
221 movl %r9d,%r9d
222 leaq 4(%r9),%r10
223 movq %rsp,%r11
224 negq %r10
225 leaq (%rsp,%r10,8),%rsp
226 andq $-1024,%rsp
227
228 movq %r11,8(%rsp,%r9,8)
229 .Lmul4x_body:
230 movq %rdi,16(%rsp,%r9,8)
231 movq %rdx,%r12
232 movq (%r8),%r8
233 movq (%r12),%rbx
234 movq (%rsi),%rax
235
# r14 = outer index, r15 = inner index.
236 xorq %r14,%r14
237 xorq %r15,%r15
238
# First pass: rbp = reduction factor derived from the low product word; the
# first two word positions are handled here before entering the unrolled loop.
239 movq %r8,%rbp
240 mulq %rbx
241 movq %rax,%r10
242 movq (%rcx),%rax
243
244 imulq %r10,%rbp
245 movq %rdx,%r11
246
247 mulq %rbp
248 addq %rax,%r10
249 movq 8(%rsi),%rax
250 adcq $0,%rdx
251 movq %rdx,%rdi
252
253 mulq %rbx
254 addq %rax,%r11
255 movq 8(%rcx),%rax
256 adcq $0,%rdx
257 movq %rdx,%r10
258
259 mulq %rbp
260 addq %rax,%rdi
261 movq 16(%rsi),%rax
262 adcq $0,%rdx
263 addq %r11,%rdi
264 leaq 4(%r15),%r15
265 adcq $0,%rdx
266 movq %rdi,(%rsp)
267 movq %rdx,%r13
268 jmp .L1st4x
269 .align 16
# Unrolled first-pass loop: each iteration consumes four words of (%rsi) and
# (%rcx) and stores four words of the temporary vector; r10/r11 and r13/rdi
# alternate as carry words of the two product chains.
270 .L1st4x:
271 mulq %rbx
272 addq %rax,%r10
273 movq -16(%rcx,%r15,8),%rax
274 adcq $0,%rdx
275 movq %rdx,%r11
276
277 mulq %rbp
278 addq %rax,%r13
279 movq -8(%rsi,%r15,8),%rax
280 adcq $0,%rdx
281 addq %r10,%r13
282 adcq $0,%rdx
283 movq %r13,-24(%rsp,%r15,8)
284 movq %rdx,%rdi
285
286 mulq %rbx
287 addq %rax,%r11
288 movq -8(%rcx,%r15,8),%rax
289 adcq $0,%rdx
290 movq %rdx,%r10
291
292 mulq %rbp
293 addq %rax,%rdi
294 movq (%rsi,%r15,8),%rax
295 adcq $0,%rdx
296 addq %r11,%rdi
297 adcq $0,%rdx
298 movq %rdi,-16(%rsp,%r15,8)
299 movq %rdx,%r13
300
301 mulq %rbx
302 addq %rax,%r10
303 movq (%rcx,%r15,8),%rax
304 adcq $0,%rdx
305 movq %rdx,%r11
306
307 mulq %rbp
308 addq %rax,%r13
309 movq 8(%rsi,%r15,8),%rax
310 adcq $0,%rdx
311 addq %r10,%r13
312 adcq $0,%rdx
313 movq %r13,-8(%rsp,%r15,8)
314 movq %rdx,%rdi
315
316 mulq %rbx
317 addq %rax,%r11
318 movq 8(%rcx,%r15,8),%rax
319 adcq $0,%rdx
320 leaq 4(%r15),%r15
321 movq %rdx,%r10
322
323 mulq %rbp
324 addq %rax,%rdi
325 movq -16(%rsi,%r15,8),%rax
326 adcq $0,%rdx
327 addq %r11,%rdi
328 adcq $0,%rdx
329 movq %rdi,-32(%rsp,%r15,8)
330 movq %rdx,%r13
331 cmpq %r9,%r15
332 jl .L1st4x
333
# Tail of the first pass: the last two word positions, then the top word and
# the carry-out word of the temporary.  rax is reloaded with the first word
# of (%rsi) for the next pass.
334 mulq %rbx
335 addq %rax,%r10
336 movq -16(%rcx,%r15,8),%rax
337 adcq $0,%rdx
338 movq %rdx,%r11
339
340 mulq %rbp
341 addq %rax,%r13
342 movq -8(%rsi,%r15,8),%rax
343 adcq $0,%rdx
344 addq %r10,%r13
345 adcq $0,%rdx
346 movq %r13,-24(%rsp,%r15,8)
347 movq %rdx,%rdi
348
349 mulq %rbx
350 addq %rax,%r11
351 movq -8(%rcx,%r15,8),%rax
352 adcq $0,%rdx
353 movq %rdx,%r10
354
355 mulq %rbp
356 addq %rax,%rdi
357 movq (%rsi),%rax
358 adcq $0,%rdx
359 addq %r11,%rdi
360 adcq $0,%rdx
361 movq %rdi,-16(%rsp,%r15,8)
362 movq %rdx,%r13
363
364 xorq %rdi,%rdi
365 addq %r10,%r13
366 adcq $0,%rdi
367 movq %r13,-8(%rsp,%r15,8)
368 movq %rdi,(%rsp,%r15,8)
369
370 leaq 1(%r14),%r14
371 .align 4
# Remaining passes: rbx = next word of the second multiplicand.  Same shape
# as the first pass, with the previous contents of the temporary vector added
# into the rbx product chain.
372 .Louter4x:
373 movq (%r12,%r14,8),%rbx
374 xorq %r15,%r15
375 movq (%rsp),%r10
376 movq %r8,%rbp
377 mulq %rbx
378 addq %rax,%r10
379 movq (%rcx),%rax
380 adcq $0,%rdx
381
382 imulq %r10,%rbp
383 movq %rdx,%r11
384
385 mulq %rbp
386 addq %rax,%r10
387 movq 8(%rsi),%rax
388 adcq $0,%rdx
389 movq %rdx,%rdi
390
391 mulq %rbx
392 addq %rax,%r11
393 movq 8(%rcx),%rax
394 adcq $0,%rdx
395 addq 8(%rsp),%r11
396 adcq $0,%rdx
397 movq %rdx,%r10
398
399 mulq %rbp
400 addq %rax,%rdi
401 movq 16(%rsi),%rax
402 adcq $0,%rdx
403 addq %r11,%rdi
404 leaq 4(%r15),%r15
405 adcq $0,%rdx
406 movq %rdi,(%rsp)
407 movq %rdx,%r13
408 jmp .Linner4x
409 .align 16
410 .Linner4x:
411 mulq %rbx
412 addq %rax,%r10
413 movq -16(%rcx,%r15,8),%rax
414 adcq $0,%rdx
415 addq -16(%rsp,%r15,8),%r10
416 adcq $0,%rdx
417 movq %rdx,%r11
418
419 mulq %rbp
420 addq %rax,%r13
421 movq -8(%rsi,%r15,8),%rax
422 adcq $0,%rdx
423 addq %r10,%r13
424 adcq $0,%rdx
425 movq %r13,-24(%rsp,%r15,8)
426 movq %rdx,%rdi
427
428 mulq %rbx
429 addq %rax,%r11
430 movq -8(%rcx,%r15,8),%rax
431 adcq $0,%rdx
432 addq -8(%rsp,%r15,8),%r11
433 adcq $0,%rdx
434 movq %rdx,%r10
435
436 mulq %rbp
437 addq %rax,%rdi
438 movq (%rsi,%r15,8),%rax
439 adcq $0,%rdx
440 addq %r11,%rdi
441 adcq $0,%rdx
442 movq %rdi,-16(%rsp,%r15,8)
443 movq %rdx,%r13
444
445 mulq %rbx
446 addq %rax,%r10
447 movq (%rcx,%r15,8),%rax
448 adcq $0,%rdx
449 addq (%rsp,%r15,8),%r10
450 adcq $0,%rdx
451 movq %rdx,%r11
452
453 mulq %rbp
454 addq %rax,%r13
455 movq 8(%rsi,%r15,8),%rax
456 adcq $0,%rdx
457 addq %r10,%r13
458 adcq $0,%rdx
459 movq %r13,-8(%rsp,%r15,8)
460 movq %rdx,%rdi
461
462 mulq %rbx
463 addq %rax,%r11
464 movq 8(%rcx,%r15,8),%rax
465 adcq $0,%rdx
466 addq 8(%rsp,%r15,8),%r11
467 adcq $0,%rdx
468 leaq 4(%r15),%r15
469 movq %rdx,%r10
470
471 mulq %rbp
472 addq %rax,%rdi
473 movq -16(%rsi,%r15,8),%rax
474 adcq $0,%rdx
475 addq %r11,%rdi
476 adcq $0,%rdx
477 movq %rdi,-32(%rsp,%r15,8)
478 movq %rdx,%r13
479 cmpq %r9,%r15
480 jl .Linner4x
481
# Tail of the pass; the outer index is advanced here (leaq leaves the flags
# alone) and the previous carry-out word (%rsp,%r9,8) is folded into the top.
482 mulq %rbx
483 addq %rax,%r10
484 movq -16(%rcx,%r15,8),%rax
485 adcq $0,%rdx
486 addq -16(%rsp,%r15,8),%r10
487 adcq $0,%rdx
488 movq %rdx,%r11
489
490 mulq %rbp
491 addq %rax,%r13
492 movq -8(%rsi,%r15,8),%rax
493 adcq $0,%rdx
494 addq %r10,%r13
495 adcq $0,%rdx
496 movq %r13,-24(%rsp,%r15,8)
497 movq %rdx,%rdi
498
499 mulq %rbx
500 addq %rax,%r11
501 movq -8(%rcx,%r15,8),%rax
502 adcq $0,%rdx
503 addq -8(%rsp,%r15,8),%r11
504 adcq $0,%rdx
505 leaq 1(%r14),%r14
506 movq %rdx,%r10
507
508 mulq %rbp
509 addq %rax,%rdi
510 movq (%rsi),%rax
511 adcq $0,%rdx
512 addq %r11,%rdi
513 adcq $0,%rdx
514 movq %rdi,-16(%rsp,%r15,8)
515 movq %rdx,%r13
516
517 xorq %rdi,%rdi
518 addq %r10,%r13
519 adcq $0,%rdi
520 addq (%rsp,%r9,8),%r13
521 adcq $0,%rdi
522 movq %r13,-8(%rsp,%r15,8)
523 movq %rdi,(%rsp,%r15,8)
524
525 cmpq %r9,%r14
526 jl .Louter4x
# Final subtraction, four words per iteration.  rdi = saved result pointer,
# xmm0 = 0 (used later to wipe the temporary), r9 = word count / 4,
# rsi = temporary vector.  The borrow lives in CF across the loop: only movq,
# leaq and decq separate the sbbq instructions.
527 movq 16(%rsp,%r9,8),%rdi
528 movq 0(%rsp),%rax
529 pxor %xmm0,%xmm0
530 movq 8(%rsp),%rdx
531 shrq $2,%r9
532 leaq (%rsp),%rsi
533 xorq %r14,%r14
534
535 subq 0(%rcx),%rax
536 movq 16(%rsi),%rbx
537 movq 24(%rsi),%rbp
538 sbbq 8(%rcx),%rdx
539 leaq -1(%r9),%r15
540 jmp .Lsub4x
541 .align 16
542 .Lsub4x:
543 movq %rax,0(%rdi,%r14,8)
544 movq %rdx,8(%rdi,%r14,8)
545 sbbq 16(%rcx,%r14,8),%rbx
546 movq 32(%rsi,%r14,8),%rax
547 movq 40(%rsi,%r14,8),%rdx
548 sbbq 24(%rcx,%r14,8),%rbp
549 movq %rbx,16(%rdi,%r14,8)
550 movq %rbp,24(%rdi,%r14,8)
551 sbbq 32(%rcx,%r14,8),%rax
552 movq 48(%rsi,%r14,8),%rbx
553 movq 56(%rsi,%r14,8),%rbp
554 sbbq 40(%rcx,%r14,8),%rdx
555 leaq 4(%r14),%r14
556 decq %r15
557 jnz .Lsub4x
558
# Last group of four; rax is then loaded with the carry-out word of the
# temporary and the final borrow is subtracted from it to form a select mask
# (all ones if the subtraction went negative, presuming that word is 0 or 1).
559 movq %rax,0(%rdi,%r14,8)
560 movq 32(%rsi,%r14,8),%rax
561 sbbq 16(%rcx,%r14,8),%rbx
562 movq %rdx,8(%rdi,%r14,8)
563 sbbq 24(%rcx,%r14,8),%rbp
564 movq %rbx,16(%rdi,%r14,8)
565
# rsi = temporary when the mask is all ones, result vector otherwise.
566 sbbq $0,%rax
567 movq %rbp,24(%rdi,%r14,8)
568 xorq %r14,%r14
569 andq %rax,%rsi
570 notq %rax
571 movq %rdi,%rcx
572 andq %rax,%rcx
573 leaq -1(%r9),%r15
574 orq %rcx,%rsi
575
# Copy the selected vector to the result 16 bytes at a time while storing
# zeros (xmm0) over the temporary.  movdqa is used for the stack stores, which
# relies on the 1024-byte alignment of rsp established in the prologue.
576 movdqu (%rsi),%xmm1
577 movdqa %xmm0,(%rsp)
578 movdqu %xmm1,(%rdi)
579 jmp .Lcopy4x
580 .align 16
581 .Lcopy4x:
582 movdqu 16(%rsi,%r14,1),%xmm2
583 movdqu 32(%rsi,%r14,1),%xmm1
584 movdqa %xmm0,16(%rsp,%r14,1)
585 movdqu %xmm2,16(%rdi,%r14,1)
586 movdqa %xmm0,32(%rsp,%r14,1)
587 movdqu %xmm1,32(%rdi,%r14,1)
588 leaq 32(%r14),%r14
589 decq %r15
590 jnz .Lcopy4x
591
# Restore r9 to the word count so the saved rsp can be located, finish the
# last 16 bytes of the copy, then return 1 and unwind as in the prologue.
# The bytes 0xf3,0xc3 encode "rep ret".
592 shlq $2,%r9
593 movdqu 16(%rsi,%r14,1),%xmm2
594 movdqa %xmm0,16(%rsp,%r14,1)
595 movdqu %xmm2,16(%rdi,%r14,1)
596 movq 8(%rsp,%r9,8),%rsi
597 movq $1,%rax
598 movq (%rsi),%r15
599 movq 8(%rsi),%r14
600 movq 16(%rsi),%r13
601 movq 24(%rsi),%r12
602 movq 32(%rsi),%rbp
603 movq 40(%rsi),%rbx
604 leaq 48(%rsi),%rsp
605 .Lmul4x_epilogue:
606 .byte 0xf3,0xc3
607 .size bn_mul4x_mont,.-bn_mul4x_mont
# bn_sqr4x_mont -- squaring variant.  Entered through .Lsqr4x_enter from the
# dispatch code at the top of bn_mul_mont, i.e. when the word count is a
# multiple of 4, at least 8, and both multiplicand pointers are equal.
#
# NOTE(review): the number at the start of every line is a diff-viewer line
# number captured together with the listing; it is not assembler input.
#
# Registers on entry, as they are consumed below:
#   rdi  result vector (saved at 32(%rsp))
#   rsi  input vector to be squared
#   rcx  vector used for reduction and the final subtraction (saved at
#        40(%rsp)) -- presumably the modulus; confirm against the caller
#   r8   pointer to the reduction multiplier word (value saved at 48(%rsp))
#   r9d  number of 64-bit words
# Returns 1 in rax.  rbx, rbp and r12-r15 are saved and restored.
#
# Stack frame after the prologue (rsp rounded down to 1024 bytes):
#    0(%rsp)  word count * 8 (stored before the reduction phase)
#    8(%rsp)  address just past the 2*count-word temporary
#   32(%rsp)  result pointer          40(%rsp)  rcx argument
#   48(%rsp)  reduction multiplier    56(%rsp)  pre-allocation rsp
#   64(%rsp)  temporary vector of 2*count words, plus one carry word
608 .type bn_sqr4x_mont,@function
609 .align 16
610 bn_sqr4x_mont:
611 .Lsqr4x_enter:
612 pushq %rbx
613 pushq %rbp
614 pushq %r12
615 pushq %r13
616 pushq %r14
617 pushq %r15
618
# r9 = count * 8 (bytes), r10 = -r9, r11 = pre-allocation rsp.  The frame is
# 72 + 2 * r9 bytes, rounded down to a 1024-byte boundary.
619 shll $3,%r9d
620 xorq %r10,%r10
621 movq %rsp,%r11
622 subq %r9,%r10
623 movq (%r8),%r8
624 leaq -72(%rsp,%r10,2),%rsp
625 andq $-1024,%rsp
626
627
628
629
630
631
632
633
634
635
636
637 movq %rdi,32(%rsp)
638 movq %rcx,40(%rsp)
639 movq %r8,48(%rsp)
640 movq %r11,56(%rsp)
641 .Lsqr4x_body:
642
643
644
645
646
647
648
# Phase 1: accumulate products of distinct input words into the temporary.
# rsi is moved to the end of the input and indexed with negative byte offsets
# (rbp, rcx) that count up towards zero.  r14 and r15 hold the two input words
# that multiply the rest of the row.
649 leaq 32(%r10),%rbp
650 leaq (%rsi,%r9,1),%rsi
651
652 movq %r9,%rcx
653
654
655 movq -32(%rsi,%rbp,1),%r14
656 leaq 64(%rsp,%r9,2),%rdi
657 movq -24(%rsi,%rbp,1),%rax
658 leaq -32(%rdi,%rbp,1),%rdi
659 movq -16(%rsi,%rbp,1),%rbx
660 movq %rax,%r15
661
662 mulq %r14
663 movq %rax,%r10
664 movq %rbx,%rax
665 movq %rdx,%r11
666 movq %r10,-24(%rdi,%rbp,1)
667
668 xorq %r10,%r10
669 mulq %r14
670 addq %rax,%r11
671 movq %rbx,%rax
672 adcq %rdx,%r10
673 movq %r11,-16(%rdi,%rbp,1)
674
675 leaq -16(%rbp),%rcx
676
677
678 movq 8(%rsi,%rcx,1),%rbx
679 mulq %r15
680 movq %rax,%r12
681 movq %rbx,%rax
682 movq %rdx,%r13
683
684 xorq %r11,%r11
685 addq %r12,%r10
686 leaq 16(%rcx),%rcx
687 adcq $0,%r11
688 mulq %r14
689 addq %rax,%r10
690 movq %rbx,%rax
691 adcq %rdx,%r11
692 movq %r10,-8(%rdi,%rcx,1)
693 jmp .Lsqr4x_1st
694
695 .align 16
# First row loop: four input words per iteration, each multiplied by both r15
# and r14; nothing is read back from the temporary here.  Ends when the
# negative offset rcx reaches 0.
696 .Lsqr4x_1st:
697 movq (%rsi,%rcx,1),%rbx
698 xorq %r12,%r12
699 mulq %r15
700 addq %rax,%r13
701 movq %rbx,%rax
702 adcq %rdx,%r12
703
704 xorq %r10,%r10
705 addq %r13,%r11
706 adcq $0,%r10
707 mulq %r14
708 addq %rax,%r11
709 movq %rbx,%rax
710 adcq %rdx,%r10
711 movq %r11,(%rdi,%rcx,1)
712
713
714 movq 8(%rsi,%rcx,1),%rbx
715 xorq %r13,%r13
716 mulq %r15
717 addq %rax,%r12
718 movq %rbx,%rax
719 adcq %rdx,%r13
720
721 xorq %r11,%r11
722 addq %r12,%r10
723 adcq $0,%r11
724 mulq %r14
725 addq %rax,%r10
726 movq %rbx,%rax
727 adcq %rdx,%r11
728 movq %r10,8(%rdi,%rcx,1)
729
730 movq 16(%rsi,%rcx,1),%rbx
731 xorq %r12,%r12
732 mulq %r15
733 addq %rax,%r13
734 movq %rbx,%rax
735 adcq %rdx,%r12
736
737 xorq %r10,%r10
738 addq %r13,%r11
739 adcq $0,%r10
740 mulq %r14
741 addq %rax,%r11
742 movq %rbx,%rax
743 adcq %rdx,%r10
744 movq %r11,16(%rdi,%rcx,1)
745
746
747 movq 24(%rsi,%rcx,1),%rbx
748 xorq %r13,%r13
749 mulq %r15
750 addq %rax,%r12
751 movq %rbx,%rax
752 adcq %rdx,%r13
753
754 xorq %r11,%r11
755 addq %r12,%r10
756 leaq 32(%rcx),%rcx
757 adcq $0,%r11
758 mulq %r14
759 addq %rax,%r10
760 movq %rbx,%rax
761 adcq %rdx,%r11
762 movq %r10,-8(%rdi,%rcx,1)
763
764 cmpq $0,%rcx
765 jne .Lsqr4x_1st
766
767 xorq %r12,%r12
768 addq %r11,%r13
769 adcq $0,%r12
770 mulq %r15
771 addq %rax,%r13
772 adcq %rdx,%r12
773
774 movq %r13,(%rdi)
775 leaq 16(%rbp),%rbp
776 movq %r12,8(%rdi)
777 jmp .Lsqr4x_outer
778
779 .align 16
# Following rows: rbp advances by 16 bytes (two input words) per outer
# iteration; the inner loop now also adds the words already in the temporary.
780 .Lsqr4x_outer:
781 movq -32(%rsi,%rbp,1),%r14
782 leaq 64(%rsp,%r9,2),%rdi
783 movq -24(%rsi,%rbp,1),%rax
784 leaq -32(%rdi,%rbp,1),%rdi
785 movq -16(%rsi,%rbp,1),%rbx
786 movq %rax,%r15
787
788 movq -24(%rdi,%rbp,1),%r10
789 xorq %r11,%r11
790 mulq %r14
791 addq %rax,%r10
792 movq %rbx,%rax
793 adcq %rdx,%r11
794 movq %r10,-24(%rdi,%rbp,1)
795
796 xorq %r10,%r10
797 addq -16(%rdi,%rbp,1),%r11
798 adcq $0,%r10
799 mulq %r14
800 addq %rax,%r11
801 movq %rbx,%rax
802 adcq %rdx,%r10
803 movq %r11,-16(%rdi,%rbp,1)
804
805 leaq -16(%rbp),%rcx
806 xorq %r12,%r12
807
808
809 movq 8(%rsi,%rcx,1),%rbx
810 xorq %r13,%r13
811 addq 8(%rdi,%rcx,1),%r12
812 adcq $0,%r13
813 mulq %r15
814 addq %rax,%r12
815 movq %rbx,%rax
816 adcq %rdx,%r13
817
818 xorq %r11,%r11
819 addq %r12,%r10
820 adcq $0,%r11
821 mulq %r14
822 addq %rax,%r10
823 movq %rbx,%rax
824 adcq %rdx,%r11
825 movq %r10,8(%rdi,%rcx,1)
826
827 leaq 16(%rcx),%rcx
828 jmp .Lsqr4x_inner
829
830 .align 16
831 .Lsqr4x_inner:
832 movq (%rsi,%rcx,1),%rbx
833 xorq %r12,%r12
834 addq (%rdi,%rcx,1),%r13
835 adcq $0,%r12
836 mulq %r15
837 addq %rax,%r13
838 movq %rbx,%rax
839 adcq %rdx,%r12
840
841 xorq %r10,%r10
842 addq %r13,%r11
843 adcq $0,%r10
844 mulq %r14
845 addq %rax,%r11
846 movq %rbx,%rax
847 adcq %rdx,%r10
848 movq %r11,(%rdi,%rcx,1)
849
850 movq 8(%rsi,%rcx,1),%rbx
851 xorq %r13,%r13
852 addq 8(%rdi,%rcx,1),%r12
853 adcq $0,%r13
854 mulq %r15
855 addq %rax,%r12
856 movq %rbx,%rax
857 adcq %rdx,%r13
858
859 xorq %r11,%r11
860 addq %r12,%r10
861 leaq 16(%rcx),%rcx
862 adcq $0,%r11
863 mulq %r14
864 addq %rax,%r10
865 movq %rbx,%rax
866 adcq %rdx,%r11
867 movq %r10,-8(%rdi,%rcx,1)
868
869 cmpq $0,%rcx
870 jne .Lsqr4x_inner
871
872 xorq %r12,%r12
873 addq %r11,%r13
874 adcq $0,%r12
875 mulq %r15
876 addq %rax,%r13
877 adcq %rdx,%r12
878
879 movq %r13,(%rdi)
880 movq %r12,8(%rdi)
881
882 addq $16,%rbp
883 jnz .Lsqr4x_outer
884
885
# Last rows, handled out of line with fixed offsets: the products among the
# final four input words (rbp is 0 here).
886 movq -32(%rsi),%r14
887 leaq 64(%rsp,%r9,2),%rdi
888 movq -24(%rsi),%rax
889 leaq -32(%rdi,%rbp,1),%rdi
890 movq -16(%rsi),%rbx
891 movq %rax,%r15
892
893 xorq %r11,%r11
894 mulq %r14
895 addq %rax,%r10
896 movq %rbx,%rax
897 adcq %rdx,%r11
898 movq %r10,-24(%rdi)
899
900 xorq %r10,%r10
901 addq %r13,%r11
902 adcq $0,%r10
903 mulq %r14
904 addq %rax,%r11
905 movq %rbx,%rax
906 adcq %rdx,%r10
907 movq %r11,-16(%rdi)
908
909 movq -8(%rsi),%rbx
910 mulq %r15
911 addq %rax,%r12
912 movq %rbx,%rax
913 adcq $0,%rdx
914
915 xorq %r11,%r11
916 addq %r12,%r10
917 movq %rdx,%r13
918 adcq $0,%r11
919 mulq %r14
920 addq %rax,%r10
921 movq %rbx,%rax
922 adcq %rdx,%r11
923 movq %r10,-8(%rdi)
924
925 xorq %r12,%r12
926 addq %r11,%r13
927 adcq $0,%r12
928 mulq %r15
929 addq %rax,%r13
930 movq -16(%rsi),%rax
931 adcq %rdx,%r12
932
933 movq %r13,(%rdi)
934 movq %r12,8(%rdi)
935
936 mulq %rbx
937 addq $16,%rbp
938 xorq %r14,%r14
939 subq %r9,%rbp
940 xorq %r15,%r15
941
942 addq %r12,%rax
943 adcq $0,%rdx
944 movq %rax,8(%rdi)
945 movq %rdx,16(%rdi)
946 movq %r15,24(%rdi)
947
# Phase 2: double the accumulated cross products and add the squares of the
# individual input words.  "leaq (carry,word,2)" forms 2*word plus the bit
# shifted out of the previous word, "shrq $63" extracts the bit shifted out of
# this one, and "mulq %rax" squares the current input word.  The addition
# carry is parked in r15 (sbbq %r15,%r15 gives 0 or -1) and put back into CF
# with negq %r15.  rcx is used as a zero base in the leaq forms; the
# preceding loops leave it at 0.
948 movq -16(%rsi,%rbp,1),%rax
949 leaq 64(%rsp,%r9,2),%rdi
950 xorq %r10,%r10
951 movq -24(%rdi,%rbp,2),%r11
952
953 leaq (%r14,%r10,2),%r12
954 shrq $63,%r10
955 leaq (%rcx,%r11,2),%r13
956 shrq $63,%r11
957 orq %r10,%r13
958 movq -16(%rdi,%rbp,2),%r10
959 movq %r11,%r14
960 mulq %rax
961 negq %r15
962 movq -8(%rdi,%rbp,2),%r11
963 adcq %rax,%r12
964 movq -8(%rsi,%rbp,1),%rax
965 movq %r12,-32(%rdi,%rbp,2)
966 adcq %rdx,%r13
967
968 leaq (%r14,%r10,2),%rbx
969 movq %r13,-24(%rdi,%rbp,2)
970 sbbq %r15,%r15
971 shrq $63,%r10
972 leaq (%rcx,%r11,2),%r8
973 shrq $63,%r11
974 orq %r10,%r8
975 movq 0(%rdi,%rbp,2),%r10
976 movq %r11,%r14
977 mulq %rax
978 negq %r15
979 movq 8(%rdi,%rbp,2),%r11
980 adcq %rax,%rbx
981 movq 0(%rsi,%rbp,1),%rax
982 movq %rbx,-16(%rdi,%rbp,2)
983 adcq %rdx,%r8
984 leaq 16(%rbp),%rbp
985 movq %r8,-40(%rdi,%rbp,2)
986 sbbq %r15,%r15
987 jmp .Lsqr4x_shift_n_add
988
989 .align 16
# Main shift-and-add loop: four input words (eight temporary words) per
# iteration; rbp is a negative byte offset that reaches 0 at the end.
990 .Lsqr4x_shift_n_add:
991 leaq (%r14,%r10,2),%r12
992 shrq $63,%r10
993 leaq (%rcx,%r11,2),%r13
994 shrq $63,%r11
995 orq %r10,%r13
996 movq -16(%rdi,%rbp,2),%r10
997 movq %r11,%r14
998 mulq %rax
999 negq %r15
1000 movq -8(%rdi,%rbp,2),%r11
1001 adcq %rax,%r12
1002 movq -8(%rsi,%rbp,1),%rax
1003 movq %r12,-32(%rdi,%rbp,2)
1004 adcq %rdx,%r13
1005
1006 leaq (%r14,%r10,2),%rbx
1007 movq %r13,-24(%rdi,%rbp,2)
1008 sbbq %r15,%r15
1009 shrq $63,%r10
1010 leaq (%rcx,%r11,2),%r8
1011 shrq $63,%r11
1012 orq %r10,%r8
1013 movq 0(%rdi,%rbp,2),%r10
1014 movq %r11,%r14
1015 mulq %rax
1016 negq %r15
1017 movq 8(%rdi,%rbp,2),%r11
1018 adcq %rax,%rbx
1019 movq 0(%rsi,%rbp,1),%rax
1020 movq %rbx,-16(%rdi,%rbp,2)
1021 adcq %rdx,%r8
1022
1023 leaq (%r14,%r10,2),%r12
1024 movq %r8,-8(%rdi,%rbp,2)
1025 sbbq %r15,%r15
1026 shrq $63,%r10
1027 leaq (%rcx,%r11,2),%r13
1028 shrq $63,%r11
1029 orq %r10,%r13
1030 movq 16(%rdi,%rbp,2),%r10
1031 movq %r11,%r14
1032 mulq %rax
1033 negq %r15
1034 movq 24(%rdi,%rbp,2),%r11
1035 adcq %rax,%r12
1036 movq 8(%rsi,%rbp,1),%rax
1037 movq %r12,0(%rdi,%rbp,2)
1038 adcq %rdx,%r13
1039
1040 leaq (%r14,%r10,2),%rbx
1041 movq %r13,8(%rdi,%rbp,2)
1042 sbbq %r15,%r15
1043 shrq $63,%r10
1044 leaq (%rcx,%r11,2),%r8
1045 shrq $63,%r11
1046 orq %r10,%r8
1047 movq 32(%rdi,%rbp,2),%r10
1048 movq %r11,%r14
1049 mulq %rax
1050 negq %r15
1051 movq 40(%rdi,%rbp,2),%r11
1052 adcq %rax,%rbx
1053 movq 16(%rsi,%rbp,1),%rax
1054 movq %rbx,16(%rdi,%rbp,2)
1055 adcq %rdx,%r8
1056 movq %r8,24(%rdi,%rbp,2)
1057 sbbq %r15,%r15
1058 addq $32,%rbp
1059 jnz .Lsqr4x_shift_n_add
1060
# Tail of the shift-and-add phase: the last two input words.
1061 leaq (%r14,%r10,2),%r12
1062 shrq $63,%r10
1063 leaq (%rcx,%r11,2),%r13
1064 shrq $63,%r11
1065 orq %r10,%r13
1066 movq -16(%rdi),%r10
1067 movq %r11,%r14
1068 mulq %rax
1069 negq %r15
1070 movq -8(%rdi),%r11
1071 adcq %rax,%r12
1072 movq -8(%rsi),%rax
1073 movq %r12,-32(%rdi)
1074 adcq %rdx,%r13
1075
1076 leaq (%r14,%r10,2),%rbx
1077 movq %r13,-24(%rdi)
1078 sbbq %r15,%r15
1079 shrq $63,%r10
1080 leaq (%rcx,%r11,2),%r8
1081 shrq $63,%r11
1082 orq %r10,%r8
1083 mulq %rax
1084 negq %r15
1085 adcq %rax,%rbx
1086 adcq %rdx,%r8
1087 movq %rbx,-16(%rdi)
1088 movq %r8,-8(%rdi)
# Phase 3: reduction of the 2*count-word temporary.  rsi = end of the vector
# saved at 40(%rsp), indexed by the negative byte offset rcx; r8 = saved
# reduction multiplier; rdi = temporary base + count bytes; 0(%rsp) and
# 8(%rsp) receive the byte count and the end address of the temporary;
# rbp carries the top-word carry from one outer iteration to the next.
# Each outer iteration uses two reduction factors (r14 and r15), i.e. it
# retires two words of the temporary.
1089 movq 40(%rsp),%rsi
1090 movq 48(%rsp),%r8
1091 xorq %rcx,%rcx
1092 movq %r9,0(%rsp)
1093 subq %r9,%rcx
1094 movq 64(%rsp),%r10
1095 movq %r8,%r14
1096 leaq 64(%rsp,%r9,2),%rax
1097 leaq 64(%rsp,%r9,1),%rdi
1098 movq %rax,8(%rsp)
1099 leaq (%rsi,%r9,1),%rsi
1100 xorq %rbp,%rbp
1101
1102 movq 0(%rsi,%rcx,1),%rax
1103 movq 8(%rsi,%rcx,1),%r9
1104 imulq %r10,%r14
1105 movq %rax,%rbx
1106 jmp .Lsqr4x_mont_outer
1107
1108 .align 16
1109 .Lsqr4x_mont_outer:
1110 xorq %r11,%r11
1111 mulq %r14
1112 addq %rax,%r10
1113 movq %r9,%rax
1114 adcq %rdx,%r11
1115 movq %r8,%r15
1116
1117 xorq %r10,%r10
1118 addq 8(%rdi,%rcx,1),%r11
1119 adcq $0,%r10
1120 mulq %r14
1121 addq %rax,%r11
1122 movq %rbx,%rax
1123 adcq %rdx,%r10
1124
# Second reduction factor of this iteration, from the updated next word.
1125 imulq %r11,%r15
1126
1127 movq 16(%rsi,%rcx,1),%rbx
1128 xorq %r13,%r13
1129 addq %r11,%r12
1130 adcq $0,%r13
1131 mulq %r15
1132 addq %rax,%r12
1133 movq %rbx,%rax
1134 adcq %rdx,%r13
1135 movq %r12,8(%rdi,%rcx,1)
1136
1137 xorq %r11,%r11
1138 addq 16(%rdi,%rcx,1),%r10
1139 adcq $0,%r11
1140 mulq %r14
1141 addq %rax,%r10
1142 movq %r9,%rax
1143 adcq %rdx,%r11
1144
1145 movq 24(%rsi,%rcx,1),%r9
1146 xorq %r12,%r12
1147 addq %r10,%r13
1148 adcq $0,%r12
1149 mulq %r15
1150 addq %rax,%r13
1151 movq %r9,%rax
1152 adcq %rdx,%r12
1153 movq %r13,16(%rdi,%rcx,1)
1154
1155 xorq %r10,%r10
1156 addq 24(%rdi,%rcx,1),%r11
1157 leaq 32(%rcx),%rcx
1158 adcq $0,%r10
1159 mulq %r14
1160 addq %rax,%r11
1161 movq %rbx,%rax
1162 adcq %rdx,%r10
1163 jmp .Lsqr4x_mont_inner
1164
1165 .align 16
# Inner reduction loop: four words of (%rsi) per iteration, each multiplied by
# both factors; ends when the negative offset rcx reaches 0.
1166 .Lsqr4x_mont_inner:
1167 movq (%rsi,%rcx,1),%rbx
1168 xorq %r13,%r13
1169 addq %r11,%r12
1170 adcq $0,%r13
1171 mulq %r15
1172 addq %rax,%r12
1173 movq %rbx,%rax
1174 adcq %rdx,%r13
1175 movq %r12,-8(%rdi,%rcx,1)
1176
1177 xorq %r11,%r11
1178 addq (%rdi,%rcx,1),%r10
1179 adcq $0,%r11
1180 mulq %r14
1181 addq %rax,%r10
1182 movq %r9,%rax
1183 adcq %rdx,%r11
1184
1185 movq 8(%rsi,%rcx,1),%r9
1186 xorq %r12,%r12
1187 addq %r10,%r13
1188 adcq $0,%r12
1189 mulq %r15
1190 addq %rax,%r13
1191 movq %r9,%rax
1192 adcq %rdx,%r12
1193 movq %r13,(%rdi,%rcx,1)
1194
1195 xorq %r10,%r10
1196 addq 8(%rdi,%rcx,1),%r11
1197 adcq $0,%r10
1198 mulq %r14
1199 addq %rax,%r11
1200 movq %rbx,%rax
1201 adcq %rdx,%r10
1202
1203
1204 movq 16(%rsi,%rcx,1),%rbx
1205 xorq %r13,%r13
1206 addq %r11,%r12
1207 adcq $0,%r13
1208 mulq %r15
1209 addq %rax,%r12
1210 movq %rbx,%rax
1211 adcq %rdx,%r13
1212 movq %r12,8(%rdi,%rcx,1)
1213
1214 xorq %r11,%r11
1215 addq 16(%rdi,%rcx,1),%r10
1216 adcq $0,%r11
1217 mulq %r14
1218 addq %rax,%r10
1219 movq %r9,%rax
1220 adcq %rdx,%r11
1221
1222 movq 24(%rsi,%rcx,1),%r9
1223 xorq %r12,%r12
1224 addq %r10,%r13
1225 adcq $0,%r12
1226 mulq %r15
1227 addq %rax,%r13
1228 movq %r9,%rax
1229 adcq %rdx,%r12
1230 movq %r13,16(%rdi,%rcx,1)
1231
1232 xorq %r10,%r10
1233 addq 24(%rdi,%rcx,1),%r11
1234 leaq 32(%rcx),%rcx
1235 adcq $0,%r10
1236 mulq %r14
1237 addq %rax,%r11
1238 movq %rbx,%rax
1239 adcq %rdx,%r10
1240 cmpq $0,%rcx
1241 jne .Lsqr4x_mont_inner
1242
# End of an outer iteration: rewind rcx to -count*8, fold in the carry kept in
# rbp, precompute the next iteration's first factor (r14) and first words,
# advance rdi by two words, and loop until rdi reaches the end address stored
# at 8(%rsp).
1243 subq 0(%rsp),%rcx
1244 movq %r8,%r14
1245
1246 xorq %r13,%r13
1247 addq %r11,%r12
1248 adcq $0,%r13
1249 mulq %r15
1250 addq %rax,%r12
1251 movq %r9,%rax
1252 adcq %rdx,%r13
1253 movq %r12,-8(%rdi)
1254
1255 xorq %r11,%r11
1256 addq (%rdi),%r10
1257 adcq $0,%r11
1258 movq 0(%rsi,%rcx,1),%rbx
1259 addq %rbp,%r10
1260 adcq $0,%r11
1261
1262 imulq 16(%rdi,%rcx,1),%r14
1263 xorq %r12,%r12
1264 movq 8(%rsi,%rcx,1),%r9
1265 addq %r10,%r13
1266 movq 16(%rdi,%rcx,1),%r10
1267 adcq $0,%r12
1268 mulq %r15
1269 addq %rax,%r13
1270 movq %rbx,%rax
1271 adcq %rdx,%r12
1272 movq %r13,(%rdi)
1273
1274 xorq %rbp,%rbp
1275 addq 8(%rdi),%r12
1276 adcq %rbp,%rbp
1277 addq %r11,%r12
1278 leaq 16(%rdi),%rdi
1279 adcq $0,%rbp
1280 movq %r12,-8(%rdi)
1281 cmpq 8(%rsp),%rdi
1282 jb .Lsqr4x_mont_outer
1283
# Phase 4: final subtraction.  rbx = upper half of the temporary (the reduced
# value), rsi = vector saved at 40(%rsp), rdi = result pointer, r9 = byte
# count >> 5 = word count / 4.  The last carry (rbp) is stored just past the
# temporary so it can be read back as the top word.  The borrow stays in CF
# across the loop: only movq, leaq and decq separate the sbbq instructions.
1284 movq 0(%rsp),%r9
1285 movq %rbp,(%rdi)
1286 movq 64(%rsp,%r9,1),%rax
1287 leaq 64(%rsp,%r9,1),%rbx
1288 movq 40(%rsp),%rsi
1289 shrq $5,%r9
1290 movq 8(%rbx),%rdx
1291 xorq %rbp,%rbp
1292
1293 movq 32(%rsp),%rdi
1294 subq 0(%rsi),%rax
1295 movq 16(%rbx),%r10
1296 movq 24(%rbx),%r11
1297 sbbq 8(%rsi),%rdx
1298 leaq -1(%r9),%rcx
1299 jmp .Lsqr4x_sub
1300 .align 16
1301 .Lsqr4x_sub:
1302 movq %rax,0(%rdi,%rbp,8)
1303 movq %rdx,8(%rdi,%rbp,8)
1304 sbbq 16(%rsi,%rbp,8),%r10
1305 movq 32(%rbx,%rbp,8),%rax
1306 movq 40(%rbx,%rbp,8),%rdx
1307 sbbq 24(%rsi,%rbp,8),%r11
1308 movq %r10,16(%rdi,%rbp,8)
1309 movq %r11,24(%rdi,%rbp,8)
1310 sbbq 32(%rsi,%rbp,8),%rax
1311 movq 48(%rbx,%rbp,8),%r10
1312 movq 56(%rbx,%rbp,8),%r11
1313 sbbq 40(%rsi,%rbp,8),%rdx
1314 leaq 4(%rbp),%rbp
1315 decq %rcx
1316 jnz .Lsqr4x_sub
1317
1318 movq %rax,0(%rdi,%rbp,8)
1319 movq 32(%rbx,%rbp,8),%rax
1320 sbbq 16(%rsi,%rbp,8),%r10
1321 movq %rdx,8(%rdi,%rbp,8)
1322 sbbq 24(%rsi,%rbp,8),%r11
1323 movq %r10,16(%rdi,%rbp,8)
1324
# rax (top word minus the final borrow) becomes a select mask: rbx keeps the
# unsubtracted value when the mask is all ones, otherwise it is redirected to
# the result vector that already holds the difference.
1325 sbbq $0,%rax
1326 movq %r11,24(%rdi,%rbp,8)
1327 xorq %rbp,%rbp
1328 andq %rax,%rbx
1329 notq %rax
1330 movq %rdi,%rsi
1331 andq %rax,%rsi
1332 leaq -1(%r9),%rcx
1333 orq %rsi,%rbx
1334
# Copy the selected vector to the result while zeroing both halves of the
# temporary (64(%rsp) onwards and rsi = the upper half) with xmm0.  The
# movdqa stores rely on the 1024-byte alignment of rsp.
1335 pxor %xmm0,%xmm0
1336 leaq 64(%rsp,%r9,8),%rsi
1337 movdqu (%rbx),%xmm1
1338 leaq (%rsi,%r9,8),%rsi
1339 movdqa %xmm0,64(%rsp)
1340 movdqa %xmm0,(%rsi)
1341 movdqu %xmm1,(%rdi)
1342 jmp .Lsqr4x_copy
1343 .align 16
1344 .Lsqr4x_copy:
1345 movdqu 16(%rbx,%rbp,1),%xmm2
1346 movdqu 32(%rbx,%rbp,1),%xmm1
1347 movdqa %xmm0,80(%rsp,%rbp,1)
1348 movdqa %xmm0,96(%rsp,%rbp,1)
1349 movdqa %xmm0,16(%rsi,%rbp,1)
1350 movdqa %xmm0,32(%rsi,%rbp,1)
1351 movdqu %xmm2,16(%rdi,%rbp,1)
1352 movdqu %xmm1,32(%rdi,%rbp,1)
1353 leaq 32(%rbp),%rbp
1354 decq %rcx
1355 jnz .Lsqr4x_copy
1356
# Last 16 bytes of the copy, then return 1, reload the six registers pushed at
# entry from the saved pre-allocation rsp and release the frame.  The bytes
# 0xf3,0xc3 encode "rep ret".
1357 movdqu 16(%rbx,%rbp,1),%xmm2
1358 movdqa %xmm0,80(%rsp,%rbp,1)
1359 movdqa %xmm0,16(%rsi,%rbp,1)
1360 movdqu %xmm2,16(%rdi,%rbp,1)
1361 movq 56(%rsp),%rsi
1362 movq $1,%rax
1363 movq 0(%rsi),%r15
1364 movq 8(%rsi),%r14
1365 movq 16(%rsi),%r13
1366 movq 24(%rsi),%r12
1367 movq 32(%rsi),%rbp
1368 movq 40(%rsi),%rbx
1369 leaq 48(%rsi),%rsp
1370 .Lsqr4x_epilogue:
1371 .byte 0xf3,0xc3
1372 .size bn_sqr4x_mont,.-bn_sqr4x_mont
# NUL-terminated ASCII identification string embedded after the code:
# "Montgomery Multiplication for x86_64, CRYPTOGAMS by <appro@openssl.org>".
# The byte list is kept on one line with no embedded whitespace; the captured
# listing had spaces injected at wrap points, one of which split the value 108
# ('l' in "openssl") into "10 8".
1373 .byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
1374 .align 16
OLDNEW
« no previous file with comments | « openssl/crypto/bn/asm/x86_64-gf2m.pl ('k') | openssl/crypto/bn/asm/x86_64-mont.pl » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698