Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(164)

Side by Side Diff: openssl/crypto/bn/asm/x86_64-mont5.S

Issue 2072073002: Delete bundled copy of OpenSSL and replace with README. (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/openssl@master
Patch Set: Delete bundled copy of OpenSSL and replace with README. Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « openssl/crypto/bn/asm/x86_64-mont.pl ('k') | openssl/crypto/bn/asm/x86_64-mont5.pl » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 .text
2
# bn_mul_mont_gather5: word-serial multiply-and-reduce over vectors of
# %r9d 64-bit words, where the second multiplicand is fetched one word
# per row from a table through a masked gather.  Ends with a conditional
# subtraction and copy-out, and returns 1 in %rax.
#
# Registers as used below (System V AMD64 argument order).  The roles
# marked "presumably" are inferred from usage -- TODO confirm against
# the C prototype:
#   %rdi     result vector (written by .Lsub and .Lcopy)
#   %rsi     first multiplicand vector
#   %rdx     base of the gather table (rows are 256 bytes apart)
#   %rcx     vector multiplied by the per-row factor in %rbp and later
#            subtracted from the result (presumably the modulus)
#   %r8      pointer to one word, loaded once (presumably n0)
#   %r9d     word count
#   8(%rsp)  32-bit table index, first stack argument
# Temporary vector: word count + 2 qwords at the realigned %rsp.
3 .globl bn_mul_mont_gather5
4 .type bn_mul_mont_gather5,@function
5 .align 64
6 bn_mul_mont_gather5:
# Dispatch: word counts that are a multiple of 4 and not below 8
# (unsigned) take the 4x-unrolled path; all others use the loop below.
7 testl $3,%r9d
8 jnz .Lmul_enter
9 cmpl $8,%r9d
10 jb .Lmul_enter
11 jmp .Lmul4x_enter
12
13 .align 16
14 .Lmul_enter:
# The 32-bit self-move zero-extends the word count into all of %r9.
15 movl %r9d,%r9d
# Table index is read before the pushes move %rsp.
16 movl 8(%rsp),%r10d
# Save the callee-saved registers; they are restored in the epilogue.
17 pushq %rbx
18 pushq %rbp
19 pushq %r12
20 pushq %r13
21 pushq %r14
22 pushq %r15
# Keep the post-push %rsp in %rax, reserve (count+2) qwords below it and
# round %rsp down to a 1024-byte boundary.
23 movq %rsp,%rax
24 leaq 2(%r9),%r11
25 negq %r11
26 leaq (%rsp,%r11,8),%rsp
27 andq $-1024,%rsp
28
# Slot count+1 of the temporary vector holds the pre-alignment %rsp.
29 movq %rax,8(%rsp,%r9,8)
30 .Lmul_body:
# Gather setup: %r11 = index & 7, %r10 = ~(index >> 3) & 3.
# %r12 = table + 96 + (index & 7)*8, so the displacements -96/-32/32/96
# used below address four qwords spaced 64 bytes apart.
# %xmm4..%xmm7 receive four consecutive qwords of .Lmagic_masks
# starting at qword %r10.
31 movq %rdx,%r12
32 movq %r10,%r11
33 shrq $3,%r10
34 andq $7,%r11
35 notq %r10
36 leaq .Lmagic_masks(%rip),%rax
37 andq $3,%r10
38 leaq 96(%r12,%r11,8),%r12
39 movq 0(%rax,%r10,8),%xmm4
40 movq 8(%rax,%r10,8),%xmm5
41 movq 16(%rax,%r10,8),%xmm6
42 movq 24(%rax,%r10,8),%xmm7
43
# Gather the first table word: load all four candidates, AND each with
# its mask, OR them together, then step %r12 to the next 256-byte row.
44 movq -96(%r12),%xmm0
45 movq -32(%r12),%xmm1
46 pand %xmm4,%xmm0
47 movq 32(%r12),%xmm2
48 pand %xmm5,%xmm1
49 movq 96(%r12),%xmm3
50 pand %xmm6,%xmm2
51 por %xmm1,%xmm0
52 pand %xmm7,%xmm3
53 por %xmm2,%xmm0
54 leaq 256(%r12),%r12
55 por %xmm3,%xmm0
56
# The bytes 66 48 0F 7E C3 encode: movq %xmm0,%rbx (gathered word).
57 .byte 102,72,15,126,195
58
# Replace the pointer in %r8 by the word it points to; %rax = first
# word of the %rsi vector.
59 movq (%r8),%r8
60 movq (%rsi),%rax
61
# %r14 = outer (row) counter, %r15 = inner (word) counter.
62 xorq %r14,%r14
63 xorq %r15,%r15
64
# The gather of the next row's table word is interleaved with the
# multiplications of the current row from here on.
65 movq -96(%r12),%xmm0
66 movq -32(%r12),%xmm1
67 pand %xmm4,%xmm0
68 movq 32(%r12),%xmm2
69 pand %xmm5,%xmm1
70
# First row, word 0: %r10 = low half of (%rsi)[0] * %rbx.
71 movq %r8,%rbp
72 mulq %rbx
73 movq %rax,%r10
74 movq (%rcx),%rax
75
76 movq 96(%r12),%xmm3
77 pand %xmm6,%xmm2
78 por %xmm1,%xmm0
79 pand %xmm7,%xmm3
80
# %rbp = low word * loaded (%r8) word: the factor applied to the %rcx
# vector for this row.  %r11 = high half of the first product.
81 imulq %r10,%rbp
82 movq %rdx,%r11
83
84 por %xmm2,%xmm0
85 leaq 256(%r12),%r12
86 por %xmm3,%xmm0
87
# Only the carry of this addition is kept; %r10 is overwritten at
# .L1st_enter before it is read again.
88 mulq %rbp
89 addq %rax,%r10
90 movq 8(%rsi),%rax
91 adcq $0,%rdx
92 movq %rdx,%r13
93
94 leaq 1(%r15),%r15
95 jmp .L1st_enter
96
97 .align 16
# First-row inner loop.  Each pass finishes one word and stores it at
# -16(%rsp,%r15,8), i.e. one slot below the word just multiplied.
98 .L1st:
99 addq %rax,%r13
100 movq (%rsi,%r15,8),%rax
101 adcq $0,%rdx
102 addq %r11,%r13
103 movq %r10,%r11
104 adcq $0,%rdx
105 movq %r13,-16(%rsp,%r15,8)
106 movq %rdx,%r13
107
108 .L1st_enter:
109 mulq %rbx
110 addq %rax,%r11
111 movq (%rcx,%r15,8),%rax
112 adcq $0,%rdx
113 leaq 1(%r15),%r15
114 movq %rdx,%r10
115
116 mulq %rbp
117 cmpq %r9,%r15
118 jne .L1st
119
# movq %xmm0,%rbx: table word for the next row.
120 .byte 102,72,15,126,195
121
# Finish the last word of the row and reload (%rsi)[0] for the next row.
122 addq %rax,%r13
123 movq (%rsi),%rax
124 adcq $0,%rdx
125 addq %r11,%r13
126 adcq $0,%rdx
127 movq %r13,-16(%rsp,%r15,8)
128 movq %rdx,%r13
129 movq %r10,%r11
130
# Top of the row: slot count-1 gets the sum, slot count gets the carry.
131 xorq %rdx,%rdx
132 addq %r11,%r13
133 adcq $0,%rdx
134 movq %r13,-8(%rsp,%r9,8)
135 movq %rdx,(%rsp,%r9,8)
136
137 leaq 1(%r14),%r14
138 jmp .Louter
139 .align 16
# Remaining rows: same shape as the first row, but every word also adds
# the value already stored in the temporary vector.
140 .Louter:
141 xorq %r15,%r15
142 movq %r8,%rbp
143 movq (%rsp),%r10
144
145 movq -96(%r12),%xmm0
146 movq -32(%r12),%xmm1
147 pand %xmm4,%xmm0
148 movq 32(%r12),%xmm2
149 pand %xmm5,%xmm1
150
151 mulq %rbx
152 addq %rax,%r10
153 movq (%rcx),%rax
154 adcq $0,%rdx
155
156 movq 96(%r12),%xmm3
157 pand %xmm6,%xmm2
158 por %xmm1,%xmm0
159 pand %xmm7,%xmm3
160
# Per-row factor for the %rcx vector, derived from the new low word.
161 imulq %r10,%rbp
162 movq %rdx,%r11
163
164 por %xmm2,%xmm0
165 leaq 256(%r12),%r12
166 por %xmm3,%xmm0
167
# As in the first row, only the carry of this low-word sum is kept;
# %r10 is reloaded from slot 1 right after.
168 mulq %rbp
169 addq %rax,%r10
170 movq 8(%rsi),%rax
171 adcq $0,%rdx
172 movq 8(%rsp),%r10
173 movq %rdx,%r13
174
175 leaq 1(%r15),%r15
176 jmp .Linner_enter
177
178 .align 16
179 .Linner:
180 addq %rax,%r13
181 movq (%rsi,%r15,8),%rax
182 adcq $0,%rdx
183 addq %r10,%r13
184 movq (%rsp,%r15,8),%r10
185 adcq $0,%rdx
186 movq %r13,-16(%rsp,%r15,8)
187 movq %rdx,%r13
188
189 .Linner_enter:
190 mulq %rbx
191 addq %rax,%r11
192 movq (%rcx,%r15,8),%rax
193 adcq $0,%rdx
194 addq %r11,%r10
195 movq %rdx,%r11
196 adcq $0,%r11
197 leaq 1(%r15),%r15
198
199 mulq %rbp
200 cmpq %r9,%r15
201 jne .Linner
202
# movq %xmm0,%rbx: table word for the next row.
203 .byte 102,72,15,126,195
204
205 addq %rax,%r13
206 movq (%rsi),%rax
207 adcq $0,%rdx
208 addq %r10,%r13
209 movq (%rsp,%r15,8),%r10
210 adcq $0,%rdx
211 movq %r13,-16(%rsp,%r15,8)
212 movq %rdx,%r13
213
# Top of the row: fold in the previous top word (%r10) as well.
214 xorq %rdx,%rdx
215 addq %r11,%r13
216 adcq $0,%rdx
217 addq %r10,%r13
218 adcq $0,%rdx
219 movq %r13,-8(%rsp,%r9,8)
220 movq %rdx,(%rsp,%r9,8)
221
# Next row until the row counter reaches the word count.
222 leaq 1(%r14),%r14
223 cmpq %r9,%r14
224 jl .Louter
225
# Final subtraction.  The xor zeroes the index and also clears CF, so
# the first sbbq starts without a borrow.
226 xorq %r14,%r14
227 movq (%rsp),%rax
228 leaq (%rsp),%rsi
229 movq %r9,%r15
230 jmp .Lsub
231 .align 16
# (%rdi)[i] = temp[i] - (%rcx)[i] with borrow.  The borrow survives the
# loop control because mov, lea and dec leave CF untouched.
232 .Lsub: sbbq (%rcx,%r14,8),%rax
233 movq %rax,(%rdi,%r14,8)
234 movq 8(%rsi,%r14,8),%rax
235 leaq 1(%r14),%r14
236 decq %r15
237 jnz .Lsub
238
# %rax = top word of the temporary vector minus the final borrow, then
# used as a bit mask (expected to be 0 or all-ones -- TODO confirm).
# %rsi = (temp & mask) | (%rdi & ~mask): branch-free choice of the
# vector that .Lcopy reads from.
239 sbbq $0,%rax
240 xorq %r14,%r14
241 andq %rax,%rsi
242 notq %rax
243 movq %rdi,%rcx
244 andq %rax,%rcx
245 movq %r9,%r15
246 orq %rcx,%rsi
247 .align 16
# Copy the chosen vector to (%rdi) and overwrite each temporary slot
# with its own index on the way.
248 .Lcopy:
249 movq (%rsi,%r14,8),%rax
250 movq %r14,(%rsp,%r14,8)
251 movq %rax,(%rdi,%r14,8)
252 leaq 1(%r14),%r14
253 subq $1,%r15
254 jnz .Lcopy
255
# Epilogue: fetch the saved %rsp from slot count+1, set the return value
# to 1, reload the six saved registers and release the frame.
256 movq 8(%rsp,%r9,8),%rsi
257 movq $1,%rax
258 movq (%rsi),%r15
259 movq 8(%rsi),%r14
260 movq 16(%rsi),%r13
261 movq 24(%rsi),%r12
262 movq 32(%rsi),%rbp
263 movq 40(%rsi),%rbx
264 leaq 48(%rsi),%rsp
265 .Lmul_epilogue:
# The bytes F3 C3 encode: rep ret.
266 .byte 0xf3,0xc3
267 .size bn_mul_mont_gather5,.-bn_mul_mont_gather5
# bn_mul4x_mont_gather5: variant of bn_mul_mont_gather5 whose inner loops
# handle four words per iteration.  No .globl for this symbol appears in
# this view; it is entered through the jmp to .Lmul4x_enter, which is
# taken for word counts that are a multiple of 4 and not below 8.
# Register roles on entry match bn_mul_mont_gather5: %rdi result,
# %rsi first multiplicand, %rdx table base, %rcx vector scaled by %rbp
# and later subtracted, %r8 pointer to one word, %r9d word count,
# 8(%rsp) table index.  Returns 1 in %rax.
# Temporary vector: word count + 4 qwords at the realigned %rsp.
268 .type bn_mul4x_mont_gather5,@function
269 .align 16
270 bn_mul4x_mont_gather5:
271 .Lmul4x_enter:
# Zero-extend the word count, read the table index, save the
# callee-saved registers.
272 movl %r9d,%r9d
273 movl 8(%rsp),%r10d
274 pushq %rbx
275 pushq %rbp
276 pushq %r12
277 pushq %r13
278 pushq %r14
279 pushq %r15
# Reserve (count+4) qwords and round %rsp down to a 1024-byte boundary.
280 movq %rsp,%rax
281 leaq 4(%r9),%r11
282 negq %r11
283 leaq (%rsp,%r11,8),%rsp
284 andq $-1024,%rsp
285
# Slot count+1: pre-alignment %rsp.
286 movq %rax,8(%rsp,%r9,8)
287 .Lmul4x_body:
# Slot count+2: result pointer, because %rdi is reused as an
# accumulator below and reloaded before .Lsub4x.
288 movq %rdi,16(%rsp,%r9,8)
# Gather setup, same computation as in bn_mul_mont_gather5:
# %r12 = table + 96 + (index & 7)*8, masks taken from .Lmagic_masks
# starting at qword ~(index >> 3) & 3.
289 movq %rdx,%r12
290 movq %r10,%r11
291 shrq $3,%r10
292 andq $7,%r11
293 notq %r10
294 leaq .Lmagic_masks(%rip),%rax
295 andq $3,%r10
296 leaq 96(%r12,%r11,8),%r12
297 movq 0(%rax,%r10,8),%xmm4
298 movq 8(%rax,%r10,8),%xmm5
299 movq 16(%rax,%r10,8),%xmm6
300 movq 24(%rax,%r10,8),%xmm7
301
# Gather the first table word from four candidates 64 bytes apart.
302 movq -96(%r12),%xmm0
303 movq -32(%r12),%xmm1
304 pand %xmm4,%xmm0
305 movq 32(%r12),%xmm2
306 pand %xmm5,%xmm1
307 movq 96(%r12),%xmm3
308 pand %xmm6,%xmm2
309 por %xmm1,%xmm0
310 pand %xmm7,%xmm3
311 por %xmm2,%xmm0
312 leaq 256(%r12),%r12
313 por %xmm3,%xmm0
314
# The bytes 66 48 0F 7E C3 encode: movq %xmm0,%rbx (gathered word).
315 .byte 102,72,15,126,195
316 movq (%r8),%r8
317 movq (%rsi),%rax
318
# %r14 = row counter, %r15 = word counter.
319 xorq %r14,%r14
320 xorq %r15,%r15
321
# Start gathering the table word for the next row.
322 movq -96(%r12),%xmm0
323 movq -32(%r12),%xmm1
324 pand %xmm4,%xmm0
325 movq 32(%r12),%xmm2
326 pand %xmm5,%xmm1
327
# First row, words 0 and 1.  %rbp becomes the per-row factor for the
# %rcx vector: low word of the first product times the (%r8) word.
328 movq %r8,%rbp
329 mulq %rbx
330 movq %rax,%r10
331 movq (%rcx),%rax
332
333 movq 96(%r12),%xmm3
334 pand %xmm6,%xmm2
335 por %xmm1,%xmm0
336 pand %xmm7,%xmm3
337
338 imulq %r10,%rbp
339 movq %rdx,%r11
340
341 por %xmm2,%xmm0
342 leaq 256(%r12),%r12
343 por %xmm3,%xmm0
344
345 mulq %rbp
346 addq %rax,%r10
347 movq 8(%rsi),%rax
348 adcq $0,%rdx
349 movq %rdx,%rdi
350
351 mulq %rbx
352 addq %rax,%r11
353 movq 8(%rcx),%rax
354 adcq $0,%rdx
355 movq %rdx,%r10
356
357 mulq %rbp
358 addq %rax,%rdi
359 movq 16(%rsi),%rax
360 adcq $0,%rdx
361 addq %r11,%rdi
362 leaq 4(%r15),%r15
363 adcq $0,%rdx
364 movq %rdi,(%rsp)
365 movq %rdx,%r13
366 jmp .L1st4x
367 .align 16
# First-row inner loop: four words per pass; %rdi and %r13 alternate
# as the running accumulator and each finished word is stored to the
# temporary vector.
368 .L1st4x:
369 mulq %rbx
370 addq %rax,%r10
371 movq -16(%rcx,%r15,8),%rax
372 adcq $0,%rdx
373 movq %rdx,%r11
374
375 mulq %rbp
376 addq %rax,%r13
377 movq -8(%rsi,%r15,8),%rax
378 adcq $0,%rdx
379 addq %r10,%r13
380 adcq $0,%rdx
381 movq %r13,-24(%rsp,%r15,8)
382 movq %rdx,%rdi
383
384 mulq %rbx
385 addq %rax,%r11
386 movq -8(%rcx,%r15,8),%rax
387 adcq $0,%rdx
388 movq %rdx,%r10
389
390 mulq %rbp
391 addq %rax,%rdi
392 movq (%rsi,%r15,8),%rax
393 adcq $0,%rdx
394 addq %r11,%rdi
395 adcq $0,%rdx
396 movq %rdi,-16(%rsp,%r15,8)
397 movq %rdx,%r13
398
399 mulq %rbx
400 addq %rax,%r10
401 movq (%rcx,%r15,8),%rax
402 adcq $0,%rdx
403 movq %rdx,%r11
404
405 mulq %rbp
406 addq %rax,%r13
407 movq 8(%rsi,%r15,8),%rax
408 adcq $0,%rdx
409 addq %r10,%r13
410 adcq $0,%rdx
411 movq %r13,-8(%rsp,%r15,8)
412 movq %rdx,%rdi
413
414 mulq %rbx
415 addq %rax,%r11
416 movq 8(%rcx,%r15,8),%rax
417 adcq $0,%rdx
418 leaq 4(%r15),%r15
419 movq %rdx,%r10
420
421 mulq %rbp
422 addq %rax,%rdi
423 movq -16(%rsi,%r15,8),%rax
424 adcq $0,%rdx
425 addq %r11,%rdi
426 adcq $0,%rdx
427 movq %rdi,-32(%rsp,%r15,8)
428 movq %rdx,%r13
429 cmpq %r9,%r15
430 jl .L1st4x
431
# Tail of the first row: the last two words, outside the loop.
432 mulq %rbx
433 addq %rax,%r10
434 movq -16(%rcx,%r15,8),%rax
435 adcq $0,%rdx
436 movq %rdx,%r11
437
438 mulq %rbp
439 addq %rax,%r13
440 movq -8(%rsi,%r15,8),%rax
441 adcq $0,%rdx
442 addq %r10,%r13
443 adcq $0,%rdx
444 movq %r13,-24(%rsp,%r15,8)
445 movq %rdx,%rdi
446
447 mulq %rbx
448 addq %rax,%r11
449 movq -8(%rcx,%r15,8),%rax
450 adcq $0,%rdx
451 movq %rdx,%r10
452
453 mulq %rbp
454 addq %rax,%rdi
# Reload (%rsi)[0] for the next row.
455 movq (%rsi),%rax
456 adcq $0,%rdx
457 addq %r11,%rdi
458 adcq $0,%rdx
459 movq %rdi,-16(%rsp,%r15,8)
460 movq %rdx,%r13
461
# movq %xmm0,%rbx: table word for the next row.
462 .byte 102,72,15,126,195
463
# Top of the row: sum and carry go to the two highest slots.
464 xorq %rdi,%rdi
465 addq %r10,%r13
466 adcq $0,%rdi
467 movq %r13,-8(%rsp,%r15,8)
468 movq %rdi,(%rsp,%r15,8)
469
470 leaq 1(%r14),%r14
471 .align 4
# Remaining rows: as above, but each word also adds the value already
# present in the temporary vector.
472 .Louter4x:
473 xorq %r15,%r15
474 movq -96(%r12),%xmm0
475 movq -32(%r12),%xmm1
476 pand %xmm4,%xmm0
477 movq 32(%r12),%xmm2
478 pand %xmm5,%xmm1
479
480 movq (%rsp),%r10
481 movq %r8,%rbp
482 mulq %rbx
483 addq %rax,%r10
484 movq (%rcx),%rax
485 adcq $0,%rdx
486
487 movq 96(%r12),%xmm3
488 pand %xmm6,%xmm2
489 por %xmm1,%xmm0
490 pand %xmm7,%xmm3
491
# Per-row factor for the %rcx vector, derived from the new low word.
492 imulq %r10,%rbp
493 movq %rdx,%r11
494
495 por %xmm2,%xmm0
496 leaq 256(%r12),%r12
497 por %xmm3,%xmm0
498
499 mulq %rbp
500 addq %rax,%r10
501 movq 8(%rsi),%rax
502 adcq $0,%rdx
503 movq %rdx,%rdi
504
505 mulq %rbx
506 addq %rax,%r11
507 movq 8(%rcx),%rax
508 adcq $0,%rdx
509 addq 8(%rsp),%r11
510 adcq $0,%rdx
511 movq %rdx,%r10
512
513 mulq %rbp
514 addq %rax,%rdi
515 movq 16(%rsi),%rax
516 adcq $0,%rdx
517 addq %r11,%rdi
518 leaq 4(%r15),%r15
519 adcq $0,%rdx
520 movq %rdx,%r13
521 jmp .Linner4x
522 .align 16
# Inner loop, four words per pass.  The store of each finished word is
# issued one step after the word is completed, so the first store in the
# loop body writes the value finished before the loop was entered.
523 .Linner4x:
524 mulq %rbx
525 addq %rax,%r10
526 movq -16(%rcx,%r15,8),%rax
527 adcq $0,%rdx
528 addq -16(%rsp,%r15,8),%r10
529 adcq $0,%rdx
530 movq %rdx,%r11
531
532 mulq %rbp
533 addq %rax,%r13
534 movq -8(%rsi,%r15,8),%rax
535 adcq $0,%rdx
536 addq %r10,%r13
537 adcq $0,%rdx
538 movq %rdi,-32(%rsp,%r15,8)
539 movq %rdx,%rdi
540
541 mulq %rbx
542 addq %rax,%r11
543 movq -8(%rcx,%r15,8),%rax
544 adcq $0,%rdx
545 addq -8(%rsp,%r15,8),%r11
546 adcq $0,%rdx
547 movq %rdx,%r10
548
549 mulq %rbp
550 addq %rax,%rdi
551 movq (%rsi,%r15,8),%rax
552 adcq $0,%rdx
553 addq %r11,%rdi
554 adcq $0,%rdx
555 movq %r13,-24(%rsp,%r15,8)
556 movq %rdx,%r13
557
558 mulq %rbx
559 addq %rax,%r10
560 movq (%rcx,%r15,8),%rax
561 adcq $0,%rdx
562 addq (%rsp,%r15,8),%r10
563 adcq $0,%rdx
564 movq %rdx,%r11
565
566 mulq %rbp
567 addq %rax,%r13
568 movq 8(%rsi,%r15,8),%rax
569 adcq $0,%rdx
570 addq %r10,%r13
571 adcq $0,%rdx
572 movq %rdi,-16(%rsp,%r15,8)
573 movq %rdx,%rdi
574
575 mulq %rbx
576 addq %rax,%r11
577 movq 8(%rcx,%r15,8),%rax
578 adcq $0,%rdx
579 addq 8(%rsp,%r15,8),%r11
580 adcq $0,%rdx
581 leaq 4(%r15),%r15
582 movq %rdx,%r10
583
584 mulq %rbp
585 addq %rax,%rdi
586 movq -16(%rsi,%r15,8),%rax
587 adcq $0,%rdx
588 addq %r11,%rdi
589 adcq $0,%rdx
590 movq %r13,-40(%rsp,%r15,8)
591 movq %rdx,%r13
592 cmpq %r9,%r15
593 jl .Linner4x
594
# Tail of the row: the last two words, outside the loop.
595 mulq %rbx
596 addq %rax,%r10
597 movq -16(%rcx,%r15,8),%rax
598 adcq $0,%rdx
599 addq -16(%rsp,%r15,8),%r10
600 adcq $0,%rdx
601 movq %rdx,%r11
602
603 mulq %rbp
604 addq %rax,%r13
605 movq -8(%rsi,%r15,8),%rax
606 adcq $0,%rdx
607 addq %r10,%r13
608 adcq $0,%rdx
609 movq %rdi,-32(%rsp,%r15,8)
610 movq %rdx,%rdi
611
612 mulq %rbx
613 addq %rax,%r11
614 movq -8(%rcx,%r15,8),%rax
615 adcq $0,%rdx
616 addq -8(%rsp,%r15,8),%r11
617 adcq $0,%rdx
# Row counter is advanced here, in the middle of the tail.
618 leaq 1(%r14),%r14
619 movq %rdx,%r10
620
621 mulq %rbp
622 addq %rax,%rdi
623 movq (%rsi),%rax
624 adcq $0,%rdx
625 addq %r11,%rdi
626 adcq $0,%rdx
627 movq %r13,-24(%rsp,%r15,8)
628 movq %rdx,%r13
629
# movq %xmm0,%rbx: table word for the next row.
630 .byte 102,72,15,126,195
631 movq %rdi,-16(%rsp,%r15,8)
632
# Top of the row: add the carry word stored at slot count by the
# previous row, then store the new top sum and carry.
633 xorq %rdi,%rdi
634 addq %r10,%r13
635 adcq $0,%rdi
636 addq (%rsp,%r9,8),%r13
637 adcq $0,%rdi
638 movq %r13,-8(%rsp,%r15,8)
639 movq %rdi,(%rsp,%r15,8)
640
641 cmpq %r9,%r14
642 jl .Louter4x
# Final subtraction, four words per pass.  Reload the result pointer
# from slot count+2, zero %xmm0 for wiping the temporary vector, and
# turn %r9 into count/4 for the loop counters.
643 movq 16(%rsp,%r9,8),%rdi
644 movq 0(%rsp),%rax
645 pxor %xmm0,%xmm0
646 movq 8(%rsp),%rdx
647 shrq $2,%r9
648 leaq (%rsp),%rsi
649 xorq %r14,%r14
650
# subq starts the borrow chain; every later subtraction is sbbq and the
# instructions in between (mov, lea, dec) leave CF untouched.
651 subq 0(%rcx),%rax
652 movq 16(%rsi),%rbx
653 movq 24(%rsi),%rbp
654 sbbq 8(%rcx),%rdx
655 leaq -1(%r9),%r15
656 jmp .Lsub4x
657 .align 16
658 .Lsub4x:
659 movq %rax,0(%rdi,%r14,8)
660 movq %rdx,8(%rdi,%r14,8)
661 sbbq 16(%rcx,%r14,8),%rbx
662 movq 32(%rsi,%r14,8),%rax
663 movq 40(%rsi,%r14,8),%rdx
664 sbbq 24(%rcx,%r14,8),%rbp
665 movq %rbx,16(%rdi,%r14,8)
666 movq %rbp,24(%rdi,%r14,8)
667 sbbq 32(%rcx,%r14,8),%rax
668 movq 48(%rsi,%r14,8),%rbx
669 movq 56(%rsi,%r14,8),%rbp
670 sbbq 40(%rcx,%r14,8),%rdx
671 leaq 4(%r14),%r14
672 decq %r15
673 jnz .Lsub4x
674
# Last group of four; %rax is then loaded with the word that follows
# them in the temporary vector (the top carry word).
675 movq %rax,0(%rdi,%r14,8)
676 movq 32(%rsi,%r14,8),%rax
677 sbbq 16(%rcx,%r14,8),%rbx
678 movq %rdx,8(%rdi,%r14,8)
679 sbbq 24(%rcx,%r14,8),%rbp
680 movq %rbx,16(%rdi,%r14,8)
681
# %rax = top word minus the final borrow, used as a bit mask (expected
# to be 0 or all-ones -- TODO confirm).  %rsi = (temp & mask) |
# (%rdi & ~mask): branch-free choice of the copy source.
682 sbbq $0,%rax
683 movq %rbp,24(%rdi,%r14,8)
684 xorq %r14,%r14
685 andq %rax,%rsi
686 notq %rax
687 movq %rdi,%rcx
688 andq %rax,%rcx
689 leaq -1(%r9),%r15
690 orq %rcx,%rsi
691
# Copy-out with wipe: the source is read with unaligned 16-byte loads,
# the temporary vector is zeroed with aligned stores of %xmm0, and the
# result is written with unaligned stores.  %r14 is a byte offset here.
692 movdqu (%rsi),%xmm1
693 movdqa %xmm0,(%rsp)
694 movdqu %xmm1,(%rdi)
695 jmp .Lcopy4x
696 .align 16
697 .Lcopy4x:
698 movdqu 16(%rsi,%r14,1),%xmm2
699 movdqu 32(%rsi,%r14,1),%xmm1
700 movdqa %xmm0,16(%rsp,%r14,1)
701 movdqu %xmm2,16(%rdi,%r14,1)
702 movdqa %xmm0,32(%rsp,%r14,1)
703 movdqu %xmm1,32(%rdi,%r14,1)
704 leaq 32(%r14),%r14
705 decq %r15
706 jnz .Lcopy4x
707
# Restore %r9 to the word count, copy the final 16 bytes, then run the
# same epilogue as bn_mul_mont_gather5: saved %rsp from slot count+1,
# return value 1, reload the six saved registers.
708 shlq $2,%r9
709 movdqu 16(%rsi,%r14,1),%xmm2
710 movdqa %xmm0,16(%rsp,%r14,1)
711 movdqu %xmm2,16(%rdi,%r14,1)
712 movq 8(%rsp,%r9,8),%rsi
713 movq $1,%rax
714 movq (%rsi),%r15
715 movq 8(%rsi),%r14
716 movq 16(%rsi),%r13
717 movq 24(%rsi),%r12
718 movq 32(%rsi),%rbp
719 movq 40(%rsi),%rbx
720 leaq 48(%rsi),%rsp
721 .Lmul4x_epilogue:
# The bytes F3 C3 encode: rep ret.
722 .byte 0xf3,0xc3
723 .size bn_mul4x_mont_gather5,.-bn_mul4x_mont_gather5
# bn_scatter5: store a vector of 64-bit words into a strided table,
# one word every 256 bytes.
#   %rdi  source vector
#   %rsi  number of words; zero returns immediately
#   %rdx  table base
#   %rcx  column index; the first store goes to %rdx + %rcx*8
# Clobbers %rax, %rdx, %rsi, %rdi and flags; uses no stack.
724 .globl bn_scatter5
725 .type bn_scatter5,@function
726 .align 16
727 bn_scatter5:
728 cmpq $0,%rsi
729 jz .Lscatter_epilogue
# Point %rdx at the chosen column of the first row.
730 leaq (%rdx,%rcx,8),%rdx
731 .Lscatter:
# Copy one word, then advance the source by 8 bytes and the
# destination by one 256-byte row.
732 movq (%rdi),%rax
733 leaq 8(%rdi),%rdi
734 movq %rax,(%rdx)
735 leaq 256(%rdx),%rdx
736 subq $1,%rsi
737 jnz .Lscatter
738 .Lscatter_epilogue:
# The bytes F3 C3 encode: rep ret.
739 .byte 0xf3,0xc3
740 .size bn_scatter5,.-bn_scatter5
741
# bn_gather5: read one column of a strided table (rows 256 bytes apart)
# into a contiguous vector, using masked loads instead of an address
# that depends on the full index.
#   %rdi  destination vector
#   %rsi  number of words; the loop tests it only after the first
#         store, so the caller must pass a non-zero count
#   %rdx  table base
#   %rcx  column index
# Clobbers %rax, %rcx, %rdx, %rsi, %rdi, %r11, %xmm0-%xmm7 and flags.
742 .globl bn_gather5
743 .type bn_gather5,@function
744 .align 16
745 bn_gather5:
# %r11 = index & 7 picks the qword inside a 64-byte group;
# %rcx = ~(index >> 3) & 3 picks where the four mask qwords are read
# from in .Lmagic_masks.  %rdx = table + 96 + (index & 7)*8, so the
# displacements -96/-32/32/96 below are four qwords 64 bytes apart.
746 movq %rcx,%r11
747 shrq $3,%rcx
748 andq $7,%r11
749 notq %rcx
750 leaq .Lmagic_masks(%rip),%rax
751 andq $3,%rcx
752 leaq 96(%rdx,%r11,8),%rdx
753 movq 0(%rax,%rcx,8),%xmm4
754 movq 8(%rax,%rcx,8),%xmm5
755 movq 16(%rax,%rcx,8),%xmm6
756 movq 24(%rax,%rcx,8),%xmm7
757 jmp .Lgather
758 .align 16
759 .Lgather:
# Load all four candidates, AND each with its mask, OR the results,
# then step to the next 256-byte row.
760 movq -96(%rdx),%xmm0
761 movq -32(%rdx),%xmm1
762 pand %xmm4,%xmm0
763 movq 32(%rdx),%xmm2
764 pand %xmm5,%xmm1
765 movq 96(%rdx),%xmm3
766 pand %xmm6,%xmm2
767 por %xmm1,%xmm0
768 pand %xmm7,%xmm3
769 por %xmm2,%xmm0
770 leaq 256(%rdx),%rdx
771 por %xmm3,%xmm0
772
# Store the selected word and advance the destination.
773 movq %xmm0,(%rdi)
774 leaq 8(%rdi),%rdi
775 subq $1,%rsi
776 jnz .Lgather
# The bytes F3 C3 encode: rep ret.
777 .byte 0xf3,0xc3
# NOTE(review): nothing in this view references the label below.
778 .LSEH_end_bn_gather5:
779 .size bn_gather5,.-bn_gather5
# Mask table for the gather sequences in this file.  It holds eight
# qwords, of which only the fourth (byte offset 24) is all-ones.
# Readers load four consecutive qwords starting at qword
# ~(index >> 3) & 3, so the all-ones qword lands in exactly one of the
# four mask registers and the other three are zero.
780 .align 64
781 .Lmagic_masks:
782 .long 0,0, 0,0, 0,0, -1,-1
783 .long 0,0, 0,0, 0,0, 0,0
# NUL-terminated ASCII identification string: "Montgomery Multiplication
# with scatter/gather for x86_64, CRYPTOGAMS by" plus the author address.
# The list is kept contiguous: a line-wrap artifact had split the
# literal 111 into "1 11" and left stray blanks before three commas.
784 .byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
OLDNEW
« no previous file with comments | « openssl/crypto/bn/asm/x86_64-mont.pl ('k') | openssl/crypto/bn/asm/x86_64-mont5.pl » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698