Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(297)

Side by Side Diff: third_party/boringssl/win-x86_64/crypto/bn/x86_64-mont5.asm

Issue 1319703002: Breaking Change: merge BoringSSL branch into master (Closed) Base URL: git@github.com:dart-lang/sdk.git@master
Patch Set: Created 5 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 default rel
2 %define XMMWORD
3 %define YMMWORD
4 %define ZMMWORD
5 section .text code align=64
6
7
8 EXTERN OPENSSL_ia32cap_P
9
10 global bn_mul_mont_gather5
11
12 ALIGN 64
; bn_mul_mont_gather5 -- Win64 entry point, word-at-a-time path.
;
; The six Microsoft x64 arguments (rcx, rdx, r8, r9, [40+rsp], [48+rsp]) are
; moved into rdi, rsi, rdx, rcx, r8, r9.  A seventh argument is read as a
; 32-bit value from [56+rsp].  The register names used in the comments below
; follow the usual BIGNUM convention and are an assumption -- TODO confirm
; against the C prototype:
;   rdi = rp (result), rsi = ap, rdx = gather table, rcx = np,
;   r8 = pointer to n0, r9 = num (word count), 7th = table index.
; If the low three bits of num are zero, control goes to $L$mul4x_enter;
; otherwise the loops below run.  Returns rax = 1.
;
; The "DB 102,72,15,126,195" lines encode "movq rbx,xmm0".
13 bn_mul_mont_gather5:
14 mov QWORD[8+rsp],rdi ;WIN64 prologue
15 mov QWORD[16+rsp],rsi
16 mov rax,rsp
17 $L$SEH_begin_bn_mul_mont_gather5:
18 mov rdi,rcx
19 mov rsi,rdx
20 mov rdx,r8
21 mov rcx,r9
22 mov r8,QWORD[40+rsp]
23 mov r9,QWORD[48+rsp]
24
25
; Dispatch on num & 7: non-zero -> generic path, zero -> 4x path.
26 test r9d,7
27 jnz NEAR $L$mul_enter
28 jmp NEAR $L$mul4x_enter
29
30 ALIGN 16
31 $L$mul_enter:
; Zero-extend num, remember the entry rsp in rax, fetch the 7th argument.
32 mov r9d,r9d
33 mov rax,rsp
34 mov r10d,DWORD[56+rsp]
35 push rbx
36 push rbp
37 push r12
38 push r13
39 push r14
40 push r15
; Save xmm6/xmm7 at entry_rsp-88 / entry_rsp-72; both are overwritten below.
41 lea rsp,[((-40))+rsp]
42 movaps XMMWORD[rsp],xmm6
43 movaps XMMWORD[16+rsp],xmm7
; Reserve (num+2) qwords of scratch (tp) and round rsp down to a 1024-byte
; boundary.
44 lea r11,[2+r9]
45 neg r11
46 lea rsp,[r11*8+rsp]
47 and rsp,-1024
48
; tp[num+1] keeps the entry rsp for the epilogue.
49 mov QWORD[8+r9*8+rsp],rax
50 $L$mul_body:
; Split the index: r11 = idx & 7, r10 = (~(idx >> 3)) & 3.
; r12 = table + 96 + (idx & 7)*8; xmm4..xmm7 = four consecutive qwords of
; $L$magic_masks starting at entry r10 (table contents are defined elsewhere).
51 mov r12,rdx
52 mov r11,r10
53 shr r10,3
54 and r11,7
55 not r10
56 lea rax,[$L$magic_masks]
57 and r10,3
58 lea r12,[96+r11*8+r12]
59 movq xmm4,QWORD[r10*8+rax]
60 movq xmm5,QWORD[8+r10*8+rax]
61 movq xmm6,QWORD[16+r10*8+rax]
62 movq xmm7,QWORD[24+r10*8+rax]
63
; Gather one word: load four candidates 64 bytes apart, AND each with its
; mask and OR them together.  All four loads happen regardless of the index.
64 movq xmm0,QWORD[(((-96)))+r12]
65 movq xmm1,QWORD[((-32))+r12]
66 pand xmm0,xmm4
67 movq xmm2,QWORD[32+r12]
68 pand xmm1,xmm5
69 movq xmm3,QWORD[96+r12]
70 pand xmm2,xmm6
71 por xmm0,xmm1
72 pand xmm3,xmm7
73 por xmm0,xmm2
74 lea r12,[256+r12]
75 por xmm0,xmm3
76
; rbx = gathered word (movq rbx,xmm0).
77 DB 102,72,15,126,195
78
; r8 = value pointed to by the 5th argument; rax = ap[0].
79 mov r8,QWORD[r8]
80 mov rax,QWORD[rsi]
81
; r14 = outer counter, r15 = inner counter.
82 xor r14,r14
83 xor r15,r15
84
; The next word is gathered into xmm0 while the first pass starts.
85 movq xmm0,QWORD[(((-96)))+r12]
86 movq xmm1,QWORD[((-32))+r12]
87 pand xmm0,xmm4
88 movq xmm2,QWORD[32+r12]
89 pand xmm1,xmm5
90
; First pass: rdx:rax = ap[0] * rbx, low word kept in r10.
91 mov rbp,r8
92 mul rbx
93 mov r10,rax
94 mov rax,QWORD[rcx]
95
96 movq xmm3,QWORD[96+r12]
97 pand xmm2,xmm6
98 por xmm0,xmm1
99 pand xmm3,xmm7
100
; rbp = r8 * low word; this is the multiplier applied to np[] in this pass.
101 imul rbp,r10
102 mov r11,rdx
103
104 por xmm0,xmm2
105 lea r12,[256+r12]
106 por xmm0,xmm3
107
; np[0] * rbp is added to the low word; only the carry out (r13) is kept.
108 mul rbp
109 add r10,rax
110 mov rax,QWORD[8+rsi]
111 adc rdx,0
112 mov r13,rdx
113
114 lea r15,[1+r15]
115 jmp NEAR $L$1st_enter
116
117 ALIGN 16
118 $L$1st:
; Finish word j-1: add the np product and the ap product, store to tp[j-2].
119 add r13,rax
120 mov rax,QWORD[r15*8+rsi]
121 adc rdx,0
122 add r13,r11
123 mov r11,r10
124 adc rdx,0
125 mov QWORD[((-16))+r15*8+rsp],r13
126 mov r13,rdx
127
128 $L$1st_enter:
; ap[j] * rbx, then np[j] * rbp; j = r15 is advanced between the two.
129 mul rbx
130 add r11,rax
131 mov rax,QWORD[r15*8+rcx]
132 adc rdx,0
133 lea r15,[1+r15]
134 mov r10,rdx
135
136 mul rbp
137 cmp r15,r9
138 jne NEAR $L$1st
139
; rbx = next gathered word (movq rbx,xmm0).
140 DB 102,72,15,126,195
141
; Drain the last word of the first pass and write the two top words of tp.
142 add r13,rax
143 mov rax,QWORD[rsi]
144 adc rdx,0
145 add r13,r11
146 adc rdx,0
147 mov QWORD[((-16))+r15*8+rsp],r13
148 mov r13,rdx
149 mov r11,r10
150
151 xor rdx,rdx
152 add r13,r11
153 adc rdx,0
154 mov QWORD[((-8))+r9*8+rsp],r13
155 mov QWORD[r9*8+rsp],rdx
156
157 lea r14,[1+r14]
158 jmp NEAR $L$outer
159 ALIGN 16
160 $L$outer:
; Passes 1..num-1: same as the first pass, but tp[] from the previous pass
; is added in as well.
161 xor r15,r15
162 mov rbp,r8
163 mov r10,QWORD[rsp]
164
; Gather the word for the following pass.
165 movq xmm0,QWORD[(((-96)))+r12]
166 movq xmm1,QWORD[((-32))+r12]
167 pand xmm0,xmm4
168 movq xmm2,QWORD[32+r12]
169 pand xmm1,xmm5
170
; r10 = tp[0] + low(ap[0] * rbx)
171 mul rbx
172 add r10,rax
173 mov rax,QWORD[rcx]
174 adc rdx,0
175
176 movq xmm3,QWORD[96+r12]
177 pand xmm2,xmm6
178 por xmm0,xmm1
179 pand xmm3,xmm7
180
; rbp = r8 * r10: multiplier for np[] in this pass.
181 imul rbp,r10
182 mov r11,rdx
183
184 por xmm0,xmm2
185 lea r12,[256+r12]
186 por xmm0,xmm3
187
188 mul rbp
189 add r10,rax
190 mov rax,QWORD[8+rsi]
191 adc rdx,0
192 mov r10,QWORD[8+rsp]
193 mov r13,rdx
194
195 lea r15,[1+r15]
196 jmp NEAR $L$inner_enter
197
198 ALIGN 16
199 $L$inner:
; Finish word j-1 (np product + running sum), store to tp[j-2], load tp[j].
200 add r13,rax
201 mov rax,QWORD[r15*8+rsi]
202 adc rdx,0
203 add r13,r10
204 mov r10,QWORD[r15*8+rsp]
205 adc rdx,0
206 mov QWORD[((-16))+r15*8+rsp],r13
207 mov r13,rdx
208
209 $L$inner_enter:
; r10 = tp[j] + ap[j]*rbx + carry; r11 = carry out.
210 mul rbx
211 add r11,rax
212 mov rax,QWORD[r15*8+rcx]
213 adc rdx,0
214 add r10,r11
215 mov r11,rdx
216 adc r11,0
217 lea r15,[1+r15]
218
219 mul rbp
220 cmp r15,r9
221 jne NEAR $L$inner
222
; rbx = next gathered word (movq rbx,xmm0).
223 DB 102,72,15,126,195
224
; Drain the last word and fold the previous top word tp[num] into the two
; new top words.
225 add r13,rax
226 mov rax,QWORD[rsi]
227 adc rdx,0
228 add r13,r10
229 mov r10,QWORD[r15*8+rsp]
230 adc rdx,0
231 mov QWORD[((-16))+r15*8+rsp],r13
232 mov r13,rdx
233
234 xor rdx,rdx
235 add r13,r11
236 adc rdx,0
237 add r13,r10
238 adc rdx,0
239 mov QWORD[((-8))+r9*8+rsp],r13
240 mov QWORD[r9*8+rsp],rdx
241
242 lea r14,[1+r14]
243 cmp r14,r9
244 jb NEAR $L$outer
245
; Final step: rp[] = tp[] - np[] with a borrow chain.  "xor r14,r14" clears
; CF; lea and dec leave CF untouched, so the borrow survives each iteration.
246 xor r14,r14
247 mov rax,QWORD[rsp]
248 lea rsi,[rsp]
249 mov r15,r9
250 jmp NEAR $L$sub
251 ALIGN 16
252 $L$sub: sbb rax,QWORD[r14*8+rcx]
253 mov QWORD[r14*8+rdi],rax
254 mov rax,QWORD[8+r14*8+rsi]
255 lea r14,[1+r14]
256 dec r15
257 jnz NEAR $L$sub
258
; rax = tp[num] minus the final borrow; it is used as a select mask below.
259 sbb rax,0
260 xor r14,r14
261 mov r15,r9
262 ALIGN 16
263 $L$copy:
; rp[i] = ((tp[i] ^ rp[i]) & rax) ^ rp[i]: tp[i] where mask bits are set,
; rp[i] where they are clear.  tp[i] is then overwritten with the loop index.
264 mov rsi,QWORD[r14*8+rsp]
265 mov rcx,QWORD[r14*8+rdi]
266 xor rsi,rcx
267 and rsi,rax
268 xor rsi,rcx
269 mov QWORD[r14*8+rsp],r14
270 mov QWORD[r14*8+rdi],rsi
271 lea r14,[1+r14]
272 sub r15,1
273 jnz NEAR $L$copy
274
; Epilogue: rsi = entry rsp saved in tp[num+1]; reload xmm6/xmm7 and the
; pushed registers relative to it, then restore rsp.
275 mov rsi,QWORD[8+r9*8+rsp]
276 mov rax,1
277 movaps xmm6,XMMWORD[((-88))+rsi]
278 movaps xmm7,XMMWORD[((-72))+rsi]
279 mov r15,QWORD[((-48))+rsi]
280 mov r14,QWORD[((-40))+rsi]
281 mov r13,QWORD[((-32))+rsi]
282 mov r12,QWORD[((-24))+rsi]
283 mov rbp,QWORD[((-16))+rsi]
284 mov rbx,QWORD[((-8))+rsi]
285 lea rsp,[rsi]
286 $L$mul_epilogue:
287 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
288 mov rsi,QWORD[16+rsp]
289 DB 0F3h,0C3h ;repret
290 $L$SEH_end_bn_mul_mont_gather5:
291
292 ALIGN 32
; bn_mul4x_mont_gather5 -- Win64 wrapper around mul4x_internal.
;
; Takes the same argument remapping as bn_mul_mont_gather5 (rcx, rdx, r8, r9,
; [40+rsp], [48+rsp] -> rdi, rsi, rdx, rcx, r8, r9).  $L$mul4x_enter is also
; reached by a jump from bn_mul_mont_gather5 when num & 7 == 0.
; It builds a 64-byte-aligned scratch frame, stores the entry rsp at
; [40+rsp], calls mul4x_internal and returns rax = 1.
;
; The "DB 0x67" lines emit a lone prefix byte ahead of the next instruction;
; presumably padding for instruction alignment.
293 bn_mul4x_mont_gather5:
294 mov QWORD[8+rsp],rdi ;WIN64 prologue
295 mov QWORD[16+rsp],rsi
296 mov rax,rsp
297 $L$SEH_begin_bn_mul4x_mont_gather5:
298 mov rdi,rcx
299 mov rsi,rdx
300 mov rdx,r8
301 mov rcx,r9
302 mov r8,QWORD[40+rsp]
303 mov r9,QWORD[48+rsp]
304
305
306 $L$mul4x_enter:
307 DB 0x67
; rax = entry rsp; the pushed registers and xmm6/xmm7 are later reloaded
; relative to it.
308 mov rax,rsp
309 push rbx
310 push rbp
311 push r12
312 push r13
313 push r14
314 push r15
315 lea rsp,[((-40))+rsp]
316 movaps XMMWORD[rsp],xmm6
317 movaps XMMWORD[16+rsp],xmm7
318 DB 0x67
; r9 = -(num*8), r10 = num*32.
319 mov r10d,r9d
320 shl r9d,3
321 shl r10d,3+2
322 neg r9
323
324
325
326
327
328
329
330
; r11 = (candidate frame bottom - rsi) mod 4096.  The frame is placed
; according to how that distance compares with num*32; presumably this
; controls the page-offset relationship between the scratch frame and the
; rsi operand -- TODO confirm against the generator source.
331 lea r11,[((-64))+r9*2+rsp]
332 sub r11,rsi
333 and r11,4095
334 cmp r10,r11
335 jb NEAR $L$mul4xsp_alt
336 sub rsp,r11
337 lea rsp,[((-64))+r9*2+rsp]
338 jmp NEAR $L$mul4xsp_done
339
340 ALIGN 32
341 $L$mul4xsp_alt:
342 lea r10,[((4096-64))+r9*2]
343 lea rsp,[((-64))+r9*2+rsp]
344 sub r11,r10
; "mov r10,0" is used rather than xor so the CF produced by the sub above
; is still valid for cmovc: r11 is clamped to 0 when the sub borrowed.
345 mov r10,0
346 cmovc r11,r10
347 sub rsp,r11
348 $L$mul4xsp_done:
; Align the frame to 64 bytes and turn r9 back into +num*8.
349 and rsp,-64
350 neg r9
351
; [40+rsp] = entry rsp, read back by the epilogue.
352 mov QWORD[40+rsp],rax
353 $L$mul4x_body:
354
; rax still holds the entry rsp here; mul4x_internal reads the 7th argument
; through it.
355 call mul4x_internal
356
; Epilogue: reload xmm6/xmm7 and the pushed registers relative to the entry
; rsp, then restore rsp and the rdi/rsi home slots.
357 mov rsi,QWORD[40+rsp]
358 mov rax,1
359 movaps xmm6,XMMWORD[((-88))+rsi]
360 movaps xmm7,XMMWORD[((-72))+rsi]
361 mov r15,QWORD[((-48))+rsi]
362 mov r14,QWORD[((-40))+rsi]
363 mov r13,QWORD[((-32))+rsi]
364 mov r12,QWORD[((-24))+rsi]
365 mov rbp,QWORD[((-16))+rsi]
366 mov rbx,QWORD[((-8))+rsi]
367 lea rsp,[rsi]
368 $L$mul4x_epilogue:
369 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
370 mov rsi,QWORD[16+rsp]
371 DB 0F3h,0C3h ;repret
372 $L$SEH_end_bn_mul4x_mont_gather5:
373
374
375 ALIGN 32
; mul4x_internal -- 4x-unrolled multiply core shared by
; bn_mul4x_mont_gather5 and bn_power5.  It does not return itself: it ends
; with a jump into $L$sqr4x_sub, whose ret returns to the caller.
;
; Inputs as read by the code:
;   rax = caller's entry rsp (the 32-bit index is read from [56+rax])
;   rdx = gather table base, rsi = first operand (ap), rcx = second operand
;   (np, read at a 16-byte stride), r8 = pointer to a qword multiplier,
;   r9 = num*8, rdi = result pointer.
; Because a return address is on the stack, every frame offset here is the
; caller's offset plus 8 (written as "(x+8)").
; Overwrites xmm0-xmm7 and the general registers used below.
; The operand names ap/np/n0 are the conventional ones -- TODO confirm.
;
; "DB 102,72,15,126,195" encodes "movq rbx,xmm0".  The "DB 0x67" lines emit
; a lone prefix byte ahead of the next instruction (presumably padding).
376 mul4x_internal:
; r13 = table + 256 + num*8*32: the limit that r12 is compared with at the
; end of each outer pass.
377 shl r9,5
378 mov r10d,DWORD[56+rax]
379 lea r13,[256+r9*1+rdx]
380 shr r9,5
; r11 = idx & 7, r10 = (~(idx >> 3)) & 3; xmm4..xmm7 = four consecutive
; qwords of $L$magic_masks starting at entry r10.
381 mov r11,r10
382 shr r10,3
383 and r11,7
384 not r10
385 lea rax,[$L$magic_masks]
386 and r10,3
387 lea r12,[96+r11*8+rdx]
388 movq xmm4,QWORD[r10*8+rax]
389 movq xmm5,QWORD[8+r10*8+rax]
; r11 = ((idx & 7) + 7) & 7; used below as a qword offset for the tp base.
390 add r11,7
391 movq xmm6,QWORD[16+r10*8+rax]
392 movq xmm7,QWORD[24+r10*8+rax]
393 and r11,7
394
; Gather the first table word into xmm0 (candidates at r12) and start
; gathering the second (candidates at r14 = r12 + 256).
395 movq xmm0,QWORD[(((-96)))+r12]
396 lea r14,[256+r12]
397 movq xmm1,QWORD[((-32))+r12]
398 pand xmm0,xmm4
399 movq xmm2,QWORD[32+r12]
400 pand xmm1,xmm5
401 movq xmm3,QWORD[96+r12]
402 pand xmm2,xmm6
403 DB 0x67
404 por xmm0,xmm1
405 movq xmm1,QWORD[((-96))+r14]
406 DB 0x67
407 pand xmm3,xmm7
408 DB 0x67
409 por xmm0,xmm2
410 movq xmm2,QWORD[((-32))+r14]
411 DB 0x67
412 pand xmm1,xmm4
413 DB 0x67
414 por xmm0,xmm3
415 movq xmm3,QWORD[32+r14]
416
; rbx = first gathered word (movq rbx,xmm0).
417 DB 102,72,15,126,195
418 movq xmm0,QWORD[96+r14]
; Spill the table limit and the result pointer to the frame.
419 mov QWORD[((16+8))+rsp],r13
420 mov QWORD[((56+8))+rsp],rdi
421
; r8 = multiplier value; rsi is moved to the end of ap[] and r9 negated so
; that ap[] is indexed with a negative offset counting up to zero.
422 mov r8,QWORD[r8]
423 mov rax,QWORD[rsi]
424 lea rsi,[r9*1+rsi]
425 neg r9
426
; First pass, word 0: r10 = low(ap[0]*rbx), rbp = r8 * r10.
427 mov rbp,r8
428 mul rbx
429 mov r10,rax
430 mov rax,QWORD[rcx]
431
432 pand xmm2,xmm5
433 pand xmm3,xmm6
434 por xmm1,xmm2
435
436 imul rbp,r10
437
438
439
440
441
442
443
; r14 = tp write pointer inside the frame, offset by r11 qwords.
444 lea r14,[((64+8))+r11*8+rsp]
445 mov r11,rdx
446
447 pand xmm0,xmm7
448 por xmm1,xmm3
449 lea r12,[512+r12]
450 por xmm0,xmm1
451
452 mul rbp
453 add r10,rax
454 mov rax,QWORD[8+r9*1+rsi]
455 adc rdx,0
456 mov rdi,rdx
457
458 mul rbx
459 add r11,rax
460 mov rax,QWORD[16+rcx]
461 adc rdx,0
462 mov r10,rdx
463
464 mul rbp
465 add rdi,rax
466 mov rax,QWORD[16+r9*1+rsi]
467 adc rdx,0
468 add rdi,r11
; r15 = 32 - num*8: negative byte offset that reaches 0 when ap[] is done.
469 lea r15,[32+r9]
470 lea rcx,[64+rcx]
471 adc rdx,0
472 mov QWORD[r14],rdi
473 mov r13,rdx
474 jmp NEAR $L$1st4x
475
476 ALIGN 32
477 $L$1st4x:
; First pass, four words per iteration.  Each word is one ap[j]*rbx product
; and one np[j]*rbp product; carries alternate between r10/r11 and rdi/r13.
478 mul rbx
479 add r10,rax
480 mov rax,QWORD[((-32))+rcx]
481 lea r14,[32+r14]
482 adc rdx,0
483 mov r11,rdx
484
485 mul rbp
486 add r13,rax
487 mov rax,QWORD[((-8))+r15*1+rsi]
488 adc rdx,0
489 add r13,r10
490 adc rdx,0
491 mov QWORD[((-24))+r14],r13
492 mov rdi,rdx
493
494 mul rbx
495 add r11,rax
496 mov rax,QWORD[((-16))+rcx]
497 adc rdx,0
498 mov r10,rdx
499
500 mul rbp
501 add rdi,rax
502 mov rax,QWORD[r15*1+rsi]
503 adc rdx,0
504 add rdi,r11
505 adc rdx,0
506 mov QWORD[((-16))+r14],rdi
507 mov r13,rdx
508
509 mul rbx
510 add r10,rax
511 mov rax,QWORD[rcx]
512 adc rdx,0
513 mov r11,rdx
514
515 mul rbp
516 add r13,rax
517 mov rax,QWORD[8+r15*1+rsi]
518 adc rdx,0
519 add r13,r10
520 adc rdx,0
521 mov QWORD[((-8))+r14],r13
522 mov rdi,rdx
523
524 mul rbx
525 add r11,rax
526 mov rax,QWORD[16+rcx]
527 adc rdx,0
528 mov r10,rdx
529
530 mul rbp
531 add rdi,rax
532 mov rax,QWORD[16+r15*1+rsi]
533 adc rdx,0
534 add rdi,r11
535 lea rcx,[64+rcx]
536 adc rdx,0
537 mov QWORD[r14],rdi
538 mov r13,rdx
539
540 add r15,32
541 jnz NEAR $L$1st4x
542
; Tail of the first pass: the last two words.  rax is reloaded with ap[0]
; (QWORD[r9+rsi]) for the next pass.
543 mul rbx
544 add r10,rax
545 mov rax,QWORD[((-32))+rcx]
546 lea r14,[32+r14]
547 adc rdx,0
548 mov r11,rdx
549
550 mul rbp
551 add r13,rax
552 mov rax,QWORD[((-8))+rsi]
553 adc rdx,0
554 add r13,r10
555 adc rdx,0
556 mov QWORD[((-24))+r14],r13
557 mov rdi,rdx
558
559 mul rbx
560 add r11,rax
561 mov rax,QWORD[((-16))+rcx]
562 adc rdx,0
563 mov r10,rdx
564
565 mul rbp
566 add rdi,rax
567 mov rax,QWORD[r9*1+rsi]
568 adc rdx,0
569 add rdi,r11
570 adc rdx,0
571 mov QWORD[((-16))+r14],rdi
572 mov r13,rdx
573
; rbx = next gathered word (movq rbx,xmm0); rcx is rewound by 2*num*8 bytes
; (r9 is negative here).
574 DB 102,72,15,126,195
575 lea rcx,[r9*2+rcx]
576
; rdi = carry out of the top word.
577 xor rdi,rdi
578 add r13,r10
579 adc rdi,0
580 mov QWORD[((-8))+r14],r13
581
582 jmp NEAR $L$outer4x
583
584 ALIGN 32
585 $L$outer4x:
; Remaining passes: as above, but the tp[] word from the previous pass is
; added into every column.
586 mov r10,QWORD[r9*1+r14]
587 mov rbp,r8
588 mul rbx
589 add r10,rax
590 mov rax,QWORD[rcx]
591 adc rdx,0
592
; Gather the word for the following pass.
593 movq xmm0,QWORD[(((-96)))+r12]
594 movq xmm1,QWORD[((-32))+r12]
595 pand xmm0,xmm4
596 movq xmm2,QWORD[32+r12]
597 pand xmm1,xmm5
598 movq xmm3,QWORD[96+r12]
599
; rbp = r8 * r10; the previous pass's top carry (rdi) is stored at [r14].
600 imul rbp,r10
601 DB 0x67
602 mov r11,rdx
603 mov QWORD[r14],rdi
604
605 pand xmm2,xmm6
606 por xmm0,xmm1
607 pand xmm3,xmm7
608 por xmm0,xmm2
; Rewind the tp pointer by num*8 bytes (r9 is negative).
609 lea r14,[r9*1+r14]
610 lea r12,[256+r12]
611 por xmm0,xmm3
612
613 mul rbp
614 add r10,rax
615 mov rax,QWORD[8+r9*1+rsi]
616 adc rdx,0
617 mov rdi,rdx
618
619 mul rbx
620 add r11,rax
621 mov rax,QWORD[16+rcx]
622 adc rdx,0
623 add r11,QWORD[8+r14]
624 adc rdx,0
625 mov r10,rdx
626
627 mul rbp
628 add rdi,rax
629 mov rax,QWORD[16+r9*1+rsi]
630 adc rdx,0
631 add rdi,r11
632 lea r15,[32+r9]
633 lea rcx,[64+rcx]
634 adc rdx,0
635 mov r13,rdx
636 jmp NEAR $L$inner4x
637
638 ALIGN 32
639 $L$inner4x:
; Four words per iteration; each store is delayed by one word relative to
; $L$1st4x because the old tp[] value has to be read first.
640 mul rbx
641 add r10,rax
642 mov rax,QWORD[((-32))+rcx]
643 adc rdx,0
644 add r10,QWORD[16+r14]
645 lea r14,[32+r14]
646 adc rdx,0
647 mov r11,rdx
648
649 mul rbp
650 add r13,rax
651 mov rax,QWORD[((-8))+r15*1+rsi]
652 adc rdx,0
653 add r13,r10
654 adc rdx,0
655 mov QWORD[((-32))+r14],rdi
656 mov rdi,rdx
657
658 mul rbx
659 add r11,rax
660 mov rax,QWORD[((-16))+rcx]
661 adc rdx,0
662 add r11,QWORD[((-8))+r14]
663 adc rdx,0
664 mov r10,rdx
665
666 mul rbp
667 add rdi,rax
668 mov rax,QWORD[r15*1+rsi]
669 adc rdx,0
670 add rdi,r11
671 adc rdx,0
672 mov QWORD[((-24))+r14],r13
673 mov r13,rdx
674
675 mul rbx
676 add r10,rax
677 mov rax,QWORD[rcx]
678 adc rdx,0
679 add r10,QWORD[r14]
680 adc rdx,0
681 mov r11,rdx
682
683 mul rbp
684 add r13,rax
685 mov rax,QWORD[8+r15*1+rsi]
686 adc rdx,0
687 add r13,r10
688 adc rdx,0
689 mov QWORD[((-16))+r14],rdi
690 mov rdi,rdx
691
692 mul rbx
693 add r11,rax
694 mov rax,QWORD[16+rcx]
695 adc rdx,0
696 add r11,QWORD[8+r14]
697 adc rdx,0
698 mov r10,rdx
699
700 mul rbp
701 add rdi,rax
702 mov rax,QWORD[16+r15*1+rsi]
703 adc rdx,0
704 add rdi,r11
705 lea rcx,[64+rcx]
706 adc rdx,0
707 mov QWORD[((-8))+r14],r13
708 mov r13,rdx
709
710 add r15,32
711 jnz NEAR $L$inner4x
712
; Tail of the pass: the last two words.
713 mul rbx
714 add r10,rax
715 mov rax,QWORD[((-32))+rcx]
716 adc rdx,0
717 add r10,QWORD[16+r14]
718 lea r14,[32+r14]
719 adc rdx,0
720 mov r11,rdx
721
722 mul rbp
723 add r13,rax
724 mov rax,QWORD[((-8))+rsi]
725 adc rdx,0
726 add r13,r10
727 adc rdx,0
728 mov QWORD[((-32))+r14],rdi
729 mov rdi,rdx
730
731 mul rbx
732 add r11,rax
; The operands are swapped for the last np product: rax takes the multiplier
; and rbp takes the np word at [rcx-16].  The product is unchanged, and rbp
; keeps that np word for the comparison after the outer loop.
733 mov rax,rbp
734 mov rbp,QWORD[((-16))+rcx]
735 adc rdx,0
736 add r11,QWORD[((-8))+r14]
737 adc rdx,0
738 mov r10,rdx
739
740 mul rbp
741 add rdi,rax
742 mov rax,QWORD[r9*1+rsi]
743 adc rdx,0
744 add rdi,r11
745 adc rdx,0
746 mov QWORD[((-24))+r14],r13
747 mov r13,rdx
748
; rbx = next gathered word (movq rbx,xmm0); rcx is rewound by 2*num*8 bytes.
749 DB 102,72,15,126,195
750 mov QWORD[((-16))+r14],rdi
751 lea rcx,[r9*2+rcx]
752
; Top word = carry + previous top word; rdi collects the carry out.
753 xor rdi,rdi
754 add r13,r10
755 adc rdi,0
756 add r13,QWORD[r14]
757 adc rdi,0
758 mov QWORD[((-8))+r14],r13
759
; Loop until the table cursor r12 reaches the limit saved at [(16+8)+rsp].
760 cmp r12,QWORD[((16+8))+rsp]
761 jb NEAR $L$outer4x
; Decide whether to subtract.  CF from "sub rbp,r13" is set when the top
; result word exceeds the saved np word; r15 is 0 here (inner-loop exit
; condition), so "adc r15,r15" leaves just that CF in r15.  After the or and
; xor, rdi is 0 if there was a carry out or a borrow, else 1.
762 sub rbp,r13
763 adc r15,r15
764 or rdi,r15
765 xor rdi,1
; rbx = start of the result in tp, rbp = rcx + rdi*8 (selects which 8-byte
; half of each 16-byte np slot is subtracted), rcx = -(num/4) loop counter,
; rdi = saved result pointer.  Then jump into the shared subtract loop.
766 lea rbx,[r9*1+r14]
767 lea rbp,[rdi*8+rcx]
768 mov rcx,r9
769 sar rcx,3+2
770 mov rdi,QWORD[((56+8))+rsp]
771 jmp NEAR $L$sqr4x_sub
772
773 global bn_power5
774
775 ALIGN 32
;-----------------------------------------------------------------------
; bn_power5 -- Win64 entry point.
;
; ABI:  Microsoft x64.  The six register/stack arguments (rcx, rdx, r8, r9,
;       [40+rsp], [48+rsp]) are remapped into rdi, rsi, rdx, rcx, r8, r9;
;       mul4x_internal later reads a 7th 32-bit argument from [56+entry rsp].
;       Conventional names (assumption, confirm against the C prototype):
;       rdi=rp, rsi=ap, rdx=gather table, rcx=np, r8=&n0, r9=num.
; Does: builds a 64-byte-aligned scratch frame, calls __bn_sqr8x_internal
;       five times, then calls mul4x_internal once.
; Out:  rax = 1.
;
; Save area relative to the entry rsp (kept in rax, then at [40+rsp]):
;       -8 rbx, -16 rbp, -24 r12, -32 r13, -40 r14, -48 r15,
;       -72 xmm7, -88 xmm6.
; Frame slots relative to the aligned rsp:
;       [32] = qword loaded from *r8, [40] = entry rsp.
;
; xmm6/xmm7 are callee-saved under the Microsoft x64 ABI and mul4x_internal
; loads mask values into both, so the epilogue reloads them from the save
; area before returning.
;-----------------------------------------------------------------------
bn_power5:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_bn_power5:
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8
	mov	rcx,r9
	mov	r8,QWORD[40+rsp]
	mov	r9,QWORD[48+rsp]


	mov	rax,rsp			; rax = entry rsp, base of the save area
	push	rbx
	push	rbp
	push	r12
	push	r13
	push	r14
	push	r15
	lea	rsp,[((-40))+rsp]
	movaps	XMMWORD[rsp],xmm6	; stored at entry rsp - 88
	movaps	XMMWORD[16+rsp],xmm7	; stored at entry rsp - 72
	mov	r10d,r9d
	shl	r9d,3			; r9  = num*8
	shl	r10d,3+2		; r10 = num*32
	neg	r9			; r9  = -(num*8)
	mov	r8,QWORD[r8]		; r8  = multiplier value (*5th argument)

	; r11 = (candidate frame bottom - rsi) mod 4096; the frame is placed
	; according to how that distance compares with num*32.
	lea	r11,[((-64))+r9*2+rsp]
	sub	r11,rsi
	and	r11,4095
	cmp	r10,r11
	jb	NEAR $L$pwr_sp_alt
	sub	rsp,r11
	lea	rsp,[((-64))+r9*2+rsp]
	jmp	NEAR $L$pwr_sp_done

ALIGN	32
$L$pwr_sp_alt:
	lea	r10,[((4096-64))+r9*2]
	lea	rsp,[((-64))+r9*2+rsp]
	sub	r11,r10
	mov	r10,0			; mov, not xor: CF from the sub feeds cmovc
	cmovc	r11,r10
	sub	rsp,r11
$L$pwr_sp_done:
	and	rsp,-64
	mov	r10,r9			; r10 = -(num*8)
	neg	r9			; r9  = +num*8

	mov	QWORD[32+rsp],r8	; multiplier value, read by the callees
	mov	QWORD[40+rsp],rax	; entry rsp, read by the epilogue
$L$power5_body:
DB	102,72,15,110,207		; movq xmm1,rdi
DB	102,72,15,110,209		; movq xmm2,rcx
DB	102,73,15,110,218		; movq xmm3,r10
DB	102,72,15,110,226		; movq xmm4,rdx

	call	__bn_sqr8x_internal
	call	__bn_sqr8x_internal
	call	__bn_sqr8x_internal
	call	__bn_sqr8x_internal
	call	__bn_sqr8x_internal

DB	102,72,15,126,209		; movq rcx,xmm2
DB	102,72,15,126,226		; movq rdx,xmm4
	mov	rdi,rsi
	mov	rax,QWORD[40+rsp]	; mul4x_internal reads [56+rax]
	lea	r8,[32+rsp]		; pointer to the saved multiplier value

	call	mul4x_internal

	mov	rsi,QWORD[40+rsp]	; rsi = entry rsp
	mov	rax,1
	movaps	xmm6,XMMWORD[((-88))+rsi]	; restore callee-saved xmm6
	movaps	xmm7,XMMWORD[((-72))+rsi]	; restore callee-saved xmm7
	mov	r15,QWORD[((-48))+rsi]
	mov	r14,QWORD[((-40))+rsi]
	mov	r13,QWORD[((-32))+rsi]
	mov	r12,QWORD[((-24))+rsi]
	mov	rbp,QWORD[((-16))+rsi]
	mov	rbx,QWORD[((-8))+rsi]
	lea	rsp,[rsi]
$L$power5_epilogue:
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret
$L$SEH_end_bn_power5:
878
879 global bn_sqr8x_internal
880
881
882 ALIGN 32
; bn_sqr8x_internal / __bn_sqr8x_internal -- squaring core.  It has no ret
; of its own: execution falls through into sqr8x_reduction after the last
; instruction of this block.
;
; Inputs as read by the code:
;   rsi = operand a[], r9 = num*8, r10 = -(num*8) (as set up by bn_power5),
;   xmm2 = pointer copied into rbp at the end of this block.
; A return address is on the stack, so frame offsets are written as the
; caller's offset plus 8; the double-width result tp[] starts at [(48+8)+rsp].
;
; Outline: (1) accumulate the cross products a[i]*a[j], i<j, into tp[];
; (2) $L$sqr4x_shift_n_add doubles tp[] by shifting left one bit across
; words and adds the squares a[i]*a[i].
; The "DB 0x67" lines emit a lone prefix byte ahead of the next instruction
; (presumably padding).
883 bn_sqr8x_internal:
884 __bn_sqr8x_internal:
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
; rbp = 32 - num*8 (negative running offset); rsi is moved to the end of
; a[] so that [rbp+rsi] style addressing walks a[] upwards towards offset 0.
958 lea rbp,[32+r10]
959 lea rsi,[r9*1+rsi]
960
961 mov rcx,r9
962
963
; First row: r14 = a[0], r15 = a[1]; rdi addresses tp[] with the same
; negative offset.
964 mov r14,QWORD[((-32))+rbp*1+rsi]
965 lea rdi,[((48+8))+r9*2+rsp]
966 mov rax,QWORD[((-24))+rbp*1+rsi]
967 lea rdi,[((-32))+rbp*1+rdi]
968 mov rbx,QWORD[((-16))+rbp*1+rsi]
969 mov r15,rax
970
971 mul r14
972 mov r10,rax
973 mov rax,rbx
974 mov r11,rdx
975 mov QWORD[((-24))+rbp*1+rdi],r10
976
977 mul r14
978 add r11,rax
979 mov rax,rbx
980 adc rdx,0
981 mov QWORD[((-16))+rbp*1+rdi],r11
982 mov r10,rdx
983
984
985 mov rbx,QWORD[((-8))+rbp*1+rsi]
986 mul r15
987 mov r12,rax
988 mov rax,rbx
989 mov r13,rdx
990
991 lea rcx,[rbp]
992 mul r14
993 add r10,rax
994 mov rax,rbx
995 mov r11,rdx
996 adc r11,0
997 add r10,r12
998 adc r11,0
999 mov QWORD[((-8))+rcx*1+rdi],r10
1000 jmp NEAR $L$sqr4x_1st
1001
1002 ALIGN 32
1003 $L$sqr4x_1st:
; First-row loop: four source words per iteration, each multiplied by both
; r14 and r15; results are stored (not added) because tp[] is still empty.
; rcx is the running offset and reaches 0 at the end of a[].
1004 mov rbx,QWORD[rcx*1+rsi]
1005 mul r15
1006 add r13,rax
1007 mov rax,rbx
1008 mov r12,rdx
1009 adc r12,0
1010
1011 mul r14
1012 add r11,rax
1013 mov rax,rbx
1014 mov rbx,QWORD[8+rcx*1+rsi]
1015 mov r10,rdx
1016 adc r10,0
1017 add r11,r13
1018 adc r10,0
1019
1020
1021 mul r15
1022 add r12,rax
1023 mov rax,rbx
1024 mov QWORD[rcx*1+rdi],r11
1025 mov r13,rdx
1026 adc r13,0
1027
1028 mul r14
1029 add r10,rax
1030 mov rax,rbx
1031 mov rbx,QWORD[16+rcx*1+rsi]
1032 mov r11,rdx
1033 adc r11,0
1034 add r10,r12
1035 adc r11,0
1036
1037 mul r15
1038 add r13,rax
1039 mov rax,rbx
1040 mov QWORD[8+rcx*1+rdi],r10
1041 mov r12,rdx
1042 adc r12,0
1043
1044 mul r14
1045 add r11,rax
1046 mov rax,rbx
1047 mov rbx,QWORD[24+rcx*1+rsi]
1048 mov r10,rdx
1049 adc r10,0
1050 add r11,r13
1051 adc r10,0
1052
1053
1054 mul r15
1055 add r12,rax
1056 mov rax,rbx
1057 mov QWORD[16+rcx*1+rdi],r11
1058 mov r13,rdx
1059 adc r13,0
1060 lea rcx,[32+rcx]
1061
1062 mul r14
1063 add r10,rax
1064 mov rax,rbx
1065 mov r11,rdx
1066 adc r11,0
1067 add r10,r12
1068 adc r11,0
1069 mov QWORD[((-8))+rcx*1+rdi],r10
1070
1071 cmp rcx,0
1072 jne NEAR $L$sqr4x_1st
1073
; Close the first row: last r15 product plus carry, two top words stored.
1074 mul r15
1075 add r13,rax
1076 lea rbp,[16+rbp]
1077 adc rdx,0
1078 add r13,r11
1079 adc rdx,0
1080
1081 mov QWORD[rdi],r13
1082 mov r12,rdx
1083 mov QWORD[8+rdi],rdx
1084 jmp NEAR $L$sqr4x_outer
1085
1086 ALIGN 32
1087 $L$sqr4x_outer:
; Later rows: the multiplier pair r14/r15 advances by two words per pass
; (rbp += 16) and products are added into the existing tp[] words.
1088 mov r14,QWORD[((-32))+rbp*1+rsi]
1089 lea rdi,[((48+8))+r9*2+rsp]
1090 mov rax,QWORD[((-24))+rbp*1+rsi]
1091 lea rdi,[((-32))+rbp*1+rdi]
1092 mov rbx,QWORD[((-16))+rbp*1+rsi]
1093 mov r15,rax
1094
1095 mul r14
1096 mov r10,QWORD[((-24))+rbp*1+rdi]
1097 add r10,rax
1098 mov rax,rbx
1099 adc rdx,0
1100 mov QWORD[((-24))+rbp*1+rdi],r10
1101 mov r11,rdx
1102
1103 mul r14
1104 add r11,rax
1105 mov rax,rbx
1106 adc rdx,0
1107 add r11,QWORD[((-16))+rbp*1+rdi]
1108 mov r10,rdx
1109 adc r10,0
1110 mov QWORD[((-16))+rbp*1+rdi],r11
1111
1112 xor r12,r12
1113
1114 mov rbx,QWORD[((-8))+rbp*1+rsi]
1115 mul r15
1116 add r12,rax
1117 mov rax,rbx
1118 adc rdx,0
1119 add r12,QWORD[((-8))+rbp*1+rdi]
1120 mov r13,rdx
1121 adc r13,0
1122
1123 mul r14
1124 add r10,rax
1125 mov rax,rbx
1126 adc rdx,0
1127 add r10,r12
1128 mov r11,rdx
1129 adc r11,0
1130 mov QWORD[((-8))+rbp*1+rdi],r10
1131
1132 lea rcx,[rbp]
1133 jmp NEAR $L$sqr4x_inner
1134
1135 ALIGN 32
1136 $L$sqr4x_inner:
; Two source words per iteration, each multiplied by r15 and r14 and added
; to tp[]; rcx counts up to 0.
1137 mov rbx,QWORD[rcx*1+rsi]
1138 mul r15
1139 add r13,rax
1140 mov rax,rbx
1141 mov r12,rdx
1142 adc r12,0
1143 add r13,QWORD[rcx*1+rdi]
1144 adc r12,0
1145
1146 DB 0x67
1147 mul r14
1148 add r11,rax
1149 mov rax,rbx
1150 mov rbx,QWORD[8+rcx*1+rsi]
1151 mov r10,rdx
1152 adc r10,0
1153 add r11,r13
1154 adc r10,0
1155
1156 mul r15
1157 add r12,rax
1158 mov QWORD[rcx*1+rdi],r11
1159 mov rax,rbx
1160 mov r13,rdx
1161 adc r13,0
1162 add r12,QWORD[8+rcx*1+rdi]
1163 lea rcx,[16+rcx]
1164 adc r13,0
1165
1166 mul r14
1167 add r10,rax
1168 mov rax,rbx
1169 adc rdx,0
1170 add r10,r12
1171 mov r11,rdx
1172 adc r11,0
1173 mov QWORD[((-8))+rcx*1+rdi],r10
1174
1175 cmp rcx,0
1176 jne NEAR $L$sqr4x_inner
1177
1178 DB 0x67
1179 mul r15
1180 add r13,rax
1181 adc rdx,0
1182 add r13,r11
1183 adc rdx,0
1184
1185 mov QWORD[rdi],r13
1186 mov r12,rdx
1187 mov QWORD[8+rdi],rdx
1188
1189 add rbp,16
1190 jnz NEAR $L$sqr4x_outer
1191
1192
; Last rows, handled straight-line: rbp is 0 here, so the four top source
; words are addressed at fixed negative offsets from rsi.
1193 mov r14,QWORD[((-32))+rsi]
1194 lea rdi,[((48+8))+r9*2+rsp]
1195 mov rax,QWORD[((-24))+rsi]
1196 lea rdi,[((-32))+rbp*1+rdi]
1197 mov rbx,QWORD[((-16))+rsi]
1198 mov r15,rax
1199
1200 mul r14
1201 add r10,rax
1202 mov rax,rbx
1203 mov r11,rdx
1204 adc r11,0
1205
1206 mul r14
1207 add r11,rax
1208 mov rax,rbx
1209 mov QWORD[((-24))+rdi],r10
1210 mov r10,rdx
1211 adc r10,0
1212 add r11,r13
1213 mov rbx,QWORD[((-8))+rsi]
1214 adc r10,0
1215
1216 mul r15
1217 add r12,rax
1218 mov rax,rbx
1219 mov QWORD[((-16))+rdi],r11
1220 mov r13,rdx
1221 adc r13,0
1222
1223 mul r14
1224 add r10,rax
1225 mov rax,rbx
1226 mov r11,rdx
1227 adc r11,0
1228 add r10,r12
1229 adc r11,0
1230 mov QWORD[((-8))+rdi],r10
1231
1232 mul r15
1233 add r13,rax
1234 mov rax,QWORD[((-16))+rsi]
1235 adc rdx,0
1236 add r13,r11
1237 adc rdx,0
1238
1239 mov QWORD[rdi],r13
1240 mov r12,rdx
1241 mov QWORD[8+rdi],rdx
1242
; Final cross product of the two top source words.  rbp becomes 16 - num*8
; for the shift-and-add phase; r14 and r15 are cleared for use as the
; inter-word shift bit and the saved carry.
1243 mul rbx
1244 add rbp,16
1245 xor r14,r14
1246 sub rbp,r9
1247 xor r15,r15
1248
1249 add rax,r12
1250 adc rdx,0
1251 mov QWORD[8+rdi],rax
1252 mov QWORD[16+rdi],rdx
1253 mov QWORD[24+rdi],r15
1254
; Shift-and-add phase.  rax = a[0], rdi = start of tp[].  Each step:
;   lea x,[w*2+bit] / shr w,63 -- shift a tp word left by one, carrying the
;                                 top bit into the next word (rcx is 0 here,
;                                 the exit value of the loops above);
;   mul rax                    -- square the current source word;
;   neg r15 / adc / sbb r15,r15 -- reload the saved carry into CF, add the
;                                 square, and save the new carry in r15.
1255 mov rax,QWORD[((-16))+rbp*1+rsi]
1256 lea rdi,[((48+8))+rsp]
1257 xor r10,r10
1258 mov r11,QWORD[8+rdi]
1259
1260 lea r12,[r10*2+r14]
1261 shr r10,63
1262 lea r13,[r11*2+rcx]
1263 shr r11,63
1264 or r13,r10
1265 mov r10,QWORD[16+rdi]
1266 mov r14,r11
1267 mul rax
1268 neg r15
1269 mov r11,QWORD[24+rdi]
1270 adc r12,rax
1271 mov rax,QWORD[((-8))+rbp*1+rsi]
1272 mov QWORD[rdi],r12
1273 adc r13,rdx
1274
1275 lea rbx,[r10*2+r14]
1276 mov QWORD[8+rdi],r13
1277 sbb r15,r15
1278 shr r10,63
1279 lea r8,[r11*2+rcx]
1280 shr r11,63
1281 or r8,r10
1282 mov r10,QWORD[32+rdi]
1283 mov r14,r11
1284 mul rax
1285 neg r15
1286 mov r11,QWORD[40+rdi]
1287 adc rbx,rax
1288 mov rax,QWORD[rbp*1+rsi]
1289 mov QWORD[16+rdi],rbx
1290 adc r8,rdx
1291 lea rbp,[16+rbp]
1292 mov QWORD[24+rdi],r8
1293 sbb r15,r15
1294 lea rdi,[64+rdi]
1295 jmp NEAR $L$sqr4x_shift_n_add
1296
1297 ALIGN 32
1298 $L$sqr4x_shift_n_add:
; Four source words (eight tp words) per iteration; rbp counts up to 0.
1299 lea r12,[r10*2+r14]
1300 shr r10,63
1301 lea r13,[r11*2+rcx]
1302 shr r11,63
1303 or r13,r10
1304 mov r10,QWORD[((-16))+rdi]
1305 mov r14,r11
1306 mul rax
1307 neg r15
1308 mov r11,QWORD[((-8))+rdi]
1309 adc r12,rax
1310 mov rax,QWORD[((-8))+rbp*1+rsi]
1311 mov QWORD[((-32))+rdi],r12
1312 adc r13,rdx
1313
1314 lea rbx,[r10*2+r14]
1315 mov QWORD[((-24))+rdi],r13
1316 sbb r15,r15
1317 shr r10,63
1318 lea r8,[r11*2+rcx]
1319 shr r11,63
1320 or r8,r10
1321 mov r10,QWORD[rdi]
1322 mov r14,r11
1323 mul rax
1324 neg r15
1325 mov r11,QWORD[8+rdi]
1326 adc rbx,rax
1327 mov rax,QWORD[rbp*1+rsi]
1328 mov QWORD[((-16))+rdi],rbx
1329 adc r8,rdx
1330
1331 lea r12,[r10*2+r14]
1332 mov QWORD[((-8))+rdi],r8
1333 sbb r15,r15
1334 shr r10,63
1335 lea r13,[r11*2+rcx]
1336 shr r11,63
1337 or r13,r10
1338 mov r10,QWORD[16+rdi]
1339 mov r14,r11
1340 mul rax
1341 neg r15
1342 mov r11,QWORD[24+rdi]
1343 adc r12,rax
1344 mov rax,QWORD[8+rbp*1+rsi]
1345 mov QWORD[rdi],r12
1346 adc r13,rdx
1347
1348 lea rbx,[r10*2+r14]
1349 mov QWORD[8+rdi],r13
1350 sbb r15,r15
1351 shr r10,63
1352 lea r8,[r11*2+rcx]
1353 shr r11,63
1354 or r8,r10
1355 mov r10,QWORD[32+rdi]
1356 mov r14,r11
1357 mul rax
1358 neg r15
1359 mov r11,QWORD[40+rdi]
1360 adc rbx,rax
1361 mov rax,QWORD[16+rbp*1+rsi]
1362 mov QWORD[16+rdi],rbx
1363 adc r8,rdx
1364 mov QWORD[24+rdi],r8
1365 sbb r15,r15
1366 lea rdi,[64+rdi]
1367 add rbp,32
1368 jnz NEAR $L$sqr4x_shift_n_add
1369
; Tail: the last two source words.
1370 lea r12,[r10*2+r14]
1371 DB 0x67
1372 shr r10,63
1373 lea r13,[r11*2+rcx]
1374 shr r11,63
1375 or r13,r10
1376 mov r10,QWORD[((-16))+rdi]
1377 mov r14,r11
1378 mul rax
1379 neg r15
1380 mov r11,QWORD[((-8))+rdi]
1381 adc r12,rax
1382 mov rax,QWORD[((-8))+rsi]
1383 mov QWORD[((-32))+rdi],r12
1384 adc r13,rdx
1385
1386 lea rbx,[r10*2+r14]
1387 mov QWORD[((-24))+rdi],r13
1388 sbb r15,r15
1389 shr r10,63
1390 lea r8,[r11*2+rcx]
1391 shr r11,63
1392 or r8,r10
1393 mul rax
1394 neg r15
1395 adc rbx,rax
1396 adc r8,rdx
1397 mov QWORD[((-16))+rdi],rbx
1398 mov QWORD[((-8))+rdi],r8
; movq rbp,xmm2 -- reload the pointer stashed by the caller; execution then
; continues into sqr8x_reduction.
1399 DB 102,72,15,126,213
; sqr8x_reduction -- reduces the double-width value in tp[] eight words at a
; time, then subtracts through $L$sqr4x_sub and returns.  Reached by
; fall-through from bn_sqr8x_internal and by call from bn_from_mont8x.
;
; Inputs as read by the code:
;   rbp = second operand, read at a 16-byte stride (np, assumed the modulus)
;   r9  = num*8
;   xmm1 = result pointer, xmm2 = copy of rbp, xmm3 = -(num*8)
;   [(32+8)+rsp] = qword multiplier (n0, by convention -- TODO confirm)
;   tp[] = 2*num words starting at [(48+8)+rsp]
; Frame offsets carry "+8" because a return address is on the stack.
; On return r9 = +num*8 and r10 = -(num*8).
;
; DB encodings used here:
;   102,72,15,126,213 = movq rbp,xmm2    102,73,15,126,217 = movq r9,xmm3
;   102,72,15,126,207 = movq rdi,xmm1    102,72,15,126,206 = movq rsi,xmm1
; "DB 0x66" / "DB 0x67" emit a lone prefix byte ahead of the next
; instruction (presumably padding).
1400 sqr8x_reduction:
; [(0+8)+rsp] = end of np (rbp + 2*num*8); [(8+8)+rsp] = end of tp[];
; rdi is set so that the first "lea rdi,[r9+rdi]" in the loop lands on tp[0].
1401 xor rax,rax
1402 lea rcx,[r9*2+rbp]
1403 lea rdx,[((48+8))+r9*2+rsp]
1404 mov QWORD[((0+8))+rsp],rcx
1405 lea rdi,[((48+8))+r9*1+rsp]
1406 mov QWORD[((8+8))+rsp],rdx
1407 neg r9
1408 jmp NEAR $L$8x_reduction_loop
1409
1410 ALIGN 32
1411 $L$8x_reduction_loop:
; Load the next eight tp words into rbx, r9..r15; the carry from the
; previous window (rax) is stored at the top of tp[] ([rdx]).
1412 lea rdi,[r9*1+rdi]
1413 DB 0x66
1414 mov rbx,QWORD[rdi]
1415 mov r9,QWORD[8+rdi]
1416 mov r10,QWORD[16+rdi]
1417 mov r11,QWORD[24+rdi]
1418 mov r12,QWORD[32+rdi]
1419 mov r13,QWORD[40+rdi]
1420 mov r14,QWORD[48+rdi]
1421 mov r15,QWORD[56+rdi]
1422 mov QWORD[rdx],rax
1423 lea rdi,[64+rdi]
1424
1425 DB 0x67
; r8 = low word; rbx = low word * multiplier: the factor applied to np[].
1426 mov r8,rbx
1427 imul rbx,QWORD[((32+8))+rsp]
1428 mov rax,QWORD[rbp]
1429 mov ecx,8
1430 jmp NEAR $L$8x_reduce
1431
1432 ALIGN 32
1433 $L$8x_reduce:
; One of eight rounds: add rbx * np[0..7] to the window and shift it down
; by one word.  "neg r8" sets CF when r8 is non-zero; that CF is the carry
; taken out of the discarded low word.
1434 mul rbx
1435 mov rax,QWORD[16+rbp]
1436 neg r8
1437 mov r8,rdx
1438 adc r8,0
1439
1440 mul rbx
1441 add r9,rax
1442 mov rax,QWORD[32+rbp]
1443 adc rdx,0
1444 add r8,r9
; Save this round's factor in a scratch slot; $L$8x_tail reads it back.
1445 mov QWORD[((48-8+8))+rcx*8+rsp],rbx
1446 mov r9,rdx
1447 adc r9,0
1448
1449 mul rbx
1450 add r10,rax
1451 mov rax,QWORD[48+rbp]
1452 adc rdx,0
1453 add r9,r10
1454 mov rsi,QWORD[((32+8))+rsp]
1455 mov r10,rdx
1456 adc r10,0
1457
1458 mul rbx
1459 add r11,rax
1460 mov rax,QWORD[64+rbp]
1461 adc rdx,0
; rsi = multiplier * new low word: the factor for the next round.
1462 imul rsi,r8
1463 add r10,r11
1464 mov r11,rdx
1465 adc r11,0
1466
1467 mul rbx
1468 add r12,rax
1469 mov rax,QWORD[80+rbp]
1470 adc rdx,0
1471 add r11,r12
1472 mov r12,rdx
1473 adc r12,0
1474
1475 mul rbx
1476 add r13,rax
1477 mov rax,QWORD[96+rbp]
1478 adc rdx,0
1479 add r12,r13
1480 mov r13,rdx
1481 adc r13,0
1482
1483 mul rbx
1484 add r14,rax
1485 mov rax,QWORD[112+rbp]
1486 adc rdx,0
1487 add r13,r14
1488 mov r14,rdx
1489 adc r14,0
1490
1491 mul rbx
1492 mov rbx,rsi
1493 add r15,rax
1494 mov rax,QWORD[rbp]
1495 adc rdx,0
1496 add r14,r15
1497 mov r15,rdx
1498 adc r15,0
1499
1500 dec ecx
1501 jnz NEAR $L$8x_reduce
1502
; Advance np by eight 16-byte slots; if that was all of np there is no tail.
1503 lea rbp,[128+rbp]
1504 xor rax,rax
1505 mov rdx,QWORD[((8+8))+rsp]
1506 cmp rbp,QWORD[((0+8))+rsp]
1507 jae NEAR $L$8x_no_tail
1508
1509 DB 0x66
; Add the next eight tp words into the window; rsi keeps the carry as 0/-1.
1510 add r8,QWORD[rdi]
1511 adc r9,QWORD[8+rdi]
1512 adc r10,QWORD[16+rdi]
1513 adc r11,QWORD[24+rdi]
1514 adc r12,QWORD[32+rdi]
1515 adc r13,QWORD[40+rdi]
1516 adc r14,QWORD[48+rdi]
1517 adc r15,QWORD[56+rdi]
1518 sbb rsi,rsi
1519
; rbx = first saved factor (the slot written when ecx was 8).
1520 mov rbx,QWORD[((48+56+8))+rsp]
1521 mov ecx,8
1522 mov rax,QWORD[rbp]
1523 jmp NEAR $L$8x_tail
1524
1525 ALIGN 32
1526 $L$8x_tail:
; Tail rounds: apply each saved factor to the next eight np words, emitting
; one finished tp word per round.
1527 mul rbx
1528 add r8,rax
1529 mov rax,QWORD[16+rbp]
1530 mov QWORD[rdi],r8
1531 mov r8,rdx
1532 adc r8,0
1533
1534 mul rbx
1535 add r9,rax
1536 mov rax,QWORD[32+rbp]
1537 adc rdx,0
1538 add r8,r9
1539 lea rdi,[8+rdi]
1540 mov r9,rdx
1541 adc r9,0
1542
1543 mul rbx
1544 add r10,rax
1545 mov rax,QWORD[48+rbp]
1546 adc rdx,0
1547 add r9,r10
1548 mov r10,rdx
1549 adc r10,0
1550
1551 mul rbx
1552 add r11,rax
1553 mov rax,QWORD[64+rbp]
1554 adc rdx,0
1555 add r10,r11
1556 mov r11,rdx
1557 adc r11,0
1558
1559 mul rbx
1560 add r12,rax
1561 mov rax,QWORD[80+rbp]
1562 adc rdx,0
1563 add r11,r12
1564 mov r12,rdx
1565 adc r12,0
1566
1567 mul rbx
1568 add r13,rax
1569 mov rax,QWORD[96+rbp]
1570 adc rdx,0
1571 add r12,r13
1572 mov r13,rdx
1573 adc r13,0
1574
1575 mul rbx
1576 add r14,rax
1577 mov rax,QWORD[112+rbp]
1578 adc rdx,0
1579 add r13,r14
1580 mov r14,rdx
1581 adc r14,0
1582
1583 mul rbx
; Fetch the next saved factor for the following round.
1584 mov rbx,QWORD[((48-16+8))+rcx*8+rsp]
1585 add r15,rax
1586 adc rdx,0
1587 add r14,r15
1588 mov rax,QWORD[rbp]
1589 mov r15,rdx
1590 adc r15,0
1591
1592 dec ecx
1593 jnz NEAR $L$8x_tail
1594
1595 lea rbp,[128+rbp]
1596 mov rdx,QWORD[((8+8))+rsp]
1597 cmp rbp,QWORD[((0+8))+rsp]
1598 jae NEAR $L$8x_tail_done
1599
; More np left: "neg rsi" turns the saved 0/-1 back into CF, then the next
; eight tp words are added and the new carry is saved again.
1600 mov rbx,QWORD[((48+56+8))+rsp]
1601 neg rsi
1602 mov rax,QWORD[rbp]
1603 adc r8,QWORD[rdi]
1604 adc r9,QWORD[8+rdi]
1605 adc r10,QWORD[16+rdi]
1606 adc r11,QWORD[24+rdi]
1607 adc r12,QWORD[32+rdi]
1608 adc r13,QWORD[40+rdi]
1609 adc r14,QWORD[48+rdi]
1610 adc r15,QWORD[56+rdi]
1611 sbb rsi,rsi
1612
1613 mov ecx,8
1614 jmp NEAR $L$8x_tail
1615
1616 ALIGN 32
1617 $L$8x_tail_done:
; Add in the carry word that was stored at the top of tp[], then restore CF
; from rsi for the final addition.
1618 add r8,QWORD[rdx]
1619 xor rax,rax
1620
1621 neg rsi
1622 $L$8x_no_tail:
; Add the window to the next eight tp words; rax = carry out.
1623 adc r8,QWORD[rdi]
1624 adc r9,QWORD[8+rdi]
1625 adc r10,QWORD[16+rdi]
1626 adc r11,QWORD[24+rdi]
1627 adc r12,QWORD[32+rdi]
1628 adc r13,QWORD[40+rdi]
1629 adc r14,QWORD[48+rdi]
1630 adc r15,QWORD[56+rdi]
1631 adc rax,0
; rcx = np word at [rbp-16]; used in the comparison after the loop.
1632 mov rcx,QWORD[((-16))+rbp]
1633 xor rsi,rsi
1634
; movq rbp,xmm2 -- rewind np to its start.
1635 DB 102,72,15,126,213
1636
1637 mov QWORD[rdi],r8
1638 mov QWORD[8+rdi],r9
; movq r9,xmm3 -- r9 = -(num*8) again (r9 was used as an accumulator).
1639 DB 102,73,15,126,217
1640 mov QWORD[16+rdi],r10
1641 mov QWORD[24+rdi],r11
1642 mov QWORD[32+rdi],r12
1643 mov QWORD[40+rdi],r13
1644 mov QWORD[48+rdi],r14
1645 mov QWORD[56+rdi],r15
1646 lea rdi,[64+rdi]
1647
1648 cmp rdi,rdx
1649 jb NEAR $L$8x_reduction_loop
1650
; Decide whether to subtract: CF from "sub rcx,r15" is set when the top
; result word exceeds the np word in rcx; it is merged with the carry in
; rax and inverted, so rax = 0 if either was set, else 1.  rbp is then moved
; by rax*8, which selects which 8-byte half of each 16-byte np slot is
; subtracted.  rbx = start of the upper half of tp[], rdi = rsi = result
; pointer, rcx = -(num/4) loop counter.
1651 sub rcx,r15
1652 lea rbx,[r9*1+rdi]
1653 adc rsi,rsi
1654 mov rcx,r9
1655 or rax,rsi
1656 DB 102,72,15,126,207
1657 xor rax,1
1658 DB 102,72,15,126,206
1659 lea rbp,[rax*8+rbp]
1660 sar rcx,3+2
1661 jmp NEAR $L$sqr4x_sub
1662
1663 ALIGN 32
1664 $L$sqr4x_sub:
; Shared subtract loop (also entered from mul4x_internal):
; [rdi] = [rbx] - [rbp], four words per iteration, np read at a 16-byte
; stride.  mov, lea and inc do not modify CF, so the borrow chain carries
; across iterations; rcx counts up to 0.
1665 DB 0x66
1666 mov r12,QWORD[rbx]
1667 mov r13,QWORD[8+rbx]
1668 sbb r12,QWORD[rbp]
1669 mov r14,QWORD[16+rbx]
1670 sbb r13,QWORD[16+rbp]
1671 mov r15,QWORD[24+rbx]
1672 lea rbx,[32+rbx]
1673 sbb r14,QWORD[32+rbp]
1674 mov QWORD[rdi],r12
1675 sbb r15,QWORD[48+rbp]
1676 lea rbp,[64+rbp]
1677 mov QWORD[8+rdi],r13
1678 mov QWORD[16+rdi],r14
1679 mov QWORD[24+rdi],r15
1680 lea rdi,[32+rdi]
1681
1682 inc rcx
1683 jnz NEAR $L$sqr4x_sub
; Leave r10 = -(num*8) and r9 = +num*8 for the caller.
1684 mov r10,r9
1685 neg r9
1686 DB 0F3h,0C3h ;repret
1687
1688 global bn_from_montgomery
1689
1690 ALIGN 32
;-----------------------------------------------------------------------
; bn_from_montgomery -- Win64 dispatcher.
;
; In:   6th argument (32-bit value at [48+rsp] on entry).
; Does: when the low three bits of that value are all zero, tail-jumps to
;       bn_from_mont8x with the stack and argument registers untouched;
;       otherwise returns immediately.
; Out:  eax = 0 on the immediate-return path.
;-----------------------------------------------------------------------
bn_from_montgomery:
	test	DWORD[48+rsp],7
	jnz	NEAR $L$from_mont_reject	; low bits set: not handled here
	jmp	NEAR bn_from_mont8x		; multiple of 8: hand over
$L$from_mont_reject:
	xor	eax,eax
	DB	0F3h,0C3h			;repret
1696
1697
1698
1699 ALIGN 32
; bn_from_mont8x -- Win64 body behind bn_from_montgomery.
;
; Arguments are remapped as in the other entry points (rcx, rdx, r8, r9,
; [40+rsp], [48+rsp] -> rdi, rsi, rdx, rcx, r8, r9).  The code copies the
; num-word input at rsi into the low half of a 2*num-word scratch area,
; zeroes the high half, calls sqr8x_reduction on it, zeroes the scratch and
; returns rax = 1.
;
; xmm6/xmm7 are stored in the prologue but not reloaded in the epilogue; no
; instruction in this function or in sqr8x_reduction writes to them.
;
; DB encodings used here:
;   102,72,15,110,207 = movq xmm1,rdi    102,72,15,110,209 = movq xmm2,rcx
;   102,73,15,110,218 = movq xmm3,r10
;   0x48,0x8d,0xb6,0x40,0x00,0x00,0x00 = lea rsi,[rsi+0x40] (32-bit disp form)
; The "DB 0x67" lines emit a lone prefix byte ahead of the next instruction
; (presumably padding).
1700 bn_from_mont8x:
1701 mov QWORD[8+rsp],rdi ;WIN64 prologue
1702 mov QWORD[16+rsp],rsi
1703 mov rax,rsp
1704 $L$SEH_begin_bn_from_mont8x:
1705 mov rdi,rcx
1706 mov rsi,rdx
1707 mov rdx,r8
1708 mov rcx,r9
1709 mov r8,QWORD[40+rsp]
1710 mov r9,QWORD[48+rsp]
1711
1712
1713 DB 0x67
; rax = entry rsp; the pushed registers are later reloaded relative to it.
1714 mov rax,rsp
1715 push rbx
1716 push rbp
1717 push r12
1718 push r13
1719 push r14
1720 push r15
1721 lea rsp,[((-40))+rsp]
1722 movaps XMMWORD[rsp],xmm6
1723 movaps XMMWORD[16+rsp],xmm7
1724 DB 0x67
; r9 = -(num*8), r10 = num*32, r8 = qword pointed to by the 5th argument.
1725 mov r10d,r9d
1726 shl r9d,3
1727 shl r10d,3+2
1728 neg r9
1729 mov r8,QWORD[r8]
1730
1731
1732
1733
1734
1735
1736
; r11 = (candidate frame bottom - rsi) mod 4096; the frame is placed
; according to how that distance compares with num*32 (same scheme as the
; other entry points in this file).
1737 lea r11,[((-64))+r9*2+rsp]
1738 sub r11,rsi
1739 and r11,4095
1740 cmp r10,r11
1741 jb NEAR $L$from_sp_alt
1742 sub rsp,r11
1743 lea rsp,[((-64))+r9*2+rsp]
1744 jmp NEAR $L$from_sp_done
1745
1746 ALIGN 32
1747 $L$from_sp_alt:
1748 lea r10,[((4096-64))+r9*2]
1749 lea rsp,[((-64))+r9*2+rsp]
1750 sub r11,r10
; mov (not xor) keeps the CF from the sub above valid for cmovc.
1751 mov r10,0
1752 cmovc r11,r10
1753 sub rsp,r11
1754 $L$from_sp_done:
; 64-byte-align the frame; r10 = -(num*8), r9 = +num*8.
1755 and rsp,-64
1756 mov r10,r9
1757 neg r9
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
; [32+rsp] = multiplier value, [40+rsp] = entry rsp.
1768 mov QWORD[32+rsp],r8
1769 mov QWORD[40+rsp],rax
1770 $L$from_body:
; r11 = bytes left to copy, rax = write cursor at the start of the scratch.
1771 mov r11,r9
1772 lea rax,[48+rsp]
1773 pxor xmm0,xmm0
1774 jmp NEAR $L$mul_by_1
1775
1776 ALIGN 32
1777 $L$mul_by_1:
; Per iteration: copy 64 bytes from rsi to [rax] and zero the 64 bytes at
; [rax + num*8], i.e. the matching slice of the upper half.
1778 movdqu xmm1,XMMWORD[rsi]
1779 movdqu xmm2,XMMWORD[16+rsi]
1780 movdqu xmm3,XMMWORD[32+rsi]
1781 movdqa XMMWORD[r9*1+rax],xmm0
1782 movdqu xmm4,XMMWORD[48+rsi]
1783 movdqa XMMWORD[16+r9*1+rax],xmm0
; lea rsi,[rsi+0x40]
1784 DB 0x48,0x8d,0xb6,0x40,0x00,0x00,0x00
1785 movdqa XMMWORD[rax],xmm1
1786 movdqa XMMWORD[32+r9*1+rax],xmm0
1787 movdqa XMMWORD[16+rax],xmm2
1788 movdqa XMMWORD[48+r9*1+rax],xmm0
1789 movdqa XMMWORD[32+rax],xmm3
1790 movdqa XMMWORD[48+rax],xmm4
1791 lea rax,[64+rax]
1792 sub r11,64
1793 jnz NEAR $L$mul_by_1
1794
; Hand the result pointer (xmm1), the rcx operand (xmm2 and rbp) and
; -(num*8) (xmm3) to sqr8x_reduction.
1795 DB 102,72,15,110,207
1796 DB 102,72,15,110,209
1797 DB 0x67
1798 mov rbp,rcx
1799 DB 102,73,15,110,218
1800 call sqr8x_reduction
1801
; Wipe the scratch: 64 bytes are cleared for every 32 subtracted from r9
; (= num*8 after the call), so 2*num*8 bytes in total.
1802 pxor xmm0,xmm0
1803 lea rax,[48+rsp]
1804 mov rsi,QWORD[40+rsp]
1805 jmp NEAR $L$from_mont_zero
1806
1807 ALIGN 32
1808 $L$from_mont_zero:
1809 movdqa XMMWORD[rax],xmm0
1810 movdqa XMMWORD[16+rax],xmm0
1811 movdqa XMMWORD[32+rax],xmm0
1812 movdqa XMMWORD[48+rax],xmm0
1813 lea rax,[64+rax]
1814 sub r9,32
1815 jnz NEAR $L$from_mont_zero
1816
; Epilogue: reload the pushed registers relative to the entry rsp (rsi).
1817 mov rax,1
1818 mov r15,QWORD[((-48))+rsi]
1819 mov r14,QWORD[((-40))+rsi]
1820 mov r13,QWORD[((-32))+rsi]
1821 mov r12,QWORD[((-24))+rsi]
1822 mov rbp,QWORD[((-16))+rsi]
1823 mov rbx,QWORD[((-8))+rsi]
1824 lea rsp,[rsi]
1825 $L$from_epilogue:
1826 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
1827 mov rsi,QWORD[16+rsp]
1828 DB 0F3h,0C3h ;repret
1829 $L$SEH_end_bn_from_mont8x:
1830 global bn_scatter5
1831
; bn_scatter5 -- store a run of qwords into a strided table.
; Uses the Win64 argument registers directly (no prologue, leaf function):
;   rcx = source pointer, read sequentially 8 bytes at a time
;   edx = number of qwords to store; 0 returns immediately
;   r8  = table base
;   r9  = slot index; the first store goes to r8 + 8*r9
; Successive qwords are written 256 bytes apart.
; Clobbers rax, rcx, edx, r8 and the flags.
1832 ALIGN 16
1833 bn_scatter5:
1834 cmp edx,0
1835 jz NEAR $L$scatter_epilogue ; nothing to store
1836 lea r8,[r9*8+r8] ; r8 = address of the selected slot in the first row
1837 $L$scatter:
1838 mov rax,QWORD[rcx] ; fetch the next source qword
1839 lea rcx,[8+rcx]
1840 mov QWORD[r8],rax ; write it into the current row
1841 lea r8,[256+r8] ; advance to the same slot in the next row
1842 sub edx,1
1843 jnz NEAR $L$scatter
1844 $L$scatter_epilogue:
1845 DB 0F3h,0C3h ;repret (bytes F3 C3 = "rep ret")
1846
1847
1848 global bn_gather5
1849
; bn_gather5 -- read back one slot from the strided table written by
; bn_scatter5.  Uses the Win64 argument registers directly:
;   rcx = destination pointer, written sequentially 8 bytes at a time
;   edx = number of qwords to produce; the count is tested only after the
;         first store, so it must be at least 1
;   r8  = table base
;   r9d = slot index
; For every 256-byte row, four candidate qwords lying 64 bytes apart are
; loaded, each is ANDed with its own mask and the results are ORed together.
; Exactly one of the four masks is all ones (picked by bits 3-4 of the index),
; bits 0-2 of the index pick the qword inside each 64-byte group.
;
; The three prologue instructions are emitted as raw bytes; their lengths
; (4, 4 and 5 bytes) are the offsets 0x04, 0x08 and 0x0d recorded in the unwind
; codes at $L$SEH_info_bn_gather5, so they must not be re-encoded.
1850 ALIGN 16
1851 bn_gather5:
1852 $L$SEH_begin_bn_gather5:
1853
1854 DB 0x48,0x83,0xec,0x28 ; sub rsp,0x28
1855 DB 0x0f,0x29,0x34,0x24 ; movaps [rsp],xmm6
1856 DB 0x0f,0x29,0x7c,0x24,0x10 ; movaps [rsp+0x10],xmm7
1857 mov r11d,r9d
1858 shr r9d,3
1859 and r11,7 ; r11 = index & 7: qword position inside a 64-byte group
1860 not r9d
1861 lea rax,[$L$magic_masks]
1862 and r9d,3 ; r9 = 3 - ((index >> 3) & 3)
1863 lea r8,[128+r11*8+r8] ; bias by 128 so the four loads use displacements -128..+64
1864 movq xmm4,QWORD[r9*8+rax] ; four consecutive qwords of the mask table; the single
1865 movq xmm5,QWORD[8+r9*8+rax] ; all-ones qword (4th entry of the table) lands in
1866 movq xmm6,QWORD[16+r9*8+rax] ; xmm(4+k) with k = (index >> 3) & 3
1867 movq xmm7,QWORD[24+r9*8+rax]
1868 jmp NEAR $L$gather
1869 ALIGN 16
1870 $L$gather:
1871 movq xmm0,QWORD[(((-128)))+r8] ; candidate from group 0
1872 movq xmm1,QWORD[((-64))+r8] ; candidate from group 1
1873 pand xmm0,xmm4
1874 movq xmm2,QWORD[r8] ; candidate from group 2
1875 pand xmm1,xmm5
1876 movq xmm3,QWORD[64+r8] ; candidate from group 3
1877 pand xmm2,xmm6
1878 por xmm0,xmm1
1879 pand xmm3,xmm7
1880 DB 0x67,0x67 ; two prefix bytes in front of the next por; presumably padding -- confirm
1881 por xmm0,xmm2
1882 lea r8,[256+r8] ; next row
1883 por xmm0,xmm3 ; xmm0 = the one candidate whose mask was all ones
1884
1885 movq QWORD[rcx],xmm0
1886 lea rcx,[8+rcx]
1887 sub edx,1
1888 jnz NEAR $L$gather
1889 movaps xmm6,XMMWORD[rsp] ; restore the callee-saved xmm registers
1890 movaps xmm7,XMMWORD[16+rsp]
1891 lea rsp,[40+rsp]
1892 DB 0F3h,0C3h ;repret (bytes F3 C3 = "rep ret")
1893 $L$SEH_end_bn_gather5:
1894
1895 ALIGN 64
; Mask table for the gather code.  Read as eight qwords, only the fourth one
; (dwords 6 and 7 of the first row) is all ones; every other qword is zero.
; The gather code loads four consecutive qwords starting at index 0..3, so
; exactly one of the four loaded masks is non-zero.
1896 $L$magic_masks:
1897 DD 0,0,0,0,0,0,-1,-1
1898 DD 0,0,0,0,0,0,0,0
; NUL-terminated ASCII banner:
; "Montgomery Multiplication with scatter/gather for x86_64, CRYPTOGAMS by <appro@openssl.org>"
1899 DB 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
1900 DB 112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115
1901 DB 99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111
1902 DB 114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79
1903 DB 71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111
1904 DB 112,101,110,115,115,108,46,111,114,103,62,0
1905 EXTERN __imp_RtlVirtualUnwind
1906
1907 ALIGN 16
; mul_handler -- Win64 language-specific exception handler shared by the
; functions whose .xdata entries name it (see the $L$SEH_info_* records).
;
; Called by the OS unwinder with
;   rcx = exception record, rdx = establisher frame,
;   r8  = context record,   r9  = dispatcher context.
; The field names used in the comments follow the Win64 CONTEXT and
; DISPATCHER_CONTEXT layouts; the numeric offsets are as generated and should
; be checked against winnt.h when touching this code.
;
; HandlerData (dispatcher context offset 56) points at the two image-relative
; labels stored after the handler address in .xdata: [0] = start of the
; function body, [1] = start of its epilogue.
;
;   Rip <  body      : frame not built yet, the caller's rsp is context->Rax
;   Rip >= epilogue  : frame already torn down, use context->Rsp
;   otherwise        : fetch the entry rsp that the function saved in its
;                      frame and recover rbx, rbp, r12-r15, xmm6, xmm7 from
;                      below it.
;
; Where the entry rsp was saved depends on the function:
;   * bn_mul_mont_gather5 (the only covered body located before
;     $L$mul_epilogue) stores it at [8 + r9*8 + rsp];
;   * the functions located after $L$mul_epilogue store it at [40 + rsp]
;     (see "mov QWORD[40+rsp],rax" in bn_from_mont8x).
; The branch on $L$mul_epilogue therefore has to send addresses at or above
; that label to $L$body_40.  It used to be "jb", which swapped the two cases.
;
; Afterwards the patched context is copied into the dispatcher's context
; record, RtlVirtualUnwind is invoked, and 1 is returned
; (ExceptionContinueSearch in the Win64 EXCEPTION_DISPOSITION enum).
1908 mul_handler:
1909 push rsi
1910 push rdi
1911 push rbx
1912 push rbp
1913 push r12
1914 push r13
1915 push r14
1916 push r15
1917 pushfq
1918 sub rsp,64 ; 32 bytes shadow space + stack slots for arguments 5-8 of RtlVirtualUnwind
1919
1920 mov rax,QWORD[120+r8] ; context->Rax
1921 mov rbx,QWORD[248+r8] ; context->Rip
1922
1923 mov rsi,QWORD[8+r9] ; disp->ImageBase
1924 mov r11,QWORD[56+r9] ; disp->HandlerData
1925
1926 mov r10d,DWORD[r11] ; HandlerData[0]: image-relative body label
1927 lea r10,[r10*1+rsi] ; -> absolute address
1928 cmp rbx,r10
1929 jb NEAR $L$common_seh_tail ; Rip before the body: nothing to restore
1930
1931 mov rax,QWORD[152+r8] ; context->Rsp
1932
1933 mov r10d,DWORD[4+r11] ; HandlerData[1]: image-relative epilogue label
1934 lea r10,[r10*1+rsi] ; -> absolute address
1935 cmp rbx,r10
1936 jae NEAR $L$common_seh_tail ; Rip at/after the epilogue: registers already restored
1937
1938 lea r10,[$L$mul_epilogue]
1939 cmp rbx,r10
1940 jae NEAR $L$body_40 ; body located after bn_mul_mont_gather5: fixed slot [40+rsp]
1941
1942 mov r10,QWORD[192+r8] ; context->R9 (element count inside bn_mul_mont_gather5)
1943 mov rax,QWORD[8+r10*8+rax] ; entry rsp saved at [8 + r9*8 + rsp]
1944 jmp NEAR $L$body_proceed
1945
1946 $L$body_40:
1947 mov rax,QWORD[40+rax] ; entry rsp saved at [40 + rsp]
1948 $L$body_proceed:
1949
1950 movaps xmm0,XMMWORD[((-88))+rax] ; saved xmm6
1951 movaps xmm1,XMMWORD[((-72))+rax] ; saved xmm7
1952
1953 mov rbx,QWORD[((-8))+rax] ; the six registers pushed by the prologue
1954 mov rbp,QWORD[((-16))+rax]
1955 mov r12,QWORD[((-24))+rax]
1956 mov r13,QWORD[((-32))+rax]
1957 mov r14,QWORD[((-40))+rax]
1958 mov r15,QWORD[((-48))+rax]
1959 mov QWORD[144+r8],rbx ; context->Rbx
1960 mov QWORD[160+r8],rbp ; context->Rbp
1961 mov QWORD[216+r8],r12 ; context->R12
1962 mov QWORD[224+r8],r13 ; context->R13
1963 mov QWORD[232+r8],r14 ; context->R14
1964 mov QWORD[240+r8],r15 ; context->R15
1965 movups XMMWORD[512+r8],xmm0 ; context->Xmm6
1966 movups XMMWORD[528+r8],xmm1 ; context->Xmm7
1967
1968 $L$common_seh_tail:
1969 mov rdi,QWORD[8+rax] ; rdi/rsi as parked in the home slots by the WIN64 prologue
1970 mov rsi,QWORD[16+rax]
1971 mov QWORD[152+r8],rax ; context->Rsp
1972 mov QWORD[168+r8],rsi ; context->Rsi
1973 mov QWORD[176+r8],rdi ; context->Rdi
1974
1975 mov rdi,QWORD[40+r9] ; destination: disp->ContextRecord
1976 mov rsi,r8 ; source: the context patched above
1977 mov ecx,154 ; number of qwords to copy
1978 DD 0xa548f3fc ; bytes FC F3 48 A5 = "cld" followed by "rep movsq"
1979
1980 mov rsi,r9 ; keep the dispatcher context; r8/r9 are reused as arguments
1981 xor rcx,rcx ; arg1 = 0 (also stored as arg8 below)
1982 mov rdx,QWORD[8+rsi] ; arg2 = disp->ImageBase
1983 mov r8,QWORD[rsi] ; arg3 = disp->ControlPc
1984 mov r9,QWORD[16+rsi] ; arg4 = disp->FunctionEntry
1985 mov r10,QWORD[40+rsi] ; disp->ContextRecord
1986 lea r11,[56+rsi] ; &disp->HandlerData
1987 lea r12,[24+rsi] ; &disp->EstablisherFrame
1988 mov QWORD[32+rsp],r10 ; arg5
1989 mov QWORD[40+rsp],r11 ; arg6
1990 mov QWORD[48+rsp],r12 ; arg7
1991 mov QWORD[56+rsp],rcx ; arg8 = 0
1992 call QWORD[__imp_RtlVirtualUnwind]
1993
1994 mov eax,1 ; return value 1
1995 add rsp,64
1996 popfq
1997 pop r15
1998 pop r14
1999 pop r13
2000 pop r12
2001 pop rbp
2002 pop rbx
2003 pop rdi
2004 pop rsi
2005 DB 0F3h,0C3h ;repret (bytes F3 C3 = "rep ret")
2006
2007
2008 section .pdata rdata align=4
; Win64 function table.  Each entry is three image-relative dwords:
; start of the covered code range, end of the range, and the address of the
; matching unwind-information record in .xdata.
2009 ALIGN 4
2010 DD $L$SEH_begin_bn_mul_mont_gather5 wrt ..imagebase
2011 DD $L$SEH_end_bn_mul_mont_gather5 wrt ..imagebase
2012 DD $L$SEH_info_bn_mul_mont_gather5 wrt ..imagebase
2013
2014 DD $L$SEH_begin_bn_mul4x_mont_gather5 wrt ..imagebase
2015 DD $L$SEH_end_bn_mul4x_mont_gather5 wrt ..imagebase
2016 DD $L$SEH_info_bn_mul4x_mont_gather5 wrt ..imagebase
2017
2018 DD $L$SEH_begin_bn_power5 wrt ..imagebase
2019 DD $L$SEH_end_bn_power5 wrt ..imagebase
2020 DD $L$SEH_info_bn_power5 wrt ..imagebase
2021
2022 DD $L$SEH_begin_bn_from_mont8x wrt ..imagebase
2023 DD $L$SEH_end_bn_from_mont8x wrt ..imagebase
2024 DD $L$SEH_info_bn_from_mont8x wrt ..imagebase
2025 DD $L$SEH_begin_bn_gather5 wrt ..imagebase
2026 DD $L$SEH_end_bn_gather5 wrt ..imagebase
2027 DD $L$SEH_info_bn_gather5 wrt ..imagebase
2028
2029 section .xdata rdata align=8
; Unwind-information records referenced from .pdata.
; Records that start with "DB 9,0,0,0" carry no unwind codes: the first byte
; is version 1 with the exception-handler flag set, prologue size and code
; count are 0.  They are followed by the image-relative address of the
; handler and by two image-relative labels that mul_handler reads as
; HandlerData[0] (start of body) and HandlerData[1] (start of epilogue).
2030 ALIGN 8
2031 $L$SEH_info_bn_mul_mont_gather5:
2032 DB 9,0,0,0
2033 DD mul_handler wrt ..imagebase
2034 DD $L$mul_body wrt ..imagebase,$L$mul_epilogue wrt ..imagebase
2035 ALIGN 8
2036 $L$SEH_info_bn_mul4x_mont_gather5:
2037 DB 9,0,0,0
2038 DD mul_handler wrt ..imagebase
2039 DD $L$mul4x_body wrt ..imagebase,$L$mul4x_epilogue wrt ..imagebase
2040 ALIGN 8
; Unwind information for bn_power5: header byte 9 = version 1 with the
; exception-handler flag, no unwind codes; then the handler address and the
; two labels (start of body, start of epilogue) that mul_handler reads as
; HandlerData[0] and HandlerData[1].  All three are image-relative.
2041 $L$SEH_info_bn_power5:
2042 DB 9,0,0,0
2043 DD mul_handler wrt ..imagebase
2044 DD $L$power5_body wrt ..imagebase,$L$power5_epilogue wrt ..imagebase
2045 ALIGN 8
; Unwind information for bn_from_mont8x: handler-based record (header byte 9 =
; version 1 with the exception-handler flag, no unwind codes) followed by the
; handler address and the body/epilogue labels consumed by mul_handler.
2046 $L$SEH_info_bn_from_mont8x:
2047 DB 9,0,0,0
2048 DD mul_handler wrt ..imagebase
2049 DD $L$from_body wrt ..imagebase,$L$from_epilogue wrt ..imagebase
2050 ALIGN 8
; Unwind information for bn_gather5: plain unwind codes, no handler.
; The code offsets match the hand-encoded prologue bytes of bn_gather5.
2051 $L$SEH_info_bn_gather5:
2052 DB 0x01,0x0d,0x05,0x00 ; version 1, no flags; prologue is 0x0d bytes; 5 code slots; no frame register
2053 DB 0x0d,0x78,0x01,0x00 ; at offset 0x0d: xmm7 saved at [rsp + 1*16]
2054 DB 0x08,0x68,0x00,0x00 ; at offset 0x08: xmm6 saved at [rsp + 0*16]
2055 DB 0x04,0x42,0x00,0x00 ; at offset 0x04: small stack allocation of 4*8+8 = 40 bytes; last two bytes pad
2056 ALIGN 8
OLDNEW
« no previous file with comments | « third_party/boringssl/win-x86_64/crypto/bn/x86_64-mont.asm ('k') | third_party/boringssl/win-x86_64/crypto/cpu-x86_64-asm.asm » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698