Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(593)

Side by Side Diff: third_party/boringssl/win-x86_64/crypto/bn/rsaz-x86_64.asm

Issue 1319703002: Breaking Change: merge BoringSSL branch into master (Closed) Base URL: git@github.com:dart-lang/sdk.git@master
Patch Set: Created 5 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 default rel
2 %define XMMWORD
3 %define YMMWORD
4 %define ZMMWORD
5 section .text code align=64
6
7
8 EXTERN OPENSSL_ia32cap_P
9
; ---------------------------------------------------------------------------
; rsaz_512_sqr -- repeated 8-qword (512-bit) squaring with reduction.
; Win64 entry point: the incoming registers are first remapped to the
; rdi/rsi/rdx/rcx/r8 layout that the body uses.
;   arg1 rcx -> rdi : destination, 8 qwords (written by __rsaz_512_subtract)
;   arg2 rdx -> rsi : source operand a[0..7], 8 qwords
;   arg3 r8  -> rbp : pointer to the 8 qwords read by the reduce/subtract
;                     helpers (presumably the modulus -- confirm with the C
;                     prototype)
;   arg4 r9         : 64-bit constant saved at [128+rsp]; __rsaz_512_reduce
;                     multiplies by it (presumably Montgomery n0 -- confirm)
;   arg5 [40+rsp]   : iteration count, kept as a dword at [128+8+rsp]
; Frame: 6 pushed registers + 128+24 bytes; [rsp..127+rsp] receives the
; 16-qword square. Each pass reduces the low 8 qwords, adds the high 8 qwords,
; conditionally subtracts, stores to [rdi], and then uses the result as the
; source of the next pass.
; ---------------------------------------------------------------------------
10 global rsaz_512_sqr
11
12 ALIGN 32
13 rsaz_512_sqr:
14 mov QWORD[8+rsp],rdi ;WIN64 prologue
15 mov QWORD[16+rsp],rsi
16 mov rax,rsp ; entry rsp stays in rax for the whole prologue
17 $L$SEH_begin_rsaz_512_sqr:
18 mov rdi,rcx
19 mov rsi,rdx
20 mov rdx,r8
21 mov rcx,r9
22 mov r8,QWORD[40+rsp] ; fifth argument (first stack argument)
23
24
25 push rbx
26 push rbp
27 push r12
28 push r13
29 push r14
30 push r15
31
32 sub rsp,128+24
33 $L$sqr_body:
34 mov rbp,rdx
35 mov rdx,QWORD[rsi] ; rdx = a[0]
36 mov rax,QWORD[8+rsi] ; rax = a[1]
37 mov QWORD[128+rsp],rcx ; constant later read by __rsaz_512_reduce
38 jmp NEAR $L$oop_sqr
39
40 ALIGN 32
41 $L$oop_sqr:
42 mov DWORD[((128+8))+rsp],r8d ; save remaining iteration count
43
; ---- row 0: a[0]*a[1..7] -> r8..r15 ----
44 mov rbx,rdx
45 mul rdx
46 mov r8,rax
47 mov rax,QWORD[16+rsi]
48 mov r9,rdx
49
50 mul rbx
51 add r9,rax
52 mov rax,QWORD[24+rsi]
53 mov r10,rdx
54 adc r10,0
55
56 mul rbx
57 add r10,rax
58 mov rax,QWORD[32+rsi]
59 mov r11,rdx
60 adc r11,0
61
62 mul rbx
63 add r11,rax
64 mov rax,QWORD[40+rsi]
65 mov r12,rdx
66 adc r12,0
67
68 mul rbx
69 add r12,rax
70 mov rax,QWORD[48+rsi]
71 mov r13,rdx
72 adc r13,0
73
74 mul rbx
75 add r13,rax
76 mov rax,QWORD[56+rsi]
77 mov r14,rdx
78 adc r14,0
79
80 mul rbx
81 add r14,rax
82 mov rax,rbx
83 mov r15,rdx
84 adc r15,0
85
; Double the two lowest cross-product words; rcx keeps the undoubled r9 so its
; top bit can be carried into the next word further down.
86 add r8,r8
87 mov rcx,r9
88 adc r9,r9
89
90 mul rax ; a[0]^2
91 mov QWORD[rsp],rax
92 add r8,rdx
93 adc r9,0
94
95 mov QWORD[8+rsp],r8
96 shr rcx,63 ; rcx = bit shifted out when r9 was doubled
97
98
; ---- row 1: a[1]*a[2..7] accumulated into r10..r15, new top word in r8 ----
99 mov r8,QWORD[8+rsi]
100 mov rax,QWORD[16+rsi]
101 mul r8
102 add r10,rax
103 mov rax,QWORD[24+rsi]
104 mov rbx,rdx
105 adc rbx,0
106
107 mul r8
108 add r11,rax
109 mov rax,QWORD[32+rsi]
110 adc rdx,0
111 add r11,rbx
112 mov rbx,rdx
113 adc rbx,0
114
115 mul r8
116 add r12,rax
117 mov rax,QWORD[40+rsi]
118 adc rdx,0
119 add r12,rbx
120 mov rbx,rdx
121 adc rbx,0
122
123 mul r8
124 add r13,rax
125 mov rax,QWORD[48+rsi]
126 adc rdx,0
127 add r13,rbx
128 mov rbx,rdx
129 adc rbx,0
130
131 mul r8
132 add r14,rax
133 mov rax,QWORD[56+rsi]
134 adc rdx,0
135 add r14,rbx
136 mov rbx,rdx
137 adc rbx,0
138
139 mul r8
140 add r15,rax
141 mov rax,r8
142 adc rdx,0
143 add r15,rbx
144 mov r8,rdx
145 mov rdx,r10
146 adc r8,0
147
148 add rdx,rdx ; only for CF = top bit of r10 (rdx is scratch here)
149 lea r10,[r10*2+rcx] ; double r10 and add the saved bit without touching flags
150 mov rbx,r11
151 adc r11,r11
152
153 mul rax ; a[1]^2
154 add r9,rax
155 adc r10,rdx
156 adc r11,0
157
158 mov QWORD[16+rsp],r9
159 mov QWORD[24+rsp],r10
160 shr rbx,63 ; rbx = bit shifted out when r11 was doubled
161
162
; ---- row 2: a[2]*a[3..7] accumulated into r12..r15,r8, new top word in r9 ----
163 mov r9,QWORD[16+rsi]
164 mov rax,QWORD[24+rsi]
165 mul r9
166 add r12,rax
167 mov rax,QWORD[32+rsi]
168 mov rcx,rdx
169 adc rcx,0
170
171 mul r9
172 add r13,rax
173 mov rax,QWORD[40+rsi]
174 adc rdx,0
175 add r13,rcx
176 mov rcx,rdx
177 adc rcx,0
178
179 mul r9
180 add r14,rax
181 mov rax,QWORD[48+rsi]
182 adc rdx,0
183 add r14,rcx
184 mov rcx,rdx
185 adc rcx,0
186
187 mul r9
188 mov r10,r12
189 lea r12,[r12*2+rbx] ; flag-neutral doubling, interleaved with the carry chain
190 add r15,rax
191 mov rax,QWORD[56+rsi]
192 adc rdx,0
193 add r15,rcx
194 mov rcx,rdx
195 adc rcx,0
196
197 mul r9
198 shr r10,63
199 add r8,rax
200 mov rax,r9
201 adc rdx,0
202 add r8,rcx
203 mov r9,rdx
204 adc r9,0
205
206 mov rcx,r13
207 lea r13,[r13*2+r10]
208
209 mul rax ; a[2]^2
210 add r11,rax
211 adc r12,rdx
212 adc r13,0
213
214 mov QWORD[32+rsp],r11
215 mov QWORD[40+rsp],r12
216 shr rcx,63
217
218
; ---- row 3: a[3]*a[4..7] accumulated into r14,r15,r8,r9, new top word in r10 ----
219 mov r10,QWORD[24+rsi]
220 mov rax,QWORD[32+rsi]
221 mul r10
222 add r14,rax
223 mov rax,QWORD[40+rsi]
224 mov rbx,rdx
225 adc rbx,0
226
227 mul r10
228 add r15,rax
229 mov rax,QWORD[48+rsi]
230 adc rdx,0
231 add r15,rbx
232 mov rbx,rdx
233 adc rbx,0
234
235 mul r10
236 mov r12,r14
237 lea r14,[r14*2+rcx]
238 add r8,rax
239 mov rax,QWORD[56+rsi]
240 adc rdx,0
241 add r8,rbx
242 mov rbx,rdx
243 adc rbx,0
244
245 mul r10
246 shr r12,63
247 add r9,rax
248 mov rax,r10
249 adc rdx,0
250 add r9,rbx
251 mov r10,rdx
252 adc r10,0
253
254 mov rbx,r15
255 lea r15,[r15*2+r12]
256
257 mul rax ; a[3]^2
258 add r13,rax
259 adc r14,rdx
260 adc r15,0
261
262 mov QWORD[48+rsp],r13
263 mov QWORD[56+rsp],r14
264 shr rbx,63
265
266
; ---- row 4: a[4]*a[5..7] accumulated into r8,r9,r10, new top word in r11 ----
267 mov r11,QWORD[32+rsi]
268 mov rax,QWORD[40+rsi]
269 mul r11
270 add r8,rax
271 mov rax,QWORD[48+rsi]
272 mov rcx,rdx
273 adc rcx,0
274
275 mul r11
276 add r9,rax
277 mov rax,QWORD[56+rsi]
278 adc rdx,0
279 mov r12,r8
280 lea r8,[r8*2+rbx]
281 add r9,rcx
282 mov rcx,rdx
283 adc rcx,0
284
285 mul r11
286 shr r12,63
287 add r10,rax
288 mov rax,r11
289 adc rdx,0
290 add r10,rcx
291 mov r11,rdx
292 adc r11,0
293
294 mov rcx,r9
295 lea r9,[r9*2+r12]
296
297 mul rax ; a[4]^2
298 add r15,rax
299 adc r8,rdx
300 adc r9,0
301
302 mov QWORD[64+rsp],r15
303 mov QWORD[72+rsp],r8
304 shr rcx,63
305
306
; ---- row 5: a[5]*a[6..7] accumulated into r10,r11, new top word in r12 ----
307 mov r12,QWORD[40+rsi]
308 mov rax,QWORD[48+rsi]
309 mul r12
310 add r10,rax
311 mov rax,QWORD[56+rsi]
312 mov rbx,rdx
313 adc rbx,0
314
315 mul r12
316 add r11,rax
317 mov rax,r12
318 mov r15,r10
319 lea r10,[r10*2+rcx]
320 adc rdx,0
321 shr r15,63
322 add r11,rbx
323 mov r12,rdx
324 adc r12,0
325
326 mov rbx,r11
327 lea r11,[r11*2+r15]
328
329 mul rax ; a[5]^2
330 add r9,rax
331 adc r10,rdx
332 adc r11,0
333
334 mov QWORD[80+rsp],r9
335 mov QWORD[88+rsp],r10
336
337
; ---- row 6: a[6]*a[7] accumulated into r12, new top word in r13 ----
338 mov r13,QWORD[48+rsi]
339 mov rax,QWORD[56+rsi]
340 mul r13
341 add r12,rax
342 mov rax,r13
343 mov r13,rdx
344 adc r13,0
345
346 xor r14,r14
347 shl rbx,1 ; CF = top bit of the undoubled r11 saved in rbx
348 adc r12,r12
349 adc r13,r13
350 adc r14,r14 ; r14 = carry out of the doubled top word
351
352 mul rax ; a[6]^2
353 add r11,rax
354 adc r12,rdx
355 adc r13,0
356
357 mov QWORD[96+rsp],r11
358 mov QWORD[104+rsp],r12
359
360
; ---- row 7: a[7]^2 goes into the two highest words ----
361 mov rax,QWORD[56+rsi]
362 mul rax
363 add r13,rax
364 adc rdx,0
365
366 add r14,rdx
367
368 mov QWORD[112+rsp],r13
369 mov QWORD[120+rsp],r14
370
; Reload the low 8 qwords of the square for the reduce helper.
371 mov r8,QWORD[rsp]
372 mov r9,QWORD[8+rsp]
373 mov r10,QWORD[16+rsp]
374 mov r11,QWORD[24+rsp]
375 mov r12,QWORD[32+rsp]
376 mov r13,QWORD[40+rsp]
377 mov r14,QWORD[48+rsp]
378 mov r15,QWORD[56+rsp]
379
380 call __rsaz_512_reduce
381
; Add the high 8 qwords of the square to the reduced value.
382 add r8,QWORD[64+rsp]
383 adc r9,QWORD[72+rsp]
384 adc r10,QWORD[80+rsp]
385 adc r11,QWORD[88+rsp]
386 adc r12,QWORD[96+rsp]
387 adc r13,QWORD[104+rsp]
388 adc r14,QWORD[112+rsp]
389 adc r15,QWORD[120+rsp]
390 sbb rcx,rcx ; rcx = all ones if the addition carried out, else 0
391
392 call __rsaz_512_subtract ; stores the result at [rdi], leaves it in r8..r15
393
; Prepare the next pass: rdx/rax = result words 0/1, source = destination.
394 mov rdx,r8
395 mov rax,r9
396 mov r8d,DWORD[((128+8))+rsp]
397 mov rsi,rdi
398
399 dec r8d
400 jnz NEAR $L$oop_sqr
401
; rax = rsp value at function entry; the six pushed registers sit just below it.
402 lea rax,[((128+24+48))+rsp]
403 mov r15,QWORD[((-48))+rax]
404 mov r14,QWORD[((-40))+rax]
405 mov r13,QWORD[((-32))+rax]
406 mov r12,QWORD[((-24))+rax]
407 mov rbp,QWORD[((-16))+rax]
408 mov rbx,QWORD[((-8))+rax]
409 lea rsp,[rax]
410 $L$sqr_epilogue:
411 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
412 mov rsi,QWORD[16+rsp]
413 DB 0F3h,0C3h ;repret
414 $L$SEH_end_rsaz_512_sqr:
; ---------------------------------------------------------------------------
; rsaz_512_mul -- 8-qword x 8-qword multiply followed by reduction.
; Win64 entry point; arguments after remapping:
;   arg1 rcx -> rdi : destination, 8 qwords (parked in xmm0 across the multiply)
;   arg2 rdx -> rsi : first operand, 8 qwords
;   arg3 r8  -> rdx : second operand, 8 qwords (walked through rbp)
;   arg4 r9  -> rcx : pointer handed to reduce/subtract in rbp (parked in xmm1;
;                     presumably the modulus -- confirm with the C prototype)
;   arg5 [40+rsp]   : 64-bit constant stored at [128+rsp] for __rsaz_512_reduce
; The 16-qword product is produced in [rsp..127+rsp] by __rsaz_512_mul.
; ---------------------------------------------------------------------------
415 global rsaz_512_mul
416
417 ALIGN 32
418 rsaz_512_mul:
419 mov QWORD[8+rsp],rdi ;WIN64 prologue
420 mov QWORD[16+rsp],rsi
421 mov rax,rsp
422 $L$SEH_begin_rsaz_512_mul:
423 mov rdi,rcx
424 mov rsi,rdx
425 mov rdx,r8
426 mov rcx,r9
427 mov r8,QWORD[40+rsp]
428
429
430 push rbx
431 push rbp
432 push r12
433 push r13
434 push r14
435 push r15
436
437 sub rsp,128+24
438 $L$mul_body:
439 DB 102,72,15,110,199 ; movq xmm0,rdi
440 DB 102,72,15,110,201 ; movq xmm1,rcx
441 mov QWORD[128+rsp],r8
442 mov rbx,QWORD[rdx] ; first word of the second operand, as __rsaz_512_mul expects
443 mov rbp,rdx
444 call __rsaz_512_mul
445
446 DB 102,72,15,126,199 ; movq rdi,xmm0
447 DB 102,72,15,126,205 ; movq rbp,xmm1
448
; Low half of the product goes to the reduce helper in r8..r15.
449 mov r8,QWORD[rsp]
450 mov r9,QWORD[8+rsp]
451 mov r10,QWORD[16+rsp]
452 mov r11,QWORD[24+rsp]
453 mov r12,QWORD[32+rsp]
454 mov r13,QWORD[40+rsp]
455 mov r14,QWORD[48+rsp]
456 mov r15,QWORD[56+rsp]
457
458 call __rsaz_512_reduce
; Add the high half of the product.
459 add r8,QWORD[64+rsp]
460 adc r9,QWORD[72+rsp]
461 adc r10,QWORD[80+rsp]
462 adc r11,QWORD[88+rsp]
463 adc r12,QWORD[96+rsp]
464 adc r13,QWORD[104+rsp]
465 adc r14,QWORD[112+rsp]
466 adc r15,QWORD[120+rsp]
467 sbb rcx,rcx ; rcx = all ones if the addition carried out, else 0
468
469 call __rsaz_512_subtract ; stores the final 8 qwords at [rdi]
470
; rax = rsp value at function entry; restore the six pushed registers.
471 lea rax,[((128+24+48))+rsp]
472 mov r15,QWORD[((-48))+rax]
473 mov r14,QWORD[((-40))+rax]
474 mov r13,QWORD[((-32))+rax]
475 mov r12,QWORD[((-24))+rax]
476 mov rbp,QWORD[((-16))+rax]
477 mov rbx,QWORD[((-8))+rax]
478 lea rsp,[rax]
479 $L$mul_epilogue:
480 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
481 mov rsi,QWORD[16+rsp]
482 DB 0F3h,0C3h ;repret
483 $L$SEH_end_rsaz_512_mul:
; ---------------------------------------------------------------------------
; rsaz_512_mul_gather4 -- multiply by an operand gathered from an interleaved
; table, then reduce. Win64 entry point; arguments after remapping:
;   arg1 rcx -> rdi : destination, 8 qwords (parked in xmm0)
;   arg2 rdx -> rsi : first operand, 8 qwords
;   arg3 r8  -> rdx : table base
;   arg4 r9  -> rcx : pointer handed to reduce/subtract in rbp (parked in xmm1;
;                     presumably the modulus -- confirm with the C prototype)
;   arg5 [40+rsp]   : 64-bit constant stored at [128+rsp] for __rsaz_512_reduce
;   arg6 [48+rsp]   : table index, truncated to 32 bits
; Each 64-bit multiplier word is assembled from two dwords 64 bytes apart,
; starting at table + 4*index; successive words are 128 bytes apart.
; NOTE(review): the load addresses depend on the index; if the index is secret
; this is a potential cache-timing channel -- confirm against upstream hardening.
; ---------------------------------------------------------------------------
484 global rsaz_512_mul_gather4
485
486 ALIGN 32
487 rsaz_512_mul_gather4:
488 mov QWORD[8+rsp],rdi ;WIN64 prologue
489 mov QWORD[16+rsp],rsi
490 mov rax,rsp
491 $L$SEH_begin_rsaz_512_mul_gather4:
492 mov rdi,rcx
493 mov rsi,rdx
494 mov rdx,r8
495 mov rcx,r9
496 mov r8,QWORD[40+rsp]
497 mov r9,QWORD[48+rsp]
498
499
500 push rbx
501 push rbp
502 push r12
503 push r13
504 push r14
505 push r15
506
507 mov r9d,r9d ; zero-extend the 32-bit index
508 sub rsp,128+24
509 $L$mul_gather4_body:
510 mov eax,DWORD[64+r9*4+rdx] ; high dword of multiplier word 0
511 DB 102,72,15,110,199 ; movq xmm0,rdi
512 mov ebx,DWORD[r9*4+rdx] ; low dword of multiplier word 0
513 DB 102,72,15,110,201 ; movq xmm1,rcx
514 mov QWORD[128+rsp],r8
515
516 shl rax,32
517 or rbx,rax ; rbx = multiplier word 0
518 mov rax,QWORD[rsi]
519 mov rcx,QWORD[8+rsi]
520 lea rbp,[128+r9*4+rdx] ; rbp -> low dword of multiplier word 1
521 mul rbx
522 mov QWORD[rsp],rax ; product word 0
523 mov rax,rcx
524 mov r8,rdx
525
; First pass: operand[1..7] * multiplier word 0 -> r8..r15. The next multiplier
; word is gathered into xmm4 in between the multiplies.
526 mul rbx
527 movd xmm4,DWORD[rbp]
528 add r8,rax
529 mov rax,QWORD[16+rsi]
530 mov r9,rdx
531 adc r9,0
532
533 mul rbx
534 movd xmm5,DWORD[64+rbp]
535 add r9,rax
536 mov rax,QWORD[24+rsi]
537 mov r10,rdx
538 adc r10,0
539
540 mul rbx
541 pslldq xmm5,4 ; move the high dword into bits 32..63
542 add r10,rax
543 mov rax,QWORD[32+rsi]
544 mov r11,rdx
545 adc r11,0
546
547 mul rbx
548 por xmm4,xmm5 ; xmm4 low qword = next multiplier word
549 add r11,rax
550 mov rax,QWORD[40+rsi]
551 mov r12,rdx
552 adc r12,0
553
554 mul rbx
555 add r12,rax
556 mov rax,QWORD[48+rsi]
557 mov r13,rdx
558 adc r13,0
559
560 mul rbx
561 lea rbp,[128+rbp]
562 add r13,rax
563 mov rax,QWORD[56+rsi]
564 mov r14,rdx
565 adc r14,0
566
567 mul rbx
568 DB 102,72,15,126,227 ; movq rbx,xmm4 (multiplier for the next pass)
569 add r14,rax
570 mov rax,QWORD[rsi]
571 mov r15,rdx
572 adc r15,0
573
574 lea rdi,[8+rsp] ; rdi -> slot for product word 1
575 mov ecx,7 ; seven more multiplier words
576 jmp NEAR $L$oop_mul_gather
577
578 ALIGN 32
; Each pass multiplies all 8 operand words by rbx, stores the finished low word
; at [rdi], and shifts the running sum down one register.
579 $L$oop_mul_gather:
580 mul rbx
581 add r8,rax
582 mov rax,QWORD[8+rsi]
583 mov QWORD[rdi],r8
584 mov r8,rdx
585 adc r8,0
586
587 mul rbx
588 movd xmm4,DWORD[rbp]
589 add r9,rax
590 mov rax,QWORD[16+rsi]
591 adc rdx,0
592 add r8,r9
593 mov r9,rdx
594 adc r9,0
595
596 mul rbx
597 movd xmm5,DWORD[64+rbp]
598 add r10,rax
599 mov rax,QWORD[24+rsi]
600 adc rdx,0
601 add r9,r10
602 mov r10,rdx
603 adc r10,0
604
605 mul rbx
606 pslldq xmm5,4
607 add r11,rax
608 mov rax,QWORD[32+rsi]
609 adc rdx,0
610 add r10,r11
611 mov r11,rdx
612 adc r11,0
613
614 mul rbx
615 por xmm4,xmm5
616 add r12,rax
617 mov rax,QWORD[40+rsi]
618 adc rdx,0
619 add r11,r12
620 mov r12,rdx
621 adc r12,0
622
623 mul rbx
624 add r13,rax
625 mov rax,QWORD[48+rsi]
626 adc rdx,0
627 add r12,r13
628 mov r13,rdx
629 adc r13,0
630
631 mul rbx
632 add r14,rax
633 mov rax,QWORD[56+rsi]
634 adc rdx,0
635 add r13,r14
636 mov r14,rdx
637 adc r14,0
638
639 mul rbx
640 DB 102,72,15,126,227 ; movq rbx,xmm4
641 add r15,rax
642 mov rax,QWORD[rsi]
643 adc rdx,0
644 add r14,r15
645 mov r15,rdx
646 adc r15,0
647
648 lea rbp,[128+rbp]
649 lea rdi,[8+rdi]
650
651 dec ecx
652 jnz NEAR $L$oop_mul_gather
653
; Store the remaining high 8 words of the product.
654 mov QWORD[rdi],r8
655 mov QWORD[8+rdi],r9
656 mov QWORD[16+rdi],r10
657 mov QWORD[24+rdi],r11
658 mov QWORD[32+rdi],r12
659 mov QWORD[40+rdi],r13
660 mov QWORD[48+rdi],r14
661 mov QWORD[56+rdi],r15
662
663 DB 102,72,15,126,199 ; movq rdi,xmm0
664 DB 102,72,15,126,205 ; movq rbp,xmm1
665
666 mov r8,QWORD[rsp]
667 mov r9,QWORD[8+rsp]
668 mov r10,QWORD[16+rsp]
669 mov r11,QWORD[24+rsp]
670 mov r12,QWORD[32+rsp]
671 mov r13,QWORD[40+rsp]
672 mov r14,QWORD[48+rsp]
673 mov r15,QWORD[56+rsp]
674
675 call __rsaz_512_reduce
; Add the high half of the product.
676 add r8,QWORD[64+rsp]
677 adc r9,QWORD[72+rsp]
678 adc r10,QWORD[80+rsp]
679 adc r11,QWORD[88+rsp]
680 adc r12,QWORD[96+rsp]
681 adc r13,QWORD[104+rsp]
682 adc r14,QWORD[112+rsp]
683 adc r15,QWORD[120+rsp]
684 sbb rcx,rcx ; rcx = all ones if the addition carried out, else 0
685
686 call __rsaz_512_subtract ; stores the final 8 qwords at [rdi]
687
; rax = rsp value at function entry; restore the six pushed registers.
688 lea rax,[((128+24+48))+rsp]
689 mov r15,QWORD[((-48))+rax]
690 mov r14,QWORD[((-40))+rax]
691 mov r13,QWORD[((-32))+rax]
692 mov r12,QWORD[((-24))+rax]
693 mov rbp,QWORD[((-16))+rax]
694 mov rbx,QWORD[((-8))+rax]
695 lea rsp,[rax]
696 $L$mul_gather4_epilogue:
697 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
698 mov rsi,QWORD[16+rsp]
699 DB 0F3h,0C3h ;repret
700 $L$SEH_end_rsaz_512_mul_gather4:
; ---------------------------------------------------------------------------
; rsaz_512_mul_scatter4 -- multiply the destination buffer by an operand,
; reduce, write the result back, and also scatter it into an interleaved table.
; Win64 entry point; arguments after remapping:
;   arg1 rcx -> rdi : 8-qword buffer, used both as multiplier and destination
;                     (parked in xmm0)
;   arg2 rdx -> rsi : other operand, 8 qwords
;   arg3 r8  -> rdx : pointer handed to reduce/subtract in rbp (parked in xmm1;
;                     presumably the modulus -- confirm with the C prototype)
;   arg4 r9  -> rcx : 64-bit constant stored at [128+rsp] for __rsaz_512_reduce
;   arg5 [40+rsp]   : table base
;   arg6 [48+rsp]   : table index, truncated to 32 bits
; Table layout written below: word i has its low dword at slot+128*i and its
; high dword at slot+128*i+64, where slot = table + 4*index.
; ---------------------------------------------------------------------------
701 global rsaz_512_mul_scatter4
702
703 ALIGN 32
704 rsaz_512_mul_scatter4:
705 mov QWORD[8+rsp],rdi ;WIN64 prologue
706 mov QWORD[16+rsp],rsi
707 mov rax,rsp
708 $L$SEH_begin_rsaz_512_mul_scatter4:
709 mov rdi,rcx
710 mov rsi,rdx
711 mov rdx,r8
712 mov rcx,r9
713 mov r8,QWORD[40+rsp]
714 mov r9,QWORD[48+rsp]
715
716
717 push rbx
718 push rbp
719 push r12
720 push r13
721 push r14
722 push r15
723
724 mov r9d,r9d ; zero-extend the 32-bit index
725 sub rsp,128+24
726 $L$mul_scatter4_body:
727 lea r8,[r9*4+r8] ; r8 = table + 4*index
728 DB 102,72,15,110,199 ; movq xmm0,rdi
729 DB 102,72,15,110,202 ; movq xmm1,rdx
730 DB 102,73,15,110,208 ; movq xmm2,r8
731 mov QWORD[128+rsp],rcx
732
733 mov rbp,rdi ; the destination buffer supplies the multiplier words
734 mov rbx,QWORD[rdi]
735 call __rsaz_512_mul
736
737 DB 102,72,15,126,199 ; movq rdi,xmm0
738 DB 102,72,15,126,205 ; movq rbp,xmm1
739
740 mov r8,QWORD[rsp]
741 mov r9,QWORD[8+rsp]
742 mov r10,QWORD[16+rsp]
743 mov r11,QWORD[24+rsp]
744 mov r12,QWORD[32+rsp]
745 mov r13,QWORD[40+rsp]
746 mov r14,QWORD[48+rsp]
747 mov r15,QWORD[56+rsp]
748
749 call __rsaz_512_reduce
; Add the high half of the product.
750 add r8,QWORD[64+rsp]
751 adc r9,QWORD[72+rsp]
752 adc r10,QWORD[80+rsp]
753 adc r11,QWORD[88+rsp]
754 adc r12,QWORD[96+rsp]
755 adc r13,QWORD[104+rsp]
756 adc r14,QWORD[112+rsp]
757 adc r15,QWORD[120+rsp]
758 DB 102,72,15,126,214 ; movq rsi,xmm2 (table slot); leaves CF intact for the sbb
759 sbb rcx,rcx ; rcx = all ones if the addition carried out, else 0
760
761 call __rsaz_512_subtract ; stores the result at [rdi], leaves it in r8..r15
762
; Scatter the result: low dwords first, then shift and store the high dwords.
763 mov DWORD[rsi],r8d
764 shr r8,32
765 mov DWORD[128+rsi],r9d
766 shr r9,32
767 mov DWORD[256+rsi],r10d
768 shr r10,32
769 mov DWORD[384+rsi],r11d
770 shr r11,32
771 mov DWORD[512+rsi],r12d
772 shr r12,32
773 mov DWORD[640+rsi],r13d
774 shr r13,32
775 mov DWORD[768+rsi],r14d
776 shr r14,32
777 mov DWORD[896+rsi],r15d
778 shr r15,32
779 mov DWORD[64+rsi],r8d
780 mov DWORD[192+rsi],r9d
781 mov DWORD[320+rsi],r10d
782 mov DWORD[448+rsi],r11d
783 mov DWORD[576+rsi],r12d
784 mov DWORD[704+rsi],r13d
785 mov DWORD[832+rsi],r14d
786 mov DWORD[960+rsi],r15d
787
; rax = rsp value at function entry; restore the six pushed registers.
788 lea rax,[((128+24+48))+rsp]
789 mov r15,QWORD[((-48))+rax]
790 mov r14,QWORD[((-40))+rax]
791 mov r13,QWORD[((-32))+rax]
792 mov r12,QWORD[((-24))+rax]
793 mov rbp,QWORD[((-16))+rax]
794 mov rbx,QWORD[((-8))+rax]
795 lea rsp,[rax]
796 $L$mul_scatter4_epilogue:
797 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
798 mov rsi,QWORD[16+rsp]
799 DB 0F3h,0C3h ;repret
800 $L$SEH_end_rsaz_512_mul_scatter4:
; ---------------------------------------------------------------------------
; rsaz_512_mul_by_one -- runs the reduce helper directly on an 8-qword input;
; there is no multiply, no high-half addition and no conditional subtract.
; Win64 entry point; arguments after remapping:
;   arg1 rcx -> rdi : destination, 8 qwords
;   arg2 rdx -> rsi : source, 8 qwords
;   arg3 r8  -> rbp : pointer handed to __rsaz_512_reduce (presumably the
;                     modulus -- confirm with the C prototype)
;   arg4 r9         : 64-bit constant stored at [128+rsp] for __rsaz_512_reduce
; ---------------------------------------------------------------------------
801 global rsaz_512_mul_by_one
802
803 ALIGN 32
804 rsaz_512_mul_by_one:
805 mov QWORD[8+rsp],rdi ;WIN64 prologue
806 mov QWORD[16+rsp],rsi
807 mov rax,rsp
808 $L$SEH_begin_rsaz_512_mul_by_one:
809 mov rdi,rcx
810 mov rsi,rdx
811 mov rdx,r8
812 mov rcx,r9
813
814
815 push rbx
816 push rbp
817 push r12
818 push r13
819 push r14
820 push r15
821
822 sub rsp,128+24
823 $L$mul_by_one_body:
824 mov rbp,rdx
825 mov QWORD[128+rsp],rcx
826
827 mov r8,QWORD[rsi]
828 pxor xmm0,xmm0
829 mov r9,QWORD[8+rsi]
830 mov r10,QWORD[16+rsi]
831 mov r11,QWORD[24+rsi]
832 mov r12,QWORD[32+rsi]
833 mov r13,QWORD[40+rsi]
834 mov r14,QWORD[48+rsi]
835 mov r15,QWORD[56+rsi]
836
; Zero bytes [rsp..111+rsp] of the scratch frame. movdqa needs rsp 16-byte
; aligned, which holds when rsp was 8 mod 16 on entry (8+48+152 = 208).
837 movdqa XMMWORD[rsp],xmm0
838 movdqa XMMWORD[16+rsp],xmm0
839 movdqa XMMWORD[32+rsp],xmm0
840 movdqa XMMWORD[48+rsp],xmm0
841 movdqa XMMWORD[64+rsp],xmm0
842 movdqa XMMWORD[80+rsp],xmm0
843 movdqa XMMWORD[96+rsp],xmm0
844 call __rsaz_512_reduce
; Store the reduced value.
845 mov QWORD[rdi],r8
846 mov QWORD[8+rdi],r9
847 mov QWORD[16+rdi],r10
848 mov QWORD[24+rdi],r11
849 mov QWORD[32+rdi],r12
850 mov QWORD[40+rdi],r13
851 mov QWORD[48+rdi],r14
852 mov QWORD[56+rdi],r15
853
; rax = rsp value at function entry; restore the six pushed registers.
854 lea rax,[((128+24+48))+rsp]
855 mov r15,QWORD[((-48))+rax]
856 mov r14,QWORD[((-40))+rax]
857 mov r13,QWORD[((-32))+rax]
858 mov r12,QWORD[((-24))+rax]
859 mov rbp,QWORD[((-16))+rax]
860 mov rbx,QWORD[((-8))+rax]
861 lea rsp,[rax]
862 $L$mul_by_one_epilogue:
863 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
864 mov rsi,QWORD[16+rsp]
865 DB 0F3h,0C3h ;repret
866 $L$SEH_end_rsaz_512_mul_by_one:
867
; ---------------------------------------------------------------------------
; __rsaz_512_reduce -- internal helper with a private register interface.
; In:   r8..r15      = 8-qword value (r8 lowest)
;       rbp          = pointer to 8 qwords
;       [128+8+rsp]  = 64-bit multiplier constant; the +8 skips the return
;                      address, so this is the caller's [128+rsp]
; Out:  r8..r15      = value after 8 rounds
; Clobbers rax, rbx, rcx, rdx, rsi and flags.
; Each round picks m = r8 * constant (low 64 bits), adds m * [rbp][0..7] to
; the value, and shifts it down by one word. This has the shape of a
; word-by-word Montgomery reduction -- confirm against the C caller.
; ---------------------------------------------------------------------------
868 ALIGN 32
869 __rsaz_512_reduce:
870 mov rbx,r8
871 imul rbx,QWORD[((128+8))+rsp] ; rbx = m for the first round
872 mov rax,QWORD[rbp]
873 mov ecx,8 ; eight rounds, one per word
874 jmp NEAR $L$reduction_loop
875
876 ALIGN 32
877 $L$reduction_loop:
878 mul rbx
879 mov rax,QWORD[8+rbp]
880 neg r8 ; only for CF = (r8 != 0); the low product word in rax is discarded
881 mov r8,rdx
882 adc r8,0 ; CF presumably stands in for the carry out of r8 + low(m*[rbp]) -- confirm
883
884 mul rbx
885 add r9,rax
886 mov rax,QWORD[16+rbp]
887 adc rdx,0
888 add r8,r9 ; r8 is now the lowest word of the shifted value
889 mov r9,rdx
890 adc r9,0
891
892 mul rbx
893 add r10,rax
894 mov rax,QWORD[24+rbp]
895 adc rdx,0
896 add r9,r10
897 mov r10,rdx
898 adc r10,0
899
900 mul rbx
901 add r11,rax
902 mov rax,QWORD[32+rbp]
903 adc rdx,0
904 add r10,r11
905 mov rsi,QWORD[((128+8))+rsp] ; reload the constant; mov leaves CF from the add intact
906
907
908 adc rdx,0
909 mov r11,rdx
910
911 mul rbx
912 add r12,rax
913 mov rax,QWORD[40+rbp]
914 adc rdx,0
915 imul rsi,r8 ; rsi = m for the next round (imul rewrites the flags; CF was consumed above)
916 add r11,r12
917 mov r12,rdx
918 adc r12,0
919
920 mul rbx
921 add r13,rax
922 mov rax,QWORD[48+rbp]
923 adc rdx,0
924 add r12,r13
925 mov r13,rdx
926 adc r13,0
927
928 mul rbx
929 add r14,rax
930 mov rax,QWORD[56+rbp]
931 adc rdx,0
932 add r13,r14
933 mov r14,rdx
934 adc r14,0
935
936 mul rbx
937 mov rbx,rsi ; switch to the next round's m
938 add r15,rax
939 mov rax,QWORD[rbp]
940 adc rdx,0
941 add r14,r15
942 mov r15,rdx
943 adc r15,0
944
945 dec ecx
946 jne NEAR $L$reduction_loop
947
948 DB 0F3h,0C3h ;repret
949
950
; ---------------------------------------------------------------------------
; __rsaz_512_subtract -- internal helper: store, then masked subtract.
; In:   rdi     = destination, 8 qwords
;       r8..r15 = value to store (r8 lowest)
;       rbp     = pointer to the 8 qwords to subtract
;       rcx     = mask; every caller in this file produces it with
;                 `sbb rcx,rcx`, so it is 0 or all ones
; Out:  [rdi] and r8..r15 = value - ([rbp] & mask)
; The block contains no branches: the subtrahend is negated, ANDed with the
; mask and added. The negation is `neg` on the low word and `not` on the
; others, which equals the 512-bit two's complement only when the low qword
; at [rbp] is nonzero (true for an odd modulus).
; Clobbers flags.
; ---------------------------------------------------------------------------
951 ALIGN 32
952 __rsaz_512_subtract:
953 mov QWORD[rdi],r8
954 mov QWORD[8+rdi],r9
955 mov QWORD[16+rdi],r10
956 mov QWORD[24+rdi],r11
957 mov QWORD[32+rdi],r12
958 mov QWORD[40+rdi],r13
959 mov QWORD[48+rdi],r14
960 mov QWORD[56+rdi],r15
961
962 mov r8,QWORD[rbp]
963 mov r9,QWORD[8+rbp]
964 neg r8
965 not r9
966 and r8,rcx
967 mov r10,QWORD[16+rbp]
968 and r9,rcx
969 not r10
970 mov r11,QWORD[24+rbp]
971 and r10,rcx
972 not r11
973 mov r12,QWORD[32+rbp]
974 and r11,rcx
975 not r12
976 mov r13,QWORD[40+rbp]
977 and r12,rcx
978 not r13
979 mov r14,QWORD[48+rbp]
980 and r13,rcx
981 not r14
982 mov r15,QWORD[56+rbp]
983 and r14,rcx
984 not r15
985 and r15,rcx
986
; Add the masked negation to the value stored above.
987 add r8,QWORD[rdi]
988 adc r9,QWORD[8+rdi]
989 adc r10,QWORD[16+rdi]
990 adc r11,QWORD[24+rdi]
991 adc r12,QWORD[32+rdi]
992 adc r13,QWORD[40+rdi]
993 adc r14,QWORD[48+rdi]
994 adc r15,QWORD[56+rdi]
995
996 mov QWORD[rdi],r8
997 mov QWORD[8+rdi],r9
998 mov QWORD[16+rdi],r10
999 mov QWORD[24+rdi],r11
1000 mov QWORD[32+rdi],r12
1001 mov QWORD[40+rdi],r13
1002 mov QWORD[48+rdi],r14
1003 mov QWORD[56+rdi],r15
1004
1005 DB 0F3h,0C3h ;repret
1006
1007
; ---------------------------------------------------------------------------
; __rsaz_512_mul -- internal helper: 8-qword x 8-qword schoolbook multiply.
; In:   rsi = first operand, 8 qwords
;       rbp = second operand, 8 qwords
;       rbx = word 0 of the second operand, preloaded by the caller
; Out:  16-qword product at [8+rsp..135+rsp] as seen here, which is the
;       caller's [rsp..127+rsp] once the return address is popped
; Clobbers rax, rcx, rdx, rbx, rdi, rbp, r8..r15 and flags. The callers park
; rdi in xmm0 and restore it afterwards.
; ---------------------------------------------------------------------------
1008 ALIGN 32
1009 __rsaz_512_mul:
1010 lea rdi,[8+rsp] ; rdi -> product word 0 (skips the return address)
1011
; First pass: first operand * rbx; low word stored, the rest kept in r8..r15.
1012 mov rax,QWORD[rsi]
1013 mul rbx
1014 mov QWORD[rdi],rax
1015 mov rax,QWORD[8+rsi]
1016 mov r8,rdx
1017
1018 mul rbx
1019 add r8,rax
1020 mov rax,QWORD[16+rsi]
1021 mov r9,rdx
1022 adc r9,0
1023
1024 mul rbx
1025 add r9,rax
1026 mov rax,QWORD[24+rsi]
1027 mov r10,rdx
1028 adc r10,0
1029
1030 mul rbx
1031 add r10,rax
1032 mov rax,QWORD[32+rsi]
1033 mov r11,rdx
1034 adc r11,0
1035
1036 mul rbx
1037 add r11,rax
1038 mov rax,QWORD[40+rsi]
1039 mov r12,rdx
1040 adc r12,0
1041
1042 mul rbx
1043 add r12,rax
1044 mov rax,QWORD[48+rsi]
1045 mov r13,rdx
1046 adc r13,0
1047
1048 mul rbx
1049 add r13,rax
1050 mov rax,QWORD[56+rsi]
1051 mov r14,rdx
1052 adc r14,0
1053
1054 mul rbx
1055 add r14,rax
1056 mov rax,QWORD[rsi]
1057 mov r15,rdx
1058 adc r15,0
1059
1060 lea rbp,[8+rbp] ; advance to word 1 of the second operand
1061 lea rdi,[8+rdi]
1062
1063 mov ecx,7 ; seven remaining multiplier words
1064 jmp NEAR $L$oop_mul
1065
1066 ALIGN 32
; Each pass multiplies all 8 words at rsi by the next word at rbp, stores the
; finished low word at [rdi], and shifts the running sum down one register.
1067 $L$oop_mul:
1068 mov rbx,QWORD[rbp]
1069 mul rbx
1070 add r8,rax
1071 mov rax,QWORD[8+rsi]
1072 mov QWORD[rdi],r8
1073 mov r8,rdx
1074 adc r8,0
1075
1076 mul rbx
1077 add r9,rax
1078 mov rax,QWORD[16+rsi]
1079 adc rdx,0
1080 add r8,r9
1081 mov r9,rdx
1082 adc r9,0
1083
1084 mul rbx
1085 add r10,rax
1086 mov rax,QWORD[24+rsi]
1087 adc rdx,0
1088 add r9,r10
1089 mov r10,rdx
1090 adc r10,0
1091
1092 mul rbx
1093 add r11,rax
1094 mov rax,QWORD[32+rsi]
1095 adc rdx,0
1096 add r10,r11
1097 mov r11,rdx
1098 adc r11,0
1099
1100 mul rbx
1101 add r12,rax
1102 mov rax,QWORD[40+rsi]
1103 adc rdx,0
1104 add r11,r12
1105 mov r12,rdx
1106 adc r12,0
1107
1108 mul rbx
1109 add r13,rax
1110 mov rax,QWORD[48+rsi]
1111 adc rdx,0
1112 add r12,r13
1113 mov r13,rdx
1114 adc r13,0
1115
1116 mul rbx
1117 add r14,rax
1118 mov rax,QWORD[56+rsi]
1119 adc rdx,0
1120 add r13,r14
1121 mov r14,rdx
1122 lea rbp,[8+rbp] ; pointer bump via lea so CF survives for the adc below
1123 adc r14,0
1124
1125 mul rbx
1126 add r15,rax
1127 mov rax,QWORD[rsi]
1128 adc rdx,0
1129 add r14,r15
1130 mov r15,rdx
1131 adc r15,0
1132
1133 lea rdi,[8+rdi]
1134
1135 dec ecx
1136 jnz NEAR $L$oop_mul
1137
; Store the high 8 words of the product.
1138 mov QWORD[rdi],r8
1139 mov QWORD[8+rdi],r9
1140 mov QWORD[16+rdi],r10
1141 mov QWORD[24+rdi],r11
1142 mov QWORD[32+rdi],r12
1143 mov QWORD[40+rdi],r13
1144 mov QWORD[48+rdi],r14
1145 mov QWORD[56+rdi],r15
1146
1147 DB 0F3h,0C3h ;repret
1148
; ---------------------------------------------------------------------------
; rsaz_512_scatter4 -- write 8 qwords into an interleaved table.
; Win64 leaf function: no frame, no .pdata entry; uses the argument registers
; directly and touches only rax, rcx, rdx, r9 and flags.
;   rcx = table base
;   rdx = source, 8 qwords
;   r8  = slot index
; Word i goes to slot+128*i (low dword) and slot+128*i+64 (high dword), where
; slot = table + 4*index.
; NOTE(review): r8 is used at full 64-bit width, whereas the mul_gather4 and
; mul_scatter4 entry points zero-extend their index with `mov r9d,r9d`. If the
; C prototype passes a 32-bit int, confirm the upper half of r8 is clean.
; ---------------------------------------------------------------------------
1149 global rsaz_512_scatter4
1150
1151 ALIGN 16
1152 rsaz_512_scatter4:
1153 lea rcx,[r8*4+rcx] ; rcx = table + 4*index
1154 mov r9d,8 ; eight qwords to scatter
1155 jmp NEAR $L$oop_scatter
1156 ALIGN 16
1157 $L$oop_scatter:
1158 mov rax,QWORD[rdx]
1159 lea rdx,[8+rdx]
1160 mov DWORD[rcx],eax ; low dword
1161 shr rax,32
1162 mov DWORD[64+rcx],eax ; high dword, 64 bytes further on
1163 lea rcx,[128+rcx] ; next word's slot
1164 dec r9d
1165 jnz NEAR $L$oop_scatter
1166 DB 0F3h,0C3h ;repret
1167
1168
; ---------------------------------------------------------------------------
; rsaz_512_gather4 -- read 8 qwords back out of the interleaved table
; (the inverse of rsaz_512_scatter4).
; Win64 leaf function: no frame, no .pdata entry; touches only rax, rcx, rdx,
; r8, r9 and flags.
;   rcx = destination, 8 qwords
;   rdx = table base
;   r8  = slot index
; NOTE(review): r8 is used at full 64-bit width (no `mov r8d,r8d`); confirm
; the caller passes a clean upper half if the C prototype uses a 32-bit int.
; NOTE(review): the load addresses depend on the index; if the index is secret
; this is a potential cache-timing channel -- confirm against upstream hardening.
; ---------------------------------------------------------------------------
1169 global rsaz_512_gather4
1170
1171 ALIGN 16
1172 rsaz_512_gather4:
1173 lea rdx,[r8*4+rdx] ; rdx = table + 4*index
1174 mov r9d,8 ; eight qwords to gather
1175 jmp NEAR $L$oop_gather
1176 ALIGN 16
1177 $L$oop_gather:
1178 mov eax,DWORD[rdx] ; low dword (zero-extends into rax)
1179 mov r8d,DWORD[64+rdx] ; high dword, 64 bytes further on
1180 lea rdx,[128+rdx] ; next word's slot
1181 shl r8,32
1182 or rax,r8
1183 mov QWORD[rcx],rax
1184 lea rcx,[8+rcx]
1185 dec r9d
1186 jnz NEAR $L$oop_gather
1187 DB 0F3h,0C3h ;repret
1188
1189 EXTERN __imp_RtlVirtualUnwind
1190
1191 ALIGN 16
; ---------------------------------------------------------------------------
; se_handler -- exception handler referenced by every .xdata entry in this
; file. It fixes up the context record so unwinding sees the caller's
; registers, then calls RtlVirtualUnwind.
; In:   r8 = context record pointer, r9 = dispatcher context pointer.
;       The numeric offsets below are taken to be the Windows x64 CONTEXT and
;       DISPATCHER_CONTEXT field offsets named in the comments -- confirm
;       against winnt.h.
; Out:  eax = 1.
; The handler data is the pair of image-relative offsets stored in .xdata:
; [0] = end of prologue ($L$..._body), [1] = epilogue label.
; ---------------------------------------------------------------------------
1192 se_handler:
1193 push rsi
1194 push rdi
1195 push rbx
1196 push rbp
1197 push r12
1198 push r13
1199 push r14
1200 push r15
1201 pushfq
1202 sub rsp,64
1203
1204 mov rax,QWORD[120+r8] ; context Rax (the entry points copy rsp into rax in their prologue)
1205 mov rbx,QWORD[248+r8] ; context Rip
1206
1207 mov rsi,QWORD[8+r9] ; image base
1208 mov r11,QWORD[56+r9] ; handler data
1209
1210 mov r10d,DWORD[r11] ; handler data[0]: body label offset
1211 lea r10,[r10*1+rsi]
1212 cmp rbx,r10
1213 jb NEAR $L$common_seh_tail ; fault before the body label: rax is still the entry rsp
1214
1215 mov rax,QWORD[152+r8] ; context Rsp
1216
1217 mov r10d,DWORD[4+r11] ; handler data[1]: epilogue label offset
1218 lea r10,[r10*1+rsi]
1219 cmp rbx,r10
1220 jae NEAR $L$common_seh_tail ; at or past the epilogue label: rsp is already the entry rsp
1221
; Fault inside the body: step over the 128+24 byte frame and the six pushes,
; then copy the saved callee-saved registers into the context record.
1222 lea rax,[((128+24+48))+rax]
1223
1224 mov rbx,QWORD[((-8))+rax]
1225 mov rbp,QWORD[((-16))+rax]
1226 mov r12,QWORD[((-24))+rax]
1227 mov r13,QWORD[((-32))+rax]
1228 mov r14,QWORD[((-40))+rax]
1229 mov r15,QWORD[((-48))+rax]
1230 mov QWORD[144+r8],rbx
1231 mov QWORD[160+r8],rbp
1232 mov QWORD[216+r8],r12
1233 mov QWORD[224+r8],r13
1234 mov QWORD[232+r8],r14
1235 mov QWORD[240+r8],r15
1236
1237 $L$common_seh_tail:
; rax = entry rsp; rdi/rsi were saved at [8+rsp]/[16+rsp] by the WIN64 prologue.
1238 mov rdi,QWORD[8+rax]
1239 mov rsi,QWORD[16+rax]
1240 mov QWORD[152+r8],rax ; context Rsp
1241 mov QWORD[168+r8],rsi ; context Rsi
1242 mov QWORD[176+r8],rdi ; context Rdi
1243
; Copy the whole context record (154 qwords) into the dispatcher's record.
1244 mov rdi,QWORD[40+r9]
1245 mov rsi,r8
1246 mov ecx,154
1247 DD 0xa548f3fc ; bytes FC F3 48 A5 = cld; rep movsq
1248
; RtlVirtualUnwind: four register arguments, four stack arguments above the
; 32-byte shadow space.
1249 mov rsi,r9
1250 xor rcx,rcx ; arg1 = 0
1251 mov rdx,QWORD[8+rsi] ; arg2 = [8+disp]
1252 mov r8,QWORD[rsi] ; arg3 = [disp]
1253 mov r9,QWORD[16+rsi] ; arg4 = [16+disp]
1254 mov r10,QWORD[40+rsi]
1255 lea r11,[56+rsi]
1256 lea r12,[24+rsi]
1257 mov QWORD[32+rsp],r10 ; arg5 = [40+disp]
1258 mov QWORD[40+rsp],r11 ; arg6 = &[56+disp]
1259 mov QWORD[48+rsp],r12 ; arg7 = &[24+disp]
1260 mov QWORD[56+rsp],rcx ; arg8 = 0
1261 call QWORD[__imp_RtlVirtualUnwind]
1262
1263 mov eax,1 ; presumably ExceptionContinueSearch -- confirm
1264 add rsp,64
1265 popfq
1266 pop r15
1267 pop r14
1268 pop r13
1269 pop r12
1270 pop rbp
1271 pop rbx
1272 pop rdi
1273 pop rsi
1274 DB 0F3h,0C3h ;repret
1275
1276
; .pdata -- one three-dword record per framed entry point: image-relative
; start label, end label, and the matching $L$SEH_info_* record in .xdata.
; rsaz_512_scatter4 and rsaz_512_gather4 have no record here.
1277 section .pdata rdata align=4
1278 ALIGN 4
1279 DD $L$SEH_begin_rsaz_512_sqr wrt ..imagebase
1280 DD $L$SEH_end_rsaz_512_sqr wrt ..imagebase
1281 DD $L$SEH_info_rsaz_512_sqr wrt ..imagebase
1282
1283 DD $L$SEH_begin_rsaz_512_mul wrt ..imagebase
1284 DD $L$SEH_end_rsaz_512_mul wrt ..imagebase
1285 DD $L$SEH_info_rsaz_512_mul wrt ..imagebase
1286
1287 DD $L$SEH_begin_rsaz_512_mul_gather4 wrt ..imagebase
1288 DD $L$SEH_end_rsaz_512_mul_gather4 wrt ..imagebase
1289 DD $L$SEH_info_rsaz_512_mul_gather4 wrt ..imagebase
1290
1291 DD $L$SEH_begin_rsaz_512_mul_scatter4 wrt ..imagebase
1292 DD $L$SEH_end_rsaz_512_mul_scatter4 wrt ..imagebase
1293 DD $L$SEH_info_rsaz_512_mul_scatter4 wrt ..imagebase
1294
1295 DD $L$SEH_begin_rsaz_512_mul_by_one wrt ..imagebase
1296 DD $L$SEH_end_rsaz_512_mul_by_one wrt ..imagebase
1297 DD $L$SEH_info_rsaz_512_mul_by_one wrt ..imagebase
1298
; .xdata -- one unwind-info record per framed entry point:
;   DB 9,0,0,0 : four header bytes; presumably UNWIND_INFO version 1 with the
;                exception-handler flag set and no unwind codes -- confirm
;                against the Windows x64 unwind-data layout
;   DD         : image-relative address of se_handler
;   DD,DD      : handler data that se_handler reads as two dwords:
;                [0] = $L$..._body label, [1] = $L$..._epilogue label
; Every address operand must be written exactly as `wrt ..imagebase`.
1299 section .xdata rdata align=8
1300 ALIGN 8
1301 $L$SEH_info_rsaz_512_sqr:
1302 DB 9,0,0,0
1303 DD se_handler wrt ..imagebase
1304 DD $L$sqr_body wrt ..imagebase,$L$sqr_epilogue wrt ..imagebase
1305 $L$SEH_info_rsaz_512_mul:
1306 DB 9,0,0,0
1307 DD se_handler wrt ..imagebase
1308 DD $L$mul_body wrt ..imagebase,$L$mul_epilogue wrt ..imagebase
1309 $L$SEH_info_rsaz_512_mul_gather4:
1310 DB 9,0,0,0
1311 DD se_handler wrt ..imagebase
1312 DD $L$mul_gather4_body wrt ..imagebase,$L$mul_gather4_epilogue wrt ..imagebase
1313 $L$SEH_info_rsaz_512_mul_scatter4:
1314 DB 9,0,0,0
1315 DD se_handler wrt ..imagebase
1316 DD $L$mul_scatter4_body wrt ..imagebase,$L$mul_scatter4_epilogue wrt ..imagebase
1317 $L$SEH_info_rsaz_512_mul_by_one:
1318 DB 9,0,0,0
1319 DD se_handler wrt ..imagebase
1320 DD $L$mul_by_one_body wrt ..imagebase,$L$mul_by_one_epilogue wrt ..imagebase
OLDNEW
« no previous file with comments | « third_party/boringssl/win-x86_64/crypto/bn/rsaz-avx2.asm ('k') | third_party/boringssl/win-x86_64/crypto/bn/x86_64-mont.asm » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698