Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(316)

Side by Side Diff: third_party/boringssl/win-x86_64/crypto/bn/x86_64-mont5.asm

Issue 377783004: Add BoringSSL GYP files. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Final Python fix. Created 6 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 OPTION DOTNAME
2 .text$ SEGMENT ALIGN(64) 'CODE'
3
4 EXTERN OPENSSL_ia32cap_P:NEAR
5
6 PUBLIC bn_mul_mont_gather5
7
8 ALIGN 64
; bn_mul_mont_gather5 -- public entry point, Win64 calling convention.
;
; The prologue spills rdi/rsi into the caller-provided home slots and then
; remaps the incoming arguments:
;   rcx -> rdi, rdx -> rsi, r8 -> rdx, r9 -> rcx,
;   [40+rsp] -> r8, [48+rsp] -> r9, and a 32-bit value at [56+rsp] -> r10d.
; If the low three bits of r9d are all zero, control is handed to
; $L$mul4x_enter (a label inside bn_mul4x_mont_gather5); otherwise the
; word-at-a-time loops below run.  On exit rax = 1 and rbx, rbp, r12-r15,
; xmm6, xmm7, rdi and rsi are reloaded from the values saved on entry.
;
; NOTE(review): judging by the symbol name this is Montgomery multiplication
; whose second operand is gathered from a table -- presumably rdi = result,
; rsi = a[], rdx = table, rcx = modulus, r8 = pointer to n0, r9 = word
; count, r10d = table index.  Confirm against the C prototype.
9 bn_mul_mont_gather5 PROC PUBLIC
10 mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
11 mov QWORD PTR[16+rsp],rsi
12 mov rax,rsp
13 $L$SEH_begin_bn_mul_mont_gather5::
14 mov rdi,rcx
15 mov rsi,rdx
16 mov rdx,r8
17 mov rcx,r9
18 mov r8,QWORD PTR[40+rsp]
19 mov r9,QWORD PTR[48+rsp]
20
21
; Dispatch: any of the low three bits of r9d set -> generic path below,
; otherwise jump into the 4x-unrolled implementation.
22 test r9d,7
23 jnz $L$mul_enter
24 jmp $L$mul4x_enter
25
26 ALIGN 16
27 $L$mul_enter::
28 mov r9d,r9d ; 32-bit move zero-extends r9d into r9
29 mov rax,rsp ; rax = rsp on entry; saved below and used by the epilogue
30 mov r10d,DWORD PTR[56+rsp]
31 push rbx
32 push rbp
33 push r12
34 push r13
35 push r14
36 push r15
; Reserve 40 bytes and save xmm6/xmm7 at rax-88 and rax-72.
37 lea rsp,QWORD PTR[((-40))+rsp]
38 movaps XMMWORD PTR[rsp],xmm6
39 movaps XMMWORD PTR[16+rsp],xmm7
; Carve out (r9+2) qwords of scratch below rsp, then round rsp down to a
; multiple of 1024.
40 lea r11,QWORD PTR[2+r9]
41 neg r11
42 lea rsp,QWORD PTR[r11*8+rsp]
43 and rsp,-1024
44
; Remember the entry rsp in scratch slot r9+1; the epilogue reloads it.
45 mov QWORD PTR[8+r9*8+rsp],rax
46 $L$mul_body::
; Split the index in r10: r11 = index & 7 picks a qword offset, and
; r10 = (~(index >> 3)) & 3 picks where in $L$magic_masks the four masks
; xmm4..xmm7 are read from.
47 mov r12,rdx
48 mov r11,r10
49 shr r10,3
50 and r11,7
51 not r10
52 lea rax,QWORD PTR[$L$magic_masks]
53 and r10,3
54 lea r12,QWORD PTR[96+r11*8+r12]
55 movq xmm4,QWORD PTR[r10*8+rax]
56 movq xmm5,QWORD PTR[8+r10*8+rax]
57 movq xmm6,QWORD PTR[16+r10*8+rax]
58 movq xmm7,QWORD PTR[24+r10*8+rax]
59
; Gather one table word: load four candidates 64 bytes apart, AND each with
; its mask and OR the results together.
; NOTE(review): presumably exactly one mask is all-ones, so every candidate
; is touched regardless of the index -- verify against $L$magic_masks.
60 movq xmm0,QWORD PTR[(((-96)))+r12]
61 movq xmm1,QWORD PTR[((-32))+r12]
62 pand xmm0,xmm4
63 movq xmm2,QWORD PTR[32+r12]
64 pand xmm1,xmm5
65 movq xmm3,QWORD PTR[96+r12]
66 pand xmm2,xmm6
67 por xmm0,xmm1
68 pand xmm3,xmm7
69 por xmm0,xmm2
70 lea r12,QWORD PTR[256+r12]
71 por xmm0,xmm3
72
73 DB 102,72,15,126,195 ; bytes 66 48 0F 7E C3 = movq rbx,xmm0
74
75 mov r8,QWORD PTR[r8] ; replace the pointer in r8 with the word it points to
76 mov rax,QWORD PTR[rsi]
77
78 xor r14,r14 ; outer counter = 0
79 xor r15,r15 ; inner counter = 0
80
; Start gathering the next table word while the first multiply runs.
81 movq xmm0,QWORD PTR[(((-96)))+r12]
82 movq xmm1,QWORD PTR[((-32))+r12]
83 pand xmm0,xmm4
84 movq xmm2,QWORD PTR[32+r12]
85 pand xmm1,xmm5
86
87 mov rbp,r8
88 mul rbx ; rdx:rax = [rsi] * gathered word
89 mov r10,rax
90 mov rax,QWORD PTR[rcx]
91
92 movq xmm3,QWORD PTR[96+r12]
93 pand xmm2,xmm6
94 por xmm0,xmm1
95 pand xmm3,xmm7
96
97 imul rbp,r10 ; rbp = low product word * r8 (low 64 bits)
98 mov r11,rdx
99
100 por xmm0,xmm2
101 lea r12,QWORD PTR[256+r12]
102 por xmm0,xmm3
103
104 mul rbp ; rdx:rax = [rcx] * rbp
105 add r10,rax ; only the carry out of this sum is used below
106 mov rax,QWORD PTR[8+rsi]
107 adc rdx,0
108 mov r13,rdx
109
110 lea r15,QWORD PTR[1+r15]
111 jmp $L$1st_enter
112
; First pass: for each index r15, combine rsi[r15]*rbx and rcx[r15]*rbp and
; write the running sum to scratch slot r15-2 at [-16+r15*8+rsp].
113 ALIGN 16
114 $L$1st::
115 add r13,rax
116 mov rax,QWORD PTR[r15*8+rsi]
117 adc rdx,0
118 add r13,r11
119 mov r11,r10
120 adc rdx,0
121 mov QWORD PTR[((-16))+r15*8+rsp],r13
122 mov r13,rdx
123
124 $L$1st_enter::
125 mul rbx
126 add r11,rax
127 mov rax,QWORD PTR[r15*8+rcx]
128 adc rdx,0
129 lea r15,QWORD PTR[1+r15]
130 mov r10,rdx
131
132 mul rbp
133 cmp r15,r9
134 jne $L$1st
135
136 DB 102,72,15,126,195 ; movq rbx,xmm0 -- next gathered table word
137
138 add r13,rax
139 mov rax,QWORD PTR[rsi]
140 adc rdx,0
141 add r13,r11
142 adc rdx,0
143 mov QWORD PTR[((-16))+r15*8+rsp],r13
144 mov r13,rdx
145 mov r11,r10
146
; Fold the two outstanding carries into scratch slot r9-1 and keep the
; carry out of that sum in slot r9.
147 xor rdx,rdx
148 add r13,r11
149 adc rdx,0
150 mov QWORD PTR[((-8))+r9*8+rsp],r13
151 mov QWORD PTR[r9*8+rsp],rdx
152
153 lea r14,QWORD PTR[1+r14]
154 jmp $L$outer
; Outer loop: one iteration per remaining gathered word (r14 counts up to
; r9).  Same shape as the first pass, except that the previous scratch
; contents are added in as well.
155 ALIGN 16
156 $L$outer::
157 xor r15,r15
158 mov rbp,r8
159 mov r10,QWORD PTR[rsp]
160
161 movq xmm0,QWORD PTR[(((-96)))+r12]
162 movq xmm1,QWORD PTR[((-32))+r12]
163 pand xmm0,xmm4
164 movq xmm2,QWORD PTR[32+r12]
165 pand xmm1,xmm5
166
167 mul rbx
168 add r10,rax
169 mov rax,QWORD PTR[rcx]
170 adc rdx,0
171
172 movq xmm3,QWORD PTR[96+r12]
173 pand xmm2,xmm6
174 por xmm0,xmm1
175 pand xmm3,xmm7
176
177 imul rbp,r10
178 mov r11,rdx
179
180 por xmm0,xmm2
181 lea r12,QWORD PTR[256+r12]
182 por xmm0,xmm3
183
184 mul rbp
185 add r10,rax
186 mov rax,QWORD PTR[8+rsi]
187 adc rdx,0
188 mov r10,QWORD PTR[8+rsp]
189 mov r13,rdx
190
191 lea r15,QWORD PTR[1+r15]
192 jmp $L$inner_enter
193
194 ALIGN 16
195 $L$inner::
196 add r13,rax
197 mov rax,QWORD PTR[r15*8+rsi]
198 adc rdx,0
199 add r13,r10
200 mov r10,QWORD PTR[r15*8+rsp]
201 adc rdx,0
202 mov QWORD PTR[((-16))+r15*8+rsp],r13
203 mov r13,rdx
204
205 $L$inner_enter::
206 mul rbx
207 add r11,rax
208 mov rax,QWORD PTR[r15*8+rcx]
209 adc rdx,0
210 add r10,r11
211 mov r11,rdx
212 adc r11,0
213 lea r15,QWORD PTR[1+r15]
214
215 mul rbp
216 cmp r15,r9
217 jne $L$inner
218
219 DB 102,72,15,126,195 ; movq rbx,xmm0 -- gathered word for the next outer pass
220
221 add r13,rax
222 mov rax,QWORD PTR[rsi]
223 adc rdx,0
224 add r13,r10
225 mov r10,QWORD PTR[r15*8+rsp]
226 adc rdx,0
227 mov QWORD PTR[((-16))+r15*8+rsp],r13
228 mov r13,rdx
229
230 xor rdx,rdx
231 add r13,r11
232 adc rdx,0
233 add r13,r10 ; add the word previously stored in scratch slot r9
234 adc rdx,0
235 mov QWORD PTR[((-8))+r9*8+rsp],r13
236 mov QWORD PTR[r9*8+rsp],rdx
237
238 lea r14,QWORD PTR[1+r14]
239 cmp r14,r9
240 jb $L$outer
241
; Subtract the r9 words at rcx from the scratch vector with borrow, storing
; the difference at rdi.  The xor clears CF before the first sbb; lea, mov
; and dec leave CF untouched inside the loop.
242 xor r14,r14
243 mov rax,QWORD PTR[rsp]
244 lea rsi,QWORD PTR[rsp]
245 mov r15,r9
246 jmp $L$sub
247 ALIGN 16
248 $L$sub:: sbb rax,QWORD PTR[r14*8+rcx]
249 mov QWORD PTR[r14*8+rdi],rax
250 mov rax,QWORD PTR[8+r14*8+rsi]
251 lea r14,QWORD PTR[1+r14]
252 dec r15
253 jnz $L$sub
254
; rax = scratch slot r9 minus the final borrow; it is the per-bit select
; mask used in the loop below.
255 sbb rax,0
256 xor r14,r14
257 mov r15,r9
258 ALIGN 16
; Branch-free select per word: rdi[i] = ((scratch[i] ^ rdi[i]) & rax) ^ rdi[i].
; Each scratch word is overwritten with the loop index r14 as it is consumed.
259 $L$copy::
260 mov rsi,QWORD PTR[r14*8+rsp]
261 mov rcx,QWORD PTR[r14*8+rdi]
262 xor rsi,rcx
263 and rsi,rax
264 xor rsi,rcx
265 mov QWORD PTR[r14*8+rsp],r14
266 mov QWORD PTR[r14*8+rdi],rsi
267 lea r14,QWORD PTR[1+r14]
268 sub r15,1
269 jnz $L$copy
270
; Epilogue: rsi = entry rsp saved at $L$mul_body; reload the saved
; registers relative to it and return 1.
271 mov rsi,QWORD PTR[8+r9*8+rsp]
272 mov rax,1
273 movaps xmm6,XMMWORD PTR[((-88))+rsi]
274 movaps xmm7,XMMWORD PTR[((-72))+rsi]
275 mov r15,QWORD PTR[((-48))+rsi]
276 mov r14,QWORD PTR[((-40))+rsi]
277 mov r13,QWORD PTR[((-32))+rsi]
278 mov r12,QWORD PTR[((-24))+rsi]
279 mov rbp,QWORD PTR[((-16))+rsi]
280 mov rbx,QWORD PTR[((-8))+rsi]
281 lea rsp,QWORD PTR[rsi]
282 $L$mul_epilogue::
283 mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
284 mov rsi,QWORD PTR[16+rsp]
285 DB 0F3h,0C3h ;repret
286 $L$SEH_end_bn_mul_mont_gather5::
287 bn_mul_mont_gather5 ENDP
288
289 ALIGN 32
; bn_mul4x_mont_gather5 -- private Win64 wrapper around mul4x_internal.
;
; It can be entered at the PROC label (full Win64 argument remap) or at
; $L$mul4x_enter, which bn_mul_mont_gather5 jumps to after it has already
; done the same remap.  The wrapper saves the non-volatile registers and
; xmm6/xmm7, sizes and positions a stack frame from r9, records the entry
; rsp at [40+rsp], calls mul4x_internal, restores everything and returns
; rax = 1.
290 bn_mul4x_mont_gather5 PROC PRIVATE
291 mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
292 mov QWORD PTR[16+rsp],rsi
293 mov rax,rsp
294 $L$SEH_begin_bn_mul4x_mont_gather5::
295 mov rdi,rcx
296 mov rsi,rdx
297 mov rdx,r8
298 mov rcx,r9
299 mov r8,QWORD PTR[40+rsp]
300 mov r9,QWORD PTR[48+rsp]
301
302
303 $L$mul4x_enter::
304 DB 067h ; lone 67h prefix byte applied to the next instruction -- presumably padding
305 mov rax,rsp ; rax = entry rsp, stored at [40+rsp] below
306 push rbx
307 push rbp
308 push r12
309 push r13
310 push r14
311 push r15
312 lea rsp,QWORD PTR[((-40))+rsp]
313 movaps XMMWORD PTR[rsp],xmm6 ; lands at rax-88
314 movaps XMMWORD PTR[16+rsp],xmm7 ; lands at rax-72
315 DB 067h
; r9 = -(8 * count) and r10 = 32 * count, both derived from r9d.
316 mov r10d,r9d
317 shl r9d,3
318 shl r10d,3+2
319 neg r9
320
321
322
323
324
325
326
327
; Frame placement: r11 = ((rsp - 64 - 2*8*count) - rsi) mod 4096, i.e. the
; distance between the prospective frame and rsi within a 4 KiB page.
; Depending on how that compares with r10, rsp is lowered by r11 as well
; ($L$mul4xsp_alt clamps the extra adjustment to zero when its subtraction
; borrows).
; NOTE(review): presumably this keeps the frame and the input at rsi from
; aliasing modulo 4096 -- confirm against the generator script.
328 lea r11,QWORD PTR[((-64))+r9*2+rsp]
329 sub r11,rsi
330 and r11,4095
331 cmp r10,r11
332 jb $L$mul4xsp_alt
333 sub rsp,r11
334 lea rsp,QWORD PTR[((-64))+r9*2+rsp]
335 jmp $L$mul4xsp_done
336
337 ALIGN 32
338 $L$mul4xsp_alt::
339 lea r10,QWORD PTR[((4096-64))+r9*2]
340 lea rsp,QWORD PTR[((-64))+r9*2+rsp]
341 sub r11,r10
342 mov r10,0 ; mov (not xor) leaves the borrow from the sub intact for cmovc
343 cmovc r11,r10
344 sub rsp,r11
345 $L$mul4xsp_done::
346 and rsp,-64 ; round the frame down to a 64-byte boundary
347 neg r9 ; r9 = 8 * count again
348
349 mov QWORD PTR[40+rsp],rax ; entry rsp, reloaded by the epilogue
350 $L$mul4x_body::
351
352 call mul4x_internal
353
; Epilogue: rsi = entry rsp; reload the saved registers relative to it.
354 mov rsi,QWORD PTR[40+rsp]
355 mov rax,1
356 movaps xmm6,XMMWORD PTR[((-88))+rsi]
357 movaps xmm7,XMMWORD PTR[((-72))+rsi]
358 mov r15,QWORD PTR[((-48))+rsi]
359 mov r14,QWORD PTR[((-40))+rsi]
360 mov r13,QWORD PTR[((-32))+rsi]
361 mov r12,QWORD PTR[((-24))+rsi]
362 mov rbp,QWORD PTR[((-16))+rsi]
363 mov rbx,QWORD PTR[((-8))+rsi]
364 lea rsp,QWORD PTR[rsi]
365 $L$mul4x_epilogue::
366 mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
367 mov rsi,QWORD PTR[16+rsp]
368 DB 0F3h,0C3h ;repret
369 $L$SEH_end_bn_mul4x_mont_gather5::
370 bn_mul4x_mont_gather5 ENDP
371
372
373 ALIGN 32
; mul4x_internal -- private worker, reached by `call` from
; bn_mul4x_mont_gather5 and bn_power5.
;
; Register state read on entry:
;   rax  caller's entry rsp; the 32-bit table index is read from [56+rax]
;   rdx  table base, rsi / rcx input word arrays, r8 pointer to a multiplier
;   r9   byte count (positive), rdi value stashed at [56+8+rsp]
; Frame slots are addressed with an extra +8 because this routine's return
; address sits on top of the caller's frame.
; The routine has no `ret` of its own: it finishes by jumping to
; $L$sqr4x_sub inside bn_sqr8x_internal, which subtracts, stores to rdi and
; returns to this routine's caller.  xmm4..xmm7 are overwritten with masks.
374 mul4x_internal PROC PRIVATE
375 shl r9,5
376 mov r10d,DWORD PTR[56+rax]
377 lea r13,QWORD PTR[256+r9*1+rdx] ; r13 = rdx + 256 + 32*r9, the outer-loop end marker
378 shr r9,5
; Same index split as the generic path: r11 = index & 7, r10 = (~(index>>3)) & 3.
379 mov r11,r10
380 shr r10,3
381 and r11,7
382 not r10
383 lea rax,QWORD PTR[$L$magic_masks]
384 and r10,3
385 lea r12,QWORD PTR[96+r11*8+rdx]
386 movq xmm4,QWORD PTR[r10*8+rax]
387 movq xmm5,QWORD PTR[8+r10*8+rax]
388 add r11,7
389 movq xmm6,QWORD PTR[16+r10*8+rax]
390 movq xmm7,QWORD PTR[24+r10*8+rax]
391 and r11,7 ; r11 = (index + 7) & 7, used below to offset the scratch pointer r14
392
; Gather the first table word into xmm0 (four masked loads OR-ed together)
; and begin gathering the second one through r14 = r12 + 256.
393 movq xmm0,QWORD PTR[(((-96)))+r12]
394 lea r14,QWORD PTR[256+r12]
395 movq xmm1,QWORD PTR[((-32))+r12]
396 pand xmm0,xmm4
397 movq xmm2,QWORD PTR[32+r12]
398 pand xmm1,xmm5
399 movq xmm3,QWORD PTR[96+r12]
400 pand xmm2,xmm6
401 DB 067h
402 por xmm0,xmm1
403 movq xmm1,QWORD PTR[((-96))+r14]
404 DB 067h
405 pand xmm3,xmm7
406 DB 067h
407 por xmm0,xmm2
408 movq xmm2,QWORD PTR[((-32))+r14]
409 DB 067h
410 pand xmm1,xmm4
411 DB 067h
412 por xmm0,xmm3
413 movq xmm3,QWORD PTR[32+r14]
414
415 DB 102,72,15,126,195 ; bytes 66 48 0F 7E C3 = movq rbx,xmm0
416 movq xmm0,QWORD PTR[96+r14]
417 mov QWORD PTR[((16+8))+rsp],r13 ; outer-loop end marker
418 mov QWORD PTR[((56+8))+rsp],rdi ; reloaded into rdi just before the exit jump
419
420 mov r8,QWORD PTR[r8]
421 mov rax,QWORD PTR[rsi]
422 lea rsi,QWORD PTR[r9*1+rsi] ; rsi now points past the end; indexed with negative r9/r15
423 neg r9
424
425 mov rbp,r8
426 mul rbx
427 mov r10,rax
428 mov rax,QWORD PTR[rcx]
429
430 pand xmm2,xmm5
431 pand xmm3,xmm6
432 por xmm1,xmm2
433
434 imul rbp,r10 ; rbp = low product word * r8 (low 64 bits)
435
436
437
438
439
440
441
442 lea r14,QWORD PTR[((64+8))+r11*8+rsp] ; r14 = scratch write pointer
443 mov r11,rdx
444
445 pand xmm0,xmm7
446 por xmm1,xmm3
447 lea r12,QWORD PTR[512+r12]
448 por xmm0,xmm1
449
450 mul rbp
451 add r10,rax
452 mov rax,QWORD PTR[8+r9*1+rsi]
453 adc rdx,0
454 mov rdi,rdx
455
456 mul rbx
457 add r11,rax
458 mov rax,QWORD PTR[16+rcx] ; words at rcx are read 16 bytes apart throughout
459 adc rdx,0
460 mov r10,rdx
461
462 mul rbp
463 add rdi,rax
464 mov rax,QWORD PTR[16+r9*1+rsi]
465 adc rdx,0
466 add rdi,r11
467 lea r15,QWORD PTR[32+r9] ; r15 = negative byte counter, stepped by 32 per iteration
468 lea rcx,QWORD PTR[64+rcx]
469 adc rdx,0
470 mov QWORD PTR[r14],rdi
471 mov r13,rdx
472 jmp $L$1st4x
473
; First pass, four words per iteration, results written through r14.
474 ALIGN 32
475 $L$1st4x::
476 mul rbx
477 add r10,rax
478 mov rax,QWORD PTR[((-32))+rcx]
479 lea r14,QWORD PTR[32+r14]
480 adc rdx,0
481 mov r11,rdx
482
483 mul rbp
484 add r13,rax
485 mov rax,QWORD PTR[((-8))+r15*1+rsi]
486 adc rdx,0
487 add r13,r10
488 adc rdx,0
489 mov QWORD PTR[((-24))+r14],r13
490 mov rdi,rdx
491
492 mul rbx
493 add r11,rax
494 mov rax,QWORD PTR[((-16))+rcx]
495 adc rdx,0
496 mov r10,rdx
497
498 mul rbp
499 add rdi,rax
500 mov rax,QWORD PTR[r15*1+rsi]
501 adc rdx,0
502 add rdi,r11
503 adc rdx,0
504 mov QWORD PTR[((-16))+r14],rdi
505 mov r13,rdx
506
507 mul rbx
508 add r10,rax
509 mov rax,QWORD PTR[rcx]
510 adc rdx,0
511 mov r11,rdx
512
513 mul rbp
514 add r13,rax
515 mov rax,QWORD PTR[8+r15*1+rsi]
516 adc rdx,0
517 add r13,r10
518 adc rdx,0
519 mov QWORD PTR[((-8))+r14],r13
520 mov rdi,rdx
521
522 mul rbx
523 add r11,rax
524 mov rax,QWORD PTR[16+rcx]
525 adc rdx,0
526 mov r10,rdx
527
528 mul rbp
529 add rdi,rax
530 mov rax,QWORD PTR[16+r15*1+rsi]
531 adc rdx,0
532 add rdi,r11
533 lea rcx,QWORD PTR[64+rcx]
534 adc rdx,0
535 mov QWORD PTR[r14],rdi
536 mov r13,rdx
537
538 add r15,32
539 jnz $L$1st4x
540
; Loop tail: the last two words of the first pass.
541 mul rbx
542 add r10,rax
543 mov rax,QWORD PTR[((-32))+rcx]
544 lea r14,QWORD PTR[32+r14]
545 adc rdx,0
546 mov r11,rdx
547
548 mul rbp
549 add r13,rax
550 mov rax,QWORD PTR[((-8))+rsi]
551 adc rdx,0
552 add r13,r10
553 adc rdx,0
554 mov QWORD PTR[((-24))+r14],r13
555 mov rdi,rdx
556
557 mul rbx
558 add r11,rax
559 mov rax,QWORD PTR[((-16))+rcx]
560 adc rdx,0
561 mov r10,rdx
562
563 mul rbp
564 add rdi,rax
565 mov rax,QWORD PTR[r9*1+rsi] ; reload the first word at the (original) rsi
566 adc rdx,0
567 add rdi,r11
568 adc rdx,0
569 mov QWORD PTR[((-16))+r14],rdi
570 mov r13,rdx
571
572 DB 102,72,15,126,195 ; movq rbx,xmm0 -- next gathered table word
573 lea rcx,QWORD PTR[r9*2+rcx] ; r9 is negative here: rewind rcx to its start
574
575 xor rdi,rdi
576 add r13,r10
577 adc rdi,0 ; rdi = carry out of the top word
578 mov QWORD PTR[((-8))+r14],r13
579
580 jmp $L$outer4x
581
; Outer loop: one pass per further gathered word, adding in the previous
; scratch contents.  Runs until r12 reaches the end marker at [16+8+rsp].
582 ALIGN 32
583 $L$outer4x::
584 mov r10,QWORD PTR[r9*1+r14]
585 mov rbp,r8
586 mul rbx
587 add r10,rax
588 mov rax,QWORD PTR[rcx]
589 adc rdx,0
590
591 movq xmm0,QWORD PTR[(((-96)))+r12]
592 movq xmm1,QWORD PTR[((-32))+r12]
593 pand xmm0,xmm4
594 movq xmm2,QWORD PTR[32+r12]
595 pand xmm1,xmm5
596 movq xmm3,QWORD PTR[96+r12]
597
598 imul rbp,r10
599 DB 067h
600 mov r11,rdx
601 mov QWORD PTR[r14],rdi ; store the carry word left by the previous pass
602
603 pand xmm2,xmm6
604 por xmm0,xmm1
605 pand xmm3,xmm7
606 por xmm0,xmm2
607 lea r14,QWORD PTR[r9*1+r14] ; rewind the scratch pointer (r9 is negative)
608 lea r12,QWORD PTR[256+r12]
609 por xmm0,xmm3
610
611 mul rbp
612 add r10,rax
613 mov rax,QWORD PTR[8+r9*1+rsi]
614 adc rdx,0
615 mov rdi,rdx
616
617 mul rbx
618 add r11,rax
619 mov rax,QWORD PTR[16+rcx]
620 adc rdx,0
621 add r11,QWORD PTR[8+r14]
622 adc rdx,0
623 mov r10,rdx
624
625 mul rbp
626 add rdi,rax
627 mov rax,QWORD PTR[16+r9*1+rsi]
628 adc rdx,0
629 add rdi,r11
630 lea r15,QWORD PTR[32+r9]
631 lea rcx,QWORD PTR[64+rcx]
632 adc rdx,0
633 mov r13,rdx
634 jmp $L$inner4x
635
636 ALIGN 32
637 $L$inner4x::
638 mul rbx
639 add r10,rax
640 mov rax,QWORD PTR[((-32))+rcx]
641 adc rdx,0
642 add r10,QWORD PTR[16+r14]
643 lea r14,QWORD PTR[32+r14]
644 adc rdx,0
645 mov r11,rdx
646
647 mul rbp
648 add r13,rax
649 mov rax,QWORD PTR[((-8))+r15*1+rsi]
650 adc rdx,0
651 add r13,r10
652 adc rdx,0
653 mov QWORD PTR[((-32))+r14],rdi
654 mov rdi,rdx
655
656 mul rbx
657 add r11,rax
658 mov rax,QWORD PTR[((-16))+rcx]
659 adc rdx,0
660 add r11,QWORD PTR[((-8))+r14]
661 adc rdx,0
662 mov r10,rdx
663
664 mul rbp
665 add rdi,rax
666 mov rax,QWORD PTR[r15*1+rsi]
667 adc rdx,0
668 add rdi,r11
669 adc rdx,0
670 mov QWORD PTR[((-24))+r14],r13
671 mov r13,rdx
672
673 mul rbx
674 add r10,rax
675 mov rax,QWORD PTR[rcx]
676 adc rdx,0
677 add r10,QWORD PTR[r14]
678 adc rdx,0
679 mov r11,rdx
680
681 mul rbp
682 add r13,rax
683 mov rax,QWORD PTR[8+r15*1+rsi]
684 adc rdx,0
685 add r13,r10
686 adc rdx,0
687 mov QWORD PTR[((-16))+r14],rdi
688 mov rdi,rdx
689
690 mul rbx
691 add r11,rax
692 mov rax,QWORD PTR[16+rcx]
693 adc rdx,0
694 add r11,QWORD PTR[8+r14]
695 adc rdx,0
696 mov r10,rdx
697
698 mul rbp
699 add rdi,rax
700 mov rax,QWORD PTR[16+r15*1+rsi]
701 adc rdx,0
702 add rdi,r11
703 lea rcx,QWORD PTR[64+rcx]
704 adc rdx,0
705 mov QWORD PTR[((-8))+r14],r13
706 mov r13,rdx
707
708 add r15,32
709 jnz $L$inner4x
710
; Inner-loop tail.
711 mul rbx
712 add r10,rax
713 mov rax,QWORD PTR[((-32))+rcx]
714 adc rdx,0
715 add r10,QWORD PTR[16+r14]
716 lea r14,QWORD PTR[32+r14]
717 adc rdx,0
718 mov r11,rdx
719
720 mul rbp
721 add r13,rax
722 mov rax,QWORD PTR[((-8))+rsi]
723 adc rdx,0
724 add r13,r10
725 adc rdx,0
726 mov QWORD PTR[((-32))+r14],rdi
727 mov rdi,rdx
728
729 mul rbx
730 add r11,rax
; The multiplier moves to rax and rbp takes the last word at rcx, so after
; the final mul rbp still holds that word for the comparison at the exit.
731 mov rax,rbp
732 mov rbp,QWORD PTR[((-16))+rcx]
733 adc rdx,0
734 add r11,QWORD PTR[((-8))+r14]
735 adc rdx,0
736 mov r10,rdx
737
738 mul rbp
739 add rdi,rax
740 mov rax,QWORD PTR[r9*1+rsi]
741 adc rdx,0
742 add rdi,r11
743 adc rdx,0
744 mov QWORD PTR[((-24))+r14],r13
745 mov r13,rdx
746
747 DB 102,72,15,126,195 ; movq rbx,xmm0 -- gathered word for the next outer pass
748 mov QWORD PTR[((-16))+r14],rdi
749 lea rcx,QWORD PTR[r9*2+rcx]
750
751 xor rdi,rdi
752 add r13,r10
753 adc rdi,0
754 add r13,QWORD PTR[r14] ; add the carry word stored by the previous pass
755 adc rdi,0
756 mov QWORD PTR[((-8))+r14],r13
757
758 cmp r12,QWORD PTR[((16+8))+rsp]
759 jb $L$outer4x
; Exit: compare the top result word (r13) with the top word read from rcx
; (rbp) and merge the borrow with the carry in rdi.  rdi becomes 0 or 1 and
; offsets rbp by 0 or 8 bytes for $L$sqr4x_sub.  r15 is zero here because
; the inner loop only exits once `add r15,32` reaches zero.
760 sub rbp,r13
761 adc r15,r15
762 or rdi,r15
763 xor rdi,1
764 lea rbx,QWORD PTR[r9*1+r14] ; rbx = start of the scratch result
765 lea rbp,QWORD PTR[rdi*8+rcx]
766 mov rcx,r9
767 sar rcx,3+2 ; rcx = r9 / 32 (negative); $L$sqr4x_sub counts it up to zero
768 mov rdi,QWORD PTR[((56+8))+rsp] ; rdi saved on entry = output pointer for the subtraction
769 jmp $L$sqr4x_sub
770 mul4x_internal ENDP
771 PUBLIC bn_power5
772
773 ALIGN 32
; bn_power5 -- public Win64 entry point.
;
; After the usual Win64 argument remap (rcx->rdi, rdx->rsi, r8->rdx,
; r9->rcx, [40+rsp]->r8, [48+rsp]->r9) it builds a 64-byte-aligned stack
; frame sized from r9, calls __bn_sqr8x_internal five times in a row and
; then calls mul4x_internal once.  Returns rax = 1.
;
; NOTE(review): by name, five squarings followed by one multiplication by a
; gathered table entry, i.e. one 5-bit exponentiation window -- confirm
; against the C caller.
;
; Fix relative to the reviewed revision: the prologue saves xmm6/xmm7
; (non-volatile under the Microsoft x64 ABI) at rax-88 / rax-72, and
; mul4x_internal overwrites both with mask values, but the epilogue never
; reloaded them.  The two movaps restores below mirror the epilogues of
; bn_mul_mont_gather5 and bn_mul4x_mont_gather5.  They carry no listing
; number because they did not exist in the reviewed revision.
774 bn_power5 PROC PUBLIC
775 mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
776 mov QWORD PTR[16+rsp],rsi
777 mov rax,rsp
778 $L$SEH_begin_bn_power5::
779 mov rdi,rcx
780 mov rsi,rdx
781 mov rdx,r8
782 mov rcx,r9
783 mov r8,QWORD PTR[40+rsp]
784 mov r9,QWORD PTR[48+rsp]
785
786
787 mov rax,rsp ; rax = entry rsp, stored at [40+rsp] below
788 push rbx
789 push rbp
790 push r12
791 push r13
792 push r14
793 push r15
794 lea rsp,QWORD PTR[((-40))+rsp]
795 movaps XMMWORD PTR[rsp],xmm6 ; saved at rax-88
796 movaps XMMWORD PTR[16+rsp],xmm7 ; saved at rax-72
; r9 = -(8 * count), r10 = 32 * count, r8 = word pointed to by the 5th argument.
797 mov r10d,r9d
798 shl r9d,3
799 shl r10d,3+2
800 neg r9
801 mov r8,QWORD PTR[r8]
802
803
804
805
806
807
808
; Frame placement: same scheme as bn_mul4x_mont_gather5 -- the frame's
; distance from rsi modulo 4096 decides how far rsp is lowered.
809 lea r11,QWORD PTR[((-64))+r9*2+rsp]
810 sub r11,rsi
811 and r11,4095
812 cmp r10,r11
813 jb $L$pwr_sp_alt
814 sub rsp,r11
815 lea rsp,QWORD PTR[((-64))+r9*2+rsp]
816 jmp $L$pwr_sp_done
817
818 ALIGN 32
819 $L$pwr_sp_alt::
820 lea r10,QWORD PTR[((4096-64))+r9*2]
821 lea rsp,QWORD PTR[((-64))+r9*2+rsp]
822 sub r11,r10
823 mov r10,0 ; mov (not xor) keeps the borrow from the sub for cmovc
824 cmovc r11,r10
825 sub rsp,r11
826 $L$pwr_sp_done::
827 and rsp,-64
828 mov r10,r9 ; r10 = negative byte count, as __bn_sqr8x_internal expects
829 neg r9 ; r9 = positive byte count
830
831
832
833
834
835
836
837
838
839
840 mov QWORD PTR[32+rsp],r8 ; multiplier word, read back by the callees at [32+8+rsp]
841 mov QWORD PTR[40+rsp],rax ; entry rsp, reloaded by the epilogue
842 $L$power5_body::
; Park the pointer arguments in volatile xmm registers across the calls.
843 DB 102,72,15,110,207 ; bytes 66 48 0F 6E CF = movq xmm1,rdi
844 DB 102,72,15,110,209 ; movq xmm2,rcx
845 DB 102,73,15,110,218 ; movq xmm3,r10
846 DB 102,72,15,110,226 ; movq xmm4,rdx
847
848 call __bn_sqr8x_internal
849 call __bn_sqr8x_internal
850 call __bn_sqr8x_internal
851 call __bn_sqr8x_internal
852 call __bn_sqr8x_internal
853
; Set up mul4x_internal: rcx and rdx come back from xmm2/xmm4, the output
; pointer is rsi, rax is the entry rsp (mul4x_internal reads [56+rax]) and
; r8 points at the multiplier word saved above.
854 DB 102,72,15,126,209 ; movq rcx,xmm2
855 DB 102,72,15,126,226 ; movq rdx,xmm4
856 mov rdi,rsi
857 mov rax,QWORD PTR[40+rsp]
858 lea r8,QWORD PTR[32+rsp]
859
860 call mul4x_internal
861
; Epilogue: rsi = entry rsp; reload every register the prologue saved.
862 mov rsi,QWORD PTR[40+rsp]
863 mov rax,1
 movaps xmm6,XMMWORD PTR[((-88))+rsi] ; restore Win64 non-volatile xmm6
 movaps xmm7,XMMWORD PTR[((-72))+rsi] ; restore Win64 non-volatile xmm7
864 mov r15,QWORD PTR[((-48))+rsi]
865 mov r14,QWORD PTR[((-40))+rsi]
866 mov r13,QWORD PTR[((-32))+rsi]
867 mov r12,QWORD PTR[((-24))+rsi]
868 mov rbp,QWORD PTR[((-16))+rsi]
869 mov rbx,QWORD PTR[((-8))+rsi]
870 lea rsp,QWORD PTR[rsi]
871 $L$power5_epilogue::
872 mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
873 mov rsi,QWORD PTR[16+rsp]
874 DB 0F3h,0C3h ;repret
875 $L$SEH_end_bn_power5::
876 bn_power5 ENDP
877
878 PUBLIC bn_sqr8x_internal
879
880
881 ALIGN 32
; bn_sqr8x_internal -- public symbol; also entered through the alias label
; __bn_sqr8x_internal (that is how bn_power5 calls it).
;
; Register state read on entry: rsi = input words, r9 = byte count
; (positive), r10 = -r9, rbp (from xmm2 after the squaring) = second input
; array read 16 bytes apart, and the caller's frame: the multiplier word at
; [32+8+rsp] and scratch starting at [48+8+rsp] (the +8 accounts for this
; routine's return address).
; Phases, in order:
;   1. $L$sqr4x_1st / _outer / _inner: products of distinct input words are
;      accumulated into the scratch area.
;   2. $L$sqr4x_shift_n_add: the scratch words are doubled and the square of
;      each input word is added in.
;   3. sqr8x_reduction: eight scratch words at a time are reduced using the
;      array at rbp and the multiplier at [32+8+rsp].
;   4. $L$sqr4x_sub: a borrow-chained subtraction stores the result at rdi.
;      mul4x_internal also jumps to this label.
; On return r10 = old r9 and r9 = -old r9 (see the last three instructions).
; NOTE(review): by name this is Montgomery squaring; the phase descriptions
; above are read from the instruction patterns only.
882 bn_sqr8x_internal PROC PUBLIC
883 __bn_sqr8x_internal::
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957 lea rbp,QWORD PTR[32+r10] ; rbp = 32 - byte count: negative running offset
958 lea rsi,QWORD PTR[r9*1+rsi] ; rsi = end of input; indexed with negative offsets
959
960 mov rcx,r9
961
962
963 mov r14,QWORD PTR[((-32))+rbp*1+rsi]
964 lea rdi,QWORD PTR[((48+8))+r9*2+rsp]
965 mov rax,QWORD PTR[((-24))+rbp*1+rsi]
966 lea rdi,QWORD PTR[((-32))+rbp*1+rdi]
967 mov rbx,QWORD PTR[((-16))+rbp*1+rsi]
968 mov r15,rax
969
970 mul r14
971 mov r10,rax
972 mov rax,rbx
973 mov r11,rdx
974 mov QWORD PTR[((-24))+rbp*1+rdi],r10
975
976 mul r14
977 add r11,rax
978 mov rax,rbx
979 adc rdx,0
980 mov QWORD PTR[((-16))+rbp*1+rdi],r11
981 mov r10,rdx
982
983
984 mov rbx,QWORD PTR[((-8))+rbp*1+rsi]
985 mul r15
986 mov r12,rax
987 mov rax,rbx
988 mov r13,rdx
989
990 lea rcx,QWORD PTR[rbp]
991 mul r14
992 add r10,rax
993 mov rax,rbx
994 mov r11,rdx
995 adc r11,0
996 add r10,r12
997 adc r11,0
998 mov QWORD PTR[((-8))+rcx*1+rdi],r10
999 jmp $L$sqr4x_1st
1000
; First row: multiply the two leading words (r14, r15) by each following
; input word; rcx is a negative offset that counts up to zero.
1001 ALIGN 32
1002 $L$sqr4x_1st::
1003 mov rbx,QWORD PTR[rcx*1+rsi]
1004 mul r15
1005 add r13,rax
1006 mov rax,rbx
1007 mov r12,rdx
1008 adc r12,0
1009
1010 mul r14
1011 add r11,rax
1012 mov rax,rbx
1013 mov rbx,QWORD PTR[8+rcx*1+rsi]
1014 mov r10,rdx
1015 adc r10,0
1016 add r11,r13
1017 adc r10,0
1018
1019
1020 mul r15
1021 add r12,rax
1022 mov rax,rbx
1023 mov QWORD PTR[rcx*1+rdi],r11
1024 mov r13,rdx
1025 adc r13,0
1026
1027 mul r14
1028 add r10,rax
1029 mov rax,rbx
1030 mov rbx,QWORD PTR[16+rcx*1+rsi]
1031 mov r11,rdx
1032 adc r11,0
1033 add r10,r12
1034 adc r11,0
1035
1036 mul r15
1037 add r13,rax
1038 mov rax,rbx
1039 mov QWORD PTR[8+rcx*1+rdi],r10
1040 mov r12,rdx
1041 adc r12,0
1042
1043 mul r14
1044 add r11,rax
1045 mov rax,rbx
1046 mov rbx,QWORD PTR[24+rcx*1+rsi]
1047 mov r10,rdx
1048 adc r10,0
1049 add r11,r13
1050 adc r10,0
1051
1052
1053 mul r15
1054 add r12,rax
1055 mov rax,rbx
1056 mov QWORD PTR[16+rcx*1+rdi],r11
1057 mov r13,rdx
1058 adc r13,0
1059 lea rcx,QWORD PTR[32+rcx]
1060
1061 mul r14
1062 add r10,rax
1063 mov rax,rbx
1064 mov r11,rdx
1065 adc r11,0
1066 add r10,r12
1067 adc r11,0
1068 mov QWORD PTR[((-8))+rcx*1+rdi],r10
1069
1070 cmp rcx,0
1071 jne $L$sqr4x_1st
1072
1073 mul r15
1074 add r13,rax
1075 lea rbp,QWORD PTR[16+rbp]
1076 adc rdx,0
1077 add r13,r11
1078 adc rdx,0
1079
1080 mov QWORD PTR[rdi],r13
1081 mov r12,rdx
1082 mov QWORD PTR[8+rdi],rdx
1083 jmp $L$sqr4x_outer
1084
; Remaining rows: rbp advances by 16 bytes (two input words) per outer
; iteration; each row is added onto what is already in the scratch area.
1085 ALIGN 32
1086 $L$sqr4x_outer::
1087 mov r14,QWORD PTR[((-32))+rbp*1+rsi]
1088 lea rdi,QWORD PTR[((48+8))+r9*2+rsp]
1089 mov rax,QWORD PTR[((-24))+rbp*1+rsi]
1090 lea rdi,QWORD PTR[((-32))+rbp*1+rdi]
1091 mov rbx,QWORD PTR[((-16))+rbp*1+rsi]
1092 mov r15,rax
1093
1094 mul r14
1095 mov r10,QWORD PTR[((-24))+rbp*1+rdi]
1096 add r10,rax
1097 mov rax,rbx
1098 adc rdx,0
1099 mov QWORD PTR[((-24))+rbp*1+rdi],r10
1100 mov r11,rdx
1101
1102 mul r14
1103 add r11,rax
1104 mov rax,rbx
1105 adc rdx,0
1106 add r11,QWORD PTR[((-16))+rbp*1+rdi]
1107 mov r10,rdx
1108 adc r10,0
1109 mov QWORD PTR[((-16))+rbp*1+rdi],r11
1110
1111 xor r12,r12
1112
1113 mov rbx,QWORD PTR[((-8))+rbp*1+rsi]
1114 mul r15
1115 add r12,rax
1116 mov rax,rbx
1117 adc rdx,0
1118 add r12,QWORD PTR[((-8))+rbp*1+rdi]
1119 mov r13,rdx
1120 adc r13,0
1121
1122 mul r14
1123 add r10,rax
1124 mov rax,rbx
1125 adc rdx,0
1126 add r10,r12
1127 mov r11,rdx
1128 adc r11,0
1129 mov QWORD PTR[((-8))+rbp*1+rdi],r10
1130
1131 lea rcx,QWORD PTR[rbp]
1132 jmp $L$sqr4x_inner
1133
1134 ALIGN 32
1135 $L$sqr4x_inner::
1136 mov rbx,QWORD PTR[rcx*1+rsi]
1137 mul r15
1138 add r13,rax
1139 mov rax,rbx
1140 mov r12,rdx
1141 adc r12,0
1142 add r13,QWORD PTR[rcx*1+rdi]
1143 adc r12,0
1144
1145 DB 067h
1146 mul r14
1147 add r11,rax
1148 mov rax,rbx
1149 mov rbx,QWORD PTR[8+rcx*1+rsi]
1150 mov r10,rdx
1151 adc r10,0
1152 add r11,r13
1153 adc r10,0
1154
1155 mul r15
1156 add r12,rax
1157 mov QWORD PTR[rcx*1+rdi],r11
1158 mov rax,rbx
1159 mov r13,rdx
1160 adc r13,0
1161 add r12,QWORD PTR[8+rcx*1+rdi]
1162 lea rcx,QWORD PTR[16+rcx]
1163 adc r13,0
1164
1165 mul r14
1166 add r10,rax
1167 mov rax,rbx
1168 adc rdx,0
1169 add r10,r12
1170 mov r11,rdx
1171 adc r11,0
1172 mov QWORD PTR[((-8))+rcx*1+rdi],r10
1173
1174 cmp rcx,0
1175 jne $L$sqr4x_inner
1176
1177 DB 067h
1178 mul r15
1179 add r13,rax
1180 adc rdx,0
1181 add r13,r11
1182 adc rdx,0
1183
1184 mov QWORD PTR[rdi],r13
1185 mov r12,rdx
1186 mov QWORD PTR[8+rdi],rdx
1187
1188 add rbp,16
1189 jnz $L$sqr4x_outer
1190
1191
; Last row: the final four input words (offsets -32..-8 from the end).
1192 mov r14,QWORD PTR[((-32))+rsi]
1193 lea rdi,QWORD PTR[((48+8))+r9*2+rsp]
1194 mov rax,QWORD PTR[((-24))+rsi]
1195 lea rdi,QWORD PTR[((-32))+rbp*1+rdi]
1196 mov rbx,QWORD PTR[((-16))+rsi]
1197 mov r15,rax
1198
1199 mul r14
1200 add r10,rax
1201 mov rax,rbx
1202 mov r11,rdx
1203 adc r11,0
1204
1205 mul r14
1206 add r11,rax
1207 mov rax,rbx
1208 mov QWORD PTR[((-24))+rdi],r10
1209 mov r10,rdx
1210 adc r10,0
1211 add r11,r13
1212 mov rbx,QWORD PTR[((-8))+rsi]
1213 adc r10,0
1214
1215 mul r15
1216 add r12,rax
1217 mov rax,rbx
1218 mov QWORD PTR[((-16))+rdi],r11
1219 mov r13,rdx
1220 adc r13,0
1221
1222 mul r14
1223 add r10,rax
1224 mov rax,rbx
1225 mov r11,rdx
1226 adc r11,0
1227 add r10,r12
1228 adc r11,0
1229 mov QWORD PTR[((-8))+rdi],r10
1230
1231 mul r15
1232 add r13,rax
1233 mov rax,QWORD PTR[((-16))+rsi]
1234 adc rdx,0
1235 add r13,r11
1236 adc rdx,0
1237
1238 mov QWORD PTR[rdi],r13
1239 mov r12,rdx
1240 mov QWORD PTR[8+rdi],rdx
1241
1242 mul rbx
1243 add rbp,16
1244 xor r14,r14
1245 sub rbp,r9 ; rbp = 16 - byte count: negative offset for the shift-and-add phase
1246 xor r15,r15
1247
1248 add rax,r12
1249 adc rdx,0
1250 mov QWORD PTR[8+rdi],rax
1251 mov QWORD PTR[16+rdi],rdx
1252 mov QWORD PTR[24+rdi],r15
1253
; Shift-and-add prologue.  Pattern used from here on: lea x*2 plus the bit
; shifted out of the previous word doubles the scratch pair, `mul rax`
; squares one input word, and the carry between steps is parked in r15
; (sbb r15,r15 saves it, neg r15 restores it into CF).  rcx is zero here:
; the loops above only exit when it reaches zero.
1254 mov rax,QWORD PTR[((-16))+rbp*1+rsi]
1255 lea rdi,QWORD PTR[((48+8))+rsp]
1256 xor r10,r10
1257 mov r11,QWORD PTR[8+rdi]
1258
1259 lea r12,QWORD PTR[r10*2+r14]
1260 shr r10,63
1261 lea r13,QWORD PTR[r11*2+rcx]
1262 shr r11,63
1263 or r13,r10
1264 mov r10,QWORD PTR[16+rdi]
1265 mov r14,r11
1266 mul rax
1267 neg r15
1268 mov r11,QWORD PTR[24+rdi]
1269 adc r12,rax
1270 mov rax,QWORD PTR[((-8))+rbp*1+rsi]
1271 mov QWORD PTR[rdi],r12
1272 adc r13,rdx
1273
1274 lea rbx,QWORD PTR[r10*2+r14]
1275 mov QWORD PTR[8+rdi],r13
1276 sbb r15,r15
1277 shr r10,63
1278 lea r8,QWORD PTR[r11*2+rcx]
1279 shr r11,63
1280 or r8,r10
1281 mov r10,QWORD PTR[32+rdi]
1282 mov r14,r11
1283 mul rax
1284 neg r15
1285 mov r11,QWORD PTR[40+rdi]
1286 adc rbx,rax
1287 mov rax,QWORD PTR[rbp*1+rsi]
1288 mov QWORD PTR[16+rdi],rbx
1289 adc r8,rdx
1290 lea rbp,QWORD PTR[16+rbp]
1291 mov QWORD PTR[24+rdi],r8
1292 sbb r15,r15
1293 lea rdi,QWORD PTR[64+rdi]
1294 jmp $L$sqr4x_shift_n_add
1295
; Main shift-and-add loop: four input words (eight scratch words) per
; iteration; rbp is a negative offset stepped by 32 up to zero.
1296 ALIGN 32
1297 $L$sqr4x_shift_n_add::
1298 lea r12,QWORD PTR[r10*2+r14]
1299 shr r10,63
1300 lea r13,QWORD PTR[r11*2+rcx]
1301 shr r11,63
1302 or r13,r10
1303 mov r10,QWORD PTR[((-16))+rdi]
1304 mov r14,r11
1305 mul rax
1306 neg r15
1307 mov r11,QWORD PTR[((-8))+rdi]
1308 adc r12,rax
1309 mov rax,QWORD PTR[((-8))+rbp*1+rsi]
1310 mov QWORD PTR[((-32))+rdi],r12
1311 adc r13,rdx
1312
1313 lea rbx,QWORD PTR[r10*2+r14]
1314 mov QWORD PTR[((-24))+rdi],r13
1315 sbb r15,r15
1316 shr r10,63
1317 lea r8,QWORD PTR[r11*2+rcx]
1318 shr r11,63
1319 or r8,r10
1320 mov r10,QWORD PTR[rdi]
1321 mov r14,r11
1322 mul rax
1323 neg r15
1324 mov r11,QWORD PTR[8+rdi]
1325 adc rbx,rax
1326 mov rax,QWORD PTR[rbp*1+rsi]
1327 mov QWORD PTR[((-16))+rdi],rbx
1328 adc r8,rdx
1329
1330 lea r12,QWORD PTR[r10*2+r14]
1331 mov QWORD PTR[((-8))+rdi],r8
1332 sbb r15,r15
1333 shr r10,63
1334 lea r13,QWORD PTR[r11*2+rcx]
1335 shr r11,63
1336 or r13,r10
1337 mov r10,QWORD PTR[16+rdi]
1338 mov r14,r11
1339 mul rax
1340 neg r15
1341 mov r11,QWORD PTR[24+rdi]
1342 adc r12,rax
1343 mov rax,QWORD PTR[8+rbp*1+rsi]
1344 mov QWORD PTR[rdi],r12
1345 adc r13,rdx
1346
1347 lea rbx,QWORD PTR[r10*2+r14]
1348 mov QWORD PTR[8+rdi],r13
1349 sbb r15,r15
1350 shr r10,63
1351 lea r8,QWORD PTR[r11*2+rcx]
1352 shr r11,63
1353 or r8,r10
1354 mov r10,QWORD PTR[32+rdi]
1355 mov r14,r11
1356 mul rax
1357 neg r15
1358 mov r11,QWORD PTR[40+rdi]
1359 adc rbx,rax
1360 mov rax,QWORD PTR[16+rbp*1+rsi]
1361 mov QWORD PTR[16+rdi],rbx
1362 adc r8,rdx
1363 mov QWORD PTR[24+rdi],r8
1364 sbb r15,r15
1365 lea rdi,QWORD PTR[64+rdi]
1366 add rbp,32
1367 jnz $L$sqr4x_shift_n_add
1368
; Shift-and-add tail: the last two input words.
1369 lea r12,QWORD PTR[r10*2+r14]
1370 DB 067h
1371 shr r10,63
1372 lea r13,QWORD PTR[r11*2+rcx]
1373 shr r11,63
1374 or r13,r10
1375 mov r10,QWORD PTR[((-16))+rdi]
1376 mov r14,r11
1377 mul rax
1378 neg r15
1379 mov r11,QWORD PTR[((-8))+rdi]
1380 adc r12,rax
1381 mov rax,QWORD PTR[((-8))+rsi]
1382 mov QWORD PTR[((-32))+rdi],r12
1383 adc r13,rdx
1384
1385 lea rbx,QWORD PTR[r10*2+r14]
1386 mov QWORD PTR[((-24))+rdi],r13
1387 sbb r15,r15
1388 shr r10,63
1389 lea r8,QWORD PTR[r11*2+rcx]
1390 shr r11,63
1391 or r8,r10
1392 mul rax
1393 neg r15
1394 adc rbx,rax
1395 adc r8,rdx
1396 mov QWORD PTR[((-16))+rdi],rbx
1397 mov QWORD PTR[((-8))+rdi],r8
1398 DB 102,72,15,126,213 ; bytes 66 48 0F 7E D5 = movq rbp,xmm2
; Reduction.  Frame slots set up here: [0+8+rsp] = rbp + 2*r9 (end of the
; array at rbp), [8+8+rsp] = end of the scratch area.  rax carries the
; top-most carry between blocks.
1399 sqr8x_reduction::
1400 xor rax,rax
1401 lea rcx,QWORD PTR[r9*2+rbp]
1402 lea rdx,QWORD PTR[((48+8))+r9*2+rsp]
1403 mov QWORD PTR[((0+8))+rsp],rcx
1404 lea rdi,QWORD PTR[((48+8))+r9*1+rsp]
1405 mov QWORD PTR[((8+8))+rsp],rdx
1406 neg r9
1407 jmp $L$8x_reduction_loop
1408
1409 ALIGN 32
1410 $L$8x_reduction_loop::
1411 lea rdi,QWORD PTR[r9*1+rdi]
1412 DB 066h
; Load eight scratch words into rbx, r9..r15.  r9 is reused as a data
; register here and is restored from xmm3 at the end of each block.
1413 mov rbx,QWORD PTR[rdi]
1414 mov r9,QWORD PTR[8+rdi]
1415 mov r10,QWORD PTR[16+rdi]
1416 mov r11,QWORD PTR[24+rdi]
1417 mov r12,QWORD PTR[32+rdi]
1418 mov r13,QWORD PTR[40+rdi]
1419 mov r14,QWORD PTR[48+rdi]
1420 mov r15,QWORD PTR[56+rdi]
1421 mov QWORD PTR[rdx],rax
1422 lea rdi,QWORD PTR[64+rdi]
1423
1424 DB 067h
1425 mov r8,rbx
1426 imul rbx,QWORD PTR[((32+8))+rsp] ; rbx = lowest word * multiplier saved by the caller
1427 mov rax,QWORD PTR[rbp]
1428 mov ecx,8
1429 jmp $L$8x_reduce
1430
; Eight rounds; each multiplies the eight words at rbp (16 bytes apart) by
; rbx and shifts the r8..r15 window down by one word.  The multiplier of
; each round is saved at [48-8+8+rcx*8+rsp] for the tail passes.
1431 ALIGN 32
1432 $L$8x_reduce::
1433 mul rbx
1434 mov rax,QWORD PTR[16+rbp]
1435 neg r8 ; only CF is wanted: set iff r8 was non-zero
1436 mov r8,rdx
1437 adc r8,0
1438
1439 mul rbx
1440 add r9,rax
1441 mov rax,QWORD PTR[32+rbp]
1442 adc rdx,0
1443 add r8,r9
1444 mov QWORD PTR[((48-8+8))+rcx*8+rsp],rbx
1445 mov r9,rdx
1446 adc r9,0
1447
1448 mul rbx
1449 add r10,rax
1450 mov rax,QWORD PTR[48+rbp]
1451 adc rdx,0
1452 add r9,r10
1453 mov rsi,QWORD PTR[((32+8))+rsp]
1454 mov r10,rdx
1455 adc r10,0
1456
1457 mul rbx
1458 add r11,rax
1459 mov rax,QWORD PTR[64+rbp]
1460 adc rdx,0
1461 imul rsi,r8 ; next round's multiplier, computed ahead of time
1462 add r10,r11
1463 mov r11,rdx
1464 adc r11,0
1465
1466 mul rbx
1467 add r12,rax
1468 mov rax,QWORD PTR[80+rbp]
1469 adc rdx,0
1470 add r11,r12
1471 mov r12,rdx
1472 adc r12,0
1473
1474 mul rbx
1475 add r13,rax
1476 mov rax,QWORD PTR[96+rbp]
1477 adc rdx,0
1478 add r12,r13
1479 mov r13,rdx
1480 adc r13,0
1481
1482 mul rbx
1483 add r14,rax
1484 mov rax,QWORD PTR[112+rbp]
1485 adc rdx,0
1486 add r13,r14
1487 mov r14,rdx
1488 adc r14,0
1489
1490 mul rbx
1491 mov rbx,rsi
1492 add r15,rax
1493 mov rax,QWORD PTR[rbp]
1494 adc rdx,0
1495 add r14,r15
1496 mov r15,rdx
1497 adc r15,0
1498
1499 dec ecx
1500 jnz $L$8x_reduce
1501
1502 lea rbp,QWORD PTR[128+rbp]
1503 xor rax,rax
1504 mov rdx,QWORD PTR[((8+8))+rsp]
1505 cmp rbp,QWORD PTR[((0+8))+rsp]
1506 jae $L$8x_no_tail
1507
1508 DB 066h
1509 add r8,QWORD PTR[rdi]
1510 adc r9,QWORD PTR[8+rdi]
1511 adc r10,QWORD PTR[16+rdi]
1512 adc r11,QWORD PTR[24+rdi]
1513 adc r12,QWORD PTR[32+rdi]
1514 adc r13,QWORD PTR[40+rdi]
1515 adc r14,QWORD PTR[48+rdi]
1516 adc r15,QWORD PTR[56+rdi]
1517 sbb rsi,rsi ; park the carry in rsi (0 or -1)
1518
1519 mov rbx,QWORD PTR[((48+56+8))+rsp] ; first saved multiplier
1520 mov ecx,8
1521 mov rax,QWORD PTR[rbp]
1522 jmp $L$8x_tail
1523
; Tail: apply the eight saved multipliers to the next 8-word chunk at rbp,
; writing one finished word to [rdi] per round.
1524 ALIGN 32
1525 $L$8x_tail::
1526 mul rbx
1527 add r8,rax
1528 mov rax,QWORD PTR[16+rbp]
1529 mov QWORD PTR[rdi],r8
1530 mov r8,rdx
1531 adc r8,0
1532
1533 mul rbx
1534 add r9,rax
1535 mov rax,QWORD PTR[32+rbp]
1536 adc rdx,0
1537 add r8,r9
1538 lea rdi,QWORD PTR[8+rdi]
1539 mov r9,rdx
1540 adc r9,0
1541
1542 mul rbx
1543 add r10,rax
1544 mov rax,QWORD PTR[48+rbp]
1545 adc rdx,0
1546 add r9,r10
1547 mov r10,rdx
1548 adc r10,0
1549
1550 mul rbx
1551 add r11,rax
1552 mov rax,QWORD PTR[64+rbp]
1553 adc rdx,0
1554 add r10,r11
1555 mov r11,rdx
1556 adc r11,0
1557
1558 mul rbx
1559 add r12,rax
1560 mov rax,QWORD PTR[80+rbp]
1561 adc rdx,0
1562 add r11,r12
1563 mov r12,rdx
1564 adc r12,0
1565
1566 mul rbx
1567 add r13,rax
1568 mov rax,QWORD PTR[96+rbp]
1569 adc rdx,0
1570 add r12,r13
1571 mov r13,rdx
1572 adc r13,0
1573
1574 mul rbx
1575 add r14,rax
1576 mov rax,QWORD PTR[112+rbp]
1577 adc rdx,0
1578 add r13,r14
1579 mov r14,rdx
1580 adc r14,0
1581
1582 mul rbx
1583 mov rbx,QWORD PTR[((48-16+8))+rcx*8+rsp] ; next saved multiplier
1584 add r15,rax
1585 adc rdx,0
1586 add r14,r15
1587 mov rax,QWORD PTR[rbp]
1588 mov r15,rdx
1589 adc r15,0
1590
1591 dec ecx
1592 jnz $L$8x_tail
1593
1594 lea rbp,QWORD PTR[128+rbp]
1595 mov rdx,QWORD PTR[((8+8))+rsp]
1596 cmp rbp,QWORD PTR[((0+8))+rsp]
1597 jae $L$8x_tail_done
1598
1599 mov rbx,QWORD PTR[((48+56+8))+rsp]
1600 neg rsi ; restore the parked carry into CF
1601 mov rax,QWORD PTR[rbp]
1602 adc r8,QWORD PTR[rdi]
1603 adc r9,QWORD PTR[8+rdi]
1604 adc r10,QWORD PTR[16+rdi]
1605 adc r11,QWORD PTR[24+rdi]
1606 adc r12,QWORD PTR[32+rdi]
1607 adc r13,QWORD PTR[40+rdi]
1608 adc r14,QWORD PTR[48+rdi]
1609 adc r15,QWORD PTR[56+rdi]
1610 sbb rsi,rsi
1611
1612 mov ecx,8
1613 jmp $L$8x_tail
1614
1615 ALIGN 32
1616 $L$8x_tail_done::
1617 add r8,QWORD PTR[rdx] ; add the top-most carry stored at the end of scratch
1618 xor rax,rax
1619
1620 neg rsi
1621 $L$8x_no_tail::
1622 adc r8,QWORD PTR[rdi]
1623 adc r9,QWORD PTR[8+rdi]
1624 adc r10,QWORD PTR[16+rdi]
1625 adc r11,QWORD PTR[24+rdi]
1626 adc r12,QWORD PTR[32+rdi]
1627 adc r13,QWORD PTR[40+rdi]
1628 adc r14,QWORD PTR[48+rdi]
1629 adc r15,QWORD PTR[56+rdi]
1630 adc rax,0 ; rax = carry out of this block
1631 mov rcx,QWORD PTR[((-16))+rbp] ; last word read from the array at rbp
1632 xor rsi,rsi
1633
1634 DB 102,72,15,126,213 ; movq rbp,xmm2 -- rewind rbp to the start of its array
1635
1636 mov QWORD PTR[rdi],r8
1637 mov QWORD PTR[8+rdi],r9
1638 DB 102,73,15,126,217 ; bytes 66 49 0F 7E D9 = movq r9,xmm3
1639 mov QWORD PTR[16+rdi],r10
1640 mov QWORD PTR[24+rdi],r11
1641 mov QWORD PTR[32+rdi],r12
1642 mov QWORD PTR[40+rdi],r13
1643 mov QWORD PTR[48+rdi],r14
1644 mov QWORD PTR[56+rdi],r15
1645 lea rdi,QWORD PTR[64+rdi]
1646
1647 cmp rdi,rdx
1648 jb $L$8x_reduction_loop
1649
; Same exit logic as mul4x_internal: compare the top result word (r15) with
; the top word of the array at rbp (rcx), merge with the carry in rax, and
; use the resulting 0/1 to offset rbp by 0 or 8 bytes for the subtraction.
1650 sub rcx,r15
1651 lea rbx,QWORD PTR[r9*1+rdi]
1652 adc rsi,rsi
1653 mov rcx,r9
1654 or rax,rsi
1655 DB 102,72,15,126,207 ; movq rdi,xmm1
1656 xor rax,1
1657 DB 102,72,15,126,206 ; movq rsi,xmm1
1658 lea rbp,QWORD PTR[rax*8+rbp]
1659 sar rcx,3+2 ; rcx = r9 / 32, counted up to zero below
1660 jmp $L$sqr4x_sub
1661
; Borrow-chained subtraction, four words per iteration:
; rdi[i] = rbx[i] - rbp[2*i] (words at rbp are 16 bytes apart).
; lea, mov and inc leave CF untouched, so the borrow flows across
; iterations.
1662 ALIGN 32
1663 $L$sqr4x_sub::
1664 DB 066h
1665 mov r12,QWORD PTR[rbx]
1666 mov r13,QWORD PTR[8+rbx]
1667 sbb r12,QWORD PTR[rbp]
1668 mov r14,QWORD PTR[16+rbx]
1669 sbb r13,QWORD PTR[16+rbp]
1670 mov r15,QWORD PTR[24+rbx]
1671 lea rbx,QWORD PTR[32+rbx]
1672 sbb r14,QWORD PTR[32+rbp]
1673 mov QWORD PTR[rdi],r12
1674 sbb r15,QWORD PTR[48+rbp]
1675 lea rbp,QWORD PTR[64+rbp]
1676 mov QWORD PTR[8+rdi],r13
1677 mov QWORD PTR[16+rdi],r14
1678 mov QWORD PTR[24+rdi],r15
1679 lea rdi,QWORD PTR[32+rdi]
1680
1681 inc rcx
1682 jnz $L$sqr4x_sub
1683 mov r10,r9
1684 neg r9
1685 DB 0F3h,0C3h ;repret
1686 bn_sqr8x_internal ENDP
; bn_from_montgomery: public entry point that dispatches to bn_from_mont8x.
; It tests the low three bits of the dword at [48+rsp], which is the same
; stack slot bn_from_mont8x later loads into r9. When those bits are all
; zero it tail-jumps to bn_from_mont8x with registers and stack untouched;
; otherwise it returns 0 in eax without doing any work.
1687 PUBLIC bn_from_montgomery
1688
1689 ALIGN 32
1690 bn_from_montgomery PROC PUBLIC
1691 test DWORD PTR[48+rsp],7 ; any of the low 3 bits set => not a multiple of 8
1692 jz bn_from_mont8x ; multiple of 8: tail-jump, the worker returns to our caller
1693 xor eax,eax ; unsupported value: return 0
1694 DB 0F3h,0C3h ;repret
1695 bn_from_montgomery ENDP
1696
1697
1698 ALIGN 32
; bn_from_mont8x: private worker reached by tail-jump from bn_from_montgomery.
;
; Entry shim: rdi and rsi are parked in the caller's home slots ([8+rsp],
; [16+rsp]) and the incoming arguments (rcx, rdx, r8, r9, [40+rsp], [48+rsp])
; are moved into rdi, rsi, rdx, rcx, r8, r9 for the body.
;
; Body, as visible here:
;   1. save rbx, rbp, r12-r15 and xmm6/xmm7, remembering the pre-push rsp in rax;
;   2. carve a 64-byte-aligned stack frame of at least 64 + 2*(r9*8) bytes;
;   3. copy r9*8 bytes from [rsi] into the frame at rsp+48 and zero-fill the
;      r9*8 bytes that follow the copy;
;   4. call sqr8x_reduction;
;   5. overwrite the frame with zeros, restore the integer registers and
;      return 1 in rax.
; NOTE(review): sqr8x_reduction is defined outside this block; what it
; computes and where the result is written cannot be confirmed from here.
1699 bn_from_mont8x PROC PRIVATE
1700 mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
1701 mov QWORD PTR[16+rsp],rsi
1702 mov rax,rsp
1703 $L$SEH_begin_bn_from_mont8x::
1704 mov rdi,rcx ; 1st argument
1705 mov rsi,rdx ; 2nd argument
1706 mov rdx,r8 ; 3rd argument
1707 mov rcx,r9 ; 4th argument
1708 mov r8,QWORD PTR[40+rsp] ; 5th argument (stack)
1709 mov r9,QWORD PTR[48+rsp] ; 6th argument (stack); low 3 bits were checked by bn_from_montgomery
1710
1711
1712 DB 067h ; lone prefix byte in front of the next instruction (presumably padding; confirm)
1713 mov rax,rsp ; rax = stack pointer before any push; used to restore at exit
1714 push rbx
1715 push rbp
1716 push r12
1717 push r13
1718 push r14
1719 push r15
1720 lea rsp,QWORD PTR[((-40))+rsp] ; room for xmm6/xmm7 (32 bytes) plus 8 bytes of padding
1721 movaps XMMWORD PTR[rsp],xmm6 ; saved at rax-88
1722 movaps XMMWORD PTR[16+rsp],xmm7 ; saved at rax-72
1723 DB 067h ; lone prefix byte, as above
1724 mov r10d,r9d ; r10 = count (32-bit write zero-extends)
1725 shl r9d,3 ; r9 = count*8 = size in bytes
1726 shl r10d,3+2 ; r10 = count*32
1727 neg r9 ; r9 = -(size in bytes)
1728 mov r8,QWORD PTR[r8] ; replace the pointer in r8 with the qword it points to
1729
1730
1731
1732
1733
1734
1735
; Frame placement. The distance between the candidate frame and rsi is taken
; modulo 4096 and the frame is pushed further down by up to that amount.
; NOTE(review): this looks like it keeps the frame from aliasing the input
; buffer modulo the page size; the reason is not stated in this file.
1736 lea r11,QWORD PTR[((-64))+r9*2+rsp] ; candidate base = rsp - 64 - 2*size
1737 sub r11,rsi
1738 and r11,4095 ; (candidate - rsi) mod 4096
1739 cmp r10,r11
1740 jb $L$from_sp_alt ; count*32 < offset: take the alternative placement
1741 sub rsp,r11 ; drop rsp by the offset...
1742 lea rsp,QWORD PTR[((-64))+r9*2+rsp] ; ...then reserve 64 + 2*size bytes
1743 jmp $L$from_sp_done
1744
1745 ALIGN 32
1746 $L$from_sp_alt::
1747 lea r10,QWORD PTR[((4096-64))+r9*2] ; r10 = 4096 - 64 - 2*size
1748 lea rsp,QWORD PTR[((-64))+r9*2+rsp] ; reserve 64 + 2*size bytes
1749 sub r11,r10 ; extra adjustment = offset - r10, CF set if negative
1750 mov r10,0 ; mov leaves the flags alone, so CF from the sub survives
1751 cmovc r11,r10 ; borrow: no extra adjustment
1752 sub rsp,r11
1753 $L$from_sp_done::
1754 and rsp,-64 ; round the frame down to a 64-byte boundary
1755 mov r10,r9 ; r10 = -(size in bytes)
1756 neg r9 ; r9 = +(size in bytes)
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767 mov QWORD PTR[32+rsp],r8 ; frame slot 32: qword loaded through the 5th argument
1768 mov QWORD PTR[40+rsp],rax ; frame slot 40: pre-push rsp (reloaded at exit)
1769 $L$from_body::
1770 mov r11,r9 ; r11 = bytes left to copy
1771 lea rax,QWORD PTR[48+rsp] ; rax = destination cursor inside the frame
1772 pxor xmm0,xmm0 ; xmm0 = 0, used for zero fill
1773 jmp $L$mul_by_1
1774
1775 ALIGN 32
; Each pass copies 64 bytes from [rsi] to [rax] and zeroes the 64 bytes at
; [rax+r9], i.e. the region that starts right after the full copy.
1776 $L$mul_by_1::
1777 movdqu xmm1,XMMWORD PTR[rsi] ; unaligned loads: the source buffer alignment is unknown
1778 movdqu xmm2,XMMWORD PTR[16+rsi]
1779 movdqu xmm3,XMMWORD PTR[32+rsi]
1780 movdqa XMMWORD PTR[r9*1+rax],xmm0
1781 movdqu xmm4,XMMWORD PTR[48+rsi]
1782 movdqa XMMWORD PTR[16+r9*1+rax],xmm0
1783 DB 048h,08dh,0b6h,040h,000h,000h,000h ; hand-encoded lea rsi,[rsi+64] using a 32-bit displacement
1784 movdqa XMMWORD PTR[rax],xmm1
1785 movdqa XMMWORD PTR[32+r9*1+rax],xmm0
1786 movdqa XMMWORD PTR[16+rax],xmm2
1787 movdqa XMMWORD PTR[48+r9*1+rax],xmm0
1788 movdqa XMMWORD PTR[32+rax],xmm3
1789 movdqa XMMWORD PTR[48+rax],xmm4
1790 lea rax,QWORD PTR[64+rax]
1791 sub r11,64 ; 64 bytes per pass; size is a multiple of 64 because count is a multiple of 8
1792 jnz $L$mul_by_1
1793
1794 DB 102,72,15,110,207 ; hand-encoded movq xmm1,rdi
1795 DB 102,72,15,110,209 ; hand-encoded movq xmm2,rcx
1796 DB 067h ; lone prefix byte in front of the next instruction
1797 mov rbp,rcx
1798 DB 102,73,15,110,218 ; hand-encoded movq xmm3,r10
1799 call sqr8x_reduction ; defined elsewhere in the file
1800
1801 pxor xmm0,xmm0
1802 lea rax,QWORD PTR[48+rsp] ; start of the working area inside the frame
1803 mov rsi,QWORD PTR[40+rsp] ; rsi = pre-push rsp stored at $L$from_body
1804 jmp $L$from_mont_zero
1805
1806 ALIGN 32
; Wipe the working area: 64 bytes per pass while r9 drops by 32, so the loop
; clears 2*r9 bytes in total.
; NOTE(review): r9 here is whatever sqr8x_reduction left in it; confirm it is
; the byte size again.
1807 $L$from_mont_zero::
1808 movdqa XMMWORD PTR[rax],xmm0
1809 movdqa XMMWORD PTR[16+rax],xmm0
1810 movdqa XMMWORD PTR[32+rax],xmm0
1811 movdqa XMMWORD PTR[48+rax],xmm0
1812 lea rax,QWORD PTR[64+rax]
1813 sub r9,32
1814 jnz $L$from_mont_zero
1815
1816 mov rax,1 ; return value
1817 mov r15,QWORD PTR[((-48))+rsi] ; reload the six pushed registers relative to the pre-push rsp
1818 mov r14,QWORD PTR[((-40))+rsi]
1819 mov r13,QWORD PTR[((-32))+rsi]
1820 mov r12,QWORD PTR[((-24))+rsi]
1821 mov rbp,QWORD PTR[((-16))+rsi]
1822 mov rbx,QWORD PTR[((-8))+rsi]
1823 lea rsp,QWORD PTR[rsi] ; drop the whole frame
; NOTE(review): xmm6/xmm7 were stored in the prologue but are not reloaded on
; this path. That is only correct if nothing between prologue and here
; modifies them; confirm against sqr8x_reduction.
1824 $L$from_epilogue::
1825 mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
1826 mov rsi,QWORD PTR[16+rsp]
1827 DB 0F3h,0C3h ;repret
1828 $L$SEH_end_bn_from_mont8x::
1829 bn_from_mont8x ENDP
; bn_get_bits5: return a 5-bit field from a byte buffer.
; In:  rcx = buffer base, edx = bit offset of the field
; Out: eax = the 5 bits starting at that bit offset (value 0..31)
; Reads the 16-bit word at byte offset edx/8, so it touches two bytes
; starting at base + edx/8. Leaf routine: no stack use.
1830 PUBLIC bn_get_bits5
1831
1832 ALIGN 16
1833 bn_get_bits5 PROC PUBLIC
1834 mov r10,rcx ; free rcx because cl is needed as the shift count
1835 mov ecx,edx ; ecx = bit offset
1836 shr edx,3 ; edx = byte offset (32-bit write zero-extends rdx)
1837 movzx eax,WORD PTR[rdx*1+r10] ; 16 bits are enough: at most 7 + 5 bits are needed
1838 and ecx,7 ; bit position inside the first byte
1839 shr eax,cl ; bring the field down to bit 0
1840 and eax,31 ; keep 5 bits
1841 DB 0F3h,0C3h ;repret
1842 bn_get_bits5 ENDP
1843
; bn_scatter5: spread a run of qwords into a table with a 256-byte stride.
; In:  rcx = source qwords, edx = number of qwords,
;      r8  = table base, r9 = slot index (scaled by 8 bytes)
; Writes source[i] to table + r9*8 + i*256. Returns immediately when
; edx is 0. Leaf routine: no stack use, only rax/rcx/rdx/r8 are modified.
1844 PUBLIC bn_scatter5
1845
1846 ALIGN 16
1847 bn_scatter5 PROC PUBLIC
1848 cmp edx,0
1849 jz $L$scatter_epilogue ; nothing to copy
1850 lea r8,QWORD PTR[r9*8+r8] ; r8 = address of the chosen slot in the first 256-byte row
1851 $L$scatter::
1852 mov rax,QWORD PTR[rcx] ; next source qword
1853 lea rcx,QWORD PTR[8+rcx]
1854 mov QWORD PTR[r8],rax
1855 lea r8,QWORD PTR[256+r8] ; same slot, next row
1856 sub edx,1
1857 jnz $L$scatter
1858 $L$scatter_epilogue::
1859 DB 0F3h,0C3h ;repret
1860 bn_scatter5 ENDP
1861
; bn_gather5: read back one slot from a table laid out with a 256-byte stride.
; In:  rcx = destination qwords, edx = number of qwords,
;      r8  = table base, r9d = slot index
; For every row the routine loads four candidates 64 bytes apart (the low 3
; bits of the index pick the qword inside each 64-byte group), ANDs each one
; with a mask and ORs them together. Index bits 3-4 choose which mask is
; all-ones, so the same four addresses are read whatever those bits are.
; Unlike bn_scatter5 there is no check for edx == 0: the loop body always
; runs at least once.
; xmm6/xmm7 are saved and restored because the routine uses them as masks.
1862 PUBLIC bn_gather5
1863
1864 ALIGN 16
1865 bn_gather5 PROC PUBLIC
1866 $L$SEH_begin_bn_gather5::
1867
; The prologue is emitted as raw bytes; its length (4+4+5 = 13 bytes) matches
; the prologue size recorded in $L$SEH_info_bn_gather5.
1868 DB 048h,083h,0ech,028h ; sub rsp,40
1869 DB 00fh,029h,034h,024h ; movaps [rsp],xmm6
1870 DB 00fh,029h,07ch,024h,010h ; movaps [rsp+16],xmm7
1871 mov r11d,r9d ; r11 = index
1872 shr r9d,3 ; r9 = index / 8
1873 and r11,7 ; r11 = index mod 8: qword inside a 64-byte group
1874 not r9d
1875 lea rax,QWORD PTR[$L$magic_masks]
1876 and r9d,3 ; r9 = (~(index/8)) & 3: starting qword in the mask table
1877 lea r8,QWORD PTR[128+r11*8+r8] ; bias by 128 so the four candidates sit at -128, -64, 0, +64
1878 movq xmm4,QWORD PTR[r9*8+rax] ; four consecutive mask qwords; exactly one is all-ones
1879 movq xmm5,QWORD PTR[8+r9*8+rax]
1880 movq xmm6,QWORD PTR[16+r9*8+rax]
1881 movq xmm7,QWORD PTR[24+r9*8+rax]
1882 jmp $L$gather
1883 ALIGN 16
1884 $L$gather::
1885 movq xmm0,QWORD PTR[(((-128)))+r8] ; candidate from group 0
1886 movq xmm1,QWORD PTR[((-64))+r8] ; candidate from group 1
1887 pand xmm0,xmm4
1888 movq xmm2,QWORD PTR[r8] ; candidate from group 2
1889 pand xmm1,xmm5
1890 movq xmm3,QWORD PTR[64+r8] ; candidate from group 3
1891 pand xmm2,xmm6
1892 por xmm0,xmm1
1893 pand xmm3,xmm7
1894 DB 067h,067h ; two prefix bytes in front of the next instruction (presumably padding; confirm)
1895 por xmm0,xmm2
1896 lea r8,QWORD PTR[256+r8] ; next row
1897 por xmm0,xmm3 ; xmm0 = the one candidate whose mask was all-ones
1898
1899 movq QWORD PTR[rcx],xmm0
1900 lea rcx,QWORD PTR[8+rcx]
1901 sub edx,1
1902 jnz $L$gather
1903 movaps xmm6,XMMWORD PTR[rsp] ; restore the two saved xmm registers
1904 movaps xmm7,XMMWORD PTR[16+rsp]
1905 lea rsp,QWORD PTR[40+rsp]
1906 DB 0F3h,0C3h ;repret
1907 $L$SEH_end_bn_gather5::
1908 bn_gather5 ENDP
; $L$magic_masks: eight qwords, all zero except the fourth (index 3), which is
; all-ones. Readers load four consecutive qwords starting at index 0..3, so
; exactly one of the four loaded masks is all-ones and its position is
; 3 - start. Used by bn_gather5 and by the gather code at $L$mul_body.
1909 ALIGN 64
1910 $L$magic_masks::
1911 DD 0,0,0,0,0,0,-1,-1 ; qwords 0..2 = 0, qword 3 = all-ones
1912 DD 0,0,0,0,0,0,0,0 ; qwords 4..7 = 0
; NUL-terminated ASCII identification string:
; "Montgomery Multiplication with scatter/gather for x86_64, CRYPTOGAMS by <appro@openssl.org>"
1913 DB 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
1914 DB 112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115
1915 DB 99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111
1916 DB 114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79
1917 DB 71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111
1918 DB 112,101,110,115,115,108,46,111,114,103,62,0
1919 EXTERN __imp_RtlVirtualUnwind:NEAR
1920
1921 ALIGN 16
; mul_handler: language-specific exception handler shared by
; bn_mul_mont_gather5, bn_mul4x_mont_gather5, bn_power5 and bn_from_mont8x
; (see the $L$SEH_info_* records that reference it).
;
; In (Win64 handler signature): rcx = exception record, rdx = establisher
;     frame, r8 = CONTEXT record, r9 = DISPATCHER_CONTEXT.
; Out: eax = 1 (ExceptionContinueSearch).
;
; Field names in the comments follow the Win64 CONTEXT / DISPATCHER_CONTEXT
; layouts; the offsets themselves are what the code uses.
;
; The handler rebuilds the register state the faulting function had on entry:
;   * Rip before the body label  -> prologue not finished, context->Rax still
;     holds the entry rsp;
;   * Rip at/after the epilogue  -> frame already released, context->Rsp is
;     the entry rsp;
;   * otherwise (inside the body) the entry rsp is reloaded from the frame,
;     and rbx/rbp/r12-r15/xmm6/xmm7 are recovered from just below it.
; It then restores rsi/rdi from the home slots, copies the patched CONTEXT
; into the dispatcher's context record and calls RtlVirtualUnwind.
;
; Fix: the test that chooses where the entry rsp was stored was inverted.
; bn_mul_mont_gather5 is the first function in the file, so its body lies
; BELOW $L$mul_epilogue and it keeps the value at [8+num*8+rsp]; the other
; functions lie ABOVE $L$mul_epilogue and keep it at [40+rsp] (see the store
; before $L$from_body). The branch to $L$body_40 must therefore be taken for
; Rip above $L$mul_epilogue (ja), not below it (jb).
1922 mul_handler PROC PRIVATE
1923 push rsi
1924 push rdi
1925 push rbx
1926 push rbp
1927 push r12
1928 push r13
1929 push r14
1930 push r15
1931 pushfq
1932 sub rsp,64 ; 32 bytes shadow space + 4 stack arguments for RtlVirtualUnwind
1933
1934 mov rax,QWORD PTR[120+r8] ; context->Rax
1935 mov rbx,QWORD PTR[248+r8] ; context->Rip
1936
1937 mov rsi,QWORD PTR[8+r9] ; disp->ImageBase
1938 mov r11,QWORD PTR[56+r9] ; disp->HandlerData (two image-relative labels)
1939
1940 mov r10d,DWORD PTR[r11] ; HandlerData[0]: body label
1941 lea r10,QWORD PTR[r10*1+rsi]
1942 cmp rbx,r10
1943 jb $L$common_seh_tail ; Rip < body label: still in the prologue, rax = entry rsp
1944
1945 mov rax,QWORD PTR[152+r8] ; context->Rsp
1946
1947 mov r10d,DWORD PTR[4+r11] ; HandlerData[1]: epilogue label
1948 lea r10,QWORD PTR[r10*1+rsi]
1949 cmp rbx,r10
1950 jae $L$common_seh_tail ; Rip >= epilogue label: frame already released
1951
1952 lea r10,QWORD PTR[$L$mul_epilogue]
1953 cmp rbx,r10
1954 ja $L$body_40 ; above bn_mul_mont_gather5: entry rsp is in frame slot 40
1955
1956 mov r10,QWORD PTR[192+r8] ; context->R9 (num inside bn_mul_mont_gather5)
1957 mov rax,QWORD PTR[8+r10*8+rax] ; entry rsp stored at [8+num*8+rsp]
1958 jmp $L$body_proceed
1959
1960 $L$body_40::
1961 mov rax,QWORD PTR[40+rax] ; entry rsp stored at [40+rsp]
1962 $L$body_proceed::
1963
1964 movaps xmm0,XMMWORD PTR[((-88))+rax] ; saved xmm6 (6 pushes + 40-byte area below entry rsp)
1965 movaps xmm1,XMMWORD PTR[((-72))+rax] ; saved xmm7
1966
1967 mov rbx,QWORD PTR[((-8))+rax] ; pushed registers, in push order
1968 mov rbp,QWORD PTR[((-16))+rax]
1969 mov r12,QWORD PTR[((-24))+rax]
1970 mov r13,QWORD PTR[((-32))+rax]
1971 mov r14,QWORD PTR[((-40))+rax]
1972 mov r15,QWORD PTR[((-48))+rax]
1973 mov QWORD PTR[144+r8],rbx ; context->Rbx
1974 mov QWORD PTR[160+r8],rbp ; context->Rbp
1975 mov QWORD PTR[216+r8],r12 ; context->R12
1976 mov QWORD PTR[224+r8],r13 ; context->R13
1977 mov QWORD PTR[232+r8],r14 ; context->R14
1978 mov QWORD PTR[240+r8],r15 ; context->R15
1979 movups XMMWORD PTR[512+r8],xmm0 ; context->Xmm6
1980 movups XMMWORD PTR[528+r8],xmm1 ; context->Xmm7
1981
1982 $L$common_seh_tail::
1983 mov rdi,QWORD PTR[8+rax] ; rdi/rsi parked in the home slots by the WIN64 prologue
1984 mov rsi,QWORD PTR[16+rax]
1985 mov QWORD PTR[152+r8],rax ; context->Rsp = entry rsp
1986 mov QWORD PTR[168+r8],rsi ; context->Rsi
1987 mov QWORD PTR[176+r8],rdi ; context->Rdi
1988
1989 mov rdi,QWORD PTR[40+r9] ; disp->ContextRecord (copy destination)
1990 mov rsi,r8 ; patched context (copy source)
1991 mov ecx,154 ; 154 qwords = 1232 bytes
1992 DD 0a548f3fch ; bytes FC F3 48 A5: cld; rep movsq
1993
1994 mov rsi,r9 ; rsi = disp, r9 is about to be reused as an argument
1995 xor rcx,rcx ; arg1: handler type 0
1996 mov rdx,QWORD PTR[8+rsi] ; arg2: disp->ImageBase
1997 mov r8,QWORD PTR[rsi] ; arg3: disp->ControlPc
1998 mov r9,QWORD PTR[16+rsi] ; arg4: disp->FunctionEntry
1999 mov r10,QWORD PTR[40+rsi] ; disp->ContextRecord
2000 lea r11,QWORD PTR[56+rsi] ; &disp->HandlerData
2001 lea r12,QWORD PTR[24+rsi] ; &disp->EstablisherFrame
2002 mov QWORD PTR[32+rsp],r10 ; arg5
2003 mov QWORD PTR[40+rsp],r11 ; arg6
2004 mov QWORD PTR[48+rsp],r12 ; arg7
2005 mov QWORD PTR[56+rsp],rcx ; arg8: NULL
2006 call QWORD PTR[__imp_RtlVirtualUnwind]
2007
2008 mov eax,1 ; ExceptionContinueSearch
2009 add rsp,64
2010 popfq
2011 pop r15
2012 pop r14
2013 pop r13
2014 pop r12
2015 pop rbp
2016 pop rbx
2017 pop rdi
2018 pop rsi
2019 DB 0F3h,0C3h ;repret
2020 mul_handler ENDP
2021
2022 .text$ ENDS
; .pdata: function table for Win64 unwinding. Each entry is three
; image-relative addresses: function begin label, function end label, and the
; matching $L$SEH_info_* record in .xdata.
2023 .pdata SEGMENT READONLY ALIGN(4)
2024 ALIGN 4
2025 DD imagerel $L$SEH_begin_bn_mul_mont_gather5
2026 DD imagerel $L$SEH_end_bn_mul_mont_gather5
2027 DD imagerel $L$SEH_info_bn_mul_mont_gather5
2028
2029 DD imagerel $L$SEH_begin_bn_mul4x_mont_gather5
2030 DD imagerel $L$SEH_end_bn_mul4x_mont_gather5
2031 DD imagerel $L$SEH_info_bn_mul4x_mont_gather5
2032
2033 DD imagerel $L$SEH_begin_bn_power5
2034 DD imagerel $L$SEH_end_bn_power5
2035 DD imagerel $L$SEH_info_bn_power5
2036
2037 DD imagerel $L$SEH_begin_bn_from_mont8x
2038 DD imagerel $L$SEH_end_bn_from_mont8x
2039 DD imagerel $L$SEH_info_bn_from_mont8x
; bn_gather5 is the only entry described by plain unwind codes instead of mul_handler.
2040 DD imagerel $L$SEH_begin_bn_gather5
2041 DD imagerel $L$SEH_end_bn_gather5
2042 DD imagerel $L$SEH_info_bn_gather5
2043
2044 .pdata ENDS
; .xdata: unwind records referenced from .pdata.
; The first four records share one shape: a 4-byte header, the image-relative
; address of mul_handler, then two image-relative labels (body start, epilogue)
; that mul_handler reads as HandlerData[0] and HandlerData[1].
; NOTE(review): header byte 9 is presumably "version 1 + exception-handler
; flag" in the Win64 UNWIND_INFO layout, with zero prologue size and zero
; unwind codes; confirm against the ABI documentation.
2045 .xdata SEGMENT READONLY ALIGN(8)
2046 ALIGN 8
2047 $L$SEH_info_bn_mul_mont_gather5::
2048 DB 9,0,0,0
2049 DD imagerel mul_handler
2050 DD imagerel $L$mul_body,imagerel $L$mul_epilogue
2051 ALIGN 8
2052 $L$SEH_info_bn_mul4x_mont_gather5::
2053 DB 9,0,0,0
2054 DD imagerel mul_handler
2055 DD imagerel $L$mul4x_body,imagerel $L$mul4x_epilogue
2056 ALIGN 8
2057 $L$SEH_info_bn_power5::
2058 DB 9,0,0,0
2059 DD imagerel mul_handler
2060 DD imagerel $L$power5_body,imagerel $L$power5_epilogue
2061 ALIGN 8
2062 $L$SEH_info_bn_from_mont8x::
2063 DB 9,0,0,0
2064 DD imagerel mul_handler
2065 DD imagerel $L$from_body,imagerel $L$from_epilogue
2066 ALIGN 8
; bn_gather5 has no handler; its record lists unwind codes for the 13-byte
; hand-encoded prologue (sub rsp,40 / movaps [rsp],xmm6 / movaps [rsp+16],xmm7).
; The per-line decoding below assumes the Win64 UNWIND_CODE layout.
2067 $L$SEH_info_bn_gather5::
2068 DB 001h,00dh,005h,000h ; version 1, prologue size 13 bytes, 5 code slots, no frame register
2069 DB 00dh,078h,001h,000h ; at prologue offset 13: xmm7 saved at rsp+16
2070 DB 008h,068h,000h,000h ; at prologue offset 8: xmm6 saved at rsp+0
2071 DB 004h,042h,000h,000h ; at prologue offset 4: 40-byte stack allocation; then one padding slot
2072 ALIGN 8
2073
2074 .xdata ENDS
2075 END
OLDNEW
« no previous file with comments | « third_party/boringssl/win-x86_64/crypto/bn/x86_64-mont.asm ('k') | third_party/boringssl/win-x86_64/crypto/cpu-x86_64-asm.asm » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698