Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(4)

Side by Side Diff: third_party/boringssl/win-x86_64/crypto/bn/x86_64-mont5.asm

Issue 2354623003: Pull boringssl generated source from boringssl_gen (Closed)
Patch Set: . Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 default rel
2 %define XMMWORD
3 %define YMMWORD
4 %define ZMMWORD
5 section .text code align=64
6
7
8 EXTERN OPENSSL_ia32cap_P
9
;-----------------------------------------------------------------------
; bn_mul_mont_gather5 -- exported Win64 entry point.
;
; ABI:  Microsoft x64. Incoming arguments are in rcx, rdx, r8, r9 and on
;       the stack at [40+rsp], [48+rsp], [56+rsp] (7 arguments are read).
;       They are remapped to rdi, rsi, rdx, rcx, r8, r9 before the body.
;       NOTE(review): judging by the name and the arithmetic, these are
;       presumably (result, a, table, modulus, n0 pointer, word count,
;       table index) for a Montgomery multiplication -- confirm against
;       the C prototype.
; Flow: if the low three bits of the 6th argument are non-zero the
;       generic one-word-at-a-time path ($L$mul_enter) runs; otherwise
;       control jumps to $L$mul4x_enter inside bn_mul4x_mont_gather5.
; Out:  rax = 1.
; Regs: rbx, rbp, r12-r15 are pushed and restored; rdi/rsi are saved in
;       the caller-provided home slots [8+rsp]/[16+rsp] and restored.
;-----------------------------------------------------------------------
10 global bn_mul_mont_gather5
11
12 ALIGN 64
13 bn_mul_mont_gather5:
; rdi and rsi are callee-saved on Win64, so park them in the home space.
14 mov QWORD[8+rsp],rdi ;WIN64 prologue
15 mov QWORD[16+rsp],rsi
16 mov rax,rsp
17 $L$SEH_begin_bn_mul_mont_gather5:
; Shuffle the Win64 argument registers into rdi, rsi, rdx, rcx, r8, r9.
18 mov rdi,rcx
19 mov rsi,rdx
20 mov rdx,r8
21 mov rcx,r9
22 mov r8,QWORD[40+rsp]
23 mov r9,QWORD[48+rsp]
24
25
; Dispatch on the low 3 bits of the word count in r9d.
26 test r9d,7
27 jnz NEAR $L$mul_enter
28 jmp NEAR $L$mul4x_enter
29
30 ALIGN 16
31 $L$mul_enter:
; Writing r9d zero-extends the 32-bit count into all of r9.
32 mov r9d,r9d
; rax keeps the entry rsp; it is stored in the frame below and used by the
; epilogue to find the pushed registers and restore rsp.
33 mov rax,rsp
; xmm5 = 7th argument (32-bit), later broadcast and compared against
; counters to build the selection masks.
34 movd xmm5,DWORD[56+rsp]
; $L$inc is defined outside this block; two 16-byte vectors are read from it.
35 lea r10,[$L$inc]
36 push rbx
37 push rbp
38 push r12
39 push r13
40 push r14
41 push r15
42
; Allocate the frame: rsp -= 264 + (r9+2)*8, then align down to 1024 bytes.
43 lea r11,[2+r9]
44 neg r11
45 lea rsp,[((-264))+r11*8+rsp]
46 and rsp,-1024
47
; Remember the original rsp just above the r9+1 temporary words.
48 mov QWORD[8+r9*8+rsp],rax
49 $L$mul_body:
; r12 = table pointer biased by +128 so all 16 slots of a 256-byte row are
; reachable with displacements -128..+112.
50 lea r12,[128+rdx]
51 movdqa xmm0,XMMWORD[r10]
52 movdqa xmm1,XMMWORD[16+r10]
; r10 = 16-byte aligned base such that the 16 masks live at [112+r10]..[352+r10].
53 lea r10,[((24-112))+r9*8+rsp]
54 and r10,-16
55
; ---- Build 16 selection masks -------------------------------------------
; xmm5 is broadcast to all four dwords. xmm4 keeps a copy of the second
; $L$inc vector and is added repeatedly (paddd) to produce successive
; counter vectors; each counter vector is compared with xmm5 (pcmpeqd),
; giving an all-ones/all-zeros dword mask that is stored on the stack.
; The DB 0x67 bytes are address-size prefixes emitted in front of the next
; instruction (presumably padding for code alignment -- confirm).
56 pshufd xmm5,xmm5,0
57 movdqa xmm4,xmm1
58 movdqa xmm2,xmm1
59 paddd xmm1,xmm0
60 pcmpeqd xmm0,xmm5
61 DB 0x67
62 movdqa xmm3,xmm4
63 paddd xmm2,xmm1
64 pcmpeqd xmm1,xmm5
65 movdqa XMMWORD[112+r10],xmm0
66 movdqa xmm0,xmm4
67
68 paddd xmm3,xmm2
69 pcmpeqd xmm2,xmm5
70 movdqa XMMWORD[128+r10],xmm1
71 movdqa xmm1,xmm4
72
73 paddd xmm0,xmm3
74 pcmpeqd xmm3,xmm5
75 movdqa XMMWORD[144+r10],xmm2
76 movdqa xmm2,xmm4
77
78 paddd xmm1,xmm0
79 pcmpeqd xmm0,xmm5
80 movdqa XMMWORD[160+r10],xmm3
81 movdqa xmm3,xmm4
82 paddd xmm2,xmm1
83 pcmpeqd xmm1,xmm5
84 movdqa XMMWORD[176+r10],xmm0
85 movdqa xmm0,xmm4
86
87 paddd xmm3,xmm2
88 pcmpeqd xmm2,xmm5
89 movdqa XMMWORD[192+r10],xmm1
90 movdqa xmm1,xmm4
91
92 paddd xmm0,xmm3
93 pcmpeqd xmm3,xmm5
94 movdqa XMMWORD[208+r10],xmm2
95 movdqa xmm2,xmm4
96
97 paddd xmm1,xmm0
98 pcmpeqd xmm0,xmm5
99 movdqa XMMWORD[224+r10],xmm3
100 movdqa xmm3,xmm4
101 paddd xmm2,xmm1
102 pcmpeqd xmm1,xmm5
103 movdqa XMMWORD[240+r10],xmm0
104 movdqa xmm0,xmm4
105
106 paddd xmm3,xmm2
107 pcmpeqd xmm2,xmm5
108 movdqa XMMWORD[256+r10],xmm1
109 movdqa xmm1,xmm4
110
111 paddd xmm0,xmm3
112 pcmpeqd xmm3,xmm5
113 movdqa XMMWORD[272+r10],xmm2
114 movdqa xmm2,xmm4
115
116 paddd xmm1,xmm0
117 pcmpeqd xmm0,xmm5
118 movdqa XMMWORD[288+r10],xmm3
119 movdqa xmm3,xmm4
120 paddd xmm2,xmm1
121 pcmpeqd xmm1,xmm5
122 movdqa XMMWORD[304+r10],xmm0
123
124 paddd xmm3,xmm2
125 DB 0x67
126 pcmpeqd xmm2,xmm5
127 movdqa XMMWORD[320+r10],xmm1
128
129 pcmpeqd xmm3,xmm5
130 movdqa XMMWORD[336+r10],xmm2
; ---- Gather the first multiplier word -------------------------------------
; All 16 vectors of the table row are read and ANDed with their mask, then
; ORed together, so the memory access pattern does not depend on the index.
; The last four masks are still live in xmm0..xmm3 and are applied directly.
131 pand xmm0,XMMWORD[64+r12]
132
133 pand xmm1,XMMWORD[80+r12]
134 pand xmm2,XMMWORD[96+r12]
135 movdqa XMMWORD[352+r10],xmm3
136 pand xmm3,XMMWORD[112+r12]
137 por xmm0,xmm2
138 por xmm1,xmm3
139 movdqa xmm4,XMMWORD[((-128))+r12]
140 movdqa xmm5,XMMWORD[((-112))+r12]
141 movdqa xmm2,XMMWORD[((-96))+r12]
142 pand xmm4,XMMWORD[112+r10]
143 movdqa xmm3,XMMWORD[((-80))+r12]
144 pand xmm5,XMMWORD[128+r10]
145 por xmm0,xmm4
146 pand xmm2,XMMWORD[144+r10]
147 por xmm1,xmm5
148 pand xmm3,XMMWORD[160+r10]
149 por xmm0,xmm2
150 por xmm1,xmm3
151 movdqa xmm4,XMMWORD[((-64))+r12]
152 movdqa xmm5,XMMWORD[((-48))+r12]
153 movdqa xmm2,XMMWORD[((-32))+r12]
154 pand xmm4,XMMWORD[176+r10]
155 movdqa xmm3,XMMWORD[((-16))+r12]
156 pand xmm5,XMMWORD[192+r10]
157 por xmm0,xmm4
158 pand xmm2,XMMWORD[208+r10]
159 por xmm1,xmm5
160 pand xmm3,XMMWORD[224+r10]
161 por xmm0,xmm2
162 por xmm1,xmm3
163 movdqa xmm4,XMMWORD[r12]
164 movdqa xmm5,XMMWORD[16+r12]
165 movdqa xmm2,XMMWORD[32+r12]
166 pand xmm4,XMMWORD[240+r10]
167 movdqa xmm3,XMMWORD[48+r12]
168 pand xmm5,XMMWORD[256+r10]
169 por xmm0,xmm4
170 pand xmm2,XMMWORD[272+r10]
171 por xmm1,xmm5
172 pand xmm3,XMMWORD[288+r10]
173 por xmm0,xmm2
174 por xmm1,xmm3
175 por xmm0,xmm1
; 0x4e swaps the two qword halves; OR-ing folds the result into the low qword.
176 pshufd xmm1,xmm0,0x4e
177 por xmm0,xmm1
; Advance to the next 256-byte table row.
178 lea r12,[256+r12]
; Hand-encoded "movq rbx,xmm0" (66 48 0F 7E C3): rbx = gathered word.
179 DB 102,72,15,126,195
180
; ---- First pass: no previous accumulator to add in --------------------------
; r8 = the qword that the 5th argument points to; rax = first word at [rsi].
181 mov r8,QWORD[r8]
182 mov rax,QWORD[rsi]
183
; r14 = outer index, r15 = inner index.
184 xor r14,r14
185 xor r15,r15
186
187 mov rbp,r8
188 mul rbx
189 mov r10,rax
190 mov rax,QWORD[rcx]
191
; rbp = low 64 bits of r8 * (low word of the product): the per-row factor
; applied to the words at [rcx] (presumably the Montgomery "m" -- confirm).
192 imul rbp,r10
193 mov r11,rdx
194
195 mul rbp
196 add r10,rax
197 mov rax,QWORD[8+rsi]
198 adc rdx,0
199 mov r13,rdx
200
201 lea r15,[1+r15]
202 jmp NEAR $L$1st_enter
203
204 ALIGN 16
205 $L$1st:
; Accumulate [rcx]-word*rbp and [rsi]-word*rbx with carries and store the
; result one slot behind the index (-16 displacement with r15 already bumped).
206 add r13,rax
207 mov rax,QWORD[r15*8+rsi]
208 adc rdx,0
209 add r13,r11
210 mov r11,r10
211 adc rdx,0
212 mov QWORD[((-16))+r15*8+rsp],r13
213 mov r13,rdx
214
215 $L$1st_enter:
216 mul rbx
217 add r11,rax
218 mov rax,QWORD[r15*8+rcx]
219 adc rdx,0
220 lea r15,[1+r15]
221 mov r10,rdx
222
223 mul rbp
224 cmp r15,r9
225 jne NEAR $L$1st
226
227
; Loop tail: flush the last two words and the final carry word.
228 add r13,rax
229 adc rdx,0
230 add r13,r11
231 adc rdx,0
232 mov QWORD[((-16))+r9*8+rsp],r13
233 mov r13,rdx
234 mov r11,r10
235
236 xor rdx,rdx
237 add r13,r11
238 adc rdx,0
239 mov QWORD[((-8))+r9*8+rsp],r13
240 mov QWORD[r9*8+rsp],rdx
241
242 lea r14,[1+r14]
243 jmp NEAR $L$outer
244 ALIGN 16
245 $L$outer:
; ---- Outer loop: gather the next multiplier word ----------------------------
; rdx is recomputed to point 128 bytes past the first stored mask, so the
; 16 masks are addressed as [-128+rdx]..[112+rdx].
246 lea rdx,[((24+128))+r9*8+rsp]
247 and rdx,-16
248 pxor xmm4,xmm4
249 pxor xmm5,xmm5
250 movdqa xmm0,XMMWORD[((-128))+r12]
251 movdqa xmm1,XMMWORD[((-112))+r12]
252 movdqa xmm2,XMMWORD[((-96))+r12]
253 movdqa xmm3,XMMWORD[((-80))+r12]
254 pand xmm0,XMMWORD[((-128))+rdx]
255 pand xmm1,XMMWORD[((-112))+rdx]
256 por xmm4,xmm0
257 pand xmm2,XMMWORD[((-96))+rdx]
258 por xmm5,xmm1
259 pand xmm3,XMMWORD[((-80))+rdx]
260 por xmm4,xmm2
261 por xmm5,xmm3
262 movdqa xmm0,XMMWORD[((-64))+r12]
263 movdqa xmm1,XMMWORD[((-48))+r12]
264 movdqa xmm2,XMMWORD[((-32))+r12]
265 movdqa xmm3,XMMWORD[((-16))+r12]
266 pand xmm0,XMMWORD[((-64))+rdx]
267 pand xmm1,XMMWORD[((-48))+rdx]
268 por xmm4,xmm0
269 pand xmm2,XMMWORD[((-32))+rdx]
270 por xmm5,xmm1
271 pand xmm3,XMMWORD[((-16))+rdx]
272 por xmm4,xmm2
273 por xmm5,xmm3
274 movdqa xmm0,XMMWORD[r12]
275 movdqa xmm1,XMMWORD[16+r12]
276 movdqa xmm2,XMMWORD[32+r12]
277 movdqa xmm3,XMMWORD[48+r12]
278 pand xmm0,XMMWORD[rdx]
279 pand xmm1,XMMWORD[16+rdx]
280 por xmm4,xmm0
281 pand xmm2,XMMWORD[32+rdx]
282 por xmm5,xmm1
283 pand xmm3,XMMWORD[48+rdx]
284 por xmm4,xmm2
285 por xmm5,xmm3
286 movdqa xmm0,XMMWORD[64+r12]
287 movdqa xmm1,XMMWORD[80+r12]
288 movdqa xmm2,XMMWORD[96+r12]
289 movdqa xmm3,XMMWORD[112+r12]
290 pand xmm0,XMMWORD[64+rdx]
291 pand xmm1,XMMWORD[80+rdx]
292 por xmm4,xmm0
293 pand xmm2,XMMWORD[96+rdx]
294 por xmm5,xmm1
295 pand xmm3,XMMWORD[112+rdx]
296 por xmm4,xmm2
297 por xmm5,xmm3
298 por xmm4,xmm5
299 pshufd xmm0,xmm4,0x4e
300 por xmm0,xmm4
301 lea r12,[256+r12]
302
303 mov rax,QWORD[rsi]
; Hand-encoded "movq rbx,xmm0": rbx = gathered word for this row.
304 DB 102,72,15,126,195
305
306 xor r15,r15
307 mov rbp,r8
; r10 = lowest word of the running accumulator kept at [rsp].
308 mov r10,QWORD[rsp]
309
310 mul rbx
311 add r10,rax
312 mov rax,QWORD[rcx]
313 adc rdx,0
314
315 imul rbp,r10
316 mov r11,rdx
317
318 mul rbp
319 add r10,rax
320 mov rax,QWORD[8+rsi]
321 adc rdx,0
322 mov r10,QWORD[8+rsp]
323 mov r13,rdx
324
325 lea r15,[1+r15]
326 jmp NEAR $L$inner_enter
327
328 ALIGN 16
329 $L$inner:
; Same shape as $L$1st, but each step also adds the previous accumulator
; word loaded from the stack into r10.
330 add r13,rax
331 mov rax,QWORD[r15*8+rsi]
332 adc rdx,0
333 add r13,r10
334 mov r10,QWORD[r15*8+rsp]
335 adc rdx,0
336 mov QWORD[((-16))+r15*8+rsp],r13
337 mov r13,rdx
338
339 $L$inner_enter:
340 mul rbx
341 add r11,rax
342 mov rax,QWORD[r15*8+rcx]
343 adc rdx,0
344 add r10,r11
345 mov r11,rdx
346 adc r11,0
347 lea r15,[1+r15]
348
349 mul rbp
350 cmp r15,r9
351 jne NEAR $L$inner
352
353 add r13,rax
354 adc rdx,0
355 add r13,r10
356 mov r10,QWORD[r9*8+rsp]
357 adc rdx,0
358 mov QWORD[((-16))+r9*8+rsp],r13
359 mov r13,rdx
360
361 xor rdx,rdx
362 add r13,r11
363 adc rdx,0
364 add r13,r10
365 adc rdx,0
366 mov QWORD[((-8))+r9*8+rsp],r13
367 mov QWORD[r9*8+rsp],rdx
368
; Repeat until r14 reaches r9 (unsigned compare).
369 lea r14,[1+r14]
370 cmp r14,r9
371 jb NEAR $L$outer
372
; ---- Final subtraction --------------------------------------------------------
; "xor r14,r14" also clears CF, so the first sbb starts without a borrow.
; The loop body uses only mov/lea/dec between sbb's, none of which modify CF,
; so the borrow chains across iterations.
373 xor r14,r14
374 mov rax,QWORD[rsp]
375 lea rsi,[rsp]
376 mov r15,r9
377 jmp NEAR $L$sub
378 ALIGN 16
; [rdi][i] = accumulator[i] - [rcx][i] - borrow
379 $L$sub: sbb rax,QWORD[r14*8+rcx]
380 mov QWORD[r14*8+rdi],rax
381 mov rax,QWORD[8+r14*8+rsi]
382 lea r14,[1+r14]
383 dec r15
384 jnz NEAR $L$sub
385
; rax = top accumulator word minus the final borrow; it is used below as an
; AND mask (presumably always 0 or all-ones at this point -- confirm).
386 sbb rax,0
387 xor r14,r14
388 mov r15,r9
389 ALIGN 16
390 $L$copy:
; Branch-free select per word: result = ((acc ^ diff) & rax) ^ diff, i.e. the
; accumulator word where rax has bits set, the subtracted word where it does
; not. The stack word is then overwritten with the loop index, which wipes
; the intermediate value from the frame.
391 mov rsi,QWORD[r14*8+rsp]
392 mov rcx,QWORD[r14*8+rdi]
393 xor rsi,rcx
394 and rsi,rax
395 xor rsi,rcx
396 mov QWORD[r14*8+rsp],r14
397 mov QWORD[r14*8+rdi],rsi
398 lea r14,[1+r14]
399 sub r15,1
400 jnz NEAR $L$copy
401
; ---- Epilogue -------------------------------------------------------------------
; rsi = entry rsp saved at $L$mul_enter; the six pushed registers sit just
; below it. Return value is 1.
402 mov rsi,QWORD[8+r9*8+rsp]
403 mov rax,1
404
405 mov r15,QWORD[((-48))+rsi]
406 mov r14,QWORD[((-40))+rsi]
407 mov r13,QWORD[((-32))+rsi]
408 mov r12,QWORD[((-24))+rsi]
409 mov rbp,QWORD[((-16))+rsi]
410 mov rbx,QWORD[((-8))+rsi]
411 lea rsp,[rsi]
412 $L$mul_epilogue:
413 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
414 mov rsi,QWORD[16+rsp]
415 DB 0F3h,0C3h ;repret
416 $L$SEH_end_bn_mul_mont_gather5:
417
;-----------------------------------------------------------------------
; bn_mul4x_mont_gather5 -- 4-way unrolled variant (not declared global in
; this block).
;
; ABI:  Microsoft x64, same argument layout and register remapping as
;       bn_mul_mont_gather5. bn_mul_mont_gather5 also jumps straight to
;       $L$mul4x_enter with the arguments already remapped.
; Work: builds a stack frame, then delegates to mul4x_internal.
; Out:  rax = 1.
; Regs: rbx, rbp, r12-r15 pushed/restored; rdi/rsi saved in the caller's
;       home slots and restored.
;-----------------------------------------------------------------------
418 ALIGN 32
419 bn_mul4x_mont_gather5:
420 mov QWORD[8+rsp],rdi ;WIN64 prologue
421 mov QWORD[16+rsp],rsi
422 mov rax,rsp
423 $L$SEH_begin_bn_mul4x_mont_gather5:
; Remap Win64 argument registers to rdi, rsi, rdx, rcx, r8, r9.
424 mov rdi,rcx
425 mov rsi,rdx
426 mov rdx,r8
427 mov rcx,r9
428 mov r8,QWORD[40+rsp]
429 mov r9,QWORD[48+rsp]
430
431
432 $L$mul4x_enter:
; DB 0x67 emits an address-size prefix in front of the next instruction
; (presumably padding for alignment -- confirm).
433 DB 0x67
; rax = entry rsp; stored at [40+rsp] in the new frame and used by
; mul4x_internal to reach the 7th argument at [56+rax].
434 mov rax,rsp
435 push rbx
436 push rbp
437 push r12
438 push r13
439 push r14
440 push r15
441
442 DB 0x67
; r9 = word count * 8 (bytes); r10 = 3 * that; r9 is then negated so it can
; be used as a negative displacement while sizing the frame.
443 shl r9d,3
444 lea r10,[r9*2+r9]
445 neg r9
446
447
448
449
450
451
452
453
454
455
; ---- Frame placement --------------------------------------------------------
; r11 = (candidate frame address - rdi) mod 4096. Depending on how r11
; compares with r10, rsp is lowered by an extra amount before the frame of
; 320 + 2*bytes is carved out.
; NOTE(review): this looks like it steers the frame's offset within a 4 KiB
; page relative to the output buffer at rdi -- confirm the intent against
; the generator's comments, which are not present in this listing.
456 lea r11,[((-320))+r9*2+rsp]
457 sub r11,rdi
458 and r11,4095
459 cmp r10,r11
460 jb NEAR $L$mul4xsp_alt
461 sub rsp,r11
462 lea rsp,[((-320))+r9*2+rsp]
463 jmp NEAR $L$mul4xsp_done
464
465 ALIGN 32
466 $L$mul4xsp_alt:
467 lea r10,[((4096-320))+r9*2]
468 lea rsp,[((-320))+r9*2+rsp]
469 sub r11,r10
; "mov r10,0" rather than "xor": mov leaves CF from the sub above intact, and
; cmovc needs that CF to clamp r11 to zero when the subtraction borrowed.
470 mov r10,0
471 cmovc r11,r10
472 sub rsp,r11
473 $L$mul4xsp_done:
; Align the frame to 64 bytes and make r9 positive (byte count) again.
474 and rsp,-64
475 neg r9
476
; Save the entry rsp for the epilogue.
477 mov QWORD[40+rsp],rax
478 $L$mul4x_body:
479
480 call mul4x_internal
481
; ---- Epilogue -----------------------------------------------------------------
; Reload the entry rsp, restore the six pushed registers from just below it,
; and return 1.
482 mov rsi,QWORD[40+rsp]
483 mov rax,1
484
485 mov r15,QWORD[((-48))+rsi]
486 mov r14,QWORD[((-40))+rsi]
487 mov r13,QWORD[((-32))+rsi]
488 mov r12,QWORD[((-24))+rsi]
489 mov rbp,QWORD[((-16))+rsi]
490 mov rbx,QWORD[((-8))+rsi]
491 lea rsp,[rsi]
492 $L$mul4x_epilogue:
493 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
494 mov rsi,QWORD[16+rsp]
495 DB 0F3h,0C3h ;repret
496 $L$SEH_end_bn_mul4x_mont_gather5:
497
498
;-----------------------------------------------------------------------
; mul4x_internal -- file-local worker, reached by "call" from
; bn_mul4x_mont_gather5 and bn_power5. It does not follow the Win64 ABI.
;
; In (as read by the code below):
;   rax = caller's entry rsp; the 32-bit table index is read from [56+rax]
;   rdi = output pointer (saved in the frame, reloaded at the end)
;   rsi = first operand, rdx = table base, rcx = second operand array
;   r8  = pointer to a qword that is loaded and used as the per-row factor
;   r9  = word count in bytes
;   rsp = frame prepared by the caller; displacements are written as
;         (x+8), presumably to account for the return address -- confirm
; Exit: does not return directly; tail-jumps to $L$sqr4x_sub_entry, which
;       is defined outside this block.
; Clobbers: rax, rbx, rcx, rdx, rbp, rsi, rdi, r8, r10-r15, xmm0-xmm5, flags.
;-----------------------------------------------------------------------
499 ALIGN 32
500 mul4x_internal:
; r13 = rdx + 128 + r9*32 : end-of-table sentinel (compared with r12 at the
; bottom of the outer loop). r9 is restored by the matching shr.
501 shl r9,5
502 movd xmm5,DWORD[56+rax]
; $L$inc is defined outside this block.
503 lea rax,[$L$inc]
504 lea r13,[128+r9*1+rdx]
505 shr r9,5
506 movdqa xmm0,XMMWORD[rax]
507 movdqa xmm1,XMMWORD[16+rax]
; r10 is chosen so that the 16 masks are stored at [112+r10]..[352+r10].
508 lea r10,[((88-112))+r9*1+rsp]
; r12 = table pointer biased by +128 (row slots at -128..+112).
509 lea r12,[128+rdx]
510
; ---- Build 16 selection masks ----------------------------------------------
; Same scheme as the generic path: broadcast the index in xmm5, generate
; successive counter vectors with paddd (step kept in xmm4), compare each
; with pcmpeqd and store the resulting masks. DB 0x67 bytes are prefixes
; emitted in front of the following instruction.
511 pshufd xmm5,xmm5,0
512 movdqa xmm4,xmm1
513 DB 0x67,0x67
514 movdqa xmm2,xmm1
515 paddd xmm1,xmm0
516 pcmpeqd xmm0,xmm5
517 DB 0x67
518 movdqa xmm3,xmm4
519 paddd xmm2,xmm1
520 pcmpeqd xmm1,xmm5
521 movdqa XMMWORD[112+r10],xmm0
522 movdqa xmm0,xmm4
523
524 paddd xmm3,xmm2
525 pcmpeqd xmm2,xmm5
526 movdqa XMMWORD[128+r10],xmm1
527 movdqa xmm1,xmm4
528
529 paddd xmm0,xmm3
530 pcmpeqd xmm3,xmm5
531 movdqa XMMWORD[144+r10],xmm2
532 movdqa xmm2,xmm4
533
534 paddd xmm1,xmm0
535 pcmpeqd xmm0,xmm5
536 movdqa XMMWORD[160+r10],xmm3
537 movdqa xmm3,xmm4
538 paddd xmm2,xmm1
539 pcmpeqd xmm1,xmm5
540 movdqa XMMWORD[176+r10],xmm0
541 movdqa xmm0,xmm4
542
543 paddd xmm3,xmm2
544 pcmpeqd xmm2,xmm5
545 movdqa XMMWORD[192+r10],xmm1
546 movdqa xmm1,xmm4
547
548 paddd xmm0,xmm3
549 pcmpeqd xmm3,xmm5
550 movdqa XMMWORD[208+r10],xmm2
551 movdqa xmm2,xmm4
552
553 paddd xmm1,xmm0
554 pcmpeqd xmm0,xmm5
555 movdqa XMMWORD[224+r10],xmm3
556 movdqa xmm3,xmm4
557 paddd xmm2,xmm1
558 pcmpeqd xmm1,xmm5
559 movdqa XMMWORD[240+r10],xmm0
560 movdqa xmm0,xmm4
561
562 paddd xmm3,xmm2
563 pcmpeqd xmm2,xmm5
564 movdqa XMMWORD[256+r10],xmm1
565 movdqa xmm1,xmm4
566
567 paddd xmm0,xmm3
568 pcmpeqd xmm3,xmm5
569 movdqa XMMWORD[272+r10],xmm2
570 movdqa xmm2,xmm4
571
572 paddd xmm1,xmm0
573 pcmpeqd xmm0,xmm5
574 movdqa XMMWORD[288+r10],xmm3
575 movdqa xmm3,xmm4
576 paddd xmm2,xmm1
577 pcmpeqd xmm1,xmm5
578 movdqa XMMWORD[304+r10],xmm0
579
580 paddd xmm3,xmm2
581 DB 0x67
582 pcmpeqd xmm2,xmm5
583 movdqa XMMWORD[320+r10],xmm1
584
585 pcmpeqd xmm3,xmm5
586 movdqa XMMWORD[336+r10],xmm2
; ---- Gather the first multiplier word ----------------------------------------
; Every slot of the 256-byte row is read, masked and ORed together.
587 pand xmm0,XMMWORD[64+r12]
588
589 pand xmm1,XMMWORD[80+r12]
590 pand xmm2,XMMWORD[96+r12]
591 movdqa XMMWORD[352+r10],xmm3
592 pand xmm3,XMMWORD[112+r12]
593 por xmm0,xmm2
594 por xmm1,xmm3
595 movdqa xmm4,XMMWORD[((-128))+r12]
596 movdqa xmm5,XMMWORD[((-112))+r12]
597 movdqa xmm2,XMMWORD[((-96))+r12]
598 pand xmm4,XMMWORD[112+r10]
599 movdqa xmm3,XMMWORD[((-80))+r12]
600 pand xmm5,XMMWORD[128+r10]
601 por xmm0,xmm4
602 pand xmm2,XMMWORD[144+r10]
603 por xmm1,xmm5
604 pand xmm3,XMMWORD[160+r10]
605 por xmm0,xmm2
606 por xmm1,xmm3
607 movdqa xmm4,XMMWORD[((-64))+r12]
608 movdqa xmm5,XMMWORD[((-48))+r12]
609 movdqa xmm2,XMMWORD[((-32))+r12]
610 pand xmm4,XMMWORD[176+r10]
611 movdqa xmm3,XMMWORD[((-16))+r12]
612 pand xmm5,XMMWORD[192+r10]
613 por xmm0,xmm4
614 pand xmm2,XMMWORD[208+r10]
615 por xmm1,xmm5
616 pand xmm3,XMMWORD[224+r10]
617 por xmm0,xmm2
618 por xmm1,xmm3
619 movdqa xmm4,XMMWORD[r12]
620 movdqa xmm5,XMMWORD[16+r12]
621 movdqa xmm2,XMMWORD[32+r12]
622 pand xmm4,XMMWORD[240+r10]
623 movdqa xmm3,XMMWORD[48+r12]
624 pand xmm5,XMMWORD[256+r10]
625 por xmm0,xmm4
626 pand xmm2,XMMWORD[272+r10]
627 por xmm1,xmm5
628 pand xmm3,XMMWORD[288+r10]
629 por xmm0,xmm2
630 por xmm1,xmm3
631 por xmm0,xmm1
; Swap qword halves and OR so the low qword holds the selected word.
632 pshufd xmm1,xmm0,0x4e
633 por xmm0,xmm1
634 lea r12,[256+r12]
; Hand-encoded "movq rbx,xmm0": rbx = gathered word.
635 DB 102,72,15,126,195
636
; Spill the end-of-table sentinel and the output pointer; rdi is reused as
; an accumulator register below.
637 mov QWORD[((16+8))+rsp],r13
638 mov QWORD[((56+8))+rsp],rdi
639
; r8 = qword pointed to by r8. rsi is advanced to the end of the first
; operand and r9 negated, so [r9+rsi] addresses the operand's first word and
; the index can count up towards zero.
640 mov r8,QWORD[r8]
641 mov rax,QWORD[rsi]
642 lea rsi,[r9*1+rsi]
643 neg r9
644
645 mov rbp,r8
646 mul rbx
647 mov r10,rax
648 mov rax,QWORD[rcx]
649
; rbp = low 64 bits of r8 * low product word (per-row reduction factor).
; r14 = write cursor into the stack accumulator.
650 imul rbp,r10
651 lea r14,[((64+8))+rsp]
652 mov r11,rdx
653
654 mul rbp
655 add r10,rax
656 mov rax,QWORD[8+r9*1+rsi]
657 adc rdx,0
658 mov rdi,rdx
659
660 mul rbx
661 add r11,rax
662 mov rax,QWORD[8+rcx]
663 adc rdx,0
664 mov r10,rdx
665
666 mul rbp
667 add rdi,rax
668 mov rax,QWORD[16+r9*1+rsi]
669 adc rdx,0
670 add rdi,r11
; r15 = negative byte index, starting 32 bytes past -count.
671 lea r15,[32+r9]
672 lea rcx,[32+rcx]
673 adc rdx,0
674 mov QWORD[r14],rdi
675 mov r13,rdx
676 jmp NEAR $L$1st4x
677
678 ALIGN 32
679 $L$1st4x:
; ---- First row, four words per iteration -------------------------------------
; Alternates "mul rbx" (first-operand word) and "mul rbp" ([rcx] word),
; carrying through r10/r11 and r13/rdi, and stores four result words per
; pass. The loop ends when r15 reaches zero.
680 mul rbx
681 add r10,rax
682 mov rax,QWORD[((-16))+rcx]
683 lea r14,[32+r14]
684 adc rdx,0
685 mov r11,rdx
686
687 mul rbp
688 add r13,rax
689 mov rax,QWORD[((-8))+r15*1+rsi]
690 adc rdx,0
691 add r13,r10
692 adc rdx,0
693 mov QWORD[((-24))+r14],r13
694 mov rdi,rdx
695
696 mul rbx
697 add r11,rax
698 mov rax,QWORD[((-8))+rcx]
699 adc rdx,0
700 mov r10,rdx
701
702 mul rbp
703 add rdi,rax
704 mov rax,QWORD[r15*1+rsi]
705 adc rdx,0
706 add rdi,r11
707 adc rdx,0
708 mov QWORD[((-16))+r14],rdi
709 mov r13,rdx
710
711 mul rbx
712 add r10,rax
713 mov rax,QWORD[rcx]
714 adc rdx,0
715 mov r11,rdx
716
717 mul rbp
718 add r13,rax
719 mov rax,QWORD[8+r15*1+rsi]
720 adc rdx,0
721 add r13,r10
722 adc rdx,0
723 mov QWORD[((-8))+r14],r13
724 mov rdi,rdx
725
726 mul rbx
727 add r11,rax
728 mov rax,QWORD[8+rcx]
729 adc rdx,0
730 mov r10,rdx
731
732 mul rbp
733 add rdi,rax
734 mov rax,QWORD[16+r15*1+rsi]
735 adc rdx,0
736 add rdi,r11
737 lea rcx,[32+rcx]
738 adc rdx,0
739 mov QWORD[r14],rdi
740 mov r13,rdx
741
742 add r15,32
743 jnz NEAR $L$1st4x
744
; Tail of the first row: last two words, with r15 == 0 so the operand is
; addressed relative to rsi alone.
745 mul rbx
746 add r10,rax
747 mov rax,QWORD[((-16))+rcx]
748 lea r14,[32+r14]
749 adc rdx,0
750 mov r11,rdx
751
752 mul rbp
753 add r13,rax
754 mov rax,QWORD[((-8))+rsi]
755 adc rdx,0
756 add r13,r10
757 adc rdx,0
758 mov QWORD[((-24))+r14],r13
759 mov rdi,rdx
760
761 mul rbx
762 add r11,rax
763 mov rax,QWORD[((-8))+rcx]
764 adc rdx,0
765 mov r10,rdx
766
767 mul rbp
768 add rdi,rax
; Preload the first operand word for the next row.
769 mov rax,QWORD[r9*1+rsi]
770 adc rdx,0
771 add rdi,r11
772 adc rdx,0
773 mov QWORD[((-16))+r14],rdi
774 mov r13,rdx
775
; Rewind rcx to the start of its array (r9 is negative here).
776 lea rcx,[r9*1+rcx]
777
; rdi collects the top carry of the row.
778 xor rdi,rdi
779 add r13,r10
780 adc rdi,0
781 mov QWORD[((-8))+r14],r13
782
783 jmp NEAR $L$outer4x
784
785 ALIGN 32
786 $L$outer4x:
; ---- Outer loop: gather the next multiplier word -------------------------------
; rdx = r14 + 16 + 128 is used as the +128-biased base of the 16 stored masks.
787 lea rdx,[((16+128))+r14]
788 pxor xmm4,xmm4
789 pxor xmm5,xmm5
790 movdqa xmm0,XMMWORD[((-128))+r12]
791 movdqa xmm1,XMMWORD[((-112))+r12]
792 movdqa xmm2,XMMWORD[((-96))+r12]
793 movdqa xmm3,XMMWORD[((-80))+r12]
794 pand xmm0,XMMWORD[((-128))+rdx]
795 pand xmm1,XMMWORD[((-112))+rdx]
796 por xmm4,xmm0
797 pand xmm2,XMMWORD[((-96))+rdx]
798 por xmm5,xmm1
799 pand xmm3,XMMWORD[((-80))+rdx]
800 por xmm4,xmm2
801 por xmm5,xmm3
802 movdqa xmm0,XMMWORD[((-64))+r12]
803 movdqa xmm1,XMMWORD[((-48))+r12]
804 movdqa xmm2,XMMWORD[((-32))+r12]
805 movdqa xmm3,XMMWORD[((-16))+r12]
806 pand xmm0,XMMWORD[((-64))+rdx]
807 pand xmm1,XMMWORD[((-48))+rdx]
808 por xmm4,xmm0
809 pand xmm2,XMMWORD[((-32))+rdx]
810 por xmm5,xmm1
811 pand xmm3,XMMWORD[((-16))+rdx]
812 por xmm4,xmm2
813 por xmm5,xmm3
814 movdqa xmm0,XMMWORD[r12]
815 movdqa xmm1,XMMWORD[16+r12]
816 movdqa xmm2,XMMWORD[32+r12]
817 movdqa xmm3,XMMWORD[48+r12]
818 pand xmm0,XMMWORD[rdx]
819 pand xmm1,XMMWORD[16+rdx]
820 por xmm4,xmm0
821 pand xmm2,XMMWORD[32+rdx]
822 por xmm5,xmm1
823 pand xmm3,XMMWORD[48+rdx]
824 por xmm4,xmm2
825 por xmm5,xmm3
826 movdqa xmm0,XMMWORD[64+r12]
827 movdqa xmm1,XMMWORD[80+r12]
828 movdqa xmm2,XMMWORD[96+r12]
829 movdqa xmm3,XMMWORD[112+r12]
830 pand xmm0,XMMWORD[64+rdx]
831 pand xmm1,XMMWORD[80+rdx]
832 por xmm4,xmm0
833 pand xmm2,XMMWORD[96+rdx]
834 por xmm5,xmm1
835 pand xmm3,XMMWORD[112+rdx]
836 por xmm4,xmm2
837 por xmm5,xmm3
838 por xmm4,xmm5
839 pshufd xmm0,xmm4,0x4e
840 por xmm0,xmm4
841 lea r12,[256+r12]
; Hand-encoded "movq rbx,xmm0": rbx = gathered word for this row.
842 DB 102,72,15,126,195
843
; r10 = lowest accumulator word of the previous row (r9 is negative, so
; [r9+r14] reaches back to the start of the accumulator).
844 mov r10,QWORD[r9*1+r14]
845 mov rbp,r8
846 mul rbx
847 add r10,rax
848 mov rax,QWORD[rcx]
849 adc rdx,0
850
851 imul rbp,r10
852 mov r11,rdx
; Store the previous row's top carry, then rewind the cursor.
853 mov QWORD[r14],rdi
854
855 lea r14,[r9*1+r14]
856
857 mul rbp
858 add r10,rax
859 mov rax,QWORD[8+r9*1+rsi]
860 adc rdx,0
861 mov rdi,rdx
862
863 mul rbx
864 add r11,rax
865 mov rax,QWORD[8+rcx]
866 adc rdx,0
867 add r11,QWORD[8+r14]
868 adc rdx,0
869 mov r10,rdx
870
871 mul rbp
872 add rdi,rax
873 mov rax,QWORD[16+r9*1+rsi]
874 adc rdx,0
875 add rdi,r11
876 lea r15,[32+r9]
877 lea rcx,[32+rcx]
878 adc rdx,0
879 mov r13,rdx
880 jmp NEAR $L$inner4x
881
882 ALIGN 32
883 $L$inner4x:
; ---- Inner loop, four words per iteration ---------------------------------------
; Same multiply/accumulate pattern as $L$1st4x, with the previous row's
; accumulator words added in from [r14+...]. Results are written one slot
; behind the words being read.
884 mul rbx
885 add r10,rax
886 mov rax,QWORD[((-16))+rcx]
887 adc rdx,0
888 add r10,QWORD[16+r14]
889 lea r14,[32+r14]
890 adc rdx,0
891 mov r11,rdx
892
893 mul rbp
894 add r13,rax
895 mov rax,QWORD[((-8))+r15*1+rsi]
896 adc rdx,0
897 add r13,r10
898 adc rdx,0
899 mov QWORD[((-32))+r14],rdi
900 mov rdi,rdx
901
902 mul rbx
903 add r11,rax
904 mov rax,QWORD[((-8))+rcx]
905 adc rdx,0
906 add r11,QWORD[((-8))+r14]
907 adc rdx,0
908 mov r10,rdx
909
910 mul rbp
911 add rdi,rax
912 mov rax,QWORD[r15*1+rsi]
913 adc rdx,0
914 add rdi,r11
915 adc rdx,0
916 mov QWORD[((-24))+r14],r13
917 mov r13,rdx
918
919 mul rbx
920 add r10,rax
921 mov rax,QWORD[rcx]
922 adc rdx,0
923 add r10,QWORD[r14]
924 adc rdx,0
925 mov r11,rdx
926
927 mul rbp
928 add r13,rax
929 mov rax,QWORD[8+r15*1+rsi]
930 adc rdx,0
931 add r13,r10
932 adc rdx,0
933 mov QWORD[((-16))+r14],rdi
934 mov rdi,rdx
935
936 mul rbx
937 add r11,rax
938 mov rax,QWORD[8+rcx]
939 adc rdx,0
940 add r11,QWORD[8+r14]
941 adc rdx,0
942 mov r10,rdx
943
944 mul rbp
945 add rdi,rax
946 mov rax,QWORD[16+r15*1+rsi]
947 adc rdx,0
948 add rdi,r11
949 lea rcx,[32+rcx]
950 adc rdx,0
951 mov QWORD[((-8))+r14],r13
952 mov r13,rdx
953
954 add r15,32
955 jnz NEAR $L$inner4x
956
; Tail of the row (r15 == 0 here).
957 mul rbx
958 add r10,rax
959 mov rax,QWORD[((-16))+rcx]
960 adc rdx,0
961 add r10,QWORD[16+r14]
962 lea r14,[32+r14]
963 adc rdx,0
964 mov r11,rdx
965
966 mul rbp
967 add r13,rax
968 mov rax,QWORD[((-8))+rsi]
969 adc rdx,0
970 add r13,r10
971 adc rdx,0
972 mov QWORD[((-32))+r14],rdi
973 mov rdi,rdx
974
975 mul rbx
976 add r11,rax
; Operand swap for the final product: rax takes the reduction factor and
; rbp takes the last [rcx] word, which leaves that word in rbp for the
; comparison after the loop.
977 mov rax,rbp
978 mov rbp,QWORD[((-8))+rcx]
979 adc rdx,0
980 add r11,QWORD[((-8))+r14]
981 adc rdx,0
982 mov r10,rdx
983
984 mul rbp
985 add rdi,rax
986 mov rax,QWORD[r9*1+rsi]
987 adc rdx,0
988 add rdi,r11
989 adc rdx,0
990 mov QWORD[((-24))+r14],r13
991 mov r13,rdx
992
993 mov QWORD[((-16))+r14],rdi
994 lea rcx,[r9*1+rcx]
995
996 xor rdi,rdi
997 add r13,r10
998 adc rdi,0
999 add r13,QWORD[r14]
1000 adc rdi,0
1001 mov QWORD[((-8))+r14],r13
1002
; Continue until r12 reaches the end-of-table sentinel saved on entry.
1003 cmp r12,QWORD[((16+8))+rsp]
1004 jb NEAR $L$outer4x
; ---- Hand-off to the shared subtraction code ------------------------------------
; rax = 0 - (top carry | borrow of (last [rcx] word - top accumulator word)).
; r15 is zero on loop exit, so "adc r15,r15" captures just that borrow.
1005 xor rax,rax
1006 sub rbp,r13
1007 adc r15,r15
1008 or rdi,r15
1009 sub rax,rdi
; rbx = start of the accumulator, rbp = start of the [rcx] array,
; rcx = r9 >> 5 (arithmetic; r9 is negative), rdi = saved output pointer,
; r12 = first [rcx] word minus 1, r13..r15 = next three words.
1010 lea rbx,[r9*1+r14]
1011 mov r12,QWORD[rcx]
1012 lea rbp,[rcx]
1013 mov rcx,r9
1014 sar rcx,3+2
1015 mov rdi,QWORD[((56+8))+rsp]
1016 dec r12
1017 xor r10,r10
1018 mov r13,QWORD[8+rbp]
1019 mov r14,QWORD[16+rbp]
1020 mov r15,QWORD[24+rbp]
; $L$sqr4x_sub_entry is defined outside this block; the eventual ret there
; returns to this routine's caller.
1021 jmp NEAR $L$sqr4x_sub_entry
1022
;-----------------------------------------------------------------------
; bn_power5 -- exported Win64 entry point.
;
; ABI:  Microsoft x64; arguments arrive in rcx, rdx, r8, r9 and at
;       [40+rsp], [48+rsp] (a 7th at [56+rsp] is read later by
;       mul4x_internal through rax) and are remapped to rdi, rsi, rdx,
;       rcx, r8, r9.
; Work: calls __bn_sqr8x_internal followed by __bn_post4x_internal five
;       times, then calls mul4x_internal once.
;       NOTE(review): by the name this is presumably "raise to the 2^5
;       power, then multiply by a gathered table entry" -- confirm.
; Out:  rax = 1.
; Regs: rbx, rbp, r12-r15 pushed/restored; rdi/rsi saved in the caller's
;       home slots and restored; xmm1-xmm4 used as scratch storage.
;-----------------------------------------------------------------------
1023 global bn_power5
1024
1025 ALIGN 32
1026 bn_power5:
1027 mov QWORD[8+rsp],rdi ;WIN64 prologue
1028 mov QWORD[16+rsp],rsi
1029 mov rax,rsp
1030 $L$SEH_begin_bn_power5:
; Remap Win64 argument registers to rdi, rsi, rdx, rcx, r8, r9.
1031 mov rdi,rcx
1032 mov rsi,rdx
1033 mov rdx,r8
1034 mov rcx,r9
1035 mov r8,QWORD[40+rsp]
1036 mov r9,QWORD[48+rsp]
1037
1038
; rax = entry rsp, saved in the frame below and used by the epilogue.
1039 mov rax,rsp
1040 push rbx
1041 push rbp
1042 push r12
1043 push r13
1044 push r14
1045 push r15
1046
; r9 = word count * 8 (bytes); r10 = 3 * bytes (the 32-bit lea destination
; zero-extends into r10); r9 negated for frame sizing; r8 = qword that the
; 5th argument points to.
1047 shl r9d,3
1048 lea r10d,[r9*2+r9]
1049 neg r9
1050 mov r8,QWORD[r8]
1051
1052
1053
1054
1055
1056
1057
1058
; ---- Frame placement (same scheme as bn_mul4x_mont_gather5) -------------------
; r11 = (candidate frame address - rdi) mod 4096 decides how far rsp is
; lowered before the 320 + 2*bytes frame is carved out.
1059 lea r11,[((-320))+r9*2+rsp]
1060 sub r11,rdi
1061 and r11,4095
1062 cmp r10,r11
1063 jb NEAR $L$pwr_sp_alt
1064 sub rsp,r11
1065 lea rsp,[((-320))+r9*2+rsp]
1066 jmp NEAR $L$pwr_sp_done
1067
1068 ALIGN 32
1069 $L$pwr_sp_alt:
1070 lea r10,[((4096-320))+r9*2]
1071 lea rsp,[((-320))+r9*2+rsp]
1072 sub r11,r10
; "mov r10,0" (not xor) keeps CF from the sub alive for the cmovc.
1073 mov r10,0
1074 cmovc r11,r10
1075 sub rsp,r11
1076 $L$pwr_sp_done:
; Align to 64 bytes; r10 keeps the negated byte count, r9 becomes positive.
1077 and rsp,-64
1078 mov r10,r9
1079 neg r9
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
; Frame slots: [32+rsp] = loaded r8 value, [40+rsp] = entry rsp.
1090 mov QWORD[32+rsp],r8
1091 mov QWORD[40+rsp],rax
1092 $L$power5_body:
; Hand-encoded movq instructions stashing live registers in xmm registers
; across the calls:
;   movq xmm1,rdi / movq xmm2,rcx / movq xmm3,r10 / movq xmm4,rdx
1093 DB 102,72,15,110,207
1094 DB 102,72,15,110,209
1095 DB 102,73,15,110,218
1096 DB 102,72,15,110,226
1097
; Five rounds of square + post-processing.
1098 call __bn_sqr8x_internal
1099 call __bn_post4x_internal
1100 call __bn_sqr8x_internal
1101 call __bn_post4x_internal
1102 call __bn_sqr8x_internal
1103 call __bn_post4x_internal
1104 call __bn_sqr8x_internal
1105 call __bn_post4x_internal
1106 call __bn_sqr8x_internal
1107 call __bn_post4x_internal
1108
; Hand-encoded "movq rcx,xmm2" and "movq rdx,xmm4": recover the values
; stashed before the calls.
1109 DB 102,72,15,126,209
1110 DB 102,72,15,126,226
; Set up mul4x_internal's inputs: output pointer = rsi, rax = entry rsp,
; r8 = address of the frame slot holding the previously loaded value
; (mul4x_internal dereferences r8 itself).
1111 mov rdi,rsi
1112 mov rax,QWORD[40+rsp]
1113 lea r8,[32+rsp]
1114
1115 call mul4x_internal
1116
; ---- Epilogue -------------------------------------------------------------------
1117 mov rsi,QWORD[40+rsp]
1118 mov rax,1
1119 mov r15,QWORD[((-48))+rsi]
1120 mov r14,QWORD[((-40))+rsi]
1121 mov r13,QWORD[((-32))+rsi]
1122 mov r12,QWORD[((-24))+rsi]
1123 mov rbp,QWORD[((-16))+rsi]
1124 mov rbx,QWORD[((-8))+rsi]
1125 lea rsp,[rsi]
1126 $L$power5_epilogue:
1127 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
1128 mov rsi,QWORD[16+rsp]
1129 DB 0F3h,0C3h ;repret
1130 $L$SEH_end_bn_power5:
1131
1132 global bn_sqr8x_internal
1133
1134
1135 ALIGN 32
1136 bn_sqr8x_internal:
1137 __bn_sqr8x_internal:
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211 lea rbp,[32+r10]
1212 lea rsi,[r9*1+rsi]
1213
1214 mov rcx,r9
1215
1216
1217 mov r14,QWORD[((-32))+rbp*1+rsi]
1218 lea rdi,[((48+8))+r9*2+rsp]
1219 mov rax,QWORD[((-24))+rbp*1+rsi]
1220 lea rdi,[((-32))+rbp*1+rdi]
1221 mov rbx,QWORD[((-16))+rbp*1+rsi]
1222 mov r15,rax
1223
1224 mul r14
1225 mov r10,rax
1226 mov rax,rbx
1227 mov r11,rdx
1228 mov QWORD[((-24))+rbp*1+rdi],r10
1229
1230 mul r14
1231 add r11,rax
1232 mov rax,rbx
1233 adc rdx,0
1234 mov QWORD[((-16))+rbp*1+rdi],r11
1235 mov r10,rdx
1236
1237
1238 mov rbx,QWORD[((-8))+rbp*1+rsi]
1239 mul r15
1240 mov r12,rax
1241 mov rax,rbx
1242 mov r13,rdx
1243
1244 lea rcx,[rbp]
1245 mul r14
1246 add r10,rax
1247 mov rax,rbx
1248 mov r11,rdx
1249 adc r11,0
1250 add r10,r12
1251 adc r11,0
1252 mov QWORD[((-8))+rcx*1+rdi],r10
1253 jmp NEAR $L$sqr4x_1st
1254
1255 ALIGN 32
1256 $L$sqr4x_1st:
1257 mov rbx,QWORD[rcx*1+rsi]
1258 mul r15
1259 add r13,rax
1260 mov rax,rbx
1261 mov r12,rdx
1262 adc r12,0
1263
1264 mul r14
1265 add r11,rax
1266 mov rax,rbx
1267 mov rbx,QWORD[8+rcx*1+rsi]
1268 mov r10,rdx
1269 adc r10,0
1270 add r11,r13
1271 adc r10,0
1272
1273
1274 mul r15
1275 add r12,rax
1276 mov rax,rbx
1277 mov QWORD[rcx*1+rdi],r11
1278 mov r13,rdx
1279 adc r13,0
1280
1281 mul r14
1282 add r10,rax
1283 mov rax,rbx
1284 mov rbx,QWORD[16+rcx*1+rsi]
1285 mov r11,rdx
1286 adc r11,0
1287 add r10,r12
1288 adc r11,0
1289
1290 mul r15
1291 add r13,rax
1292 mov rax,rbx
1293 mov QWORD[8+rcx*1+rdi],r10
1294 mov r12,rdx
1295 adc r12,0
1296
1297 mul r14
1298 add r11,rax
1299 mov rax,rbx
1300 mov rbx,QWORD[24+rcx*1+rsi]
1301 mov r10,rdx
1302 adc r10,0
1303 add r11,r13
1304 adc r10,0
1305
1306
1307 mul r15
1308 add r12,rax
1309 mov rax,rbx
1310 mov QWORD[16+rcx*1+rdi],r11
1311 mov r13,rdx
1312 adc r13,0
1313 lea rcx,[32+rcx]
1314
1315 mul r14
1316 add r10,rax
1317 mov rax,rbx
1318 mov r11,rdx
1319 adc r11,0
1320 add r10,r12
1321 adc r11,0
1322 mov QWORD[((-8))+rcx*1+rdi],r10
1323
1324 cmp rcx,0
1325 jne NEAR $L$sqr4x_1st
1326
1327 mul r15
1328 add r13,rax
1329 lea rbp,[16+rbp]
1330 adc rdx,0
1331 add r13,r11
1332 adc rdx,0
1333
1334 mov QWORD[rdi],r13
1335 mov r12,rdx
1336 mov QWORD[8+rdi],rdx
1337 jmp NEAR $L$sqr4x_outer
1338
1339 ALIGN 32
1340 $L$sqr4x_outer:
1341 mov r14,QWORD[((-32))+rbp*1+rsi]
1342 lea rdi,[((48+8))+r9*2+rsp]
1343 mov rax,QWORD[((-24))+rbp*1+rsi]
1344 lea rdi,[((-32))+rbp*1+rdi]
1345 mov rbx,QWORD[((-16))+rbp*1+rsi]
1346 mov r15,rax
1347
1348 mul r14
1349 mov r10,QWORD[((-24))+rbp*1+rdi]
1350 add r10,rax
1351 mov rax,rbx
1352 adc rdx,0
1353 mov QWORD[((-24))+rbp*1+rdi],r10
1354 mov r11,rdx
1355
1356 mul r14
1357 add r11,rax
1358 mov rax,rbx
1359 adc rdx,0
1360 add r11,QWORD[((-16))+rbp*1+rdi]
1361 mov r10,rdx
1362 adc r10,0
1363 mov QWORD[((-16))+rbp*1+rdi],r11
1364
1365 xor r12,r12
1366
1367 mov rbx,QWORD[((-8))+rbp*1+rsi]
1368 mul r15
1369 add r12,rax
1370 mov rax,rbx
1371 adc rdx,0
1372 add r12,QWORD[((-8))+rbp*1+rdi]
1373 mov r13,rdx
1374 adc r13,0
1375
1376 mul r14
1377 add r10,rax
1378 mov rax,rbx
1379 adc rdx,0
1380 add r10,r12
1381 mov r11,rdx
1382 adc r11,0
1383 mov QWORD[((-8))+rbp*1+rdi],r10
1384
1385 lea rcx,[rbp]
1386 jmp NEAR $L$sqr4x_inner
1387
1388 ALIGN 32
1389 $L$sqr4x_inner:
1390 mov rbx,QWORD[rcx*1+rsi]
1391 mul r15
1392 add r13,rax
1393 mov rax,rbx
1394 mov r12,rdx
1395 adc r12,0
1396 add r13,QWORD[rcx*1+rdi]
1397 adc r12,0
1398
1399 DB 0x67
1400 mul r14
1401 add r11,rax
1402 mov rax,rbx
1403 mov rbx,QWORD[8+rcx*1+rsi]
1404 mov r10,rdx
1405 adc r10,0
1406 add r11,r13
1407 adc r10,0
1408
1409 mul r15
1410 add r12,rax
1411 mov QWORD[rcx*1+rdi],r11
1412 mov rax,rbx
1413 mov r13,rdx
1414 adc r13,0
1415 add r12,QWORD[8+rcx*1+rdi]
1416 lea rcx,[16+rcx]
1417 adc r13,0
1418
1419 mul r14
1420 add r10,rax
1421 mov rax,rbx
1422 adc rdx,0
1423 add r10,r12
1424 mov r11,rdx
1425 adc r11,0
1426 mov QWORD[((-8))+rcx*1+rdi],r10
1427
1428 cmp rcx,0
1429 jne NEAR $L$sqr4x_inner
1430
1431 DB 0x67
1432 mul r15
1433 add r13,rax
1434 adc rdx,0
1435 add r13,r11
1436 adc rdx,0
1437
1438 mov QWORD[rdi],r13
1439 mov r12,rdx
1440 mov QWORD[8+rdi],rdx
1441
1442 add rbp,16
1443 jnz NEAR $L$sqr4x_outer
1444
1445
1446 mov r14,QWORD[((-32))+rsi]
1447 lea rdi,[((48+8))+r9*2+rsp]
1448 mov rax,QWORD[((-24))+rsi]
1449 lea rdi,[((-32))+rbp*1+rdi]
1450 mov rbx,QWORD[((-16))+rsi]
1451 mov r15,rax
1452
1453 mul r14
1454 add r10,rax
1455 mov rax,rbx
1456 mov r11,rdx
1457 adc r11,0
1458
1459 mul r14
1460 add r11,rax
1461 mov rax,rbx
1462 mov QWORD[((-24))+rdi],r10
1463 mov r10,rdx
1464 adc r10,0
1465 add r11,r13
1466 mov rbx,QWORD[((-8))+rsi]
1467 adc r10,0
1468
1469 mul r15
1470 add r12,rax
1471 mov rax,rbx
1472 mov QWORD[((-16))+rdi],r11
1473 mov r13,rdx
1474 adc r13,0
1475
1476 mul r14
1477 add r10,rax
1478 mov rax,rbx
1479 mov r11,rdx
1480 adc r11,0
1481 add r10,r12
1482 adc r11,0
1483 mov QWORD[((-8))+rdi],r10
1484
1485 mul r15
1486 add r13,rax
1487 mov rax,QWORD[((-16))+rsi]
1488 adc rdx,0
1489 add r13,r11
1490 adc rdx,0
1491
1492 mov QWORD[rdi],r13
1493 mov r12,rdx
1494 mov QWORD[8+rdi],rdx
1495
1496 mul rbx
1497 add rbp,16
1498 xor r14,r14
1499 sub rbp,r9
1500 xor r15,r15
1501
1502 add rax,r12
1503 adc rdx,0
1504 mov QWORD[8+rdi],rax
1505 mov QWORD[16+rdi],rdx
1506 mov QWORD[24+rdi],r15
1507
1508 mov rax,QWORD[((-16))+rbp*1+rsi]
1509 lea rdi,[((48+8))+rsp]
1510 xor r10,r10
1511 mov r11,QWORD[8+rdi]
1512
1513 lea r12,[r10*2+r14]
1514 shr r10,63
1515 lea r13,[r11*2+rcx]
1516 shr r11,63
1517 or r13,r10
1518 mov r10,QWORD[16+rdi]
1519 mov r14,r11
1520 mul rax
1521 neg r15
1522 mov r11,QWORD[24+rdi]
1523 adc r12,rax
1524 mov rax,QWORD[((-8))+rbp*1+rsi]
1525 mov QWORD[rdi],r12
1526 adc r13,rdx
1527
1528 lea rbx,[r10*2+r14]
1529 mov QWORD[8+rdi],r13
1530 sbb r15,r15
1531 shr r10,63
1532 lea r8,[r11*2+rcx]
1533 shr r11,63
1534 or r8,r10
1535 mov r10,QWORD[32+rdi]
1536 mov r14,r11
1537 mul rax
1538 neg r15
1539 mov r11,QWORD[40+rdi]
1540 adc rbx,rax
1541 mov rax,QWORD[rbp*1+rsi]
1542 mov QWORD[16+rdi],rbx
1543 adc r8,rdx
1544 lea rbp,[16+rbp]
1545 mov QWORD[24+rdi],r8
1546 sbb r15,r15
1547 lea rdi,[64+rdi]
1548 jmp NEAR $L$sqr4x_shift_n_add
1549
1550 ALIGN 32
1551 $L$sqr4x_shift_n_add:
1552 lea r12,[r10*2+r14]
1553 shr r10,63
1554 lea r13,[r11*2+rcx]
1555 shr r11,63
1556 or r13,r10
1557 mov r10,QWORD[((-16))+rdi]
1558 mov r14,r11
1559 mul rax
1560 neg r15
1561 mov r11,QWORD[((-8))+rdi]
1562 adc r12,rax
1563 mov rax,QWORD[((-8))+rbp*1+rsi]
1564 mov QWORD[((-32))+rdi],r12
1565 adc r13,rdx
1566
1567 lea rbx,[r10*2+r14]
1568 mov QWORD[((-24))+rdi],r13
1569 sbb r15,r15
1570 shr r10,63
1571 lea r8,[r11*2+rcx]
1572 shr r11,63
1573 or r8,r10
1574 mov r10,QWORD[rdi]
1575 mov r14,r11
1576 mul rax
1577 neg r15
1578 mov r11,QWORD[8+rdi]
1579 adc rbx,rax
1580 mov rax,QWORD[rbp*1+rsi]
1581 mov QWORD[((-16))+rdi],rbx
1582 adc r8,rdx
1583
1584 lea r12,[r10*2+r14]
1585 mov QWORD[((-8))+rdi],r8
1586 sbb r15,r15
1587 shr r10,63
1588 lea r13,[r11*2+rcx]
1589 shr r11,63
1590 or r13,r10
1591 mov r10,QWORD[16+rdi]
1592 mov r14,r11
1593 mul rax
1594 neg r15
1595 mov r11,QWORD[24+rdi]
1596 adc r12,rax
1597 mov rax,QWORD[8+rbp*1+rsi]
1598 mov QWORD[rdi],r12
1599 adc r13,rdx
1600
1601 lea rbx,[r10*2+r14]
1602 mov QWORD[8+rdi],r13
1603 sbb r15,r15
1604 shr r10,63
1605 lea r8,[r11*2+rcx]
1606 shr r11,63
1607 or r8,r10
1608 mov r10,QWORD[32+rdi]
1609 mov r14,r11
1610 mul rax
1611 neg r15
1612 mov r11,QWORD[40+rdi]
1613 adc rbx,rax
1614 mov rax,QWORD[16+rbp*1+rsi]
1615 mov QWORD[16+rdi],rbx
1616 adc r8,rdx
1617 mov QWORD[24+rdi],r8
1618 sbb r15,r15
1619 lea rdi,[64+rdi]
1620 add rbp,32
1621 jnz NEAR $L$sqr4x_shift_n_add
1622
1623 lea r12,[r10*2+r14]
1624 DB 0x67
1625 shr r10,63
1626 lea r13,[r11*2+rcx]
1627 shr r11,63
1628 or r13,r10
1629 mov r10,QWORD[((-16))+rdi]
1630 mov r14,r11
1631 mul rax
1632 neg r15
1633 mov r11,QWORD[((-8))+rdi]
1634 adc r12,rax
1635 mov rax,QWORD[((-8))+rsi]
1636 mov QWORD[((-32))+rdi],r12
1637 adc r13,rdx
1638
1639 lea rbx,[r10*2+r14]
1640 mov QWORD[((-24))+rdi],r13
1641 sbb r15,r15
1642 shr r10,63
1643 lea r8,[r11*2+rcx]
1644 shr r11,63
1645 or r8,r10
1646 mul rax
1647 neg r15
1648 adc rbx,rax
1649 adc r8,rdx
1650 mov QWORD[((-16))+rdi],rbx
1651 mov QWORD[((-8))+rdi],r8
1652 DB 102,72,15,126,213
; __bn_sqr8x_reduction: internal routine, entered by fall-through from the
; code directly above and by `call` from bn_from_mont8x.
; Registers/memory as used below:
;   rbp           - pointer to a vector read 8 qwords (64 bytes) at a time
;   r9            - byte length; rbp+r9 is used as the end pointer for rbp
;   [(32+8)+rsp]  - multiplier constant stored by the caller
;                   (presumably the Montgomery n0 value - TODO confirm)
;   [(48+8)+rsp]  - start of the work buffer that is read and rewritten
;   xmm2, xmm3    - reloaded into rbp and r9 after every outer iteration
; Frame offsets are written as (x+8); presumably the +8 skips the return
; address pushed by `call` - confirm against the generator script.
; On return rax holds the carry produced by the last outer iteration.
1653 __bn_sqr8x_reduction:
1654 xor rax,rax ; carry word carried between outer iterations starts at 0
1655 lea rcx,[rbp*1+r9] ; rcx = end pointer for rbp
1656 lea rdx,[((48+8))+r9*2+rsp] ; rdx = work buffer start + 2*r9 (outer loop limit)
1657 mov QWORD[((0+8))+rsp],rcx ; save the rbp end pointer
1658 lea rdi,[((48+8))+r9*1+rsp]
1659 mov QWORD[((8+8))+rsp],rdx ; save the outer loop limit
1660 neg r9 ; r9 is negative from here until it is reused as a data register
1661 jmp NEAR $L$8x_reduction_loop
1662
1663 ALIGN 32
1664 $L$8x_reduction_loop: ; outer loop: one 8-qword window of the work buffer per pass
1665 lea rdi,[r9*1+rdi] ; step rdi back by the byte length (r9 is negative here)
1666 DB 0x66 ; raw prefix byte in front of the next instruction - presumably padding, confirm
1667 mov rbx,QWORD[rdi] ; load the 8-qword window into rbx,r9..r15
1668 mov r9,QWORD[8+rdi]
1669 mov r10,QWORD[16+rdi]
1670 mov r11,QWORD[24+rdi]
1671 mov r12,QWORD[32+rdi]
1672 mov r13,QWORD[40+rdi]
1673 mov r14,QWORD[48+rdi]
1674 mov r15,QWORD[56+rdi]
1675 mov QWORD[rdx],rax ; park the carry word at [rdx]; $L$8x_tail_done adds it back
1676 lea rdi,[64+rdi]
1677
1678 DB 0x67 ; raw prefix byte in front of the next instruction - presumably padding, confirm
1679 mov r8,rbx
1680 imul rbx,QWORD[((32+8))+rsp] ; rbx = low 64 bits of (window word 0 * stored constant)
1681 mov rax,QWORD[rbp]
1682 mov ecx,8 ; 8 inner steps
1683 jmp NEAR $L$8x_reduce
1684
1685 ALIGN 32
1686 $L$8x_reduce: ; inner loop: window += rbx * rbp[0..7], shifted down by one qword per step
1687 mul rbx
1688 mov rax,QWORD[8+rbp]
1689 neg r8 ; only the flag matters: CF = (r8 != 0); r8 is overwritten next
1690 mov r8,rdx
1691 adc r8,0
1692
1693 mul rbx
1694 add r9,rax
1695 mov rax,QWORD[16+rbp]
1696 adc rdx,0
1697 add r8,r9
1698 mov QWORD[((48-8+8))+rcx*8+rsp],rbx ; save this step's multiplier; $L$8x_tail reloads it
1699 mov r9,rdx
1700 adc r9,0
1701
1702 mul rbx
1703 add r10,rax
1704 mov rax,QWORD[24+rbp]
1705 adc rdx,0
1706 add r9,r10
1707 mov rsi,QWORD[((32+8))+rsp] ; reload the stored constant
1708 mov r10,rdx
1709 adc r10,0
1710
1711 mul rbx
1712 add r11,rax
1713 mov rax,QWORD[32+rbp]
1714 adc rdx,0
1715 imul rsi,r8 ; multiplier for the next step = constant * new lowest word
1716 add r10,r11
1717 mov r11,rdx
1718 adc r11,0
1719
1720 mul rbx
1721 add r12,rax
1722 mov rax,QWORD[40+rbp]
1723 adc rdx,0
1724 add r11,r12
1725 mov r12,rdx
1726 adc r12,0
1727
1728 mul rbx
1729 add r13,rax
1730 mov rax,QWORD[48+rbp]
1731 adc rdx,0
1732 add r12,r13
1733 mov r13,rdx
1734 adc r13,0
1735
1736 mul rbx
1737 add r14,rax
1738 mov rax,QWORD[56+rbp]
1739 adc rdx,0
1740 add r13,r14
1741 mov r14,rdx
1742 adc r14,0
1743
1744 mul rbx
1745 mov rbx,rsi ; switch to the multiplier computed above
1746 add r15,rax
1747 mov rax,QWORD[rbp]
1748 adc rdx,0
1749 add r14,r15
1750 mov r15,rdx
1751 adc r15,0
1752
1753 dec ecx
1754 jnz NEAR $L$8x_reduce
1755
1756 lea rbp,[64+rbp] ; advance to the next 8 qwords of the rbp vector
1757 xor rax,rax
1758 mov rdx,QWORD[((8+8))+rsp] ; reload the outer loop limit (mul clobbered rdx)
1759 cmp rbp,QWORD[((0+8))+rsp] ; reached the saved end pointer?
1760 jae NEAR $L$8x_no_tail ; taken with CF=0, so the adc chain there starts without carry
1761
1762 DB 0x66 ; raw prefix byte in front of the next instruction - presumably padding, confirm
1763 add r8,QWORD[rdi] ; fold in the next 8 qwords of the work buffer
1764 adc r9,QWORD[8+rdi]
1765 adc r10,QWORD[16+rdi]
1766 adc r11,QWORD[24+rdi]
1767 adc r12,QWORD[32+rdi]
1768 adc r13,QWORD[40+rdi]
1769 adc r14,QWORD[48+rdi]
1770 adc r15,QWORD[56+rdi]
1771 sbb rsi,rsi ; remember the carry-out as 0 / -1 in rsi
1772
1773 mov rbx,QWORD[((48+56+8))+rsp] ; first multiplier saved by $L$8x_reduce (slot written with rcx=8)
1774 mov ecx,8
1775 mov rax,QWORD[rbp]
1776 jmp NEAR $L$8x_tail
1777
1778 ALIGN 32
1779 $L$8x_tail: ; replay the 8 saved multipliers against the next 8 qwords at rbp
1780 mul rbx
1781 add r8,rax
1782 mov rax,QWORD[8+rbp]
1783 mov QWORD[rdi],r8 ; lowest word is final for this position: write it back
1784 mov r8,rdx
1785 adc r8,0
1786
1787 mul rbx
1788 add r9,rax
1789 mov rax,QWORD[16+rbp]
1790 adc rdx,0
1791 add r8,r9
1792 lea rdi,[8+rdi] ; lea keeps the flags intact while advancing the output pointer
1793 mov r9,rdx
1794 adc r9,0
1795
1796 mul rbx
1797 add r10,rax
1798 mov rax,QWORD[24+rbp]
1799 adc rdx,0
1800 add r9,r10
1801 mov r10,rdx
1802 adc r10,0
1803
1804 mul rbx
1805 add r11,rax
1806 mov rax,QWORD[32+rbp]
1807 adc rdx,0
1808 add r10,r11
1809 mov r11,rdx
1810 adc r11,0
1811
1812 mul rbx
1813 add r12,rax
1814 mov rax,QWORD[40+rbp]
1815 adc rdx,0
1816 add r11,r12
1817 mov r12,rdx
1818 adc r12,0
1819
1820 mul rbx
1821 add r13,rax
1822 mov rax,QWORD[48+rbp]
1823 adc rdx,0
1824 add r12,r13
1825 mov r13,rdx
1826 adc r13,0
1827
1828 mul rbx
1829 add r14,rax
1830 mov rax,QWORD[56+rbp]
1831 adc rdx,0
1832 add r13,r14
1833 mov r14,rdx
1834 adc r14,0
1835
1836 mul rbx
1837 mov rbx,QWORD[((48-16+8))+rcx*8+rsp] ; next saved multiplier (slot written with rcx-1)
1838 add r15,rax
1839 adc rdx,0
1840 add r14,r15
1841 mov rax,QWORD[rbp]
1842 mov r15,rdx
1843 adc r15,0
1844
1845 dec ecx
1846 jnz NEAR $L$8x_tail
1847
1848 lea rbp,[64+rbp]
1849 mov rdx,QWORD[((8+8))+rsp] ; reload the outer loop limit
1850 cmp rbp,QWORD[((0+8))+rsp] ; rbp vector exhausted?
1851 jae NEAR $L$8x_tail_done
1852
1853 mov rbx,QWORD[((48+56+8))+rsp] ; restart from the first saved multiplier
1854 neg rsi ; CF = carry remembered in rsi
1855 mov rax,QWORD[rbp]
1856 adc r8,QWORD[rdi]
1857 adc r9,QWORD[8+rdi]
1858 adc r10,QWORD[16+rdi]
1859 adc r11,QWORD[24+rdi]
1860 adc r12,QWORD[32+rdi]
1861 adc r13,QWORD[40+rdi]
1862 adc r14,QWORD[48+rdi]
1863 adc r15,QWORD[56+rdi]
1864 sbb rsi,rsi ; remember the new carry-out as 0 / -1
1865
1866 mov ecx,8
1867 jmp NEAR $L$8x_tail
1868
1869 ALIGN 32
1870 $L$8x_tail_done:
1871 add r8,QWORD[rdx] ; add back the carry word parked at [rdx] by the outer loop
1872 adc r9,0
1873 adc r10,0
1874 adc r11,0
1875 adc r12,0
1876 adc r13,0
1877 adc r14,0
1878 adc r15,0
1879
1880
1881 xor rax,rax
1882
1883 neg rsi ; CF = carry remembered in rsi, consumed by the adc chain below
1884 $L$8x_no_tail:
1885 adc r8,QWORD[rdi]
1886 adc r9,QWORD[8+rdi]
1887 adc r10,QWORD[16+rdi]
1888 adc r11,QWORD[24+rdi]
1889 adc r12,QWORD[32+rdi]
1890 adc r13,QWORD[40+rdi]
1891 adc r14,QWORD[48+rdi]
1892 adc r15,QWORD[56+rdi]
1893 adc rax,0 ; rax = carry-out (0 or 1) for the next outer iteration / the caller
1894 mov rcx,QWORD[((-8))+rbp] ; last qword of the rbp vector just processed
1895 xor rsi,rsi
1896
1897 DB 102,72,15,126,213 ; bytes 66 48 0F 7E D5 = movq rbp,xmm2 (rewind rbp)
1898
1899 mov QWORD[rdi],r8 ; write the 8 accumulated qwords back to the work buffer
1900 mov QWORD[8+rdi],r9
1901 DB 102,73,15,126,217 ; bytes 66 49 0F 7E D9 = movq r9,xmm3 (r9 was used as a data register)
1902 mov QWORD[16+rdi],r10
1903 mov QWORD[24+rdi],r11
1904 mov QWORD[32+rdi],r12
1905 mov QWORD[40+rdi],r13
1906 mov QWORD[48+rdi],r14
1907 mov QWORD[56+rdi],r15
1908 lea rdi,[64+rdi]
1909
1910 cmp rdi,rdx ; loop until rdi reaches the saved limit
1911 jb NEAR $L$8x_reduction_loop
1912 DB 0F3h,0C3h ;repret
1913
1914
1915 ALIGN 32
; __bn_post4x_internal: internal routine, called right after
; __bn_sqr8x_reduction by bn_from_mont8x.
; Adds the two's-complement negation of the vector at rbp, ANDed with a mask,
; to the vector at rdi+r9, four qwords per iteration, and stores the sums
; through the pointer held in xmm1 (a masked subtraction).
; Inputs as used below:
;   rax  - negated to form the AND mask (0 or 1 on return from
;          __bn_sqr8x_reduction, giving a mask of 0 or all-ones)
;   rbp  - pointer to the vector being negated
;   rdi  - rdi+r9 is the address of the vector being added to
;   r9   - byte count; must be negative because rcx = r9 >> 5 counts up to 0
;   xmm1 - destination pointer (copied to both rdi and rsi)
; On return r10 holds the incoming r9 and r9 has been negated.
1916 __bn_post4x_internal:
1917 mov r12,QWORD[rbp]
1918 lea rbx,[r9*1+rdi] ; rbx = source vector that gets the masked value added
1919 mov rcx,r9
1920 DB 102,72,15,126,207 ; bytes 66 48 0F 7E CF = movq rdi,xmm1 (destination pointer)
1921 neg rax ; turn rax into the AND mask
1922 DB 102,72,15,126,206 ; bytes 66 48 0F 7E CE = movq rsi,xmm1
1923 sar rcx,3+2 ; rcx = r9 / 32: one count per 4-qword iteration
1924 dec r12 ; with the `not` below this yields -[rbp] for the lowest word (~(x-1) = -x)
1925 xor r10,r10 ; no carry into the first iteration
1926 mov r13,QWORD[8+rbp]
1927 mov r14,QWORD[16+rbp]
1928 mov r15,QWORD[24+rbp]
1929 jmp NEAR $L$sqr4x_sub_entry
1930
1931 ALIGN 16
1932 $L$sqr4x_sub:
1933 mov r12,QWORD[rbp]
1934 mov r13,QWORD[8+rbp]
1935 mov r14,QWORD[16+rbp]
1936 mov r15,QWORD[24+rbp]
1937 $L$sqr4x_sub_entry:
1938 lea rbp,[32+rbp]
1939 not r12
1940 not r13
1941 not r14
1942 not r15
1943 and r12,rax ; keep the complemented words only where the mask is set
1944 and r13,rax
1945 and r14,rax
1946 and r15,rax
1947
1948 neg r10 ; CF = carry saved by the previous iteration
1949 adc r12,QWORD[rbx]
1950 adc r13,QWORD[8+rbx]
1951 adc r14,QWORD[16+rbx]
1952 adc r15,QWORD[24+rbx]
1953 mov QWORD[rdi],r12
1954 lea rbx,[32+rbx]
1955 mov QWORD[8+rdi],r13
1956 sbb r10,r10 ; save the carry-out as 0 / -1 (mov and lea leave CF untouched)
1957 mov QWORD[16+rdi],r14
1958 mov QWORD[24+rdi],r15
1959 lea rdi,[32+rdi]
1960
1961 inc rcx
1962 jnz NEAR $L$sqr4x_sub
1963
1964 mov r10,r9
1965 neg r9
1966 DB 0F3h,0C3h ;repret
1967
1968 global bn_from_montgomery
1969
1970 ALIGN 32
; bn_from_montgomery: exported entry point.
; Tests the dword at [48+rsp] (the same stack slot bn_from_mont8x later loads
; into r9; presumably the sixth argument, the word count - confirm against the
; C prototype). If its low three bits are all zero, control tail-jumps to
; bn_from_mont8x; otherwise the function returns 0 in eax.
1971 bn_from_montgomery:
1972 test DWORD[48+rsp],7 ; is the value a multiple of 8?
1973 jz NEAR bn_from_mont8x ; yes: handled by the 8x code path
1974 xor eax,eax ; no: return 0
1975 DB 0F3h,0C3h ;repret
1976
1977
1978
1979 ALIGN 32
; bn_from_mont8x: reached only through the jump in bn_from_montgomery.
; Moves the Win64 arguments (rcx, rdx, r8, r9, [40+rsp], [48+rsp]) into
; rdi, rsi, rdx, rcx, r8, r9, builds an aligned stack frame, copies the input
; at rsi into the frame with an equally sized zeroed area after it, then calls
; __bn_sqr8x_reduction and __bn_post4x_internal. Afterwards the frame buffer
; is overwritten with zeros and 1 is returned in rax.
; Frame slots: [32+rsp] = qword loaded through the 5th argument,
;              [40+rsp] = caller's rsp (also read by mul_handler),
;              [48+rsp] = start of the work buffer.
1980 bn_from_mont8x:
1981 mov QWORD[8+rsp],rdi ;WIN64 prologue
1982 mov QWORD[16+rsp],rsi
1983 mov rax,rsp
1984 $L$SEH_begin_bn_from_mont8x:
1985 mov rdi,rcx
1986 mov rsi,rdx
1987 mov rdx,r8
1988 mov rcx,r9
1989 mov r8,QWORD[40+rsp]
1990 mov r9,QWORD[48+rsp]
1991
1992
1993 DB 0x67 ; raw prefix byte in front of the next instruction - presumably padding, confirm
1994 mov rax,rsp ; rax = rsp at entry, restored in the epilogue
1995 push rbx
1996 push rbp
1997 push r12
1998 push r13
1999 push r14
2000 push r15
2001
2002 shl r9d,3 ; r9 = count * 8 (byte length); the 32-bit op zero-extends
2003 lea r10,[r9*2+r9] ; r10 = 3 * byte length
2004 neg r9
2005 mov r8,QWORD[r8] ; dereference the 5th argument
2006
2007
2008
2009
2010
2011
2012
2013
; Frame placement: reserve at least 320 + 2*length bytes. The extra
; displacement depends on the distance to rdi modulo 4096 - presumably to
; keep the frame and the rdi buffer from aliasing at 4 KiB granularity
; (TODO confirm against the generator script).
2014 lea r11,[((-320))+r9*2+rsp]
2015 sub r11,rdi
2016 and r11,4095
2017 cmp r10,r11
2018 jb NEAR $L$from_sp_alt
2019 sub rsp,r11
2020 lea rsp,[((-320))+r9*2+rsp]
2021 jmp NEAR $L$from_sp_done
2022
2023 ALIGN 32
2024 $L$from_sp_alt:
2025 lea r10,[((4096-320))+r9*2]
2026 lea rsp,[((-320))+r9*2+rsp]
2027 sub r11,r10
2028 mov r10,0 ; mov (not xor) so that CF from the sub survives for cmovc
2029 cmovc r11,r10 ; clamp the adjustment to 0 when the sub borrowed
2030 sub rsp,r11
2031 $L$from_sp_done:
2032 and rsp,-64 ; 64-byte align the frame
2033 mov r10,r9 ; r10 = negative byte length
2034 neg r9 ; r9 = positive byte length
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045 mov QWORD[32+rsp],r8 ; read back by __bn_sqr8x_reduction as [(32+8)+rsp]
2046 mov QWORD[40+rsp],rax ; caller's rsp, used by the epilogue and by mul_handler
2047 $L$from_body:
2048 mov r11,r9
2049 lea rax,[48+rsp]
2050 pxor xmm0,xmm0
2051 jmp NEAR $L$mul_by_1
2052
2053 ALIGN 32
2054 $L$mul_by_1: ; per pass: copy 64 input bytes to [rax], zero 64 bytes at [rax+r9]
2055 movdqu xmm1,XMMWORD[rsi]
2056 movdqu xmm2,XMMWORD[16+rsi]
2057 movdqu xmm3,XMMWORD[32+rsi]
2058 movdqa XMMWORD[r9*1+rax],xmm0
2059 movdqu xmm4,XMMWORD[48+rsi]
2060 movdqa XMMWORD[16+r9*1+rax],xmm0
2061 DB 0x48,0x8d,0xb6,0x40,0x00,0x00,0x00 ; bytes = lea rsi,[64+rsi] with a 32-bit displacement
2062 movdqa XMMWORD[rax],xmm1
2063 movdqa XMMWORD[32+r9*1+rax],xmm0
2064 movdqa XMMWORD[16+rax],xmm2
2065 movdqa XMMWORD[48+r9*1+rax],xmm0
2066 movdqa XMMWORD[32+rax],xmm3
2067 movdqa XMMWORD[48+rax],xmm4
2068 lea rax,[64+rax]
2069 sub r11,64
2070 jnz NEAR $L$mul_by_1
2071
2072 DB 102,72,15,110,207 ; bytes 66 48 0F 6E CF = movq xmm1,rdi (destination pointer)
2073 DB 102,72,15,110,209 ; bytes 66 48 0F 6E D1 = movq xmm2,rcx
2074 DB 0x67 ; raw prefix byte in front of the next instruction - presumably padding, confirm
2075 mov rbp,rcx ; rbp = 4th argument, the vector pointer used by both callees
2076 DB 102,73,15,110,218 ; bytes 66 49 0F 6E DA = movq xmm3,r10 (negative byte length)
2077 call __bn_sqr8x_reduction
2078 call __bn_post4x_internal
2079
2080 pxor xmm0,xmm0
2081 lea rax,[48+rsp]
2082 mov rsi,QWORD[40+rsp] ; rsi = caller's rsp saved above
2083 jmp NEAR $L$from_mont_zero
2084
2085 ALIGN 32
2086 $L$from_mont_zero: ; overwrite the work buffer with zeros: 64 bytes per 32 counted in r9
2087 movdqa XMMWORD[rax],xmm0
2088 movdqa XMMWORD[16+rax],xmm0
2089 movdqa XMMWORD[32+rax],xmm0
2090 movdqa XMMWORD[48+rax],xmm0
2091 lea rax,[64+rax]
2092 sub r9,32
2093 jnz NEAR $L$from_mont_zero
2094
2095 mov rax,1 ; return value
2096 mov r15,QWORD[((-48))+rsi] ; reload the six registers pushed in the prologue
2097 mov r14,QWORD[((-40))+rsi]
2098 mov r13,QWORD[((-32))+rsi]
2099 mov r12,QWORD[((-24))+rsi]
2100 mov rbp,QWORD[((-16))+rsi]
2101 mov rbx,QWORD[((-8))+rsi]
2102 lea rsp,[rsi]
2103 $L$from_epilogue:
2104 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
2105 mov rsi,QWORD[16+rsp]
2106 DB 0F3h,0C3h ;repret
2107 $L$SEH_end_bn_from_mont8x:
2108 global bn_scatter5
2109
2110 ALIGN 16
; bn_scatter5: exported leaf function; uses the incoming Win64 argument
; registers directly and touches no stack.
;   rcx - source pointer, read one qword per iteration
;   edx - number of qwords to copy; returns immediately when 0
;   r8  - table base address
;   r9  - slot index (scaled by 8)
; Qword i of the source is stored at r8 + r9*8 + i*256.
2111 bn_scatter5:
2112 cmp edx,0
2113 jz NEAR $L$scatter_epilogue
2114 lea r8,[r9*8+r8] ; address of the selected slot in the first 256-byte row
2115 $L$scatter:
2116 mov rax,QWORD[rcx]
2117 lea rcx,[8+rcx]
2118 mov QWORD[r8],rax
2119 lea r8,[256+r8] ; same slot in the next row
2120 sub edx,1
2121 jnz NEAR $L$scatter
2122 $L$scatter_epilogue:
2123 DB 0F3h,0C3h ;repret
2124
2125
2126 global bn_gather5
2127
2128 ALIGN 32
; bn_gather5: exported function; uses the incoming Win64 argument registers
; directly.
;   rcx - output pointer, one qword written per iteration
;   edx - number of qwords to produce (the loop runs at least once)
;   r8  - table base address (rows of 256 bytes)
;   r9d - index to select
; First builds sixteen 16-byte masks on the stack by comparing the index
; against the dword vectors {0,0,1,1}, {2,2,3,3}, ... generated from $L$inc.
; Each iteration then ANDs all 256 bytes of one row with the masks and ORs
; the results together, so the addresses read do not depend on the index.
2129 bn_gather5:
2130 $L$SEH_begin_bn_gather5:
2131
2132 DB 0x4c,0x8d,0x14,0x24 ; bytes = lea r10,[rsp] (remember rsp for the exit path)
2133 DB 0x48,0x81,0xec,0x08,0x01,0x00,0x00 ; bytes = sub rsp,0x108 (264 bytes)
2134 lea rax,[$L$inc]
2135 and rsp,-16 ; movdqa below needs 16-byte aligned stack slots
2136
2137 movd xmm5,r9d
2138 movdqa xmm0,XMMWORD[rax] ; {0,0,1,1}
2139 movdqa xmm1,XMMWORD[16+rax] ; {2,2,2,2}
2140 lea r11,[128+r8] ; bias the table pointer so offsets span -128..+112
2141 lea rax,[128+rsp] ; same bias for the mask area
2142
2143 pshufd xmm5,xmm5,0 ; broadcast the index to all four dwords
2144 movdqa xmm4,xmm1 ; xmm4 keeps the {2,2,2,2} increment
2145 movdqa xmm2,xmm1
2146 paddd xmm1,xmm0 ; next candidate vector = previous + increment
2147 pcmpeqd xmm0,xmm5 ; all-ones in the lanes whose candidate equals the index
2148 movdqa xmm3,xmm4
2149
2150 paddd xmm2,xmm1
2151 pcmpeqd xmm1,xmm5
2152 movdqa XMMWORD[(-128)+rax],xmm0
2153 movdqa xmm0,xmm4
2154
2155 paddd xmm3,xmm2
2156 pcmpeqd xmm2,xmm5
2157 movdqa XMMWORD[(-112)+rax],xmm1
2158 movdqa xmm1,xmm4
2159
2160 paddd xmm0,xmm3
2161 pcmpeqd xmm3,xmm5
2162 movdqa XMMWORD[(-96)+rax],xmm2
2163 movdqa xmm2,xmm4
2164 paddd xmm1,xmm0
2165 pcmpeqd xmm0,xmm5
2166 movdqa XMMWORD[(-80)+rax],xmm3
2167 movdqa xmm3,xmm4
2168
2169 paddd xmm2,xmm1
2170 pcmpeqd xmm1,xmm5
2171 movdqa XMMWORD[(-64)+rax],xmm0
2172 movdqa xmm0,xmm4
2173
2174 paddd xmm3,xmm2
2175 pcmpeqd xmm2,xmm5
2176 movdqa XMMWORD[(-48)+rax],xmm1
2177 movdqa xmm1,xmm4
2178
2179 paddd xmm0,xmm3
2180 pcmpeqd xmm3,xmm5
2181 movdqa XMMWORD[(-32)+rax],xmm2
2182 movdqa xmm2,xmm4
2183 paddd xmm1,xmm0
2184 pcmpeqd xmm0,xmm5
2185 movdqa XMMWORD[(-16)+rax],xmm3
2186 movdqa xmm3,xmm4
2187
2188 paddd xmm2,xmm1
2189 pcmpeqd xmm1,xmm5
2190 movdqa XMMWORD[rax],xmm0
2191 movdqa xmm0,xmm4
2192
2193 paddd xmm3,xmm2
2194 pcmpeqd xmm2,xmm5
2195 movdqa XMMWORD[16+rax],xmm1
2196 movdqa xmm1,xmm4
2197
2198 paddd xmm0,xmm3
2199 pcmpeqd xmm3,xmm5
2200 movdqa XMMWORD[32+rax],xmm2
2201 movdqa xmm2,xmm4
2202 paddd xmm1,xmm0
2203 pcmpeqd xmm0,xmm5
2204 movdqa XMMWORD[48+rax],xmm3
2205 movdqa xmm3,xmm4
2206
2207 paddd xmm2,xmm1
2208 pcmpeqd xmm1,xmm5
2209 movdqa XMMWORD[64+rax],xmm0
2210 movdqa xmm0,xmm4
2211
2212 paddd xmm3,xmm2
2213 pcmpeqd xmm2,xmm5
2214 movdqa XMMWORD[80+rax],xmm1
2215 movdqa xmm1,xmm4
2216
2217 paddd xmm0,xmm3
2218 pcmpeqd xmm3,xmm5
2219 movdqa XMMWORD[96+rax],xmm2
2220 movdqa xmm2,xmm4
2221 movdqa XMMWORD[112+rax],xmm3 ; sixteenth and last mask
2222 jmp NEAR $L$gather
2223
2224 ALIGN 32
2225 $L$gather: ; one output qword per pass: OR of (row vector AND mask) over all 16 vectors
2226 pxor xmm4,xmm4 ; two accumulators, merged at the end of the pass
2227 pxor xmm5,xmm5
2228 movdqa xmm0,XMMWORD[((-128))+r11]
2229 movdqa xmm1,XMMWORD[((-112))+r11]
2230 movdqa xmm2,XMMWORD[((-96))+r11]
2231 pand xmm0,XMMWORD[((-128))+rax]
2232 movdqa xmm3,XMMWORD[((-80))+r11]
2233 pand xmm1,XMMWORD[((-112))+rax]
2234 por xmm4,xmm0
2235 pand xmm2,XMMWORD[((-96))+rax]
2236 por xmm5,xmm1
2237 pand xmm3,XMMWORD[((-80))+rax]
2238 por xmm4,xmm2
2239 por xmm5,xmm3
2240 movdqa xmm0,XMMWORD[((-64))+r11]
2241 movdqa xmm1,XMMWORD[((-48))+r11]
2242 movdqa xmm2,XMMWORD[((-32))+r11]
2243 pand xmm0,XMMWORD[((-64))+rax]
2244 movdqa xmm3,XMMWORD[((-16))+r11]
2245 pand xmm1,XMMWORD[((-48))+rax]
2246 por xmm4,xmm0
2247 pand xmm2,XMMWORD[((-32))+rax]
2248 por xmm5,xmm1
2249 pand xmm3,XMMWORD[((-16))+rax]
2250 por xmm4,xmm2
2251 por xmm5,xmm3
2252 movdqa xmm0,XMMWORD[r11]
2253 movdqa xmm1,XMMWORD[16+r11]
2254 movdqa xmm2,XMMWORD[32+r11]
2255 pand xmm0,XMMWORD[rax]
2256 movdqa xmm3,XMMWORD[48+r11]
2257 pand xmm1,XMMWORD[16+rax]
2258 por xmm4,xmm0
2259 pand xmm2,XMMWORD[32+rax]
2260 por xmm5,xmm1
2261 pand xmm3,XMMWORD[48+rax]
2262 por xmm4,xmm2
2263 por xmm5,xmm3
2264 movdqa xmm0,XMMWORD[64+r11]
2265 movdqa xmm1,XMMWORD[80+r11]
2266 movdqa xmm2,XMMWORD[96+r11]
2267 pand xmm0,XMMWORD[64+rax]
2268 movdqa xmm3,XMMWORD[112+r11]
2269 pand xmm1,XMMWORD[80+rax]
2270 por xmm4,xmm0
2271 pand xmm2,XMMWORD[96+rax]
2272 por xmm5,xmm1
2273 pand xmm3,XMMWORD[112+rax]
2274 por xmm4,xmm2
2275 por xmm5,xmm3
2276 por xmm4,xmm5 ; merge the two accumulators
2277 lea r11,[256+r11] ; next 256-byte row
2278 pshufd xmm0,xmm4,0x4e ; swap the two qword halves
2279 por xmm0,xmm4 ; fold high and low qword together
2280 movq QWORD[rcx],xmm0
2281 lea rcx,[8+rcx]
2282 sub edx,1
2283 jnz NEAR $L$gather
2284
2285 lea rsp,[r10] ; restore the rsp captured on entry
2286 DB 0F3h,0C3h ;repret
2287 $L$SEH_end_bn_gather5:
2288
2289 ALIGN 64
; $L$inc: two 16-byte constants loaded with movdqa by the gather code
; (first vector = starting index pattern, second = per-step increment),
; followed by a NUL-terminated ASCII identification string.
2290 $L$inc:
2291 DD 0,0,1,1 ; initial index vector
2292 DD 2,2,2,2 ; increment added for each successive mask
; ASCII: "Montgomery Multiplication with scatter/gather for x86_64,
; CRYPTOGAMS by <appro@openssl.org>", terminated by a zero byte.
2293 DB 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
2294 DB 112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115
2295 DB 99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111
2296 DB 114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79
2297 DB 71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111
2298 DB 112,101,110,115,115,108,46,111,114,103,62,0
2299 EXTERN __imp_RtlVirtualUnwind
2300
2301 ALIGN 16
; mul_handler: exception handler referenced from the .xdata entries of this
; file. As used below, r8 points to a register-context record and r9 to a
; dispatcher record.
; NOTE(review): the numeric offsets match the Win64 CONTEXT layout
; (120=Rax, 144=Rbx, 152=Rsp, 160=Rbp, 168=Rsi, 176=Rdi, 192=R9,
; 216..240=R12..R15, 248=Rip) and DISPATCHER_CONTEXT (0=ControlPc,
; 8=ImageBase, 16=FunctionEntry, 40=ContextRecord, 56=HandlerData) -
; confirm against winnt.h.
; The handler locates the frame of the interrupted function, writes the saved
; non-volatile registers and the caller's stack pointer back into the context,
; copies the context to the dispatcher's record, calls RtlVirtualUnwind and
; returns 1 (presumably ExceptionContinueSearch - confirm).
2302 mul_handler:
2303 push rsi
2304 push rdi
2305 push rbx
2306 push rbp
2307 push r12
2308 push r13
2309 push r14
2310 push r15
2311 pushfq
2312 sub rsp,64 ; room for the RtlVirtualUnwind call (slots [32..56+rsp] are filled below)
2313
2314 mov rax,QWORD[120+r8] ; context Rax: the prologues keep the entry rsp in rax
2315 mov rbx,QWORD[248+r8] ; context Rip
2316
2317 mov rsi,QWORD[8+r9] ; image base
2318 mov r11,QWORD[56+r9] ; handler data: two image-relative label addresses
2319
2320 mov r10d,DWORD[r11] ; first entry (the *_body label in .xdata)
2321 lea r10,[r10*1+rsi]
2322 cmp rbx,r10
2323 jb NEAR $L$common_seh_tail ; Rip before the body label: rax already holds the entry rsp
2324
2325 mov rax,QWORD[152+r8] ; context Rsp
2326
2327 mov r10d,DWORD[4+r11] ; second entry (the *_epilogue label in .xdata)
2328 lea r10,[r10*1+rsi]
2329 cmp rbx,r10
2330 jae NEAR $L$common_seh_tail ; Rip at or past the epilogue label
2331
2332 lea r10,[$L$mul_epilogue]
2333 cmp rbx,r10
2334 ja NEAR $L$body_40 ; code after $L$mul_epilogue keeps the saved rsp at [40+rsp]
2335
2336 mov r10,QWORD[192+r8] ; context R9
2337 mov rax,QWORD[8+r10*8+rax] ; slot written by `mov QWORD[8+r9*8+rsp],rax` in bn_mul_mont_gather5
2338
2339 jmp NEAR $L$body_proceed
2340
2341 $L$body_40:
2342 mov rax,QWORD[40+rax] ; saved entry rsp (e.g. stored by bn_from_mont8x at [40+rsp])
2343 $L$body_proceed:
2344 mov rbx,QWORD[((-8))+rax] ; the six registers pushed just below the entry rsp
2345 mov rbp,QWORD[((-16))+rax]
2346 mov r12,QWORD[((-24))+rax]
2347 mov r13,QWORD[((-32))+rax]
2348 mov r14,QWORD[((-40))+rax]
2349 mov r15,QWORD[((-48))+rax]
2350 mov QWORD[144+r8],rbx ; write them back into the context record
2351 mov QWORD[160+r8],rbp
2352 mov QWORD[216+r8],r12
2353 mov QWORD[224+r8],r13
2354 mov QWORD[232+r8],r14
2355 mov QWORD[240+r8],r15
2356
2357 $L$common_seh_tail:
2358 mov rdi,QWORD[8+rax] ; rdi/rsi saved by the "WIN64 prologue" stores
2359 mov rsi,QWORD[16+rax]
2360 mov QWORD[152+r8],rax ; context Rsp = entry rsp
2361 mov QWORD[168+r8],rsi
2362 mov QWORD[176+r8],rdi
2363
2364 mov rdi,QWORD[40+r9] ; destination: the dispatcher's context record
2365 mov rsi,r8 ; source: the updated context
2366 mov ecx,154 ; number of qwords to copy
2367 DD 0xa548f3fc ; bytes FC F3 48 A5 = cld ; rep movsq
2368
2369 mov rsi,r9
2370 xor rcx,rcx ; 1st argument = 0
2371 mov rdx,QWORD[8+rsi] ; 2nd argument = image base
2372 mov r8,QWORD[rsi] ; 3rd argument = [dispatcher+0]
2373 mov r9,QWORD[16+rsi] ; 4th argument = [dispatcher+16]
2374 mov r10,QWORD[40+rsi]
2375 lea r11,[56+rsi]
2376 lea r12,[24+rsi]
2377 mov QWORD[32+rsp],r10 ; 5th argument = context record pointer
2378 mov QWORD[40+rsp],r11 ; 6th argument = address of dispatcher+56
2379 mov QWORD[48+rsp],r12 ; 7th argument = address of dispatcher+24
2380 mov QWORD[56+rsp],rcx ; 8th argument = 0
2381 call QWORD[__imp_RtlVirtualUnwind]
2382
2383 mov eax,1 ; handler return value
2384 add rsp,64
2385 popfq
2386 pop r15
2387 pop r14
2388 pop r13
2389 pop r12
2390 pop rbp
2391 pop rbx
2392 pop rdi
2393 pop rsi
2394 DB 0F3h,0C3h ;repret
2395
2396
; Function table: one entry of three image-relative addresses per function
; (begin label, end label, matching $L$SEH_info_* record in .xdata).
; NOTE(review): this is presumably the Win64 RUNTIME_FUNCTION layout - confirm.
2397 section .pdata rdata align=4
2398 ALIGN 4
2399 DD $L$SEH_begin_bn_mul_mont_gather5 wrt ..imagebase
2400 DD $L$SEH_end_bn_mul_mont_gather5 wrt ..imagebase
2401 DD $L$SEH_info_bn_mul_mont_gather5 wrt ..imagebase
2402
2403 DD $L$SEH_begin_bn_mul4x_mont_gather5 wrt ..imagebase
2404 DD $L$SEH_end_bn_mul4x_mont_gather5 wrt ..imagebase
2405 DD $L$SEH_info_bn_mul4x_mont_gather5 wrt ..imagebase
2406
2407 DD $L$SEH_begin_bn_power5 wrt ..imagebase
2408 DD $L$SEH_end_bn_power5 wrt ..imagebase
2409 DD $L$SEH_info_bn_power5 wrt ..imagebase
2410
2411 DD $L$SEH_begin_bn_from_mont8x wrt ..imagebase
2412 DD $L$SEH_end_bn_from_mont8x wrt ..imagebase
2413 DD $L$SEH_info_bn_from_mont8x wrt ..imagebase
2414 DD $L$SEH_begin_bn_gather5 wrt ..imagebase
2415 DD $L$SEH_end_bn_gather5 wrt ..imagebase
2416 DD $L$SEH_info_bn_gather5 wrt ..imagebase
2417
; Unwind records referenced from .pdata. Each handler-based record is:
; a 4-byte header, the image-relative address of mul_handler, and two
; image-relative label addresses that mul_handler reads as its handler data
; (body label first, epilogue label second).
; NOTE(review): header byte 9 presumably encodes UNWIND_INFO version 1 with
; the exception-handler flag set - confirm against the x64 unwind format.
2418 section .xdata rdata align=8
2419 ALIGN 8
2420 $L$SEH_info_bn_mul_mont_gather5:
2421 DB 9,0,0,0
2422 DD mul_handler wrt ..imagebase
2423 DD $L$mul_body wrt ..imagebase,$L$mul_epilogue wrt ..imagebase
2424 ALIGN 8
2425 $L$SEH_info_bn_mul4x_mont_gather5:
2426 DB 9,0,0,0
2427 DD mul_handler wrt ..imagebase
2428 DD $L$mul4x_body wrt ..imagebase,$L$mul4x_epilogue wrt ..imagebase
2429 ALIGN 8
; Unwind record for bn_power5: 4-byte header, image-relative address of
; mul_handler, then the two image-relative label addresses mul_handler reads
; as its handler data (body label first, epilogue label second).
; Fixed: the last operand read "..imagebas e"; the stray space split the NASM
; special symbol `..imagebase` that every sibling record uses.
2430 $L$SEH_info_bn_power5:
2431 DB 9,0,0,0
2432 DD mul_handler wrt ..imagebase
2433 DD $L$power5_body wrt ..imagebase,$L$power5_epilogue wrt ..imagebase
2434 ALIGN 8
; Unwind record for bn_from_mont8x: header, mul_handler address, then the
; body/epilogue label pair that mul_handler reads as its handler data.
2435 $L$SEH_info_bn_from_mont8x:
2436 DB 9,0,0,0
2437 DD mul_handler wrt ..imagebase
2438 DD $L$from_body wrt ..imagebase,$L$from_epilogue wrt ..imagebase
2439 ALIGN 8
; Unwind record for bn_gather5: raw unwind codes, no handler address.
; NOTE(review): the bytes appear to describe the hand-encoded prologue of
; bn_gather5 (0x21*8 = 264 = the `sub rsp,0x108`, and register 0xa = r10 as
; the frame register set by `lea r10,[rsp]`) - confirm against the x64
; UNWIND_CODE documentation.
2440 $L$SEH_info_bn_gather5:
2441 DB 0x01,0x0b,0x03,0x0a
2442 DB 0x0b,0x01,0x21,0x00
2443 DB 0x04,0xa3,0x00,0x00
2444 ALIGN 8
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698