OLD | NEW |
| (Empty) |
1 default rel | |
2 %define XMMWORD | |
3 %define YMMWORD | |
4 %define ZMMWORD | |
5 section .text code align=64 | |
6 | |
7 | |
8 EXTERN OPENSSL_ia32cap_P | |
9 | |
10 global bn_mul_mont | |
11 | |
; ---------------------------------------------------------------------
; bn_mul_mont -- exported entry point, Win64 calling convention.
; Arguments as they are consumed below (roles inferred from usage only):
;   rcx      -> rdi : destination vector (written in $L$sub / $L$copy)
;   rdx      -> rsi : first multiplicand vector
;   r8       -> rdx : second multiplicand vector (kept in r12)
;   r9       -> rcx : vector multiplied by the per-pass factor in rbp and
;                     subtracted at the end (presumably the modulus -- confirm)
;   [40+rsp] -> r8  : pointer to one qword, dereferenced once in $L$mul_body
;                     (presumably the Montgomery n0 constant -- confirm)
;   [48+rsp] -> r9  : word count; only the low 32 bits are used
; Returns rax = 1.
; ---------------------------------------------------------------------
12 ALIGN 16 | |
13 bn_mul_mont: | |
; rdi/rsi are callee-saved on Win64: spill them into the caller-provided
; home slots just above the return address, and keep the entry rsp in rax.
14 mov QWORD[8+rsp],rdi ;WIN64 prologue | |
15 mov QWORD[16+rsp],rsi | |
16 mov rax,rsp | |
17 $L$SEH_begin_bn_mul_mont: | |
; Shuffle the four register arguments and the two stack arguments into
; rdi, rsi, rdx, rcx, r8, r9, which is what the code below works with.
18 mov rdi,rcx | |
19 mov rsi,rdx | |
20 mov rdx,r8 | |
21 mov rcx,r9 | |
22 mov r8,QWORD[40+rsp] | |
23 mov r9,QWORD[48+rsp] | |
24 | |
25 | |
; Dispatch on the word count and on whether both multiplicands are the
; same pointer:
;   count not a multiple of 4, or count < 8  -> $L$mul_enter (generic loop)
;   rdx != rsi                               -> $L$mul4x_enter
;   rdx == rsi and count a multiple of 8     -> $L$sqr8x_enter
;   otherwise                                -> $L$mul4x_enter
26 test r9d,3 | |
27 jnz NEAR $L$mul_enter | |
28 cmp r9d,8 | |
29 jb NEAR $L$mul_enter | |
30 cmp rdx,rsi | |
31 jne NEAR $L$mul4x_enter | |
32 test r9d,7 | |
33 jz NEAR $L$sqr8x_enter | |
34 jmp NEAR $L$mul4x_enter | |
35 | |
36 ALIGN 16 | |
37 $L$mul_enter: | |
; Save the six callee-saved general registers used as working registers.
38 push rbx | |
39 push rbp | |
40 push r12 | |
41 push r13 | |
42 push r14 | |
43 push r15 | |
44 | |
; Zero-extend the count, reserve (count+2) qwords below rsp and round rsp
; down to a 1024-byte boundary. r11 keeps the rsp value taken after the
; six pushes; it is stored in slot [count+1] of the new frame.
45 mov r9d,r9d | |
46 lea r10,[2+r9] | |
47 mov r11,rsp | |
48 neg r10 | |
49 lea rsp,[r10*8+rsp] | |
50 and rsp,-1024 | |
51 | |
52 mov QWORD[8+r9*8+rsp],r11 | |
53 $L$mul_body: | |
; r12 = second multiplicand pointer, r8 = the qword r8 pointed at,
; rbx = first word at r12, rax = first word at rsi.
54 mov r12,rdx | |
55 mov r8,QWORD[r8] | |
56 mov rbx,QWORD[r12] | |
57 mov rax,QWORD[rsi] | |
58 | |
; r14 = outer index, r15 = inner index.
59 xor r14,r14 | |
60 xor r15,r15 | |
61 | |
; rbp = r8 * low64(rsi[0] * rbx): the factor applied to the rcx vector
; for this pass. The low-word sum computed into r10 below is used only
; for the carry it produces; r10 is overwritten before it is read again.
62 mov rbp,r8 | |
63 mul rbx | |
64 mov r10,rax | |
65 mov rax,QWORD[rcx] | |
66 | |
67 imul rbp,r10 | |
68 mov r11,rdx | |
69 | |
70 mul rbp | |
71 add r10,rax | |
72 mov rax,QWORD[8+rsi] | |
73 adc rdx,0 | |
74 mov r13,rdx | |
75 | |
76 lea r15,[1+r15] | |
77 jmp NEAR $L$1st_enter | |
78 | |
79 ALIGN 16 | |
; First pass: each iteration multiplies the next word at rsi by rbx and
; the next word at rcx by rbp, accumulates both with the pending carries
; and stores one finished word into the scratch vector at rsp.
80 $L$1st: | |
81 add r13,rax | |
82 mov rax,QWORD[r15*8+rsi] | |
83 adc rdx,0 | |
84 add r13,r11 | |
85 mov r11,r10 | |
86 adc rdx,0 | |
87 mov QWORD[((-16))+r15*8+rsp],r13 | |
88 mov r13,rdx | |
89 | |
90 $L$1st_enter: | |
91 mul rbx | |
92 add r11,rax | |
93 mov rax,QWORD[r15*8+rcx] | |
94 adc rdx,0 | |
95 lea r15,[1+r15] | |
96 mov r10,rdx | |
97 | |
98 mul rbp | |
99 cmp r15,r9 | |
100 jne NEAR $L$1st | |
101 | |
; Tail of the first pass: flush the last product word, then store the two
; top words (sum of the remaining carries, and its carry-out) into
; scratch[count-1] and scratch[count]. rax is reloaded with rsi[0] for
; the next pass.
102 add r13,rax | |
103 mov rax,QWORD[rsi] | |
104 adc rdx,0 | |
105 add r13,r11 | |
106 adc rdx,0 | |
107 mov QWORD[((-16))+r15*8+rsp],r13 | |
108 mov r13,rdx | |
109 mov r11,r10 | |
110 | |
111 xor rdx,rdx | |
112 add r13,r11 | |
113 adc rdx,0 | |
114 mov QWORD[((-8))+r9*8+rsp],r13 | |
115 mov QWORD[r9*8+rsp],rdx | |
116 | |
117 lea r14,[1+r14] | |
118 jmp NEAR $L$outer | |
119 ALIGN 16 | |
; Remaining passes (r14 = 1 .. count-1): same as the first pass, but the
; existing scratch words are added in as well. rbp is recomputed per pass
; from r8 and the new low word.
120 $L$outer: | |
121 mov rbx,QWORD[r14*8+r12] | |
122 xor r15,r15 | |
123 mov rbp,r8 | |
124 mov r10,QWORD[rsp] | |
125 mul rbx | |
126 add r10,rax | |
127 mov rax,QWORD[rcx] | |
128 adc rdx,0 | |
129 | |
130 imul rbp,r10 | |
131 mov r11,rdx | |
132 | |
133 mul rbp | |
134 add r10,rax | |
135 mov rax,QWORD[8+rsi] | |
136 adc rdx,0 | |
137 mov r10,QWORD[8+rsp] | |
138 mov r13,rdx | |
139 | |
140 lea r15,[1+r15] | |
141 jmp NEAR $L$inner_enter | |
142 | |
143 ALIGN 16 | |
144 $L$inner: | |
145 add r13,rax | |
146 mov rax,QWORD[r15*8+rsi] | |
147 adc rdx,0 | |
148 add r13,r10 | |
149 mov r10,QWORD[r15*8+rsp] | |
150 adc rdx,0 | |
151 mov QWORD[((-16))+r15*8+rsp],r13 | |
152 mov r13,rdx | |
153 | |
154 $L$inner_enter: | |
155 mul rbx | |
156 add r11,rax | |
157 mov rax,QWORD[r15*8+rcx] | |
158 adc rdx,0 | |
159 add r10,r11 | |
160 mov r11,rdx | |
161 adc r11,0 | |
162 lea r15,[1+r15] | |
163 | |
164 mul rbp | |
165 cmp r15,r9 | |
166 jne NEAR $L$inner | |
167 | |
; Pass tail: flush the last word, fold the previous top word
; (scratch[count], loaded into r10) into the new top words.
168 add r13,rax | |
169 mov rax,QWORD[rsi] | |
170 adc rdx,0 | |
171 add r13,r10 | |
172 mov r10,QWORD[r15*8+rsp] | |
173 adc rdx,0 | |
174 mov QWORD[((-16))+r15*8+rsp],r13 | |
175 mov r13,rdx | |
176 | |
177 xor rdx,rdx | |
178 add r13,r11 | |
179 adc rdx,0 | |
180 add r13,r10 | |
181 adc rdx,0 | |
182 mov QWORD[((-8))+r9*8+rsp],r13 | |
183 mov QWORD[r9*8+rsp],rdx | |
184 | |
185 lea r14,[1+r14] | |
186 cmp r14,r9 | |
187 jb NEAR $L$outer | |
188 | |
; Final subtraction: dst[i] = scratch[i] - rcx[i] with borrow, for
; i = 0 .. count-1. The xor clears CF before the first sbb; the mov, lea
; and jmp that follow do not modify flags, and dec leaves CF untouched
; inside the loop, so the borrow chain is carried across iterations.
189 xor r14,r14 | |
190 mov rax,QWORD[rsp] | |
191 lea rsi,[rsp] | |
192 mov r15,r9 | |
193 jmp NEAR $L$sub | |
194 ALIGN 16 | |
195 $L$sub: sbb rax,QWORD[r14*8+rcx] | |
196 mov QWORD[r14*8+rdi],rax | |
197 mov rax,QWORD[8+r14*8+rsi] | |
198 lea r14,[1+r14] | |
199 dec r15 | |
200 jnz NEAR $L$sub | |
201 | |
; On loop exit rax holds scratch[count] (the top word); subtracting the
; final borrow turns it into the selection mask used by $L$copy.
202 sbb rax,0 | |
203 xor r14,r14 | |
204 mov r15,r9 | |
205 ALIGN 16 | |
; Branch-free select per word:
;   dst[i] = ((scratch[i] ^ dst[i]) & rax) ^ dst[i]
; which keeps scratch[i] when rax is all-ones and dst[i] when rax is zero.
; Each scratch word is overwritten with the loop index as it is consumed,
; so the intermediate value does not remain on the stack.
206 $L$copy: | |
207 mov rsi,QWORD[r14*8+rsp] | |
208 mov rcx,QWORD[r14*8+rdi] | |
209 xor rsi,rcx | |
210 and rsi,rax | |
211 xor rsi,rcx | |
212 mov QWORD[r14*8+rsp],r14 | |
213 mov QWORD[r14*8+rdi],rsi | |
214 lea r14,[1+r14] | |
215 sub r15,1 | |
216 jnz NEAR $L$copy | |
217 | |
; Epilogue: fetch the rsp saved at $L$mul_enter, set the return value to 1,
; reload the six pushed registers from that frame and unwind past them.
218 mov rsi,QWORD[8+r9*8+rsp] | |
219 mov rax,1 | |
220 mov r15,QWORD[rsi] | |
221 mov r14,QWORD[8+rsi] | |
222 mov r13,QWORD[16+rsi] | |
223 mov r12,QWORD[24+rsi] | |
224 mov rbp,QWORD[32+rsi] | |
225 mov rbx,QWORD[40+rsi] | |
226 lea rsp,[48+rsi] | |
227 $L$mul_epilogue: | |
; Restore rdi/rsi from the home slots written in the prologue and return.
228 mov rdi,QWORD[8+rsp] ;WIN64 epilogue | |
229 mov rsi,QWORD[16+rsp] | |
230 DB 0F3h,0C3h ;repret | |
231 $L$SEH_end_bn_mul_mont: | |
232 | |
; ---------------------------------------------------------------------
; bn_mul4x_mont -- four-words-per-iteration variant of the multiply loop.
; Not declared global in the visible part of the file; it is reached from
; bn_mul_mont through $L$mul4x_enter, after that routine has already done
; the same Win64 argument shuffle as the prologue below.
; Register roles after the shuffle match bn_mul_mont:
;   rdi = destination, rsi / rdx = multiplicand vectors, rcx = vector
;   multiplied by rbp and subtracted at the end, r8 = pointer to one qword,
;   r9 = word count (low 32 bits). Returns rax = 1.
; ---------------------------------------------------------------------
233 ALIGN 16 | |
234 bn_mul4x_mont: | |
235 mov QWORD[8+rsp],rdi ;WIN64 prologue | |
236 mov QWORD[16+rsp],rsi | |
237 mov rax,rsp | |
238 $L$SEH_begin_bn_mul4x_mont: | |
239 mov rdi,rcx | |
240 mov rsi,rdx | |
241 mov rdx,r8 | |
242 mov rcx,r9 | |
243 mov r8,QWORD[40+rsp] | |
244 mov r9,QWORD[48+rsp] | |
245 | |
246 | |
247 $L$mul4x_enter: | |
248 push rbx | |
249 push rbp | |
250 push r12 | |
251 push r13 | |
252 push r14 | |
253 push r15 | |
254 | |
; Reserve (count+4) qwords below rsp, round rsp down to a 1024-byte
; boundary and store the post-push rsp in slot [count+1].
255 mov r9d,r9d | |
256 lea r10,[4+r9] | |
257 mov r11,rsp | |
258 neg r10 | |
259 lea rsp,[r10*8+rsp] | |
260 and rsp,-1024 | |
261 | |
262 mov QWORD[8+r9*8+rsp],r11 | |
263 $L$mul4x_body: | |
; The destination pointer is parked in slot [count+2] because rdi is used
; as an accumulator throughout the loops; it is reloaded before $L$sub4x.
264 mov QWORD[16+r9*8+rsp],rdi | |
265 mov r12,rdx | |
266 mov r8,QWORD[r8] | |
267 mov rbx,QWORD[r12] | |
268 mov rax,QWORD[rsi] | |
269 | |
; r14 = outer index, r15 = inner index.
270 xor r14,r14 | |
271 xor r15,r15 | |
272 | |
; rbp = r8 * low64(rsi[0] * rbx), the per-pass factor for the rcx vector.
273 mov rbp,r8 | |
274 mul rbx | |
275 mov r10,rax | |
276 mov rax,QWORD[rcx] | |
277 | |
278 imul rbp,r10 | |
279 mov r11,rdx | |
280 | |
281 mul rbp | |
282 add r10,rax | |
283 mov rax,QWORD[8+rsi] | |
284 adc rdx,0 | |
285 mov rdi,rdx | |
286 | |
287 mul rbx | |
288 add r11,rax | |
289 mov rax,QWORD[8+rcx] | |
290 adc rdx,0 | |
291 mov r10,rdx | |
292 | |
293 mul rbp | |
294 add rdi,rax | |
295 mov rax,QWORD[16+rsi] | |
296 adc rdx,0 | |
297 add rdi,r11 | |
298 lea r15,[4+r15] | |
299 adc rdx,0 | |
300 mov QWORD[rsp],rdi | |
301 mov r13,rdx | |
302 jmp NEAR $L$1st4x | |
303 ALIGN 16 | |
; First pass, four scratch words stored per iteration. r10/r11 alternate
; as carriers for the rbx products, r13/rdi for the rbp products.
304 $L$1st4x: | |
305 mul rbx | |
306 add r10,rax | |
307 mov rax,QWORD[((-16))+r15*8+rcx] | |
308 adc rdx,0 | |
309 mov r11,rdx | |
310 | |
311 mul rbp | |
312 add r13,rax | |
313 mov rax,QWORD[((-8))+r15*8+rsi] | |
314 adc rdx,0 | |
315 add r13,r10 | |
316 adc rdx,0 | |
317 mov QWORD[((-24))+r15*8+rsp],r13 | |
318 mov rdi,rdx | |
319 | |
320 mul rbx | |
321 add r11,rax | |
322 mov rax,QWORD[((-8))+r15*8+rcx] | |
323 adc rdx,0 | |
324 mov r10,rdx | |
325 | |
326 mul rbp | |
327 add rdi,rax | |
328 mov rax,QWORD[r15*8+rsi] | |
329 adc rdx,0 | |
330 add rdi,r11 | |
331 adc rdx,0 | |
332 mov QWORD[((-16))+r15*8+rsp],rdi | |
333 mov r13,rdx | |
334 | |
335 mul rbx | |
336 add r10,rax | |
337 mov rax,QWORD[r15*8+rcx] | |
338 adc rdx,0 | |
339 mov r11,rdx | |
340 | |
341 mul rbp | |
342 add r13,rax | |
343 mov rax,QWORD[8+r15*8+rsi] | |
344 adc rdx,0 | |
345 add r13,r10 | |
346 adc rdx,0 | |
347 mov QWORD[((-8))+r15*8+rsp],r13 | |
348 mov rdi,rdx | |
349 | |
350 mul rbx | |
351 add r11,rax | |
352 mov rax,QWORD[8+r15*8+rcx] | |
353 adc rdx,0 | |
354 lea r15,[4+r15] | |
355 mov r10,rdx | |
356 | |
357 mul rbp | |
358 add rdi,rax | |
359 mov rax,QWORD[((-16))+r15*8+rsi] | |
360 adc rdx,0 | |
361 add rdi,r11 | |
362 adc rdx,0 | |
363 mov QWORD[((-32))+r15*8+rsp],rdi | |
364 mov r13,rdx | |
365 cmp r15,r9 | |
366 jb NEAR $L$1st4x | |
367 | |
; First-pass tail: the last two product words, then the two top words
; (carry sum and its carry-out). rax is reloaded with rsi[0].
368 mul rbx | |
369 add r10,rax | |
370 mov rax,QWORD[((-16))+r15*8+rcx] | |
371 adc rdx,0 | |
372 mov r11,rdx | |
373 | |
374 mul rbp | |
375 add r13,rax | |
376 mov rax,QWORD[((-8))+r15*8+rsi] | |
377 adc rdx,0 | |
378 add r13,r10 | |
379 adc rdx,0 | |
380 mov QWORD[((-24))+r15*8+rsp],r13 | |
381 mov rdi,rdx | |
382 | |
383 mul rbx | |
384 add r11,rax | |
385 mov rax,QWORD[((-8))+r15*8+rcx] | |
386 adc rdx,0 | |
387 mov r10,rdx | |
388 | |
389 mul rbp | |
390 add rdi,rax | |
391 mov rax,QWORD[rsi] | |
392 adc rdx,0 | |
393 add rdi,r11 | |
394 adc rdx,0 | |
395 mov QWORD[((-16))+r15*8+rsp],rdi | |
396 mov r13,rdx | |
397 | |
398 xor rdi,rdi | |
399 add r13,r10 | |
400 adc rdi,0 | |
401 mov QWORD[((-8))+r15*8+rsp],r13 | |
402 mov QWORD[r15*8+rsp],rdi | |
403 | |
404 lea r14,[1+r14] | |
405 ALIGN 4 | |
; Remaining passes: same structure as the first pass, with the existing
; scratch words added into the rbx products.
406 $L$outer4x: | |
407 mov rbx,QWORD[r14*8+r12] | |
408 xor r15,r15 | |
409 mov r10,QWORD[rsp] | |
410 mov rbp,r8 | |
411 mul rbx | |
412 add r10,rax | |
413 mov rax,QWORD[rcx] | |
414 adc rdx,0 | |
415 | |
416 imul rbp,r10 | |
417 mov r11,rdx | |
418 | |
419 mul rbp | |
420 add r10,rax | |
421 mov rax,QWORD[8+rsi] | |
422 adc rdx,0 | |
423 mov rdi,rdx | |
424 | |
425 mul rbx | |
426 add r11,rax | |
427 mov rax,QWORD[8+rcx] | |
428 adc rdx,0 | |
429 add r11,QWORD[8+rsp] | |
430 adc rdx,0 | |
431 mov r10,rdx | |
432 | |
433 mul rbp | |
434 add rdi,rax | |
435 mov rax,QWORD[16+rsi] | |
436 adc rdx,0 | |
437 add rdi,r11 | |
438 lea r15,[4+r15] | |
439 adc rdx,0 | |
440 mov QWORD[rsp],rdi | |
441 mov r13,rdx | |
442 jmp NEAR $L$inner4x | |
443 ALIGN 16 | |
444 $L$inner4x: | |
445 mul rbx | |
446 add r10,rax | |
447 mov rax,QWORD[((-16))+r15*8+rcx] | |
448 adc rdx,0 | |
449 add r10,QWORD[((-16))+r15*8+rsp] | |
450 adc rdx,0 | |
451 mov r11,rdx | |
452 | |
453 mul rbp | |
454 add r13,rax | |
455 mov rax,QWORD[((-8))+r15*8+rsi] | |
456 adc rdx,0 | |
457 add r13,r10 | |
458 adc rdx,0 | |
459 mov QWORD[((-24))+r15*8+rsp],r13 | |
460 mov rdi,rdx | |
461 | |
462 mul rbx | |
463 add r11,rax | |
464 mov rax,QWORD[((-8))+r15*8+rcx] | |
465 adc rdx,0 | |
466 add r11,QWORD[((-8))+r15*8+rsp] | |
467 adc rdx,0 | |
468 mov r10,rdx | |
469 | |
470 mul rbp | |
471 add rdi,rax | |
472 mov rax,QWORD[r15*8+rsi] | |
473 adc rdx,0 | |
474 add rdi,r11 | |
475 adc rdx,0 | |
476 mov QWORD[((-16))+r15*8+rsp],rdi | |
477 mov r13,rdx | |
478 | |
479 mul rbx | |
480 add r10,rax | |
481 mov rax,QWORD[r15*8+rcx] | |
482 adc rdx,0 | |
483 add r10,QWORD[r15*8+rsp] | |
484 adc rdx,0 | |
485 mov r11,rdx | |
486 | |
487 mul rbp | |
488 add r13,rax | |
489 mov rax,QWORD[8+r15*8+rsi] | |
490 adc rdx,0 | |
491 add r13,r10 | |
492 adc rdx,0 | |
493 mov QWORD[((-8))+r15*8+rsp],r13 | |
494 mov rdi,rdx | |
495 | |
496 mul rbx | |
497 add r11,rax | |
498 mov rax,QWORD[8+r15*8+rcx] | |
499 adc rdx,0 | |
500 add r11,QWORD[8+r15*8+rsp] | |
501 adc rdx,0 | |
502 lea r15,[4+r15] | |
503 mov r10,rdx | |
504 | |
505 mul rbp | |
506 add rdi,rax | |
507 mov rax,QWORD[((-16))+r15*8+rsi] | |
508 adc rdx,0 | |
509 add rdi,r11 | |
510 adc rdx,0 | |
511 mov QWORD[((-32))+r15*8+rsp],rdi | |
512 mov r13,rdx | |
513 cmp r15,r9 | |
514 jb NEAR $L$inner4x | |
515 | |
; Pass tail. The outer index r14 is advanced in the middle of this
; sequence; the previous top word scratch[count] is folded into the new
; top words.
516 mul rbx | |
517 add r10,rax | |
518 mov rax,QWORD[((-16))+r15*8+rcx] | |
519 adc rdx,0 | |
520 add r10,QWORD[((-16))+r15*8+rsp] | |
521 adc rdx,0 | |
522 mov r11,rdx | |
523 | |
524 mul rbp | |
525 add r13,rax | |
526 mov rax,QWORD[((-8))+r15*8+rsi] | |
527 adc rdx,0 | |
528 add r13,r10 | |
529 adc rdx,0 | |
530 mov QWORD[((-24))+r15*8+rsp],r13 | |
531 mov rdi,rdx | |
532 | |
533 mul rbx | |
534 add r11,rax | |
535 mov rax,QWORD[((-8))+r15*8+rcx] | |
536 adc rdx,0 | |
537 add r11,QWORD[((-8))+r15*8+rsp] | |
538 adc rdx,0 | |
539 lea r14,[1+r14] | |
540 mov r10,rdx | |
541 | |
542 mul rbp | |
543 add rdi,rax | |
544 mov rax,QWORD[rsi] | |
545 adc rdx,0 | |
546 add rdi,r11 | |
547 adc rdx,0 | |
548 mov QWORD[((-16))+r15*8+rsp],rdi | |
549 mov r13,rdx | |
550 | |
551 xor rdi,rdi | |
552 add r13,r10 | |
553 adc rdi,0 | |
554 add r13,QWORD[r9*8+rsp] | |
555 adc rdi,0 | |
556 mov QWORD[((-8))+r15*8+rsp],r13 | |
557 mov QWORD[r15*8+rsp],rdi | |
558 | |
559 cmp r14,r9 | |
560 jb NEAR $L$outer4x | |
; Final subtraction, four words per iteration. rdi gets the destination
; pointer back; r9 becomes count/4 until the `shl r9,2` near the epilogue.
; NOTE(review): mul_handler locates the saved rsp via the R9 value in the
; exception context, so a fault while r9 holds count/4 would make it read
; a different slot -- confirm whether that window matters.
561 mov rdi,QWORD[16+r9*8+rsp] | |
562 mov rax,QWORD[rsp] | |
563 mov rdx,QWORD[8+rsp] | |
564 shr r9,2 | |
565 lea rsi,[rsp] | |
566 xor r14,r14 | |
567 | |
; `sub` starts the borrow chain; every following sbb continues it. The
; mov, lea and dec instructions in between leave CF unchanged.
568 sub rax,QWORD[rcx] | |
569 mov rbx,QWORD[16+rsi] | |
570 mov rbp,QWORD[24+rsi] | |
571 sbb rdx,QWORD[8+rcx] | |
572 lea r15,[((-1))+r9] | |
573 jmp NEAR $L$sub4x | |
574 ALIGN 16 | |
575 $L$sub4x: | |
576 mov QWORD[r14*8+rdi],rax | |
577 mov QWORD[8+r14*8+rdi],rdx | |
578 sbb rbx,QWORD[16+r14*8+rcx] | |
579 mov rax,QWORD[32+r14*8+rsi] | |
580 mov rdx,QWORD[40+r14*8+rsi] | |
581 sbb rbp,QWORD[24+r14*8+rcx] | |
582 mov QWORD[16+r14*8+rdi],rbx | |
583 mov QWORD[24+r14*8+rdi],rbp | |
584 sbb rax,QWORD[32+r14*8+rcx] | |
585 mov rbx,QWORD[48+r14*8+rsi] | |
586 mov rbp,QWORD[56+r14*8+rsi] | |
587 sbb rdx,QWORD[40+r14*8+rcx] | |
588 lea r14,[4+r14] | |
589 dec r15 | |
590 jnz NEAR $L$sub4x | |
591 | |
; Last four words. The load from [32+r14*8+rsi] fetches scratch[count]
; (the top word), from which the final borrow is subtracted to form the
; selection mask.
592 mov QWORD[r14*8+rdi],rax | |
593 mov rax,QWORD[32+r14*8+rsi] | |
594 sbb rbx,QWORD[16+r14*8+rcx] | |
595 mov QWORD[8+r14*8+rdi],rdx | |
596 sbb rbp,QWORD[24+r14*8+rcx] | |
597 mov QWORD[16+r14*8+rdi],rbx | |
598 | |
599 sbb rax,0 | |
; The DB bytes encode `movq xmm0,rax`; punpcklqdq then copies the low
; qword into the high qword so xmm0 holds the mask in both halves.
600 DB 66h, 48h, 0fh, 6eh, 0c0h | |
601 punpcklqdq xmm0,xmm0 | |
602 mov QWORD[24+r14*8+rdi],rbp | |
603 xor r14,r14 | |
604 | |
605 mov r15,r9 | |
606 pxor xmm5,xmm5 | |
607 jmp NEAR $L$copy4x | |
608 ALIGN 16 | |
; Branch-free select, 32 bytes per iteration, r14 = byte offset:
;   dst = ((scratch ^ dst) & mask) ^ dst
; The consumed scratch words are overwritten with zero (xmm5).
609 $L$copy4x: | |
610 movdqu xmm2,XMMWORD[r14*1+rsp] | |
611 movdqu xmm4,XMMWORD[16+r14*1+rsp] | |
612 movdqu xmm1,XMMWORD[r14*1+rdi] | |
613 movdqu xmm3,XMMWORD[16+r14*1+rdi] | |
614 pxor xmm2,xmm1 | |
615 pxor xmm4,xmm3 | |
616 pand xmm2,xmm0 | |
617 pand xmm4,xmm0 | |
618 pxor xmm2,xmm1 | |
619 pxor xmm4,xmm3 | |
620 movdqu XMMWORD[r14*1+rdi],xmm2 | |
621 movdqu XMMWORD[16+r14*1+rdi],xmm4 | |
622 movdqa XMMWORD[r14*1+rsp],xmm5 | |
623 movdqa XMMWORD[16+r14*1+rsp],xmm5 | |
624 | |
625 lea r14,[32+r14] | |
626 dec r15 | |
627 jnz NEAR $L$copy4x | |
628 | |
; Epilogue: restore r9 to the word count so the saved-rsp slot can be
; addressed, set the return value to 1 and reload the pushed registers.
629 shl r9,2 | |
630 mov rsi,QWORD[8+r9*8+rsp] | |
631 mov rax,1 | |
632 mov r15,QWORD[rsi] | |
633 mov r14,QWORD[8+rsi] | |
634 mov r13,QWORD[16+rsi] | |
635 mov r12,QWORD[24+rsi] | |
636 mov rbp,QWORD[32+rsi] | |
637 mov rbx,QWORD[40+rsi] | |
638 lea rsp,[48+rsi] | |
639 $L$mul4x_epilogue: | |
640 mov rdi,QWORD[8+rsp] ;WIN64 epilogue | |
641 mov rsi,QWORD[16+rsp] | |
642 DB 0F3h,0C3h ;repret | |
643 $L$SEH_end_bn_mul4x_mont: | |
644 EXTERN bn_sqr8x_internal | |
645 | |
646 | |
; ---------------------------------------------------------------------
; bn_sqr8x_mont -- wrapper around the external bn_sqr8x_internal routine.
; Reached from bn_mul_mont through $L$sqr8x_enter when both multiplicand
; pointers are equal and the word count is a multiple of 8. Register
; roles after the Win64 shuffle match bn_mul_mont (rdi = destination,
; rsi = input vector, rcx = vector subtracted at the end, r8 = pointer to
; one qword, r9 = word count). Returns rax = 1.
; ---------------------------------------------------------------------
647 ALIGN 32 | |
648 bn_sqr8x_mont: | |
649 mov QWORD[8+rsp],rdi ;WIN64 prologue | |
650 mov QWORD[16+rsp],rsi | |
651 mov rax,rsp | |
652 $L$SEH_begin_bn_sqr8x_mont: | |
653 mov rdi,rcx | |
654 mov rsi,rdx | |
655 mov rdx,r8 | |
656 mov rcx,r9 | |
657 mov r8,QWORD[40+rsp] | |
658 mov r9,QWORD[48+rsp] | |
659 | |
660 | |
661 $L$sqr8x_enter: | |
; rax = rsp before the pushes; it is stored in the frame at [40+rsp]
; below and used by the epilogue and by sqr_handler.
662 mov rax,rsp | |
663 push rbx | |
664 push rbp | |
665 push r12 | |
666 push r13 | |
667 push r14 | |
668 push r15 | |
669 | |
; r10 = count*32, r9 = -(count*8), i.e. minus the vector size in bytes.
670 mov r10d,r9d | |
671 shl r9d,3 | |
672 shl r10,3+2 | |
673 neg r9 | |
674 | |
675 | |
676 | |
677 | |
678 | |
679 | |
; Frame placement. r11 = (rsp - 64 - 2*bytes - rsi) mod 4096, i.e. the
; distance between the tentative frame and the input vector within a
; 4 KiB page; the two paths below shift rsp by an amount derived from it.
; NOTE(review): the rationale (apparently avoiding an unfavourable
; page-offset relationship between the frame and rsi) is not stated in
; this file -- confirm against the generator script.
680 lea r11,[((-64))+r9*2+rsp] | |
681 mov r8,QWORD[r8] | |
682 sub r11,rsi | |
683 and r11,4095 | |
684 cmp r10,r11 | |
685 jb NEAR $L$sqr8x_sp_alt | |
686 sub rsp,r11 | |
687 lea rsp,[((-64))+r9*2+rsp] | |
688 jmp NEAR $L$sqr8x_sp_done | |
689 | |
690 ALIGN 32 | |
691 $L$sqr8x_sp_alt: | |
; Alternative placement: subtract max(r11 - (4096 - 64 - 2*bytes), 0);
; cmovc zeroes the adjustment when the subtraction borrowed.
692 lea r10,[((4096-64))+r9*2] | |
693 lea rsp,[((-64))+r9*2+rsp] | |
694 sub r11,r10 | |
695 mov r10,0 | |
696 cmovc r11,r10 | |
697 sub rsp,r11 | |
698 $L$sqr8x_sp_done: | |
; Align the frame to 64 bytes. r10 keeps the negative byte count, r9
; becomes the positive byte count.
699 and rsp,-64 | |
700 mov r10,r9 | |
701 neg r9 | |
702 | |
; Frame slots: [32+rsp] = the qword loaded through r8,
;              [40+rsp] = rsp as it was before the pushes.
703 mov QWORD[32+rsp],r8 | |
704 mov QWORD[40+rsp],rax | |
705 $L$sqr8x_body: | |
706 | |
; The DB sequences encode movq transfers into XMM registers:
;   xmm2 <- rcx, xmm1 <- rdi (destination pointer), xmm3 <- r10.
; xmm0 is cleared. How bn_sqr8x_internal uses these is not visible here.
707 DB 102,72,15,110,209 | |
708 pxor xmm0,xmm0 | |
709 DB 102,72,15,110,207 | |
710 DB 102,73,15,110,218 | |
711 call bn_sqr8x_internal | |
712 | |
713 | |
714 | |
715 | |
; NOTE(review): the contents of rdi, r9, rbp and rax at this point are
; defined by bn_sqr8x_internal, which is outside this file. The code
; below treats r9 as a byte count that counts up to zero (so presumably
; negative), rbp as the pointer to the vector being subtracted and rax as
; the top word -- confirm against that routine.
; rbx = rdi + r9 is the minuend pointer; the DB bytes encode
; `movq rdi,xmm1`, restoring the destination pointer saved before the
; call. rcx = r9 >> 5 counts 32-byte groups upwards to zero.
716 lea rbx,[r9*1+rdi] | |
717 mov rcx,r9 | |
718 mov rdx,r9 | |
719 DB 102,72,15,126,207 | |
720 sar rcx,3+2 | |
721 jmp NEAR $L$sqr8x_sub | |
722 | |
723 ALIGN 32 | |
; dst = [rbx] - [rbp] with borrow, four words per iteration. CF on entry
; is whatever `sar` left (the last bit shifted out); lea, mov and inc do
; not modify CF, so the borrow is carried across iterations.
724 $L$sqr8x_sub: | |
725 mov r12,QWORD[rbx] | |
726 mov r13,QWORD[8+rbx] | |
727 mov r14,QWORD[16+rbx] | |
728 mov r15,QWORD[24+rbx] | |
729 lea rbx,[32+rbx] | |
730 sbb r12,QWORD[rbp] | |
731 sbb r13,QWORD[8+rbp] | |
732 sbb r14,QWORD[16+rbp] | |
733 sbb r15,QWORD[24+rbp] | |
734 lea rbp,[32+rbp] | |
735 mov QWORD[rdi],r12 | |
736 mov QWORD[8+rdi],r13 | |
737 mov QWORD[16+rdi],r14 | |
738 mov QWORD[24+rdi],r15 | |
739 lea rdi,[32+rdi] | |
740 inc rcx | |
741 jnz NEAR $L$sqr8x_sub | |
742 | |
; Fold the final borrow into rax to form the selection mask, then step
; rbx and rdi by r9 to point back at the start of their vectors.
743 sbb rax,0 | |
744 lea rbx,[r9*1+rbx] | |
745 lea rdi,[r9*1+rdi] | |
746 | |
; The DB bytes encode `movq xmm1,rax`; pshufd with selector 0 broadcasts
; the low dword of the mask to all four lanes. rsi = saved original rsp.
747 DB 102,72,15,110,200 | |
748 pxor xmm0,xmm0 | |
749 pshufd xmm1,xmm1,0 | |
750 mov rsi,QWORD[40+rsp] | |
751 jmp NEAR $L$sqr8x_cond_copy | |
752 | |
753 ALIGN 32 | |
; Branch-free select, 32 bytes per iteration:
;   dst = (scratch & xmm1) | (dst & (xmm1 == 0 per dword))
; The scratch words at rbx, and the matching words rdx bytes further on,
; are overwritten with zero as they are consumed. xmm0 is used both as
; the zero source and, after pcmpeqd, as the inverse mask, so it is
; cleared again before the next iteration.
754 $L$sqr8x_cond_copy: | |
755 movdqa xmm2,XMMWORD[rbx] | |
756 movdqa xmm3,XMMWORD[16+rbx] | |
757 lea rbx,[32+rbx] | |
758 movdqu xmm4,XMMWORD[rdi] | |
759 movdqu xmm5,XMMWORD[16+rdi] | |
760 lea rdi,[32+rdi] | |
761 movdqa XMMWORD[(-32)+rbx],xmm0 | |
762 movdqa XMMWORD[(-16)+rbx],xmm0 | |
763 movdqa XMMWORD[(-32)+rdx*1+rbx],xmm0 | |
764 movdqa XMMWORD[(-16)+rdx*1+rbx],xmm0 | |
765 pcmpeqd xmm0,xmm1 | |
766 pand xmm2,xmm1 | |
767 pand xmm3,xmm1 | |
768 pand xmm4,xmm0 | |
769 pand xmm5,xmm0 | |
770 pxor xmm0,xmm0 | |
771 por xmm4,xmm2 | |
772 por xmm5,xmm3 | |
773 movdqu XMMWORD[(-32)+rdi],xmm4 | |
774 movdqu XMMWORD[(-16)+rdi],xmm5 | |
775 add r9,32 | |
776 jnz NEAR $L$sqr8x_cond_copy | |
777 | |
; Epilogue: return 1, reload the six pushed registers from just below the
; saved original rsp (rsi) and restore rsp to that value.
778 mov rax,1 | |
779 mov r15,QWORD[((-48))+rsi] | |
780 mov r14,QWORD[((-40))+rsi] | |
781 mov r13,QWORD[((-32))+rsi] | |
782 mov r12,QWORD[((-24))+rsi] | |
783 mov rbp,QWORD[((-16))+rsi] | |
784 mov rbx,QWORD[((-8))+rsi] | |
785 lea rsp,[rsi] | |
786 $L$sqr8x_epilogue: | |
787 mov rdi,QWORD[8+rsp] ;WIN64 epilogue | |
788 mov rsi,QWORD[16+rsp] | |
789 DB 0F3h,0C3h ;repret | |
790 $L$SEH_end_bn_sqr8x_mont: | |
; NUL-terminated ASCII identification string embedded in the code section:
; "Montgomery Multiplication for x86_64, CRYPTOGAMS by <appro@openssl.org>"
791 DB 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105 | |
792 DB 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56 | |
793 DB 54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83 | |
794 DB 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115 | |
795 DB 115,108,46,111,114,103,62,0 | |
796 ALIGN 16 | |
797 EXTERN __imp_RtlVirtualUnwind | |
798 | |
; ---------------------------------------------------------------------
; mul_handler -- Win64 language-specific exception handler referenced by
; the .xdata records of bn_mul_mont and bn_mul4x_mont.
; In: r8 = CONTEXT record, r9 = DISPATCHER_CONTEXT (rcx/rdx are not read).
; Offsets used below follow the Windows x64 structure layouts:
;   CONTEXT: Rax=120 Rbx=144 Rsp=152 Rbp=160 R9=192 R12..R15=216..240
;            Rip=248
;   DISPATCHER_CONTEXT: ImageBase=8, HandlerData=56
; HandlerData holds two image-relative offsets: the routine's "body" and
; "epilogue" labels. The handler works out which stack pointer value is
; the routine's entry rsp, patches the saved registers into the CONTEXT
; and finishes in $L$common_seh_tail.
; ---------------------------------------------------------------------
799 ALIGN 16 | |
800 mul_handler: | |
801 push rsi | |
802 push rdi | |
803 push rbx | |
804 push rbp | |
805 push r12 | |
806 push r13 | |
807 push r14 | |
808 push r15 | |
809 pushfq | |
810 sub rsp,64 | |
811 | |
; rax = context Rax (the prologues copy the entry rsp into rax),
; rbx = context Rip.
812 mov rax,QWORD[120+r8] | |
813 mov rbx,QWORD[248+r8] | |
814 | |
815 mov rsi,QWORD[8+r9] | |
816 mov r11,QWORD[56+r9] | |
817 | |
; Rip before the body label: nothing to restore from the frame, use the
; Rax value as the entry rsp.
818 mov r10d,DWORD[r11] | |
819 lea r10,[r10*1+rsi] | |
820 cmp rbx,r10 | |
821 jb NEAR $L$common_seh_tail | |
822 | |
823 mov rax,QWORD[152+r8] | |
824 | |
; Rip at or past the epilogue label: registers and rsp are already
; restored, so the context Rsp is the entry rsp.
825 mov r10d,DWORD[4+r11] | |
826 lea r10,[r10*1+rsi] | |
827 cmp rbx,r10 | |
828 jae NEAR $L$common_seh_tail | |
829 | |
; Inside the body: the routine stored its post-push rsp at
; [8 + count*8 + rsp], with count in R9. Load it and add 48 to step over
; the six pushed registers, giving the entry rsp.
830 mov r10,QWORD[192+r8] | |
831 mov rax,QWORD[8+r10*8+rax] | |
832 lea rax,[48+rax] | |
833 | |
; Copy the six saved registers from the stack into the CONTEXT.
834 mov rbx,QWORD[((-8))+rax] | |
835 mov rbp,QWORD[((-16))+rax] | |
836 mov r12,QWORD[((-24))+rax] | |
837 mov r13,QWORD[((-32))+rax] | |
838 mov r14,QWORD[((-40))+rax] | |
839 mov r15,QWORD[((-48))+rax] | |
840 mov QWORD[144+r8],rbx | |
841 mov QWORD[160+r8],rbp | |
842 mov QWORD[216+r8],r12 | |
843 mov QWORD[224+r8],r13 | |
844 mov QWORD[232+r8],r14 | |
845 mov QWORD[240+r8],r15 | |
846 | |
847 jmp NEAR $L$common_seh_tail | |
848 | |
849 | |
850 | |
; ---------------------------------------------------------------------
; sqr_handler -- Win64 language-specific exception handler referenced by
; the .xdata record of bn_sqr8x_mont. Same structure as mul_handler, but
; the entry rsp is read from frame slot [40+rsp], where bn_sqr8x_mont
; stores the rsp value taken before its pushes.
; In: r8 = CONTEXT record, r9 = DISPATCHER_CONTEXT.
; The shared tail $L$common_seh_tail (also jumped to from mul_handler)
; expects rax = entry rsp of the interrupted routine.
; ---------------------------------------------------------------------
851 ALIGN 16 | |
852 sqr_handler: | |
853 push rsi | |
854 push rdi | |
855 push rbx | |
856 push rbp | |
857 push r12 | |
858 push r13 | |
859 push r14 | |
860 push r15 | |
861 pushfq | |
862 sub rsp,64 | |
863 | |
; rax = context Rax, rbx = context Rip.
864 mov rax,QWORD[120+r8] | |
865 mov rbx,QWORD[248+r8] | |
866 | |
; rsi = image base, r11 = HandlerData (body / epilogue offsets).
867 mov rsi,QWORD[8+r9] | |
868 mov r11,QWORD[56+r9] | |
869 | |
; Rip before the body label: use the Rax value as the entry rsp.
870 mov r10d,DWORD[r11] | |
871 lea r10,[r10*1+rsi] | |
872 cmp rbx,r10 | |
873 jb NEAR $L$common_seh_tail | |
874 | |
875 mov rax,QWORD[152+r8] | |
876 | |
; Rip at or past the epilogue label: context Rsp is the entry rsp.
877 mov r10d,DWORD[4+r11] | |
878 lea r10,[r10*1+rsi] | |
879 cmp rbx,r10 | |
880 jae NEAR $L$common_seh_tail | |
881 | |
; Inside the body: fetch the saved pre-push rsp from the frame.
882 mov rax,QWORD[40+rax] | |
883 | |
; The six pushed registers sit directly below that address.
884 mov rbx,QWORD[((-8))+rax] | |
885 mov rbp,QWORD[((-16))+rax] | |
886 mov r12,QWORD[((-24))+rax] | |
887 mov r13,QWORD[((-32))+rax] | |
888 mov r14,QWORD[((-40))+rax] | |
889 mov r15,QWORD[((-48))+rax] | |
890 mov QWORD[144+r8],rbx | |
891 mov QWORD[160+r8],rbp | |
892 mov QWORD[216+r8],r12 | |
893 mov QWORD[224+r8],r13 | |
894 mov QWORD[232+r8],r14 | |
895 mov QWORD[240+r8],r15 | |
896 | |
897 $L$common_seh_tail: | |
; Recover rdi/rsi from the home slots the prologues wrote at [8+rsp] and
; [16+rsp], then store rsp, rsi (offset 168) and rdi (offset 176) into
; the CONTEXT.
898 mov rdi,QWORD[8+rax] | |
899 mov rsi,QWORD[16+rax] | |
900 mov QWORD[152+r8],rax | |
901 mov QWORD[168+r8],rsi | |
902 mov QWORD[176+r8],rdi | |
903 | |
; Copy the patched CONTEXT (154 qwords) to the record at offset 40 of the
; DISPATCHER_CONTEXT. The DD constant is the byte sequence FC F3 48 A5,
; i.e. `cld` followed by `rep movsq`.
904 mov rdi,QWORD[40+r9] | |
905 mov rsi,r8 | |
906 mov ecx,154 | |
907 DD 0xa548f3fc | |
908 | |
; Call RtlVirtualUnwind with: rcx = 0, rdx = [8+disp], r8 = [0+disp],
; r9 = [16+disp], and on the stack: [40+disp], &[56+disp], &[24+disp], 0.
909 mov rsi,r9 | |
910 xor rcx,rcx | |
911 mov rdx,QWORD[8+rsi] | |
912 mov r8,QWORD[rsi] | |
913 mov r9,QWORD[16+rsi] | |
914 mov r10,QWORD[40+rsi] | |
915 lea r11,[56+rsi] | |
916 lea r12,[24+rsi] | |
917 mov QWORD[32+rsp],r10 | |
918 mov QWORD[40+rsp],r11 | |
919 mov QWORD[48+rsp],r12 | |
920 mov QWORD[56+rsp],rcx | |
921 call QWORD[__imp_RtlVirtualUnwind] | |
922 | |
; Return 1 in eax (presumably ExceptionContinueSearch -- confirm against
; the Windows headers) after restoring everything pushed on entry.
923 mov eax,1 | |
924 add rsp,64 | |
925 popfq | |
926 pop r15 | |
927 pop r14 | |
928 pop r13 | |
929 pop r12 | |
930 pop rbp | |
931 pop rbx | |
932 pop rdi | |
933 pop rsi | |
934 DB 0F3h,0C3h ;repret | |
935 | |
936 | |
; ---------------------------------------------------------------------
; Win64 exception tables.
; .pdata: one three-dword record per routine -- image-relative begin
;         address, end address and address of its .xdata record.
; .xdata: per routine, a four-byte header (first byte 9 = version 1 with
;         the exception-handler flag set; prolog size, unwind-code count
;         and frame register are all zero), the image-relative address of
;         the handler, and two handler-data dwords (body label, epilogue
;         label) that mul_handler / sqr_handler compare Rip against.
; ---------------------------------------------------------------------
937 section .pdata rdata align=4 | |
938 ALIGN 4 | |
939 DD $L$SEH_begin_bn_mul_mont wrt ..imagebase | |
940 DD $L$SEH_end_bn_mul_mont wrt ..imagebase | |
941 DD $L$SEH_info_bn_mul_mont wrt ..imagebase | |
942 | |
943 DD $L$SEH_begin_bn_mul4x_mont wrt ..imagebase | |
944 DD $L$SEH_end_bn_mul4x_mont wrt ..imagebase | |
945 DD $L$SEH_info_bn_mul4x_mont wrt ..imagebase | |
946 | |
947 DD $L$SEH_begin_bn_sqr8x_mont wrt ..imagebase | |
948 DD $L$SEH_end_bn_sqr8x_mont wrt ..imagebase | |
949 DD $L$SEH_info_bn_sqr8x_mont wrt ..imagebase | |
950 section .xdata rdata align=8 | |
951 ALIGN 8 | |
952 $L$SEH_info_bn_mul_mont: | |
953 DB 9,0,0,0 | |
954 DD mul_handler wrt ..imagebase | |
955 DD $L$mul_body wrt ..imagebase,$L$mul_epilogue wrt ..imagebase | |
956 $L$SEH_info_bn_mul4x_mont: | |
957 DB 9,0,0,0 | |
958 DD mul_handler wrt ..imagebase | |
959 DD $L$mul4x_body wrt ..imagebase,$L$mul4x_epilogue wrt ..imagebase | |
960 $L$SEH_info_bn_sqr8x_mont: | |
961 DB 9,0,0,0 | |
962 DD sqr_handler wrt ..imagebase | |
963 DD $L$sqr8x_body wrt ..imagebase,$L$sqr8x_epilogue wrt ..imagebase | |
OLD | NEW |