OLD | NEW |
| (Empty) |
1 default rel | |
2 %define XMMWORD | |
3 %define YMMWORD | |
4 %define ZMMWORD | |
5 section .text code align=64 | |
6 | |
7 | |
8 EXTERN OPENSSL_ia32cap_P | |
9 | |
;-----------------------------------------------------------------------
; rsaz_512_sqr -- repeated 512-bit squaring followed by a reduction step.
; ABI:   Microsoft x64 on entry; the prologue remaps the arguments into
;        rdi, rsi, rdx, rcx, r8 (5th argument is read from [40+rsp]).
; After the remap:
;        rdi = output buffer (8 qwords, written by __rsaz_512_subtract)
;        rsi = input operand (8 qwords)
;        rdx = pointer copied to rbp and read by __rsaz_512_reduce /
;              __rsaz_512_subtract (presumably the modulus -- confirm)
;        rcx = qword stored at [128+rsp] and used as the multiplier in
;              __rsaz_512_reduce (presumably n0 -- confirm)
;        r8d = iteration count; each pass squares the previous result
; Stack: [rsp .. rsp+127]  16-qword product
;        [128+rsp]         saved rcx
;        [128+8+rsp]       remaining iteration count (dword)
; NOTE(review): each doubling step below ends with `adc reg,0` whose
; carry-out is not captured by any later instruction in this block
; (e.g. the `adc r9,0` after the first `mul rax`). This looks like the
; overflow addressed upstream as CVE-2019-1551 -- confirm against the
; generator script and regenerate rather than patching by hand.
;-----------------------------------------------------------------------
10 global rsaz_512_sqr | |
11 | |
12 ALIGN 32 | |
13 rsaz_512_sqr: | |
; Spill rdi/rsi (callee-saved on Win64) into the caller-provided home
; slots and keep the entry rsp in rax.
14 mov QWORD[8+rsp],rdi ;WIN64 prologue | |
15 mov QWORD[16+rsp],rsi | |
16 mov rax,rsp | |
17 $L$SEH_begin_rsaz_512_sqr: | |
18 mov rdi,rcx | |
19 mov rsi,rdx | |
20 mov rdx,r8 | |
21 mov rcx,r9 | |
22 mov r8,QWORD[40+rsp] | |
23 | |
24 | |
25 push rbx | |
26 push rbp | |
27 push r12 | |
28 push r13 | |
29 push r14 | |
30 push r15 | |
31 | |
32 sub rsp,128+24 | |
33 $L$sqr_body: | |
; rbp = third argument; rdx/rax are preloaded with a[0]/a[1] for the
; first pass (later passes get them from r8/r9 at the loop bottom).
34 mov rbp,rdx | |
35 mov rdx,QWORD[rsi] | |
36 mov rax,QWORD[8+rsi] | |
37 mov QWORD[128+rsp],rcx | |
38 jmp NEAR $L$oop_sqr | |
39 | |
40 ALIGN 32 | |
41 $L$oop_sqr: | |
; Save the remaining iteration count; r8 is reused as an accumulator.
42 mov DWORD[((128+8))+rsp],r8d | |
43 | |
; Row 0: a[0] * a[1..7] -> r8..r15 (rbx = a[0]).
44 mov rbx,rdx | |
45 mul rdx | |
46 mov r8,rax | |
47 mov rax,QWORD[16+rsi] | |
48 mov r9,rdx | |
49 | |
50 mul rbx | |
51 add r9,rax | |
52 mov rax,QWORD[24+rsi] | |
53 mov r10,rdx | |
54 adc r10,0 | |
55 | |
56 mul rbx | |
57 add r10,rax | |
58 mov rax,QWORD[32+rsi] | |
59 mov r11,rdx | |
60 adc r11,0 | |
61 | |
62 mul rbx | |
63 add r11,rax | |
64 mov rax,QWORD[40+rsi] | |
65 mov r12,rdx | |
66 adc r12,0 | |
67 | |
68 mul rbx | |
69 add r12,rax | |
70 mov rax,QWORD[48+rsi] | |
71 mov r13,rdx | |
72 adc r13,0 | |
73 | |
74 mul rbx | |
75 add r13,rax | |
76 mov rax,QWORD[56+rsi] | |
77 mov r14,rdx | |
78 adc r14,0 | |
79 | |
80 mul rbx | |
81 add r14,rax | |
82 mov rax,rbx | |
83 mov r15,rdx | |
84 adc r15,0 | |
85 | |
; Double the two lowest cross-product limbs (r8, r9) and add a[0]^2.
; rcx keeps the pre-doubling r9 so its top bit can be recovered with
; shr 63 and fed into the next doubling step.
86 add r8,r8 | |
87 mov rcx,r9 | |
88 adc r9,r9 | |
89 | |
90 mul rax | |
91 mov QWORD[rsp],rax | |
92 add r8,rdx | |
93 adc r9,0 | |
94 | |
95 mov QWORD[8+rsp],r8 | |
96 shr rcx,63 | |
97 | |
98 | |
; Row 1: a[1] * a[2..7] accumulated into r10..r15; r8 becomes the new
; top limb.
99 mov r8,QWORD[8+rsi] | |
100 mov rax,QWORD[16+rsi] | |
101 mul r8 | |
102 add r10,rax | |
103 mov rax,QWORD[24+rsi] | |
104 mov rbx,rdx | |
105 adc rbx,0 | |
106 | |
107 mul r8 | |
108 add r11,rax | |
109 mov rax,QWORD[32+rsi] | |
110 adc rdx,0 | |
111 add r11,rbx | |
112 mov rbx,rdx | |
113 adc rbx,0 | |
114 | |
115 mul r8 | |
116 add r12,rax | |
117 mov rax,QWORD[40+rsi] | |
118 adc rdx,0 | |
119 add r12,rbx | |
120 mov rbx,rdx | |
121 adc rbx,0 | |
122 | |
123 mul r8 | |
124 add r13,rax | |
125 mov rax,QWORD[48+rsi] | |
126 adc rdx,0 | |
127 add r13,rbx | |
128 mov rbx,rdx | |
129 adc rbx,0 | |
130 | |
131 mul r8 | |
132 add r14,rax | |
133 mov rax,QWORD[56+rsi] | |
134 adc rdx,0 | |
135 add r14,rbx | |
136 mov rbx,rdx | |
137 adc rbx,0 | |
138 | |
139 mul r8 | |
140 add r15,rax | |
141 mov rax,r8 | |
142 adc rdx,0 | |
143 add r15,rbx | |
144 mov r8,rdx | |
145 mov rdx,r10 | |
146 adc r8,0 | |
147 | |
; Double r10/r11 (add rdx,rdx only produces the carry out of r10; lea
; does the actual doubling and folds in the bit saved in rcx), then add
; a[1]^2 and store product limbs 2 and 3.
148 add rdx,rdx | |
149 lea r10,[r10*2+rcx] | |
150 mov rbx,r11 | |
151 adc r11,r11 | |
152 | |
153 mul rax | |
154 add r9,rax | |
155 adc r10,rdx | |
156 adc r11,0 | |
157 | |
158 mov QWORD[16+rsp],r9 | |
159 mov QWORD[24+rsp],r10 | |
160 shr rbx,63 | |
161 | |
162 | |
; Row 2: a[2] * a[3..7] accumulated into r12..r15, r8; r9 becomes the
; new top limb. The doubling of r12/r13 is interleaved with the
; multiplies.
163 mov r9,QWORD[16+rsi] | |
164 mov rax,QWORD[24+rsi] | |
165 mul r9 | |
166 add r12,rax | |
167 mov rax,QWORD[32+rsi] | |
168 mov rcx,rdx | |
169 adc rcx,0 | |
170 | |
171 mul r9 | |
172 add r13,rax | |
173 mov rax,QWORD[40+rsi] | |
174 adc rdx,0 | |
175 add r13,rcx | |
176 mov rcx,rdx | |
177 adc rcx,0 | |
178 | |
179 mul r9 | |
180 add r14,rax | |
181 mov rax,QWORD[48+rsi] | |
182 adc rdx,0 | |
183 add r14,rcx | |
184 mov rcx,rdx | |
185 adc rcx,0 | |
186 | |
187 mul r9 | |
188 mov r10,r12 | |
189 lea r12,[r12*2+rbx] | |
190 add r15,rax | |
191 mov rax,QWORD[56+rsi] | |
192 adc rdx,0 | |
193 add r15,rcx | |
194 mov rcx,rdx | |
195 adc rcx,0 | |
196 | |
197 mul r9 | |
198 shr r10,63 | |
199 add r8,rax | |
200 mov rax,r9 | |
201 adc rdx,0 | |
202 add r8,rcx | |
203 mov r9,rdx | |
204 adc r9,0 | |
205 | |
206 mov rcx,r13 | |
207 lea r13,[r13*2+r10] | |
208 | |
; Add a[2]^2 and store product limbs 4 and 5.
209 mul rax | |
210 add r11,rax | |
211 adc r12,rdx | |
212 adc r13,0 | |
213 | |
214 mov QWORD[32+rsp],r11 | |
215 mov QWORD[40+rsp],r12 | |
216 shr rcx,63 | |
217 | |
218 | |
; Row 3: a[3] * a[4..7] accumulated into r14, r15, r8, r9; r10 becomes
; the new top limb.
219 mov r10,QWORD[24+rsi] | |
220 mov rax,QWORD[32+rsi] | |
221 mul r10 | |
222 add r14,rax | |
223 mov rax,QWORD[40+rsi] | |
224 mov rbx,rdx | |
225 adc rbx,0 | |
226 | |
227 mul r10 | |
228 add r15,rax | |
229 mov rax,QWORD[48+rsi] | |
230 adc rdx,0 | |
231 add r15,rbx | |
232 mov rbx,rdx | |
233 adc rbx,0 | |
234 | |
235 mul r10 | |
236 mov r12,r14 | |
237 lea r14,[r14*2+rcx] | |
238 add r8,rax | |
239 mov rax,QWORD[56+rsi] | |
240 adc rdx,0 | |
241 add r8,rbx | |
242 mov rbx,rdx | |
243 adc rbx,0 | |
244 | |
245 mul r10 | |
246 shr r12,63 | |
247 add r9,rax | |
248 mov rax,r10 | |
249 adc rdx,0 | |
250 add r9,rbx | |
251 mov r10,rdx | |
252 adc r10,0 | |
253 | |
254 mov rbx,r15 | |
255 lea r15,[r15*2+r12] | |
256 | |
; Add a[3]^2 and store product limbs 6 and 7.
257 mul rax | |
258 add r13,rax | |
259 adc r14,rdx | |
260 adc r15,0 | |
261 | |
262 mov QWORD[48+rsp],r13 | |
263 mov QWORD[56+rsp],r14 | |
264 shr rbx,63 | |
265 | |
266 | |
; Row 4: a[4] * a[5..7] accumulated into r8, r9, r10; r11 becomes the
; new top limb.
267 mov r11,QWORD[32+rsi] | |
268 mov rax,QWORD[40+rsi] | |
269 mul r11 | |
270 add r8,rax | |
271 mov rax,QWORD[48+rsi] | |
272 mov rcx,rdx | |
273 adc rcx,0 | |
274 | |
275 mul r11 | |
276 add r9,rax | |
277 mov rax,QWORD[56+rsi] | |
278 adc rdx,0 | |
279 mov r12,r8 | |
280 lea r8,[r8*2+rbx] | |
281 add r9,rcx | |
282 mov rcx,rdx | |
283 adc rcx,0 | |
284 | |
285 mul r11 | |
286 shr r12,63 | |
287 add r10,rax | |
288 mov rax,r11 | |
289 adc rdx,0 | |
290 add r10,rcx | |
291 mov r11,rdx | |
292 adc r11,0 | |
293 | |
294 mov rcx,r9 | |
295 lea r9,[r9*2+r12] | |
296 | |
; Add a[4]^2 and store product limbs 8 and 9.
297 mul rax | |
298 add r15,rax | |
299 adc r8,rdx | |
300 adc r9,0 | |
301 | |
302 mov QWORD[64+rsp],r15 | |
303 mov QWORD[72+rsp],r8 | |
304 shr rcx,63 | |
305 | |
306 | |
; Row 5: a[5] * a[6..7] accumulated into r10, r11; r12 becomes the new
; top limb.
307 mov r12,QWORD[40+rsi] | |
308 mov rax,QWORD[48+rsi] | |
309 mul r12 | |
310 add r10,rax | |
311 mov rax,QWORD[56+rsi] | |
312 mov rbx,rdx | |
313 adc rbx,0 | |
314 | |
315 mul r12 | |
316 add r11,rax | |
317 mov rax,r12 | |
318 mov r15,r10 | |
319 lea r10,[r10*2+rcx] | |
320 adc rdx,0 | |
321 shr r15,63 | |
322 add r11,rbx | |
323 mov r12,rdx | |
324 adc r12,0 | |
325 | |
326 mov rbx,r11 | |
327 lea r11,[r11*2+r15] | |
328 | |
; Add a[5]^2 and store product limbs 10 and 11.
329 mul rax | |
330 add r9,rax | |
331 adc r10,rdx | |
332 adc r11,0 | |
333 | |
334 mov QWORD[80+rsp],r9 | |
335 mov QWORD[88+rsp],r10 | |
336 | |
337 | |
; Row 6: a[6] * a[7] into r12:r13.
338 mov r13,QWORD[48+rsi] | |
339 mov rax,QWORD[56+rsi] | |
340 mul r13 | |
341 add r12,rax | |
342 mov rax,r13 | |
343 mov r13,rdx | |
344 adc r13,0 | |
345 | |
; shl rbx,1 moves the top bit of the pre-doubling r11 into CF; the adc
; chain then doubles r12:r13 and leaves the final carry in r14.
346 xor r14,r14 | |
347 shl rbx,1 | |
348 adc r12,r12 | |
349 adc r13,r13 | |
350 adc r14,r14 | |
351 | |
; Add a[6]^2 and store product limbs 12 and 13.
352 mul rax | |
353 add r11,rax | |
354 adc r12,rdx | |
355 adc r13,0 | |
356 | |
357 mov QWORD[96+rsp],r11 | |
358 mov QWORD[104+rsp],r12 | |
359 | |
360 | |
; Add a[7]^2 and store product limbs 14 and 15.
361 mov rax,QWORD[56+rsi] | |
362 mul rax | |
363 add r13,rax | |
364 adc rdx,0 | |
365 | |
366 add r14,rdx | |
367 | |
368 mov QWORD[112+rsp],r13 | |
369 mov QWORD[120+rsp],r14 | |
370 | |
; Reload the low eight product limbs into r8..r15 for the reduction.
371 mov r8,QWORD[rsp] | |
372 mov r9,QWORD[8+rsp] | |
373 mov r10,QWORD[16+rsp] | |
374 mov r11,QWORD[24+rsp] | |
375 mov r12,QWORD[32+rsp] | |
376 mov r13,QWORD[40+rsp] | |
377 mov r14,QWORD[48+rsp] | |
378 mov r15,QWORD[56+rsp] | |
379 | |
380 call __rsaz_512_reduce | |
381 | |
; Add the high eight product limbs; sbb turns the final carry into an
; all-zero / all-one mask in rcx for __rsaz_512_subtract.
382 add r8,QWORD[64+rsp] | |
383 adc r9,QWORD[72+rsp] | |
384 adc r10,QWORD[80+rsp] | |
385 adc r11,QWORD[88+rsp] | |
386 adc r12,QWORD[96+rsp] | |
387 adc r13,QWORD[104+rsp] | |
388 adc r14,QWORD[112+rsp] | |
389 adc r15,QWORD[120+rsp] | |
390 sbb rcx,rcx | |
391 | |
392 call __rsaz_512_subtract | |
393 | |
; Prepare the next pass: rdx/rax = result limbs 0/1 (still in r8/r9),
; rsi = output buffer so the result is squared again.
394 mov rdx,r8 | |
395 mov rax,r9 | |
396 mov r8d,DWORD[((128+8))+rsp] | |
397 mov rsi,rdi | |
398 | |
399 dec r8d | |
400 jnz NEAR $L$oop_sqr | |
401 | |
; Restore callee-saved registers from the six pushes above the frame.
402 lea rax,[((128+24+48))+rsp] | |
403 mov r15,QWORD[((-48))+rax] | |
404 mov r14,QWORD[((-40))+rax] | |
405 mov r13,QWORD[((-32))+rax] | |
406 mov r12,QWORD[((-24))+rax] | |
407 mov rbp,QWORD[((-16))+rax] | |
408 mov rbx,QWORD[((-8))+rax] | |
409 lea rsp,[rax] | |
410 $L$sqr_epilogue: | |
411 mov rdi,QWORD[8+rsp] ;WIN64 epilogue | |
412 mov rsi,QWORD[16+rsp] | |
413 DB 0F3h,0C3h ;repret | |
414 $L$SEH_end_rsaz_512_sqr: | |
;-----------------------------------------------------------------------
; rsaz_512_mul -- 512x512-bit multiply followed by reduction.
; ABI:   Microsoft x64 on entry; arguments are remapped in the prologue.
; After the remap:
;        rdi = output buffer (8 qwords)
;        rsi = first operand (8 qwords, read by __rsaz_512_mul)
;        rdx = second operand (copied to rbp, walked by __rsaz_512_mul)
;        rcx = pointer later placed in rbp for reduce/subtract
;              (presumably the modulus -- confirm)
;        r8  = qword stored at [128+rsp], the multiplier read by
;              __rsaz_512_reduce (presumably n0 -- confirm)
; Stack: [rsp .. rsp+127] 16-qword product, [128+rsp] saved r8.
;-----------------------------------------------------------------------
415 global rsaz_512_mul | |
416 | |
417 ALIGN 32 | |
418 rsaz_512_mul: | |
419 mov QWORD[8+rsp],rdi ;WIN64 prologue | |
420 mov QWORD[16+rsp],rsi | |
421 mov rax,rsp | |
422 $L$SEH_begin_rsaz_512_mul: | |
423 mov rdi,rcx | |
424 mov rsi,rdx | |
425 mov rdx,r8 | |
426 mov rcx,r9 | |
427 mov r8,QWORD[40+rsp] | |
428 | |
429 | |
430 push rbx | |
431 push rbp | |
432 push r12 | |
433 push r13 | |
434 push r14 | |
435 push r15 | |
436 | |
437 sub rsp,128+24 | |
438 $L$mul_body: | |
; Hand-encoded movq xmm0,rdi / movq xmm1,rcx: park the output pointer
; and the fourth argument in XMM registers because __rsaz_512_mul
; overwrites rdi, rbp and rcx.
439 DB 102,72,15,110,199 | |
440 DB 102,72,15,110,201 | |
441 mov QWORD[128+rsp],r8 | |
442 mov rbx,QWORD[rdx] | |
443 mov rbp,rdx | |
444 call __rsaz_512_mul | |
445 | |
; Hand-encoded movq rdi,xmm0 / movq rbp,xmm1: restore the output
; pointer and load rbp with the pointer used by reduce/subtract.
446 DB 102,72,15,126,199 | |
447 DB 102,72,15,126,205 | |
448 | |
449 mov r8,QWORD[rsp] | |
450 mov r9,QWORD[8+rsp] | |
451 mov r10,QWORD[16+rsp] | |
452 mov r11,QWORD[24+rsp] | |
453 mov r12,QWORD[32+rsp] | |
454 mov r13,QWORD[40+rsp] | |
455 mov r14,QWORD[48+rsp] | |
456 mov r15,QWORD[56+rsp] | |
457 | |
458 call __rsaz_512_reduce | |
; Add the high half of the product; rcx = 0 or all-ones from the carry.
459 add r8,QWORD[64+rsp] | |
460 adc r9,QWORD[72+rsp] | |
461 adc r10,QWORD[80+rsp] | |
462 adc r11,QWORD[88+rsp] | |
463 adc r12,QWORD[96+rsp] | |
464 adc r13,QWORD[104+rsp] | |
465 adc r14,QWORD[112+rsp] | |
466 adc r15,QWORD[120+rsp] | |
467 sbb rcx,rcx | |
468 | |
469 call __rsaz_512_subtract | |
470 | |
471 lea rax,[((128+24+48))+rsp] | |
472 mov r15,QWORD[((-48))+rax] | |
473 mov r14,QWORD[((-40))+rax] | |
474 mov r13,QWORD[((-32))+rax] | |
475 mov r12,QWORD[((-24))+rax] | |
476 mov rbp,QWORD[((-16))+rax] | |
477 mov rbx,QWORD[((-8))+rax] | |
478 lea rsp,[rax] | |
479 $L$mul_epilogue: | |
480 mov rdi,QWORD[8+rsp] ;WIN64 epilogue | |
481 mov rsi,QWORD[16+rsp] | |
482 DB 0F3h,0C3h ;repret | |
483 $L$SEH_end_rsaz_512_mul: | |
;-----------------------------------------------------------------------
; rsaz_512_mul_gather4 -- multiply by an operand gathered from a table,
; then reduce.
; ABI:   Microsoft x64 on entry; arguments are remapped in the prologue.
; After the remap:
;        rdi = output buffer (8 qwords)
;        rsi = first operand (8 qwords)
;        rdx = table base; read as 8 rows of 128 bytes (16 qwords each)
;        rcx = pointer later placed in rbp for reduce/subtract
;              (presumably the modulus -- confirm)
;        r8  = qword stored at [128+rsp] (multiplier for reduce)
;        r9d = index of the table entry to use
; Stack: 328-byte frame. [rsp..rsp+127] product, [128+rsp] r8,
;        [128+8+rsp] rdi, [128+16+rsp] rcx, [160..319+rsp] xmm6-xmm15
;        (callee-saved on Win64).
; Every table row is loaded in full and the wanted qword is selected
; with compare masks, so the addresses read do not depend on r9d.
;-----------------------------------------------------------------------
484 global rsaz_512_mul_gather4 | |
485 | |
486 ALIGN 32 | |
487 rsaz_512_mul_gather4: | |
488 mov QWORD[8+rsp],rdi ;WIN64 prologue | |
489 mov QWORD[16+rsp],rsi | |
490 mov rax,rsp | |
491 $L$SEH_begin_rsaz_512_mul_gather4: | |
492 mov rdi,rcx | |
493 mov rsi,rdx | |
494 mov rdx,r8 | |
495 mov rcx,r9 | |
496 mov r8,QWORD[40+rsp] | |
497 mov r9,QWORD[48+rsp] | |
498 | |
499 | |
500 push rbx | |
501 push rbp | |
502 push r12 | |
503 push r13 | |
504 push r14 | |
505 push r15 | |
506 | |
507 sub rsp,328 | |
508 movaps XMMWORD[160+rsp],xmm6 | |
509 movaps XMMWORD[176+rsp],xmm7 | |
510 movaps XMMWORD[192+rsp],xmm8 | |
511 movaps XMMWORD[208+rsp],xmm9 | |
512 movaps XMMWORD[224+rsp],xmm10 | |
513 movaps XMMWORD[240+rsp],xmm11 | |
514 movaps XMMWORD[256+rsp],xmm12 | |
515 movaps XMMWORD[272+rsp],xmm13 | |
516 movaps XMMWORD[288+rsp],xmm14 | |
517 movaps XMMWORD[304+rsp],xmm15 | |
518 $L$mul_gather4_body: | |
; Build eight selection masks in xmm0..xmm7. Starting from $L$inc
; ({0,0,1,1}) and repeatedly adding $L$inc+16 ({2,2,2,2}) gives the
; lane indices {0,0,1,1}, {2,2,3,3}, ... {14,14,15,15}; pcmpeqd against
; the broadcast index turns the matching qword into all-ones.
519 movd xmm8,r9d | |
520 movdqa xmm1,XMMWORD[(($L$inc+16))] | |
521 movdqa xmm0,XMMWORD[$L$inc] | |
522 | |
523 pshufd xmm8,xmm8,0 | |
524 movdqa xmm7,xmm1 | |
525 movdqa xmm2,xmm1 | |
526 paddd xmm1,xmm0 | |
527 pcmpeqd xmm0,xmm8 | |
528 movdqa xmm3,xmm7 | |
529 paddd xmm2,xmm1 | |
530 pcmpeqd xmm1,xmm8 | |
531 movdqa xmm4,xmm7 | |
532 paddd xmm3,xmm2 | |
533 pcmpeqd xmm2,xmm8 | |
534 movdqa xmm5,xmm7 | |
535 paddd xmm4,xmm3 | |
536 pcmpeqd xmm3,xmm8 | |
537 movdqa xmm6,xmm7 | |
538 paddd xmm5,xmm4 | |
539 pcmpeqd xmm4,xmm8 | |
540 paddd xmm6,xmm5 | |
541 pcmpeqd xmm5,xmm8 | |
542 paddd xmm7,xmm6 | |
543 pcmpeqd xmm6,xmm8 | |
544 pcmpeqd xmm7,xmm8 | |
545 | |
; Gather limb 0 of the table operand: mask all 128 bytes of row 0, OR
; everything together and fold the two qword halves.
546 movdqa xmm8,XMMWORD[rdx] | |
547 movdqa xmm9,XMMWORD[16+rdx] | |
548 movdqa xmm10,XMMWORD[32+rdx] | |
549 movdqa xmm11,XMMWORD[48+rdx] | |
550 pand xmm8,xmm0 | |
551 movdqa xmm12,XMMWORD[64+rdx] | |
552 pand xmm9,xmm1 | |
553 movdqa xmm13,XMMWORD[80+rdx] | |
554 pand xmm10,xmm2 | |
555 movdqa xmm14,XMMWORD[96+rdx] | |
556 pand xmm11,xmm3 | |
557 movdqa xmm15,XMMWORD[112+rdx] | |
558 lea rbp,[128+rdx] | |
559 pand xmm12,xmm4 | |
560 pand xmm13,xmm5 | |
561 pand xmm14,xmm6 | |
562 pand xmm15,xmm7 | |
563 por xmm8,xmm10 | |
564 por xmm9,xmm11 | |
565 por xmm8,xmm12 | |
566 por xmm9,xmm13 | |
567 por xmm8,xmm14 | |
568 por xmm9,xmm15 | |
569 | |
570 por xmm8,xmm9 | |
571 pshufd xmm9,xmm8,0x4e | |
572 por xmm8,xmm9 | |
; Hand-encoded movq rbx,xmm8: rbx = gathered limb.
573 DB 102,76,15,126,195 | |
574 | |
575 mov QWORD[128+rsp],r8 | |
576 mov QWORD[((128+8))+rsp],rdi | |
577 mov QWORD[((128+16))+rsp],rcx | |
578 | |
; First row: a[0..7] * b[0]; the lowest limb goes to [rsp], the rest
; stays in r8..r15.
579 mov rax,QWORD[rsi] | |
580 mov rcx,QWORD[8+rsi] | |
581 mul rbx | |
582 mov QWORD[rsp],rax | |
583 mov rax,rcx | |
584 mov r8,rdx | |
585 | |
586 mul rbx | |
587 add r8,rax | |
588 mov rax,QWORD[16+rsi] | |
589 mov r9,rdx | |
590 adc r9,0 | |
591 | |
592 mul rbx | |
593 add r9,rax | |
594 mov rax,QWORD[24+rsi] | |
595 mov r10,rdx | |
596 adc r10,0 | |
597 | |
598 mul rbx | |
599 add r10,rax | |
600 mov rax,QWORD[32+rsi] | |
601 mov r11,rdx | |
602 adc r11,0 | |
603 | |
604 mul rbx | |
605 add r11,rax | |
606 mov rax,QWORD[40+rsi] | |
607 mov r12,rdx | |
608 adc r12,0 | |
609 | |
610 mul rbx | |
611 add r12,rax | |
612 mov rax,QWORD[48+rsi] | |
613 mov r13,rdx | |
614 adc r13,0 | |
615 | |
616 mul rbx | |
617 add r13,rax | |
618 mov rax,QWORD[56+rsi] | |
619 mov r14,rdx | |
620 adc r14,0 | |
621 | |
622 mul rbx | |
623 add r14,rax | |
624 mov rax,QWORD[rsi] | |
625 mov r15,rdx | |
626 adc r15,0 | |
627 | |
; rdi walks the product buffer; seven more rows follow.
628 lea rdi,[8+rsp] | |
629 mov ecx,7 | |
630 jmp NEAR $L$oop_mul_gather | |
631 | |
632 ALIGN 32 | |
633 $L$oop_mul_gather: | |
; Gather the next limb of the table operand from the row at rbp, then
; advance rbp by one 128-byte row.
634 movdqa xmm8,XMMWORD[rbp] | |
635 movdqa xmm9,XMMWORD[16+rbp] | |
636 movdqa xmm10,XMMWORD[32+rbp] | |
637 movdqa xmm11,XMMWORD[48+rbp] | |
638 pand xmm8,xmm0 | |
639 movdqa xmm12,XMMWORD[64+rbp] | |
640 pand xmm9,xmm1 | |
641 movdqa xmm13,XMMWORD[80+rbp] | |
642 pand xmm10,xmm2 | |
643 movdqa xmm14,XMMWORD[96+rbp] | |
644 pand xmm11,xmm3 | |
645 movdqa xmm15,XMMWORD[112+rbp] | |
646 lea rbp,[128+rbp] | |
647 pand xmm12,xmm4 | |
648 pand xmm13,xmm5 | |
649 pand xmm14,xmm6 | |
650 pand xmm15,xmm7 | |
651 por xmm8,xmm10 | |
652 por xmm9,xmm11 | |
653 por xmm8,xmm12 | |
654 por xmm9,xmm13 | |
655 por xmm8,xmm14 | |
656 por xmm9,xmm15 | |
657 | |
658 por xmm8,xmm9 | |
659 pshufd xmm9,xmm8,0x4e | |
660 por xmm8,xmm9 | |
; Hand-encoded movq rbx,xmm8.
661 DB 102,76,15,126,195 | |
662 | |
; Multiply-accumulate a[0..7] * rbx into r8..r15; the finished low limb
; is written to [rdi] and the window shifts up by one limb.
663 mul rbx | |
664 add r8,rax | |
665 mov rax,QWORD[8+rsi] | |
666 mov QWORD[rdi],r8 | |
667 mov r8,rdx | |
668 adc r8,0 | |
669 | |
670 mul rbx | |
671 add r9,rax | |
672 mov rax,QWORD[16+rsi] | |
673 adc rdx,0 | |
674 add r8,r9 | |
675 mov r9,rdx | |
676 adc r9,0 | |
677 | |
678 mul rbx | |
679 add r10,rax | |
680 mov rax,QWORD[24+rsi] | |
681 adc rdx,0 | |
682 add r9,r10 | |
683 mov r10,rdx | |
684 adc r10,0 | |
685 | |
686 mul rbx | |
687 add r11,rax | |
688 mov rax,QWORD[32+rsi] | |
689 adc rdx,0 | |
690 add r10,r11 | |
691 mov r11,rdx | |
692 adc r11,0 | |
693 | |
694 mul rbx | |
695 add r12,rax | |
696 mov rax,QWORD[40+rsi] | |
697 adc rdx,0 | |
698 add r11,r12 | |
699 mov r12,rdx | |
700 adc r12,0 | |
701 | |
702 mul rbx | |
703 add r13,rax | |
704 mov rax,QWORD[48+rsi] | |
705 adc rdx,0 | |
706 add r12,r13 | |
707 mov r13,rdx | |
708 adc r13,0 | |
709 | |
710 mul rbx | |
711 add r14,rax | |
712 mov rax,QWORD[56+rsi] | |
713 adc rdx,0 | |
714 add r13,r14 | |
715 mov r14,rdx | |
716 adc r14,0 | |
717 | |
718 mul rbx | |
719 add r15,rax | |
720 mov rax,QWORD[rsi] | |
721 adc rdx,0 | |
722 add r14,r15 | |
723 mov r15,rdx | |
724 adc r15,0 | |
725 | |
726 lea rdi,[8+rdi] | |
727 | |
728 dec ecx | |
729 jnz NEAR $L$oop_mul_gather | |
730 | |
; Flush the remaining eight (high) limbs of the product.
731 mov QWORD[rdi],r8 | |
732 mov QWORD[8+rdi],r9 | |
733 mov QWORD[16+rdi],r10 | |
734 mov QWORD[24+rdi],r11 | |
735 mov QWORD[32+rdi],r12 | |
736 mov QWORD[40+rdi],r13 | |
737 mov QWORD[48+rdi],r14 | |
738 mov QWORD[56+rdi],r15 | |
739 | |
; Reload the output pointer and put the saved fourth argument in rbp
; for reduce/subtract.
740 mov rdi,QWORD[((128+8))+rsp] | |
741 mov rbp,QWORD[((128+16))+rsp] | |
742 | |
743 mov r8,QWORD[rsp] | |
744 mov r9,QWORD[8+rsp] | |
745 mov r10,QWORD[16+rsp] | |
746 mov r11,QWORD[24+rsp] | |
747 mov r12,QWORD[32+rsp] | |
748 mov r13,QWORD[40+rsp] | |
749 mov r14,QWORD[48+rsp] | |
750 mov r15,QWORD[56+rsp] | |
751 | |
752 call __rsaz_512_reduce | |
753 add r8,QWORD[64+rsp] | |
754 adc r9,QWORD[72+rsp] | |
755 adc r10,QWORD[80+rsp] | |
756 adc r11,QWORD[88+rsp] | |
757 adc r12,QWORD[96+rsp] | |
758 adc r13,QWORD[104+rsp] | |
759 adc r14,QWORD[112+rsp] | |
760 adc r15,QWORD[120+rsp] | |
761 sbb rcx,rcx | |
762 | |
763 call __rsaz_512_subtract | |
764 | |
; rax = rsp+200; the XMM save area is addressed relative to it, then
; rax is advanced by 176 to rsp+376 (328-byte frame + 48 bytes of
; pushes) to restore the integer registers.
765 lea rax,[((128+24+48))+rsp] | |
766 movaps xmm6,XMMWORD[((160-200))+rax] | |
767 movaps xmm7,XMMWORD[((176-200))+rax] | |
768 movaps xmm8,XMMWORD[((192-200))+rax] | |
769 movaps xmm9,XMMWORD[((208-200))+rax] | |
770 movaps xmm10,XMMWORD[((224-200))+rax] | |
771 movaps xmm11,XMMWORD[((240-200))+rax] | |
772 movaps xmm12,XMMWORD[((256-200))+rax] | |
773 movaps xmm13,XMMWORD[((272-200))+rax] | |
774 movaps xmm14,XMMWORD[((288-200))+rax] | |
775 movaps xmm15,XMMWORD[((304-200))+rax] | |
776 lea rax,[176+rax] | |
777 mov r15,QWORD[((-48))+rax] | |
778 mov r14,QWORD[((-40))+rax] | |
779 mov r13,QWORD[((-32))+rax] | |
780 mov r12,QWORD[((-24))+rax] | |
781 mov rbp,QWORD[((-16))+rax] | |
782 mov rbx,QWORD[((-8))+rax] | |
783 lea rsp,[rax] | |
784 $L$mul_gather4_epilogue: | |
785 mov rdi,QWORD[8+rsp] ;WIN64 epilogue | |
786 mov rsi,QWORD[16+rsp] | |
787 DB 0F3h,0C3h ;repret | |
788 $L$SEH_end_rsaz_512_mul_gather4: | |
;-----------------------------------------------------------------------
; rsaz_512_mul_scatter4 -- multiply, reduce, and also scatter the result
; into a table.
; ABI:   Microsoft x64 on entry; arguments are remapped in the prologue.
; After the remap:
;        rdi = buffer that is both an input operand (walked via rbp by
;              __rsaz_512_mul) and the output of __rsaz_512_subtract
;        rsi = other input operand (8 qwords)
;        rdx = pointer later placed in rbp for reduce/subtract
;              (presumably the modulus -- confirm)
;        rcx = qword stored at [128+rsp] (multiplier for reduce)
;        r8  = table base
;        r9d = table index; the result is written at r8 + r9*8 with a
;              stride of 128 bytes between limbs
;-----------------------------------------------------------------------
789 global rsaz_512_mul_scatter4 | |
790 | |
791 ALIGN 32 | |
792 rsaz_512_mul_scatter4: | |
793 mov QWORD[8+rsp],rdi ;WIN64 prologue | |
794 mov QWORD[16+rsp],rsi | |
795 mov rax,rsp | |
796 $L$SEH_begin_rsaz_512_mul_scatter4: | |
797 mov rdi,rcx | |
798 mov rsi,rdx | |
799 mov rdx,r8 | |
800 mov rcx,r9 | |
801 mov r8,QWORD[40+rsp] | |
802 mov r9,QWORD[48+rsp] | |
803 | |
804 | |
805 push rbx | |
806 push rbp | |
807 push r12 | |
808 push r13 | |
809 push r14 | |
810 push r15 | |
811 | |
; Zero-extend the 32-bit index before it is used in address arithmetic.
812 mov r9d,r9d | |
813 sub rsp,128+24 | |
814 $L$mul_scatter4_body: | |
815 lea r8,[r9*8+r8] | |
; Hand-encoded movq xmm0,rdi / movq xmm1,rdx / movq xmm2,r8: park the
; output pointer, the third argument and the table slot address in XMM
; registers across the helper calls.
816 DB 102,72,15,110,199 | |
817 DB 102,72,15,110,202 | |
818 DB 102,73,15,110,208 | |
819 mov QWORD[128+rsp],rcx | |
820 | |
821 mov rbp,rdi | |
822 mov rbx,QWORD[rdi] | |
823 call __rsaz_512_mul | |
824 | |
; Hand-encoded movq rdi,xmm0 / movq rbp,xmm1.
825 DB 102,72,15,126,199 | |
826 DB 102,72,15,126,205 | |
827 | |
828 mov r8,QWORD[rsp] | |
829 mov r9,QWORD[8+rsp] | |
830 mov r10,QWORD[16+rsp] | |
831 mov r11,QWORD[24+rsp] | |
832 mov r12,QWORD[32+rsp] | |
833 mov r13,QWORD[40+rsp] | |
834 mov r14,QWORD[48+rsp] | |
835 mov r15,QWORD[56+rsp] | |
836 | |
837 call __rsaz_512_reduce | |
838 add r8,QWORD[64+rsp] | |
839 adc r9,QWORD[72+rsp] | |
840 adc r10,QWORD[80+rsp] | |
841 adc r11,QWORD[88+rsp] | |
842 adc r12,QWORD[96+rsp] | |
843 adc r13,QWORD[104+rsp] | |
844 adc r14,QWORD[112+rsp] | |
845 adc r15,QWORD[120+rsp] | |
; Hand-encoded movq rsi,xmm2 (table slot address). It sits between the
; adc chain and sbb; movq does not modify flags, so CF survives.
846 DB 102,72,15,126,214 | |
847 sbb rcx,rcx | |
848 | |
849 call __rsaz_512_subtract | |
850 | |
; Scatter the result limbs (still in r8..r15) into the table, one limb
; per 128-byte row.
851 mov QWORD[rsi],r8 | |
852 mov QWORD[128+rsi],r9 | |
853 mov QWORD[256+rsi],r10 | |
854 mov QWORD[384+rsi],r11 | |
855 mov QWORD[512+rsi],r12 | |
856 mov QWORD[640+rsi],r13 | |
857 mov QWORD[768+rsi],r14 | |
858 mov QWORD[896+rsi],r15 | |
859 | |
860 lea rax,[((128+24+48))+rsp] | |
861 mov r15,QWORD[((-48))+rax] | |
862 mov r14,QWORD[((-40))+rax] | |
863 mov r13,QWORD[((-32))+rax] | |
864 mov r12,QWORD[((-24))+rax] | |
865 mov rbp,QWORD[((-16))+rax] | |
866 mov rbx,QWORD[((-8))+rax] | |
867 lea rsp,[rax] | |
868 $L$mul_scatter4_epilogue: | |
869 mov rdi,QWORD[8+rsp] ;WIN64 epilogue | |
870 mov rsi,QWORD[16+rsp] | |
871 DB 0F3h,0C3h ;repret | |
872 $L$SEH_end_rsaz_512_mul_scatter4: | |
;-----------------------------------------------------------------------
; rsaz_512_mul_by_one -- run the reduction loop directly on an 8-qword
; input (no multiply, no final conditional subtract).
; ABI:   Microsoft x64 on entry; arguments are remapped in the prologue.
; After the remap:
;        rdi = output buffer (8 qwords)
;        rsi = input (8 qwords, loaded into r8..r15)
;        rdx = pointer copied to rbp for __rsaz_512_reduce
;              (presumably the modulus -- confirm)
;        rcx = qword stored at [128+rsp] (multiplier for reduce)
;-----------------------------------------------------------------------
873 global rsaz_512_mul_by_one | |
874 | |
875 ALIGN 32 | |
876 rsaz_512_mul_by_one: | |
877 mov QWORD[8+rsp],rdi ;WIN64 prologue | |
878 mov QWORD[16+rsp],rsi | |
879 mov rax,rsp | |
880 $L$SEH_begin_rsaz_512_mul_by_one: | |
881 mov rdi,rcx | |
882 mov rsi,rdx | |
883 mov rdx,r8 | |
884 mov rcx,r9 | |
885 | |
886 | |
887 push rbx | |
888 push rbp | |
889 push r12 | |
890 push r13 | |
891 push r14 | |
892 push r15 | |
893 | |
894 sub rsp,128+24 | |
895 $L$mul_by_one_body: | |
896 mov rbp,rdx | |
897 mov QWORD[128+rsp],rcx | |
898 | |
899 mov r8,QWORD[rsi] | |
900 pxor xmm0,xmm0 | |
901 mov r9,QWORD[8+rsi] | |
902 mov r10,QWORD[16+rsi] | |
903 mov r11,QWORD[24+rsi] | |
904 mov r12,QWORD[32+rsi] | |
905 mov r13,QWORD[40+rsi] | |
906 mov r14,QWORD[48+rsi] | |
907 mov r15,QWORD[56+rsi] | |
908 | |
; Zero the first 112 bytes of the scratch area. movdqa needs 16-byte
; alignment: entry rsp is 8 mod 16, and 48 bytes of pushes plus the
; 152-byte frame bring rsp back to a multiple of 16.
909 movdqa XMMWORD[rsp],xmm0 | |
910 movdqa XMMWORD[16+rsp],xmm0 | |
911 movdqa XMMWORD[32+rsp],xmm0 | |
912 movdqa XMMWORD[48+rsp],xmm0 | |
913 movdqa XMMWORD[64+rsp],xmm0 | |
914 movdqa XMMWORD[80+rsp],xmm0 | |
915 movdqa XMMWORD[96+rsp],xmm0 | |
916 call __rsaz_512_reduce | |
; Store the reduced value straight from r8..r15.
917 mov QWORD[rdi],r8 | |
918 mov QWORD[8+rdi],r9 | |
919 mov QWORD[16+rdi],r10 | |
920 mov QWORD[24+rdi],r11 | |
921 mov QWORD[32+rdi],r12 | |
922 mov QWORD[40+rdi],r13 | |
923 mov QWORD[48+rdi],r14 | |
924 mov QWORD[56+rdi],r15 | |
925 | |
926 lea rax,[((128+24+48))+rsp] | |
927 mov r15,QWORD[((-48))+rax] | |
928 mov r14,QWORD[((-40))+rax] | |
929 mov r13,QWORD[((-32))+rax] | |
930 mov r12,QWORD[((-24))+rax] | |
931 mov rbp,QWORD[((-16))+rax] | |
932 mov rbx,QWORD[((-8))+rax] | |
933 lea rsp,[rax] | |
934 $L$mul_by_one_epilogue: | |
935 mov rdi,QWORD[8+rsp] ;WIN64 epilogue | |
936 mov rsi,QWORD[16+rsp] | |
937 DB 0F3h,0C3h ;repret | |
938 $L$SEH_end_rsaz_512_mul_by_one: | |
939 | |
;-----------------------------------------------------------------------
; __rsaz_512_reduce -- internal helper (private register convention).
; In:    r8..r15      = eight qwords to reduce (least significant in r8)
;        rbp          = pointer to eight qwords (presumably the modulus)
;        [128+8+rsp]  = multiplier; rsp here includes the return address
;                       pushed by `call`, so this is the caller's
;                       [128+rsp] slot
; Out:   r8..r15      = result after eight reduction rounds
; Clobb: rax, rbx, rcx, rdx, rsi, flags
; Each round multiplies the eight qwords at rbp by rbx, adds them to
; r8..r15 and shifts the window down by one limb. rbx for the next round
; is computed inside the round as (new r8) * multiplier.
;-----------------------------------------------------------------------
940 ALIGN 32 | |
941 __rsaz_512_reduce: | |
942 mov rbx,r8 | |
943 imul rbx,QWORD[((128+8))+rsp] | |
944 mov rax,QWORD[rbp] | |
945 mov ecx,8 | |
946 jmp NEAR $L$reduction_loop | |
947 | |
948 ALIGN 32 | |
949 $L$reduction_loop: | |
; The low half of this product is not added explicitly: neg r8 sets
; CF = (r8 != 0), which stands in for the carry out of the lowest limb
; (presumably because rbx is chosen so that limb sums to zero --
; confirm). The following mov does not disturb CF.
950 mul rbx | |
951 mov rax,QWORD[8+rbp] | |
952 neg r8 | |
953 mov r8,rdx | |
954 adc r8,0 | |
955 | |
956 mul rbx | |
957 add r9,rax | |
958 mov rax,QWORD[16+rbp] | |
959 adc rdx,0 | |
960 add r8,r9 | |
961 mov r9,rdx | |
962 adc r9,0 | |
963 | |
964 mul rbx | |
965 add r10,rax | |
966 mov rax,QWORD[24+rbp] | |
967 adc rdx,0 | |
968 add r9,r10 | |
969 mov r10,rdx | |
970 adc r10,0 | |
971 | |
972 mul rbx | |
973 add r11,rax | |
974 mov rax,QWORD[32+rbp] | |
975 adc rdx,0 | |
976 add r10,r11 | |
; Load the multiplier for the next round's rbx; mov leaves CF from the
; add above intact for the adc that follows.
977 mov rsi,QWORD[((128+8))+rsp] | |
978 | |
979 | |
980 adc rdx,0 | |
981 mov r11,rdx | |
982 | |
983 mul rbx | |
984 add r12,rax | |
985 mov rax,QWORD[40+rbp] | |
986 adc rdx,0 | |
; rsi = multiplier * (new r8); r8 is final for this round here.
987 imul rsi,r8 | |
988 add r11,r12 | |
989 mov r12,rdx | |
990 adc r12,0 | |
991 | |
992 mul rbx | |
993 add r13,rax | |
994 mov rax,QWORD[48+rbp] | |
995 adc rdx,0 | |
996 add r12,r13 | |
997 mov r13,rdx | |
998 adc r13,0 | |
999 | |
1000 mul rbx | |
1001 add r14,rax | |
1002 mov rax,QWORD[56+rbp] | |
1003 adc rdx,0 | |
1004 add r13,r14 | |
1005 mov r14,rdx | |
1006 adc r14,0 | |
1007 | |
1008 mul rbx | |
; Switch rbx to the next round's value only after the last mul of this
; round has issued.
1009 mov rbx,rsi | |
1010 add r15,rax | |
1011 mov rax,QWORD[rbp] | |
1012 adc rdx,0 | |
1013 add r14,r15 | |
1014 mov r15,rdx | |
1015 adc r15,0 | |
1016 | |
1017 dec ecx | |
1018 jne NEAR $L$reduction_loop | |
1019 | |
1020 DB 0F3h,0C3h ;repret | |
1021 | |
1022 | |
;-----------------------------------------------------------------------
; __rsaz_512_subtract -- internal helper: masked subtraction.
; In:    rdi      = output buffer (8 qwords)
;        r8..r15  = value
;        rbp      = pointer to eight qwords to subtract
;        rcx      = mask; callers pass the result of `sbb rcx,rcx`
;                   (0 or all-ones)
; Out:   [rdi] and r8..r15 = value + ((-[rbp]) & mask), i.e. value minus
;        [rbp] when the mask is all-ones and value unchanged when it is 0
; Clobb: flags
; The negation is formed as `neg` on limb 0 and `not` on limbs 1..7,
; which equals the multi-limb two's complement only when limb 0 is
; non-zero (presumably guaranteed by an odd modulus -- confirm). The
; same instructions execute for either mask value.
;-----------------------------------------------------------------------
1023 ALIGN 32 | |
1024 __rsaz_512_subtract: | |
1025 mov QWORD[rdi],r8 | |
1026 mov QWORD[8+rdi],r9 | |
1027 mov QWORD[16+rdi],r10 | |
1028 mov QWORD[24+rdi],r11 | |
1029 mov QWORD[32+rdi],r12 | |
1030 mov QWORD[40+rdi],r13 | |
1031 mov QWORD[48+rdi],r14 | |
1032 mov QWORD[56+rdi],r15 | |
1033 | |
1034 mov r8,QWORD[rbp] | |
1035 mov r9,QWORD[8+rbp] | |
1036 neg r8 | |
1037 not r9 | |
1038 and r8,rcx | |
1039 mov r10,QWORD[16+rbp] | |
1040 and r9,rcx | |
1041 not r10 | |
1042 mov r11,QWORD[24+rbp] | |
1043 and r10,rcx | |
1044 not r11 | |
1045 mov r12,QWORD[32+rbp] | |
1046 and r11,rcx | |
1047 not r12 | |
1048 mov r13,QWORD[40+rbp] | |
1049 and r12,rcx | |
1050 not r13 | |
1051 mov r14,QWORD[48+rbp] | |
1052 and r13,rcx | |
1053 not r14 | |
1054 mov r15,QWORD[56+rbp] | |
1055 and r14,rcx | |
1056 not r15 | |
1057 and r15,rcx | |
1058 | |
; Add the masked negation to the value stored above.
1059 add r8,QWORD[rdi] | |
1060 adc r9,QWORD[8+rdi] | |
1061 adc r10,QWORD[16+rdi] | |
1062 adc r11,QWORD[24+rdi] | |
1063 adc r12,QWORD[32+rdi] | |
1064 adc r13,QWORD[40+rdi] | |
1065 adc r14,QWORD[48+rdi] | |
1066 adc r15,QWORD[56+rdi] | |
1067 | |
1068 mov QWORD[rdi],r8 | |
1069 mov QWORD[8+rdi],r9 | |
1070 mov QWORD[16+rdi],r10 | |
1071 mov QWORD[24+rdi],r11 | |
1072 mov QWORD[32+rdi],r12 | |
1073 mov QWORD[40+rdi],r13 | |
1074 mov QWORD[48+rdi],r14 | |
1075 mov QWORD[56+rdi],r15 | |
1076 | |
1077 DB 0F3h,0C3h ;repret | |
1078 | |
1079 | |
;-----------------------------------------------------------------------
; __rsaz_512_mul -- internal helper: 8x8-limb schoolbook multiply.
; In:    rsi = operand a (8 qwords)
;        rbp = operand b (8 qwords); rbx must already hold b[0]
; Out:   16-qword product at [8+rsp] as seen here, i.e. at the caller's
;        [rsp] (the extra 8 bytes are the return address)
; Clobb: rax, rbx, rcx, rdx, rdi, rbp, r8..r15, flags
;-----------------------------------------------------------------------
1080 ALIGN 32 | |
1081 __rsaz_512_mul: | |
1082 lea rdi,[8+rsp] | |
1083 | |
; First row: a[0..7] * b[0]; the lowest limb is stored, the rest stays
; in r8..r15.
1084 mov rax,QWORD[rsi] | |
1085 mul rbx | |
1086 mov QWORD[rdi],rax | |
1087 mov rax,QWORD[8+rsi] | |
1088 mov r8,rdx | |
1089 | |
1090 mul rbx | |
1091 add r8,rax | |
1092 mov rax,QWORD[16+rsi] | |
1093 mov r9,rdx | |
1094 adc r9,0 | |
1095 | |
1096 mul rbx | |
1097 add r9,rax | |
1098 mov rax,QWORD[24+rsi] | |
1099 mov r10,rdx | |
1100 adc r10,0 | |
1101 | |
1102 mul rbx | |
1103 add r10,rax | |
1104 mov rax,QWORD[32+rsi] | |
1105 mov r11,rdx | |
1106 adc r11,0 | |
1107 | |
1108 mul rbx | |
1109 add r11,rax | |
1110 mov rax,QWORD[40+rsi] | |
1111 mov r12,rdx | |
1112 adc r12,0 | |
1113 | |
1114 mul rbx | |
1115 add r12,rax | |
1116 mov rax,QWORD[48+rsi] | |
1117 mov r13,rdx | |
1118 adc r13,0 | |
1119 | |
1120 mul rbx | |
1121 add r13,rax | |
1122 mov rax,QWORD[56+rsi] | |
1123 mov r14,rdx | |
1124 adc r14,0 | |
1125 | |
1126 mul rbx | |
1127 add r14,rax | |
1128 mov rax,QWORD[rsi] | |
1129 mov r15,rdx | |
1130 adc r15,0 | |
1131 | |
; Advance to b[1] and to the next output limb; seven rows remain.
1132 lea rbp,[8+rbp] | |
1133 lea rdi,[8+rdi] | |
1134 | |
1135 mov ecx,7 | |
1136 jmp NEAR $L$oop_mul | |
1137 | |
1138 ALIGN 32 | |
1139 $L$oop_mul: | |
; Row i: accumulate a[0..7] * b[i] into r8..r15, store the finished low
; limb at [rdi] and shift the window up by one limb.
1140 mov rbx,QWORD[rbp] | |
1141 mul rbx | |
1142 add r8,rax | |
1143 mov rax,QWORD[8+rsi] | |
1144 mov QWORD[rdi],r8 | |
1145 mov r8,rdx | |
1146 adc r8,0 | |
1147 | |
1148 mul rbx | |
1149 add r9,rax | |
1150 mov rax,QWORD[16+rsi] | |
1151 adc rdx,0 | |
1152 add r8,r9 | |
1153 mov r9,rdx | |
1154 adc r9,0 | |
1155 | |
1156 mul rbx | |
1157 add r10,rax | |
1158 mov rax,QWORD[24+rsi] | |
1159 adc rdx,0 | |
1160 add r9,r10 | |
1161 mov r10,rdx | |
1162 adc r10,0 | |
1163 | |
1164 mul rbx | |
1165 add r11,rax | |
1166 mov rax,QWORD[32+rsi] | |
1167 adc rdx,0 | |
1168 add r10,r11 | |
1169 mov r11,rdx | |
1170 adc r11,0 | |
1171 | |
1172 mul rbx | |
1173 add r12,rax | |
1174 mov rax,QWORD[40+rsi] | |
1175 adc rdx,0 | |
1176 add r11,r12 | |
1177 mov r12,rdx | |
1178 adc r12,0 | |
1179 | |
1180 mul rbx | |
1181 add r13,rax | |
1182 mov rax,QWORD[48+rsi] | |
1183 adc rdx,0 | |
1184 add r12,r13 | |
1185 mov r13,rdx | |
1186 adc r13,0 | |
1187 | |
1188 mul rbx | |
1189 add r14,rax | |
1190 mov rax,QWORD[56+rsi] | |
1191 adc rdx,0 | |
1192 add r13,r14 | |
1193 mov r14,rdx | |
; lea is used to advance rbp because it leaves CF untouched for the adc
; below.
1194 lea rbp,[8+rbp] | |
1195 adc r14,0 | |
1196 | |
1197 mul rbx | |
1198 add r15,rax | |
1199 mov rax,QWORD[rsi] | |
1200 adc rdx,0 | |
1201 add r14,r15 | |
1202 mov r15,rdx | |
1203 adc r15,0 | |
1204 | |
1205 lea rdi,[8+rdi] | |
1206 | |
1207 dec ecx | |
1208 jnz NEAR $L$oop_mul | |
1209 | |
; Flush the remaining eight (high) limbs.
1210 mov QWORD[rdi],r8 | |
1211 mov QWORD[8+rdi],r9 | |
1212 mov QWORD[16+rdi],r10 | |
1213 mov QWORD[24+rdi],r11 | |
1214 mov QWORD[32+rdi],r12 | |
1215 mov QWORD[40+rdi],r13 | |
1216 mov QWORD[48+rdi],r14 | |
1217 mov QWORD[56+rdi],r15 | |
1218 | |
1219 DB 0F3h,0C3h ;repret | |
1220 | |
;-----------------------------------------------------------------------
; rsaz_512_scatter4 -- copy eight qwords into a strided table slot.
; ABI:   Microsoft x64, leaf; uses the argument registers directly.
; In:    rcx = table base
;        rdx = source (8 consecutive qwords)
;        r8  = slot index (scaled by 8 bytes)
; Effect: for i in 0..7: [rcx + r8*8 + 128*i] = [rdx + 8*i]
; Clobb: rax, rcx, rdx, r9, flags
;-----------------------------------------------------------------------
1221 global rsaz_512_scatter4 | |
1222 | |
1223 ALIGN 16 | |
1224 rsaz_512_scatter4: | |
1225 lea rcx,[r8*8+rcx] | |
1226 mov r9d,8 | |
1227 jmp NEAR $L$oop_scatter | |
1228 ALIGN 16 | |
1229 $L$oop_scatter: | |
1230 mov rax,QWORD[rdx] | |
1231 lea rdx,[8+rdx] | |
1232 mov QWORD[rcx],rax | |
1233 lea rcx,[128+rcx] | |
1234 dec r9d | |
1235 jnz NEAR $L$oop_scatter | |
1236 DB 0F3h,0C3h ;repret | |
1237 | |
1238 | |
;-----------------------------------------------------------------------
; rsaz_512_gather4 -- read one eight-qword entry out of a strided table.
; ABI:   Microsoft x64; uses the argument registers directly.
; In:    rcx = output buffer (8 qwords)
;        rdx = table base, read as 8 rows of 128 bytes
;        r8d = index of the entry to extract
; Clobb: r9, rcx, rdx, xmm0-xmm5, flags (xmm6-xmm15 saved and restored)
; Every row is loaded in full and the wanted qword is selected with
; compare masks, so the addresses read do not depend on r8d.
;-----------------------------------------------------------------------
1239 global rsaz_512_gather4 | |
1240 | |
1241 ALIGN 16 | |
1242 rsaz_512_gather4: | |
1243 $L$SEH_begin_rsaz_512_gather4: | |
; Hand-encoded prologue: sub rsp,0xa8 followed by movaps stores of
; xmm6..xmm15 at [rsp], [rsp+0x10], ... [rsp+0x90]. The encodings are
; fixed so the instruction end offsets (0x07, 0x0b, 0x10, ... 0x46)
; match the unwind codes in $L$SEH_info_rsaz_512_gather4.
1244 DB 0x48,0x81,0xec,0xa8,0x00,0x00,0x00 | |
1245 DB 0x0f,0x29,0x34,0x24 | |
1246 DB 0x0f,0x29,0x7c,0x24,0x10 | |
1247 DB 0x44,0x0f,0x29,0x44,0x24,0x20 | |
1248 DB 0x44,0x0f,0x29,0x4c,0x24,0x30 | |
1249 DB 0x44,0x0f,0x29,0x54,0x24,0x40 | |
1250 DB 0x44,0x0f,0x29,0x5c,0x24,0x50 | |
1251 DB 0x44,0x0f,0x29,0x64,0x24,0x60 | |
1252 DB 0x44,0x0f,0x29,0x6c,0x24,0x70 | |
1253 DB 0x44,0x0f,0x29,0xb4,0x24,0x80,0,0,0 | |
1254 DB 0x44,0x0f,0x29,0xbc,0x24,0x90,0,0,0 | |
; Build eight selection masks in xmm0..xmm7: lane indices {0,0,1,1},
; {2,2,3,3}, ... {14,14,15,15} compared against the broadcast index.
1255 movd xmm8,r8d | |
1256 movdqa xmm1,XMMWORD[(($L$inc+16))] | |
1257 movdqa xmm0,XMMWORD[$L$inc] | |
1258 | |
1259 pshufd xmm8,xmm8,0 | |
1260 movdqa xmm7,xmm1 | |
1261 movdqa xmm2,xmm1 | |
1262 paddd xmm1,xmm0 | |
1263 pcmpeqd xmm0,xmm8 | |
1264 movdqa xmm3,xmm7 | |
1265 paddd xmm2,xmm1 | |
1266 pcmpeqd xmm1,xmm8 | |
1267 movdqa xmm4,xmm7 | |
1268 paddd xmm3,xmm2 | |
1269 pcmpeqd xmm2,xmm8 | |
1270 movdqa xmm5,xmm7 | |
1271 paddd xmm4,xmm3 | |
1272 pcmpeqd xmm3,xmm8 | |
1273 movdqa xmm6,xmm7 | |
1274 paddd xmm5,xmm4 | |
1275 pcmpeqd xmm4,xmm8 | |
1276 paddd xmm6,xmm5 | |
1277 pcmpeqd xmm5,xmm8 | |
1278 paddd xmm7,xmm6 | |
1279 pcmpeqd xmm6,xmm8 | |
1280 pcmpeqd xmm7,xmm8 | |
1281 mov r9d,8 | |
1282 jmp NEAR $L$oop_gather | |
1283 ALIGN 16 | |
1284 $L$oop_gather: | |
; One iteration per output limb: mask the whole 128-byte row, OR the
; pieces together, fold the two qword halves and store the low qword.
1285 movdqa xmm8,XMMWORD[rdx] | |
1286 movdqa xmm9,XMMWORD[16+rdx] | |
1287 movdqa xmm10,XMMWORD[32+rdx] | |
1288 movdqa xmm11,XMMWORD[48+rdx] | |
1289 pand xmm8,xmm0 | |
1290 movdqa xmm12,XMMWORD[64+rdx] | |
1291 pand xmm9,xmm1 | |
1292 movdqa xmm13,XMMWORD[80+rdx] | |
1293 pand xmm10,xmm2 | |
1294 movdqa xmm14,XMMWORD[96+rdx] | |
1295 pand xmm11,xmm3 | |
1296 movdqa xmm15,XMMWORD[112+rdx] | |
1297 lea rdx,[128+rdx] | |
1298 pand xmm12,xmm4 | |
1299 pand xmm13,xmm5 | |
1300 pand xmm14,xmm6 | |
1301 pand xmm15,xmm7 | |
1302 por xmm8,xmm10 | |
1303 por xmm9,xmm11 | |
1304 por xmm8,xmm12 | |
1305 por xmm9,xmm13 | |
1306 por xmm8,xmm14 | |
1307 por xmm9,xmm15 | |
1308 | |
1309 por xmm8,xmm9 | |
1310 pshufd xmm9,xmm8,0x4e | |
1311 por xmm8,xmm9 | |
1312 movq QWORD[rcx],xmm8 | |
1313 lea rcx,[8+rcx] | |
1314 dec r9d | |
1315 jnz NEAR $L$oop_gather | |
; Restore the Win64 callee-saved XMM registers and release the frame.
1316 movaps xmm6,XMMWORD[rsp] | |
1317 movaps xmm7,XMMWORD[16+rsp] | |
1318 movaps xmm8,XMMWORD[32+rsp] | |
1319 movaps xmm9,XMMWORD[48+rsp] | |
1320 movaps xmm10,XMMWORD[64+rsp] | |
1321 movaps xmm11,XMMWORD[80+rsp] | |
1322 movaps xmm12,XMMWORD[96+rsp] | |
1323 movaps xmm13,XMMWORD[112+rsp] | |
1324 movaps xmm14,XMMWORD[128+rsp] | |
1325 movaps xmm15,XMMWORD[144+rsp] | |
1326 add rsp,0xa8 | |
1327 DB 0F3h,0C3h ;repret | |
1328 $L$SEH_end_rsaz_512_gather4: | |
1329 | |
1330 | |
; $L$inc -- constants for the gather mask setup. The first vector
; ({0,0,1,1}) is the starting pair of lane indices; the second vector at
; $L$inc+16 ({2,2,2,2}) is the per-register increment. Both are loaded
; with movdqa, hence the alignment.
1331 ALIGN 64 | |
1332 $L$inc: | |
1333 DD 0,0,1,1 | |
1334 DD 2,2,2,2 | |
1335 EXTERN __imp_RtlVirtualUnwind | |
1336 | |
;-----------------------------------------------------------------------
; se_handler -- Win64 language-specific exception handler shared by the
; rsaz_512_* entry points that list it in .xdata.
; In:    r8 = CONTEXT record, r9 = DISPATCHER_CONTEXT (field names below
;        follow the Windows x64 structure layouts -- confirm the offsets
;        against the SDK headers).
; It reconstructs the caller's rsp and callee-saved registers in the
; CONTEXT from the frame layout used by the functions above, then calls
; RtlVirtualUnwind and returns 1.
; HandlerData for each function is two image-relative labels:
; [0] = end of prologue ("body"), [1] = start of epilogue.
;-----------------------------------------------------------------------
1337 ALIGN 16 | |
1338 se_handler: | |
1339 push rsi | |
1340 push rdi | |
1341 push rbx | |
1342 push rbp | |
1343 push r12 | |
1344 push r13 | |
1345 push r14 | |
1346 push r15 | |
1347 pushfq | |
1348 sub rsp,64 | |
1349 | |
; rax = context->Rax (the prologues copy the entry rsp into rax),
; rbx = context->Rip.
1350 mov rax,QWORD[120+r8] | |
1351 mov rbx,QWORD[248+r8] | |
1352 | |
; rsi = disp->ImageBase, r11 = disp->HandlerData.
1353 mov rsi,QWORD[8+r9] | |
1354 mov r11,QWORD[56+r9] | |
1355 | |
; Fault before the body label: nothing pushed is relevant yet; the
; entry rsp in rax is used as the frame.
1356 mov r10d,DWORD[r11] | |
1357 lea r10,[r10*1+rsi] | |
1358 cmp rbx,r10 | |
1359 jb NEAR $L$common_seh_tail | |
1360 | |
; rax = context->Rsp. Fault at or after the epilogue label: registers
; and rsp have already been restored.
1361 mov rax,QWORD[152+r8] | |
1362 | |
1363 mov r10d,DWORD[4+r11] | |
1364 lea r10,[r10*1+rsi] | |
1365 cmp rbx,r10 | |
1366 jae NEAR $L$common_seh_tail | |
1367 | |
; Inside the body: skip the 152-byte frame plus the six pushes.
1368 lea rax,[((128+24+48))+rax] | |
1369 | |
; rsaz_512_mul_gather4 has a 176-byte larger frame holding xmm6-xmm15;
; it is recognised by its epilogue label.
1370 lea rbx,[$L$mul_gather4_epilogue] | |
1371 cmp rbx,r10 | |
1372 jne NEAR $L$se_not_in_mul_gather4 | |
1373 | |
1374 lea rax,[176+rax] | |
1375 | |
; Copy the 10 saved XMM registers (20 qwords) into the CONTEXT at
; offset 512. The DD below is the byte sequence fc f3 48 a5
; (cld / rep movsq).
1376 lea rsi,[((-48-168))+rax] | |
1377 lea rdi,[512+r8] | |
1378 mov ecx,20 | |
1379 DD 0xa548f3fc | |
1380 | |
1381 $L$se_not_in_mul_gather4: | |
; Recover the pushed callee-saved registers and write them into the
; CONTEXT.
1382 mov rbx,QWORD[((-8))+rax] | |
1383 mov rbp,QWORD[((-16))+rax] | |
1384 mov r12,QWORD[((-24))+rax] | |
1385 mov r13,QWORD[((-32))+rax] | |
1386 mov r14,QWORD[((-40))+rax] | |
1387 mov r15,QWORD[((-48))+rax] | |
1388 mov QWORD[144+r8],rbx | |
1389 mov QWORD[160+r8],rbp | |
1390 mov QWORD[216+r8],r12 | |
1391 mov QWORD[224+r8],r13 | |
1392 mov QWORD[232+r8],r14 | |
1393 mov QWORD[240+r8],r15 | |
1394 | |
1395 $L$common_seh_tail: | |
; rdi/rsi were spilled by the prologue to [8+entry rsp] and
; [16+entry rsp]; rax is the entry rsp here.
1396 mov rdi,QWORD[8+rax] | |
1397 mov rsi,QWORD[16+rax] | |
1398 mov QWORD[152+r8],rax | |
1399 mov QWORD[168+r8],rsi | |
1400 mov QWORD[176+r8],rdi | |
1401 | |
; Copy 154 qwords (1232 bytes, presumably sizeof(CONTEXT)) from the
; incoming context to disp->ContextRecord with cld / rep movsq.
1402 mov rdi,QWORD[40+r9] | |
1403 mov rsi,r8 | |
1404 mov ecx,154 | |
1405 DD 0xa548f3fc | |
1406 | |
; Set up the eight arguments of RtlVirtualUnwind: rcx = 0, rdx =
; ImageBase, r8 = ControlPc, r9 = FunctionEntry, and on the stack
; ContextRecord, &HandlerData, &EstablisherFrame and NULL.
1407 mov rsi,r9 | |
1408 xor rcx,rcx | |
1409 mov rdx,QWORD[8+rsi] | |
1410 mov r8,QWORD[rsi] | |
1411 mov r9,QWORD[16+rsi] | |
1412 mov r10,QWORD[40+rsi] | |
1413 lea r11,[56+rsi] | |
1414 lea r12,[24+rsi] | |
1415 mov QWORD[32+rsp],r10 | |
1416 mov QWORD[40+rsp],r11 | |
1417 mov QWORD[48+rsp],r12 | |
1418 mov QWORD[56+rsp],rcx | |
1419 call QWORD[__imp_RtlVirtualUnwind] | |
1420 | |
; Return 1 (presumably ExceptionContinueSearch -- confirm).
1421 mov eax,1 | |
1422 add rsp,64 | |
1423 popfq | |
1424 pop r15 | |
1425 pop r14 | |
1426 pop r13 | |
1427 pop r12 | |
1428 pop rbp | |
1429 pop rbx | |
1430 pop rdi | |
1431 pop rsi | |
1432 DB 0F3h,0C3h ;repret | |
1433 | |
1434 | |
; .pdata -- one function-table entry per routine with unwind info:
; image-relative begin address, end address and pointer to the matching
; record in .xdata. rsaz_512_scatter4 and the __rsaz_512_* helpers have
; no entry here.
1435 section .pdata rdata align=4 | |
1436 ALIGN 4 | |
1437 DD $L$SEH_begin_rsaz_512_sqr wrt ..imagebase | |
1438 DD $L$SEH_end_rsaz_512_sqr wrt ..imagebase | |
1439 DD $L$SEH_info_rsaz_512_sqr wrt ..imagebase | |
1440 | |
1441 DD $L$SEH_begin_rsaz_512_mul wrt ..imagebase | |
1442 DD $L$SEH_end_rsaz_512_mul wrt ..imagebase | |
1443 DD $L$SEH_info_rsaz_512_mul wrt ..imagebase | |
1444 | |
1445 DD $L$SEH_begin_rsaz_512_mul_gather4 wrt ..imagebase | |
1446 DD $L$SEH_end_rsaz_512_mul_gather4 wrt ..imagebase | |
1447 DD $L$SEH_info_rsaz_512_mul_gather4 wrt ..imagebase | |
1448 | |
1449 DD $L$SEH_begin_rsaz_512_mul_scatter4 wrt ..imagebase | |
1450 DD $L$SEH_end_rsaz_512_mul_scatter4 wrt ..imagebase | |
1451 DD $L$SEH_info_rsaz_512_mul_scatter4 wrt ..imagebase | |
1452 | |
1453 DD $L$SEH_begin_rsaz_512_mul_by_one wrt ..imagebase | |
1454 DD $L$SEH_end_rsaz_512_mul_by_one wrt ..imagebase | |
1455 DD $L$SEH_info_rsaz_512_mul_by_one wrt ..imagebase | |
1456 | |
1457 DD $L$SEH_begin_rsaz_512_gather4 wrt ..imagebase | |
1458 DD $L$SEH_end_rsaz_512_gather4 wrt ..imagebase | |
1459 DD $L$SEH_info_rsaz_512_gather4 wrt ..imagebase | |
1460 | |
; .xdata -- unwind records referenced from .pdata.
; The first five records start with the header bytes 9,0,0,0 (version 1
; with the exception-handler flag, no unwind codes), followed by the
; se_handler address and the two handler-data labels se_handler reads:
; end of prologue and start of epilogue.
; NOTE(review): in this rendering three of the handler-data lines are
; split across two physical lines (mid-directive); they must be rejoined
; before the file can be assembled.
1461 section .xdata rdata align=8 | |
1462 ALIGN 8 | |
1463 $L$SEH_info_rsaz_512_sqr: | |
1464 DB 9,0,0,0 | |
1465 DD se_handler wrt ..imagebase | |
1466 DD $L$sqr_body wrt ..imagebase,$L$sqr_epilogue wrt ..imagebase | |
1467 $L$SEH_info_rsaz_512_mul: | |
1468 DB 9,0,0,0 | |
1469 DD se_handler wrt ..imagebase | |
1470 DD $L$mul_body wrt ..imagebase,$L$mul_epilogue wrt ..imagebase | |
1471 $L$SEH_info_rsaz_512_mul_gather4: | |
1472 DB 9,0,0,0 | |
1473 DD se_handler wrt ..imagebase | |
1474 DD $L$mul_gather4_body wrt ..imagebase,$L$mul_gather4_epilogue wrt
..imagebase | |
1475 $L$SEH_info_rsaz_512_mul_scatter4: | |
1476 DB 9,0,0,0 | |
1477 DD se_handler wrt ..imagebase | |
1478 DD $L$mul_scatter4_body wrt ..imagebase,$L$mul_scatter4_epilogue wr
t ..imagebase | |
1479 $L$SEH_info_rsaz_512_mul_by_one: | |
1480 DB 9,0,0,0 | |
1481 DD se_handler wrt ..imagebase | |
1482 DD $L$mul_by_one_body wrt ..imagebase,$L$mul_by_one_epilogue wrt ..
imagebase | |
; rsaz_512_gather4 uses plain unwind codes instead of se_handler.
; Header: version 1, prologue size 0x46, 0x16 (22) code slots, no frame
; register. Each following 4-byte group is: prologue offset, op/info
; byte, 16-bit scaled operand. The ten 0x?8 entries record the movaps
; saves of xmm15 down to xmm6 (operand * 16 = stack offset); the last
; entry records the 0x15 * 8 = 0xa8-byte stack allocation.
1483 $L$SEH_info_rsaz_512_gather4: | |
1484 DB 0x01,0x46,0x16,0x00 | |
1485 DB 0x46,0xf8,0x09,0x00 | |
1486 DB 0x3d,0xe8,0x08,0x00 | |
1487 DB 0x34,0xd8,0x07,0x00 | |
1488 DB 0x2e,0xc8,0x06,0x00 | |
1489 DB 0x28,0xb8,0x05,0x00 | |
1490 DB 0x22,0xa8,0x04,0x00 | |
1491 DB 0x1c,0x98,0x03,0x00 | |
1492 DB 0x16,0x88,0x02,0x00 | |
1493 DB 0x10,0x78,0x01,0x00 | |
1494 DB 0x0b,0x68,0x00,0x00 | |
1495 DB 0x07,0x01,0x15,0x00 | |
OLD | NEW |