OLD | NEW |
| (Empty) |
1 default rel | |
2 %define XMMWORD | |
3 %define YMMWORD | |
4 %define ZMMWORD | |
5 section .text code align=64 | |
6 | |
7 | |
8 EXTERN OPENSSL_ia32cap_P | |
9 | |
10 global bn_mul_mont_gather5 | |
11 | |
12 ALIGN 64 | |
; ---------------------------------------------------------------------------
; bn_mul_mont_gather5 -- exported Win64 entry point.
;
; Arguments arrive in Microsoft x64 order (rcx, rdx, r8, r9, then the stack)
; and are moved into rdi, rsi, rdx, rcx, r8, r9 right after entry.  After
; that shuffle the registers are used as follows in this routine:
;   rdi  - destination words (written in $L$sub / $L$copy)
;   rsi  - first operand words (read as [r15*8+rsi])
;   rdx  - base of the table that the second operand word is gathered from
;   rcx  - words used in the second multiply of every step and subtracted at
;          the end (presumably the modulus -- confirm against the C caller)
;   r8   - pointer to one qword; the value is loaded and multiplied with the
;          low result word to form the per-pass multiplier in rbp
;   r9   - word count (zero-extended from r9d)
;   [56+rsp] at entry - 32-bit index, broadcast and compared against counters
;          to build the selection masks
; Returns rax = 1.  rbx, rbp, r12-r15 are pushed and restored; rdi/rsi are
; saved in the slots at [8+rsp] / [16+rsp] of the entry frame and reloaded
; just before returning.
; ---------------------------------------------------------------------------
13 bn_mul_mont_gather5: | |
14 mov QWORD[8+rsp],rdi ;WIN64 prologue | |
15 mov QWORD[16+rsp],rsi | |
16 mov rax,rsp | |
17 $L$SEH_begin_bn_mul_mont_gather5: | |
18 mov rdi,rcx | |
19 mov rsi,rdx | |
20 mov rdx,r8 | |
21 mov rcx,r9 | |
22 mov r8,QWORD[40+rsp] | |
23 mov r9,QWORD[48+rsp] | |
24 | |
25 | |
; Dispatch: if any of the low three bits of the word count are set use the
; generic one-word-at-a-time path below, otherwise jump into the 4x path.
26 test r9d,7 | |
27 jnz NEAR $L$mul_enter | |
28 jmp NEAR $L$mul4x_enter | |
29 | |
30 ALIGN 16 | |
31 $L$mul_enter: | |
; Writing r9d zero-extends it into r9.  rax keeps the entry rsp so the frame
; can be unwound later; xmm5 receives the 32-bit index argument.
32 mov r9d,r9d | |
33 mov rax,rsp | |
34 movd xmm5,DWORD[56+rsp] | |
35 lea r10,[$L$inc] | |
36 push rbx | |
37 push rbp | |
38 push r12 | |
39 push r13 | |
40 push r14 | |
41 push r15 | |
42 | |
; Frame: rsp -= 264 + 8*(r9+2), then rounded down to a 1024-byte boundary.
; The entry rsp (rax) is parked at [8+r9*8+rsp].
43 lea r11,[2+r9] | |
44 neg r11 | |
45 lea rsp,[((-264))+r11*8+rsp] | |
46 and rsp,-1024 | |
47 | |
48 mov QWORD[8+r9*8+rsp],rax | |
49 $L$mul_body: | |
; r12 = table base + 128 so the 256-byte row can be addressed with
; displacements -128..+112.  r10 becomes the 16-byte-aligned base of the
; on-stack mask area (masks live at [112+r10] .. [352+r10]).
50 lea r12,[128+rdx] | |
51 movdqa xmm0,XMMWORD[r10] | |
52 movdqa xmm1,XMMWORD[16+r10] | |
53 lea r10,[((24-112))+r9*8+rsp] | |
54 and r10,-16 | |
55 | |
; Build 16 compare masks: counters are stepped with paddd (step vector kept
; in xmm4) and compared (pcmpeqd) against the broadcast index in xmm5, so a
; lane is all-ones only where the counter equals the index.  The contents of
; $L$inc are defined outside this view -- NOTE(review): confirm the start and
; step values there.  The DB 0x67 bytes are address-size prefixes in front of
; register-only instructions (presumably padding).
56 pshufd xmm5,xmm5,0 | |
57 movdqa xmm4,xmm1 | |
58 movdqa xmm2,xmm1 | |
59 paddd xmm1,xmm0 | |
60 pcmpeqd xmm0,xmm5 | |
61 DB 0x67 | |
62 movdqa xmm3,xmm4 | |
63 paddd xmm2,xmm1 | |
64 pcmpeqd xmm1,xmm5 | |
65 movdqa XMMWORD[112+r10],xmm0 | |
66 movdqa xmm0,xmm4 | |
67 | |
68 paddd xmm3,xmm2 | |
69 pcmpeqd xmm2,xmm5 | |
70 movdqa XMMWORD[128+r10],xmm1 | |
71 movdqa xmm1,xmm4 | |
72 | |
73 paddd xmm0,xmm3 | |
74 pcmpeqd xmm3,xmm5 | |
75 movdqa XMMWORD[144+r10],xmm2 | |
76 movdqa xmm2,xmm4 | |
77 | |
78 paddd xmm1,xmm0 | |
79 pcmpeqd xmm0,xmm5 | |
80 movdqa XMMWORD[160+r10],xmm3 | |
81 movdqa xmm3,xmm4 | |
82 paddd xmm2,xmm1 | |
83 pcmpeqd xmm1,xmm5 | |
84 movdqa XMMWORD[176+r10],xmm0 | |
85 movdqa xmm0,xmm4 | |
86 | |
87 paddd xmm3,xmm2 | |
88 pcmpeqd xmm2,xmm5 | |
89 movdqa XMMWORD[192+r10],xmm1 | |
90 movdqa xmm1,xmm4 | |
91 | |
92 paddd xmm0,xmm3 | |
93 pcmpeqd xmm3,xmm5 | |
94 movdqa XMMWORD[208+r10],xmm2 | |
95 movdqa xmm2,xmm4 | |
96 | |
97 paddd xmm1,xmm0 | |
98 pcmpeqd xmm0,xmm5 | |
99 movdqa XMMWORD[224+r10],xmm3 | |
100 movdqa xmm3,xmm4 | |
101 paddd xmm2,xmm1 | |
102 pcmpeqd xmm1,xmm5 | |
103 movdqa XMMWORD[240+r10],xmm0 | |
104 movdqa xmm0,xmm4 | |
105 | |
106 paddd xmm3,xmm2 | |
107 pcmpeqd xmm2,xmm5 | |
108 movdqa XMMWORD[256+r10],xmm1 | |
109 movdqa xmm1,xmm4 | |
110 | |
111 paddd xmm0,xmm3 | |
112 pcmpeqd xmm3,xmm5 | |
113 movdqa XMMWORD[272+r10],xmm2 | |
114 movdqa xmm2,xmm4 | |
115 | |
116 paddd xmm1,xmm0 | |
117 pcmpeqd xmm0,xmm5 | |
118 movdqa XMMWORD[288+r10],xmm3 | |
119 movdqa xmm3,xmm4 | |
120 paddd xmm2,xmm1 | |
121 pcmpeqd xmm1,xmm5 | |
122 movdqa XMMWORD[304+r10],xmm0 | |
123 | |
124 paddd xmm3,xmm2 | |
125 DB 0x67 | |
126 pcmpeqd xmm2,xmm5 | |
127 movdqa XMMWORD[320+r10],xmm1 | |
128 | |
129 pcmpeqd xmm3,xmm5 | |
130 movdqa XMMWORD[336+r10],xmm2 | |
; First gather.  The last four masks are still live in xmm0..xmm3 and are
; applied to table bytes +64..+127 directly; the other twelve are reloaded
; from the stack.  All 256 bytes of the row are read regardless of the index
; and combined with pand/por, so the memory access pattern does not depend on
; which entry is selected.
131 pand xmm0,XMMWORD[64+r12] | |
132 | |
133 pand xmm1,XMMWORD[80+r12] | |
134 pand xmm2,XMMWORD[96+r12] | |
135 movdqa XMMWORD[352+r10],xmm3 | |
136 pand xmm3,XMMWORD[112+r12] | |
137 por xmm0,xmm2 | |
138 por xmm1,xmm3 | |
139 movdqa xmm4,XMMWORD[((-128))+r12] | |
140 movdqa xmm5,XMMWORD[((-112))+r12] | |
141 movdqa xmm2,XMMWORD[((-96))+r12] | |
142 pand xmm4,XMMWORD[112+r10] | |
143 movdqa xmm3,XMMWORD[((-80))+r12] | |
144 pand xmm5,XMMWORD[128+r10] | |
145 por xmm0,xmm4 | |
146 pand xmm2,XMMWORD[144+r10] | |
147 por xmm1,xmm5 | |
148 pand xmm3,XMMWORD[160+r10] | |
149 por xmm0,xmm2 | |
150 por xmm1,xmm3 | |
151 movdqa xmm4,XMMWORD[((-64))+r12] | |
152 movdqa xmm5,XMMWORD[((-48))+r12] | |
153 movdqa xmm2,XMMWORD[((-32))+r12] | |
154 pand xmm4,XMMWORD[176+r10] | |
155 movdqa xmm3,XMMWORD[((-16))+r12] | |
156 pand xmm5,XMMWORD[192+r10] | |
157 por xmm0,xmm4 | |
158 pand xmm2,XMMWORD[208+r10] | |
159 por xmm1,xmm5 | |
160 pand xmm3,XMMWORD[224+r10] | |
161 por xmm0,xmm2 | |
162 por xmm1,xmm3 | |
163 movdqa xmm4,XMMWORD[r12] | |
164 movdqa xmm5,XMMWORD[16+r12] | |
165 movdqa xmm2,XMMWORD[32+r12] | |
166 pand xmm4,XMMWORD[240+r10] | |
167 movdqa xmm3,XMMWORD[48+r12] | |
168 pand xmm5,XMMWORD[256+r10] | |
169 por xmm0,xmm4 | |
170 pand xmm2,XMMWORD[272+r10] | |
171 por xmm1,xmm5 | |
172 pand xmm3,XMMWORD[288+r10] | |
173 por xmm0,xmm2 | |
174 por xmm1,xmm3 | |
; Fold the two accumulators, OR the high qword onto the low one (pshufd 0x4e
; swaps the 64-bit halves), step r12 to the next 256-byte row, and move the
; gathered word to rbx: the DB bytes below encode "movq rbx,xmm0".
175 por xmm0,xmm1 | |
176 pshufd xmm1,xmm0,0x4e | |
177 por xmm0,xmm1 | |
178 lea r12,[256+r12] | |
179 DB 102,72,15,126,195 | |
180 | |
; First pass.  r8 is replaced by the qword it pointed to.  r14 is the outer
; (pass) counter and r15 the inner word index.  rbp = r8 * low64(rsi[0]*rbx)
; is the second multiplier for this pass; every step multiplies one rsi word
; by rbx and one rcx word by rbp and chains the carries through r10/r11/r13.
181 mov r8,QWORD[r8] | |
182 mov rax,QWORD[rsi] | |
183 | |
184 xor r14,r14 | |
185 xor r15,r15 | |
186 | |
187 mov rbp,r8 | |
188 mul rbx | |
189 mov r10,rax | |
190 mov rax,QWORD[rcx] | |
191 | |
192 imul rbp,r10 | |
193 mov r11,rdx | |
194 | |
195 mul rbp | |
196 add r10,rax | |
197 mov rax,QWORD[8+rsi] | |
198 adc rdx,0 | |
199 mov r13,rdx | |
200 | |
201 lea r15,[1+r15] | |
202 jmp NEAR $L$1st_enter | |
203 | |
204 ALIGN 16 | |
; Loop body is entered in the middle ($L$1st_enter); results are stored one
; slot behind the index, at [-16+r15*8+rsp].
205 $L$1st: | |
206 add r13,rax | |
207 mov rax,QWORD[r15*8+rsi] | |
208 adc rdx,0 | |
209 add r13,r11 | |
210 mov r11,r10 | |
211 adc rdx,0 | |
212 mov QWORD[((-16))+r15*8+rsp],r13 | |
213 mov r13,rdx | |
214 | |
215 $L$1st_enter: | |
216 mul rbx | |
217 add r11,rax | |
218 mov rax,QWORD[r15*8+rcx] | |
219 adc rdx,0 | |
220 lea r15,[1+r15] | |
221 mov r10,rdx | |
222 | |
223 mul rbp | |
224 cmp r15,r9 | |
225 jne NEAR $L$1st | |
226 | |
227 | |
; Drain the pipeline: store the last two result words and the final carry
; (0 or 1) at [r9*8+rsp].
228 add r13,rax | |
229 adc rdx,0 | |
230 add r13,r11 | |
231 adc rdx,0 | |
232 mov QWORD[((-16))+r9*8+rsp],r13 | |
233 mov r13,rdx | |
234 mov r11,r10 | |
235 | |
236 xor rdx,rdx | |
237 add r13,r11 | |
238 adc rdx,0 | |
239 mov QWORD[((-8))+r9*8+rsp],r13 | |
240 mov QWORD[r9*8+rsp],rdx | |
241 | |
242 lea r14,[1+r14] | |
243 jmp NEAR $L$outer | |
244 ALIGN 16 | |
; Remaining passes.  rdx is recomputed as the aligned mask base + 128, which
; addresses the same 16 masks stored above (now at [-128+rdx] .. [112+rdx]).
; Each pass gathers the next word from the next 256-byte row into rbx.
245 $L$outer: | |
246 lea rdx,[((24+128))+r9*8+rsp] | |
247 and rdx,-16 | |
248 pxor xmm4,xmm4 | |
249 pxor xmm5,xmm5 | |
250 movdqa xmm0,XMMWORD[((-128))+r12] | |
251 movdqa xmm1,XMMWORD[((-112))+r12] | |
252 movdqa xmm2,XMMWORD[((-96))+r12] | |
253 movdqa xmm3,XMMWORD[((-80))+r12] | |
254 pand xmm0,XMMWORD[((-128))+rdx] | |
255 pand xmm1,XMMWORD[((-112))+rdx] | |
256 por xmm4,xmm0 | |
257 pand xmm2,XMMWORD[((-96))+rdx] | |
258 por xmm5,xmm1 | |
259 pand xmm3,XMMWORD[((-80))+rdx] | |
260 por xmm4,xmm2 | |
261 por xmm5,xmm3 | |
262 movdqa xmm0,XMMWORD[((-64))+r12] | |
263 movdqa xmm1,XMMWORD[((-48))+r12] | |
264 movdqa xmm2,XMMWORD[((-32))+r12] | |
265 movdqa xmm3,XMMWORD[((-16))+r12] | |
266 pand xmm0,XMMWORD[((-64))+rdx] | |
267 pand xmm1,XMMWORD[((-48))+rdx] | |
268 por xmm4,xmm0 | |
269 pand xmm2,XMMWORD[((-32))+rdx] | |
270 por xmm5,xmm1 | |
271 pand xmm3,XMMWORD[((-16))+rdx] | |
272 por xmm4,xmm2 | |
273 por xmm5,xmm3 | |
274 movdqa xmm0,XMMWORD[r12] | |
275 movdqa xmm1,XMMWORD[16+r12] | |
276 movdqa xmm2,XMMWORD[32+r12] | |
277 movdqa xmm3,XMMWORD[48+r12] | |
278 pand xmm0,XMMWORD[rdx] | |
279 pand xmm1,XMMWORD[16+rdx] | |
280 por xmm4,xmm0 | |
281 pand xmm2,XMMWORD[32+rdx] | |
282 por xmm5,xmm1 | |
283 pand xmm3,XMMWORD[48+rdx] | |
284 por xmm4,xmm2 | |
285 por xmm5,xmm3 | |
286 movdqa xmm0,XMMWORD[64+r12] | |
287 movdqa xmm1,XMMWORD[80+r12] | |
288 movdqa xmm2,XMMWORD[96+r12] | |
289 movdqa xmm3,XMMWORD[112+r12] | |
290 pand xmm0,XMMWORD[64+rdx] | |
291 pand xmm1,XMMWORD[80+rdx] | |
292 por xmm4,xmm0 | |
293 pand xmm2,XMMWORD[96+rdx] | |
294 por xmm5,xmm1 | |
295 pand xmm3,XMMWORD[112+rdx] | |
296 por xmm4,xmm2 | |
297 por xmm5,xmm3 | |
298 por xmm4,xmm5 | |
299 pshufd xmm0,xmm4,0x4e | |
300 por xmm0,xmm4 | |
301 lea r12,[256+r12] | |
302 | |
; DB bytes below encode "movq rbx,xmm0" (gathered word for this pass).
303 mov rax,QWORD[rsi] | |
304 DB 102,72,15,126,195 | |
305 | |
; Same multiply/accumulate shape as the first pass, except that the words
; left on the stack by the previous pass ([rsp], [8+rsp], [r15*8+rsp]) are
; added in through r10.
306 xor r15,r15 | |
307 mov rbp,r8 | |
308 mov r10,QWORD[rsp] | |
309 | |
310 mul rbx | |
311 add r10,rax | |
312 mov rax,QWORD[rcx] | |
313 adc rdx,0 | |
314 | |
315 imul rbp,r10 | |
316 mov r11,rdx | |
317 | |
318 mul rbp | |
319 add r10,rax | |
320 mov rax,QWORD[8+rsi] | |
321 adc rdx,0 | |
322 mov r10,QWORD[8+rsp] | |
323 mov r13,rdx | |
324 | |
325 lea r15,[1+r15] | |
326 jmp NEAR $L$inner_enter | |
327 | |
328 ALIGN 16 | |
329 $L$inner: | |
330 add r13,rax | |
331 mov rax,QWORD[r15*8+rsi] | |
332 adc rdx,0 | |
333 add r13,r10 | |
334 mov r10,QWORD[r15*8+rsp] | |
335 adc rdx,0 | |
336 mov QWORD[((-16))+r15*8+rsp],r13 | |
337 mov r13,rdx | |
338 | |
339 $L$inner_enter: | |
340 mul rbx | |
341 add r11,rax | |
342 mov rax,QWORD[r15*8+rcx] | |
343 adc rdx,0 | |
344 add r10,r11 | |
345 mov r11,rdx | |
346 adc r11,0 | |
347 lea r15,[1+r15] | |
348 | |
349 mul rbp | |
350 cmp r15,r9 | |
351 jne NEAR $L$inner | |
352 | |
; Drain: the previous pass's carry word at [r9*8+rsp] is folded into the top.
353 add r13,rax | |
354 adc rdx,0 | |
355 add r13,r10 | |
356 mov r10,QWORD[r9*8+rsp] | |
357 adc rdx,0 | |
358 mov QWORD[((-16))+r9*8+rsp],r13 | |
359 mov r13,rdx | |
360 | |
361 xor rdx,rdx | |
362 add r13,r11 | |
363 adc rdx,0 | |
364 add r13,r10 | |
365 adc rdx,0 | |
366 mov QWORD[((-8))+r9*8+rsp],r13 | |
367 mov QWORD[r9*8+rsp],rdx | |
368 | |
; Unsigned loop test: repeat while pass counter r14 < word count r9.
369 lea r14,[1+r14] | |
370 cmp r14,r9 | |
371 jb NEAR $L$outer | |
372 | |
; Final subtraction: rdi[i] = stack[i] - rcx[i] with borrow.  "xor r14,r14"
; clears CF for the first sbb; inside the loop only mov/lea/dec sit between
; the sbb instructions, none of which modify CF, so the borrow chain survives
; the loop control.  Do not replace "dec" with "sub" here.
373 xor r14,r14 | |
374 mov rax,QWORD[rsp] | |
375 lea rsi,[rsp] | |
376 mov r15,r9 | |
377 jmp NEAR $L$sub | |
378 ALIGN 16 | |
379 $L$sub: sbb rax,QWORD[r14*8+rcx] | |
380 mov QWORD[r14*8+rdi],rax | |
381 mov rax,QWORD[8+r14*8+rsi] | |
382 lea r14,[1+r14] | |
383 dec r15 | |
384 jnz NEAR $L$sub | |
385 | |
; rax now holds the carry word loaded from [r9*8+rsp]; subtracting the final
; borrow turns it into the select mask used below (presumably 0 or all-ones
; for valid inputs -- confirm against the caller's preconditions).
386 sbb rax,0 | |
387 xor r14,r14 | |
388 mov r15,r9 | |
389 ALIGN 16 | |
; Branch-free select per word: rsi = ((stack[i] ^ rdi[i]) & rax) ^ rdi[i],
; i.e. stack[i] where the mask bits are set, rdi[i] (the difference)
; otherwise.  The stack word is then overwritten with the loop index r14 so
; the intermediate value does not remain on the stack.
390 $L$copy: | |
391 mov rsi,QWORD[r14*8+rsp] | |
392 mov rcx,QWORD[r14*8+rdi] | |
393 xor rsi,rcx | |
394 and rsi,rax | |
395 xor rsi,rcx | |
396 mov QWORD[r14*8+rsp],r14 | |
397 mov QWORD[r14*8+rdi],rsi | |
398 lea r14,[1+r14] | |
399 sub r15,1 | |
400 jnz NEAR $L$copy | |
401 | |
; Epilogue: reload the entry rsp saved at [8+r9*8+rsp], return 1 in rax,
; restore the six pushed registers relative to it, then restore rdi/rsi from
; the entry-frame slots.  DB 0F3h,0C3h is "rep ret".
402 mov rsi,QWORD[8+r9*8+rsp] | |
403 mov rax,1 | |
404 | |
405 mov r15,QWORD[((-48))+rsi] | |
406 mov r14,QWORD[((-40))+rsi] | |
407 mov r13,QWORD[((-32))+rsi] | |
408 mov r12,QWORD[((-24))+rsi] | |
409 mov rbp,QWORD[((-16))+rsi] | |
410 mov rbx,QWORD[((-8))+rsi] | |
411 lea rsp,[rsi] | |
412 $L$mul_epilogue: | |
413 mov rdi,QWORD[8+rsp] ;WIN64 epilogue | |
414 mov rsi,QWORD[16+rsp] | |
415 DB 0F3h,0C3h ;repret | |
416 $L$SEH_end_bn_mul_mont_gather5: | |
417 | |
418 ALIGN 32 | |
; ---------------------------------------------------------------------------
; bn_mul4x_mont_gather5 -- frame setup wrapper around mul4x_internal.
;
; Has its own Win64 prologue (same argument shuffle as bn_mul_mont_gather5:
; rcx,rdx,r8,r9,[40+rsp],[48+rsp] -> rdi,rsi,rdx,rcx,r8,r9), but is also
; entered at $L$mul4x_enter from bn_mul_mont_gather5 with the registers
; already shuffled.  Returns rax = 1.  rbx, rbp, r12-r15 are pushed and
; restored; rdi/rsi are reloaded from the entry-frame slots on exit.
; ---------------------------------------------------------------------------
419 bn_mul4x_mont_gather5: | |
420 mov QWORD[8+rsp],rdi ;WIN64 prologue | |
421 mov QWORD[16+rsp],rsi | |
422 mov rax,rsp | |
423 $L$SEH_begin_bn_mul4x_mont_gather5: | |
424 mov rdi,rcx | |
425 mov rsi,rdx | |
426 mov rdx,r8 | |
427 mov rcx,r9 | |
428 mov r8,QWORD[40+rsp] | |
429 mov r9,QWORD[48+rsp] | |
430 | |
431 | |
432 $L$mul4x_enter: | |
; rax = entry rsp (kept for the epilogue and passed on to mul4x_internal,
; which reads the index argument at [56+rax]).  The DB 0x67 bytes are
; address-size prefixes on register-only instructions (presumably padding).
433 DB 0x67 | |
434 mov rax,rsp | |
435 push rbx | |
436 push rbp | |
437 push r12 | |
438 push r13 | |
439 push r14 | |
440 push r15 | |
441 | |
; r9 = word count * 8 (bytes), r10 = 3 * that, then r9 is negated so it can
; be used as a negative displacement.
442 DB 0x67 | |
443 shl r9d,3 | |
444 lea r10,[r9*2+r9] | |
445 neg r9 | |
446 | |
447 | |
448 | |
449 | |
450 | |
451 | |
452 | |
453 | |
454 | |
455 | |
; Frame placement.  r11 = (candidate frame address - rdi) mod 4096, where the
; candidate frame is rsp - 320 - 2*bytes.  If r11 <= r10 the frame is moved
; down by r11; otherwise the alternative path moves it down by
; max(0, r11 - (4096 - 320 - 2*bytes)).  NOTE(review): presumably this keeps
; the frame from aliasing the output buffer modulo the 4 KiB page size --
; confirm against the generator's comments.
456 lea r11,[((-320))+r9*2+rsp] | |
457 sub r11,rdi | |
458 and r11,4095 | |
459 cmp r10,r11 | |
460 jb NEAR $L$mul4xsp_alt | |
461 sub rsp,r11 | |
462 lea rsp,[((-320))+r9*2+rsp] | |
463 jmp NEAR $L$mul4xsp_done | |
464 | |
465 ALIGN 32 | |
466 $L$mul4xsp_alt: | |
467 lea r10,[((4096-320))+r9*2] | |
468 lea rsp,[((-320))+r9*2+rsp] | |
469 sub r11,r10 | |
; "mov r10,0" rather than xor: the cmovc below still needs CF from the sub
; above, and mov does not touch flags.
470 mov r10,0 | |
471 cmovc r11,r10 | |
472 sub rsp,r11 | |
473 $L$mul4xsp_done: | |
; Align the frame to 64 bytes, make r9 positive again, and save the entry
; rsp at [40+rsp] for the epilogue.
474 and rsp,-64 | |
475 neg r9 | |
476 | |
477 mov QWORD[40+rsp],rax | |
478 $L$mul4x_body: | |
479 | |
480 call mul4x_internal | |
481 | |
; Epilogue: recover the entry rsp, return 1, restore the pushed registers
; relative to it, then rdi/rsi.  DB 0F3h,0C3h is "rep ret".
482 mov rsi,QWORD[40+rsp] | |
483 mov rax,1 | |
484 | |
485 mov r15,QWORD[((-48))+rsi] | |
486 mov r14,QWORD[((-40))+rsi] | |
487 mov r13,QWORD[((-32))+rsi] | |
488 mov r12,QWORD[((-24))+rsi] | |
489 mov rbp,QWORD[((-16))+rsi] | |
490 mov rbx,QWORD[((-8))+rsi] | |
491 lea rsp,[rsi] | |
492 $L$mul4x_epilogue: | |
493 mov rdi,QWORD[8+rsp] ;WIN64 epilogue | |
494 mov rsi,QWORD[16+rsp] | |
495 DB 0F3h,0C3h ;repret | |
496 $L$SEH_end_bn_mul4x_mont_gather5: | |
497 | |
498 | |
499 ALIGN 32 | |
; ---------------------------------------------------------------------------
; mul4x_internal -- internal worker (not exported), called from
; bn_mul4x_mont_gather5 and bn_power5 in this file.
;
; Register inputs as used below:
;   rax - caller's entry rsp; the 32-bit index is read from [56+rax]
;   rdx - table base for the gather
;   rsi - first operand words
;   rcx - words used in the second multiply of each step (presumably the
;         modulus -- confirm against callers)
;   r8  - pointer to one qword, loaded and used to derive rbp each pass
;   r9  - operand length in bytes (positive on entry)
;   rdi - destination pointer; saved at [56+8+rsp] and reloaded at the end
; Stack offsets carry an extra "+8" because rsp here includes the return
; address pushed by the call.  The routine does not return by itself: it
; ends with a jump to $L$sqr4x_sub_entry, which is outside this view.
; ---------------------------------------------------------------------------
500 mul4x_internal: | |
; r13 = rdx + 128 + r9*32: the value r12 is compared against to end the
; outer loop.  r10 = base of the on-stack mask area, r12 = table base + 128.
501 shl r9,5 | |
502 movd xmm5,DWORD[56+rax] | |
503 lea rax,[$L$inc] | |
504 lea r13,[128+r9*1+rdx] | |
505 shr r9,5 | |
506 movdqa xmm0,XMMWORD[rax] | |
507 movdqa xmm1,XMMWORD[16+rax] | |
508 lea r10,[((88-112))+r9*1+rsp] | |
509 lea r12,[128+rdx] | |
510 | |
; Build 16 compare masks (counter == broadcast index) and store them at
; [112+r10] .. [352+r10].  $L$inc is defined outside this view --
; NOTE(review): confirm its start/step values.  DB 0x67 bytes are
; address-size prefixes on register-only instructions (presumably padding).
511 pshufd xmm5,xmm5,0 | |
512 movdqa xmm4,xmm1 | |
513 DB 0x67,0x67 | |
514 movdqa xmm2,xmm1 | |
515 paddd xmm1,xmm0 | |
516 pcmpeqd xmm0,xmm5 | |
517 DB 0x67 | |
518 movdqa xmm3,xmm4 | |
519 paddd xmm2,xmm1 | |
520 pcmpeqd xmm1,xmm5 | |
521 movdqa XMMWORD[112+r10],xmm0 | |
522 movdqa xmm0,xmm4 | |
523 | |
524 paddd xmm3,xmm2 | |
525 pcmpeqd xmm2,xmm5 | |
526 movdqa XMMWORD[128+r10],xmm1 | |
527 movdqa xmm1,xmm4 | |
528 | |
529 paddd xmm0,xmm3 | |
530 pcmpeqd xmm3,xmm5 | |
531 movdqa XMMWORD[144+r10],xmm2 | |
532 movdqa xmm2,xmm4 | |
533 | |
534 paddd xmm1,xmm0 | |
535 pcmpeqd xmm0,xmm5 | |
536 movdqa XMMWORD[160+r10],xmm3 | |
537 movdqa xmm3,xmm4 | |
538 paddd xmm2,xmm1 | |
539 pcmpeqd xmm1,xmm5 | |
540 movdqa XMMWORD[176+r10],xmm0 | |
541 movdqa xmm0,xmm4 | |
542 | |
543 paddd xmm3,xmm2 | |
544 pcmpeqd xmm2,xmm5 | |
545 movdqa XMMWORD[192+r10],xmm1 | |
546 movdqa xmm1,xmm4 | |
547 | |
548 paddd xmm0,xmm3 | |
549 pcmpeqd xmm3,xmm5 | |
550 movdqa XMMWORD[208+r10],xmm2 | |
551 movdqa xmm2,xmm4 | |
552 | |
553 paddd xmm1,xmm0 | |
554 pcmpeqd xmm0,xmm5 | |
555 movdqa XMMWORD[224+r10],xmm3 | |
556 movdqa xmm3,xmm4 | |
557 paddd xmm2,xmm1 | |
558 pcmpeqd xmm1,xmm5 | |
559 movdqa XMMWORD[240+r10],xmm0 | |
560 movdqa xmm0,xmm4 | |
561 | |
562 paddd xmm3,xmm2 | |
563 pcmpeqd xmm2,xmm5 | |
564 movdqa XMMWORD[256+r10],xmm1 | |
565 movdqa xmm1,xmm4 | |
566 | |
567 paddd xmm0,xmm3 | |
568 pcmpeqd xmm3,xmm5 | |
569 movdqa XMMWORD[272+r10],xmm2 | |
570 movdqa xmm2,xmm4 | |
571 | |
572 paddd xmm1,xmm0 | |
573 pcmpeqd xmm0,xmm5 | |
574 movdqa XMMWORD[288+r10],xmm3 | |
575 movdqa xmm3,xmm4 | |
576 paddd xmm2,xmm1 | |
577 pcmpeqd xmm1,xmm5 | |
578 movdqa XMMWORD[304+r10],xmm0 | |
579 | |
580 paddd xmm3,xmm2 | |
581 DB 0x67 | |
582 pcmpeqd xmm2,xmm5 | |
583 movdqa XMMWORD[320+r10],xmm1 | |
584 | |
585 pcmpeqd xmm3,xmm5 | |
586 movdqa XMMWORD[336+r10],xmm2 | |
; First gather: all 256 bytes of the row are read and masked; the last four
; masks are still in xmm0..xmm3, the rest are reloaded from the stack.
587 pand xmm0,XMMWORD[64+r12] | |
588 | |
589 pand xmm1,XMMWORD[80+r12] | |
590 pand xmm2,XMMWORD[96+r12] | |
591 movdqa XMMWORD[352+r10],xmm3 | |
592 pand xmm3,XMMWORD[112+r12] | |
593 por xmm0,xmm2 | |
594 por xmm1,xmm3 | |
595 movdqa xmm4,XMMWORD[((-128))+r12] | |
596 movdqa xmm5,XMMWORD[((-112))+r12] | |
597 movdqa xmm2,XMMWORD[((-96))+r12] | |
598 pand xmm4,XMMWORD[112+r10] | |
599 movdqa xmm3,XMMWORD[((-80))+r12] | |
600 pand xmm5,XMMWORD[128+r10] | |
601 por xmm0,xmm4 | |
602 pand xmm2,XMMWORD[144+r10] | |
603 por xmm1,xmm5 | |
604 pand xmm3,XMMWORD[160+r10] | |
605 por xmm0,xmm2 | |
606 por xmm1,xmm3 | |
607 movdqa xmm4,XMMWORD[((-64))+r12] | |
608 movdqa xmm5,XMMWORD[((-48))+r12] | |
609 movdqa xmm2,XMMWORD[((-32))+r12] | |
610 pand xmm4,XMMWORD[176+r10] | |
611 movdqa xmm3,XMMWORD[((-16))+r12] | |
612 pand xmm5,XMMWORD[192+r10] | |
613 por xmm0,xmm4 | |
614 pand xmm2,XMMWORD[208+r10] | |
615 por xmm1,xmm5 | |
616 pand xmm3,XMMWORD[224+r10] | |
617 por xmm0,xmm2 | |
618 por xmm1,xmm3 | |
619 movdqa xmm4,XMMWORD[r12] | |
620 movdqa xmm5,XMMWORD[16+r12] | |
621 movdqa xmm2,XMMWORD[32+r12] | |
622 pand xmm4,XMMWORD[240+r10] | |
623 movdqa xmm3,XMMWORD[48+r12] | |
624 pand xmm5,XMMWORD[256+r10] | |
625 por xmm0,xmm4 | |
626 pand xmm2,XMMWORD[272+r10] | |
627 por xmm1,xmm5 | |
628 pand xmm3,XMMWORD[288+r10] | |
629 por xmm0,xmm2 | |
630 por xmm1,xmm3 | |
; Fold halves, advance r12 by one 256-byte row; the DB bytes encode
; "movq rbx,xmm0".
631 por xmm0,xmm1 | |
632 pshufd xmm1,xmm0,0x4e | |
633 por xmm0,xmm1 | |
634 lea r12,[256+r12] | |
635 DB 102,72,15,126,195 | |
636 | |
; Spill the end-of-table sentinel and the destination pointer (rdi is
; reused as a carry register in the loops below).
637 mov QWORD[((16+8))+rsp],r13 | |
638 mov QWORD[((56+8))+rsp],rdi | |
639 | |
; rsi is advanced to the end of the operand and r9 negated, so operand words
; are addressed as [disp + r9 + rsi] / [disp + r15 + rsi] with a negative
; index counting up towards zero.
640 mov r8,QWORD[r8] | |
641 mov rax,QWORD[rsi] | |
642 lea rsi,[r9*1+rsi] | |
643 neg r9 | |
644 | |
; First pass prologue: rbp = r8 * low64(operand[0] * rbx); r14 points at the
; result area starting at [64+8+rsp].
645 mov rbp,r8 | |
646 mul rbx | |
647 mov r10,rax | |
648 mov rax,QWORD[rcx] | |
649 | |
650 imul rbp,r10 | |
651 lea r14,[((64+8))+rsp] | |
652 mov r11,rdx | |
653 | |
654 mul rbp | |
655 add r10,rax | |
656 mov rax,QWORD[8+r9*1+rsi] | |
657 adc rdx,0 | |
658 mov rdi,rdx | |
659 | |
660 mul rbx | |
661 add r11,rax | |
662 mov rax,QWORD[8+rcx] | |
663 adc rdx,0 | |
664 mov r10,rdx | |
665 | |
666 mul rbp | |
667 add rdi,rax | |
668 mov rax,QWORD[16+r9*1+rsi] | |
669 adc rdx,0 | |
670 add rdi,r11 | |
671 lea r15,[32+r9] | |
672 lea rcx,[32+rcx] | |
673 adc rdx,0 | |
674 mov QWORD[r14],rdi | |
675 mov r13,rdx | |
676 jmp NEAR $L$1st4x | |
677 | |
678 ALIGN 32 | |
; First-pass loop, four words per iteration.  r15 is a negative byte offset
; stepped by 32 until it reaches zero; rcx and r14 advance by 32 bytes too.
; Carries alternate between the r10/r11 and r13/rdi register pairs.
679 $L$1st4x: | |
680 mul rbx | |
681 add r10,rax | |
682 mov rax,QWORD[((-16))+rcx] | |
683 lea r14,[32+r14] | |
684 adc rdx,0 | |
685 mov r11,rdx | |
686 | |
687 mul rbp | |
688 add r13,rax | |
689 mov rax,QWORD[((-8))+r15*1+rsi] | |
690 adc rdx,0 | |
691 add r13,r10 | |
692 adc rdx,0 | |
693 mov QWORD[((-24))+r14],r13 | |
694 mov rdi,rdx | |
695 | |
696 mul rbx | |
697 add r11,rax | |
698 mov rax,QWORD[((-8))+rcx] | |
699 adc rdx,0 | |
700 mov r10,rdx | |
701 | |
702 mul rbp | |
703 add rdi,rax | |
704 mov rax,QWORD[r15*1+rsi] | |
705 adc rdx,0 | |
706 add rdi,r11 | |
707 adc rdx,0 | |
708 mov QWORD[((-16))+r14],rdi | |
709 mov r13,rdx | |
710 | |
711 mul rbx | |
712 add r10,rax | |
713 mov rax,QWORD[rcx] | |
714 adc rdx,0 | |
715 mov r11,rdx | |
716 | |
717 mul rbp | |
718 add r13,rax | |
719 mov rax,QWORD[8+r15*1+rsi] | |
720 adc rdx,0 | |
721 add r13,r10 | |
722 adc rdx,0 | |
723 mov QWORD[((-8))+r14],r13 | |
724 mov rdi,rdx | |
725 | |
726 mul rbx | |
727 add r11,rax | |
728 mov rax,QWORD[8+rcx] | |
729 adc rdx,0 | |
730 mov r10,rdx | |
731 | |
732 mul rbp | |
733 add rdi,rax | |
734 mov rax,QWORD[16+r15*1+rsi] | |
735 adc rdx,0 | |
736 add rdi,r11 | |
737 lea rcx,[32+rcx] | |
738 adc rdx,0 | |
739 mov QWORD[r14],rdi | |
740 mov r13,rdx | |
741 | |
742 add r15,32 | |
743 jnz NEAR $L$1st4x | |
744 | |
; First-pass tail: last two words, then rax is reloaded with operand[0]
; ([r9+rsi]) for the next pass and rcx is rewound by r9 (negative) to its
; starting position.
745 mul rbx | |
746 add r10,rax | |
747 mov rax,QWORD[((-16))+rcx] | |
748 lea r14,[32+r14] | |
749 adc rdx,0 | |
750 mov r11,rdx | |
751 | |
752 mul rbp | |
753 add r13,rax | |
754 mov rax,QWORD[((-8))+rsi] | |
755 adc rdx,0 | |
756 add r13,r10 | |
757 adc rdx,0 | |
758 mov QWORD[((-24))+r14],r13 | |
759 mov rdi,rdx | |
760 | |
761 mul rbx | |
762 add r11,rax | |
763 mov rax,QWORD[((-8))+rcx] | |
764 adc rdx,0 | |
765 mov r10,rdx | |
766 | |
767 mul rbp | |
768 add rdi,rax | |
769 mov rax,QWORD[r9*1+rsi] | |
770 adc rdx,0 | |
771 add rdi,r11 | |
772 adc rdx,0 | |
773 mov QWORD[((-16))+r14],rdi | |
774 mov r13,rdx | |
775 | |
776 lea rcx,[r9*1+rcx] | |
777 | |
; rdi becomes the top carry (0 or 1) of this pass.
778 xor rdi,rdi | |
779 add r13,r10 | |
780 adc rdi,0 | |
781 mov QWORD[((-8))+r14],r13 | |
782 | |
783 jmp NEAR $L$outer4x | |
784 | |
785 ALIGN 32 | |
; Outer loop: gather the next word into rbx (masks are addressed relative to
; rdx = r14 + 16 + 128), then run one multiply/accumulate pass over the
; result words already on the stack.
786 $L$outer4x: | |
787 lea rdx,[((16+128))+r14] | |
788 pxor xmm4,xmm4 | |
789 pxor xmm5,xmm5 | |
790 movdqa xmm0,XMMWORD[((-128))+r12] | |
791 movdqa xmm1,XMMWORD[((-112))+r12] | |
792 movdqa xmm2,XMMWORD[((-96))+r12] | |
793 movdqa xmm3,XMMWORD[((-80))+r12] | |
794 pand xmm0,XMMWORD[((-128))+rdx] | |
795 pand xmm1,XMMWORD[((-112))+rdx] | |
796 por xmm4,xmm0 | |
797 pand xmm2,XMMWORD[((-96))+rdx] | |
798 por xmm5,xmm1 | |
799 pand xmm3,XMMWORD[((-80))+rdx] | |
800 por xmm4,xmm2 | |
801 por xmm5,xmm3 | |
802 movdqa xmm0,XMMWORD[((-64))+r12] | |
803 movdqa xmm1,XMMWORD[((-48))+r12] | |
804 movdqa xmm2,XMMWORD[((-32))+r12] | |
805 movdqa xmm3,XMMWORD[((-16))+r12] | |
806 pand xmm0,XMMWORD[((-64))+rdx] | |
807 pand xmm1,XMMWORD[((-48))+rdx] | |
808 por xmm4,xmm0 | |
809 pand xmm2,XMMWORD[((-32))+rdx] | |
810 por xmm5,xmm1 | |
811 pand xmm3,XMMWORD[((-16))+rdx] | |
812 por xmm4,xmm2 | |
813 por xmm5,xmm3 | |
814 movdqa xmm0,XMMWORD[r12] | |
815 movdqa xmm1,XMMWORD[16+r12] | |
816 movdqa xmm2,XMMWORD[32+r12] | |
817 movdqa xmm3,XMMWORD[48+r12] | |
818 pand xmm0,XMMWORD[rdx] | |
819 pand xmm1,XMMWORD[16+rdx] | |
820 por xmm4,xmm0 | |
821 pand xmm2,XMMWORD[32+rdx] | |
822 por xmm5,xmm1 | |
823 pand xmm3,XMMWORD[48+rdx] | |
824 por xmm4,xmm2 | |
825 por xmm5,xmm3 | |
826 movdqa xmm0,XMMWORD[64+r12] | |
827 movdqa xmm1,XMMWORD[80+r12] | |
828 movdqa xmm2,XMMWORD[96+r12] | |
829 movdqa xmm3,XMMWORD[112+r12] | |
830 pand xmm0,XMMWORD[64+rdx] | |
831 pand xmm1,XMMWORD[80+rdx] | |
832 por xmm4,xmm0 | |
833 pand xmm2,XMMWORD[96+rdx] | |
834 por xmm5,xmm1 | |
835 pand xmm3,XMMWORD[112+rdx] | |
836 por xmm4,xmm2 | |
837 por xmm5,xmm3 | |
838 por xmm4,xmm5 | |
839 pshufd xmm0,xmm4,0x4e | |
840 por xmm0,xmm4 | |
841 lea r12,[256+r12] | |
; DB bytes encode "movq rbx,xmm0".
842 DB 102,72,15,126,195 | |
843 | |
; Pass prologue: r10 = lowest result word of the previous pass
; ([r9+r14], r9 negative); the previous top carry in rdi is stored at [r14]
; before r14 is rewound to the start of the result area.
844 mov r10,QWORD[r9*1+r14] | |
845 mov rbp,r8 | |
846 mul rbx | |
847 add r10,rax | |
848 mov rax,QWORD[rcx] | |
849 adc rdx,0 | |
850 | |
851 imul rbp,r10 | |
852 mov r11,rdx | |
853 mov QWORD[r14],rdi | |
854 | |
855 lea r14,[r9*1+r14] | |
856 | |
857 mul rbp | |
858 add r10,rax | |
859 mov rax,QWORD[8+r9*1+rsi] | |
860 adc rdx,0 | |
861 mov rdi,rdx | |
862 | |
863 mul rbx | |
864 add r11,rax | |
865 mov rax,QWORD[8+rcx] | |
866 adc rdx,0 | |
867 add r11,QWORD[8+r14] | |
868 adc rdx,0 | |
869 mov r10,rdx | |
870 | |
871 mul rbp | |
872 add rdi,rax | |
873 mov rax,QWORD[16+r9*1+rsi] | |
874 adc rdx,0 | |
875 add rdi,r11 | |
876 lea r15,[32+r9] | |
877 lea rcx,[32+rcx] | |
878 adc rdx,0 | |
879 mov r13,rdx | |
880 jmp NEAR $L$inner4x | |
881 | |
882 ALIGN 32 | |
; Inner loop, four words per iteration: like $L$1st4x but each step also adds
; the previous pass's word from the result area, and each result is stored
; one word lower than where the corresponding input word was read.
883 $L$inner4x: | |
884 mul rbx | |
885 add r10,rax | |
886 mov rax,QWORD[((-16))+rcx] | |
887 adc rdx,0 | |
888 add r10,QWORD[16+r14] | |
889 lea r14,[32+r14] | |
890 adc rdx,0 | |
891 mov r11,rdx | |
892 | |
893 mul rbp | |
894 add r13,rax | |
895 mov rax,QWORD[((-8))+r15*1+rsi] | |
896 adc rdx,0 | |
897 add r13,r10 | |
898 adc rdx,0 | |
899 mov QWORD[((-32))+r14],rdi | |
900 mov rdi,rdx | |
901 | |
902 mul rbx | |
903 add r11,rax | |
904 mov rax,QWORD[((-8))+rcx] | |
905 adc rdx,0 | |
906 add r11,QWORD[((-8))+r14] | |
907 adc rdx,0 | |
908 mov r10,rdx | |
909 | |
910 mul rbp | |
911 add rdi,rax | |
912 mov rax,QWORD[r15*1+rsi] | |
913 adc rdx,0 | |
914 add rdi,r11 | |
915 adc rdx,0 | |
916 mov QWORD[((-24))+r14],r13 | |
917 mov r13,rdx | |
918 | |
919 mul rbx | |
920 add r10,rax | |
921 mov rax,QWORD[rcx] | |
922 adc rdx,0 | |
923 add r10,QWORD[r14] | |
924 adc rdx,0 | |
925 mov r11,rdx | |
926 | |
927 mul rbp | |
928 add r13,rax | |
929 mov rax,QWORD[8+r15*1+rsi] | |
930 adc rdx,0 | |
931 add r13,r10 | |
932 adc rdx,0 | |
933 mov QWORD[((-16))+r14],rdi | |
934 mov rdi,rdx | |
935 | |
936 mul rbx | |
937 add r11,rax | |
938 mov rax,QWORD[8+rcx] | |
939 adc rdx,0 | |
940 add r11,QWORD[8+r14] | |
941 adc rdx,0 | |
942 mov r10,rdx | |
943 | |
944 mul rbp | |
945 add rdi,rax | |
946 mov rax,QWORD[16+r15*1+rsi] | |
947 adc rdx,0 | |
948 add rdi,r11 | |
949 lea rcx,[32+rcx] | |
950 adc rdx,0 | |
951 mov QWORD[((-8))+r14],r13 | |
952 mov r13,rdx | |
953 | |
954 add r15,32 | |
955 jnz NEAR $L$inner4x | |
956 | |
; Inner tail.  Note the register swap below: the multiplier in rbp is moved
; to rax and rbp is loaded with the word at [-8+rcx], so the following
; "mul rbp" forms the same product while leaving that word in rbp for the
; comparison after the outer loop.
957 mul rbx | |
958 add r10,rax | |
959 mov rax,QWORD[((-16))+rcx] | |
960 adc rdx,0 | |
961 add r10,QWORD[16+r14] | |
962 lea r14,[32+r14] | |
963 adc rdx,0 | |
964 mov r11,rdx | |
965 | |
966 mul rbp | |
967 add r13,rax | |
968 mov rax,QWORD[((-8))+rsi] | |
969 adc rdx,0 | |
970 add r13,r10 | |
971 adc rdx,0 | |
972 mov QWORD[((-32))+r14],rdi | |
973 mov rdi,rdx | |
974 | |
975 mul rbx | |
976 add r11,rax | |
977 mov rax,rbp | |
978 mov rbp,QWORD[((-8))+rcx] | |
979 adc rdx,0 | |
980 add r11,QWORD[((-8))+r14] | |
981 adc rdx,0 | |
982 mov r10,rdx | |
983 | |
984 mul rbp | |
985 add rdi,rax | |
986 mov rax,QWORD[r9*1+rsi] | |
987 adc rdx,0 | |
988 add rdi,r11 | |
989 adc rdx,0 | |
990 mov QWORD[((-24))+r14],r13 | |
991 mov r13,rdx | |
992 | |
993 mov QWORD[((-16))+r14],rdi | |
994 lea rcx,[r9*1+rcx] | |
995 | |
; Top word of this pass: add the carry chain and the previous pass's top
; carry stored at [r14]; rdi ends up as the new top carry.
996 xor rdi,rdi | |
997 add r13,r10 | |
998 adc rdi,0 | |
999 add r13,QWORD[r14] | |
1000 adc rdi,0 | |
1001 mov QWORD[((-8))+r14],r13 | |
1002 | |
; Repeat until r12 reaches the sentinel saved at [16+8+rsp].
1003 cmp r12,QWORD[((16+8))+rsp] | |
1004 jb NEAR $L$outer4x | |
; Build the selector for the final conditional subtraction: r15 is zero on
; loop exit, so "adc r15,r15" captures the borrow of (rbp - r13), i.e. the
; top loaded word minus the top result word; that bit is ORed with the top
; carry in rdi and negated into rax (0 or all-ones).  The remaining moves set
; up registers for the shared tail at $L$sqr4x_sub_entry (outside this
; view): rbx = start of the result area, rbp = base of the rcx operand,
; rcx = r9 >> 5 (arithmetic shift; r9 is still negative here), rdi = the
; destination saved on entry, and r12..r15 = the first four words at rbp
; with r12 decremented by one.
1005 xor rax,rax | |
1006 sub rbp,r13 | |
1007 adc r15,r15 | |
1008 or rdi,r15 | |
1009 sub rax,rdi | |
1010 lea rbx,[r9*1+r14] | |
1011 mov r12,QWORD[rcx] | |
1012 lea rbp,[rcx] | |
1013 mov rcx,r9 | |
1014 sar rcx,3+2 | |
1015 mov rdi,QWORD[((56+8))+rsp] | |
1016 dec r12 | |
1017 xor r10,r10 | |
1018 mov r13,QWORD[8+rbp] | |
1019 mov r14,QWORD[16+rbp] | |
1020 mov r15,QWORD[24+rbp] | |
1021 jmp NEAR $L$sqr4x_sub_entry | |
1022 | |
1023 global bn_power5 | |
1024 | |
1025 ALIGN 32 | |
; ---------------------------------------------------------------------------
; bn_power5 -- exported Win64 entry point.
;
; After the same argument shuffle used by the other entry points in this
; file (rcx,rdx,r8,r9,[40+rsp],[48+rsp] -> rdi,rsi,rdx,rcx,r8,r9), it calls
; __bn_sqr8x_internal followed by __bn_post4x_internal five times and then
; mul4x_internal once, i.e. five squarings followed by one multiplication by
; a gathered table entry.  Returns rax = 1.  rbx, rbp, r12-r15 are pushed
; and restored; rdi/rsi are reloaded from the entry-frame slots on exit.
; The register contracts of the two squaring helpers are outside this view.
; ---------------------------------------------------------------------------
1026 bn_power5: | |
1027 mov QWORD[8+rsp],rdi ;WIN64 prologue | |
1028 mov QWORD[16+rsp],rsi | |
1029 mov rax,rsp | |
1030 $L$SEH_begin_bn_power5: | |
1031 mov rdi,rcx | |
1032 mov rsi,rdx | |
1033 mov rdx,r8 | |
1034 mov rcx,r9 | |
1035 mov r8,QWORD[40+rsp] | |
1036 mov r9,QWORD[48+rsp] | |
1037 | |
1038 | |
; rax = entry rsp, kept for the epilogue and for mul4x_internal.
1039 mov rax,rsp | |
1040 push rbx | |
1041 push rbp | |
1042 push r12 | |
1043 push r13 | |
1044 push r14 | |
1045 push r15 | |
1046 | |
; r9 = word count * 8 (bytes), r10 = 3 * that (32-bit result, zero-extended),
; r9 negated; r8 is replaced by the qword it pointed to.
1047 shl r9d,3 | |
1048 lea r10d,[r9*2+r9] | |
1049 neg r9 | |
1050 mov r8,QWORD[r8] | |
1051 | |
1052 | |
1053 | |
1054 | |
1055 | |
1056 | |
1057 | |
1058 | |
; Frame placement, same scheme as in bn_mul4x_mont_gather5: r11 is the
; distance between the candidate frame and rdi modulo 4096, and the frame is
; shifted down depending on how it compares with r10.  NOTE(review):
; presumably avoids page-offset aliasing with the output -- confirm.
1059 lea r11,[((-320))+r9*2+rsp] | |
1060 sub r11,rdi | |
1061 and r11,4095 | |
1062 cmp r10,r11 | |
1063 jb NEAR $L$pwr_sp_alt | |
1064 sub rsp,r11 | |
1065 lea rsp,[((-320))+r9*2+rsp] | |
1066 jmp NEAR $L$pwr_sp_done | |
1067 | |
1068 ALIGN 32 | |
1069 $L$pwr_sp_alt: | |
1070 lea r10,[((4096-320))+r9*2] | |
1071 lea rsp,[((-320))+r9*2+rsp] | |
1072 sub r11,r10 | |
; "mov r10,0" (not xor) so CF from the sub above survives for cmovc.
1073 mov r10,0 | |
1074 cmovc r11,r10 | |
1075 sub rsp,r11 | |
1076 $L$pwr_sp_done: | |
; 64-byte align the frame; r10 keeps the negative byte count, r9 becomes
; positive again.
1077 and rsp,-64 | |
1078 mov r10,r9 | |
1079 neg r9 | |
1080 | |
1081 | |
1082 | |
1083 | |
1084 | |
1085 | |
1086 | |
1087 | |
1088 | |
1089 | |
; Frame slots: [32+rsp] = the qword loaded through r8, [40+rsp] = entry rsp.
1090 mov QWORD[32+rsp],r8 | |
1091 mov QWORD[40+rsp],rax | |
1092 $L$power5_body: | |
; Park values in xmm registers across the helper calls.  The DB sequences
; encode, in order: movq xmm1,rdi / movq xmm2,rcx / movq xmm3,r10 /
; movq xmm4,rdx.
1093 DB 102,72,15,110,207 | |
1094 DB 102,72,15,110,209 | |
1095 DB 102,73,15,110,218 | |
1096 DB 102,72,15,110,226 | |
1097 | |
; Five square + post-process rounds.
1098 call __bn_sqr8x_internal | |
1099 call __bn_post4x_internal | |
1100 call __bn_sqr8x_internal | |
1101 call __bn_post4x_internal | |
1102 call __bn_sqr8x_internal | |
1103 call __bn_post4x_internal | |
1104 call __bn_sqr8x_internal | |
1105 call __bn_post4x_internal | |
1106 call __bn_sqr8x_internal | |
1107 call __bn_post4x_internal | |
1108 | |
; Restore rcx and rdx from xmm2 / xmm4 (DB bytes encode movq rcx,xmm2 and
; movq rdx,xmm4), then set up mul4x_internal's inputs: rdi = rsi,
; rax = entry rsp, and r8 = address of the saved qword at [32+rsp], because
; mul4x_internal dereferences r8 itself.
1109 DB 102,72,15,126,209 | |
1110 DB 102,72,15,126,226 | |
1111 mov rdi,rsi | |
1112 mov rax,QWORD[40+rsp] | |
1113 lea r8,[32+rsp] | |
1114 | |
1115 call mul4x_internal | |
1116 | |
; Epilogue: recover the entry rsp, return 1, restore pushed registers, then
; rdi/rsi.  DB 0F3h,0C3h is "rep ret".
1117 mov rsi,QWORD[40+rsp] | |
1118 mov rax,1 | |
1119 mov r15,QWORD[((-48))+rsi] | |
1120 mov r14,QWORD[((-40))+rsi] | |
1121 mov r13,QWORD[((-32))+rsi] | |
1122 mov r12,QWORD[((-24))+rsi] | |
1123 mov rbp,QWORD[((-16))+rsi] | |
1124 mov rbx,QWORD[((-8))+rsi] | |
1125 lea rsp,[rsi] | |
1126 $L$power5_epilogue: | |
1127 mov rdi,QWORD[8+rsp] ;WIN64 epilogue | |
1128 mov rsi,QWORD[16+rsp] | |
1129 DB 0F3h,0C3h ;repret | |
1130 $L$SEH_end_bn_power5: | |
1131 | |
1132 global bn_sqr8x_internal | |
1133 | |
1134 | |
1135 ALIGN 32 | |
1136 bn_sqr8x_internal: | |
1137 __bn_sqr8x_internal: | |
1138 | |
1139 | |
1140 | |
1141 | |
1142 | |
1143 | |
1144 | |
1145 | |
1146 | |
1147 | |
1148 | |
1149 | |
1150 | |
1151 | |
1152 | |
1153 | |
1154 | |
1155 | |
1156 | |
1157 | |
1158 | |
1159 | |
1160 | |
1161 | |
1162 | |
1163 | |
1164 | |
1165 | |
1166 | |
1167 | |
1168 | |
1169 | |
1170 | |
1171 | |
1172 | |
1173 | |
1174 | |
1175 | |
1176 | |
1177 | |
1178 | |
1179 | |
1180 | |
1181 | |
1182 | |
1183 | |
1184 | |
1185 | |
1186 | |
1187 | |
1188 | |
1189 | |
1190 | |
1191 | |
1192 | |
1193 | |
1194 | |
1195 | |
1196 | |
1197 | |
1198 | |
1199 | |
1200 | |
1201 | |
1202 | |
1203 | |
1204 | |
1205 | |
1206 | |
1207 | |
1208 | |
1209 | |
1210 | |
1211 lea rbp,[32+r10] | |
1212 lea rsi,[r9*1+rsi] | |
1213 | |
1214 mov rcx,r9 | |
1215 | |
1216 | |
1217 mov r14,QWORD[((-32))+rbp*1+rsi] | |
1218 lea rdi,[((48+8))+r9*2+rsp] | |
1219 mov rax,QWORD[((-24))+rbp*1+rsi] | |
1220 lea rdi,[((-32))+rbp*1+rdi] | |
1221 mov rbx,QWORD[((-16))+rbp*1+rsi] | |
1222 mov r15,rax | |
1223 | |
1224 mul r14 | |
1225 mov r10,rax | |
1226 mov rax,rbx | |
1227 mov r11,rdx | |
1228 mov QWORD[((-24))+rbp*1+rdi],r10 | |
1229 | |
1230 mul r14 | |
1231 add r11,rax | |
1232 mov rax,rbx | |
1233 adc rdx,0 | |
1234 mov QWORD[((-16))+rbp*1+rdi],r11 | |
1235 mov r10,rdx | |
1236 | |
1237 | |
1238 mov rbx,QWORD[((-8))+rbp*1+rsi] | |
1239 mul r15 | |
1240 mov r12,rax | |
1241 mov rax,rbx | |
1242 mov r13,rdx | |
1243 | |
1244 lea rcx,[rbp] | |
1245 mul r14 | |
1246 add r10,rax | |
1247 mov rax,rbx | |
1248 mov r11,rdx | |
1249 adc r11,0 | |
1250 add r10,r12 | |
1251 adc r11,0 | |
1252 mov QWORD[((-8))+rcx*1+rdi],r10 | |
1253 jmp NEAR $L$sqr4x_1st | |
1254 | |
1255 ALIGN 32 | |
1256 $L$sqr4x_1st: | |
1257 mov rbx,QWORD[rcx*1+rsi] | |
1258 mul r15 | |
1259 add r13,rax | |
1260 mov rax,rbx | |
1261 mov r12,rdx | |
1262 adc r12,0 | |
1263 | |
1264 mul r14 | |
1265 add r11,rax | |
1266 mov rax,rbx | |
1267 mov rbx,QWORD[8+rcx*1+rsi] | |
1268 mov r10,rdx | |
1269 adc r10,0 | |
1270 add r11,r13 | |
1271 adc r10,0 | |
1272 | |
1273 | |
1274 mul r15 | |
1275 add r12,rax | |
1276 mov rax,rbx | |
1277 mov QWORD[rcx*1+rdi],r11 | |
1278 mov r13,rdx | |
1279 adc r13,0 | |
1280 | |
1281 mul r14 | |
1282 add r10,rax | |
1283 mov rax,rbx | |
1284 mov rbx,QWORD[16+rcx*1+rsi] | |
1285 mov r11,rdx | |
1286 adc r11,0 | |
1287 add r10,r12 | |
1288 adc r11,0 | |
1289 | |
1290 mul r15 | |
1291 add r13,rax | |
1292 mov rax,rbx | |
1293 mov QWORD[8+rcx*1+rdi],r10 | |
1294 mov r12,rdx | |
1295 adc r12,0 | |
1296 | |
1297 mul r14 | |
1298 add r11,rax | |
1299 mov rax,rbx | |
1300 mov rbx,QWORD[24+rcx*1+rsi] | |
1301 mov r10,rdx | |
1302 adc r10,0 | |
1303 add r11,r13 | |
1304 adc r10,0 | |
1305 | |
1306 | |
1307 mul r15 | |
1308 add r12,rax | |
1309 mov rax,rbx | |
1310 mov QWORD[16+rcx*1+rdi],r11 | |
1311 mov r13,rdx | |
1312 adc r13,0 | |
1313 lea rcx,[32+rcx] | |
1314 | |
1315 mul r14 | |
1316 add r10,rax | |
1317 mov rax,rbx | |
1318 mov r11,rdx | |
1319 adc r11,0 | |
1320 add r10,r12 | |
1321 adc r11,0 | |
1322 mov QWORD[((-8))+rcx*1+rdi],r10 | |
1323 | |
1324 cmp rcx,0 | |
1325 jne NEAR $L$sqr4x_1st | |
1326 | |
1327 mul r15 | |
1328 add r13,rax | |
1329 lea rbp,[16+rbp] | |
1330 adc rdx,0 | |
1331 add r13,r11 | |
1332 adc rdx,0 | |
1333 | |
1334 mov QWORD[rdi],r13 | |
1335 mov r12,rdx | |
1336 mov QWORD[8+rdi],rdx | |
1337 jmp NEAR $L$sqr4x_outer | |
1338 | |
1339 ALIGN 32 | |
1340 $L$sqr4x_outer: | |
1341 mov r14,QWORD[((-32))+rbp*1+rsi] | |
1342 lea rdi,[((48+8))+r9*2+rsp] | |
1343 mov rax,QWORD[((-24))+rbp*1+rsi] | |
1344 lea rdi,[((-32))+rbp*1+rdi] | |
1345 mov rbx,QWORD[((-16))+rbp*1+rsi] | |
1346 mov r15,rax | |
1347 | |
1348 mul r14 | |
1349 mov r10,QWORD[((-24))+rbp*1+rdi] | |
1350 add r10,rax | |
1351 mov rax,rbx | |
1352 adc rdx,0 | |
1353 mov QWORD[((-24))+rbp*1+rdi],r10 | |
1354 mov r11,rdx | |
1355 | |
1356 mul r14 | |
1357 add r11,rax | |
1358 mov rax,rbx | |
1359 adc rdx,0 | |
1360 add r11,QWORD[((-16))+rbp*1+rdi] | |
1361 mov r10,rdx | |
1362 adc r10,0 | |
1363 mov QWORD[((-16))+rbp*1+rdi],r11 | |
1364 | |
1365 xor r12,r12 | |
1366 | |
1367 mov rbx,QWORD[((-8))+rbp*1+rsi] | |
1368 mul r15 | |
1369 add r12,rax | |
1370 mov rax,rbx | |
1371 adc rdx,0 | |
1372 add r12,QWORD[((-8))+rbp*1+rdi] | |
1373 mov r13,rdx | |
1374 adc r13,0 | |
1375 | |
1376 mul r14 | |
1377 add r10,rax | |
1378 mov rax,rbx | |
1379 adc rdx,0 | |
1380 add r10,r12 | |
1381 mov r11,rdx | |
1382 adc r11,0 | |
1383 mov QWORD[((-8))+rbp*1+rdi],r10 | |
1384 | |
1385 lea rcx,[rbp] | |
1386 jmp NEAR $L$sqr4x_inner | |
1387 | |
1388 ALIGN 32 | |
1389 $L$sqr4x_inner: | |
1390 mov rbx,QWORD[rcx*1+rsi] | |
1391 mul r15 | |
1392 add r13,rax | |
1393 mov rax,rbx | |
1394 mov r12,rdx | |
1395 adc r12,0 | |
1396 add r13,QWORD[rcx*1+rdi] | |
1397 adc r12,0 | |
1398 | |
1399 DB 0x67 | |
1400 mul r14 | |
1401 add r11,rax | |
1402 mov rax,rbx | |
1403 mov rbx,QWORD[8+rcx*1+rsi] | |
1404 mov r10,rdx | |
1405 adc r10,0 | |
1406 add r11,r13 | |
1407 adc r10,0 | |
1408 | |
1409 mul r15 | |
1410 add r12,rax | |
1411 mov QWORD[rcx*1+rdi],r11 | |
1412 mov rax,rbx | |
1413 mov r13,rdx | |
1414 adc r13,0 | |
1415 add r12,QWORD[8+rcx*1+rdi] | |
1416 lea rcx,[16+rcx] | |
1417 adc r13,0 | |
1418 | |
1419 mul r14 | |
1420 add r10,rax | |
1421 mov rax,rbx | |
1422 adc rdx,0 | |
1423 add r10,r12 | |
1424 mov r11,rdx | |
1425 adc r11,0 | |
1426 mov QWORD[((-8))+rcx*1+rdi],r10 | |
1427 | |
1428 cmp rcx,0 | |
1429 jne NEAR $L$sqr4x_inner | |
1430 | |
1431 DB 0x67 | |
1432 mul r15 | |
1433 add r13,rax | |
1434 adc rdx,0 | |
1435 add r13,r11 | |
1436 adc rdx,0 | |
1437 | |
1438 mov QWORD[rdi],r13 | |
1439 mov r12,rdx | |
1440 mov QWORD[8+rdi],rdx | |
1441 | |
1442 add rbp,16 | |
1443 jnz NEAR $L$sqr4x_outer | |
1444 | |
1445 | |
1446 mov r14,QWORD[((-32))+rsi] | |
1447 lea rdi,[((48+8))+r9*2+rsp] | |
1448 mov rax,QWORD[((-24))+rsi] | |
1449 lea rdi,[((-32))+rbp*1+rdi] | |
1450 mov rbx,QWORD[((-16))+rsi] | |
1451 mov r15,rax | |
1452 | |
1453 mul r14 | |
1454 add r10,rax | |
1455 mov rax,rbx | |
1456 mov r11,rdx | |
1457 adc r11,0 | |
1458 | |
1459 mul r14 | |
1460 add r11,rax | |
1461 mov rax,rbx | |
1462 mov QWORD[((-24))+rdi],r10 | |
1463 mov r10,rdx | |
1464 adc r10,0 | |
1465 add r11,r13 | |
1466 mov rbx,QWORD[((-8))+rsi] | |
1467 adc r10,0 | |
1468 | |
1469 mul r15 | |
1470 add r12,rax | |
1471 mov rax,rbx | |
1472 mov QWORD[((-16))+rdi],r11 | |
1473 mov r13,rdx | |
1474 adc r13,0 | |
1475 | |
1476 mul r14 | |
1477 add r10,rax | |
1478 mov rax,rbx | |
1479 mov r11,rdx | |
1480 adc r11,0 | |
1481 add r10,r12 | |
1482 adc r11,0 | |
1483 mov QWORD[((-8))+rdi],r10 | |
1484 | |
1485 mul r15 | |
1486 add r13,rax | |
1487 mov rax,QWORD[((-16))+rsi] | |
1488 adc rdx,0 | |
1489 add r13,r11 | |
1490 adc rdx,0 | |
1491 | |
1492 mov QWORD[rdi],r13 | |
1493 mov r12,rdx | |
1494 mov QWORD[8+rdi],rdx | |
1495 | |
1496 mul rbx | |
1497 add rbp,16 | |
1498 xor r14,r14 | |
1499 sub rbp,r9 | |
1500 xor r15,r15 | |
1501 | |
1502 add rax,r12 | |
1503 adc rdx,0 | |
1504 mov QWORD[8+rdi],rax | |
1505 mov QWORD[16+rdi],rdx | |
1506 mov QWORD[24+rdi],r15 | |
1507 | |
1508 mov rax,QWORD[((-16))+rbp*1+rsi] | |
1509 lea rdi,[((48+8))+rsp] | |
1510 xor r10,r10 | |
1511 mov r11,QWORD[8+rdi] | |
1512 | |
1513 lea r12,[r10*2+r14] | |
1514 shr r10,63 | |
1515 lea r13,[r11*2+rcx] | |
1516 shr r11,63 | |
1517 or r13,r10 | |
1518 mov r10,QWORD[16+rdi] | |
1519 mov r14,r11 | |
1520 mul rax | |
1521 neg r15 | |
1522 mov r11,QWORD[24+rdi] | |
1523 adc r12,rax | |
1524 mov rax,QWORD[((-8))+rbp*1+rsi] | |
1525 mov QWORD[rdi],r12 | |
1526 adc r13,rdx | |
1527 | |
1528 lea rbx,[r10*2+r14] | |
1529 mov QWORD[8+rdi],r13 | |
1530 sbb r15,r15 | |
1531 shr r10,63 | |
1532 lea r8,[r11*2+rcx] | |
1533 shr r11,63 | |
1534 or r8,r10 | |
1535 mov r10,QWORD[32+rdi] | |
1536 mov r14,r11 | |
1537 mul rax | |
1538 neg r15 | |
1539 mov r11,QWORD[40+rdi] | |
1540 adc rbx,rax | |
1541 mov rax,QWORD[rbp*1+rsi] | |
1542 mov QWORD[16+rdi],rbx | |
1543 adc r8,rdx | |
1544 lea rbp,[16+rbp] | |
1545 mov QWORD[24+rdi],r8 | |
1546 sbb r15,r15 | |
1547 lea rdi,[64+rdi] | |
1548 jmp NEAR $L$sqr4x_shift_n_add | |
1549 | |
1550 ALIGN 32 | |
1551 $L$sqr4x_shift_n_add: | |
1552 lea r12,[r10*2+r14] | |
1553 shr r10,63 | |
1554 lea r13,[r11*2+rcx] | |
1555 shr r11,63 | |
1556 or r13,r10 | |
1557 mov r10,QWORD[((-16))+rdi] | |
1558 mov r14,r11 | |
1559 mul rax | |
1560 neg r15 | |
1561 mov r11,QWORD[((-8))+rdi] | |
1562 adc r12,rax | |
1563 mov rax,QWORD[((-8))+rbp*1+rsi] | |
1564 mov QWORD[((-32))+rdi],r12 | |
1565 adc r13,rdx | |
1566 | |
1567 lea rbx,[r10*2+r14] | |
1568 mov QWORD[((-24))+rdi],r13 | |
1569 sbb r15,r15 | |
1570 shr r10,63 | |
1571 lea r8,[r11*2+rcx] | |
1572 shr r11,63 | |
1573 or r8,r10 | |
1574 mov r10,QWORD[rdi] | |
1575 mov r14,r11 | |
1576 mul rax | |
1577 neg r15 | |
1578 mov r11,QWORD[8+rdi] | |
1579 adc rbx,rax | |
1580 mov rax,QWORD[rbp*1+rsi] | |
1581 mov QWORD[((-16))+rdi],rbx | |
1582 adc r8,rdx | |
1583 | |
1584 lea r12,[r10*2+r14] | |
1585 mov QWORD[((-8))+rdi],r8 | |
1586 sbb r15,r15 | |
1587 shr r10,63 | |
1588 lea r13,[r11*2+rcx] | |
1589 shr r11,63 | |
1590 or r13,r10 | |
1591 mov r10,QWORD[16+rdi] | |
1592 mov r14,r11 | |
1593 mul rax | |
1594 neg r15 | |
1595 mov r11,QWORD[24+rdi] | |
1596 adc r12,rax | |
1597 mov rax,QWORD[8+rbp*1+rsi] | |
1598 mov QWORD[rdi],r12 | |
1599 adc r13,rdx | |
1600 | |
1601 lea rbx,[r10*2+r14] | |
1602 mov QWORD[8+rdi],r13 | |
1603 sbb r15,r15 | |
1604 shr r10,63 | |
1605 lea r8,[r11*2+rcx] | |
1606 shr r11,63 | |
1607 or r8,r10 | |
1608 mov r10,QWORD[32+rdi] | |
1609 mov r14,r11 | |
1610 mul rax | |
1611 neg r15 | |
1612 mov r11,QWORD[40+rdi] | |
1613 adc rbx,rax | |
1614 mov rax,QWORD[16+rbp*1+rsi] | |
1615 mov QWORD[16+rdi],rbx | |
1616 adc r8,rdx | |
1617 mov QWORD[24+rdi],r8 | |
1618 sbb r15,r15 | |
1619 lea rdi,[64+rdi] | |
1620 add rbp,32 | |
1621 jnz NEAR $L$sqr4x_shift_n_add | |
1622 | |
1623 lea r12,[r10*2+r14] | |
1624 DB 0x67 | |
1625 shr r10,63 | |
1626 lea r13,[r11*2+rcx] | |
1627 shr r11,63 | |
1628 or r13,r10 | |
1629 mov r10,QWORD[((-16))+rdi] | |
1630 mov r14,r11 | |
1631 mul rax | |
1632 neg r15 | |
1633 mov r11,QWORD[((-8))+rdi] | |
1634 adc r12,rax | |
1635 mov rax,QWORD[((-8))+rsi] | |
1636 mov QWORD[((-32))+rdi],r12 | |
1637 adc r13,rdx | |
1638 | |
1639 lea rbx,[r10*2+r14] | |
1640 mov QWORD[((-24))+rdi],r13 | |
1641 sbb r15,r15 | |
1642 shr r10,63 | |
1643 lea r8,[r11*2+rcx] | |
1644 shr r11,63 | |
1645 or r8,r10 | |
1646 mul rax | |
1647 neg r15 | |
1648 adc rbx,rax | |
1649 adc r8,rdx | |
1650 mov QWORD[((-16))+rdi],rbx | |
1651 mov QWORD[((-8))+rdi],r8 | |
1652 DB 102,72,15,126,213 | |
; ---------------------------------------------------------------------
; __bn_sqr8x_reduction -- internal helper (no Win64 prologue of its own).
; It is entered by `call` from bn_from_mont8x and is also reached by
; fall-through from the code directly above this label.
;
; All rsp-relative offsets carry an extra +8 for the return address, so
; (32+8)+rsp here is the slot the caller wrote as [32+rsp].
;
; In (as read by the code below):
;   rbp         pointer to the qword vector multiplied in, 8 words/pass
;   r9          its length in bytes (positive on entry)
;   [rsp+40]    64-bit factor used to derive each multiplier (imul)
;   [rsp+56..]  working vector of 2*r9 bytes, reduced in place
;   xmm2, xmm3  values reloaded into rbp and r9 after every outer pass
; Scratch slots: [rsp+8] = rbp+r9 (end of vector), [rsp+16] = end of the
;   working vector; [rsp+48 .. rsp+112] hold the 8 multipliers of a pass.
; Out: rax = carry out of the last pass (0 or 1); rdi is at/after the end
;   of the working vector; rbp/r9 hold the xmm2/xmm3 values.
; Clobbers: rbx, rcx, rdx, rsi, r8-r15, flags.
;
; NOTE(review): this reads as a word-serial Montgomery reduction with
; rbp = modulus and [rsp+40] = n0; those names are inferred, confirm
; against the generating perlasm source.
; ---------------------------------------------------------------------
1653 __bn_sqr8x_reduction: | |
1654 xor rax,rax | |
1655 lea rcx,[rbp*1+r9] | |
1656 lea rdx,[((48+8))+r9*2+rsp] | |
1657 mov QWORD[((0+8))+rsp],rcx | |
1658 lea rdi,[((48+8))+r9*1+rsp] | |
1659 mov QWORD[((8+8))+rsp],rdx | |
1660 neg r9 | |
1661 jmp NEAR $L$8x_reduction_loop | |
1662 | |
1663 ALIGN 32 | |
; Outer pass: step rdi back by the (negative) r9, load the next eight
; working words into rbx,r9..r15 and park the previous carry at [rdx].
1664 $L$8x_reduction_loop: | |
1665 lea rdi,[r9*1+rdi] | |
; Raw 0x66 prefix byte ahead of the next instruction -- presumably
; padding for code alignment; confirm before touching.
1666 DB 0x66 | |
1667 mov rbx,QWORD[rdi] | |
1668 mov r9,QWORD[8+rdi] | |
1669 mov r10,QWORD[16+rdi] | |
1670 mov r11,QWORD[24+rdi] | |
1671 mov r12,QWORD[32+rdi] | |
1672 mov r13,QWORD[40+rdi] | |
1673 mov r14,QWORD[48+rdi] | |
1674 mov r15,QWORD[56+rdi] | |
1675 mov QWORD[rdx],rax | |
1676 lea rdi,[64+rdi] | |
1677 | |
1678 DB 0x67 | |
; r8 keeps the lowest word; rbx becomes the first multiplier
; (low 64 bits of word * [rsp+40]).
1679 mov r8,rbx | |
1680 imul rbx,QWORD[((32+8))+rsp] | |
1681 mov rax,QWORD[rbp] | |
1682 mov ecx,8 | |
1683 jmp NEAR $L$8x_reduce | |
1684 | |
1685 ALIGN 32 | |
; Inner loop, 8 iterations: multiply the first eight words at rbp by the
; current multiplier rbx and fold the products into r8..r15, which shift
; down by one word each iteration.
1686 $L$8x_reduce: | |
1687 mul rbx | |
1688 mov rax,QWORD[8+rbp] | |
; neg is used only for CF (set when r8 != 0); r8 itself is overwritten
; by the next mov, which leaves flags intact for the adc.
1689 neg r8 | |
1690 mov r8,rdx | |
1691 adc r8,0 | |
1692 | |
1693 mul rbx | |
1694 add r9,rax | |
1695 mov rax,QWORD[16+rbp] | |
1696 adc rdx,0 | |
1697 add r8,r9 | |
; Save this iteration's multiplier at [rsp+48+rcx*8] for the tail loop.
1698 mov QWORD[((48-8+8))+rcx*8+rsp],rbx | |
1699 mov r9,rdx | |
1700 adc r9,0 | |
1701 | |
1702 mul rbx | |
1703 add r10,rax | |
1704 mov rax,QWORD[24+rbp] | |
1705 adc rdx,0 | |
1706 add r9,r10 | |
; Start computing the next multiplier in rsi: [rsp+40] * new low word r8.
1707 mov rsi,QWORD[((32+8))+rsp] | |
1708 mov r10,rdx | |
1709 adc r10,0 | |
1710 | |
1711 mul rbx | |
1712 add r11,rax | |
1713 mov rax,QWORD[32+rbp] | |
1714 adc rdx,0 | |
1715 imul rsi,r8 | |
1716 add r10,r11 | |
1717 mov r11,rdx | |
1718 adc r11,0 | |
1719 | |
1720 mul rbx | |
1721 add r12,rax | |
1722 mov rax,QWORD[40+rbp] | |
1723 adc rdx,0 | |
1724 add r11,r12 | |
1725 mov r12,rdx | |
1726 adc r12,0 | |
1727 | |
1728 mul rbx | |
1729 add r13,rax | |
1730 mov rax,QWORD[48+rbp] | |
1731 adc rdx,0 | |
1732 add r12,r13 | |
1733 mov r13,rdx | |
1734 adc r13,0 | |
1735 | |
1736 mul rbx | |
1737 add r14,rax | |
1738 mov rax,QWORD[56+rbp] | |
1739 adc rdx,0 | |
1740 add r13,r14 | |
1741 mov r14,rdx | |
1742 adc r14,0 | |
1743 | |
1744 mul rbx | |
1745 mov rbx,rsi | |
1746 add r15,rax | |
1747 mov rax,QWORD[rbp] | |
1748 adc rdx,0 | |
1749 add r14,r15 | |
1750 mov r15,rdx | |
1751 adc r15,0 | |
1752 | |
1753 dec ecx | |
1754 jnz NEAR $L$8x_reduce | |
1755 | |
; Advance to the next eight words at rbp; if that reaches the end pointer
; saved at [rsp+8] there is no tail. The cmp leaves CF=0 on the jae path,
; which the adc chain at $L$8x_no_tail relies on.
1756 lea rbp,[64+rbp] | |
1757 xor rax,rax | |
1758 mov rdx,QWORD[((8+8))+rsp] | |
1759 cmp rbp,QWORD[((0+8))+rsp] | |
1760 jae NEAR $L$8x_no_tail | |
1761 | |
1762 DB 0x66 | |
; Fold the next eight working words into r8..r15; rsi = 0/-1 remembers
; the carry across the multiply loop.
1763 add r8,QWORD[rdi] | |
1764 adc r9,QWORD[8+rdi] | |
1765 adc r10,QWORD[16+rdi] | |
1766 adc r11,QWORD[24+rdi] | |
1767 adc r12,QWORD[32+rdi] | |
1768 adc r13,QWORD[40+rdi] | |
1769 adc r14,QWORD[48+rdi] | |
1770 adc r15,QWORD[56+rdi] | |
1771 sbb rsi,rsi | |
1772 | |
; [rsp+112] is the multiplier saved when rcx was 8, i.e. the first one.
1773 mov rbx,QWORD[((48+56+8))+rsp] | |
1774 mov ecx,8 | |
1775 mov rax,QWORD[rbp] | |
1776 jmp NEAR $L$8x_tail | |
1777 | |
1778 ALIGN 32 | |
; Tail loop, 8 iterations: apply the saved multipliers to the current
; eight words at rbp, storing one finished word to [rdi] per iteration.
1779 $L$8x_tail: | |
1780 mul rbx | |
1781 add r8,rax | |
1782 mov rax,QWORD[8+rbp] | |
1783 mov QWORD[rdi],r8 | |
1784 mov r8,rdx | |
1785 adc r8,0 | |
1786 | |
1787 mul rbx | |
1788 add r9,rax | |
1789 mov rax,QWORD[16+rbp] | |
1790 adc rdx,0 | |
1791 add r8,r9 | |
1792 lea rdi,[8+rdi] | |
1793 mov r9,rdx | |
1794 adc r9,0 | |
1795 | |
1796 mul rbx | |
1797 add r10,rax | |
1798 mov rax,QWORD[24+rbp] | |
1799 adc rdx,0 | |
1800 add r9,r10 | |
1801 mov r10,rdx | |
1802 adc r10,0 | |
1803 | |
1804 mul rbx | |
1805 add r11,rax | |
1806 mov rax,QWORD[32+rbp] | |
1807 adc rdx,0 | |
1808 add r10,r11 | |
1809 mov r11,rdx | |
1810 adc r11,0 | |
1811 | |
1812 mul rbx | |
1813 add r12,rax | |
1814 mov rax,QWORD[40+rbp] | |
1815 adc rdx,0 | |
1816 add r11,r12 | |
1817 mov r12,rdx | |
1818 adc r12,0 | |
1819 | |
1820 mul rbx | |
1821 add r13,rax | |
1822 mov rax,QWORD[48+rbp] | |
1823 adc rdx,0 | |
1824 add r12,r13 | |
1825 mov r13,rdx | |
1826 adc r13,0 | |
1827 | |
1828 mul rbx | |
1829 add r14,rax | |
1830 mov rax,QWORD[56+rbp] | |
1831 adc rdx,0 | |
1832 add r13,r14 | |
1833 mov r14,rdx | |
1834 adc r14,0 | |
1835 | |
1836 mul rbx | |
; Fetch the next saved multiplier from [rsp+40+rcx*8].
1837 mov rbx,QWORD[((48-16+8))+rcx*8+rsp] | |
1838 add r15,rax | |
1839 adc rdx,0 | |
1840 add r14,r15 | |
1841 mov rax,QWORD[rbp] | |
1842 mov r15,rdx | |
1843 adc r15,0 | |
1844 | |
1845 dec ecx | |
1846 jnz NEAR $L$8x_tail | |
1847 | |
1848 lea rbp,[64+rbp] | |
1849 mov rdx,QWORD[((8+8))+rsp] | |
1850 cmp rbp,QWORD[((0+8))+rsp] | |
1851 jae NEAR $L$8x_tail_done | |
1852 | |
; More words remain at rbp: restore the saved carry (neg rsi -> CF),
; fold in the next eight working words and run the tail loop again.
1853 mov rbx,QWORD[((48+56+8))+rsp] | |
1854 neg rsi | |
1855 mov rax,QWORD[rbp] | |
1856 adc r8,QWORD[rdi] | |
1857 adc r9,QWORD[8+rdi] | |
1858 adc r10,QWORD[16+rdi] | |
1859 adc r11,QWORD[24+rdi] | |
1860 adc r12,QWORD[32+rdi] | |
1861 adc r13,QWORD[40+rdi] | |
1862 adc r14,QWORD[48+rdi] | |
1863 adc r15,QWORD[56+rdi] | |
1864 sbb rsi,rsi | |
1865 | |
1866 mov ecx,8 | |
1867 jmp NEAR $L$8x_tail | |
1868 | |
1869 ALIGN 32 | |
; End of the tail: add the carry word parked at [rdx] by the outer loop
; and ripple it through r9..r15.
1870 $L$8x_tail_done: | |
1871 add r8,QWORD[rdx] | |
1872 adc r9,0 | |
1873 adc r10,0 | |
1874 adc r11,0 | |
1875 adc r12,0 | |
1876 adc r13,0 | |
1877 adc r14,0 | |
1878 adc r15,0 | |
1879 | |
1880 | |
1881 xor rax,rax | |
1882 | |
; Re-materialise the carry saved in rsi as CF for the adc chain below.
1883 neg rsi | |
1884 $L$8x_no_tail: | |
1885 adc r8,QWORD[rdi] | |
1886 adc r9,QWORD[8+rdi] | |
1887 adc r10,QWORD[16+rdi] | |
1888 adc r11,QWORD[24+rdi] | |
1889 adc r12,QWORD[32+rdi] | |
1890 adc r13,QWORD[40+rdi] | |
1891 adc r14,QWORD[48+rdi] | |
1892 adc r15,QWORD[56+rdi] | |
; rax (zeroed on both paths) captures the carry out of this pass.
1893 adc rax,0 | |
1894 mov rcx,QWORD[((-8))+rbp] | |
1895 xor rsi,rsi | |
1896 | |
; Bytes 66 48 0F 7E D5 encode `movq rbp,xmm2`: reset rbp for the next pass.
1897 DB 102,72,15,126,213 | |
1898 | |
1899 mov QWORD[rdi],r8 | |
1900 mov QWORD[8+rdi],r9 | |
; Bytes 66 49 0F 7E D9 encode `movq r9,xmm3`: reload r9 (r9 was used as
; an accumulator above, and has already been stored).
1901 DB 102,73,15,126,217 | |
1902 mov QWORD[16+rdi],r10 | |
1903 mov QWORD[24+rdi],r11 | |
1904 mov QWORD[32+rdi],r12 | |
1905 mov QWORD[40+rdi],r13 | |
1906 mov QWORD[48+rdi],r14 | |
1907 mov QWORD[56+rdi],r15 | |
1908 lea rdi,[64+rdi] | |
1909 | |
; Repeat until rdi reaches the end of the working vector (rdx).
1910 cmp rdi,rdx | |
1911 jb NEAR $L$8x_reduction_loop | |
1912 DB 0F3h,0C3h ;repret | |
1913 | |
1914 | |
1915 ALIGN 32 | |
; ---------------------------------------------------------------------
; __bn_post4x_internal -- internal helper: masked add of the negated
; vector at rbp to the vector at rdi+r9, four qwords per iteration,
; written to the address held in xmm1.
;
; In:
;   rax   0 or 1; negated below into an all-zeros / all-ones mask
;   rbp   pointer to the vector that is complemented and masked
;   rdi   pointer such that rdi+r9 is the source vector
;   r9    byte count; the loop counts r9>>5 up to zero, so it must be
;         negative on entry (bn_from_mont8x passes the negated length)
;   xmm1  destination pointer (loaded into both rdi and rsi)
; Out: r10 = incoming r9, r9 = -incoming r9; rbp, rbx, rdi advanced.
; Clobbers: rax, rcx, rsi, r12-r15, flags.
;
; The first word is decremented before the NOT (~(x-1) == -x) and every
; later word is only NOT-ed, so with an all-ones mask the adc chain adds
; the two's-complement negation of [rbp], i.e. subtracts it.
; NOTE(review): presumably the final conditional subtraction of the
; modulus after Montgomery reduction -- confirm against callers.
; ---------------------------------------------------------------------
1916 __bn_post4x_internal: | |
1917 mov r12,QWORD[rbp] | |
1918 lea rbx,[r9*1+rdi] | |
1919 mov rcx,r9 | |
; Bytes 66 48 0F 7E CF encode `movq rdi,xmm1`.
1920 DB 102,72,15,126,207 | |
1921 neg rax | |
; Bytes 66 48 0F 7E CE encode `movq rsi,xmm1`.
1922 DB 102,72,15,126,206 | |
; rcx = r9 / 32 (arithmetic shift): number of 4-qword groups, negative.
1923 sar rcx,3+2 | |
1924 dec r12 | |
; r10 carries the inter-iteration carry as 0 / -1.
1925 xor r10,r10 | |
1926 mov r13,QWORD[8+rbp] | |
1927 mov r14,QWORD[16+rbp] | |
1928 mov r15,QWORD[24+rbp] | |
1929 jmp NEAR $L$sqr4x_sub_entry | |
1930 | |
1931 ALIGN 16 | |
1932 $L$sqr4x_sub: | |
1933 mov r12,QWORD[rbp] | |
1934 mov r13,QWORD[8+rbp] | |
1935 mov r14,QWORD[16+rbp] | |
1936 mov r15,QWORD[24+rbp] | |
1937 $L$sqr4x_sub_entry: | |
1938 lea rbp,[32+rbp] | |
1939 not r12 | |
1940 not r13 | |
1941 not r14 | |
1942 not r15 | |
1943 and r12,rax | |
1944 and r13,rax | |
1945 and r14,rax | |
1946 and r15,rax | |
1947 | |
; Restore the carry from the previous group into CF.
1948 neg r10 | |
1949 adc r12,QWORD[rbx] | |
1950 adc r13,QWORD[8+rbx] | |
1951 adc r14,QWORD[16+rbx] | |
1952 adc r15,QWORD[24+rbx] | |
1953 mov QWORD[rdi],r12 | |
1954 lea rbx,[32+rbx] | |
1955 mov QWORD[8+rdi],r13 | |
; Save CF for the next group (mov/lea around it do not touch flags).
1956 sbb r10,r10 | |
1957 mov QWORD[16+rdi],r14 | |
1958 mov QWORD[24+rdi],r15 | |
1959 lea rdi,[32+rdi] | |
1960 | |
1961 inc rcx | |
1962 jnz NEAR $L$sqr4x_sub | |
1963 | |
1964 mov r10,r9 | |
1965 neg r9 | |
1966 DB 0F3h,0C3h ;repret | |
1967 | |
1968 global bn_from_montgomery | |
1969 | |
1970 ALIGN 32 | |
; bn_from_montgomery -- exported entry point (Win64 calling convention).
; Tests the low three bits of the dword at [rsp+48], the sixth-argument
; stack slot (bn_from_mont8x loads the same slot into r9). If they are
; all zero it tail-jumps to bn_from_mont8x with the arguments untouched;
; otherwise it returns 0 in eax without doing any work.
1971 bn_from_montgomery: | |
1972 test DWORD[48+rsp],7 | |
1973 jz NEAR bn_from_mont8x | |
1974 xor eax,eax | |
1975 DB 0F3h,0C3h ;repret | |
1976 | |
1977 | |
1978 | |
1979 ALIGN 32 | |
; ---------------------------------------------------------------------
; bn_from_mont8x -- reached from bn_from_montgomery when the sixth
; argument is a multiple of 8. Win64 entry; arguments are remapped to the
; register layout the body expects:
;   rdi <- rcx (arg1)   rsi <- rdx (arg2)   rdx <- r8 (arg3)
;   rcx <- r9  (arg4)   r8  <- [rsp+40] (arg5)   r9 <- [rsp+48] (arg6)
; Visible use of the arguments: 8*arg6 bytes are copied from rsi (arg2)
; into a stack frame, rcx (arg4) is handed to __bn_sqr8x_reduction in
; rbp/xmm2, the qword at [r8] (arg5) is stored at [rsp+32], and rdi
; (arg1) is handed to __bn_post4x_internal in xmm1 as the destination.
; NOTE(review): presumably (rp, ap, <unused>, np, n0, num) -- names are
; inferred, confirm against the C prototype.
; Returns 1 in rax. Restores rbx, rbp, r12-r15, rdi, rsi.
; The frame layout and label positions are consumed by mul_handler via
; the .xdata HandlerData ($L$from_body / $L$from_epilogue).
; ---------------------------------------------------------------------
1980 bn_from_mont8x: | |
1981 mov QWORD[8+rsp],rdi ;WIN64 prologue | |
1982 mov QWORD[16+rsp],rsi | |
1983 mov rax,rsp | |
1984 $L$SEH_begin_bn_from_mont8x: | |
1985 mov rdi,rcx | |
1986 mov rsi,rdx | |
1987 mov rdx,r8 | |
1988 mov rcx,r9 | |
1989 mov r8,QWORD[40+rsp] | |
1990 mov r9,QWORD[48+rsp] | |
1991 | |
1992 | |
1993 DB 0x67 | |
; rax keeps the entry rsp; it is stored in the frame at [rsp+40] below.
1994 mov rax,rsp | |
1995 push rbx | |
1996 push rbp | |
1997 push r12 | |
1998 push r13 | |
1999 push r14 | |
2000 push r15 | |
2001 | |
; r9 = count*8 (bytes; the 32-bit shift zero-extends), r10 = 3*r9,
; then r9 is negated; r8 is replaced by the qword it points to.
2002 shl r9d,3 | |
2003 lea r10,[r9*2+r9] | |
2004 neg r9 | |
2005 mov r8,QWORD[r8] | |
2006 | |
2007 | |
2008 | |
2009 | |
2010 | |
2011 | |
2012 | |
2013 | |
; Choose the frame address from the distance between the candidate frame
; and rdi modulo 4096 (r11), compared against 3*len (r10).
; NOTE(review): presumably keeps the frame from aliasing the rdi buffer
; modulo the page size -- confirm against the perlasm source comments.
2014 lea r11,[((-320))+r9*2+rsp] | |
2015 sub r11,rdi | |
2016 and r11,4095 | |
2017 cmp r10,r11 | |
2018 jb NEAR $L$from_sp_alt | |
2019 sub rsp,r11 | |
2020 lea rsp,[((-320))+r9*2+rsp] | |
2021 jmp NEAR $L$from_sp_done | |
2022 | |
2023 ALIGN 32 | |
2024 $L$from_sp_alt: | |
2025 lea r10,[((4096-320))+r9*2] | |
2026 lea rsp,[((-320))+r9*2+rsp] | |
2027 sub r11,r10 | |
; mov (not xor) so that CF from the sub survives for the cmovc.
2028 mov r10,0 | |
2029 cmovc r11,r10 | |
2030 sub rsp,r11 | |
2031 $L$from_sp_done: | |
2032 and rsp,-64 | |
; r10 = -len (bytes), r9 = +len.
2033 mov r10,r9 | |
2034 neg r9 | |
2035 | |
2036 | |
2037 | |
2038 | |
2039 | |
2040 | |
2041 | |
2042 | |
2043 | |
2044 | |
; Frame slots: [rsp+32] = qword loaded from arg5, [rsp+40] = entry rsp.
2045 mov QWORD[32+rsp],r8 | |
2046 mov QWORD[40+rsp],rax | |
2047 $L$from_body: | |
2048 mov r11,r9 | |
2049 lea rax,[48+rsp] | |
2050 pxor xmm0,xmm0 | |
2051 jmp NEAR $L$mul_by_1 | |
2052 | |
2053 ALIGN 32 | |
; Build the working vector at rsp+48, 64 bytes per iteration: the low
; r9 bytes are a copy of the input at rsi (unaligned loads), the
; following r9 bytes are zeroed.
2054 $L$mul_by_1: | |
2055 movdqu xmm1,XMMWORD[rsi] | |
2056 movdqu xmm2,XMMWORD[16+rsi] | |
2057 movdqu xmm3,XMMWORD[32+rsi] | |
2058 movdqa XMMWORD[r9*1+rax],xmm0 | |
2059 movdqu xmm4,XMMWORD[48+rsi] | |
2060 movdqa XMMWORD[16+r9*1+rax],xmm0 | |
; Bytes 48 8D B6 40 00 00 00 encode `lea rsi,[rsi+64]` with a 32-bit
; displacement (a longer encoding than the assembler would pick).
2061 DB 0x48,0x8d,0xb6,0x40,0x00,0x00,0x00 | |
2062 movdqa XMMWORD[rax],xmm1 | |
2063 movdqa XMMWORD[32+r9*1+rax],xmm0 | |
2064 movdqa XMMWORD[16+rax],xmm2 | |
2065 movdqa XMMWORD[48+r9*1+rax],xmm0 | |
2066 movdqa XMMWORD[32+rax],xmm3 | |
2067 movdqa XMMWORD[48+rax],xmm4 | |
2068 lea rax,[64+rax] | |
2069 sub r11,64 | |
2070 jnz NEAR $L$mul_by_1 | |
2071 | |
; Stash values the helpers reload: the DB lines encode
; `movq xmm1,rdi`, `movq xmm2,rcx` and `movq xmm3,r10` respectively.
2072 DB 102,72,15,110,207 | |
2073 DB 102,72,15,110,209 | |
2074 DB 0x67 | |
2075 mov rbp,rcx | |
2076 DB 102,73,15,110,218 | |
2077 call __bn_sqr8x_reduction | |
2078 call __bn_post4x_internal | |
2079 | |
2080 pxor xmm0,xmm0 | |
2081 lea rax,[48+rsp] | |
2082 mov rsi,QWORD[40+rsp] | |
2083 jmp NEAR $L$from_mont_zero | |
2084 | |
2085 ALIGN 32 | |
; Wipe the working vector: 64 bytes are cleared per 32 counted in r9,
; i.e. 2*r9 bytes starting at rsp+48.
2086 $L$from_mont_zero: | |
2087 movdqa XMMWORD[rax],xmm0 | |
2088 movdqa XMMWORD[16+rax],xmm0 | |
2089 movdqa XMMWORD[32+rax],xmm0 | |
2090 movdqa XMMWORD[48+rax],xmm0 | |
2091 lea rax,[64+rax] | |
2092 sub r9,32 | |
2093 jnz NEAR $L$from_mont_zero | |
2094 | |
; Return value 1; restore callee-saved registers relative to the saved
; entry rsp (rsi) and switch back to the caller's stack.
2095 mov rax,1 | |
2096 mov r15,QWORD[((-48))+rsi] | |
2097 mov r14,QWORD[((-40))+rsi] | |
2098 mov r13,QWORD[((-32))+rsi] | |
2099 mov r12,QWORD[((-24))+rsi] | |
2100 mov rbp,QWORD[((-16))+rsi] | |
2101 mov rbx,QWORD[((-8))+rsi] | |
2102 lea rsp,[rsi] | |
2103 $L$from_epilogue: | |
2104 mov rdi,QWORD[8+rsp] ;WIN64 epilogue | |
2105 mov rsi,QWORD[16+rsp] | |
2106 DB 0F3h,0C3h ;repret | |
2107 $L$SEH_end_bn_from_mont8x: | |
2108 global bn_scatter5 | |
2109 | |
2110 ALIGN 16 | |
; bn_scatter5 -- exported leaf function (Win64 argument registers).
; In:  rcx = source pointer (qwords), edx = number of qwords,
;      r8 = destination base, r9 = qword index added to the base.
; Copies edx qwords from [rcx] to r8 + r9*8, advancing the destination
; by 256 bytes after each store, so consecutive source words land 256
; bytes apart. Returns immediately when edx is 0.
; Clobbers: rax, rcx, edx, r8, flags. Touches no callee-saved register.
2111 bn_scatter5: | |
2112 cmp edx,0 | |
2113 jz NEAR $L$scatter_epilogue | |
2114 lea r8,[r9*8+r8] | |
2115 $L$scatter: | |
2116 mov rax,QWORD[rcx] | |
2117 lea rcx,[8+rcx] | |
2118 mov QWORD[r8],rax | |
2119 lea r8,[256+r8] | |
2120 sub edx,1 | |
2121 jnz NEAR $L$scatter | |
2122 $L$scatter_epilogue: | |
2123 DB 0F3h,0C3h ;repret | |
2124 | |
2125 | |
2126 global bn_gather5 | |
2127 | |
2128 ALIGN 32 | |
; ---------------------------------------------------------------------
; bn_gather5 -- exported function (Win64 argument registers).
; In:  rcx = destination pointer (qwords), edx = number of qwords,
;      r8 = table base, r9d = index to select.
; For each output qword the loop reads a full 256-byte row of the table,
; ANDs it with 16 precomputed 128-bit masks and ORs everything together,
; so every row entry is read regardless of the index and the memory
; access pattern does not depend on r9d. Only indices 0..31 produce a
; non-zero mask.
; The table is read with movdqa, so r8 must be 16-byte aligned.
; There is no check for edx == 0; the loop body always runs at least once.
; Uses 264 bytes of stack for the masks; r10 holds the entry rsp.
; Clobbers: rax, rcx, edx, r10, r11, xmm0-xmm5, flags.
;
; The first two instructions are emitted as raw bytes because the unwind
; codes in $L$SEH_info_bn_gather5 describe their exact sizes/offsets.
; ---------------------------------------------------------------------
2129 bn_gather5: | |
2130 $L$SEH_begin_bn_gather5: | |
2131 | |
; 4C 8D 14 24          = lea r10,[rsp]
; 48 81 EC 08 01 00 00 = sub rsp,0x108
2132 DB 0x4c,0x8d,0x14,0x24 | |
2133 DB 0x48,0x81,0xec,0x08,0x01,0x00,0x00 | |
2134 lea rax,[$L$inc] | |
2135 and rsp,-16 | |
2136 | |
; xmm5 = index broadcast to all four dwords (after the pshufd below);
; xmm0 = {0,0,1,1} running counters, xmm1 = xmm4 = {2,2,2,2} step.
; r11 and rax are biased by +128 so the 256-byte row and mask area are
; addressed with displacements -128..+112.
2137 movd xmm5,r9d | |
2138 movdqa xmm0,XMMWORD[rax] | |
2139 movdqa xmm1,XMMWORD[16+rax] | |
2140 lea r11,[128+r8] | |
2141 lea rax,[128+rsp] | |
2142 | |
; Build 16 masks at [rax-128 .. rax+127]: mask k compares the counter
; pair {2k,2k,2k+1,2k+1} with the index, giving an all-ones qword lane
; for the matching entry and zeros elsewhere.
2143 pshufd xmm5,xmm5,0 | |
2144 movdqa xmm4,xmm1 | |
2145 movdqa xmm2,xmm1 | |
2146 paddd xmm1,xmm0 | |
2147 pcmpeqd xmm0,xmm5 | |
2148 movdqa xmm3,xmm4 | |
2149 | |
2150 paddd xmm2,xmm1 | |
2151 pcmpeqd xmm1,xmm5 | |
2152 movdqa XMMWORD[(-128)+rax],xmm0 | |
2153 movdqa xmm0,xmm4 | |
2154 | |
2155 paddd xmm3,xmm2 | |
2156 pcmpeqd xmm2,xmm5 | |
2157 movdqa XMMWORD[(-112)+rax],xmm1 | |
2158 movdqa xmm1,xmm4 | |
2159 | |
2160 paddd xmm0,xmm3 | |
2161 pcmpeqd xmm3,xmm5 | |
2162 movdqa XMMWORD[(-96)+rax],xmm2 | |
2163 movdqa xmm2,xmm4 | |
2164 paddd xmm1,xmm0 | |
2165 pcmpeqd xmm0,xmm5 | |
2166 movdqa XMMWORD[(-80)+rax],xmm3 | |
2167 movdqa xmm3,xmm4 | |
2168 | |
2169 paddd xmm2,xmm1 | |
2170 pcmpeqd xmm1,xmm5 | |
2171 movdqa XMMWORD[(-64)+rax],xmm0 | |
2172 movdqa xmm0,xmm4 | |
2173 | |
2174 paddd xmm3,xmm2 | |
2175 pcmpeqd xmm2,xmm5 | |
2176 movdqa XMMWORD[(-48)+rax],xmm1 | |
2177 movdqa xmm1,xmm4 | |
2178 | |
2179 paddd xmm0,xmm3 | |
2180 pcmpeqd xmm3,xmm5 | |
2181 movdqa XMMWORD[(-32)+rax],xmm2 | |
2182 movdqa xmm2,xmm4 | |
2183 paddd xmm1,xmm0 | |
2184 pcmpeqd xmm0,xmm5 | |
2185 movdqa XMMWORD[(-16)+rax],xmm3 | |
2186 movdqa xmm3,xmm4 | |
2187 | |
2188 paddd xmm2,xmm1 | |
2189 pcmpeqd xmm1,xmm5 | |
2190 movdqa XMMWORD[rax],xmm0 | |
2191 movdqa xmm0,xmm4 | |
2192 | |
2193 paddd xmm3,xmm2 | |
2194 pcmpeqd xmm2,xmm5 | |
2195 movdqa XMMWORD[16+rax],xmm1 | |
2196 movdqa xmm1,xmm4 | |
2197 | |
2198 paddd xmm0,xmm3 | |
2199 pcmpeqd xmm3,xmm5 | |
2200 movdqa XMMWORD[32+rax],xmm2 | |
2201 movdqa xmm2,xmm4 | |
2202 paddd xmm1,xmm0 | |
2203 pcmpeqd xmm0,xmm5 | |
2204 movdqa XMMWORD[48+rax],xmm3 | |
2205 movdqa xmm3,xmm4 | |
2206 | |
2207 paddd xmm2,xmm1 | |
2208 pcmpeqd xmm1,xmm5 | |
2209 movdqa XMMWORD[64+rax],xmm0 | |
2210 movdqa xmm0,xmm4 | |
2211 | |
2212 paddd xmm3,xmm2 | |
2213 pcmpeqd xmm2,xmm5 | |
2214 movdqa XMMWORD[80+rax],xmm1 | |
2215 movdqa xmm1,xmm4 | |
2216 | |
2217 paddd xmm0,xmm3 | |
2218 pcmpeqd xmm3,xmm5 | |
2219 movdqa XMMWORD[96+rax],xmm2 | |
2220 movdqa xmm2,xmm4 | |
2221 movdqa XMMWORD[112+rax],xmm3 | |
2222 jmp NEAR $L$gather | |
2223 | |
2224 ALIGN 32 | |
; One output qword per iteration: AND the 256-byte row at r11-128 with
; the masks, accumulate into xmm4/xmm5, then fold to 64 bits.
2225 $L$gather: | |
2226 pxor xmm4,xmm4 | |
2227 pxor xmm5,xmm5 | |
2228 movdqa xmm0,XMMWORD[((-128))+r11] | |
2229 movdqa xmm1,XMMWORD[((-112))+r11] | |
2230 movdqa xmm2,XMMWORD[((-96))+r11] | |
2231 pand xmm0,XMMWORD[((-128))+rax] | |
2232 movdqa xmm3,XMMWORD[((-80))+r11] | |
2233 pand xmm1,XMMWORD[((-112))+rax] | |
2234 por xmm4,xmm0 | |
2235 pand xmm2,XMMWORD[((-96))+rax] | |
2236 por xmm5,xmm1 | |
2237 pand xmm3,XMMWORD[((-80))+rax] | |
2238 por xmm4,xmm2 | |
2239 por xmm5,xmm3 | |
2240 movdqa xmm0,XMMWORD[((-64))+r11] | |
2241 movdqa xmm1,XMMWORD[((-48))+r11] | |
2242 movdqa xmm2,XMMWORD[((-32))+r11] | |
2243 pand xmm0,XMMWORD[((-64))+rax] | |
2244 movdqa xmm3,XMMWORD[((-16))+r11] | |
2245 pand xmm1,XMMWORD[((-48))+rax] | |
2246 por xmm4,xmm0 | |
2247 pand xmm2,XMMWORD[((-32))+rax] | |
2248 por xmm5,xmm1 | |
2249 pand xmm3,XMMWORD[((-16))+rax] | |
2250 por xmm4,xmm2 | |
2251 por xmm5,xmm3 | |
2252 movdqa xmm0,XMMWORD[r11] | |
2253 movdqa xmm1,XMMWORD[16+r11] | |
2254 movdqa xmm2,XMMWORD[32+r11] | |
2255 pand xmm0,XMMWORD[rax] | |
2256 movdqa xmm3,XMMWORD[48+r11] | |
2257 pand xmm1,XMMWORD[16+rax] | |
2258 por xmm4,xmm0 | |
2259 pand xmm2,XMMWORD[32+rax] | |
2260 por xmm5,xmm1 | |
2261 pand xmm3,XMMWORD[48+rax] | |
2262 por xmm4,xmm2 | |
2263 por xmm5,xmm3 | |
2264 movdqa xmm0,XMMWORD[64+r11] | |
2265 movdqa xmm1,XMMWORD[80+r11] | |
2266 movdqa xmm2,XMMWORD[96+r11] | |
2267 pand xmm0,XMMWORD[64+rax] | |
2268 movdqa xmm3,XMMWORD[112+r11] | |
2269 pand xmm1,XMMWORD[80+rax] | |
2270 por xmm4,xmm0 | |
2271 pand xmm2,XMMWORD[96+rax] | |
2272 por xmm5,xmm1 | |
2273 pand xmm3,XMMWORD[112+rax] | |
2274 por xmm4,xmm2 | |
2275 por xmm5,xmm3 | |
2276 por xmm4,xmm5 | |
2277 lea r11,[256+r11] | |
; pshufd 0x4e swaps the two qword halves; OR-ing them leaves the selected
; qword in the low lane whichever lane it was in.
2278 pshufd xmm0,xmm4,0x4e | |
2279 por xmm0,xmm4 | |
2280 movq QWORD[rcx],xmm0 | |
2281 lea rcx,[8+rcx] | |
2282 sub edx,1 | |
2283 jnz NEAR $L$gather | |
2284 | |
; Restore the entry stack pointer saved in r10.
2285 lea rsp,[r10] | |
2286 DB 0F3h,0C3h ;repret | |
2287 $L$SEH_end_bn_gather5: | |
2288 | |
2289 ALIGN 64 | |
; $L$inc -- two 16-byte constants loaded with movdqa by the mask-building
; code (bn_gather5 and the $L$mul_enter path): the initial dword counters
; {0,0,1,1} and the per-step increment {2,2,2,2}.
; The DB lines that follow are the NUL-terminated ASCII string
; "Montgomery Multiplication with scatter/gather for x86_64, CRYPTOGAMS
; by <appro@openssl.org>".
2290 $L$inc: | |
2291 DD 0,0,1,1 | |
2292 DD 2,2,2,2 | |
2293 DB 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105 | |
2294 DB 112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115 | |
2295 DB 99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111 | |
2296 DB 114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79 | |
2297 DB 71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111 | |
2298 DB 112,101,110,115,115,108,46,111,114,103,62,0 | |
2299 EXTERN __imp_RtlVirtualUnwind | |
2300 | |
2301 ALIGN 16 | |
; ---------------------------------------------------------------------
; mul_handler -- exception handler referenced from the .xdata entries of
; bn_mul_mont_gather5, bn_mul4x_mont_gather5, bn_power5 and
; bn_from_mont8x. It rebuilds the interrupted function's caller-visible
; registers in the context record, copies the record out, calls
; RtlVirtualUnwind and returns 1 in eax.
;
; In (Win64 registers): r8 = context record, r9 = dispatcher context
;   (rcx and rdx are not read).
; NOTE(review): the numeric offsets below are assumed to follow the
; Win64 CONTEXT layout (120=Rax, 144=Rbx, 152=Rsp, 160=Rbp, 168=Rsi,
; 176=Rdi, 192=R9, 216..240=R12..R15, 248=Rip) and DISPATCHER_CONTEXT
; (0=ControlPc, 8=ImageBase, 16=FunctionEntry, 24=EstablisherFrame,
; 40=ContextRecord, 56=HandlerData) -- confirm against winnt.h.
;
; HandlerData is read as two image-relative dwords: the function's
; "body" label and its "epilogue" label (see the .xdata entries).
; ---------------------------------------------------------------------
2302 mul_handler: | |
2303 push rsi | |
2304 push rdi | |
2305 push rbx | |
2306 push rbp | |
2307 push r12 | |
2308 push r13 | |
2309 push r14 | |
2310 push r15 | |
2311 pushfq | |
2312 sub rsp,64 | |
2313 | |
; rax = context +120, rbx = context +248 (the faulting address that is
; compared against the labels below).
2314 mov rax,QWORD[120+r8] | |
2315 mov rbx,QWORD[248+r8] | |
2316 | |
2317 mov rsi,QWORD[8+r9] | |
2318 mov r11,QWORD[56+r9] | |
2319 | |
; Before the body label: the guarded functions still hold their entry
; rsp in rax at that point, so use the context's rax value as the frame.
2320 mov r10d,DWORD[r11] | |
2321 lea r10,[r10*1+rsi] | |
2322 cmp rbx,r10 | |
2323 jb NEAR $L$common_seh_tail | |
2324 | |
2325 mov rax,QWORD[152+r8] | |
2326 | |
; At or after the epilogue label: the stack pointer in the context is
; already the entry rsp.
2327 mov r10d,DWORD[4+r11] | |
2328 lea r10,[r10*1+rsi] | |
2329 cmp rbx,r10 | |
2330 jae NEAR $L$common_seh_tail | |
2331 
; Inside a body: locate the saved entry rsp. Addresses up to
; $L$mul_epilogue use the slot at [rsp + 8 + n*8] with n taken from
; context +192 (bn_mul_mont_gather5 stores it at [8+r9*8+rsp]); later
; code uses the fixed slot [rsp+40] (as stored by bn_from_mont8x).
2332 lea r10,[$L$mul_epilogue] | |
2333 cmp rbx,r10 | |
2334 ja NEAR $L$body_40 | |
2335 | |
2336 mov r10,QWORD[192+r8] | |
2337 mov rax,QWORD[8+r10*8+rax] | |
2338 | |
2339 jmp NEAR $L$body_proceed | |
2340 | |
2341 $L$body_40: | |
2342 mov rax,QWORD[40+rax] | |
; rax = entry rsp: the six pushed registers sit just below it. Write
; them back into the context record.
2343 $L$body_proceed: | |
2344 mov rbx,QWORD[((-8))+rax] | |
2345 mov rbp,QWORD[((-16))+rax] | |
2346 mov r12,QWORD[((-24))+rax] | |
2347 mov r13,QWORD[((-32))+rax] | |
2348 mov r14,QWORD[((-40))+rax] | |
2349 mov r15,QWORD[((-48))+rax] | |
2350 mov QWORD[144+r8],rbx | |
2351 mov QWORD[160+r8],rbp | |
2352 mov QWORD[216+r8],r12 | |
2353 mov QWORD[224+r8],r13 | |
2354 mov QWORD[232+r8],r14 | |
2355 mov QWORD[240+r8],r15 | |
2356 | |
; rdi/rsi were saved by the WIN64 prologue at [entry rsp + 8/16];
; publish them and the entry rsp in the context record.
2357 $L$common_seh_tail: | |
2358 mov rdi,QWORD[8+rax] | |
2359 mov rsi,QWORD[16+rax] | |
2360 mov QWORD[152+r8],rax | |
2361 mov QWORD[168+r8],rsi | |
2362 mov QWORD[176+r8],rdi | |
2363 | |
; Copy 154 qwords from the context record (r8) to the buffer at
; [r9+40]. The DD below is the byte sequence FC F3 48 A5 = cld ; rep movsq.
2364 mov rdi,QWORD[40+r9] | |
2365 mov rsi,r8 | |
2366 mov ecx,154 | |
2367 DD 0xa548f3fc | |
2368 | |
; Call RtlVirtualUnwind: rcx = 0, rdx = [disp+8], r8 = [disp+0],
; r9 = [disp+16]; stack arguments at [rsp+32..56] are [disp+40],
; &disp[56], &disp[24] and 0.
2369 mov rsi,r9 | |
2370 xor rcx,rcx | |
2371 mov rdx,QWORD[8+rsi] | |
2372 mov r8,QWORD[rsi] | |
2373 mov r9,QWORD[16+rsi] | |
2374 mov r10,QWORD[40+rsi] | |
2375 lea r11,[56+rsi] | |
2376 lea r12,[24+rsi] | |
2377 mov QWORD[32+rsp],r10 | |
2378 mov QWORD[40+rsp],r11 | |
2379 mov QWORD[48+rsp],r12 | |
2380 mov QWORD[56+rsp],rcx | |
2381 call QWORD[__imp_RtlVirtualUnwind] | |
2382 | |
; Return 1 (presumably ExceptionContinueSearch -- confirm) and restore
; everything pushed on entry, in reverse order.
2383 mov eax,1 | |
2384 add rsp,64 | |
2385 popfq | |
2386 pop r15 | |
2387 pop r14 | |
2388 pop r13 | |
2389 pop r12 | |
2390 pop rbp | |
2391 pop rbx | |
2392 pop rdi | |
2393 pop rsi | |
2394 DB 0F3h,0C3h ;repret | |
2395 | |
2396 | |
; .pdata -- function table: one triple of image-relative dwords per
; covered function (begin label, end label, unwind-info label in .xdata).
; bn_scatter5 and bn_from_montgomery have no entry here.
2397 section .pdata rdata align=4 | |
2398 ALIGN 4 | |
2399 DD $L$SEH_begin_bn_mul_mont_gather5 wrt ..imagebase | |
2400 DD $L$SEH_end_bn_mul_mont_gather5 wrt ..imagebase | |
2401 DD $L$SEH_info_bn_mul_mont_gather5 wrt ..imagebase | |
2402 | |
2403 DD $L$SEH_begin_bn_mul4x_mont_gather5 wrt ..imagebase | |
2404 DD $L$SEH_end_bn_mul4x_mont_gather5 wrt ..imagebase | |
2405 DD $L$SEH_info_bn_mul4x_mont_gather5 wrt ..imagebase | |
2406 | |
2407 DD $L$SEH_begin_bn_power5 wrt ..imagebase | |
2408 DD $L$SEH_end_bn_power5 wrt ..imagebase | |
2409 DD $L$SEH_info_bn_power5 wrt ..imagebase | |
2410 | |
2411 DD $L$SEH_begin_bn_from_mont8x wrt ..imagebase | |
2412 DD $L$SEH_end_bn_from_mont8x wrt ..imagebase | |
2413 DD $L$SEH_info_bn_from_mont8x wrt ..imagebase | |
2414 DD $L$SEH_begin_bn_gather5 wrt ..imagebase | |
2415 DD $L$SEH_end_bn_gather5 wrt ..imagebase | |
2416 DD $L$SEH_info_bn_gather5 wrt ..imagebase | |
2417 | |
; .xdata -- unwind information referenced from .pdata.
;
; The first four entries share one shape:
;   DB 9,0,0,0      header byte 9 = version 1 with the exception-handler
;                   flag set; prologue size, unwind-code count and frame
;                   register are all 0
;   DD mul_handler  image-relative address of the handler
;   DD body, epilogue
;                   handler data: mul_handler reads these two
;                   image-relative labels to decide how far the function
;                   had progressed
; The handler-data line for bn_power5 had been hard-wrapped in the middle
; of the `..imagebase` token, which cannot assemble; it is rejoined here.
2418 section .xdata rdata align=8 | |
2419 ALIGN 8 | |
2420 $L$SEH_info_bn_mul_mont_gather5: | |
2421 DB 9,0,0,0 | |
2422 DD mul_handler wrt ..imagebase | |
2423 DD $L$mul_body wrt ..imagebase,$L$mul_epilogue wrt ..imagebase | |
2424 ALIGN 8 | |
2425 $L$SEH_info_bn_mul4x_mont_gather5: | |
2426 DB 9,0,0,0 | |
2427 DD mul_handler wrt ..imagebase | |
2428 DD $L$mul4x_body wrt ..imagebase,$L$mul4x_epilogue wrt ..imagebase | |
2429 ALIGN 8 | |
2430 $L$SEH_info_bn_power5: | |
2431 DB 9,0,0,0 | |
2432 DD mul_handler wrt ..imagebase | |
2433 DD $L$power5_body wrt ..imagebase,$L$power5_epilogue wrt ..imagebase | |
2434 ALIGN 8 | |
2435 $L$SEH_info_bn_from_mont8x: | |
2436 DB 9,0,0,0 | |
2437 DD mul_handler wrt ..imagebase | |
2438 DD $L$from_body wrt ..imagebase,$L$from_epilogue wrt ..imagebase | |
2439 ALIGN 8 | |
; bn_gather5 uses plain unwind codes instead of a handler. They mirror
; its byte-encoded prologue and must change together with it:
;   0x01,0x0b,0x03,0x0a  version 1, no flags; prologue is 11 bytes;
;                        3 code slots; frame register 10 (r10), offset 0
;   0x0b,0x01,0x21,0x00  at offset 11: large allocation of 0x21*8 = 0x108
;                        bytes (the `sub rsp,0x108`)
;   0x04,0xa3            at offset 4: set frame register (the
;                        `lea r10,[rsp]`)
;   0x00,0x00            padding slot
2440 $L$SEH_info_bn_gather5: | |
2441 DB 0x01,0x0b,0x03,0x0a | |
2442 DB 0x0b,0x01,0x21,0x00 | |
2443 DB 0x04,0xa3,0x00,0x00 | |
2444 ALIGN 8 | |
OLD | NEW |