OLD | NEW |
| (Empty) |
1 default rel | |
2 %define XMMWORD | |
3 %define YMMWORD | |
4 %define ZMMWORD | |
5 section .text code align=64 | |
6 | |
7 EXTERN OPENSSL_ia32cap_P | |
8 | |
9 | |
; Constants shared by the routines in this file (64-byte aligned).
10 ALIGN 64 | |
; $L$poly: four 64-bit limbs, least-significant first, of the modulus
; p = 2^256 - 2^224 + 2^192 + 2^96 - 1.  The code below loads limb 1
; ($L$poly+8) and limb 3 ($L$poly+24) into registers and hard-codes limb 0
; as -1 and limb 2 as 0.
11 $L$poly: | |
12 DQ 0xffffffffffffffff,0x00000000ffffffff,0x0000000000000000,0xffffffff00000001 | |
13 | |
; Eight dwords each of 1, 2 and 3.  $L$One seeds and steps the entry
; counter in the select_w5/select_w7 loops; $L$Two and $L$Three are not
; referenced in the visible part of this file.
14 $L$One: | |
15 DD 1,1,1,1,1,1,1,1 | |
16 $L$Two: | |
17 DD 2,2,2,2,2,2,2,2 | |
18 $L$Three: | |
19 DD 3,3,3,3,3,3,3,3 | |
; $L$ONE_mont: limbs (least-significant first) of 2^256 mod p, i.e. the
; value 1 in Montgomery form with R = 2^256.  Not referenced in the visible
; part of this file.
20 $L$ONE_mont: | |
21 DQ 0x0000000000000001,0xffffffff00000000,0xffffffffffffffff,0x00000000fffffffe | |
22 | |
23 | |
24 | |
;-----------------------------------------------------------------------
; ecp_nistz256_neg -- modular negation of a 4-limb (256-bit) value.
; ABI:  Win64.  rcx -> 32-byte result, rdx -> 32-byte input a.
; Out:  [result] = 0 when a == 0, otherwise p - a (p = $L$poly).
; The caller's rdi/rsi are saved in the caller-provided stack slots at
; [rsp+8]/[rsp+16] and restored before returning; r12/r13 are pushed.
;-----------------------------------------------------------------------
25 global ecp_nistz256_neg | |
26 | |
27 ALIGN 32 | |
28 ecp_nistz256_neg: | |
29 mov QWORD[8+rsp],rdi ;WIN64 prologue | |
30 mov QWORD[16+rsp],rsi | |
31 mov rax,rsp | |
32 $L$SEH_begin_ecp_nistz256_neg: | |
; Remap the Win64 argument registers: rdi = result, rsi = a.
33 mov rdi,rcx | |
34 mov rsi,rdx | |
35 | |
36 | |
37 push r12 | |
38 push r13 | |
39 | |
40 xor r8,r8 | |
41 xor r9,r9 | |
42 xor r10,r10 | |
43 xor r11,r11 | |
44 xor r13,r13 | |
45 | |
; r8..r11 = 0 - a (mod 2^256); copies are kept in rax, rdx (and, below,
; rcx, r12).  r13 becomes all-ones if the subtraction borrowed (a != 0).
46 sub r8,QWORD[rsi] | |
47 sbb r9,QWORD[8+rsi] | |
48 sbb r10,QWORD[16+rsi] | |
49 mov rax,r8 | |
50 sbb r11,QWORD[24+rsi] | |
51 lea rsi,[$L$poly] | |
52 mov rdx,r9 | |
53 sbb r13,0 | |
54 | |
; Add the modulus, then pick between the two candidates without branching.
55 add r8,QWORD[rsi] | |
56 mov rcx,r10 | |
57 adc r9,QWORD[8+rsi] | |
58 adc r10,QWORD[16+rsi] | |
59 mov r12,r11 | |
60 adc r11,QWORD[24+rsi] | |
61 test r13,r13 | |
62 | |
; r13 == 0 (no borrow): keep the value from before the addition.
63 cmovz r8,rax | |
64 cmovz r9,rdx | |
65 mov QWORD[rdi],r8 | |
66 cmovz r10,rcx | |
67 mov QWORD[8+rdi],r9 | |
68 cmovz r11,r12 | |
69 mov QWORD[16+rdi],r10 | |
70 mov QWORD[24+rdi],r11 | |
71 | |
72 pop r13 | |
73 pop r12 | |
74 mov rdi,QWORD[8+rsp] ;WIN64 epilogue | |
75 mov rsi,QWORD[16+rsp] | |
; F3 C3 = "rep ret".
76 DB 0F3h,0C3h ;repret | |
77 $L$SEH_end_ecp_nistz256_neg: | |
78 | |
79 | |
80 | |
81 | |
82 | |
83 | |
;-----------------------------------------------------------------------
; ecp_nistz256_mul_mont -- exported wrapper around
; __ecp_nistz256_mul_montq.
; ABI:  Win64.  rcx -> 32-byte result, rdx -> a, r8 -> b (4 limbs each).
; Saves the caller's rdi/rsi in [rsp+8]/[rsp+16] and pushes
; rbp, rbx, r12-r15; all are restored before returning.
;-----------------------------------------------------------------------
84 global ecp_nistz256_mul_mont | |
85 | |
86 ALIGN 32 | |
87 ecp_nistz256_mul_mont: | |
88 mov QWORD[8+rsp],rdi ;WIN64 prologue | |
89 mov QWORD[16+rsp],rsi | |
90 mov rax,rsp | |
91 $L$SEH_begin_ecp_nistz256_mul_mont: | |
; Remap the Win64 argument registers: rdi = result, rsi = a, rdx = b.
92 mov rdi,rcx | |
93 mov rsi,rdx | |
94 mov rdx,r8 | |
95 | |
96 | |
97 $L$mul_mont: | |
98 push rbp | |
99 push rbx | |
100 push r12 | |
101 push r13 | |
102 push r14 | |
103 push r15 | |
; Set up the helper's inputs: rbx -> b, rax = b[0], r9..r12 = a[0..3]
; (rsi -> a and rdi -> result are already in place).
104 mov rbx,rdx | |
105 mov rax,QWORD[rdx] | |
106 mov r9,QWORD[rsi] | |
107 mov r10,QWORD[8+rsi] | |
108 mov r11,QWORD[16+rsi] | |
109 mov r12,QWORD[24+rsi] | |
110 | |
111 call __ecp_nistz256_mul_montq | |
112 $L$mul_mont_done: | |
113 pop r15 | |
114 pop r14 | |
115 pop r13 | |
116 pop r12 | |
117 pop rbx | |
118 pop rbp | |
119 mov rdi,QWORD[8+rsp] ;WIN64 epilogue | |
120 mov rsi,QWORD[16+rsp] | |
121 DB 0F3h,0C3h ;repret | |
122 $L$SEH_end_ecp_nistz256_mul_mont: | |
123 | |
124 | |
;-----------------------------------------------------------------------
; __ecp_nistz256_mul_montq -- internal 4x4-limb multiply with interleaved
; reduction by the modulus p ($L$poly).
; In:   rax = b[0], rbx -> b (b[1..3] are read from [8/16/24+rbx]),
;       rsi -> a, r9,r10,r11,r12 = a[0..3], rdi -> 32-byte result.
; Out:  result stored at [rdi] and left in r12,r13,r8,r9 (low to high);
;       r14 = $L$poly[1], r15 = $L$poly[3].
; Clobbers: rax, rbx, rcx, rdx, rbp, r8-r13, flags.
;
; Each "reduction step" below takes the lowest accumulator limb t and adds
; t*2^96 (the shl/shr-by-32 pair) and t*poly[3]*2^192 (the mul r15) to the
; limbs above it.  Since p + 1 = 2^96 + poly[3]*2^192, this equals adding
; t*p, which zeroes the lowest limb; that limb's register is then reused
; as the new top limb.
;-----------------------------------------------------------------------
125 ALIGN 32 | |
126 __ecp_nistz256_mul_montq: | |
127 | |
128 | |
; acc (r8..r12) = a * b[0]; r14/r15 are loaded with poly[1]/poly[3].
129 mov rbp,rax | |
130 mul r9 | |
131 mov r14,QWORD[(($L$poly+8))] | |
132 mov r8,rax | |
133 mov rax,rbp | |
134 mov r9,rdx | |
135 | |
136 mul r10 | |
137 mov r15,QWORD[(($L$poly+24))] | |
138 add r9,rax | |
139 mov rax,rbp | |
140 adc rdx,0 | |
141 mov r10,rdx | |
142 | |
143 mul r11 | |
144 add r10,rax | |
145 mov rax,rbp | |
146 adc rdx,0 | |
147 mov r11,rdx | |
148 | |
149 mul r12 | |
150 add r11,rax | |
151 mov rax,r8 | |
152 adc rdx,0 | |
153 xor r13,r13 | |
154 mov r12,rdx | |
155 | |
156 | |
157 | |
158 | |
159 | |
160 | |
161 | |
162 | |
163 | |
164 | |
; Reduction step 1 (t = r8); rax is then loaded with b[1].
165 mov rbp,r8 | |
166 shl r8,32 | |
167 mul r15 | |
168 shr rbp,32 | |
169 add r9,r8 | |
170 adc r10,rbp | |
171 adc r11,rax | |
172 mov rax,QWORD[8+rbx] | |
173 adc r12,rdx | |
174 adc r13,0 | |
175 xor r8,r8 | |
176 | |
177 | |
178 | |
; acc (r9..r13, carry into r8) += a * b[1]; a is re-read from [rsi].
179 mov rbp,rax | |
180 mul QWORD[rsi] | |
181 add r9,rax | |
182 mov rax,rbp | |
183 adc rdx,0 | |
184 mov rcx,rdx | |
185 | |
186 mul QWORD[8+rsi] | |
187 add r10,rcx | |
188 adc rdx,0 | |
189 add r10,rax | |
190 mov rax,rbp | |
191 adc rdx,0 | |
192 mov rcx,rdx | |
193 | |
194 mul QWORD[16+rsi] | |
195 add r11,rcx | |
196 adc rdx,0 | |
197 add r11,rax | |
198 mov rax,rbp | |
199 adc rdx,0 | |
200 mov rcx,rdx | |
201 | |
202 mul QWORD[24+rsi] | |
203 add r12,rcx | |
204 adc rdx,0 | |
205 add r12,rax | |
206 mov rax,r9 | |
207 adc r13,rdx | |
208 adc r8,0 | |
209 | |
210 | |
211 | |
; Reduction step 2 (t = r9); rax is then loaded with b[2].
212 mov rbp,r9 | |
213 shl r9,32 | |
214 mul r15 | |
215 shr rbp,32 | |
216 add r10,r9 | |
217 adc r11,rbp | |
218 adc r12,rax | |
219 mov rax,QWORD[16+rbx] | |
220 adc r13,rdx | |
221 adc r8,0 | |
222 xor r9,r9 | |
223 | |
224 | |
225 | |
; acc (r10..r13, r8, carry into r9) += a * b[2].
226 mov rbp,rax | |
227 mul QWORD[rsi] | |
228 add r10,rax | |
229 mov rax,rbp | |
230 adc rdx,0 | |
231 mov rcx,rdx | |
232 | |
233 mul QWORD[8+rsi] | |
234 add r11,rcx | |
235 adc rdx,0 | |
236 add r11,rax | |
237 mov rax,rbp | |
238 adc rdx,0 | |
239 mov rcx,rdx | |
240 | |
241 mul QWORD[16+rsi] | |
242 add r12,rcx | |
243 adc rdx,0 | |
244 add r12,rax | |
245 mov rax,rbp | |
246 adc rdx,0 | |
247 mov rcx,rdx | |
248 | |
249 mul QWORD[24+rsi] | |
250 add r13,rcx | |
251 adc rdx,0 | |
252 add r13,rax | |
253 mov rax,r10 | |
254 adc r8,rdx | |
255 adc r9,0 | |
256 | |
257 | |
258 | |
; Reduction step 3 (t = r10); rax is then loaded with b[3].
259 mov rbp,r10 | |
260 shl r10,32 | |
261 mul r15 | |
262 shr rbp,32 | |
263 add r11,r10 | |
264 adc r12,rbp | |
265 adc r13,rax | |
266 mov rax,QWORD[24+rbx] | |
267 adc r8,rdx | |
268 adc r9,0 | |
269 xor r10,r10 | |
270 | |
271 | |
272 | |
; acc (r11..r13, r8, r9, carry into r10) += a * b[3].
273 mov rbp,rax | |
274 mul QWORD[rsi] | |
275 add r11,rax | |
276 mov rax,rbp | |
277 adc rdx,0 | |
278 mov rcx,rdx | |
279 | |
280 mul QWORD[8+rsi] | |
281 add r12,rcx | |
282 adc rdx,0 | |
283 add r12,rax | |
284 mov rax,rbp | |
285 adc rdx,0 | |
286 mov rcx,rdx | |
287 | |
288 mul QWORD[16+rsi] | |
289 add r13,rcx | |
290 adc rdx,0 | |
291 add r13,rax | |
292 mov rax,rbp | |
293 adc rdx,0 | |
294 mov rcx,rdx | |
295 | |
296 mul QWORD[24+rsi] | |
297 add r8,rcx | |
298 adc rdx,0 | |
299 add r8,rax | |
300 mov rax,r11 | |
301 adc r9,rdx | |
302 adc r10,0 | |
303 | |
304 | |
305 | |
; Reduction step 4 (t = r11).  Copies of the unreduced limbs are kept in
; rcx, rbp (and, below, rbx, rdx) for the final selection.
306 mov rbp,r11 | |
307 shl r11,32 | |
308 mul r15 | |
309 shr rbp,32 | |
310 add r12,r11 | |
311 adc r13,rbp | |
312 mov rcx,r12 | |
313 adc r8,rax | |
314 adc r9,rdx | |
315 mov rbp,r13 | |
316 adc r10,0 | |
317 | |
318 | |
319 | |
; Subtract p from (r12,r13,r8,r9,r10); poly[0] = -1 and poly[2] = 0 are
; used as immediates.
320 sub r12,-1 | |
321 mov rbx,r8 | |
322 sbb r13,r14 | |
323 sbb r8,0 | |
324 mov rdx,r9 | |
325 sbb r9,r15 | |
326 sbb r10,0 | |
327 | |
; If the subtraction borrowed, restore the unsubtracted value (branch-free).
328 cmovc r12,rcx | |
329 cmovc r13,rbp | |
330 mov QWORD[rdi],r12 | |
331 cmovc r8,rbx | |
332 mov QWORD[8+rdi],r13 | |
333 cmovc r9,rdx | |
334 mov QWORD[16+rdi],r8 | |
335 mov QWORD[24+rdi],r9 | |
336 | |
337 DB 0F3h,0C3h ;repret | |
338 | |
339 | |
340 | |
341 | |
342 | |
343 | |
344 | |
345 | |
346 | |
;-----------------------------------------------------------------------
; ecp_nistz256_sqr_mont -- exported wrapper around
; __ecp_nistz256_sqr_montq.
; ABI:  Win64.  rcx -> 32-byte result, rdx -> a (4 limbs).
; Saves the caller's rdi/rsi in [rsp+8]/[rsp+16] and pushes
; rbp, rbx, r12-r15; all are restored before returning.
;-----------------------------------------------------------------------
347 global ecp_nistz256_sqr_mont | |
348 | |
349 ALIGN 32 | |
350 ecp_nistz256_sqr_mont: | |
351 mov QWORD[8+rsp],rdi ;WIN64 prologue | |
352 mov QWORD[16+rsp],rsi | |
353 mov rax,rsp | |
354 $L$SEH_begin_ecp_nistz256_sqr_mont: | |
; Remap the Win64 argument registers: rdi = result, rsi = a.
355 mov rdi,rcx | |
356 mov rsi,rdx | |
357 | |
358 | |
359 push rbp | |
360 push rbx | |
361 push r12 | |
362 push r13 | |
363 push r14 | |
364 push r15 | |
; Helper inputs: rax = a[0], r14 = a[1], r15 = a[2], r8 = a[3].
365 mov rax,QWORD[rsi] | |
366 mov r14,QWORD[8+rsi] | |
367 mov r15,QWORD[16+rsi] | |
368 mov r8,QWORD[24+rsi] | |
369 | |
370 call __ecp_nistz256_sqr_montq | |
371 $L$sqr_mont_done: | |
372 pop r15 | |
373 pop r14 | |
374 pop r13 | |
375 pop r12 | |
376 pop rbx | |
377 pop rbp | |
378 mov rdi,QWORD[8+rsp] ;WIN64 epilogue | |
379 mov rsi,QWORD[16+rsp] | |
380 DB 0F3h,0C3h ;repret | |
381 $L$SEH_end_ecp_nistz256_sqr_mont: | |
382 | |
383 | |
;-----------------------------------------------------------------------
; __ecp_nistz256_sqr_montq -- internal 4-limb squaring followed by
; reduction by the modulus p ($L$poly).
; In:   rax = a[0], r14 = a[1], r15 = a[2], r8 = a[3], rsi -> a (limbs
;       are re-read from memory for the diagonal squares), rdi -> result.
; Out:  result stored at [rdi] and left in r12,r13,r14,r15 (low to high);
;       rsi = $L$poly[1], rbp = $L$poly[3] (callers in this file use them).
; Clobbers: rax, rcx, rdx, rbp, rsi, r8-r15, flags.
;-----------------------------------------------------------------------
384 ALIGN 32 | |
385 __ecp_nistz256_sqr_montq: | |
; Off-diagonal products a[i]*a[j], i < j, accumulated into r9..r14.
386 mov r13,rax | |
387 mul r14 | |
388 mov r9,rax | |
389 mov rax,r15 | |
390 mov r10,rdx | |
391 | |
392 mul r13 | |
393 add r10,rax | |
394 mov rax,r8 | |
395 adc rdx,0 | |
396 mov r11,rdx | |
397 | |
398 mul r13 | |
399 add r11,rax | |
400 mov rax,r15 | |
401 adc rdx,0 | |
402 mov r12,rdx | |
403 | |
404 | |
405 mul r14 | |
406 add r11,rax | |
407 mov rax,r8 | |
408 adc rdx,0 | |
409 mov rbp,rdx | |
410 | |
411 mul r14 | |
412 add r12,rax | |
413 mov rax,r8 | |
414 adc rdx,0 | |
415 add r12,rbp | |
416 mov r13,rdx | |
417 adc r13,0 | |
418 | |
419 | |
420 mul r15 | |
421 xor r15,r15 | |
422 add r13,rax | |
423 mov rax,QWORD[rsi] | |
424 mov r14,rdx | |
425 adc r14,0 | |
426 | |
; Double the off-diagonal sum; the carry out goes into r15.
427 add r9,r9 | |
428 adc r10,r10 | |
429 adc r11,r11 | |
430 adc r12,r12 | |
431 adc r13,r13 | |
432 adc r14,r14 | |
433 adc r15,0 | |
434 | |
; Add the diagonal squares a[i]^2; the 512-bit square is r8..r15.
435 mul rax | |
436 mov r8,rax | |
437 mov rax,QWORD[8+rsi] | |
438 mov rcx,rdx | |
439 | |
440 mul rax | |
441 add r9,rcx | |
442 adc r10,rax | |
443 mov rax,QWORD[16+rsi] | |
444 adc rdx,0 | |
445 mov rcx,rdx | |
446 | |
447 mul rax | |
448 add r11,rcx | |
449 adc r12,rax | |
450 mov rax,QWORD[24+rsi] | |
451 adc rdx,0 | |
452 mov rcx,rdx | |
453 | |
454 mul rax | |
455 add r13,rcx | |
456 adc r14,rax | |
457 mov rax,r8 | |
458 adc r15,rdx | |
459 | |
; From here on rsi = poly[1] and rbp = poly[3]; the input pointer is gone.
460 mov rsi,QWORD[(($L$poly+8))] | |
461 mov rbp,QWORD[(($L$poly+24))] | |
462 | |
463 | |
464 | |
465 | |
; Four reduction steps on the low half (r8..r11).  Each takes the lowest
; limb t and adds t*2^96 (shl/shr by 32) and t*poly[3]*2^192 (mul rbp) to
; the limbs above it; the freed register becomes the new top limb.
; Step 1 (t = r8).
466 mov rcx,r8 | |
467 shl r8,32 | |
468 mul rbp | |
469 shr rcx,32 | |
470 add r9,r8 | |
471 adc r10,rcx | |
472 adc r11,rax | |
473 mov rax,r9 | |
474 adc rdx,0 | |
475 | |
476 | |
477 | |
; Step 2 (t = r9).
478 mov rcx,r9 | |
479 shl r9,32 | |
480 mov r8,rdx | |
481 mul rbp | |
482 shr rcx,32 | |
483 add r10,r9 | |
484 adc r11,rcx | |
485 adc r8,rax | |
486 mov rax,r10 | |
487 adc rdx,0 | |
488 | |
489 | |
490 | |
; Step 3 (t = r10).
491 mov rcx,r10 | |
492 shl r10,32 | |
493 mov r9,rdx | |
494 mul rbp | |
495 shr rcx,32 | |
496 add r11,r10 | |
497 adc r8,rcx | |
498 adc r9,rax | |
499 mov rax,r11 | |
500 adc rdx,0 | |
501 | |
502 | |
503 | |
; Step 4 (t = r11).
504 mov rcx,r11 | |
505 shl r11,32 | |
506 mov r10,rdx | |
507 mul rbp | |
508 shr rcx,32 | |
509 add r8,r11 | |
510 adc r9,rcx | |
511 adc r10,rax | |
512 adc rdx,0 | |
513 xor r11,r11 | |
514 | |
515 | |
516 | |
; Add the reduced low half (r8,r9,r10,rdx) to the high half (r12..r15);
; the carry goes into r11.  Copies are kept in r8, r9 (and r10, rcx below).
517 add r12,r8 | |
518 adc r13,r9 | |
519 mov r8,r12 | |
520 adc r14,r10 | |
521 adc r15,rdx | |
522 mov r9,r13 | |
523 adc r11,0 | |
524 | |
; Subtract p; poly[0] = -1 and poly[2] = 0 are used as immediates.
525 sub r12,-1 | |
526 mov r10,r14 | |
527 sbb r13,rsi | |
528 sbb r14,0 | |
529 mov rcx,r15 | |
530 sbb r15,rbp | |
531 sbb r11,0 | |
532 | |
; If the subtraction borrowed, restore the unsubtracted value (branch-free).
533 cmovc r12,r8 | |
534 cmovc r13,r9 | |
535 mov QWORD[rdi],r12 | |
536 cmovc r14,r10 | |
537 mov QWORD[8+rdi],r13 | |
538 cmovc r15,rcx | |
539 mov QWORD[16+rdi],r14 | |
540 mov QWORD[24+rdi],r15 | |
541 | |
542 DB 0F3h,0C3h ;repret | |
543 | |
544 | |
545 | |
;-----------------------------------------------------------------------
; ecp_nistz256_select_w5 -- copy one 96-byte entry out of a 16-entry table.
; ABI:  Win64.  rcx -> 96-byte output (unaligned stores), rdx -> table
;       (read with movdqa, so it must be 16-byte aligned), r8d = index.
; Entry k (k = 1..16) is selected when index == k; any other index
; (including 0) yields all-zero output.  All 16 entries are read on every
; call and combined with compare masks, so the sequence of addresses
; accessed does not depend on index.
; Uses xmm0-xmm15; xmm6-xmm15 are saved in a 168-byte stack frame.
;-----------------------------------------------------------------------
546 global ecp_nistz256_select_w5 | |
547 | |
548 ALIGN 32 | |
549 ecp_nistz256_select_w5: | |
550 lea rax,[((-136))+rsp] | |
551 $L$SEH_begin_ecp_nistz256_select_w5: | |
; Byte-encoded prologue:
;   lea    rsp,[rax-0x20]          ; rsp -= 168 in total
;   movaps [rax-0x20],xmm6
;   movaps [rax-0x10],xmm7
;   movaps [rax],xmm8 ... movaps [rax+0x70],xmm15   (16-byte steps)
552 DB 0x48,0x8d,0x60,0xe0 | |
553 DB 0x0f,0x29,0x70,0xe0 | |
554 DB 0x0f,0x29,0x78,0xf0 | |
555 DB 0x44,0x0f,0x29,0x00 | |
556 DB 0x44,0x0f,0x29,0x48,0x10 | |
557 DB 0x44,0x0f,0x29,0x50,0x20 | |
558 DB 0x44,0x0f,0x29,0x58,0x30 | |
559 DB 0x44,0x0f,0x29,0x60,0x40 | |
560 DB 0x44,0x0f,0x29,0x68,0x50 | |
561 DB 0x44,0x0f,0x29,0x70,0x60 | |
562 DB 0x44,0x0f,0x29,0x78,0x70 | |
; xmm0 = {1,1,1,1} (counter step), xmm1 = index in its low dword
; (broadcast to all four dwords by the pshufd below).
563 movdqa xmm0,XMMWORD[$L$One] | |
564 movd xmm1,r8d | |
565 | |
; xmm2..xmm7 accumulate the selected 96 bytes.
566 pxor xmm2,xmm2 | |
567 pxor xmm3,xmm3 | |
568 pxor xmm4,xmm4 | |
569 pxor xmm5,xmm5 | |
570 pxor xmm6,xmm6 | |
571 pxor xmm7,xmm7 | |
572 | |
; xmm8 = running entry number, starting at 1.
573 movdqa xmm8,xmm0 | |
574 pshufd xmm1,xmm1,0 | |
575 | |
576 mov rax,16 | |
577 $L$select_loop_sse_w5: | |
578 | |
; xmm15 = all-ones if the current entry number equals index, else zero.
579 movdqa xmm15,xmm8 | |
580 paddd xmm8,xmm0 | |
581 pcmpeqd xmm15,xmm1 | |
582 | |
583 movdqa xmm9,XMMWORD[rdx] | |
584 movdqa xmm10,XMMWORD[16+rdx] | |
585 movdqa xmm11,XMMWORD[32+rdx] | |
586 movdqa xmm12,XMMWORD[48+rdx] | |
587 movdqa xmm13,XMMWORD[64+rdx] | |
588 movdqa xmm14,XMMWORD[80+rdx] | |
589 lea rdx,[96+rdx] | |
590 | |
; Mask the entry and OR it into the accumulators.
591 pand xmm9,xmm15 | |
592 pand xmm10,xmm15 | |
593 por xmm2,xmm9 | |
594 pand xmm11,xmm15 | |
595 por xmm3,xmm10 | |
596 pand xmm12,xmm15 | |
597 por xmm4,xmm11 | |
598 pand xmm13,xmm15 | |
599 por xmm5,xmm12 | |
600 pand xmm14,xmm15 | |
601 por xmm6,xmm13 | |
602 por xmm7,xmm14 | |
603 | |
604 dec rax | |
605 jnz NEAR $L$select_loop_sse_w5 | |
606 | |
607 movdqu XMMWORD[rcx],xmm2 | |
608 movdqu XMMWORD[16+rcx],xmm3 | |
609 movdqu XMMWORD[32+rcx],xmm4 | |
610 movdqu XMMWORD[48+rcx],xmm5 | |
611 movdqu XMMWORD[64+rcx],xmm6 | |
612 movdqu XMMWORD[80+rcx],xmm7 | |
; Restore xmm6-xmm15 and release the 168-byte frame.
613 movaps xmm6,XMMWORD[rsp] | |
614 movaps xmm7,XMMWORD[16+rsp] | |
615 movaps xmm8,XMMWORD[32+rsp] | |
616 movaps xmm9,XMMWORD[48+rsp] | |
617 movaps xmm10,XMMWORD[64+rsp] | |
618 movaps xmm11,XMMWORD[80+rsp] | |
619 movaps xmm12,XMMWORD[96+rsp] | |
620 movaps xmm13,XMMWORD[112+rsp] | |
621 movaps xmm14,XMMWORD[128+rsp] | |
622 movaps xmm15,XMMWORD[144+rsp] | |
623 lea rsp,[168+rsp] | |
624 $L$SEH_end_ecp_nistz256_select_w5: | |
625 DB 0F3h,0C3h ;repret | |
626 | |
627 | |
628 | |
629 | |
;-----------------------------------------------------------------------
; ecp_nistz256_select_w7 -- copy one 64-byte entry out of a 64-entry table.
; ABI:  Win64.  rcx -> 64-byte output (unaligned stores), rdx -> table
;       (read with movdqa, so it must be 16-byte aligned), r8d = index.
; Entry k (k = 1..64) is selected when index == k; any other index
; (including 0) yields all-zero output.  All 64 entries are read on every
; call and combined with compare masks, so the sequence of addresses
; accessed does not depend on index.
; xmm6-xmm15 are saved in a 168-byte stack frame and restored on exit.
;-----------------------------------------------------------------------
630 global ecp_nistz256_select_w7 | |
631 | |
632 ALIGN 32 | |
633 ecp_nistz256_select_w7: | |
634 lea rax,[((-136))+rsp] | |
635 $L$SEH_begin_ecp_nistz256_select_w7: | |
; Byte-encoded prologue:
;   lea    rsp,[rax-0x20]          ; rsp -= 168 in total
;   movaps [rax-0x20],xmm6
;   movaps [rax-0x10],xmm7
;   movaps [rax],xmm8 ... movaps [rax+0x70],xmm15   (16-byte steps)
636 DB 0x48,0x8d,0x60,0xe0 | |
637 DB 0x0f,0x29,0x70,0xe0 | |
638 DB 0x0f,0x29,0x78,0xf0 | |
639 DB 0x44,0x0f,0x29,0x00 | |
640 DB 0x44,0x0f,0x29,0x48,0x10 | |
641 DB 0x44,0x0f,0x29,0x50,0x20 | |
642 DB 0x44,0x0f,0x29,0x58,0x30 | |
643 DB 0x44,0x0f,0x29,0x60,0x40 | |
644 DB 0x44,0x0f,0x29,0x68,0x50 | |
645 DB 0x44,0x0f,0x29,0x70,0x60 | |
646 DB 0x44,0x0f,0x29,0x78,0x70 | |
; xmm8 = running entry number (starts at 1), xmm0 = {1,1,1,1} step,
; xmm1 = index broadcast to all four dwords.
647 movdqa xmm8,XMMWORD[$L$One] | |
648 movd xmm1,r8d | |
649 | |
; xmm2..xmm5 accumulate the selected 64 bytes.
650 pxor xmm2,xmm2 | |
651 pxor xmm3,xmm3 | |
652 pxor xmm4,xmm4 | |
653 pxor xmm5,xmm5 | |
654 | |
655 movdqa xmm0,xmm8 | |
656 pshufd xmm1,xmm1,0 | |
657 mov rax,64 | |
658 | |
659 $L$select_loop_sse_w7: | |
; xmm15 = all-ones if the current entry number equals index, else zero.
660 movdqa xmm15,xmm8 | |
661 paddd xmm8,xmm0 | |
662 movdqa xmm9,XMMWORD[rdx] | |
663 movdqa xmm10,XMMWORD[16+rdx] | |
664 pcmpeqd xmm15,xmm1 | |
665 movdqa xmm11,XMMWORD[32+rdx] | |
666 movdqa xmm12,XMMWORD[48+rdx] | |
667 lea rdx,[64+rdx] | |
668 | |
; Mask the entry and OR it into the accumulators; the prefetch hints at
; data 255 bytes past the (already advanced) table pointer.
669 pand xmm9,xmm15 | |
670 pand xmm10,xmm15 | |
671 por xmm2,xmm9 | |
672 pand xmm11,xmm15 | |
673 por xmm3,xmm10 | |
674 pand xmm12,xmm15 | |
675 por xmm4,xmm11 | |
676 prefetcht0 [255+rdx] | |
677 por xmm5,xmm12 | |
678 | |
679 dec rax | |
680 jnz NEAR $L$select_loop_sse_w7 | |
681 | |
682 movdqu XMMWORD[rcx],xmm2 | |
683 movdqu XMMWORD[16+rcx],xmm3 | |
684 movdqu XMMWORD[32+rcx],xmm4 | |
685 movdqu XMMWORD[48+rcx],xmm5 | |
; Restore xmm6-xmm15 and release the 168-byte frame.
686 movaps xmm6,XMMWORD[rsp] | |
687 movaps xmm7,XMMWORD[16+rsp] | |
688 movaps xmm8,XMMWORD[32+rsp] | |
689 movaps xmm9,XMMWORD[48+rsp] | |
690 movaps xmm10,XMMWORD[64+rsp] | |
691 movaps xmm11,XMMWORD[80+rsp] | |
692 movaps xmm12,XMMWORD[96+rsp] | |
693 movaps xmm13,XMMWORD[112+rsp] | |
694 movaps xmm14,XMMWORD[128+rsp] | |
695 movaps xmm15,XMMWORD[144+rsp] | |
696 lea rsp,[168+rsp] | |
697 $L$SEH_end_ecp_nistz256_select_w7: | |
698 DB 0F3h,0C3h ;repret | |
699 | |
;-----------------------------------------------------------------------
; ecp_nistz256_avx2_select_w7 -- stub with no implementation.
; After the standard Win64 register shuffle it executes 0F 0B (ud2), which
; raises an invalid-opcode exception; the epilogue after it is only
; reached if execution is resumed past the ud2.
;-----------------------------------------------------------------------
700 global ecp_nistz256_avx2_select_w7 | |
701 | |
702 ALIGN 32 | |
703 ecp_nistz256_avx2_select_w7: | |
704 mov QWORD[8+rsp],rdi ;WIN64 prologue | |
705 mov QWORD[16+rsp],rsi | |
706 mov rax,rsp | |
707 $L$SEH_begin_ecp_nistz256_avx2_select_w7: | |
708 mov rdi,rcx | |
709 mov rsi,rdx | |
710 mov rdx,r8 | |
711 | |
712 | |
; 0F 0B = ud2.
713 DB 0x0f,0x0b | |
714 mov rdi,QWORD[8+rsp] ;WIN64 epilogue | |
715 mov rsi,QWORD[16+rsp] | |
716 DB 0F3h,0C3h ;repret | |
717 $L$SEH_end_ecp_nistz256_avx2_select_w7: | |
718 | |
;-----------------------------------------------------------------------
; __ecp_nistz256_add_toq -- internal modular addition.
; In:   r12,r13,r8,r9 = a (low to high), rbx -> b (4 limbs),
;       r14 = poly[1], r15 = poly[3] (must already be loaded by the caller),
;       rdi -> 32-byte result.
; Out:  a + b with one conditional subtraction of p, stored at [rdi] and
;       left in r12,r13,r8,r9.
; Clobbers: rax, rbp, rcx, r10, r11, flags.
;-----------------------------------------------------------------------
719 ALIGN 32 | |
720 __ecp_nistz256_add_toq: | |
; Sum with carry-out in r11; copies of the sum go to rax, rbp, rcx, r10.
721 xor r11,r11 | |
722 add r12,QWORD[rbx] | |
723 adc r13,QWORD[8+rbx] | |
724 mov rax,r12 | |
725 adc r8,QWORD[16+rbx] | |
726 adc r9,QWORD[24+rbx] | |
727 mov rbp,r13 | |
728 adc r11,0 | |
729 | |
; Subtract p (poly[0] = -1 and poly[2] = 0 are immediates).
730 sub r12,-1 | |
731 mov rcx,r8 | |
732 sbb r13,r14 | |
733 sbb r8,0 | |
734 mov r10,r9 | |
735 sbb r9,r15 | |
736 sbb r11,0 | |
737 | |
; Borrow => the sum was already below p: restore it (branch-free).
738 cmovc r12,rax | |
739 cmovc r13,rbp | |
740 mov QWORD[rdi],r12 | |
741 cmovc r8,rcx | |
742 mov QWORD[8+rdi],r13 | |
743 cmovc r9,r10 | |
744 mov QWORD[16+rdi],r8 | |
745 mov QWORD[24+rdi],r9 | |
746 | |
747 DB 0F3h,0C3h ;repret | |
748 | |
749 | |
750 | |
;-----------------------------------------------------------------------
; __ecp_nistz256_sub_fromq -- internal modular subtraction, memory operand.
; In:   r12,r13,r8,r9 = a (low to high), rbx -> b (4 limbs),
;       r14 = poly[1], r15 = poly[3] (must already be loaded by the caller),
;       rdi -> 32-byte result.
; Out:  a - b, plus p if the subtraction borrowed, stored at [rdi] and
;       left in r12,r13,r8,r9.
; Clobbers: rax, rbp, rcx, r10, r11, flags.
;-----------------------------------------------------------------------
751 ALIGN 32 | |
752 __ecp_nistz256_sub_fromq: | |
; Difference; r11 = all-ones if it borrowed, else 0.  Copies of the raw
; difference go to rax, rbp, rcx, r10.
753 sub r12,QWORD[rbx] | |
754 sbb r13,QWORD[8+rbx] | |
755 mov rax,r12 | |
756 sbb r8,QWORD[16+rbx] | |
757 sbb r9,QWORD[24+rbx] | |
758 mov rbp,r13 | |
759 sbb r11,r11 | |
760 | |
; Add p (poly[0] = -1 and poly[2] = 0 are immediates).
761 add r12,-1 | |
762 mov rcx,r8 | |
763 adc r13,r14 | |
764 adc r8,0 | |
765 mov r10,r9 | |
766 adc r9,r15 | |
767 test r11,r11 | |
768 | |
; No borrow => keep the raw difference (branch-free).
769 cmovz r12,rax | |
770 cmovz r13,rbp | |
771 mov QWORD[rdi],r12 | |
772 cmovz r8,rcx | |
773 mov QWORD[8+rdi],r13 | |
774 cmovz r9,r10 | |
775 mov QWORD[16+rdi],r8 | |
776 mov QWORD[24+rdi],r9 | |
777 | |
778 DB 0F3h,0C3h ;repret | |
779 | |
780 | |
781 | |
;-----------------------------------------------------------------------
; __ecp_nistz256_subq -- internal modular subtraction, register operands.
; In:   rax,rbp,rcx,r10 = a (low to high), r12,r13,r8,r9 = b,
;       r14 = poly[1], r15 = poly[3] (must already be loaded by the caller).
; Out:  r12,r13,r8,r9 = a - b, plus p if the subtraction borrowed.
;       Nothing is written to memory.
; Clobbers: rax, rbp, rcx, r10, r11, flags.
;-----------------------------------------------------------------------
782 ALIGN 32 | |
783 __ecp_nistz256_subq: | |
; Raw difference goes to r12,r13,r8,r9; r11 = all-ones if it borrowed.
784 sub rax,r12 | |
785 sbb rbp,r13 | |
786 mov r12,rax | |
787 sbb rcx,r8 | |
788 sbb r10,r9 | |
789 mov r13,rbp | |
790 sbb r11,r11 | |
791 | |
; rax,rbp,rcx,r10 = difference + p.
792 add rax,-1 | |
793 mov r8,rcx | |
794 adc rbp,r14 | |
795 adc rcx,0 | |
796 mov r9,r10 | |
797 adc r10,r15 | |
798 test r11,r11 | |
799 | |
; Borrow => take the corrected value (branch-free).
800 cmovnz r12,rax | |
801 cmovnz r13,rbp | |
802 cmovnz r8,rcx | |
803 cmovnz r9,r10 | |
804 | |
805 DB 0F3h,0C3h ;repret | |
806 | |
807 | |
808 | |
;-----------------------------------------------------------------------
; __ecp_nistz256_mul_by_2q -- internal modular doubling.
; In:   r12,r13,r8,r9 = a (low to high),
;       r14 = poly[1], r15 = poly[3] (must already be loaded by the caller),
;       rdi -> 32-byte result.
; Out:  2*a with one conditional subtraction of p, stored at [rdi] and
;       left in r12,r13,r8,r9.
; Clobbers: rax, rbp, rcx, r10, r11, flags.
;-----------------------------------------------------------------------
809 ALIGN 32 | |
810 __ecp_nistz256_mul_by_2q: | |
; a + a with carry-out in r11; copies go to rax, rbp, rcx, r10.
811 xor r11,r11 | |
812 add r12,r12 | |
813 adc r13,r13 | |
814 mov rax,r12 | |
815 adc r8,r8 | |
816 adc r9,r9 | |
817 mov rbp,r13 | |
818 adc r11,0 | |
819 | |
; Subtract p (poly[0] = -1 and poly[2] = 0 are immediates).
820 sub r12,-1 | |
821 mov rcx,r8 | |
822 sbb r13,r14 | |
823 sbb r8,0 | |
824 mov r10,r9 | |
825 sbb r9,r15 | |
826 sbb r11,0 | |
827 | |
; Borrow => the doubled value was already below p: restore it.
828 cmovc r12,rax | |
829 cmovc r13,rbp | |
830 mov QWORD[rdi],r12 | |
831 cmovc r8,rcx | |
832 mov QWORD[8+rdi],r13 | |
833 cmovc r9,r10 | |
834 mov QWORD[16+rdi],r8 | |
835 mov QWORD[24+rdi],r9 | |
836 | |
837 DB 0F3h,0C3h ;repret | |
838 | |
;-----------------------------------------------------------------------
; ecp_nistz256_point_double
; ABI:  Win64.  rcx -> 96-byte output, rdx -> 96-byte input.
; Both are treated as three consecutive 32-byte field elements, called
; in[0], in[32], in[64] and out[0], out[32], out[64] below (presumably the
; X, Y, Z coordinates of a Jacobian point -- confirm against the C caller).
; With T0..T4 denoting the stack slots at rsp+0, +32, +64, +96, +128, the
; final outputs are:
;   out[64] = 2*in[32]*in[64]
;   out[0]  = M^2 - 2*S        where M = 3*(in[0]+in[64]^2)*(in[0]-in[64]^2)
;   out[32] = M*(S - out[0]) - (2*in[32])^4/2      and S = (2*in[32])^2*in[0]
; all modulo p, with products in the Montgomery form used by the *_montq
; helpers.
; Stack: 6 pushes plus a 32*5+8 = 168-byte frame.  xmm0, xmm1, xmm2 hold
; the three output pointers across the helper calls.
; $L$point_double_shortcutq is also entered from ecp_nistz256_point_add.
;-----------------------------------------------------------------------
839 global ecp_nistz256_point_double | |
840 | |
841 ALIGN 32 | |
842 ecp_nistz256_point_double: | |
843 mov QWORD[8+rsp],rdi ;WIN64 prologue | |
844 mov QWORD[16+rsp],rsi | |
845 mov rax,rsp | |
846 $L$SEH_begin_ecp_nistz256_point_double: | |
847 mov rdi,rcx | |
848 mov rsi,rdx | |
849 | |
850 | |
851 push rbp | |
852 push rbx | |
853 push r12 | |
854 push r13 | |
855 push r14 | |
856 push r15 | |
857 sub rsp,32*5+8 | |
858 | |
859 $L$point_double_shortcutq: | |
; T3 = in[0]; r12,r13,r8,r9 = in[32]; r14/r15 = poly[1]/poly[3].
860 movdqu xmm0,XMMWORD[rsi] | |
861 mov rbx,rsi | |
862 movdqu xmm1,XMMWORD[16+rsi] | |
863 mov r12,QWORD[((32+0))+rsi] | |
864 mov r13,QWORD[((32+8))+rsi] | |
865 mov r8,QWORD[((32+16))+rsi] | |
866 mov r9,QWORD[((32+24))+rsi] | |
867 mov r14,QWORD[(($L$poly+8))] | |
868 mov r15,QWORD[(($L$poly+24))] | |
869 movdqa XMMWORD[96+rsp],xmm0 | |
870 movdqa XMMWORD[(96+16)+rsp],xmm1 | |
871 lea r10,[32+rdi] | |
872 lea r11,[64+rdi] | |
; Byte-encoded: movq xmm0,rdi / movq xmm1,r10 / movq xmm2,r11
; (xmm0 = &out[0], xmm1 = &out[32], xmm2 = &out[64]).
873 DB 102,72,15,110,199 | |
874 DB 102,73,15,110,202 | |
875 DB 102,73,15,110,211 | |
876 | |
; T0 = 2*in[32]
877 lea rdi,[rsp] | |
878 call __ecp_nistz256_mul_by_2q | |
879 | |
; T2 = in[64]^2
880 mov rax,QWORD[((64+0))+rsi] | |
881 mov r14,QWORD[((64+8))+rsi] | |
882 mov r15,QWORD[((64+16))+rsi] | |
883 mov r8,QWORD[((64+24))+rsi] | |
884 lea rsi,[((64-0))+rsi] | |
885 lea rdi,[64+rsp] | |
886 call __ecp_nistz256_sqr_montq | |
887 | |
; T0 = T0^2
888 mov rax,QWORD[((0+0))+rsp] | |
889 mov r14,QWORD[((8+0))+rsp] | |
890 lea rsi,[((0+0))+rsp] | |
891 mov r15,QWORD[((16+0))+rsp] | |
892 mov r8,QWORD[((24+0))+rsp] | |
893 lea rdi,[rsp] | |
894 call __ecp_nistz256_sqr_montq | |
895 | |
; out[64] = 2 * (in[64] * in[32]); the product is left in registers by
; mul_montq and doubled in place by mul_by_2q.
896 mov rax,QWORD[32+rbx] | |
897 mov r9,QWORD[((64+0))+rbx] | |
898 mov r10,QWORD[((64+8))+rbx] | |
899 mov r11,QWORD[((64+16))+rbx] | |
900 mov r12,QWORD[((64+24))+rbx] | |
901 lea rsi,[((64-0))+rbx] | |
902 lea rbx,[32+rbx] | |
; Byte-encoded: movq rdi,xmm2  (rdi = &out[64])
903 DB 102,72,15,126,215 | |
904 call __ecp_nistz256_mul_montq | |
905 call __ecp_nistz256_mul_by_2q | |
906 | |
; T1 = T3 + T2
907 mov r12,QWORD[((96+0))+rsp] | |
908 mov r13,QWORD[((96+8))+rsp] | |
909 lea rbx,[64+rsp] | |
910 mov r8,QWORD[((96+16))+rsp] | |
911 mov r9,QWORD[((96+24))+rsp] | |
912 lea rdi,[32+rsp] | |
913 call __ecp_nistz256_add_toq | |
914 | |
; T2 = T3 - T2
915 mov r12,QWORD[((96+0))+rsp] | |
916 mov r13,QWORD[((96+8))+rsp] | |
917 lea rbx,[64+rsp] | |
918 mov r8,QWORD[((96+16))+rsp] | |
919 mov r9,QWORD[((96+24))+rsp] | |
920 lea rdi,[64+rsp] | |
921 call __ecp_nistz256_sub_fromq | |
922 | |
; out[32] = (T0^2) / 2 mod p.  sqr_montq leaves the square in r12..r15 and
; poly[1]/poly[3] in rsi/rbp.
923 mov rax,QWORD[((0+0))+rsp] | |
924 mov r14,QWORD[((8+0))+rsp] | |
925 lea rsi,[((0+0))+rsp] | |
926 mov r15,QWORD[((16+0))+rsp] | |
927 mov r8,QWORD[((24+0))+rsp] | |
; Byte-encoded: movq rdi,xmm1  (rdi = &out[32])
928 DB 102,72,15,126,207 | |
929 call __ecp_nistz256_sqr_montq | |
; Halving: compute value + p (carry in r9), keep the original when the
; value is even, then shift the 257-bit quantity right by one bit.
930 xor r9,r9 | |
931 mov rax,r12 | |
932 add r12,-1 | |
933 mov r10,r13 | |
934 adc r13,rsi | |
935 mov rcx,r14 | |
936 adc r14,0 | |
937 mov r8,r15 | |
938 adc r15,rbp | |
939 adc r9,0 | |
940 xor rsi,rsi | |
941 test rax,1 | |
942 | |
943 cmovz r12,rax | |
944 cmovz r13,r10 | |
945 cmovz r14,rcx | |
946 cmovz r15,r8 | |
947 cmovz r9,rsi | |
948 | |
949 mov rax,r13 | |
950 shr r12,1 | |
951 shl rax,63 | |
952 mov r10,r14 | |
953 shr r13,1 | |
954 or r12,rax | |
955 shl r10,63 | |
956 mov rcx,r15 | |
957 shr r14,1 | |
958 or r13,r10 | |
959 shl rcx,63 | |
960 mov QWORD[rdi],r12 | |
961 shr r15,1 | |
962 mov QWORD[8+rdi],r13 | |
963 shl r9,63 | |
964 or r14,rcx | |
965 or r15,r9 | |
966 mov QWORD[16+rdi],r14 | |
967 mov QWORD[24+rdi],r15 | |
; T1 = T1 * T2
968 mov rax,QWORD[64+rsp] | |
969 lea rbx,[64+rsp] | |
970 mov r9,QWORD[((0+32))+rsp] | |
971 mov r10,QWORD[((8+32))+rsp] | |
972 lea rsi,[((0+32))+rsp] | |
973 mov r11,QWORD[((16+32))+rsp] | |
974 mov r12,QWORD[((24+32))+rsp] | |
975 lea rdi,[32+rsp] | |
976 call __ecp_nistz256_mul_montq | |
977 | |
; T4 = 2*T1, then T1 = T4 + T1 (i.e. three times the product) = M.
978 lea rdi,[128+rsp] | |
979 call __ecp_nistz256_mul_by_2q | |
980 | |
981 lea rbx,[32+rsp] | |
982 lea rdi,[32+rsp] | |
983 call __ecp_nistz256_add_toq | |
984 | |
; T0 = T0 * T3 = S
985 mov rax,QWORD[96+rsp] | |
986 lea rbx,[96+rsp] | |
987 mov r9,QWORD[((0+0))+rsp] | |
988 mov r10,QWORD[((8+0))+rsp] | |
989 lea rsi,[((0+0))+rsp] | |
990 mov r11,QWORD[((16+0))+rsp] | |
991 mov r12,QWORD[((24+0))+rsp] | |
992 lea rdi,[rsp] | |
993 call __ecp_nistz256_mul_montq | |
994 | |
; T4 = 2*T0
995 lea rdi,[128+rsp] | |
996 call __ecp_nistz256_mul_by_2q | |
997 | |
; out[0] = T1^2 ...
998 mov rax,QWORD[((0+32))+rsp] | |
999 mov r14,QWORD[((8+32))+rsp] | |
1000 lea rsi,[((0+32))+rsp] | |
1001 mov r15,QWORD[((16+32))+rsp] | |
1002 mov r8,QWORD[((24+32))+rsp] | |
; Byte-encoded: movq rdi,xmm0  (rdi = &out[0])
1003 DB 102,72,15,126,199 | |
1004 call __ecp_nistz256_sqr_montq | |
1005 | |
; ... minus T4.  Move the square's upper limbs (r14,r15) into r8,r9 and the
; modulus limbs (rsi,rbp) into r14,r15, the layout sub_fromq expects.
1006 lea rbx,[128+rsp] | |
1007 mov r8,r14 | |
1008 mov r9,r15 | |
1009 mov r14,rsi | |
1010 mov r15,rbp | |
1011 call __ecp_nistz256_sub_fromq | |
1012 | |
; (r12,r13,r8,r9) = T0 - out[0]; subq returns the result in registers only.
1013 mov rax,QWORD[((0+0))+rsp] | |
1014 mov rbp,QWORD[((0+8))+rsp] | |
1015 mov rcx,QWORD[((0+16))+rsp] | |
1016 mov r10,QWORD[((0+24))+rsp] | |
1017 lea rdi,[rsp] | |
1018 call __ecp_nistz256_subq | |
1019 | |
; Store that difference to T0 and, at the same time, move it into the
; r9..r12 operand registers of mul_montq.  "xor ecx,ecx" sets ZF, so both
; cmovz below always move (r11 = r8, r12 = r9).  T0 = T0 * T1.
1020 mov rax,QWORD[32+rsp] | |
1021 lea rbx,[32+rsp] | |
1022 mov r14,r12 | |
1023 xor ecx,ecx | |
1024 mov QWORD[((0+0))+rsp],r12 | |
1025 mov r10,r13 | |
1026 mov QWORD[((0+8))+rsp],r13 | |
1027 cmovz r11,r8 | |
1028 mov QWORD[((0+16))+rsp],r8 | |
1029 lea rsi,[((0-0))+rsp] | |
1030 cmovz r12,r9 | |
1031 mov QWORD[((0+24))+rsp],r9 | |
1032 mov r9,r14 | |
1033 lea rdi,[rsp] | |
1034 call __ecp_nistz256_mul_montq | |
1035 | |
; out[32] = (product, still in registers) - out[32].
; Byte-encoded: movq rbx,xmm1 / movq rdi,xmm1  (both = &out[32])
1036 DB 102,72,15,126,203 | |
1037 DB 102,72,15,126,207 | |
1038 call __ecp_nistz256_sub_fromq | |
1039 | |
1040 add rsp,32*5+8 | |
1041 pop r15 | |
1042 pop r14 | |
1043 pop r13 | |
1044 pop r12 | |
1045 pop rbx | |
1046 pop rbp | |
1047 mov rdi,QWORD[8+rsp] ;WIN64 epilogue | |
1048 mov rsi,QWORD[16+rsp] | |
1049 DB 0F3h,0C3h ;repret | |
1050 $L$SEH_end_ecp_nistz256_point_double: | |
1051 global ecp_nistz256_point_add | |
1052 | |
1053 ALIGN 32 | |
1054 ecp_nistz256_point_add: | |
1055 mov QWORD[8+rsp],rdi ;WIN64 prologue | |
1056 mov QWORD[16+rsp],rsi | |
1057 mov rax,rsp | |
1058 $L$SEH_begin_ecp_nistz256_point_add: | |
1059 mov rdi,rcx | |
1060 mov rsi,rdx | |
1061 mov rdx,r8 | |
1062 | |
1063 | |
1064 push rbp | |
1065 push rbx | |
1066 push r12 | |
1067 push r13 | |
1068 push r14 | |
1069 push r15 | |
1070 sub rsp,32*18+8 | |
1071 | |
1072 movdqu xmm0,XMMWORD[rsi] | |
1073 movdqu xmm1,XMMWORD[16+rsi] | |
1074 movdqu xmm2,XMMWORD[32+rsi] | |
1075 movdqu xmm3,XMMWORD[48+rsi] | |
1076 movdqu xmm4,XMMWORD[64+rsi] | |
1077 movdqu xmm5,XMMWORD[80+rsi] | |
1078 mov rbx,rsi | |
1079 mov rsi,rdx | |
1080 movdqa XMMWORD[384+rsp],xmm0 | |
1081 movdqa XMMWORD[(384+16)+rsp],xmm1 | |
1082 movdqa XMMWORD[416+rsp],xmm2 | |
1083 movdqa XMMWORD[(416+16)+rsp],xmm3 | |
1084 movdqa XMMWORD[448+rsp],xmm4 | |
1085 movdqa XMMWORD[(448+16)+rsp],xmm5 | |
1086 por xmm5,xmm4 | |
1087 | |
1088 movdqu xmm0,XMMWORD[rsi] | |
1089 pshufd xmm3,xmm5,0xb1 | |
1090 movdqu xmm1,XMMWORD[16+rsi] | |
1091 movdqu xmm2,XMMWORD[32+rsi] | |
1092 por xmm5,xmm3 | |
1093 movdqu xmm3,XMMWORD[48+rsi] | |
1094 mov rax,QWORD[((64+0))+rsi] | |
1095 mov r14,QWORD[((64+8))+rsi] | |
1096 mov r15,QWORD[((64+16))+rsi] | |
1097 mov r8,QWORD[((64+24))+rsi] | |
1098 movdqa XMMWORD[480+rsp],xmm0 | |
1099 pshufd xmm4,xmm5,0x1e | |
1100 movdqa XMMWORD[(480+16)+rsp],xmm1 | |
1101 movdqu xmm0,XMMWORD[64+rsi] | |
1102 movdqu xmm1,XMMWORD[80+rsi] | |
1103 movdqa XMMWORD[512+rsp],xmm2 | |
1104 movdqa XMMWORD[(512+16)+rsp],xmm3 | |
1105 por xmm5,xmm4 | |
1106 pxor xmm4,xmm4 | |
1107 por xmm1,xmm0 | |
1108 DB 102,72,15,110,199 | |
1109 | |
1110 lea rsi,[((64-0))+rsi] | |
1111 mov QWORD[((544+0))+rsp],rax | |
1112 mov QWORD[((544+8))+rsp],r14 | |
1113 mov QWORD[((544+16))+rsp],r15 | |
1114 mov QWORD[((544+24))+rsp],r8 | |
1115 lea rdi,[96+rsp] | |
1116 call __ecp_nistz256_sqr_montq | |
1117 | |
1118 pcmpeqd xmm5,xmm4 | |
1119 pshufd xmm4,xmm1,0xb1 | |
1120 por xmm4,xmm1 | |
1121 pshufd xmm5,xmm5,0 | |
1122 pshufd xmm3,xmm4,0x1e | |
1123 por xmm4,xmm3 | |
1124 pxor xmm3,xmm3 | |
1125 pcmpeqd xmm4,xmm3 | |
1126 pshufd xmm4,xmm4,0 | |
1127 mov rax,QWORD[((64+0))+rbx] | |
1128 mov r14,QWORD[((64+8))+rbx] | |
1129 mov r15,QWORD[((64+16))+rbx] | |
1130 mov r8,QWORD[((64+24))+rbx] | |
1131 DB 102,72,15,110,203 | |
1132 | |
1133 lea rsi,[((64-0))+rbx] | |
1134 lea rdi,[32+rsp] | |
1135 call __ecp_nistz256_sqr_montq | |
1136 | |
1137 mov rax,QWORD[544+rsp] | |
1138 lea rbx,[544+rsp] | |
1139 mov r9,QWORD[((0+96))+rsp] | |
1140 mov r10,QWORD[((8+96))+rsp] | |
1141 lea rsi,[((0+96))+rsp] | |
1142 mov r11,QWORD[((16+96))+rsp] | |
1143 mov r12,QWORD[((24+96))+rsp] | |
1144 lea rdi,[224+rsp] | |
1145 call __ecp_nistz256_mul_montq | |
1146 | |
1147 mov rax,QWORD[448+rsp] | |
1148 lea rbx,[448+rsp] | |
1149 mov r9,QWORD[((0+32))+rsp] | |
1150 mov r10,QWORD[((8+32))+rsp] | |
1151 lea rsi,[((0+32))+rsp] | |
1152 mov r11,QWORD[((16+32))+rsp] | |
1153 mov r12,QWORD[((24+32))+rsp] | |
1154 lea rdi,[256+rsp] | |
1155 call __ecp_nistz256_mul_montq | |
1156 | |
1157 mov rax,QWORD[416+rsp] | |
1158 lea rbx,[416+rsp] | |
1159 mov r9,QWORD[((0+224))+rsp] | |
1160 mov r10,QWORD[((8+224))+rsp] | |
1161 lea rsi,[((0+224))+rsp] | |
1162 mov r11,QWORD[((16+224))+rsp] | |
1163 mov r12,QWORD[((24+224))+rsp] | |
1164 lea rdi,[224+rsp] | |
1165 call __ecp_nistz256_mul_montq | |
1166 | |
1167 mov rax,QWORD[512+rsp] | |
1168 lea rbx,[512+rsp] | |
1169 mov r9,QWORD[((0+256))+rsp] | |
1170 mov r10,QWORD[((8+256))+rsp] | |
1171 lea rsi,[((0+256))+rsp] | |
1172 mov r11,QWORD[((16+256))+rsp] | |
1173 mov r12,QWORD[((24+256))+rsp] | |
1174 lea rdi,[256+rsp] | |
1175 call __ecp_nistz256_mul_montq | |
1176 | |
1177 lea rbx,[224+rsp] | |
1178 lea rdi,[64+rsp] | |
1179 call __ecp_nistz256_sub_fromq | |
1180 | |
1181 or r12,r13 | |
1182 movdqa xmm2,xmm4 | |
1183 or r12,r8 | |
1184 or r12,r9 | |
1185 por xmm2,xmm5 | |
1186 DB 102,73,15,110,220 | |
1187 | |
1188 mov rax,QWORD[384+rsp] | |
1189 lea rbx,[384+rsp] | |
1190 mov r9,QWORD[((0+96))+rsp] | |
1191 mov r10,QWORD[((8+96))+rsp] | |
1192 lea rsi,[((0+96))+rsp] | |
1193 mov r11,QWORD[((16+96))+rsp] | |
1194 mov r12,QWORD[((24+96))+rsp] | |
1195 lea rdi,[160+rsp] | |
1196 call __ecp_nistz256_mul_montq | |
1197 | |
1198 mov rax,QWORD[480+rsp] | |
1199 lea rbx,[480+rsp] | |
1200 mov r9,QWORD[((0+32))+rsp] | |
1201 mov r10,QWORD[((8+32))+rsp] | |
1202 lea rsi,[((0+32))+rsp] | |
1203 mov r11,QWORD[((16+32))+rsp] | |
1204 mov r12,QWORD[((24+32))+rsp] | |
1205 lea rdi,[192+rsp] | |
1206 call __ecp_nistz256_mul_montq | |
1207 | |
1208 lea rbx,[160+rsp] | |
1209 lea rdi,[rsp] | |
1210 call __ecp_nistz256_sub_fromq | |
1211 | |
1212 or r12,r13 | |
1213 or r12,r8 | |
1214 or r12,r9 | |
1215 | |
1216 DB 0x3e | |
1217 jnz NEAR $L$add_proceedq | |
1218 DB 102,73,15,126,208 | |
1219 DB 102,73,15,126,217 | |
1220 test r8,r8 | |
1221 jnz NEAR $L$add_proceedq | |
1222 test r9,r9 | |
1223 jz NEAR $L$add_doubleq | |
1224 | |
1225 DB 102,72,15,126,199 | |
1226 pxor xmm0,xmm0 | |
1227 movdqu XMMWORD[rdi],xmm0 | |
1228 movdqu XMMWORD[16+rdi],xmm0 | |
1229 movdqu XMMWORD[32+rdi],xmm0 | |
1230 movdqu XMMWORD[48+rdi],xmm0 | |
1231 movdqu XMMWORD[64+rdi],xmm0 | |
1232 movdqu XMMWORD[80+rdi],xmm0 | |
1233 jmp NEAR $L$add_doneq | |
1234 | |
1235 ALIGN 32 | |
1236 $L$add_doubleq: | |
1237 DB 102,72,15,126,206 | |
1238 DB 102,72,15,126,199 | |
1239 add rsp,416 | |
1240 jmp NEAR $L$point_double_shortcutq | |
1241 | |
1242 ALIGN 32 | |
1243 $L$add_proceedq: | |
1244 mov rax,QWORD[((0+64))+rsp] | |
1245 mov r14,QWORD[((8+64))+rsp] | |
1246 lea rsi,[((0+64))+rsp] | |
1247 mov r15,QWORD[((16+64))+rsp] | |
1248 mov r8,QWORD[((24+64))+rsp] | |
1249 lea rdi,[96+rsp] | |
1250 call __ecp_nistz256_sqr_montq | |
1251 | |
1252 mov rax,QWORD[448+rsp] | |
1253 lea rbx,[448+rsp] | |
1254 mov r9,QWORD[((0+0))+rsp] | |
1255 mov r10,QWORD[((8+0))+rsp] | |
1256 lea rsi,[((0+0))+rsp] | |
1257 mov r11,QWORD[((16+0))+rsp] | |
1258 mov r12,QWORD[((24+0))+rsp] | |
1259 lea rdi,[352+rsp] | |
1260 call __ecp_nistz256_mul_montq | |
1261 | |
1262 mov rax,QWORD[((0+0))+rsp] | |
1263 mov r14,QWORD[((8+0))+rsp] | |
1264 lea rsi,[((0+0))+rsp] | |
1265 mov r15,QWORD[((16+0))+rsp] | |
1266 mov r8,QWORD[((24+0))+rsp] | |
1267 lea rdi,[32+rsp] | |
1268 call __ecp_nistz256_sqr_montq | |
1269 | |
1270 mov rax,QWORD[544+rsp] | |
1271 lea rbx,[544+rsp] | |
1272 mov r9,QWORD[((0+352))+rsp] | |
1273 mov r10,QWORD[((8+352))+rsp] | |
1274 lea rsi,[((0+352))+rsp] | |
1275 mov r11,QWORD[((16+352))+rsp] | |
1276 mov r12,QWORD[((24+352))+rsp] | |
1277 lea rdi,[352+rsp] | |
1278 call __ecp_nistz256_mul_montq | |
1279 | |
1280 mov rax,QWORD[rsp] | |
1281 lea rbx,[rsp] | |
1282 mov r9,QWORD[((0+32))+rsp] | |
1283 mov r10,QWORD[((8+32))+rsp] | |
1284 lea rsi,[((0+32))+rsp] | |
1285 mov r11,QWORD[((16+32))+rsp] | |
1286 mov r12,QWORD[((24+32))+rsp] | |
1287 lea rdi,[128+rsp] | |
1288 call __ecp_nistz256_mul_montq | |
1289 | |
1290 mov rax,QWORD[160+rsp] | |
1291 lea rbx,[160+rsp] | |
1292 mov r9,QWORD[((0+32))+rsp] | |
1293 mov r10,QWORD[((8+32))+rsp] | |
1294 lea rsi,[((0+32))+rsp] | |
1295 mov r11,QWORD[((16+32))+rsp] | |
1296 mov r12,QWORD[((24+32))+rsp] | |
1297 lea rdi,[192+rsp] | |
1298 call __ecp_nistz256_mul_montq | |
1299 | |
1300 | |
1301 | |
1302 | |
1303 xor r11,r11 | |
1304 add r12,r12 | |
1305 lea rsi,[96+rsp] | |
1306 adc r13,r13 | |
1307 mov rax,r12 | |
1308 adc r8,r8 | |
1309 adc r9,r9 | |
1310 mov rbp,r13 | |
1311 adc r11,0 | |
1312 | |
1313 sub r12,-1 | |
1314 mov rcx,r8 | |
1315 sbb r13,r14 | |
1316 sbb r8,0 | |
1317 mov r10,r9 | |
1318 sbb r9,r15 | |
1319 sbb r11,0 | |
1320 | |
1321 cmovc r12,rax | |
1322 mov rax,QWORD[rsi] | |
1323 cmovc r13,rbp | |
1324 mov rbp,QWORD[8+rsi] | |
1325 cmovc r8,rcx | |
1326 mov rcx,QWORD[16+rsi] | |
1327 cmovc r9,r10 | |
1328 mov r10,QWORD[24+rsi] | |
1329 | |
1330 call __ecp_nistz256_subq | |
1331 | |
1332 lea rbx,[128+rsp] | |
1333 lea rdi,[288+rsp] | |
1334 call __ecp_nistz256_sub_fromq | |
1335 | |
1336 mov rax,QWORD[((192+0))+rsp] | |
1337 mov rbp,QWORD[((192+8))+rsp] | |
1338 mov rcx,QWORD[((192+16))+rsp] | |
1339 mov r10,QWORD[((192+24))+rsp] | |
1340 lea rdi,[320+rsp] | |
1341 | |
1342 call __ecp_nistz256_subq | |
1343 | |
1344 mov QWORD[rdi],r12 | |
1345 mov QWORD[8+rdi],r13 | |
1346 mov QWORD[16+rdi],r8 | |
1347 mov QWORD[24+rdi],r9 | |
1348 mov rax,QWORD[128+rsp] | |
1349 lea rbx,[128+rsp] | |
1350 mov r9,QWORD[((0+224))+rsp] | |
1351 mov r10,QWORD[((8+224))+rsp] | |
1352 lea rsi,[((0+224))+rsp] | |
1353 mov r11,QWORD[((16+224))+rsp] | |
1354 mov r12,QWORD[((24+224))+rsp] | |
1355 lea rdi,[256+rsp] | |
1356 call __ecp_nistz256_mul_montq | |
1357 | |
1358 mov rax,QWORD[320+rsp] | |
1359 lea rbx,[320+rsp] | |
1360 mov r9,QWORD[((0+64))+rsp] | |
1361 mov r10,QWORD[((8+64))+rsp] | |
1362 lea rsi,[((0+64))+rsp] | |
1363 mov r11,QWORD[((16+64))+rsp] | |
1364 mov r12,QWORD[((24+64))+rsp] | |
1365 lea rdi,[320+rsp] | |
1366 call __ecp_nistz256_mul_montq | |
1367 | |
1368 lea rbx,[256+rsp] | |
1369 lea rdi,[320+rsp] | |
1370 call __ecp_nistz256_sub_fromq | |
1371 | |
1372 DB 102,72,15,126,199 | |
1373 | |
1374 movdqa xmm0,xmm5 | |
1375 movdqa xmm1,xmm5 | |
1376 pandn xmm0,XMMWORD[352+rsp] | |
1377 movdqa xmm2,xmm5 | |
1378 pandn xmm1,XMMWORD[((352+16))+rsp] | |
1379 movdqa xmm3,xmm5 | |
1380 pand xmm2,XMMWORD[544+rsp] | |
1381 pand xmm3,XMMWORD[((544+16))+rsp] | |
1382 por xmm2,xmm0 | |
1383 por xmm3,xmm1 | |
1384 | |
1385 movdqa xmm0,xmm4 | |
1386 movdqa xmm1,xmm4 | |
1387 pandn xmm0,xmm2 | |
1388 movdqa xmm2,xmm4 | |
1389 pandn xmm1,xmm3 | |
1390 movdqa xmm3,xmm4 | |
1391 pand xmm2,XMMWORD[448+rsp] | |
1392 pand xmm3,XMMWORD[((448+16))+rsp] | |
1393 por xmm2,xmm0 | |
1394 por xmm3,xmm1 | |
1395 movdqu XMMWORD[64+rdi],xmm2 | |
1396 movdqu XMMWORD[80+rdi],xmm3 | |
1397 | |
1398 movdqa xmm0,xmm5 | |
1399 movdqa xmm1,xmm5 | |
1400 pandn xmm0,XMMWORD[288+rsp] | |
1401 movdqa xmm2,xmm5 | |
1402 pandn xmm1,XMMWORD[((288+16))+rsp] | |
1403 movdqa xmm3,xmm5 | |
1404 pand xmm2,XMMWORD[480+rsp] | |
1405 pand xmm3,XMMWORD[((480+16))+rsp] | |
1406 por xmm2,xmm0 | |
1407 por xmm3,xmm1 | |
1408 | |
1409 movdqa xmm0,xmm4 | |
1410 movdqa xmm1,xmm4 | |
1411 pandn xmm0,xmm2 | |
1412 movdqa xmm2,xmm4 | |
1413 pandn xmm1,xmm3 | |
1414 movdqa xmm3,xmm4 | |
1415 pand xmm2,XMMWORD[384+rsp] | |
1416 pand xmm3,XMMWORD[((384+16))+rsp] | |
1417 por xmm2,xmm0 | |
1418 por xmm3,xmm1 | |
1419 movdqu XMMWORD[rdi],xmm2 | |
1420 movdqu XMMWORD[16+rdi],xmm3 | |
1421 | |
1422 movdqa xmm0,xmm5 | |
1423 movdqa xmm1,xmm5 | |
1424 pandn xmm0,XMMWORD[320+rsp] | |
1425 movdqa xmm2,xmm5 | |
1426 pandn xmm1,XMMWORD[((320+16))+rsp] | |
1427 movdqa xmm3,xmm5 | |
1428 pand xmm2,XMMWORD[512+rsp] | |
1429 pand xmm3,XMMWORD[((512+16))+rsp] | |
1430 por xmm2,xmm0 | |
1431 por xmm3,xmm1 | |
1432 | |
1433 movdqa xmm0,xmm4 | |
1434 movdqa xmm1,xmm4 | |
1435 pandn xmm0,xmm2 | |
1436 movdqa xmm2,xmm4 | |
1437 pandn xmm1,xmm3 | |
1438 movdqa xmm3,xmm4 | |
1439 pand xmm2,XMMWORD[416+rsp] | |
1440 pand xmm3,XMMWORD[((416+16))+rsp] | |
1441 por xmm2,xmm0 | |
1442 por xmm3,xmm1 | |
1443 movdqu XMMWORD[32+rdi],xmm2 | |
1444 movdqu XMMWORD[48+rdi],xmm3 | |
1445 | |
1446 $L$add_doneq: | |
1447 add rsp,32*18+8 | |
1448 pop r15 | |
1449 pop r14 | |
1450 pop r13 | |
1451 pop r12 | |
1452 pop rbx | |
1453 pop rbp | |
1454 mov rdi,QWORD[8+rsp] ;WIN64 epilogue | |
1455 mov rsi,QWORD[16+rsp] | |
1456 DB 0F3h,0C3h ;repret | |
1457 $L$SEH_end_ecp_nistz256_point_add: | |
;-----------------------------------------------------------------------
; ecp_nistz256_point_add_affine
;
; ABI:   Win64 entry. Arguments arrive in rcx, rdx, r8 and are remapped
;        to rdi, rsi, rdx right away. rdi/rsi (callee-saved on Win64) are
;        parked at [8+rsp]/[16+rsp] on entry and reloaded in the epilogue.
; In:    rcx -> output buffer; 96 bytes are written (three 32-byte values)
;        rdx -> first input;   96 bytes are read   (three 32-byte values)
;        r8  -> second input;  64 bytes are read   (two 32-byte values)
; Saves: rbp, rbx, r12-r15 (pushed/popped here).
; Stack: 32*15+8 bytes of locals, used as 32-byte slots:
;          rsp+0 .. rsp+192   temporaries
;          rsp+224/256/288    computed result, output bytes 0-31/32-63/64-95
;          rsp+320/352/384    copy of first input,  bytes 0-31/32-63/64-95
;          rsp+416/448        copy of second input, bytes 0-31/32-63
;        With the ABI entry alignment (rsp % 16 == 8), six pushes plus
;        32*15+8 leave rsp 16-byte aligned, as the movdqa stack accesses
;        below require.
; Masks: xmm5 = all-ones iff bytes 64..95 of the first input are all zero.
;        xmm4 = all-ones iff all 64 bytes of the second input are zero.
;        The final stores are a branch-free select driven by these masks.
;
; NOTE(review): the __ecp_nistz256_* helpers are defined outside this
; block. Notes of the form "slot A <- op(slot B, slot C)" describe the
; pointers handed over in rdi/rsi/rbx; the arithmetic each helper performs
; and its register-return convention are presumed from its name and from
; how the results are consumed here - confirm against the helpers. The
; code also relies on the helpers leaving xmm0, xmm3, xmm4 and xmm5
; untouched, which is not verifiable from this block.
; NOTE(review): the names suggest addition of a Jacobian point (first
; input) and an affine point (second input) on P-256 - confirm.
;-----------------------------------------------------------------------
1458 global ecp_nistz256_point_add_affine | |
1459 | |
1460 ALIGN 32 | |
1461 ecp_nistz256_point_add_affine: | |
1462 mov QWORD[8+rsp],rdi ;WIN64 prologue | |
1463 mov QWORD[16+rsp],rsi | |
1464 mov rax,rsp | |
1465 $L$SEH_begin_ecp_nistz256_point_add_affine: | |
; Remap Win64 argument registers (rcx, rdx, r8) to rdi, rsi, rdx.
1466 mov rdi,rcx | |
1467 mov rsi,rdx | |
1468 mov rdx,r8 | |
1469 | |
1470 | |
1471 push rbp | |
1472 push rbx | |
1473 push r12 | |
1474 push r13 | |
1475 push r14 | |
1476 push r15 | |
1477 sub rsp,32*15+8 | |
1478 | |
; Load the 96-byte first input into xmm0..xmm5 and keep the second-input
; pointer in rbx. The four qwords at [rsi+64] are also loaded into
; rax, r14, r15, r8 as register operands for the sqr_montq call below.
1479 movdqu xmm0,XMMWORD[rsi] | |
1480 mov rbx,rdx | |
1481 movdqu xmm1,XMMWORD[16+rsi] | |
1482 movdqu xmm2,XMMWORD[32+rsi] | |
1483 movdqu xmm3,XMMWORD[48+rsi] | |
1484 movdqu xmm4,XMMWORD[64+rsi] | |
1485 movdqu xmm5,XMMWORD[80+rsi] | |
1486 mov rax,QWORD[((64+0))+rsi] | |
1487 mov r14,QWORD[((64+8))+rsi] | |
1488 mov r15,QWORD[((64+16))+rsi] | |
1489 mov r8,QWORD[((64+24))+rsi] | |
; Spill the first input to stack slots 320/352/384.
1490 movdqa XMMWORD[320+rsp],xmm0 | |
1491 movdqa XMMWORD[(320+16)+rsp],xmm1 | |
1492 movdqa XMMWORD[352+rsp],xmm2 | |
1493 movdqa XMMWORD[(352+16)+rsp],xmm3 | |
1494 movdqa XMMWORD[384+rsp],xmm4 | |
1495 movdqa XMMWORD[(384+16)+rsp],xmm5 | |
; xmm5 = OR of both halves of first-input bytes 64..95 (start of the
; "is it all zero?" reduction that ends in the xmm5 mask).
1496 por xmm5,xmm4 | |
1497 | |
; Load the 64-byte second input, spill it to slots 416/448, and OR all
; four 16-byte pieces together into xmm3. Interleaved with this, the
; pshufd/por pairs fold the four dwords of xmm5 so that dword 0 ends up
; holding the OR of all of them.
1498 movdqu xmm0,XMMWORD[rbx] | |
1499 pshufd xmm3,xmm5,0xb1 | |
1500 movdqu xmm1,XMMWORD[16+rbx] | |
1501 movdqu xmm2,XMMWORD[32+rbx] | |
1502 por xmm5,xmm3 | |
1503 movdqu xmm3,XMMWORD[48+rbx] | |
1504 movdqa XMMWORD[416+rsp],xmm0 | |
1505 pshufd xmm4,xmm5,0x1e | |
1506 movdqa XMMWORD[(416+16)+rsp],xmm1 | |
1507 por xmm1,xmm0 | |
; Bytes 66 48 0F 6E C7 encode "movq xmm0,rdi": the output pointer is
; parked in xmm0 so that rdi can be reused for the helper calls.
1508 DB 102,72,15,110,199 | |
1509 movdqa XMMWORD[448+rsp],xmm2 | |
1510 movdqa XMMWORD[(448+16)+rsp],xmm3 | |
1511 por xmm3,xmm2 | |
1512 por xmm5,xmm4 | |
1513 pxor xmm4,xmm4 | |
1514 por xmm3,xmm1 | |
1515 | |
; slot 32 <- sqr_mont(first-input bytes 64..95); rsi is advanced to rsi+64.
1516 lea rsi,[((64-0))+rsi] | |
1517 lea rdi,[32+rsp] | |
1518 call __ecp_nistz256_sqr_montq | |
1519 | |
; Finish both masks while setting up the next multiply:
;   xmm5: compare the folded OR against zero, broadcast dword 0.
;   xmm4: fold the second-input OR (xmm3) the same way, compare, broadcast.
; r12, r13, r14, r15 (as left by the call above) are copied into
; r9, r10, r11, r12, and rax takes the first qword of the second input.
1520 pcmpeqd xmm5,xmm4 | |
1521 pshufd xmm4,xmm3,0xb1 | |
1522 mov rax,QWORD[rbx] | |
1523 | |
1524 mov r9,r12 | |
1525 por xmm4,xmm3 | |
1526 pshufd xmm5,xmm5,0 | |
1527 pshufd xmm3,xmm4,0x1e | |
1528 mov r10,r13 | |
1529 por xmm4,xmm3 | |
1530 pxor xmm3,xmm3 | |
1531 mov r11,r14 | |
1532 pcmpeqd xmm4,xmm3 | |
1533 pshufd xmm4,xmm4,0 | |
1534 | |
; slot 0 <- mul_mont(slot 32, second input at [rbx])
1535 lea rsi,[((32-0))+rsp] | |
1536 mov r12,r15 | |
1537 lea rdi,[rsp] | |
1538 call __ecp_nistz256_mul_montq | |
1539 | |
; slot 64 <- (value left in registers by the call above) - slot 320
1540 lea rbx,[320+rsp] | |
1541 lea rdi,[64+rsp] | |
1542 call __ecp_nistz256_sub_fromq | |
1543 | |
; slot 32 <- mul_mont(slot 32, slot 384)
1544 mov rax,QWORD[384+rsp] | |
1545 lea rbx,[384+rsp] | |
1546 mov r9,QWORD[((0+32))+rsp] | |
1547 mov r10,QWORD[((8+32))+rsp] | |
1548 lea rsi,[((0+32))+rsp] | |
1549 mov r11,QWORD[((16+32))+rsp] | |
1550 mov r12,QWORD[((24+32))+rsp] | |
1551 lea rdi,[32+rsp] | |
1552 call __ecp_nistz256_mul_montq | |
1553 | |
; slot 288 <- mul_mont(slot 64, slot 384); slot 288 later feeds output
; bytes 64..95.
1554 mov rax,QWORD[384+rsp] | |
1555 lea rbx,[384+rsp] | |
1556 mov r9,QWORD[((0+64))+rsp] | |
1557 mov r10,QWORD[((8+64))+rsp] | |
1558 lea rsi,[((0+64))+rsp] | |
1559 mov r11,QWORD[((16+64))+rsp] | |
1560 mov r12,QWORD[((24+64))+rsp] | |
1561 lea rdi,[288+rsp] | |
1562 call __ecp_nistz256_mul_montq | |
1563 | |
; slot 32 <- mul_mont(slot 32, slot 448)
1564 mov rax,QWORD[448+rsp] | |
1565 lea rbx,[448+rsp] | |
1566 mov r9,QWORD[((0+32))+rsp] | |
1567 mov r10,QWORD[((8+32))+rsp] | |
1568 lea rsi,[((0+32))+rsp] | |
1569 mov r11,QWORD[((16+32))+rsp] | |
1570 mov r12,QWORD[((24+32))+rsp] | |
1571 lea rdi,[32+rsp] | |
1572 call __ecp_nistz256_mul_montq | |
1573 | |
; slot 96 <- (value left in registers by the call above) - slot 352
1574 lea rbx,[352+rsp] | |
1575 lea rdi,[96+rsp] | |
1576 call __ecp_nistz256_sub_fromq | |
1577 | |
; slot 128 <- sqr_mont(slot 64)
1578 mov rax,QWORD[((0+64))+rsp] | |
1579 mov r14,QWORD[((8+64))+rsp] | |
1580 lea rsi,[((0+64))+rsp] | |
1581 mov r15,QWORD[((16+64))+rsp] | |
1582 mov r8,QWORD[((24+64))+rsp] | |
1583 lea rdi,[128+rsp] | |
1584 call __ecp_nistz256_sqr_montq | |
1585 | |
; slot 192 <- sqr_mont(slot 96)
1586 mov rax,QWORD[((0+96))+rsp] | |
1587 mov r14,QWORD[((8+96))+rsp] | |
1588 lea rsi,[((0+96))+rsp] | |
1589 mov r15,QWORD[((16+96))+rsp] | |
1590 mov r8,QWORD[((24+96))+rsp] | |
1591 lea rdi,[192+rsp] | |
1592 call __ecp_nistz256_sqr_montq | |
1593 | |
; slot 160 <- mul_mont(slot 64, slot 128)
1594 mov rax,QWORD[128+rsp] | |
1595 lea rbx,[128+rsp] | |
1596 mov r9,QWORD[((0+64))+rsp] | |
1597 mov r10,QWORD[((8+64))+rsp] | |
1598 lea rsi,[((0+64))+rsp] | |
1599 mov r11,QWORD[((16+64))+rsp] | |
1600 mov r12,QWORD[((24+64))+rsp] | |
1601 lea rdi,[160+rsp] | |
1602 call __ecp_nistz256_mul_montq | |
1603 | |
; slot 0 <- mul_mont(slot 128, slot 320)
1604 mov rax,QWORD[320+rsp] | |
1605 lea rbx,[320+rsp] | |
1606 mov r9,QWORD[((0+128))+rsp] | |
1607 mov r10,QWORD[((8+128))+rsp] | |
1608 lea rsi,[((0+128))+rsp] | |
1609 mov r11,QWORD[((16+128))+rsp] | |
1610 mov r12,QWORD[((24+128))+rsp] | |
1611 lea rdi,[rsp] | |
1612 call __ecp_nistz256_mul_montq | |
1613 | |
1614 | |
1615 | |
1616 | |
; Inline doubling of the 256-bit value in r12, r13, r8, r9 (as left by the
; call above); the carry out of the top word is collected in r11.
; Copies of the doubled words are kept in rax, rbp, rcx, r10.
1617 xor r11,r11 | |
1618 add r12,r12 | |
1619 lea rsi,[192+rsp] | |
1620 adc r13,r13 | |
1621 mov rax,r12 | |
1622 adc r8,r8 | |
1623 adc r9,r9 | |
1624 mov rbp,r13 | |
1625 adc r11,0 | |
1626 | |
; Trial subtraction of the four-word constant (-1, r14, 0, r15) with the
; borrow propagated through r11.
; NOTE(review): r14/r15 are not set in this block; presumably the helper
; leaves words 1 and 3 of $L$poly there - confirm.
1627 sub r12,-1 | |
1628 mov rcx,r8 | |
1629 sbb r13,r14 | |
1630 sbb r8,0 | |
1631 mov r10,r9 | |
1632 sbb r9,r15 | |
1633 sbb r11,0 | |
1634 | |
; If the subtraction borrowed, put back the unsubtracted doubled value.
; Each cmovc is paired with a load that reuses the just-freed register:
; rax, rbp, rcx, r10 <- slot 192 as operands for subq.
1635 cmovc r12,rax | |
1636 mov rax,QWORD[rsi] | |
1637 cmovc r13,rbp | |
1638 mov rbp,QWORD[8+rsi] | |
1639 cmovc r8,rcx | |
1640 mov rcx,QWORD[16+rsi] | |
1641 cmovc r9,r10 | |
1642 mov r10,QWORD[24+rsi] | |
1643 | |
; registers <- subq(slot 192 in rax/rbp/rcx/r10, doubled value in
; r12/r13/r8/r9); operand order presumed - confirm against the helper.
1644 call __ecp_nistz256_subq | |
1645 | |
; slot 224 <- (value left in registers by the call above) - slot 160;
; slot 224 later feeds output bytes 0..31.
1646 lea rbx,[160+rsp] | |
1647 lea rdi,[224+rsp] | |
1648 call __ecp_nistz256_sub_fromq | |
1649 | |
; registers <- subq(slot 0 in rax/rbp/rcx/r10, value left in
; r12/r13/r8/r9 by the call above)
1650 mov rax,QWORD[((0+0))+rsp] | |
1651 mov rbp,QWORD[((0+8))+rsp] | |
1652 mov rcx,QWORD[((0+16))+rsp] | |
1653 mov r10,QWORD[((0+24))+rsp] | |
1654 lea rdi,[64+rsp] | |
1655 | |
1656 call __ecp_nistz256_subq | |
1657 | |
; subq hands its result back in r12, r13, r8, r9; store it to slot 64
; (rdi = rsp+64). The loads for the next multiply are interleaved:
; slot 32 <- mul_mont(slot 160, slot 352)
1658 mov QWORD[rdi],r12 | |
1659 mov QWORD[8+rdi],r13 | |
1660 mov QWORD[16+rdi],r8 | |
1661 mov QWORD[24+rdi],r9 | |
1662 mov rax,QWORD[352+rsp] | |
1663 lea rbx,[352+rsp] | |
1664 mov r9,QWORD[((0+160))+rsp] | |
1665 mov r10,QWORD[((8+160))+rsp] | |
1666 lea rsi,[((0+160))+rsp] | |
1667 mov r11,QWORD[((16+160))+rsp] | |
1668 mov r12,QWORD[((24+160))+rsp] | |
1669 lea rdi,[32+rsp] | |
1670 call __ecp_nistz256_mul_montq | |
1671 | |
; slot 64 <- mul_mont(slot 64, slot 96)
1672 mov rax,QWORD[96+rsp] | |
1673 lea rbx,[96+rsp] | |
1674 mov r9,QWORD[((0+64))+rsp] | |
1675 mov r10,QWORD[((8+64))+rsp] | |
1676 lea rsi,[((0+64))+rsp] | |
1677 mov r11,QWORD[((16+64))+rsp] | |
1678 mov r12,QWORD[((24+64))+rsp] | |
1679 lea rdi,[64+rsp] | |
1680 call __ecp_nistz256_mul_montq | |
1681 | |
; slot 256 <- (value left in registers by the call above) - slot 32;
; slot 256 later feeds output bytes 32..63.
1682 lea rbx,[32+rsp] | |
1683 lea rdi,[256+rsp] | |
1684 call __ecp_nistz256_sub_fromq | |
1685 | |
; Bytes 66 48 0F 7E C7 encode "movq rdi,xmm0": recover the output pointer
; parked in xmm0 near the top of the function.
1686 DB 102,72,15,126,199 | |
1687 | |
; Output bytes 64..95, selected without branching:
;   t   = xmm5 ? $L$ONE_mont : [288+rsp]
;   out = xmm4 ? [384+rsp]   : t
; (pandn gives ~mask & value, pand gives mask & value, por merges them.)
1688 movdqa xmm0,xmm5 | |
1689 movdqa xmm1,xmm5 | |
1690 pandn xmm0,XMMWORD[288+rsp] | |
1691 movdqa xmm2,xmm5 | |
1692 pandn xmm1,XMMWORD[((288+16))+rsp] | |
1693 movdqa xmm3,xmm5 | |
1694 pand xmm2,XMMWORD[$L$ONE_mont] | |
1695 pand xmm3,XMMWORD[(($L$ONE_mont+16))] | |
1696 por xmm2,xmm0 | |
1697 por xmm3,xmm1 | |
1698 | |
1699 movdqa xmm0,xmm4 | |
1700 movdqa xmm1,xmm4 | |
1701 pandn xmm0,xmm2 | |
1702 movdqa xmm2,xmm4 | |
1703 pandn xmm1,xmm3 | |
1704 movdqa xmm3,xmm4 | |
1705 pand xmm2,XMMWORD[384+rsp] | |
1706 pand xmm3,XMMWORD[((384+16))+rsp] | |
1707 por xmm2,xmm0 | |
1708 por xmm3,xmm1 | |
1709 movdqu XMMWORD[64+rdi],xmm2 | |
1710 movdqu XMMWORD[80+rdi],xmm3 | |
1711 | |
; Output bytes 0..31:
;   t   = xmm5 ? [416+rsp] : [224+rsp]
;   out = xmm4 ? [320+rsp] : t
1712 movdqa xmm0,xmm5 | |
1713 movdqa xmm1,xmm5 | |
1714 pandn xmm0,XMMWORD[224+rsp] | |
1715 movdqa xmm2,xmm5 | |
1716 pandn xmm1,XMMWORD[((224+16))+rsp] | |
1717 movdqa xmm3,xmm5 | |
1718 pand xmm2,XMMWORD[416+rsp] | |
1719 pand xmm3,XMMWORD[((416+16))+rsp] | |
1720 por xmm2,xmm0 | |
1721 por xmm3,xmm1 | |
1722 | |
1723 movdqa xmm0,xmm4 | |
1724 movdqa xmm1,xmm4 | |
1725 pandn xmm0,xmm2 | |
1726 movdqa xmm2,xmm4 | |
1727 pandn xmm1,xmm3 | |
1728 movdqa xmm3,xmm4 | |
1729 pand xmm2,XMMWORD[320+rsp] | |
1730 pand xmm3,XMMWORD[((320+16))+rsp] | |
1731 por xmm2,xmm0 | |
1732 por xmm3,xmm1 | |
1733 movdqu XMMWORD[rdi],xmm2 | |
1734 movdqu XMMWORD[16+rdi],xmm3 | |
1735 | |
; Output bytes 32..63:
;   t   = xmm5 ? [448+rsp] : [256+rsp]
;   out = xmm4 ? [352+rsp] : t
1736 movdqa xmm0,xmm5 | |
1737 movdqa xmm1,xmm5 | |
1738 pandn xmm0,XMMWORD[256+rsp] | |
1739 movdqa xmm2,xmm5 | |
1740 pandn xmm1,XMMWORD[((256+16))+rsp] | |
1741 movdqa xmm3,xmm5 | |
1742 pand xmm2,XMMWORD[448+rsp] | |
1743 pand xmm3,XMMWORD[((448+16))+rsp] | |
1744 por xmm2,xmm0 | |
1745 por xmm3,xmm1 | |
1746 | |
1747 movdqa xmm0,xmm4 | |
1748 movdqa xmm1,xmm4 | |
1749 pandn xmm0,xmm2 | |
1750 movdqa xmm2,xmm4 | |
1751 pandn xmm1,xmm3 | |
1752 movdqa xmm3,xmm4 | |
1753 pand xmm2,XMMWORD[352+rsp] | |
1754 pand xmm3,XMMWORD[((352+16))+rsp] | |
1755 por xmm2,xmm0 | |
1756 por xmm3,xmm1 | |
1757 movdqu XMMWORD[32+rdi],xmm2 | |
1758 movdqu XMMWORD[48+rdi],xmm3 | |
1759 | |
; Release the locals and restore callee-saved registers in reverse push
; order. rsp is then back at its entry value, so the rdi/rsi slots written
; by the prologue are addressed with the same offsets.
1760 add rsp,32*15+8 | |
1761 pop r15 | |
1762 pop r14 | |
1763 pop r13 | |
1764 pop r12 | |
1765 pop rbx | |
1766 pop rbp | |
1767 mov rdi,QWORD[8+rsp] ;WIN64 epilogue | |
1768 mov rsi,QWORD[16+rsp] | |
1769 DB 0F3h,0C3h ;repret | |
1770 $L$SEH_end_ecp_nistz256_point_add_affine: | |
OLD | NEW |