OLD | NEW |
| (Empty) |
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
.text


// Constant pool for the NIST P-256 (secp256r1) routines below.
.p2align 6
// The P-256 prime p = 2^256 - 2^224 + 2^192 + 2^96 - 1,
// stored as four little-endian 64-bit limbs (least significant first).
L$poly:
.quad 0xffffffffffffffff, 0x00000000ffffffff, 0x0000000000000000, 0xffffffff00000001

// Broadcast dword constants 1/2/3, used by the constant-time table-select
// routines (_ecp_nistz256_select_w5/w7) to step the index comparator.
L$One:
.long 1,1,1,1,1,1,1,1
L$Two:
.long 2,2,2,2,2,2,2,2
L$Three:
.long 3,3,3,3,3,3,3,3
// The value 1 in Montgomery representation, i.e. R mod p with R = 2^256
// — NOTE(review): value matches the canonical nistz256 tables; confirm
// against the reference implementation.
L$ONE_mont:
.quad 0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe
//-----------------------------------------------------------------------
// void ecp_nistz256_neg(uint64_t res[4], const uint64_t a[4]);
// ABI:   SysV AMD64.  In: rdi = res, rsi = a.
// Computes res = -a mod p in constant time (no data-dependent branches;
// the final selection is done with cmov).
// Clobbers: rax, rcx, rdx, rsi, r8-r13, flags.
//-----------------------------------------------------------------------
.globl _ecp_nistz256_neg
.private_extern _ecp_nistz256_neg

.p2align 5
_ecp_nistz256_neg:
pushq %r12
pushq %r13

// r8:r9:r10:r11 = 0, r13 = borrow accumulator
xorq %r8,%r8
xorq %r9,%r9
xorq %r10,%r10
xorq %r11,%r11
xorq %r13,%r13

// 0 - a; r13 becomes non-zero iff a != 0 (a borrow propagated out)
subq 0(%rsi),%r8
sbbq 8(%rsi),%r9
sbbq 16(%rsi),%r10
movq %r8,%rax                   // save pre-correction limbs
sbbq 24(%rsi),%r11
leaq L$poly(%rip),%rsi
movq %r9,%rdx
sbbq $0,%r13

// add p back: (0 - a) + p = p - a
addq 0(%rsi),%r8
movq %r10,%rcx
adcq 8(%rsi),%r9
adcq 16(%rsi),%r10
movq %r11,%r12
adcq 24(%rsi),%r11
testq %r13,%r13

// if a was zero (no borrow), keep the raw 0 result instead of p
cmovzq %rax,%r8
cmovzq %rdx,%r9
movq %r8,0(%rdi)
cmovzq %rcx,%r10
movq %r9,8(%rdi)
cmovzq %r12,%r11
movq %r10,16(%rdi)
movq %r11,24(%rdi)

popq %r13
popq %r12
.byte 0xf3,0xc3                 // ret (raw bytes, as emitted by perlasm)
64 | |
65 | |
66 | |
67 | |
68 | |
69 | |
70 | |
//-----------------------------------------------------------------------
// void ecp_nistz256_mul_mont(uint64_t res[4], const uint64_t a[4],
//                            const uint64_t b[4]);
// ABI:   SysV AMD64.  In: rdi = res, rsi = a, rdx = b.
// Montgomery multiplication mod p: res = a * b * 2^-256 mod p.
// This is a thin wrapper that loads the operands into the register
// contract expected by __ecp_nistz256_mul_montq and saves/restores
// all callee-saved GPRs it uses.
//-----------------------------------------------------------------------
.globl _ecp_nistz256_mul_mont
.private_extern _ecp_nistz256_mul_mont

.p2align 5
_ecp_nistz256_mul_mont:
L$mul_mont:
pushq %rbp
pushq %rbx
pushq %r12
pushq %r13
pushq %r14
pushq %r15
movq %rdx,%rbx                  // rbx = b (rdx is clobbered by mulq)
movq 0(%rdx),%rax               // rax = b[0]
movq 0(%rsi),%r9                // r9..r12 = a[0..3]
movq 8(%rsi),%r10
movq 16(%rsi),%r11
movq 24(%rsi),%r12

call __ecp_nistz256_mul_montq
L$mul_mont_done:
popq %r15
popq %r14
popq %r13
popq %r12
popq %rbx
popq %rbp
.byte 0xf3,0xc3                 // ret
99 | |
100 | |
101 | |
//-----------------------------------------------------------------------
// __ecp_nistz256_mul_montq — Montgomery multiplication core (scalar mulq).
// In:  rax = b[0], rbx -> b, rsi -> a, r9:r10:r11:r12 = a[0..3],
//      rdi -> result.
// Out: result limbs in r12:r13:r8:r9, also stored to 0..24(%rdi).
// Interleaves the four multiply-by-b[i] passes with per-limb reduction.
// Reduction exploits the special form of p: folding a limb t requires
// only t<<32, t>>32 and t * (p>>192) (L$poly+24), no general multiply
// by p.  Fully branch-free (constant time); final subtract of p is
// selected with cmovc.
// Clobbers: rax, rbp, rcx, rdx, r8-r15, flags.
//-----------------------------------------------------------------------
.p2align 5
__ecp_nistz256_mul_montq:

// ---- pass 0: acc = a * b[0] ----
movq %rax,%rbp                  // rbp = b[0], reloaded into rax per mulq
mulq %r9
movq L$poly+8(%rip),%r14        // r14 = p[1], kept live for reductions
movq %rax,%r8
movq %rbp,%rax
movq %rdx,%r9

mulq %r10
movq L$poly+24(%rip),%r15       // r15 = p[3], kept live for reductions
addq %rax,%r9
movq %rbp,%rax
adcq $0,%rdx
movq %rdx,%r10

mulq %r11
addq %rax,%r10
movq %rbp,%rax
adcq $0,%rdx
movq %rdx,%r11

mulq %r12
addq %rax,%r11
movq %r8,%rax
adcq $0,%rdx
xorq %r13,%r13
movq %rdx,%r12

// ---- reduce limb 0: fold r8 (acc += r8 * p, then drop low limb) ----
movq %r8,%rbp
shlq $32,%r8
mulq %r15
shrq $32,%rbp
addq %r8,%r9
adcq %rbp,%r10
adcq %rax,%r11
movq 8(%rbx),%rax               // preload b[1]
adcq %rdx,%r12
adcq $0,%r13
xorq %r8,%r8

// ---- pass 1: acc += a * b[1] ----
movq %rax,%rbp
mulq 0(%rsi)
addq %rax,%r9
movq %rbp,%rax
adcq $0,%rdx
movq %rdx,%rcx

mulq 8(%rsi)
addq %rcx,%r10
adcq $0,%rdx
addq %rax,%r10
movq %rbp,%rax
adcq $0,%rdx
movq %rdx,%rcx

mulq 16(%rsi)
addq %rcx,%r11
adcq $0,%rdx
addq %rax,%r11
movq %rbp,%rax
adcq $0,%rdx
movq %rdx,%rcx

mulq 24(%rsi)
addq %rcx,%r12
adcq $0,%rdx
addq %rax,%r12
movq %r9,%rax
adcq %rdx,%r13
adcq $0,%r8

// ---- reduce limb 1: fold r9 ----
movq %r9,%rbp
shlq $32,%r9
mulq %r15
shrq $32,%rbp
addq %r9,%r10
adcq %rbp,%r11
adcq %rax,%r12
movq 16(%rbx),%rax              // preload b[2]
adcq %rdx,%r13
adcq $0,%r8
xorq %r9,%r9

// ---- pass 2: acc += a * b[2] ----
movq %rax,%rbp
mulq 0(%rsi)
addq %rax,%r10
movq %rbp,%rax
adcq $0,%rdx
movq %rdx,%rcx

mulq 8(%rsi)
addq %rcx,%r11
adcq $0,%rdx
addq %rax,%r11
movq %rbp,%rax
adcq $0,%rdx
movq %rdx,%rcx

mulq 16(%rsi)
addq %rcx,%r12
adcq $0,%rdx
addq %rax,%r12
movq %rbp,%rax
adcq $0,%rdx
movq %rdx,%rcx

mulq 24(%rsi)
addq %rcx,%r13
adcq $0,%rdx
addq %rax,%r13
movq %r10,%rax
adcq %rdx,%r8
adcq $0,%r9

// ---- reduce limb 2: fold r10 ----
movq %r10,%rbp
shlq $32,%r10
mulq %r15
shrq $32,%rbp
addq %r10,%r11
adcq %rbp,%r12
adcq %rax,%r13
movq 24(%rbx),%rax              // preload b[3]
adcq %rdx,%r8
adcq $0,%r9
xorq %r10,%r10

// ---- pass 3: acc += a * b[3] ----
movq %rax,%rbp
mulq 0(%rsi)
addq %rax,%r11
movq %rbp,%rax
adcq $0,%rdx
movq %rdx,%rcx

mulq 8(%rsi)
addq %rcx,%r12
adcq $0,%rdx
addq %rax,%r12
movq %rbp,%rax
adcq $0,%rdx
movq %rdx,%rcx

mulq 16(%rsi)
addq %rcx,%r13
adcq $0,%rdx
addq %rax,%r13
movq %rbp,%rax
adcq $0,%rdx
movq %rdx,%rcx

mulq 24(%rsi)
addq %rcx,%r8
adcq $0,%rdx
addq %rax,%r8
movq %r11,%rax
adcq %rdx,%r9
adcq $0,%r10

// ---- reduce limb 3: fold r11; result now in r12:r13:r8:r9 (+carry r10) ----
movq %r11,%rbp
shlq $32,%r11
mulq %r15
shrq $32,%rbp
addq %r11,%r12
adcq %rbp,%r13
movq %r12,%rcx                  // snapshot pre-subtraction limbs
adcq %rax,%r8
adcq %rdx,%r9
movq %r13,%rbp
adcq $0,%r10

// ---- conditional final subtraction of p (branch-free) ----
subq $-1,%r12                   // subtract p[0] = 2^64 - 1
movq %r8,%rbx
sbbq %r14,%r13                  // - p[1]
sbbq $0,%r8                     // - p[2] = 0
movq %r9,%rdx
sbbq %r15,%r9                   // - p[3]
sbbq $0,%r10                    // CF set iff acc < p

cmovcq %rcx,%r12                // keep unsubtracted value if acc < p
cmovcq %rbp,%r13
movq %r12,0(%rdi)
cmovcq %rbx,%r8
movq %r13,8(%rdi)
cmovcq %rdx,%r9
movq %r8,16(%rdi)
movq %r9,24(%rdi)

.byte 0xf3,0xc3                 // ret
315 | |
316 | |
317 | |
318 | |
319 | |
320 | |
321 | |
322 | |
323 | |
//-----------------------------------------------------------------------
// void ecp_nistz256_sqr_mont(uint64_t res[4], const uint64_t a[4]);
// ABI:   SysV AMD64.  In: rdi = res, rsi = a.
// Montgomery squaring mod p: res = a * a * 2^-256 mod p.
// Wrapper that loads a into the register contract expected by
// __ecp_nistz256_sqr_montq (rax,r14,r15,r8 = a[0..3]).
//-----------------------------------------------------------------------
.globl _ecp_nistz256_sqr_mont
.private_extern _ecp_nistz256_sqr_mont

.p2align 5
_ecp_nistz256_sqr_mont:
pushq %rbp
pushq %rbx
pushq %r12
pushq %r13
pushq %r14
pushq %r15
movq 0(%rsi),%rax               // rax,r14,r15,r8 = a[0..3]
movq 8(%rsi),%r14
movq 16(%rsi),%r15
movq 24(%rsi),%r8

call __ecp_nistz256_sqr_montq
L$sqr_mont_done:
popq %r15
popq %r14
popq %r13
popq %r12
popq %rbx
popq %rbp
.byte 0xf3,0xc3                 // ret
349 | |
350 | |
351 | |
//-----------------------------------------------------------------------
// __ecp_nistz256_sqr_montq — Montgomery squaring core (scalar mulq).
// In:  rax,r14,r15,r8 = a[0..3], rsi -> a, rdi -> result.
// Out: result limbs in r12:r13:r14:r15, also stored to 0..24(%rdi).
// Strategy: compute the off-diagonal products once, double them, add
// the diagonal squares, then run four reduction steps exploiting the
// sparse form of p, and a final branch-free conditional subtract of p.
// Clobbers: rax, rbp, rcx, rdx, rsi, r8-r15, flags.
//-----------------------------------------------------------------------
.p2align 5
__ecp_nistz256_sqr_montq:
// ---- off-diagonal products a[i]*a[j], i<j ----
movq %rax,%r13                  // r13 = a[0]
mulq %r14                       // a[0]*a[1]
movq %rax,%r9
movq %r15,%rax
movq %rdx,%r10

mulq %r13                       // a[2]*a[0]
addq %rax,%r10
movq %r8,%rax
adcq $0,%rdx
movq %rdx,%r11

mulq %r13                       // a[3]*a[0]
addq %rax,%r11
movq %r15,%rax
adcq $0,%rdx
movq %rdx,%r12

mulq %r14                       // a[2]*a[1]
addq %rax,%r11
movq %r8,%rax
adcq $0,%rdx
movq %rdx,%rbp

mulq %r14                       // a[3]*a[1]
addq %rax,%r12
movq %r8,%rax
adcq $0,%rdx
addq %rbp,%r12
movq %rdx,%r13
adcq $0,%r13

mulq %r15                       // a[3]*a[2]
xorq %r15,%r15
addq %rax,%r13
movq 0(%rsi),%rax               // reload a[0] for the diagonal pass
movq %rdx,%r14
adcq $0,%r14

// ---- double the off-diagonal sum ----
addq %r9,%r9
adcq %r10,%r10
adcq %r11,%r11
adcq %r12,%r12
adcq %r13,%r13
adcq %r14,%r14
adcq $0,%r15

// ---- add diagonal squares a[i]^2 ----
mulq %rax
movq %rax,%r8
movq 8(%rsi),%rax
movq %rdx,%rcx

mulq %rax
addq %rcx,%r9
adcq %rax,%r10
movq 16(%rsi),%rax
adcq $0,%rdx
movq %rdx,%rcx

mulq %rax
addq %rcx,%r11
adcq %rax,%r12
movq 24(%rsi),%rax
adcq $0,%rdx
movq %rdx,%rcx

mulq %rax
addq %rcx,%r13
adcq %rax,%r14
movq %r8,%rax
adcq %rdx,%r15

movq L$poly+8(%rip),%rsi        // rsi = p[1]  (a pointer no longer needed)
movq L$poly+24(%rip),%rbp       // rbp = p[3]

// ---- reduction step 1: fold r8 ----
movq %r8,%rcx
shlq $32,%r8
mulq %rbp
shrq $32,%rcx
addq %r8,%r9
adcq %rcx,%r10
adcq %rax,%r11
movq %r9,%rax
adcq $0,%rdx

// ---- reduction step 2: fold r9 ----
movq %r9,%rcx
shlq $32,%r9
movq %rdx,%r8
mulq %rbp
shrq $32,%rcx
addq %r9,%r10
adcq %rcx,%r11
adcq %rax,%r8
movq %r10,%rax
adcq $0,%rdx

// ---- reduction step 3: fold r10 ----
movq %r10,%rcx
shlq $32,%r10
movq %rdx,%r9
mulq %rbp
shrq $32,%rcx
addq %r10,%r11
adcq %rcx,%r8
adcq %rax,%r9
movq %r11,%rax
adcq $0,%rdx

// ---- reduction step 4: fold r11 ----
movq %r11,%rcx
shlq $32,%r11
movq %rdx,%r10
mulq %rbp
shrq $32,%rcx
addq %r11,%r8
adcq %rcx,%r9
adcq %rax,%r10
adcq $0,%rdx
xorq %r11,%r11

// ---- add reduced low half into high half of the square ----
addq %r8,%r12
adcq %r9,%r13
movq %r12,%r8                   // snapshot pre-subtraction limbs
adcq %r10,%r14
adcq %rdx,%r15
movq %r13,%r9
adcq $0,%r11

// ---- conditional final subtraction of p (branch-free) ----
subq $-1,%r12                   // - p[0]
movq %r14,%r10
sbbq %rsi,%r13                  // - p[1]
sbbq $0,%r14                    // - p[2] = 0
movq %r15,%rcx
sbbq %rbp,%r15                  // - p[3]
sbbq $0,%r11                    // CF set iff acc < p

cmovcq %r8,%r12
cmovcq %r9,%r13
movq %r12,0(%rdi)
cmovcq %r10,%r14
movq %r13,8(%rdi)
cmovcq %rcx,%r15
movq %r14,16(%rdi)
movq %r15,24(%rdi)

.byte 0xf3,0xc3                 // ret
511 | |
512 | |
513 | |
//-----------------------------------------------------------------------
// void ecp_nistz256_select_w5(P256_POINT *out, const void *table, int idx);
// ABI:   SysV AMD64.  In: rdi = out, rsi = table, edx = idx.
// Constant-time selection of entry `idx` from a table of 16 points of
// 96 bytes (X|Y|Z) each.  Every entry is read; the wanted one is masked
// in with pcmpeqd/pand/por, so memory access pattern and timing do not
// depend on idx.  idx == 0 selects nothing and leaves out zeroed.
//-----------------------------------------------------------------------
.globl _ecp_nistz256_select_w5
.private_extern _ecp_nistz256_select_w5

.p2align 5
_ecp_nistz256_select_w5:
movdqa L$One(%rip),%xmm0        // xmm0 = {1,1,1,1} counter increment
movd %edx,%xmm1

pxor %xmm2,%xmm2                // xmm2..xmm7 accumulate the masked entry
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
pxor %xmm7,%xmm7

movdqa %xmm0,%xmm8              // xmm8 = running index, starts at 1
pshufd $0,%xmm1,%xmm1           // broadcast idx to all 4 dwords

movq $16,%rax                   // 16 table entries
L$select_loop_sse_w5:

movdqa %xmm8,%xmm15
paddd %xmm0,%xmm8
pcmpeqd %xmm1,%xmm15            // xmm15 = all-ones iff this entry == idx

movdqa 0(%rsi),%xmm9            // unconditionally load the whole entry
movdqa 16(%rsi),%xmm10
movdqa 32(%rsi),%xmm11
movdqa 48(%rsi),%xmm12
movdqa 64(%rsi),%xmm13
movdqa 80(%rsi),%xmm14
leaq 96(%rsi),%rsi

pand %xmm15,%xmm9               // keep it only when the mask matched
pand %xmm15,%xmm10
por %xmm9,%xmm2
pand %xmm15,%xmm11
por %xmm10,%xmm3
pand %xmm15,%xmm12
por %xmm11,%xmm4
pand %xmm15,%xmm13
por %xmm12,%xmm5
pand %xmm15,%xmm14
por %xmm13,%xmm6
por %xmm14,%xmm7

decq %rax
jnz L$select_loop_sse_w5

movdqu %xmm2,0(%rdi)
movdqu %xmm3,16(%rdi)
movdqu %xmm4,32(%rdi)
movdqu %xmm5,48(%rdi)
movdqu %xmm6,64(%rdi)
movdqu %xmm7,80(%rdi)
.byte 0xf3,0xc3                 // ret
570 | |
571 | |
572 | |
573 | |
//-----------------------------------------------------------------------
// void ecp_nistz256_select_w7(P256_POINT_AFFINE *out, const void *table,
//                             int idx);
// ABI:   SysV AMD64.  In: rdi = out, rsi = table, edx = idx.
// Constant-time selection of entry `idx` from a table of 64 affine
// points of 64 bytes (X|Y) each.  Same scan-and-mask technique as
// _ecp_nistz256_select_w5: all entries are touched regardless of idx.
//-----------------------------------------------------------------------
.globl _ecp_nistz256_select_w7
.private_extern _ecp_nistz256_select_w7

.p2align 5
_ecp_nistz256_select_w7:
movdqa L$One(%rip),%xmm8        // counter increment
movd %edx,%xmm1

pxor %xmm2,%xmm2                // xmm2..xmm5 accumulate the masked entry
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5

movdqa %xmm8,%xmm0              // xmm8 = running index, starts at 1
pshufd $0,%xmm1,%xmm1           // broadcast idx
movq $64,%rax                   // 64 table entries

L$select_loop_sse_w7:
movdqa %xmm8,%xmm15
paddd %xmm0,%xmm8
movdqa 0(%rsi),%xmm9
movdqa 16(%rsi),%xmm10
pcmpeqd %xmm1,%xmm15            // mask = (entry index == idx)
movdqa 32(%rsi),%xmm11
movdqa 48(%rsi),%xmm12
leaq 64(%rsi),%rsi

pand %xmm15,%xmm9
pand %xmm15,%xmm10
por %xmm9,%xmm2
pand %xmm15,%xmm11
por %xmm10,%xmm3
pand %xmm15,%xmm12
por %xmm11,%xmm4
prefetcht0 255(%rsi)            // prefetch ahead; pattern is idx-independent
por %xmm12,%xmm5

decq %rax
jnz L$select_loop_sse_w7

movdqu %xmm2,0(%rdi)
movdqu %xmm3,16(%rdi)
movdqu %xmm4,32(%rdi)
movdqu %xmm5,48(%rdi)
.byte 0xf3,0xc3                 // ret
619 | |
// AVX2 variant of the w7 table select.  Not implemented in this build:
// the entry point deliberately traps (0x0f,0x0b = ud2) so any caller
// that reaches it fails loudly instead of silently misbehaving.
.globl _ecp_nistz256_avx2_select_w7
.private_extern _ecp_nistz256_avx2_select_w7

.p2align 5
_ecp_nistz256_avx2_select_w7:
.byte 0x0f,0x0b                 // ud2
.byte 0xf3,0xc3                 // ret (unreachable)
627 | |
628 | |
//-----------------------------------------------------------------------
// __ecp_nistz256_add_toq — modular addition helper.
// In:  r12:r13:r8:r9 = a, rbx -> b, rdi -> result,
//      r14 = p[1], r15 = p[3] (preloaded by the caller).
// Out: r12:r13:r8:r9 = (a + b) mod p, also stored to 0..24(%rdi).
// Branch-free: add, then conditionally subtract p via cmovc.
//-----------------------------------------------------------------------
.p2align 5
__ecp_nistz256_add_toq:
xorq %r11,%r11                  // r11 = carry out of the addition
addq 0(%rbx),%r12
adcq 8(%rbx),%r13
movq %r12,%rax                  // snapshot the raw sum
adcq 16(%rbx),%r8
adcq 24(%rbx),%r9
movq %r13,%rbp
adcq $0,%r11

// trial-subtract p; CF tells us whether the sum was < p
subq $-1,%r12                   // - p[0]
movq %r8,%rcx
sbbq %r14,%r13                  // - p[1]
sbbq $0,%r8                     // - p[2] = 0
movq %r9,%r10
sbbq %r15,%r9                   // - p[3]
sbbq $0,%r11

cmovcq %rax,%r12                // sum < p: keep the raw sum
cmovcq %rbp,%r13
movq %r12,0(%rdi)
cmovcq %rcx,%r8
movq %r13,8(%rdi)
cmovcq %r10,%r9
movq %r8,16(%rdi)
movq %r9,24(%rdi)

.byte 0xf3,0xc3                 // ret
658 | |
659 | |
660 | |
//-----------------------------------------------------------------------
// __ecp_nistz256_sub_fromq — modular subtraction helper.
// In:  r12:r13:r8:r9 = a, rbx -> b, rdi -> result,
//      r14 = p[1], r15 = p[3] (preloaded by the caller).
// Out: r12:r13:r8:r9 = (a - b) mod p, also stored to 0..24(%rdi).
// Branch-free: subtract, then conditionally add p back when a borrow
// occurred (r11 = borrow mask from sbb r11,r11).
//-----------------------------------------------------------------------
.p2align 5
__ecp_nistz256_sub_fromq:
subq 0(%rbx),%r12
sbbq 8(%rbx),%r13
movq %r12,%rax                  // snapshot the raw difference
sbbq 16(%rbx),%r8
sbbq 24(%rbx),%r9
movq %r13,%rbp
sbbq %r11,%r11                  // r11 = 0 or all-ones (borrow mask)

// add p back unconditionally; select below based on the borrow
addq $-1,%r12                   // + p[0]
movq %r8,%rcx
adcq %r14,%r13                  // + p[1]
adcq $0,%r8                     // + p[2] = 0
movq %r9,%r10
adcq %r15,%r9                   // + p[3]
testq %r11,%r11

cmovzq %rax,%r12                // no borrow: keep the raw difference
cmovzq %rbp,%r13
movq %r12,0(%rdi)
cmovzq %rcx,%r8
movq %r13,8(%rdi)
cmovzq %r10,%r9
movq %r8,16(%rdi)
movq %r9,24(%rdi)

.byte 0xf3,0xc3                 // ret
689 | |
690 | |
691 | |
//-----------------------------------------------------------------------
// __ecp_nistz256_subq — modular subtraction, register-only variant.
// In:  rax:rbp:rcx:r10 = a, r12:r13:r8:r9 = b,
//      r14 = p[1], r15 = p[3] (preloaded by the caller).
// Out: r12:r13:r8:r9 = (a - b) mod p.  Result is NOT stored to memory;
//      callers write it out themselves.
// Branch-free correction, mirroring __ecp_nistz256_sub_fromq.
//-----------------------------------------------------------------------
.p2align 5
__ecp_nistz256_subq:
subq %r12,%rax
sbbq %r13,%rbp
movq %rax,%r12                  // r12:r13 get the raw difference copies
sbbq %r8,%rcx
sbbq %r9,%r10
movq %rbp,%r13
sbbq %r11,%r11                  // r11 = borrow mask

// add p back; keep corrected value only when a borrow occurred
addq $-1,%rax                   // + p[0]
movq %rcx,%r8
adcq %r14,%rbp                  // + p[1]
adcq $0,%rcx                    // + p[2] = 0
movq %r10,%r9
adcq %r15,%r10                  // + p[3]
testq %r11,%r11

cmovnzq %rax,%r12               // borrow: use the +p corrected limbs
cmovnzq %rbp,%r13
cmovnzq %rcx,%r8
cmovnzq %r10,%r9

.byte 0xf3,0xc3                 // ret
716 | |
717 | |
718 | |
//-----------------------------------------------------------------------
// __ecp_nistz256_mul_by_2q — modular doubling helper.
// In:  r12:r13:r8:r9 = a, rdi -> result,
//      r14 = p[1], r15 = p[3] (preloaded by the caller).
// Out: r12:r13:r8:r9 = 2a mod p, also stored to 0..24(%rdi).
// Branch-free: double via add-with-carry chain, then conditional
// subtract of p via cmovc (same pattern as __ecp_nistz256_add_toq).
//-----------------------------------------------------------------------
.p2align 5
__ecp_nistz256_mul_by_2q:
xorq %r11,%r11                  // carry out of the doubling
addq %r12,%r12
adcq %r13,%r13
movq %r12,%rax                  // snapshot the raw doubled value
adcq %r8,%r8
adcq %r9,%r9
movq %r13,%rbp
adcq $0,%r11

// trial-subtract p
subq $-1,%r12                   // - p[0]
movq %r8,%rcx
sbbq %r14,%r13                  // - p[1]
sbbq $0,%r8                     // - p[2] = 0
movq %r9,%r10
sbbq %r15,%r9                   // - p[3]
sbbq $0,%r11

cmovcq %rax,%r12                // 2a < p: keep raw value
cmovcq %rbp,%r13
movq %r12,0(%rdi)
cmovcq %rcx,%r8
movq %r13,8(%rdi)
cmovcq %r10,%r9
movq %r8,16(%rdi)
movq %r9,24(%rdi)

.byte 0xf3,0xc3                 // ret
748 | |
//-----------------------------------------------------------------------
// void ecp_nistz256_point_double(P256_POINT *out, const P256_POINT *in);
// ABI:   SysV AMD64.  In: rdi = out, rsi = in (Jacobian X|Y|Z, 32 bytes
//        each, Montgomery form).
// Doubles a point in Jacobian coordinates using the field helpers above.
// Temporaries live at fixed offsets in a 168-byte stack frame; output
// pointers for X/Y/Z are parked in xmm registers across helper calls
// (the raw .byte sequences below are movq moves between GPRs and XMM,
// emitted as literal encodings by the perlasm generator).
// NOTE(review): the algebra follows the standard nistz256 Jacobian
// doubling schedule — confirm against the reference implementation.
//-----------------------------------------------------------------------
.globl _ecp_nistz256_point_double
.private_extern _ecp_nistz256_point_double

.p2align 5
_ecp_nistz256_point_double:
pushq %rbp
pushq %rbx
pushq %r12
pushq %r13
pushq %r14
pushq %r15
subq $160+8,%rsp                // frame: temporaries at 0/32/64/96/128(%rsp)

L$point_double_shortcutq:       // entered directly by point_add's P==Q path
movdqu 0(%rsi),%xmm0            // copy in_x to the frame
movq %rsi,%rbx                  // rbx = in (survives helper calls)
movdqu 16(%rsi),%xmm1
movq 32+0(%rsi),%r12            // load in_y for mul_by_2
movq 32+8(%rsi),%r13
movq 32+16(%rsi),%r8
movq 32+24(%rsi),%r9
movq L$poly+8(%rip),%r14        // p[1], p[3] for the field helpers
movq L$poly+24(%rip),%r15
movdqa %xmm0,96(%rsp)           // 96(%rsp) = in_x
movdqa %xmm1,96+16(%rsp)
leaq 32(%rdi),%r10              // r10 = &out_y, r11 = &out_z
leaq 64(%rdi),%r11
.byte 102,72,15,110,199         // movq %rdi,%xmm0  (save &out_x)
.byte 102,73,15,110,202         // movq %r10,%xmm1  (save &out_y)
.byte 102,73,15,110,211         // movq %r11,%xmm2  (save &out_z)

leaq 0(%rsp),%rdi
call __ecp_nistz256_mul_by_2q   // 0(%rsp) = S = 2*in_y

movq 64+0(%rsi),%rax            // load in_z
movq 64+8(%rsi),%r14
movq 64+16(%rsi),%r15
movq 64+24(%rsi),%r8
leaq 64-0(%rsi),%rsi
leaq 64(%rsp),%rdi
call __ecp_nistz256_sqr_montq   // 64(%rsp) = Zsqr = in_z^2

movq 0+0(%rsp),%rax
movq 8+0(%rsp),%r14
leaq 0+0(%rsp),%rsi
movq 16+0(%rsp),%r15
movq 24+0(%rsp),%r8
leaq 0(%rsp),%rdi
call __ecp_nistz256_sqr_montq   // 0(%rsp) = S^2 = 4*in_y^2

movq 32(%rbx),%rax
movq 64+0(%rbx),%r9
movq 64+8(%rbx),%r10
movq 64+16(%rbx),%r11
movq 64+24(%rbx),%r12
leaq 64-0(%rbx),%rsi
leaq 32(%rbx),%rbx
.byte 102,72,15,126,215         // movq %xmm2,%rdi  (rdi = &out_z)
call __ecp_nistz256_mul_montq   // out_z = in_y * in_z
call __ecp_nistz256_mul_by_2q   // out_z = 2 * in_y * in_z

movq 96+0(%rsp),%r12            // M = in_x + Zsqr
movq 96+8(%rsp),%r13
leaq 64(%rsp),%rbx
movq 96+16(%rsp),%r8
movq 96+24(%rsp),%r9
leaq 32(%rsp),%rdi
call __ecp_nistz256_add_toq     // 32(%rsp) = in_x + Zsqr

movq 96+0(%rsp),%r12            // in_x - Zsqr
movq 96+8(%rsp),%r13
leaq 64(%rsp),%rbx
movq 96+16(%rsp),%r8
movq 96+24(%rsp),%r9
leaq 64(%rsp),%rdi
call __ecp_nistz256_sub_fromq   // 64(%rsp) = in_x - Zsqr

movq 0+0(%rsp),%rax
movq 8+0(%rsp),%r14
leaq 0+0(%rsp),%rsi
movq 16+0(%rsp),%r15
movq 24+0(%rsp),%r8
.byte 102,72,15,126,207         // movq %xmm1,%rdi  (rdi = &out_y)
call __ecp_nistz256_sqr_montq   // out_y = S^4 = 16*in_y^4

// ---- halve the result in place: out_y = S^4 / 2 mod p ----
// If the value is odd, add p first (making it even), then shift right.
// Note rsi = p[1] and rbp = p[3] here, left over from sqr_montq.
xorq %r9,%r9
movq %r12,%rax
addq $-1,%r12                   // value + p
movq %r13,%r10
adcq %rsi,%r13
movq %r14,%rcx
adcq $0,%r14
movq %r15,%r8
adcq %rbp,%r15
adcq $0,%r9
xorq %rsi,%rsi
testq $1,%rax                   // was the original value even?

cmovzq %rax,%r12                // even: discard the +p correction
cmovzq %r10,%r13
cmovzq %rcx,%r14
cmovzq %r8,%r15
cmovzq %rsi,%r9

// 256-bit right shift by one across the five limbs
movq %r13,%rax
shrq $1,%r12
shlq $63,%rax
movq %r14,%r10
shrq $1,%r13
orq %rax,%r12
shlq $63,%r10
movq %r15,%rcx
shrq $1,%r14
orq %r10,%r13
shlq $63,%rcx
movq %r12,0(%rdi)
shrq $1,%r15
movq %r13,8(%rdi)
shlq $63,%r9
orq %rcx,%r14
orq %r9,%r15
movq %r14,16(%rdi)
movq %r15,24(%rdi)
// ---- M = 3*(in_x - Zsqr)*(in_x + Zsqr) ----
movq 64(%rsp),%rax
leaq 64(%rsp),%rbx
movq 0+32(%rsp),%r9
movq 8+32(%rsp),%r10
leaq 0+32(%rsp),%rsi
movq 16+32(%rsp),%r11
movq 24+32(%rsp),%r12
leaq 32(%rsp),%rdi
call __ecp_nistz256_mul_montq   // 32(%rsp) = (x-Zsqr)*(x+Zsqr)

leaq 128(%rsp),%rdi
call __ecp_nistz256_mul_by_2q   // 128(%rsp) = 2*(...)

leaq 32(%rsp),%rbx
leaq 32(%rsp),%rdi
call __ecp_nistz256_add_toq     // 32(%rsp) = M = 3*(...)

movq 96(%rsp),%rax              // S = in_x * S^2
leaq 96(%rsp),%rbx
movq 0+0(%rsp),%r9
movq 8+0(%rsp),%r10
leaq 0+0(%rsp),%rsi
movq 16+0(%rsp),%r11
movq 24+0(%rsp),%r12
leaq 0(%rsp),%rdi
call __ecp_nistz256_mul_montq   // 0(%rsp) = S = in_x * 4*in_y^2

leaq 128(%rsp),%rdi
call __ecp_nistz256_mul_by_2q   // 128(%rsp) = 2*S

movq 0+32(%rsp),%rax
movq 8+32(%rsp),%r14
leaq 0+32(%rsp),%rsi
movq 16+32(%rsp),%r15
movq 24+32(%rsp),%r8
.byte 102,72,15,126,199         // movq %xmm0,%rdi  (rdi = &out_x)
call __ecp_nistz256_sqr_montq   // out_x = M^2

leaq 128(%rsp),%rbx
movq %r14,%r8                   // rebuild limb contract for sub_fromq
movq %r15,%r9
movq %rsi,%r14                  // rsi/rbp still hold p[1]/p[3]
movq %rbp,%r15
call __ecp_nistz256_sub_fromq   // out_x = M^2 - 2*S

movq 0+0(%rsp),%rax             // a = S for the subq below
movq 0+8(%rsp),%rbp
movq 0+16(%rsp),%rcx
movq 0+24(%rsp),%r10
leaq 0(%rsp),%rdi
call __ecp_nistz256_subq        // regs = S - out_x

movq 32(%rsp),%rax              // multiply (S - out_x) by M
leaq 32(%rsp),%rbx
movq %r12,%r14
xorl %ecx,%ecx                  // sets ZF, making the cmovz below act as mov
movq %r12,0+0(%rsp)
movq %r13,%r10
movq %r13,0+8(%rsp)
cmovzq %r8,%r11                 // ZF=1 from xorl: unconditional copy
movq %r8,0+16(%rsp)
leaq 0-0(%rsp),%rsi
cmovzq %r9,%r12                 // likewise
movq %r9,0+24(%rsp)
movq %r14,%r9
leaq 0(%rsp),%rdi
call __ecp_nistz256_mul_montq   // 0(%rsp) = M*(S - out_x)

.byte 102,72,15,126,203         // movq %xmm1,%rbx  (rbx = &out_y)
.byte 102,72,15,126,207         // movq %xmm1,%rdi  (rdi = &out_y)
call __ecp_nistz256_sub_fromq   // out_y = M*(S - out_x) - S^4/2

addq $160+8,%rsp
popq %r15
popq %r14
popq %r13
popq %r12
popq %rbx
popq %rbp
.byte 0xf3,0xc3                 // ret
951 | |
//-----------------------------------------------------------------------
// void ecp_nistz256_point_add(P256_POINT *out, const P256_POINT *a,
//                             const P256_POINT *b);
// ABI:   SysV AMD64.  In: rdi = out, rsi = a, rdx = b (Jacobian,
//        Montgomery form).
// General Jacobian point addition.  Infinity flags for a and b are
// computed branchlessly from their Z coordinates into xmm4/xmm5 masks
// and used at the end to select out = a, b, or a+b.  The single data-
// dependent branch (U1==U2 && S1==S2) only distinguishes the doubling /
// point-at-infinity special cases, which is the standard nistz256
// behavior.  Temporaries occupy a 584-byte stack frame; a and b are
// copied into it at fixed offsets (a at 384.., b at 480..).
// Raw .byte sequences are movq moves between GPRs and XMM registers.
//-----------------------------------------------------------------------
.globl _ecp_nistz256_point_add
.private_extern _ecp_nistz256_point_add

.p2align 5
_ecp_nistz256_point_add:
pushq %rbp
pushq %rbx
pushq %r12
pushq %r13
pushq %r14
pushq %r15
subq $576+8,%rsp

// ---- copy point a into the frame (x:384, y:416, z:448) ----
movdqu 0(%rsi),%xmm0
movdqu 16(%rsi),%xmm1
movdqu 32(%rsi),%xmm2
movdqu 48(%rsi),%xmm3
movdqu 64(%rsi),%xmm4
movdqu 80(%rsi),%xmm5
movq %rsi,%rbx                  // rbx = a
movq %rdx,%rsi                  // rsi = b
movdqa %xmm0,384(%rsp)
movdqa %xmm1,384+16(%rsp)
movdqa %xmm2,416(%rsp)
movdqa %xmm3,416+16(%rsp)
movdqa %xmm4,448(%rsp)
movdqa %xmm5,448+16(%rsp)
por %xmm4,%xmm5                 // xmm5 = OR of a_z words (infinity test)

// ---- copy point b into the frame (x:480, y:512, z:544) ----
movdqu 0(%rsi),%xmm0
pshufd $0xb1,%xmm5,%xmm3        // fold a_z OR down to a single test value
movdqu 16(%rsi),%xmm1
movdqu 32(%rsi),%xmm2
por %xmm3,%xmm5
movdqu 48(%rsi),%xmm3
movq 64+0(%rsi),%rax            // load b_z for squaring
movq 64+8(%rsi),%r14
movq 64+16(%rsi),%r15
movq 64+24(%rsi),%r8
movdqa %xmm0,480(%rsp)
pshufd $0x1e,%xmm5,%xmm4
movdqa %xmm1,480+16(%rsp)
movdqu 64(%rsi),%xmm0
movdqu 80(%rsi),%xmm1
movdqa %xmm2,512(%rsp)
movdqa %xmm3,512+16(%rsp)
por %xmm4,%xmm5                 // xmm5: all words OR-ed of a_z
pxor %xmm4,%xmm4
por %xmm0,%xmm1                 // xmm1 = OR of b_z words
.byte 102,72,15,110,199         // movq %rdi,%xmm0  (save out pointer)

leaq 64-0(%rsi),%rsi
movq %rax,544+0(%rsp)           // stash b_z at 544
movq %r14,544+8(%rsp)
movq %r15,544+16(%rsp)
movq %r8,544+24(%rsp)
leaq 96(%rsp),%rdi
call __ecp_nistz256_sqr_montq   // 96(%rsp) = Z2sqr = b_z^2

// finish building the infinity masks:
// xmm5 = all-ones iff a_z == 0, xmm4 = all-ones iff b_z == 0
pcmpeqd %xmm4,%xmm5
pshufd $0xb1,%xmm1,%xmm4
por %xmm1,%xmm4
pshufd $0,%xmm5,%xmm5
pshufd $0x1e,%xmm4,%xmm3
por %xmm3,%xmm4
pxor %xmm3,%xmm3
pcmpeqd %xmm3,%xmm4
pshufd $0,%xmm4,%xmm4
movq 64+0(%rbx),%rax            // load a_z for squaring
movq 64+8(%rbx),%r14
movq 64+16(%rbx),%r15
movq 64+24(%rbx),%r8
.byte 102,72,15,110,203         // movq %rbx,%xmm1  (save a pointer)

leaq 64-0(%rbx),%rsi
leaq 32(%rsp),%rdi
call __ecp_nistz256_sqr_montq   // 32(%rsp) = Z1sqr = a_z^2

movq 544(%rsp),%rax
leaq 544(%rsp),%rbx
movq 0+96(%rsp),%r9
movq 8+96(%rsp),%r10
leaq 0+96(%rsp),%rsi
movq 16+96(%rsp),%r11
movq 24+96(%rsp),%r12
leaq 224(%rsp),%rdi
call __ecp_nistz256_mul_montq   // 224(%rsp) = Z2cube = Z2sqr * b_z

movq 448(%rsp),%rax
leaq 448(%rsp),%rbx
movq 0+32(%rsp),%r9
movq 8+32(%rsp),%r10
leaq 0+32(%rsp),%rsi
movq 16+32(%rsp),%r11
movq 24+32(%rsp),%r12
leaq 256(%rsp),%rdi
call __ecp_nistz256_mul_montq   // 256(%rsp) = Z1cube = Z1sqr * a_z

movq 416(%rsp),%rax
leaq 416(%rsp),%rbx
movq 0+224(%rsp),%r9
movq 8+224(%rsp),%r10
leaq 0+224(%rsp),%rsi
movq 16+224(%rsp),%r11
movq 24+224(%rsp),%r12
leaq 224(%rsp),%rdi
call __ecp_nistz256_mul_montq   // 224(%rsp) = S1 = a_y * Z2cube

movq 512(%rsp),%rax
leaq 512(%rsp),%rbx
movq 0+256(%rsp),%r9
movq 8+256(%rsp),%r10
leaq 0+256(%rsp),%rsi
movq 16+256(%rsp),%r11
movq 24+256(%rsp),%r12
leaq 256(%rsp),%rdi
call __ecp_nistz256_mul_montq   // 256(%rsp) = S2 = b_y * Z1cube

leaq 224(%rsp),%rbx
leaq 64(%rsp),%rdi
call __ecp_nistz256_sub_fromq   // 64(%rsp) = R = S2 - S1

orq %r13,%r12                   // r12 = OR of R limbs (R == 0 test)
movdqa %xmm4,%xmm2
orq %r8,%r12
orq %r9,%r12
por %xmm5,%xmm2                 // xmm2 = "either input is infinity"
.byte 102,73,15,110,220         // movq %r12,%xmm3  (save R==0 flag)

movq 384(%rsp),%rax
leaq 384(%rsp),%rbx
movq 0+96(%rsp),%r9
movq 8+96(%rsp),%r10
leaq 0+96(%rsp),%rsi
movq 16+96(%rsp),%r11
movq 24+96(%rsp),%r12
leaq 160(%rsp),%rdi
call __ecp_nistz256_mul_montq   // 160(%rsp) = U1 = a_x * Z2sqr

movq 480(%rsp),%rax
leaq 480(%rsp),%rbx
movq 0+32(%rsp),%r9
movq 8+32(%rsp),%r10
leaq 0+32(%rsp),%rsi
movq 16+32(%rsp),%r11
movq 24+32(%rsp),%r12
leaq 192(%rsp),%rdi
call __ecp_nistz256_mul_montq   // 192(%rsp) = U2 = b_x * Z1sqr

leaq 160(%rsp),%rbx
leaq 0(%rsp),%rdi
call __ecp_nistz256_sub_fromq   // 0(%rsp) = H = U2 - U1

orq %r13,%r12                   // H == 0 ?
orq %r8,%r12
orq %r9,%r12

// If H != 0 the points differ: proceed with the general formula.
// If H == 0 and R == 0 and neither input is infinity, this is a
// doubling; if R != 0 it is P + (-P) = infinity.
.byte 0x3e                      // DS prefix (branch-hint/padding, from perlasm)
jnz L$add_proceedq
.byte 102,73,15,126,208         // movq %xmm2,%r8  (infinity flags)
.byte 102,73,15,126,217         // movq %xmm3,%r9  (R==0 flag)
testq %r8,%r8
jnz L$add_proceedq              // an input was infinity: general path handles it
testq %r9,%r9
jz L$add_doubleq                // H==0, R==0: same point — double it

// H == 0, R != 0: result is the point at infinity — zero the output
.byte 102,72,15,126,199         // movq %xmm0,%rdi  (restore out)
pxor %xmm0,%xmm0
movdqu %xmm0,0(%rdi)
movdqu %xmm0,16(%rdi)
movdqu %xmm0,32(%rdi)
movdqu %xmm0,48(%rdi)
movdqu %xmm0,64(%rdi)
movdqu %xmm0,80(%rdi)
jmp L$add_doneq

.p2align 5
L$add_doubleq:
.byte 102,72,15,126,206         // movq %xmm1,%rsi  (rsi = a)
.byte 102,72,15,126,199         // movq %xmm0,%rdi  (rdi = out)
addq $416,%rsp                  // shrink frame to point_double's layout
jmp L$point_double_shortcutq

.p2align 5
L$add_proceedq:
movq 0+64(%rsp),%rax
movq 8+64(%rsp),%r14
leaq 0+64(%rsp),%rsi
movq 16+64(%rsp),%r15
movq 24+64(%rsp),%r8
leaq 96(%rsp),%rdi
call __ecp_nistz256_sqr_montq   // 96(%rsp) = Rsqr = R^2

movq 448(%rsp),%rax
leaq 448(%rsp),%rbx
movq 0+0(%rsp),%r9
movq 8+0(%rsp),%r10
leaq 0+0(%rsp),%rsi
movq 16+0(%rsp),%r11
movq 24+0(%rsp),%r12
leaq 352(%rsp),%rdi
call __ecp_nistz256_mul_montq   // 352(%rsp) = a_z * H

movq 0+0(%rsp),%rax
movq 8+0(%rsp),%r14
leaq 0+0(%rsp),%rsi
movq 16+0(%rsp),%r15
movq 24+0(%rsp),%r8
leaq 32(%rsp),%rdi
call __ecp_nistz256_sqr_montq   // 32(%rsp) = Hsqr = H^2

movq 544(%rsp),%rax
leaq 544(%rsp),%rbx
movq 0+352(%rsp),%r9
movq 8+352(%rsp),%r10
leaq 0+352(%rsp),%rsi
movq 16+352(%rsp),%r11
movq 24+352(%rsp),%r12
leaq 352(%rsp),%rdi
call __ecp_nistz256_mul_montq   // 352(%rsp) = out_z = a_z * H * b_z

movq 0(%rsp),%rax
leaq 0(%rsp),%rbx
movq 0+32(%rsp),%r9
movq 8+32(%rsp),%r10
leaq 0+32(%rsp),%rsi
movq 16+32(%rsp),%r11
movq 24+32(%rsp),%r12
leaq 128(%rsp),%rdi
call __ecp_nistz256_mul_montq   // 128(%rsp) = Hcub = H^3

movq 160(%rsp),%rax
leaq 160(%rsp),%rbx
movq 0+32(%rsp),%r9
movq 8+32(%rsp),%r10
leaq 0+32(%rsp),%rsi
movq 16+32(%rsp),%r11
movq 24+32(%rsp),%r12
leaq 192(%rsp),%rdi
call __ecp_nistz256_mul_montq   // 192(%rsp) = U2 = U1 * Hsqr

// ---- inlined mul_by_2: regs = 2 * U2 mod p ----
xorq %r11,%r11
addq %r12,%r12
leaq 96(%rsp),%rsi              // rsi = &Rsqr for the subq below
adcq %r13,%r13
movq %r12,%rax
adcq %r8,%r8
adcq %r9,%r9
movq %r13,%rbp
adcq $0,%r11

subq $-1,%r12
movq %r8,%rcx
sbbq %r14,%r13
sbbq $0,%r8
movq %r9,%r10
sbbq %r15,%r9
sbbq $0,%r11

cmovcq %rax,%r12
movq 0(%rsi),%rax               // a-operand for subq = Rsqr
cmovcq %rbp,%r13
movq 8(%rsi),%rbp
cmovcq %rcx,%r8
movq 16(%rsi),%rcx
cmovcq %r10,%r9
movq 24(%rsi),%r10

call __ecp_nistz256_subq        // regs = Rsqr - 2*U2

leaq 128(%rsp),%rbx
leaq 288(%rsp),%rdi
call __ecp_nistz256_sub_fromq   // 288(%rsp) = out_x = Rsqr - 2*U2 - Hcub

movq 192+0(%rsp),%rax
movq 192+8(%rsp),%rbp
movq 192+16(%rsp),%rcx
movq 192+24(%rsp),%r10
leaq 320(%rsp),%rdi

call __ecp_nistz256_subq        // regs = U2 - out_x

movq %r12,0(%rdi)               // 320(%rsp) = U2 - out_x
movq %r13,8(%rdi)
movq %r8,16(%rdi)
movq %r9,24(%rdi)
movq 128(%rsp),%rax
leaq 128(%rsp),%rbx
movq 0+224(%rsp),%r9
movq 8+224(%rsp),%r10
leaq 0+224(%rsp),%rsi
movq 16+224(%rsp),%r11
movq 24+224(%rsp),%r12
leaq 256(%rsp),%rdi
call __ecp_nistz256_mul_montq   // 256(%rsp) = S1 * Hcub

movq 320(%rsp),%rax
leaq 320(%rsp),%rbx
movq 0+64(%rsp),%r9
movq 8+64(%rsp),%r10
leaq 0+64(%rsp),%rsi
movq 16+64(%rsp),%r11
movq 24+64(%rsp),%r12
leaq 320(%rsp),%rdi
call __ecp_nistz256_mul_montq   // 320(%rsp) = R * (U2 - out_x)

leaq 256(%rsp),%rbx
leaq 320(%rsp),%rdi
call __ecp_nistz256_sub_fromq   // 320(%rsp) = out_y

.byte 102,72,15,126,199         // movq %xmm0,%rdi  (restore out)

// ---- constant-time result selection per coordinate ----
// xmm5 = (a == infinity) mask, xmm4 = (b == infinity) mask:
//   a inf -> take b's coordinate; b inf -> take a's; else computed value.
movdqa %xmm5,%xmm0
movdqa %xmm5,%xmm1
pandn 352(%rsp),%xmm0           // computed out_z when a not-infinity
movdqa %xmm5,%xmm2
pandn 352+16(%rsp),%xmm1
movdqa %xmm5,%xmm3
pand 544(%rsp),%xmm2            // b_z when a is infinity
pand 544+16(%rsp),%xmm3
por %xmm0,%xmm2
por %xmm1,%xmm3

movdqa %xmm4,%xmm0
movdqa %xmm4,%xmm1
pandn %xmm2,%xmm0
movdqa %xmm4,%xmm2
pandn %xmm3,%xmm1
movdqa %xmm4,%xmm3
pand 448(%rsp),%xmm2            // a_z when b is infinity
pand 448+16(%rsp),%xmm3
por %xmm0,%xmm2
por %xmm1,%xmm3
movdqu %xmm2,64(%rdi)           // store out_z
movdqu %xmm3,80(%rdi)

movdqa %xmm5,%xmm0              // same selection for X
movdqa %xmm5,%xmm1
pandn 288(%rsp),%xmm0
movdqa %xmm5,%xmm2
pandn 288+16(%rsp),%xmm1
movdqa %xmm5,%xmm3
pand 480(%rsp),%xmm2
pand 480+16(%rsp),%xmm3
por %xmm0,%xmm2
por %xmm1,%xmm3

movdqa %xmm4,%xmm0
movdqa %xmm4,%xmm1
pandn %xmm2,%xmm0
movdqa %xmm4,%xmm2
pandn %xmm3,%xmm1
movdqa %xmm4,%xmm3
pand 384(%rsp),%xmm2
pand 384+16(%rsp),%xmm3
por %xmm0,%xmm2
por %xmm1,%xmm3
movdqu %xmm2,0(%rdi)            // store out_x
movdqu %xmm3,16(%rdi)

movdqa %xmm5,%xmm0              // same selection for Y
movdqa %xmm5,%xmm1
pandn 320(%rsp),%xmm0
movdqa %xmm5,%xmm2
pandn 320+16(%rsp),%xmm1
movdqa %xmm5,%xmm3
pand 512(%rsp),%xmm2
pand 512+16(%rsp),%xmm3
por %xmm0,%xmm2
por %xmm1,%xmm3

movdqa %xmm4,%xmm0
movdqa %xmm4,%xmm1
pandn %xmm2,%xmm0
movdqa %xmm4,%xmm2
pandn %xmm3,%xmm1
movdqa %xmm4,%xmm3
pand 416(%rsp),%xmm2
pand 416+16(%rsp),%xmm3
por %xmm0,%xmm2
por %xmm1,%xmm3
movdqu %xmm2,32(%rdi)           // store out_y
movdqu %xmm3,48(%rdi)

L$add_doneq:
addq $576+8,%rsp
popq %r15
popq %r14
popq %r13
popq %r12
popq %rbx
popq %rbp
.byte 0xf3,0xc3                 // ret
1348 | |
# void ecp_nistz256_point_add_affine(P256_POINT *r,
#                                    const P256_POINT *a,
#                                    const P256_POINT_AFFINE *b);
# Mixed Jacobian + affine point addition on NIST P-256, all field values
# in Montgomery form. SysV AMD64: %rdi = r, %rsi = a, %rdx = b.
# NOTE(review): generated perlasm (ecp_nistz256), Mach-O flavor; the
# __ecp_nistz256_*q helpers are defined elsewhere in this file.
# The point-at-infinity cases (a == inf, b == inf) are handled branch-free
# with byte masks and PAND/PANDN selects at the end, keeping the routine
# constant-time with respect to those conditions.
#
# Stack frame (480 bytes; +8 keeps %rsp 16-aligned at the helper calls):
#   0   = U2 = X2*Z1^2, later U1*H^2 - X3     224 = X3 (res_x)
#   32  = Z1^2, then S2 = Y2*Z1^3, Y1*H^3     256 = Y3 (res_y)
#   64  = H = U2 - X1, then R*(U1*H^2 - X3)   288 = Z3 = H*Z1 (res_z)
#   96  = R = S2 - Y1                          320/352/384 = X1/Y1/Z1 copy
#   128 = H^2    160 = H^3    192 = R^2        416/448     = X2/Y2 copy
.globl	_ecp_nistz256_point_add_affine
.private_extern	_ecp_nistz256_point_add_affine

.p2align	5
_ecp_nistz256_point_add_affine:
	pushq	%rbp
	pushq	%rbx
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15
	subq	$480+8,%rsp

	# Copy point a to the frame: X1 -> 320, Y1 -> 352, Z1 -> 384.
	movdqu	0(%rsi),%xmm0
	movq	%rdx,%rbx               # %rbx = b (affine point)
	movdqu	16(%rsi),%xmm1
	movdqu	32(%rsi),%xmm2
	movdqu	48(%rsi),%xmm3
	movdqu	64(%rsi),%xmm4
	movdqu	80(%rsi),%xmm5
	movq	64+0(%rsi),%rax         # Z1 also staged in %rax,%r14,%r15,%r8
	movq	64+8(%rsi),%r14         # for the sqr_mont call below
	movq	64+16(%rsi),%r15
	movq	64+24(%rsi),%r8
	movdqa	%xmm0,320(%rsp)
	movdqa	%xmm1,320+16(%rsp)
	movdqa	%xmm2,352(%rsp)
	movdqa	%xmm3,352+16(%rsp)
	movdqa	%xmm4,384(%rsp)
	movdqa	%xmm5,384+16(%rsp)
	por	%xmm4,%xmm5             # OR of Z1 words: all-zero iff a == inf

	# Copy point b: X2 -> 416, Y2 -> 448; OR of its words gathers in %xmm3.
	movdqu	0(%rbx),%xmm0
	pshufd	$0xb1,%xmm5,%xmm3
	movdqu	16(%rbx),%xmm1
	movdqu	32(%rbx),%xmm2
	por	%xmm3,%xmm5
	movdqu	48(%rbx),%xmm3
	movdqa	%xmm0,416(%rsp)
	pshufd	$0x1e,%xmm5,%xmm4
	movdqa	%xmm1,416+16(%rsp)
	por	%xmm0,%xmm1
.byte	102,72,15,110,199           # movq %rdi,%xmm7 — stash result pointer
	movdqa	%xmm2,448(%rsp)
	movdqa	%xmm3,448+16(%rsp)
	por	%xmm2,%xmm3
	por	%xmm4,%xmm5
	pxor	%xmm4,%xmm4
	por	%xmm1,%xmm3

	leaq	64-0(%rsi),%rsi         # %rsi = &Z1
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_sqr_montq    # 32 = Z1^2

	# Finish the two infinity masks (interleaved with scalar setup for
	# the next multiplication).
	pcmpeqd	%xmm4,%xmm5
	pshufd	$0xb1,%xmm3,%xmm4
	movq	0(%rbx),%rax            # X2[0] for mul_mont

	movq	%r12,%r9                # Z1^2 limbs left by sqr_montq
	por	%xmm3,%xmm4
	pshufd	$0,%xmm5,%xmm5          # %xmm5 = in1infty mask (a == inf)
	pshufd	$0x1e,%xmm4,%xmm3
	movq	%r13,%r10
	por	%xmm3,%xmm4
	pxor	%xmm3,%xmm3
	movq	%r14,%r11
	pcmpeqd	%xmm3,%xmm4
	pshufd	$0,%xmm4,%xmm4          # %xmm4 = in2infty mask (b == inf,
	                                #         encoded as X2|Y2 all zero)
	leaq	32-0(%rsp),%rsi
	movq	%r15,%r12
	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_mul_montq    # 0 = U2 = X2*Z1^2

	leaq	320(%rsp),%rbx
	leaq	64(%rsp),%rdi
	call	__ecp_nistz256_sub_fromq    # 64 = H = U2 - X1

	movq	384(%rsp),%rax          # Z1 * Z1^2
	leaq	384(%rsp),%rbx
	movq	0+32(%rsp),%r9
	movq	8+32(%rsp),%r10
	leaq	0+32(%rsp),%rsi
	movq	16+32(%rsp),%r11
	movq	24+32(%rsp),%r12
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_mul_montq    # 32 = Z1^3

	movq	384(%rsp),%rax          # Z1 * H
	leaq	384(%rsp),%rbx
	movq	0+64(%rsp),%r9
	movq	8+64(%rsp),%r10
	leaq	0+64(%rsp),%rsi
	movq	16+64(%rsp),%r11
	movq	24+64(%rsp),%r12
	leaq	288(%rsp),%rdi
	call	__ecp_nistz256_mul_montq    # 288 = Z3 = H*Z1

	movq	448(%rsp),%rax          # Y2 * Z1^3
	leaq	448(%rsp),%rbx
	movq	0+32(%rsp),%r9
	movq	8+32(%rsp),%r10
	leaq	0+32(%rsp),%rsi
	movq	16+32(%rsp),%r11
	movq	24+32(%rsp),%r12
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_mul_montq    # 32 = S2 = Y2*Z1^3

	leaq	352(%rsp),%rbx
	leaq	96(%rsp),%rdi
	call	__ecp_nistz256_sub_fromq    # 96 = R = S2 - Y1

	movq	0+64(%rsp),%rax         # H^2
	movq	8+64(%rsp),%r14
	leaq	0+64(%rsp),%rsi
	movq	16+64(%rsp),%r15
	movq	24+64(%rsp),%r8
	leaq	128(%rsp),%rdi
	call	__ecp_nistz256_sqr_montq    # 128 = H^2

	movq	0+96(%rsp),%rax         # R^2
	movq	8+96(%rsp),%r14
	leaq	0+96(%rsp),%rsi
	movq	16+96(%rsp),%r15
	movq	24+96(%rsp),%r8
	leaq	192(%rsp),%rdi
	call	__ecp_nistz256_sqr_montq    # 192 = R^2

	movq	128(%rsp),%rax          # H^2 * H
	leaq	128(%rsp),%rbx
	movq	0+64(%rsp),%r9
	movq	8+64(%rsp),%r10
	leaq	0+64(%rsp),%rsi
	movq	16+64(%rsp),%r11
	movq	24+64(%rsp),%r12
	leaq	160(%rsp),%rdi
	call	__ecp_nistz256_mul_montq    # 160 = H^3

	movq	320(%rsp),%rax          # X1 * H^2
	leaq	320(%rsp),%rbx
	movq	0+128(%rsp),%r9
	movq	8+128(%rsp),%r10
	leaq	0+128(%rsp),%rsi
	movq	16+128(%rsp),%r11
	movq	24+128(%rsp),%r12
	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_mul_montq    # 0 = U1*H^2 (in %r12,%r13,%r8,%r9)

	# 2*U1*H^2 mod p: double in-register, then subtract p once and keep
	# the pre-subtraction value on borrow (constant-time reduction).
	# NOTE(review): %r14/%r15 are assumed to still hold the p256 poly
	# words left by mul_montq — confirm against the helper's contract.
	xorq	%r11,%r11
	addq	%r12,%r12
	leaq	192(%rsp),%rsi          # %rsi = &R^2 for the subq below
	adcq	%r13,%r13
	movq	%r12,%rax
	adcq	%r8,%r8
	adcq	%r9,%r9
	movq	%r13,%rbp
	adcq	$0,%r11

	subq	$-1,%r12                # subtract p = {-1, poly1, 0, poly3}
	movq	%r8,%rcx
	sbbq	%r14,%r13
	sbbq	$0,%r8
	movq	%r9,%r10
	sbbq	%r15,%r9
	sbbq	$0,%r11                 # borrow -> keep undreduced value

	cmovcq	%rax,%r12
	movq	0(%rsi),%rax            # load R^2 as the subq minuend
	cmovcq	%rbp,%r13
	movq	8(%rsi),%rbp
	cmovcq	%rcx,%r8
	movq	16(%rsi),%rcx
	cmovcq	%r10,%r9
	movq	24(%rsi),%r10

	call	__ecp_nistz256_subq     # regs = R^2 - 2*U1*H^2

	leaq	160(%rsp),%rbx
	leaq	224(%rsp),%rdi
	call	__ecp_nistz256_sub_fromq    # 224 = X3 = R^2 - 2*U1*H^2 - H^3

	movq	0+0(%rsp),%rax          # minuend = U1*H^2
	movq	0+8(%rsp),%rbp
	movq	0+16(%rsp),%rcx
	movq	0+24(%rsp),%r10
	leaq	64(%rsp),%rdi

	call	__ecp_nistz256_subq     # regs = U1*H^2 - X3

	movq	%r12,0(%rdi)            # 64 = U1*H^2 - X3
	movq	%r13,8(%rdi)
	movq	%r8,16(%rdi)
	movq	%r9,24(%rdi)
	movq	352(%rsp),%rax          # Y1 * H^3
	leaq	352(%rsp),%rbx
	movq	0+160(%rsp),%r9
	movq	8+160(%rsp),%r10
	leaq	0+160(%rsp),%rsi
	movq	16+160(%rsp),%r11
	movq	24+160(%rsp),%r12
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_mul_montq    # 32 = Y1*H^3

	movq	96(%rsp),%rax           # R * (U1*H^2 - X3)
	leaq	96(%rsp),%rbx
	movq	0+64(%rsp),%r9
	movq	8+64(%rsp),%r10
	leaq	0+64(%rsp),%rsi
	movq	16+64(%rsp),%r11
	movq	24+64(%rsp),%r12
	leaq	64(%rsp),%rdi
	call	__ecp_nistz256_mul_montq    # 64 = R*(U1*H^2 - X3)

	leaq	32(%rsp),%rbx
	leaq	256(%rsp),%rdi
	call	__ecp_nistz256_sub_fromq    # 256 = Y3 = R*(U1*H^2-X3) - Y1*H^3

.byte	102,72,15,126,199           # movq %xmm7,%rdi — restore result ptr

	# Constant-time output selection:
	#   a == inf -> result is b (affine, so Z = 1 in Montgomery form);
	#   b == inf -> result is a; otherwise the computed X3/Y3/Z3.
	# Z coordinate:
	movdqa	%xmm5,%xmm0
	movdqa	%xmm5,%xmm1
	pandn	288(%rsp),%xmm0         # ~in1infty & Z3
	movdqa	%xmm5,%xmm2
	pandn	288+16(%rsp),%xmm1
	movdqa	%xmm5,%xmm3
	pand	L$ONE_mont(%rip),%xmm2  # in1infty ? 1 (Montgomery)
	pand	L$ONE_mont+16(%rip),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3

	movdqa	%xmm4,%xmm0
	movdqa	%xmm4,%xmm1
	pandn	%xmm2,%xmm0
	movdqa	%xmm4,%xmm2
	pandn	%xmm3,%xmm1
	movdqa	%xmm4,%xmm3
	pand	384(%rsp),%xmm2         # in2infty ? Z1
	pand	384+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3
	movdqu	%xmm2,64(%rdi)          # res->Z
	movdqu	%xmm3,80(%rdi)

	# X coordinate:
	movdqa	%xmm5,%xmm0
	movdqa	%xmm5,%xmm1
	pandn	224(%rsp),%xmm0         # ~in1infty & X3
	movdqa	%xmm5,%xmm2
	pandn	224+16(%rsp),%xmm1
	movdqa	%xmm5,%xmm3
	pand	416(%rsp),%xmm2         # in1infty ? X2
	pand	416+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3

	movdqa	%xmm4,%xmm0
	movdqa	%xmm4,%xmm1
	pandn	%xmm2,%xmm0
	movdqa	%xmm4,%xmm2
	pandn	%xmm3,%xmm1
	movdqa	%xmm4,%xmm3
	pand	320(%rsp),%xmm2         # in2infty ? X1
	pand	320+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3
	movdqu	%xmm2,0(%rdi)           # res->X
	movdqu	%xmm3,16(%rdi)

	# Y coordinate:
	movdqa	%xmm5,%xmm0
	movdqa	%xmm5,%xmm1
	pandn	256(%rsp),%xmm0         # ~in1infty & Y3
	movdqa	%xmm5,%xmm2
	pandn	256+16(%rsp),%xmm1
	movdqa	%xmm5,%xmm3
	pand	448(%rsp),%xmm2         # in1infty ? Y2
	pand	448+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3

	movdqa	%xmm4,%xmm0
	movdqa	%xmm4,%xmm1
	pandn	%xmm2,%xmm0
	movdqa	%xmm4,%xmm2
	pandn	%xmm3,%xmm1
	movdqa	%xmm4,%xmm3
	pand	352(%rsp),%xmm2         # in2infty ? Y1
	pand	352+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3
	movdqu	%xmm2,32(%rdi)          # res->Y
	movdqu	%xmm3,48(%rdi)

	addq	$480+8,%rsp             # epilogue: release frame, restore regs
	popq	%r15
	popq	%r14
	popq	%r13
	popq	%r12
	popq	%rbx
	popq	%rbp
	.byte	0xf3,0xc3               # repz ret (branch-predictor-friendly ret)

#endif