OLD | NEW |
| (Empty) |
#if defined(__x86_64__)
.text
.extern	OPENSSL_ia32cap_P
.hidden	OPENSSL_ia32cap_P

# Constant pool for NIST P-256 (secp256r1) prime-field arithmetic.
# .Lpoly holds the prime p = 2^256 - 2^224 + 2^192 + 2^96 - 1 as four
# little-endian 64-bit limbs.
.align	64
.Lpoly:
.quad	0xffffffffffffffff, 0x00000000ffffffff, 0x0000000000000000, 0xffffffff00000001

# Broadcast dword constants used by the SSE constant-time table lookups
# (ecp_nistz256_select_w5/w7): eight lanes of 1, 2 and 3.
.LOne:
.long	1,1,1,1,1,1,1,1
.LTwo:
.long	2,2,2,2,2,2,2,2
.LThree:
.long	3,3,3,3,3,3,3,3
# The value 1 in Montgomery representation, i.e. 2^256 mod p.
.LONE_mont:
.quad	0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe
# void ecp_nistz256_mul_by_2(uint64_t res[4] /* rdi */,
#                            const uint64_t a[4] /* rsi */);
# res = 2*a mod p, constant time, result fully reduced (< p).
#
# FIX (stricter reduction): the previous code captured only the carry out
# of the doubling (sbbq %r13,%r13 / testq / cmovz), so for a in
# [p/2, 2^255) the value 2a with 2a in [p, 2^256) was returned unreduced.
# Now the borrow of the full 257-bit subtraction (carry:2a) - p decides
# the selection, matching the upstream ecp_nistz256-x86_64.pl
# "stricter reduction" fix.  Clobbers: rax, rcx, rdx, r8-r13, rsi, flags.
.type	ecp_nistz256_mul_by_2,@function
.align	64
ecp_nistz256_mul_by_2:
	pushq	%r12
	pushq	%r13

	xorq	%r13,%r13		# r13 accumulates the doubling carry
	movq	0(%rsi),%r8
	movq	8(%rsi),%r9
	addq	%r8,%r8			# 2*a, carry chain through r8..r11
	movq	16(%rsi),%r10
	adcq	%r9,%r9
	movq	24(%rsi),%r11
	leaq	.Lpoly(%rip),%rsi	# input pointer dead; rsi -> p
	movq	%r8,%rax		# stash unreduced 2*a in rax/rdx/rcx/r12
	adcq	%r10,%r10
	adcq	%r11,%r11
	movq	%r9,%rdx
	adcq	$0,%r13			# r13 = bit 256 of 2*a

	subq	0(%rsi),%r8		# (carry:2a) - p
	movq	%r10,%rcx
	sbbq	8(%rsi),%r9
	sbbq	16(%rsi),%r10
	movq	%r11,%r12
	sbbq	24(%rsi),%r11
	sbbq	$0,%r13			# CF=1 iff 2*a < p

	cmovcq	%rax,%r8		# borrowed: keep unreduced 2*a
	cmovcq	%rdx,%r9
	movq	%r8,0(%rdi)
	cmovcq	%rcx,%r10
	movq	%r9,8(%rdi)
	cmovcq	%r12,%r11
	movq	%r10,16(%rdi)
	movq	%r11,24(%rdi)

	popq	%r13
	popq	%r12
	.byte	0xf3,0xc3		# rep ret
.size	ecp_nistz256_mul_by_2,.-ecp_nistz256_mul_by_2
60 | |
61 | |
62 | |
# void ecp_nistz256_neg(uint64_t res[4] /* rdi */,
#                       const uint64_t a[4] /* rsi */);
# res = -a mod p.  Computes 0 - a; if that borrowed (a != 0) adds p back,
# giving p - a.  Branchless selection via cmov keeps it constant time.
# Clobbers: rax, rcx, rdx, r8-r13, rsi, flags.
.globl	ecp_nistz256_neg
.hidden	ecp_nistz256_neg
.type	ecp_nistz256_neg,@function
.align	32
ecp_nistz256_neg:
	pushq	%r12
	pushq	%r13

	xorq	%r8,%r8			# r8..r11 = 0
	xorq	%r9,%r9
	xorq	%r10,%r10
	xorq	%r11,%r11
	xorq	%r13,%r13		# borrow accumulator

	subq	0(%rsi),%r8		# 0 - a
	sbbq	8(%rsi),%r9
	sbbq	16(%rsi),%r10
	movq	%r8,%rax		# stash 0-a in rax/rdx/rcx/r12
	sbbq	24(%rsi),%r11
	leaq	.Lpoly(%rip),%rsi
	movq	%r9,%rdx
	sbbq	$0,%r13			# r13 = -1 iff a != 0

	addq	0(%rsi),%r8		# (0 - a) + p
	movq	%r10,%rcx
	adcq	8(%rsi),%r9
	adcq	16(%rsi),%r10
	movq	%r11,%r12
	adcq	24(%rsi),%r11
	testq	%r13,%r13

	cmovzq	%rax,%r8		# a == 0: result stays 0; else p - a
	cmovzq	%rdx,%r9
	movq	%r8,0(%rdi)
	cmovzq	%rcx,%r10
	movq	%r9,8(%rdi)
	cmovzq	%r12,%r11
	movq	%r10,16(%rdi)
	movq	%r11,24(%rdi)

	popq	%r13
	popq	%r12
	.byte	0xf3,0xc3		# rep ret
.size	ecp_nistz256_neg,.-ecp_nistz256_neg
107 | |
108 | |
109 | |
110 | |
111 | |
112 | |
# void ecp_nistz256_mul_mont(uint64_t res[4] /* rdi */,
#                            const uint64_t a[4] /* rsi */,
#                            const uint64_t b[4] /* rdx */);
# res = a * b * 2^-256 mod p (Montgomery multiplication).
# Thin ABI wrapper: saves callee-saved registers, loads the operands into
# the register convention expected by __ecp_nistz256_mul_montq
# (rbx -> b, rax = b[0], r9..r12 = a[0..3]) and dispatches.
.globl	ecp_nistz256_mul_mont
.hidden	ecp_nistz256_mul_mont
.type	ecp_nistz256_mul_mont,@function
.align	32
ecp_nistz256_mul_mont:
.Lmul_mont:
	pushq	%rbp
	pushq	%rbx
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15
	movq	%rdx,%rbx		# b pointer (rdx is clobbered by mulq)
	movq	0(%rdx),%rax		# b[0]
	movq	0(%rsi),%r9		# a[0..3]
	movq	8(%rsi),%r10
	movq	16(%rsi),%r11
	movq	24(%rsi),%r12

	call	__ecp_nistz256_mul_montq
.Lmul_mont_done:
	popq	%r15
	popq	%r14
	popq	%r13
	popq	%r12
	popq	%rbx
	popq	%rbp
	.byte	0xf3,0xc3		# rep ret
.size	ecp_nistz256_mul_mont,.-ecp_nistz256_mul_mont
142 | |
# Internal Montgomery multiplication, register-based calling convention:
# In:  rdi -> result, rsi -> a, rbx -> b, rax = b[0], r9..r12 = a[0..3].
# Out: result (a*b*2^-256 mod p, fully reduced) stored to (rdi) and left
#      in r12,r13,r8,r9; r14/r15 hold .Lpoly+8/.Lpoly+24 on return.
# Clobbers: rax, rbp, rcx, rdx, rbx, r8-r15, flags.
# Multiplication and reduction are interleaved: after each word of b is
# multiplied in, one 32-bit-shift reduction step folds the lowest limb
# (exploiting the special form of p, where p[1] and p[3] are the only
# limbs needing a real multiply).  Constant time.
.type	__ecp_nistz256_mul_montq,@function
.align	32
__ecp_nistz256_mul_montq:

	# acc = a * b[0]
	movq	%rax,%rbp		# rbp = b[0]
	mulq	%r9
	movq	.Lpoly+8(%rip),%r14
	movq	%rax,%r8
	movq	%rbp,%rax
	movq	%rdx,%r9

	mulq	%r10
	movq	.Lpoly+24(%rip),%r15
	addq	%rax,%r9
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%r11
	addq	%rax,%r10
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%r12
	addq	%rax,%r11
	movq	%r8,%rax
	adcq	$0,%rdx
	xorq	%r13,%r13
	movq	%rdx,%r12

	# First reduction step: fold acc[0] (in r8/rax); because
	# p = 2^256 - 2^224 + 2^192 + 2^96 - 1, the contribution is
	# (acc0 << 96) spread over two limbs plus acc0 * p[3].
	movq	%r8,%rbp
	shlq	$32,%r8
	mulq	%r15
	shrq	$32,%rbp
	addq	%r8,%r9
	adcq	%rbp,%r10
	adcq	%rax,%r11
	movq	8(%rbx),%rax		# next word: b[1]
	adcq	%rdx,%r12
	adcq	$0,%r13
	xorq	%r8,%r8

	# acc += a * b[1]
	movq	%rax,%rbp
	mulq	0(%rsi)
	addq	%rax,%r9
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	8(%rsi)
	addq	%rcx,%r10
	adcq	$0,%rdx
	addq	%rax,%r10
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	16(%rsi)
	addq	%rcx,%r11
	adcq	$0,%rdx
	addq	%rax,%r11
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	24(%rsi)
	addq	%rcx,%r12
	adcq	$0,%rdx
	addq	%rax,%r12
	movq	%r9,%rax
	adcq	%rdx,%r13
	adcq	$0,%r8

	# Second reduction step (folds r9).
	movq	%r9,%rbp
	shlq	$32,%r9
	mulq	%r15
	shrq	$32,%rbp
	addq	%r9,%r10
	adcq	%rbp,%r11
	adcq	%rax,%r12
	movq	16(%rbx),%rax		# b[2]
	adcq	%rdx,%r13
	adcq	$0,%r8
	xorq	%r9,%r9

	# acc += a * b[2]
	movq	%rax,%rbp
	mulq	0(%rsi)
	addq	%rax,%r10
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	8(%rsi)
	addq	%rcx,%r11
	adcq	$0,%rdx
	addq	%rax,%r11
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	16(%rsi)
	addq	%rcx,%r12
	adcq	$0,%rdx
	addq	%rax,%r12
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	24(%rsi)
	addq	%rcx,%r13
	adcq	$0,%rdx
	addq	%rax,%r13
	movq	%r10,%rax
	adcq	%rdx,%r8
	adcq	$0,%r9

	# Third reduction step (folds r10).
	movq	%r10,%rbp
	shlq	$32,%r10
	mulq	%r15
	shrq	$32,%rbp
	addq	%r10,%r11
	adcq	%rbp,%r12
	adcq	%rax,%r13
	movq	24(%rbx),%rax		# b[3]
	adcq	%rdx,%r8
	adcq	$0,%r9
	xorq	%r10,%r10

	# acc += a * b[3]
	movq	%rax,%rbp
	mulq	0(%rsi)
	addq	%rax,%r11
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	8(%rsi)
	addq	%rcx,%r12
	adcq	$0,%rdx
	addq	%rax,%r12
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	16(%rsi)
	addq	%rcx,%r13
	adcq	$0,%rdx
	addq	%rax,%r13
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	24(%rsi)
	addq	%rcx,%r8
	adcq	$0,%rdx
	addq	%rax,%r8
	movq	%r11,%rax
	adcq	%rdx,%r9
	adcq	$0,%r10

	# Fourth (last) reduction step (folds r11); keep pre-subtraction
	# copies of the result limbs in rcx/rbp (and below rbx/rdx).
	movq	%r11,%rbp
	shlq	$32,%r11
	mulq	%r15
	shrq	$32,%rbp
	addq	%r11,%r12
	adcq	%rbp,%r13
	movq	%r12,%rcx
	adcq	%rax,%r8
	adcq	%rdx,%r9
	movq	%r13,%rbp
	adcq	$0,%r10

	# Final conditional subtraction of p (p[0] = -1, p[2] = 0).
	subq	$-1,%r12
	movq	%r8,%rbx
	sbbq	%r14,%r13
	sbbq	$0,%r8
	movq	%r9,%rdx
	sbbq	%r15,%r9
	sbbq	$0,%r10			# CF=1 iff acc < p

	cmovcq	%rcx,%r12		# borrowed: keep unsubtracted value
	cmovcq	%rbp,%r13
	movq	%r12,0(%rdi)
	cmovcq	%rbx,%r8
	movq	%r13,8(%rdi)
	cmovcq	%rdx,%r9
	movq	%r8,16(%rdi)
	movq	%r9,24(%rdi)

	.byte	0xf3,0xc3		# rep ret
.size	__ecp_nistz256_mul_montq,.-__ecp_nistz256_mul_montq
358 | |
359 | |
360 | |
361 | |
362 | |
363 | |
364 | |
365 | |
# void ecp_nistz256_sqr_mont(uint64_t res[4] /* rdi */,
#                            const uint64_t a[4] /* rsi */);
# res = a^2 * 2^-256 mod p (Montgomery squaring).
# Thin ABI wrapper around __ecp_nistz256_sqr_montq, which expects
# rax = a[0], r14 = a[1], r15 = a[2], r8 = a[3], rsi -> a.
.globl	ecp_nistz256_sqr_mont
.hidden	ecp_nistz256_sqr_mont
.type	ecp_nistz256_sqr_mont,@function
.align	32
ecp_nistz256_sqr_mont:
	pushq	%rbp
	pushq	%rbx
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15
	movq	0(%rsi),%rax
	movq	8(%rsi),%r14
	movq	16(%rsi),%r15
	movq	24(%rsi),%r8

	call	__ecp_nistz256_sqr_montq
.Lsqr_mont_done:
	popq	%r15
	popq	%r14
	popq	%r13
	popq	%r12
	popq	%rbx
	popq	%rbp
	.byte	0xf3,0xc3		# rep ret
.size	ecp_nistz256_sqr_mont,.-ecp_nistz256_sqr_mont
392 | |
# Internal Montgomery squaring, register-based calling convention:
# In:  rdi -> result, rsi -> a, rax = a[0], r14 = a[1], r15 = a[2],
#      r8 = a[3].
# Out: result (a^2 * 2^-256 mod p, fully reduced) stored to (rdi) and
#      left in r12..r15; rsi = .Lpoly+8 and rbp = .Lpoly+24 on return
#      (point_double relies on these).
# Clobbers: rax, rcx, rdx, rbp, rsi, r8-r15, flags.
# Classic squaring: compute the off-diagonal products once, double them,
# add the squares of each limb, then run four shift-based reduction
# steps followed by one conditional subtraction of p.  Constant time.
.type	__ecp_nistz256_sqr_montq,@function
.align	32
__ecp_nistz256_sqr_montq:
	# Off-diagonal products a[i]*a[j], i<j.
	movq	%rax,%r13		# r13 = a[0]
	mulq	%r14			# a0*a1
	movq	%rax,%r9
	movq	%r15,%rax
	movq	%rdx,%r10

	mulq	%r13			# a0*a2
	addq	%rax,%r10
	movq	%r8,%rax
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%r13			# a0*a3
	addq	%rax,%r11
	movq	%r15,%rax
	adcq	$0,%rdx
	movq	%rdx,%r12

	mulq	%r14			# a1*a2
	addq	%rax,%r11
	movq	%r8,%rax
	adcq	$0,%rdx
	movq	%rdx,%rbp

	mulq	%r14			# a1*a3
	addq	%rax,%r12
	movq	%r8,%rax
	adcq	$0,%rdx
	addq	%rbp,%r12
	movq	%rdx,%r13
	adcq	$0,%r13

	mulq	%r15			# a2*a3
	xorq	%r15,%r15
	addq	%rax,%r13
	movq	0(%rsi),%rax		# reload a[0] for the diagonal pass
	movq	%rdx,%r14
	adcq	$0,%r14

	# Double the off-diagonal sum.
	addq	%r9,%r9
	adcq	%r10,%r10
	adcq	%r11,%r11
	adcq	%r12,%r12
	adcq	%r13,%r13
	adcq	%r14,%r14
	adcq	$0,%r15

	# Add the diagonal squares a[i]^2.
	mulq	%rax			# a0^2
	movq	%rax,%r8
	movq	8(%rsi),%rax
	movq	%rdx,%rcx

	mulq	%rax			# a1^2
	addq	%rcx,%r9
	adcq	%rax,%r10
	movq	16(%rsi),%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	%rax			# a2^2
	addq	%rcx,%r11
	adcq	%rax,%r12
	movq	24(%rsi),%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	%rax			# a3^2
	addq	%rcx,%r13
	adcq	%rax,%r14
	movq	%r8,%rax
	adcq	%rdx,%r15

	movq	.Lpoly+8(%rip),%rsi	# p[1]
	movq	.Lpoly+24(%rip),%rbp	# p[3]

	# Reduction step 1: fold r8.
	movq	%r8,%rcx
	shlq	$32,%r8
	mulq	%rbp
	shrq	$32,%rcx
	addq	%r8,%r9
	adcq	%rcx,%r10
	adcq	%rax,%r11
	movq	%r9,%rax
	adcq	$0,%rdx

	# Reduction step 2: fold r9.
	movq	%r9,%rcx
	shlq	$32,%r9
	movq	%rdx,%r8
	mulq	%rbp
	shrq	$32,%rcx
	addq	%r9,%r10
	adcq	%rcx,%r11
	adcq	%rax,%r8
	movq	%r10,%rax
	adcq	$0,%rdx

	# Reduction step 3: fold r10.
	movq	%r10,%rcx
	shlq	$32,%r10
	movq	%rdx,%r9
	mulq	%rbp
	shrq	$32,%rcx
	addq	%r10,%r11
	adcq	%rcx,%r8
	adcq	%rax,%r9
	movq	%r11,%rax
	adcq	$0,%rdx

	# Reduction step 4: fold r11.
	movq	%r11,%rcx
	shlq	$32,%r11
	movq	%rdx,%r10
	mulq	%rbp
	shrq	$32,%rcx
	addq	%r11,%r8
	adcq	%rcx,%r9
	adcq	%rax,%r10
	adcq	$0,%rdx
	xorq	%r11,%r11

	# Add the reduced low half to the high half of the square.
	addq	%r8,%r12
	adcq	%r9,%r13
	movq	%r12,%r8		# keep pre-subtraction copies
	adcq	%r10,%r14
	adcq	%rdx,%r15
	movq	%r13,%r9
	adcq	$0,%r11

	# Final conditional subtraction of p.
	subq	$-1,%r12
	movq	%r14,%r10
	sbbq	%rsi,%r13
	sbbq	$0,%r14
	movq	%r15,%rcx
	sbbq	%rbp,%r15
	sbbq	$0,%r11			# CF=1 iff value < p

	cmovcq	%r8,%r12
	cmovcq	%r9,%r13
	movq	%r12,0(%rdi)
	cmovcq	%r10,%r14
	movq	%r13,8(%rdi)
	cmovcq	%rcx,%r15
	movq	%r14,16(%rdi)
	movq	%r15,24(%rdi)

	.byte	0xf3,0xc3		# rep ret
.size	__ecp_nistz256_sqr_montq,.-__ecp_nistz256_sqr_montq
554 | |
555 | |
556 | |
557 | |
558 | |
559 | |
# void ecp_nistz256_from_mont(uint64_t res[4] /* rdi */,
#                             const uint64_t in[4] /* rsi */);
# res = in * 2^-256 mod p: converts out of Montgomery form (equivalent to
# a Montgomery multiplication by 1).  Four shift-based reduction steps
# followed by a final conditional subtraction of p.  Constant time.
# Clobbers: rax, rcx, rdx, rsi, r8-r13, flags.
.globl	ecp_nistz256_from_mont
.hidden	ecp_nistz256_from_mont
.type	ecp_nistz256_from_mont,@function
.align	32
ecp_nistz256_from_mont:
	pushq	%r12
	pushq	%r13

	movq	0(%rsi),%rax
	movq	.Lpoly+24(%rip),%r13	# p[3]
	movq	8(%rsi),%r9
	movq	16(%rsi),%r10
	movq	24(%rsi),%r11
	movq	%rax,%r8
	movq	.Lpoly+8(%rip),%r12	# p[1]

	# Reduction step 1: fold limb 0 (rax/r8).
	movq	%rax,%rcx
	shlq	$32,%r8
	mulq	%r13
	shrq	$32,%rcx
	addq	%r8,%r9
	adcq	%rcx,%r10
	adcq	%rax,%r11
	movq	%r9,%rax
	adcq	$0,%rdx

	# Reduction step 2: fold r9.
	movq	%r9,%rcx
	shlq	$32,%r9
	movq	%rdx,%r8
	mulq	%r13
	shrq	$32,%rcx
	addq	%r9,%r10
	adcq	%rcx,%r11
	adcq	%rax,%r8
	movq	%r10,%rax
	adcq	$0,%rdx

	# Reduction step 3: fold r10.
	movq	%r10,%rcx
	shlq	$32,%r10
	movq	%rdx,%r9
	mulq	%r13
	shrq	$32,%rcx
	addq	%r10,%r11
	adcq	%rcx,%r8
	adcq	%rax,%r9
	movq	%r11,%rax
	adcq	$0,%rdx

	# Reduction step 4: fold r11; save pre-subtraction copies of
	# limbs 0/1 in rcx/rsi as we go.
	movq	%r11,%rcx
	shlq	$32,%r11
	movq	%rdx,%r10
	mulq	%r13
	shrq	$32,%rcx
	addq	%r11,%r8
	adcq	%rcx,%r9
	movq	%r8,%rcx
	adcq	%rax,%r10
	movq	%r9,%rsi
	adcq	$0,%rdx

	# Conditional subtraction of p.  Note the asymmetry: for limbs
	# 0-2 the registers hold the subtracted value and rcx/rsi/rax the
	# originals, while for limb 3 r11 holds the original and rdx the
	# subtracted value — hence cmovnz vs cmovz below.
	subq	$-1,%r8
	movq	%r10,%rax
	sbbq	%r12,%r9
	sbbq	$0,%r10
	movq	%rdx,%r11
	sbbq	%r13,%rdx
	sbbq	%r13,%r13		# r13 = -1 (ZF=0) iff value < p

	cmovnzq	%rcx,%r8		# borrowed: restore unsubtracted limbs
	cmovnzq	%rsi,%r9
	movq	%r8,0(%rdi)
	cmovnzq	%rax,%r10
	movq	%r9,8(%rdi)
	cmovzq	%rdx,%r11		# no borrow: take subtracted limb 3
	movq	%r10,16(%rdi)
	movq	%r11,24(%rdi)

	popq	%r13
	popq	%r12
	.byte	0xf3,0xc3		# rep ret
.size	ecp_nistz256_from_mont,.-ecp_nistz256_from_mont
649 | |
650 | |
# void ecp_nistz256_select_w5(uint64_t *val /* rdi */,
#                             const uint64_t *in_t /* rsi */,
#                             int index /* edx */);
# Constant-time lookup for the window-5 table: scans all 16 entries of
# 96 bytes each, accumulating (entry AND mask) where the mask is all-ones
# only for the entry whose 1-based counter equals `index`.  Every entry
# is read regardless of the index, so the memory access pattern leaks
# nothing.  An index of 0 yields all zeros.
# Clobbers: rax, rsi, xmm0-xmm15, flags.  (ELF/SysV only: xmm6-15 are
# volatile here; this would clobber callee-saved xmm regs on Win64.)
.globl	ecp_nistz256_select_w5
.hidden	ecp_nistz256_select_w5
.type	ecp_nistz256_select_w5,@function
.align	32
ecp_nistz256_select_w5:
	movdqa	.LOne(%rip),%xmm0	# per-lane increment
	movd	%edx,%xmm1

	pxor	%xmm2,%xmm2		# xmm2-7: 96-byte accumulator
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7

	movdqa	%xmm0,%xmm8		# xmm8 = running counter (starts at 1)
	pshufd	$0,%xmm1,%xmm1		# broadcast index to all lanes

	movq	$16,%rax		# 16 table entries
.Lselect_loop_sse_w5:

	movdqa	%xmm8,%xmm15
	paddd	%xmm0,%xmm8
	pcmpeqd	%xmm1,%xmm15		# mask = (counter == index) ? ~0 : 0

	movdqa	0(%rsi),%xmm9		# load the whole 96-byte entry
	movdqa	16(%rsi),%xmm10
	movdqa	32(%rsi),%xmm11
	movdqa	48(%rsi),%xmm12
	movdqa	64(%rsi),%xmm13
	movdqa	80(%rsi),%xmm14
	leaq	96(%rsi),%rsi

	pand	%xmm15,%xmm9		# keep it only under the mask
	pand	%xmm15,%xmm10
	por	%xmm9,%xmm2
	pand	%xmm15,%xmm11
	por	%xmm10,%xmm3
	pand	%xmm15,%xmm12
	por	%xmm11,%xmm4
	pand	%xmm15,%xmm13
	por	%xmm12,%xmm5
	pand	%xmm15,%xmm14
	por	%xmm13,%xmm6
	por	%xmm14,%xmm7

	decq	%rax
	jnz	.Lselect_loop_sse_w5

	movdqu	%xmm2,0(%rdi)
	movdqu	%xmm3,16(%rdi)
	movdqu	%xmm4,32(%rdi)
	movdqu	%xmm5,48(%rdi)
	movdqu	%xmm6,64(%rdi)
	movdqu	%xmm7,80(%rdi)
	.byte	0xf3,0xc3		# rep ret
.size	ecp_nistz256_select_w5,.-ecp_nistz256_select_w5
708 | |
709 | |
710 | |
# void ecp_nistz256_select_w7(uint64_t *val /* rdi */,
#                             const uint64_t *in_t /* rsi */,
#                             int index /* edx */);
# Constant-time lookup for the window-7 table: scans all 64 entries of
# 64 bytes each, same mask-and-accumulate scheme as select_w5.  All
# entries are read independent of `index`; index 0 yields zeros.
# Clobbers: rax, rsi, xmm0-xmm5, xmm8-xmm12, xmm15, flags.
.globl	ecp_nistz256_select_w7
.hidden	ecp_nistz256_select_w7
.type	ecp_nistz256_select_w7,@function
.align	32
ecp_nistz256_select_w7:
	movdqa	.LOne(%rip),%xmm8	# running counter (starts at 1)
	movd	%edx,%xmm1

	pxor	%xmm2,%xmm2		# xmm2-5: 64-byte accumulator
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5

	movdqa	%xmm8,%xmm0		# per-lane increment
	pshufd	$0,%xmm1,%xmm1		# broadcast index
	movq	$64,%rax		# 64 table entries

.Lselect_loop_sse_w7:
	movdqa	%xmm8,%xmm15
	paddd	%xmm0,%xmm8
	movdqa	0(%rsi),%xmm9
	movdqa	16(%rsi),%xmm10
	pcmpeqd	%xmm1,%xmm15		# mask = (counter == index) ? ~0 : 0
	movdqa	32(%rsi),%xmm11
	movdqa	48(%rsi),%xmm12
	leaq	64(%rsi),%rsi

	pand	%xmm15,%xmm9
	pand	%xmm15,%xmm10
	por	%xmm9,%xmm2
	pand	%xmm15,%xmm11
	por	%xmm10,%xmm3
	pand	%xmm15,%xmm12
	por	%xmm11,%xmm4
	prefetcht0	255(%rsi)	# hint the next entry into cache
	por	%xmm12,%xmm5

	decq	%rax
	jnz	.Lselect_loop_sse_w7

	movdqu	%xmm2,0(%rdi)
	movdqu	%xmm3,16(%rdi)
	movdqu	%xmm4,32(%rdi)
	movdqu	%xmm5,48(%rdi)
	.byte	0xf3,0xc3		# rep ret
.size	ecp_nistz256_select_w7,.-ecp_nistz256_select_w7
# AVX2 variant of the w7 lookup is not provided in this build.  The entry
# point deliberately executes ud2 (0x0f,0x0b) so that any accidental call
# traps immediately instead of returning wrong data.
.globl	ecp_nistz256_avx2_select_w7
.hidden	ecp_nistz256_avx2_select_w7
.type	ecp_nistz256_avx2_select_w7,@function
.align	32
ecp_nistz256_avx2_select_w7:
.byte	0x0f,0x0b			# ud2 — abort
.byte	0xf3,0xc3			# rep ret (never reached)
.size	ecp_nistz256_avx2_select_w7,.-ecp_nistz256_avx2_select_w7
# Internal modular addition, register-based calling convention:
# In:  r12,r13,r8,r9 = a; rbx -> b; rdi -> result;
#      r14 = .Lpoly+8, r15 = .Lpoly+24 (preloaded by the caller).
# Out: result = a + b mod p, stored to (rdi) and left in r12,r13,r8,r9.
# Clobbers: rax, rbp, rcx, r10, r11, flags.
#
# FIX (stricter reduction): the previous code masked only on the carry
# out of the addition (sbbq %r11,%r11 / testq / cmovz), so a sum in
# [p, 2^256) was returned unreduced.  The selection now uses the borrow
# of the full (carry:sum) - p, matching the upstream
# ecp_nistz256-x86_64.pl "stricter reduction" fix.
.type	__ecp_nistz256_add_toq,@function
.align	32
__ecp_nistz256_add_toq:
	xorq	%r11,%r11		# r11 accumulates the addition carry
	addq	0(%rbx),%r12
	adcq	8(%rbx),%r13
	movq	%r12,%rax		# stash unreduced sum in rax/rbp/rcx/r10
	adcq	16(%rbx),%r8
	adcq	24(%rbx),%r9
	movq	%r13,%rbp
	adcq	$0,%r11			# r11 = bit 256 of the sum

	subq	$-1,%r12		# subtract p (p[0] = -1, p[2] = 0)
	movq	%r8,%rcx
	sbbq	%r14,%r13
	sbbq	$0,%r8
	movq	%r9,%r10
	sbbq	%r15,%r9
	sbbq	$0,%r11			# CF=1 iff sum < p

	cmovcq	%rax,%r12		# borrowed: keep unreduced sum
	cmovcq	%rbp,%r13
	movq	%r12,0(%rdi)
	cmovcq	%rcx,%r8
	movq	%r13,8(%rdi)
	cmovcq	%r10,%r9
	movq	%r8,16(%rdi)
	movq	%r9,24(%rdi)

	.byte	0xf3,0xc3		# rep ret
.size	__ecp_nistz256_add_toq,.-__ecp_nistz256_add_toq
795 | |
# Internal modular subtraction, register-based calling convention:
# In:  r12,r13,r8,r9 = a; rbx -> b; rdi -> result;
#      r14 = .Lpoly+8, r15 = .Lpoly+24 (preloaded by the caller).
# Out: result = a - b mod p, stored to (rdi) and left in r12,r13,r8,r9.
# Clobbers: rax, rbp, rcx, r10, r11, flags.
# Computes a - b, then adds p back; the borrow mask selects which of the
# two values survives (branchless, constant time).
.type	__ecp_nistz256_sub_fromq,@function
.align	32
__ecp_nistz256_sub_fromq:
	subq	0(%rbx),%r12		# a - b
	sbbq	8(%rbx),%r13
	movq	%r12,%rax		# stash raw difference in rax/rbp/rcx/r10
	sbbq	16(%rbx),%r8
	sbbq	24(%rbx),%r9
	movq	%r13,%rbp
	sbbq	%r11,%r11		# r11 = -1 iff a < b

	addq	$-1,%r12		# add p back (p[0] = -1, p[2] = 0)
	movq	%r8,%rcx
	adcq	%r14,%r13
	adcq	$0,%r8
	movq	%r9,%r10
	adcq	%r15,%r9
	testq	%r11,%r11

	cmovzq	%rax,%r12		# no borrow: keep a - b, else a - b + p
	cmovzq	%rbp,%r13
	movq	%r12,0(%rdi)
	cmovzq	%rcx,%r8
	movq	%r13,8(%rdi)
	cmovzq	%r10,%r9
	movq	%r8,16(%rdi)
	movq	%r9,24(%rdi)

	.byte	0xf3,0xc3		# rep ret
.size	__ecp_nistz256_sub_fromq,.-__ecp_nistz256_sub_fromq
826 | |
# Internal modular subtraction with swapped, all-register operands:
# In:  rax,rbp,rcx,r10 = a; r12,r13,r8,r9 = b;
#      r14 = .Lpoly+8, r15 = .Lpoly+24 (preloaded by the caller).
# Out: r12,r13,r8,r9 = a - b mod p.  No store — the result is left in
#      registers only (callers store or consume it directly).
# Clobbers: rax, rbp, rcx, r10, r11, flags.
.type	__ecp_nistz256_subq,@function
.align	32
__ecp_nistz256_subq:
	subq	%r12,%rax		# a - b
	sbbq	%r13,%rbp
	movq	%rax,%r12		# raw difference into the result regs
	sbbq	%r8,%rcx
	sbbq	%r9,%r10
	movq	%rbp,%r13
	sbbq	%r11,%r11		# r11 = -1 iff a < b

	addq	$-1,%rax		# difference + p (p[0] = -1, p[2] = 0)
	movq	%rcx,%r8
	adcq	%r14,%rbp
	adcq	$0,%rcx
	movq	%r10,%r9
	adcq	%r15,%r10
	testq	%r11,%r11

	cmovnzq	%rax,%r12		# borrowed: take the +p variant
	cmovnzq	%rbp,%r13
	cmovnzq	%rcx,%r8
	cmovnzq	%r10,%r9

	.byte	0xf3,0xc3		# rep ret
.size	__ecp_nistz256_subq,.-__ecp_nistz256_subq
853 | |
# Internal doubling, register-based calling convention:
# In:  r12,r13,r8,r9 = a; rdi -> result;
#      r14 = .Lpoly+8, r15 = .Lpoly+24 (preloaded by the caller).
# Out: result = 2*a mod p, stored to (rdi) and left in r12,r13,r8,r9.
# Clobbers: rax, rbp, rcx, r10, r11, flags.
#
# FIX (stricter reduction): as with ecp_nistz256_mul_by_2, the previous
# code reduced only when the doubling carried out of 2^256
# (sbbq %r11,%r11 / testq / cmovz), leaving 2a unreduced when
# p <= 2a < 2^256.  The selection now uses the borrow of the full
# (carry:2a) - p, per the upstream "stricter reduction" fix.
.type	__ecp_nistz256_mul_by_2q,@function
.align	32
__ecp_nistz256_mul_by_2q:
	xorq	%r11,%r11		# r11 accumulates the doubling carry
	addq	%r12,%r12		# 2*a
	adcq	%r13,%r13
	movq	%r12,%rax		# stash unreduced 2*a in rax/rbp/rcx/r10
	adcq	%r8,%r8
	adcq	%r9,%r9
	movq	%r13,%rbp
	adcq	$0,%r11			# r11 = bit 256 of 2*a

	subq	$-1,%r12		# subtract p (p[0] = -1, p[2] = 0)
	movq	%r8,%rcx
	sbbq	%r14,%r13
	sbbq	$0,%r8
	movq	%r9,%r10
	sbbq	%r15,%r9
	sbbq	$0,%r11			# CF=1 iff 2*a < p

	cmovcq	%rax,%r12		# borrowed: keep unreduced 2*a
	cmovcq	%rbp,%r13
	movq	%r12,0(%rdi)
	cmovcq	%rcx,%r8
	movq	%r13,8(%rdi)
	cmovcq	%r10,%r9
	movq	%r8,16(%rdi)
	movq	%r9,24(%rdi)

	.byte	0xf3,0xc3		# rep ret
.size	__ecp_nistz256_mul_by_2q,.-__ecp_nistz256_mul_by_2q
# void ecp_nistz256_point_double(P256_POINT *r /* rdi */,
#                                const P256_POINT *a /* rsi */);
# Jacobian point doubling.  A point is three 32-byte field elements
# (X at +0, Y at +32, Z at +64) in Montgomery form.  Uses 168 bytes of
# stack temporaries and the __ecp_nistz256_*q helpers, which take their
# operands in registers (see each helper's header).  r14/r15 carry
# .Lpoly+8/.Lpoly+24 across the helper calls.
# NOTE(review): the S/M/Zsqr temporary layout below (S at 0(rsp),
# M at 32(rsp), Zsqr at 64(rsp), tmp at 128(rsp), copy of X at 96(rsp))
# follows the textbook doubling used by ecp_nistz256-x86_64.pl — confirm
# against the perlasm source when modifying.
.globl	ecp_nistz256_point_double
.hidden	ecp_nistz256_point_double
.type	ecp_nistz256_point_double,@function
.align	32
ecp_nistz256_point_double:
	pushq	%rbp
	pushq	%rbx
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15
	subq	$160+8,%rsp

# point_add jumps here (after trimming its own frame) when it detects
# that both inputs are the same point.
.Lpoint_double_shortcutq:
	movdqu	0(%rsi),%xmm0		# in_x
	movq	%rsi,%rbx
	movdqu	16(%rsi),%xmm1
	movq	32+0(%rsi),%r12		# in_y into r12,r13,r8,r9
	movq	32+8(%rsi),%r13
	movq	32+16(%rsi),%r8
	movq	32+24(%rsi),%r9
	movq	.Lpoly+8(%rip),%r14	# poly limbs for the helpers
	movq	.Lpoly+24(%rip),%r15
	movdqa	%xmm0,96(%rsp)		# keep a copy of in_x
	movdqa	%xmm1,96+16(%rsp)
	leaq	32(%rdi),%r10
	leaq	64(%rdi),%r11
.byte	102,72,15,110,199		# movq %rdi,%xmm0  (save res_x ptr)
.byte	102,73,15,110,202		# movq %r10,%xmm9  (save res_y ptr)
.byte	102,73,15,110,211		# movq %r11,%xmm10 (save res_z ptr)

	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_mul_by_2q	# S = 2*in_y

	movq	64+0(%rsi),%rax		# in_z
	movq	64+8(%rsi),%r14
	movq	64+16(%rsi),%r15
	movq	64+24(%rsi),%r8
	leaq	64-0(%rsi),%rsi
	leaq	64(%rsp),%rdi
	call	__ecp_nistz256_sqr_montq	# Zsqr = in_z^2

	movq	0+0(%rsp),%rax
	movq	8+0(%rsp),%r14
	leaq	0+0(%rsp),%rsi
	movq	16+0(%rsp),%r15
	movq	24+0(%rsp),%r8
	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_sqr_montq	# S = S^2 = 4*in_y^2

	movq	32(%rbx),%rax		# in_y
	movq	64+0(%rbx),%r9		# in_z
	movq	64+8(%rbx),%r10
	movq	64+16(%rbx),%r11
	movq	64+24(%rbx),%r12
	leaq	64-0(%rbx),%rsi
	leaq	32(%rbx),%rbx
.byte	102,72,15,126,215		# movq %xmm10,%rdi (res_z)
	call	__ecp_nistz256_mul_montq	# res_z = in_z * in_y
	call	__ecp_nistz256_mul_by_2q	# res_z = 2 * in_z * in_y

	movq	96+0(%rsp),%r12		# in_x copy
	movq	96+8(%rsp),%r13
	leaq	64(%rsp),%rbx
	movq	96+16(%rsp),%r8
	movq	96+24(%rsp),%r9
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_add_toq		# M = in_x + Zsqr

	movq	96+0(%rsp),%r12
	movq	96+8(%rsp),%r13
	leaq	64(%rsp),%rbx
	movq	96+16(%rsp),%r8
	movq	96+24(%rsp),%r9
	leaq	64(%rsp),%rdi
	call	__ecp_nistz256_sub_fromq	# Zsqr = in_x - Zsqr

	movq	0+0(%rsp),%rax
	movq	8+0(%rsp),%r14
	leaq	0+0(%rsp),%rsi
	movq	16+0(%rsp),%r15
	movq	24+0(%rsp),%r8
.byte	102,72,15,126,207		# movq %xmm9,%rdi (res_y)
	call	__ecp_nistz256_sqr_montq	# res_y = S^2
	# Halve res_y mod p: if odd, add p first, then shift the 257-bit
	# value right by one.  rsi/rbp still hold .Lpoly+8/.Lpoly+24 from
	# __ecp_nistz256_sqr_montq.
	xorq	%r9,%r9
	movq	%r12,%rax
	addq	$-1,%r12		# value + p (p[0] = -1, p[2] = 0)
	movq	%r13,%r10
	adcq	%rsi,%r13
	movq	%r14,%rcx
	adcq	$0,%r14
	movq	%r15,%r8
	adcq	%rbp,%r15
	adcq	$0,%r9
	xorq	%rsi,%rsi
	testq	$1,%rax			# was the value even?

	cmovzq	%rax,%r12		# even: drop the +p variant
	cmovzq	%r10,%r13
	cmovzq	%rcx,%r14
	cmovzq	%r8,%r15
	cmovzq	%rsi,%r9

	# Right-shift the (possibly 257-bit) value by one.
	movq	%r13,%rax
	shrq	$1,%r12
	shlq	$63,%rax
	movq	%r14,%r10
	shrq	$1,%r13
	orq	%rax,%r12
	shlq	$63,%r10
	movq	%r15,%rcx
	shrq	$1,%r14
	orq	%r10,%r13
	shlq	$63,%rcx
	movq	%r12,0(%rdi)
	shrq	$1,%r15
	movq	%r13,8(%rdi)
	shlq	$63,%r9
	orq	%rcx,%r14
	orq	%r9,%r15
	movq	%r14,16(%rdi)
	movq	%r15,24(%rdi)
	movq	64(%rsp),%rax		# Zsqr (now in_x - Zsqr)
	leaq	64(%rsp),%rbx
	movq	0+32(%rsp),%r9		# M
	movq	8+32(%rsp),%r10
	leaq	0+32(%rsp),%rsi
	movq	16+32(%rsp),%r11
	movq	24+32(%rsp),%r12
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_mul_montq	# M = M * (in_x - Zsqr)

	leaq	128(%rsp),%rdi
	call	__ecp_nistz256_mul_by_2q	# tmp = 2*M

	leaq	32(%rsp),%rbx
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_add_toq		# M = 3*M

	movq	96(%rsp),%rax		# in_x copy
	leaq	96(%rsp),%rbx
	movq	0+0(%rsp),%r9		# S
	movq	8+0(%rsp),%r10
	leaq	0+0(%rsp),%rsi
	movq	16+0(%rsp),%r11
	movq	24+0(%rsp),%r12
	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_mul_montq	# S = S * in_x

	leaq	128(%rsp),%rdi
	call	__ecp_nistz256_mul_by_2q	# tmp = 2*S

	movq	0+32(%rsp),%rax		# M
	movq	8+32(%rsp),%r14
	leaq	0+32(%rsp),%rsi
	movq	16+32(%rsp),%r15
	movq	24+32(%rsp),%r8
.byte	102,72,15,126,199		# movq %xmm0,%rdi (res_x)
	call	__ecp_nistz256_sqr_montq	# res_x = M^2

	leaq	128(%rsp),%rbx
	movq	%r14,%r8		# sqr leaves result in r12..r15;
	movq	%r15,%r9		# move to sub_fromq's r12,r13,r8,r9
	movq	%rsi,%r14		# and restore poly limbs (sqr left
	movq	%rbp,%r15		# them in rsi/rbp)
	call	__ecp_nistz256_sub_fromq	# res_x = M^2 - 2*S

	movq	0+0(%rsp),%rax		# S
	movq	0+8(%rsp),%rbp
	movq	0+16(%rsp),%rcx
	movq	0+24(%rsp),%r10
	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_subq		# S - res_x, left in r12,r13,r8,r9

	movq	32(%rsp),%rax		# M
	leaq	32(%rsp),%rbx
	movq	%r12,%r14
	xorl	%ecx,%ecx
	movq	%r12,0+0(%rsp)		# store S - res_x back to S
	movq	%r13,%r10
	movq	%r13,0+8(%rsp)
	cmovzq	%r8,%r11		# shuffle limbs into mul_montq's a regs
	movq	%r8,0+16(%rsp)		# (ZF is known-set here: ecx was just
	leaq	0-0(%rsp),%rsi		# zeroed, so the cmovz always moves)
	cmovzq	%r9,%r12
	movq	%r9,0+24(%rsp)
	movq	%r14,%r9
	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_mul_montq	# (S - res_x) * M

.byte	102,72,15,126,203		# movq %xmm9,%rbx (res_y)
.byte	102,72,15,126,207		# movq %xmm9->... res_y ptr into rdi
	call	__ecp_nistz256_sub_fromq	# res_y = (S-res_x)*M - res_y

	addq	$160+8,%rsp
	popq	%r15
	popq	%r14
	popq	%r13
	popq	%r12
	popq	%rbx
	popq	%rbp
	.byte	0xf3,0xc3		# rep ret
.size	ecp_nistz256_point_double,.-ecp_nistz256_point_double
1087 .globl ecp_nistz256_point_add | |
1088 .hidden ecp_nistz256_point_add | |
1089 .type ecp_nistz256_point_add,@function | |
1090 .align 32 | |
1091 ecp_nistz256_point_add: | |
1092 pushq %rbp | |
1093 pushq %rbx | |
1094 pushq %r12 | |
1095 pushq %r13 | |
1096 pushq %r14 | |
1097 pushq %r15 | |
1098 subq $576+8,%rsp | |
1099 | |
1100 movdqu 0(%rsi),%xmm0 | |
1101 movdqu 16(%rsi),%xmm1 | |
1102 movdqu 32(%rsi),%xmm2 | |
1103 movdqu 48(%rsi),%xmm3 | |
1104 movdqu 64(%rsi),%xmm4 | |
1105 movdqu 80(%rsi),%xmm5 | |
1106 movq %rsi,%rbx | |
1107 movq %rdx,%rsi | |
1108 movdqa %xmm0,384(%rsp) | |
1109 movdqa %xmm1,384+16(%rsp) | |
1110 por %xmm0,%xmm1 | |
1111 movdqa %xmm2,416(%rsp) | |
1112 movdqa %xmm3,416+16(%rsp) | |
1113 por %xmm2,%xmm3 | |
1114 movdqa %xmm4,448(%rsp) | |
1115 movdqa %xmm5,448+16(%rsp) | |
1116 por %xmm1,%xmm3 | |
1117 | |
1118 movdqu 0(%rsi),%xmm0 | |
1119 pshufd $0xb1,%xmm3,%xmm5 | |
1120 movdqu 16(%rsi),%xmm1 | |
1121 movdqu 32(%rsi),%xmm2 | |
1122 por %xmm3,%xmm5 | |
1123 movdqu 48(%rsi),%xmm3 | |
1124 movq 64+0(%rsi),%rax | |
1125 movq 64+8(%rsi),%r14 | |
1126 movq 64+16(%rsi),%r15 | |
1127 movq 64+24(%rsi),%r8 | |
1128 movdqa %xmm0,480(%rsp) | |
1129 pshufd $0x1e,%xmm5,%xmm4 | |
1130 movdqa %xmm1,480+16(%rsp) | |
1131 por %xmm0,%xmm1 | |
1132 .byte 102,72,15,110,199 | |
1133 movdqa %xmm2,512(%rsp) | |
1134 movdqa %xmm3,512+16(%rsp) | |
1135 por %xmm2,%xmm3 | |
1136 por %xmm4,%xmm5 | |
1137 pxor %xmm4,%xmm4 | |
1138 por %xmm1,%xmm3 | |
1139 | |
1140 leaq 64-0(%rsi),%rsi | |
1141 movq %rax,544+0(%rsp) | |
1142 movq %r14,544+8(%rsp) | |
1143 movq %r15,544+16(%rsp) | |
1144 movq %r8,544+24(%rsp) | |
1145 leaq 96(%rsp),%rdi | |
1146 call __ecp_nistz256_sqr_montq | |
1147 | |
1148 pcmpeqd %xmm4,%xmm5 | |
1149 pshufd $0xb1,%xmm3,%xmm4 | |
1150 por %xmm3,%xmm4 | |
1151 pshufd $0,%xmm5,%xmm5 | |
1152 pshufd $0x1e,%xmm4,%xmm3 | |
1153 por %xmm3,%xmm4 | |
1154 pxor %xmm3,%xmm3 | |
1155 pcmpeqd %xmm3,%xmm4 | |
1156 pshufd $0,%xmm4,%xmm4 | |
1157 movq 64+0(%rbx),%rax | |
1158 movq 64+8(%rbx),%r14 | |
1159 movq 64+16(%rbx),%r15 | |
1160 movq 64+24(%rbx),%r8 | |
1161 .byte 102,72,15,110,203 | |
1162 | |
1163 leaq 64-0(%rbx),%rsi | |
1164 leaq 32(%rsp),%rdi | |
1165 call __ecp_nistz256_sqr_montq | |
1166 | |
1167 movq 544(%rsp),%rax | |
1168 leaq 544(%rsp),%rbx | |
1169 movq 0+96(%rsp),%r9 | |
1170 movq 8+96(%rsp),%r10 | |
1171 leaq 0+96(%rsp),%rsi | |
1172 movq 16+96(%rsp),%r11 | |
1173 movq 24+96(%rsp),%r12 | |
1174 leaq 224(%rsp),%rdi | |
1175 call __ecp_nistz256_mul_montq | |
1176 | |
1177 movq 448(%rsp),%rax | |
1178 leaq 448(%rsp),%rbx | |
1179 movq 0+32(%rsp),%r9 | |
1180 movq 8+32(%rsp),%r10 | |
1181 leaq 0+32(%rsp),%rsi | |
1182 movq 16+32(%rsp),%r11 | |
1183 movq 24+32(%rsp),%r12 | |
1184 leaq 256(%rsp),%rdi | |
1185 call __ecp_nistz256_mul_montq | |
1186 | |
1187 movq 416(%rsp),%rax | |
1188 leaq 416(%rsp),%rbx | |
1189 movq 0+224(%rsp),%r9 | |
1190 movq 8+224(%rsp),%r10 | |
1191 leaq 0+224(%rsp),%rsi | |
1192 movq 16+224(%rsp),%r11 | |
1193 movq 24+224(%rsp),%r12 | |
1194 leaq 224(%rsp),%rdi | |
1195 call __ecp_nistz256_mul_montq | |
1196 | |
1197 movq 512(%rsp),%rax | |
1198 leaq 512(%rsp),%rbx | |
1199 movq 0+256(%rsp),%r9 | |
1200 movq 8+256(%rsp),%r10 | |
1201 leaq 0+256(%rsp),%rsi | |
1202 movq 16+256(%rsp),%r11 | |
1203 movq 24+256(%rsp),%r12 | |
1204 leaq 256(%rsp),%rdi | |
1205 call __ecp_nistz256_mul_montq | |
1206 | |
1207 leaq 224(%rsp),%rbx | |
1208 leaq 64(%rsp),%rdi | |
1209 call __ecp_nistz256_sub_fromq | |
1210 | |
1211 orq %r13,%r12 | |
1212 movdqa %xmm4,%xmm2 | |
1213 orq %r8,%r12 | |
1214 orq %r9,%r12 | |
1215 por %xmm5,%xmm2 | |
1216 .byte 102,73,15,110,220 | |
1217 | |
1218 movq 384(%rsp),%rax | |
1219 leaq 384(%rsp),%rbx | |
1220 movq 0+96(%rsp),%r9 | |
1221 movq 8+96(%rsp),%r10 | |
1222 leaq 0+96(%rsp),%rsi | |
1223 movq 16+96(%rsp),%r11 | |
1224 movq 24+96(%rsp),%r12 | |
1225 leaq 160(%rsp),%rdi | |
1226 call __ecp_nistz256_mul_montq | |
1227 | |
1228 movq 480(%rsp),%rax | |
1229 leaq 480(%rsp),%rbx | |
1230 movq 0+32(%rsp),%r9 | |
1231 movq 8+32(%rsp),%r10 | |
1232 leaq 0+32(%rsp),%rsi | |
1233 movq 16+32(%rsp),%r11 | |
1234 movq 24+32(%rsp),%r12 | |
1235 leaq 192(%rsp),%rdi | |
1236 call __ecp_nistz256_mul_montq | |
1237 | |
1238 leaq 160(%rsp),%rbx | |
1239 leaq 0(%rsp),%rdi | |
1240 call __ecp_nistz256_sub_fromq | |
1241 | |
1242 orq %r13,%r12 | |
1243 orq %r8,%r12 | |
1244 orq %r9,%r12 | |
1245 | |
1246 .byte 0x3e | |
1247 jnz .Ladd_proceedq | |
1248 .byte 102,73,15,126,208 | |
1249 .byte 102,73,15,126,217 | |
1250 testq %r8,%r8 | |
1251 jnz .Ladd_proceedq | |
1252 testq %r9,%r9 | |
1253 jz .Ladd_doubleq | |
1254 | |
1255 .byte 102,72,15,126,199 | |
1256 pxor %xmm0,%xmm0 | |
1257 movdqu %xmm0,0(%rdi) | |
1258 movdqu %xmm0,16(%rdi) | |
1259 movdqu %xmm0,32(%rdi) | |
1260 movdqu %xmm0,48(%rdi) | |
1261 movdqu %xmm0,64(%rdi) | |
1262 movdqu %xmm0,80(%rdi) | |
1263 jmp .Ladd_doneq | |
1264 | |
1265 .align 32 | |
1266 .Ladd_doubleq: | |
1267 .byte 102,72,15,126,206 | |
1268 .byte 102,72,15,126,199 | |
1269 addq $416,%rsp | |
1270 jmp .Lpoint_double_shortcutq | |
1271 | |
1272 .align 32 | |
1273 .Ladd_proceedq: | |
1274 movq 0+64(%rsp),%rax | |
1275 movq 8+64(%rsp),%r14 | |
1276 leaq 0+64(%rsp),%rsi | |
1277 movq 16+64(%rsp),%r15 | |
1278 movq 24+64(%rsp),%r8 | |
1279 leaq 96(%rsp),%rdi | |
1280 call __ecp_nistz256_sqr_montq | |
1281 | |
1282 movq 448(%rsp),%rax | |
1283 leaq 448(%rsp),%rbx | |
1284 movq 0+0(%rsp),%r9 | |
1285 movq 8+0(%rsp),%r10 | |
1286 leaq 0+0(%rsp),%rsi | |
1287 movq 16+0(%rsp),%r11 | |
1288 movq 24+0(%rsp),%r12 | |
1289 leaq 352(%rsp),%rdi | |
1290 call __ecp_nistz256_mul_montq | |
1291 | |
1292 movq 0+0(%rsp),%rax | |
1293 movq 8+0(%rsp),%r14 | |
1294 leaq 0+0(%rsp),%rsi | |
1295 movq 16+0(%rsp),%r15 | |
1296 movq 24+0(%rsp),%r8 | |
1297 leaq 32(%rsp),%rdi | |
1298 call __ecp_nistz256_sqr_montq | |
1299 | |
1300 movq 544(%rsp),%rax | |
1301 leaq 544(%rsp),%rbx | |
1302 movq 0+352(%rsp),%r9 | |
1303 movq 8+352(%rsp),%r10 | |
1304 leaq 0+352(%rsp),%rsi | |
1305 movq 16+352(%rsp),%r11 | |
1306 movq 24+352(%rsp),%r12 | |
1307 leaq 352(%rsp),%rdi | |
1308 call __ecp_nistz256_mul_montq | |
1309 | |
1310 movq 0(%rsp),%rax | |
1311 leaq 0(%rsp),%rbx | |
1312 movq 0+32(%rsp),%r9 | |
1313 movq 8+32(%rsp),%r10 | |
1314 leaq 0+32(%rsp),%rsi | |
1315 movq 16+32(%rsp),%r11 | |
1316 movq 24+32(%rsp),%r12 | |
1317 leaq 128(%rsp),%rdi | |
1318 call __ecp_nistz256_mul_montq | |
1319 | |
1320 movq 160(%rsp),%rax | |
1321 leaq 160(%rsp),%rbx | |
1322 movq 0+32(%rsp),%r9 | |
1323 movq 8+32(%rsp),%r10 | |
1324 leaq 0+32(%rsp),%rsi | |
1325 movq 16+32(%rsp),%r11 | |
1326 movq 24+32(%rsp),%r12 | |
1327 leaq 192(%rsp),%rdi | |
1328 call __ecp_nistz256_mul_montq | |
1329 | |
1330 | |
1331 | |
1332 | |
1333 addq %r12,%r12 | |
1334 leaq 96(%rsp),%rsi | |
1335 adcq %r13,%r13 | |
1336 movq %r12,%rax | |
1337 adcq %r8,%r8 | |
1338 adcq %r9,%r9 | |
1339 movq %r13,%rbp | |
1340 sbbq %r11,%r11 | |
1341 | |
1342 subq $-1,%r12 | |
1343 movq %r8,%rcx | |
1344 sbbq %r14,%r13 | |
1345 sbbq $0,%r8 | |
1346 movq %r9,%r10 | |
1347 sbbq %r15,%r9 | |
1348 testq %r11,%r11 | |
1349 | |
1350 cmovzq %rax,%r12 | |
1351 movq 0(%rsi),%rax | |
1352 cmovzq %rbp,%r13 | |
1353 movq 8(%rsi),%rbp | |
1354 cmovzq %rcx,%r8 | |
1355 movq 16(%rsi),%rcx | |
1356 cmovzq %r10,%r9 | |
1357 movq 24(%rsi),%r10 | |
1358 | |
1359 call __ecp_nistz256_subq | |
1360 | |
1361 leaq 128(%rsp),%rbx | |
1362 leaq 288(%rsp),%rdi | |
1363 call __ecp_nistz256_sub_fromq | |
1364 | |
1365 movq 192+0(%rsp),%rax | |
1366 movq 192+8(%rsp),%rbp | |
1367 movq 192+16(%rsp),%rcx | |
1368 movq 192+24(%rsp),%r10 | |
1369 leaq 320(%rsp),%rdi | |
1370 | |
1371 call __ecp_nistz256_subq | |
1372 | |
1373 movq %r12,0(%rdi) | |
1374 movq %r13,8(%rdi) | |
1375 movq %r8,16(%rdi) | |
1376 movq %r9,24(%rdi) | |
1377 movq 128(%rsp),%rax | |
1378 leaq 128(%rsp),%rbx | |
1379 movq 0+224(%rsp),%r9 | |
1380 movq 8+224(%rsp),%r10 | |
1381 leaq 0+224(%rsp),%rsi | |
1382 movq 16+224(%rsp),%r11 | |
1383 movq 24+224(%rsp),%r12 | |
1384 leaq 256(%rsp),%rdi | |
1385 call __ecp_nistz256_mul_montq | |
1386 | |
1387 movq 320(%rsp),%rax | |
1388 leaq 320(%rsp),%rbx | |
1389 movq 0+64(%rsp),%r9 | |
1390 movq 8+64(%rsp),%r10 | |
1391 leaq 0+64(%rsp),%rsi | |
1392 movq 16+64(%rsp),%r11 | |
1393 movq 24+64(%rsp),%r12 | |
1394 leaq 320(%rsp),%rdi | |
1395 call __ecp_nistz256_mul_montq | |
1396 | |
1397 leaq 256(%rsp),%rbx | |
1398 leaq 320(%rsp),%rdi | |
1399 call __ecp_nistz256_sub_fromq | |
1400 | |
1401 .byte 102,72,15,126,199 | |
1402 | |
1403 movdqa %xmm5,%xmm0 | |
1404 movdqa %xmm5,%xmm1 | |
1405 pandn 352(%rsp),%xmm0 | |
1406 movdqa %xmm5,%xmm2 | |
1407 pandn 352+16(%rsp),%xmm1 | |
1408 movdqa %xmm5,%xmm3 | |
1409 pand 544(%rsp),%xmm2 | |
1410 pand 544+16(%rsp),%xmm3 | |
1411 por %xmm0,%xmm2 | |
1412 por %xmm1,%xmm3 | |
1413 | |
1414 movdqa %xmm4,%xmm0 | |
1415 movdqa %xmm4,%xmm1 | |
1416 pandn %xmm2,%xmm0 | |
1417 movdqa %xmm4,%xmm2 | |
1418 pandn %xmm3,%xmm1 | |
1419 movdqa %xmm4,%xmm3 | |
1420 pand 448(%rsp),%xmm2 | |
1421 pand 448+16(%rsp),%xmm3 | |
1422 por %xmm0,%xmm2 | |
1423 por %xmm1,%xmm3 | |
1424 movdqu %xmm2,64(%rdi) | |
1425 movdqu %xmm3,80(%rdi) | |
1426 | |
1427 movdqa %xmm5,%xmm0 | |
1428 movdqa %xmm5,%xmm1 | |
1429 pandn 288(%rsp),%xmm0 | |
1430 movdqa %xmm5,%xmm2 | |
1431 pandn 288+16(%rsp),%xmm1 | |
1432 movdqa %xmm5,%xmm3 | |
1433 pand 480(%rsp),%xmm2 | |
1434 pand 480+16(%rsp),%xmm3 | |
1435 por %xmm0,%xmm2 | |
1436 por %xmm1,%xmm3 | |
1437 | |
1438 movdqa %xmm4,%xmm0 | |
1439 movdqa %xmm4,%xmm1 | |
1440 pandn %xmm2,%xmm0 | |
1441 movdqa %xmm4,%xmm2 | |
1442 pandn %xmm3,%xmm1 | |
1443 movdqa %xmm4,%xmm3 | |
1444 pand 384(%rsp),%xmm2 | |
1445 pand 384+16(%rsp),%xmm3 | |
1446 por %xmm0,%xmm2 | |
1447 por %xmm1,%xmm3 | |
1448 movdqu %xmm2,0(%rdi) | |
1449 movdqu %xmm3,16(%rdi) | |
1450 | |
1451 movdqa %xmm5,%xmm0 | |
1452 movdqa %xmm5,%xmm1 | |
1453 pandn 320(%rsp),%xmm0 | |
1454 movdqa %xmm5,%xmm2 | |
1455 pandn 320+16(%rsp),%xmm1 | |
1456 movdqa %xmm5,%xmm3 | |
1457 pand 512(%rsp),%xmm2 | |
1458 pand 512+16(%rsp),%xmm3 | |
1459 por %xmm0,%xmm2 | |
1460 por %xmm1,%xmm3 | |
1461 | |
1462 movdqa %xmm4,%xmm0 | |
1463 movdqa %xmm4,%xmm1 | |
1464 pandn %xmm2,%xmm0 | |
1465 movdqa %xmm4,%xmm2 | |
1466 pandn %xmm3,%xmm1 | |
1467 movdqa %xmm4,%xmm3 | |
1468 pand 416(%rsp),%xmm2 | |
1469 pand 416+16(%rsp),%xmm3 | |
1470 por %xmm0,%xmm2 | |
1471 por %xmm1,%xmm3 | |
1472 movdqu %xmm2,32(%rdi) | |
1473 movdqu %xmm3,48(%rdi) | |
1474 | |
1475 .Ladd_doneq: | |
1476 addq $576+8,%rsp | |
1477 popq %r15 | |
1478 popq %r14 | |
1479 popq %r13 | |
1480 popq %r12 | |
1481 popq %rbx | |
1482 popq %rbp | |
1483 .byte 0xf3,0xc3 | |
1484 .size ecp_nistz256_point_add,.-ecp_nistz256_point_add | |
/*
 * void ecp_nistz256_point_add_affine(P256_POINT *r,              // %rdi
 *                                    const P256_POINT *a,        // %rsi
 *                                    const P256_POINT_AFFINE *b) // %rdx
 *
 * P-256 mixed point addition: r := a + b, where a is a Jacobian point
 * (X,Y,Z; three 32-byte limb vectors) and b is an affine point (x,y).
 * All coordinates are in the Montgomery domain mod .Lpoly.  Generated
 * code (perlasm); SysV AMD64 ABI.  Constant-time: the point-at-infinity
 * cases are handled by branch-free masked selection at the end, never
 * by early exits.
 *
 * NOTE(review): the roles given to stack temporaries below assume the
 * helpers defined elsewhere in this file behave as in the upstream
 * perlasm source: __ecp_nistz256_mul_montq computes rdi <- rsi*rbx,
 * __ecp_nistz256_sqr_montq computes rdi <- rsi^2 (both Montgomery,
 * result also left in r12,r13,r8,r9), __ecp_nistz256_sub_fromq computes
 * rdi <- rdi - rbx mod p, and __ecp_nistz256_subq subtracts the current
 * r12,r13,r8,r9 from the values preloaded in rax,rbp,rcx,r10 -- confirm
 * against the definitions earlier in the file.
 */
1485 .globl ecp_nistz256_point_add_affine | |
1486 .hidden ecp_nistz256_point_add_affine | |
1487 .type ecp_nistz256_point_add_affine,@function | |
1488 .align 32 | |
1489 ecp_nistz256_point_add_affine: | |
/* Prologue: save all six callee-saved GPRs and carve out a 480-byte
 * frame (+8 keeps %rsp 16-byte aligned at the helper calls). */
1490 pushq %rbp | |
1491 pushq %rbx | |
1492 pushq %r12 | |
1493 pushq %r13 | |
1494 pushq %r14 | |
1495 pushq %r15 | |
1496 subq $480+8,%rsp | |
1497  | |
/* Copy point a onto the stack: in1_x -> 320(%rsp), in1_y -> 352(%rsp),
 * in1_z -> 384(%rsp); also preload in1_z limbs into rax,r14,r15,r8 for
 * the upcoming squaring.  The por chain folds a's x|y limbs into %xmm3
 * to begin the "a is the all-zero (infinity) encoding" mask. */
1498 movdqu 0(%rsi),%xmm0 | |
1499 movq %rdx,%rbx | |
1500 movdqu 16(%rsi),%xmm1 | |
1501 movdqu 32(%rsi),%xmm2 | |
1502 movdqu 48(%rsi),%xmm3 | |
1503 movdqu 64(%rsi),%xmm4 | |
1504 movdqu 80(%rsi),%xmm5 | |
1505 movq 64+0(%rsi),%rax | |
1506 movq 64+8(%rsi),%r14 | |
1507 movq 64+16(%rsi),%r15 | |
1508 movq 64+24(%rsi),%r8 | |
1509 movdqa %xmm0,320(%rsp) | |
1510 movdqa %xmm1,320+16(%rsp) | |
1511 por %xmm0,%xmm1 | |
1512 movdqa %xmm2,352(%rsp) | |
1513 movdqa %xmm3,352+16(%rsp) | |
1514 por %xmm2,%xmm3 | |
1515 movdqa %xmm4,384(%rsp) | |
1516 movdqa %xmm5,384+16(%rsp) | |
1517 por %xmm1,%xmm3 | |
1518  | |
/* Copy affine point b: in2_x -> 416(%rsp), in2_y -> 448(%rsp), folding
 * its limbs for the b-is-zero mask.  The .byte sequence below is
 * "movq %rdi,%xmm0" (66 REX.W 0F 6E C7): the result pointer is parked
 * in %xmm0 because %rdi gets clobbered by the helper-call convention. */
1519 movdqu 0(%rbx),%xmm0 | |
1520 pshufd $0xb1,%xmm3,%xmm5 | |
1521 movdqu 16(%rbx),%xmm1 | |
1522 movdqu 32(%rbx),%xmm2 | |
1523 por %xmm3,%xmm5 | |
1524 movdqu 48(%rbx),%xmm3 | |
1525 movdqa %xmm0,416(%rsp) | |
1526 pshufd $0x1e,%xmm5,%xmm4 | |
1527 movdqa %xmm1,416+16(%rsp) | |
1528 por %xmm0,%xmm1 | |
1529 .byte 102,72,15,110,199 | |
1530 movdqa %xmm2,448(%rsp) | |
1531 movdqa %xmm3,448+16(%rsp) | |
1532 por %xmm2,%xmm3 | |
1533 por %xmm4,%xmm5 | |
1534 pxor %xmm4,%xmm4 | |
1535 por %xmm1,%xmm3 | |
1536  | |
/* Z1sqr = in1_z^2 -> 32(%rsp). */
1537 leaq 64-0(%rsi),%rsi | |
1538 leaq 32(%rsp),%rdi | |
1539 call __ecp_nistz256_sqr_montq | |
1540  | |
/* Finish both infinity masks (interleaved with staging the next mul):
 *   %xmm5 = all-ones iff all x|y limbs of a were zero,
 *   %xmm4 = all-ones iff all x|y limbs of b were zero.
 * pshufd/por perform the horizontal OR; pcmpeqd against zero plus the
 * broadcast pshufd $0 turn it into a full-width mask. */
1541 pcmpeqd %xmm4,%xmm5 | |
1542 pshufd $0xb1,%xmm3,%xmm4 | |
1543 movq 0(%rbx),%rax | |
1544  | |
1545 movq %r12,%r9 | |
1546 por %xmm3,%xmm4 | |
1547 pshufd $0,%xmm5,%xmm5 | |
1548 pshufd $0x1e,%xmm4,%xmm3 | |
1549 movq %r13,%r10 | |
1550 por %xmm3,%xmm4 | |
1551 pxor %xmm3,%xmm3 | |
1552 movq %r14,%r11 | |
1553 pcmpeqd %xmm3,%xmm4 | |
1554 pshufd $0,%xmm4,%xmm4 | |
1555  | |
/* U2 = in2_x * Z1sqr -> 0(%rsp). */
1556 leaq 32-0(%rsp),%rsi | |
1557 movq %r15,%r12 | |
1558 leaq 0(%rsp),%rdi | |
1559 call __ecp_nistz256_mul_montq | |
1560  | |
/* H = U2 - in1_x -> 64(%rsp). */
1561 leaq 320(%rsp),%rbx | |
1562 leaq 64(%rsp),%rdi | |
1563 call __ecp_nistz256_sub_fromq | |
1564  | |
/* Z1cub = in1_z * Z1sqr -> 32(%rsp)  (overwrites Z1sqr). */
1565 movq 384(%rsp),%rax | |
1566 leaq 384(%rsp),%rbx | |
1567 movq 0+32(%rsp),%r9 | |
1568 movq 8+32(%rsp),%r10 | |
1569 leaq 0+32(%rsp),%rsi | |
1570 movq 16+32(%rsp),%r11 | |
1571 movq 24+32(%rsp),%r12 | |
1572 leaq 32(%rsp),%rdi | |
1573 call __ecp_nistz256_mul_montq | |
1574  | |
/* res_z = Z3 = in1_z * H -> 288(%rsp). */
1575 movq 384(%rsp),%rax | |
1576 leaq 384(%rsp),%rbx | |
1577 movq 0+64(%rsp),%r9 | |
1578 movq 8+64(%rsp),%r10 | |
1579 leaq 0+64(%rsp),%rsi | |
1580 movq 16+64(%rsp),%r11 | |
1581 movq 24+64(%rsp),%r12 | |
1582 leaq 288(%rsp),%rdi | |
1583 call __ecp_nistz256_mul_montq | |
1584  | |
/* S2 = in2_y * Z1cub -> 32(%rsp). */
1585 movq 448(%rsp),%rax | |
1586 leaq 448(%rsp),%rbx | |
1587 movq 0+32(%rsp),%r9 | |
1588 movq 8+32(%rsp),%r10 | |
1589 leaq 0+32(%rsp),%rsi | |
1590 movq 16+32(%rsp),%r11 | |
1591 movq 24+32(%rsp),%r12 | |
1592 leaq 32(%rsp),%rdi | |
1593 call __ecp_nistz256_mul_montq | |
1594  | |
/* R = S2 - in1_y -> 96(%rsp). */
1595 leaq 352(%rsp),%rbx | |
1596 leaq 96(%rsp),%rdi | |
1597 call __ecp_nistz256_sub_fromq | |
1598  | |
/* Hsqr = H^2 -> 128(%rsp). */
1599 movq 0+64(%rsp),%rax | |
1600 movq 8+64(%rsp),%r14 | |
1601 leaq 0+64(%rsp),%rsi | |
1602 movq 16+64(%rsp),%r15 | |
1603 movq 24+64(%rsp),%r8 | |
1604 leaq 128(%rsp),%rdi | |
1605 call __ecp_nistz256_sqr_montq | |
1606  | |
/* Rsqr = R^2 -> 192(%rsp). */
1607 movq 0+96(%rsp),%rax | |
1608 movq 8+96(%rsp),%r14 | |
1609 leaq 0+96(%rsp),%rsi | |
1610 movq 16+96(%rsp),%r15 | |
1611 movq 24+96(%rsp),%r8 | |
1612 leaq 192(%rsp),%rdi | |
1613 call __ecp_nistz256_sqr_montq | |
1614  | |
/* Hcub = Hsqr * H -> 160(%rsp). */
1615 movq 128(%rsp),%rax | |
1616 leaq 128(%rsp),%rbx | |
1617 movq 0+64(%rsp),%r9 | |
1618 movq 8+64(%rsp),%r10 | |
1619 leaq 0+64(%rsp),%rsi | |
1620 movq 16+64(%rsp),%r11 | |
1621 movq 24+64(%rsp),%r12 | |
1622 leaq 160(%rsp),%rdi | |
1623 call __ecp_nistz256_mul_montq | |
1624  | |
/* U1H2 = in1_x * Hsqr -> 0(%rsp)  (overwrites U2, no longer needed;
 * the product is also still live in r12,r13,r8,r9 for the doubling
 * below). */
1625 movq 320(%rsp),%rax | |
1626 leaq 320(%rsp),%rbx | |
1627 movq 0+128(%rsp),%r9 | |
1628 movq 8+128(%rsp),%r10 | |
1629 leaq 0+128(%rsp),%rsi | |
1630 movq 16+128(%rsp),%r11 | |
1631 movq 24+128(%rsp),%r12 | |
1632 leaq 0(%rsp),%rdi | |
1633 call __ecp_nistz256_mul_montq | |
1634  | |
/* Inlined modular doubling: compute 2*U1H2 mod p in r12,r13,r8,r9.
 * Double with carry into r11 (borrow sentinel), trial-subtract p
 * (p[0]=2^64-1 via subq $-1; NOTE(review): r14/r15 are assumed to still
 * hold p[1]/p[3] as left there by the mul helper -- confirm), then
 * cmovz keeps the undiminished value when no wrap occurred. */
1635  | |
1636  | |
1637  | |
1638 addq %r12,%r12 | |
1639 leaq 192(%rsp),%rsi | |
1640 adcq %r13,%r13 | |
1641 movq %r12,%rax | |
1642 adcq %r8,%r8 | |
1643 adcq %r9,%r9 | |
1644 movq %r13,%rbp | |
1645 sbbq %r11,%r11 | |
1646  | |
1647 subq $-1,%r12 | |
1648 movq %r8,%rcx | |
1649 sbbq %r14,%r13 | |
1650 sbbq $0,%r8 | |
1651 movq %r9,%r10 | |
1652 sbbq %r15,%r9 | |
1653 testq %r11,%r11 | |
1654  | |
1655 cmovzq %rax,%r12 | |
1656 movq 0(%rsi),%rax | |
1657 cmovzq %rbp,%r13 | |
1658 movq 8(%rsi),%rbp | |
1659 cmovzq %rcx,%r8 | |
1660 movq 16(%rsi),%rcx | |
1661 cmovzq %r10,%r9 | |
1662 movq 24(%rsi),%r10 | |
1663  | |
/* regs = Rsqr - 2*U1H2 ... */
1664 call __ecp_nistz256_subq | |
1665  | |
/* ... then res_x = X3 = Rsqr - 2*U1H2 - Hcub -> 224(%rsp). */
1666 leaq 160(%rsp),%rbx | |
1667 leaq 224(%rsp),%rdi | |
1668 call __ecp_nistz256_sub_fromq | |
1669  | |
/* U1H2 - X3 -> 64(%rsp): preload U1H2, subtract X3 (still in regs),
 * and store the four result limbs explicitly. */
1670 movq 0+0(%rsp),%rax | |
1671 movq 0+8(%rsp),%rbp | |
1672 movq 0+16(%rsp),%rcx | |
1673 movq 0+24(%rsp),%r10 | |
1674 leaq 64(%rsp),%rdi | |
1675  | |
1676 call __ecp_nistz256_subq | |
1677  | |
1678 movq %r12,0(%rdi) | |
1679 movq %r13,8(%rdi) | |
1680 movq %r8,16(%rdi) | |
1681 movq %r9,24(%rdi) | |
/* S1Hcub = in1_y * Hcub -> 32(%rsp). */
1682 movq 352(%rsp),%rax | |
1683 leaq 352(%rsp),%rbx | |
1684 movq 0+160(%rsp),%r9 | |
1685 movq 8+160(%rsp),%r10 | |
1686 leaq 0+160(%rsp),%rsi | |
1687 movq 16+160(%rsp),%r11 | |
1688 movq 24+160(%rsp),%r12 | |
1689 leaq 32(%rsp),%rdi | |
1690 call __ecp_nistz256_mul_montq | |
1691  | |
/* R * (U1H2 - X3) -> 64(%rsp). */
1692 movq 96(%rsp),%rax | |
1693 leaq 96(%rsp),%rbx | |
1694 movq 0+64(%rsp),%r9 | |
1695 movq 8+64(%rsp),%r10 | |
1696 leaq 0+64(%rsp),%rsi | |
1697 movq 16+64(%rsp),%r11 | |
1698 movq 24+64(%rsp),%r12 | |
1699 leaq 64(%rsp),%rdi | |
1700 call __ecp_nistz256_mul_montq | |
1701  | |
/* res_y = Y3 = R*(U1H2 - X3) - S1Hcub -> 256(%rsp). */
1702 leaq 32(%rsp),%rbx | |
1703 leaq 256(%rsp),%rdi | |
1704 call __ecp_nistz256_sub_fromq | |
1705  | |
/* "movq %xmm0,%rdi" (66 REX.W 0F 7E C7): recover the result pointer. */
1706 .byte 102,72,15,126,199 | |
1707  | |
/* Branch-free select of r->Z:
 *   a == infinity (%xmm5 set) -> Montgomery one (result is b, affine),
 *   b == infinity (%xmm4 set) -> in1_z (result is a),
 *   otherwise                 -> computed Z3 at 288(%rsp). */
1708 movdqa %xmm5,%xmm0 | |
1709 movdqa %xmm5,%xmm1 | |
1710 pandn 288(%rsp),%xmm0 | |
1711 movdqa %xmm5,%xmm2 | |
1712 pandn 288+16(%rsp),%xmm1 | |
1713 movdqa %xmm5,%xmm3 | |
1714 pand .LONE_mont(%rip),%xmm2 | |
1715 pand .LONE_mont+16(%rip),%xmm3 | |
1716 por %xmm0,%xmm2 | |
1717 por %xmm1,%xmm3 | |
1718  | |
1719 movdqa %xmm4,%xmm0 | |
1720 movdqa %xmm4,%xmm1 | |
1721 pandn %xmm2,%xmm0 | |
1722 movdqa %xmm4,%xmm2 | |
1723 pandn %xmm3,%xmm1 | |
1724 movdqa %xmm4,%xmm3 | |
1725 pand 384(%rsp),%xmm2 | |
1726 pand 384+16(%rsp),%xmm3 | |
1727 por %xmm0,%xmm2 | |
1728 por %xmm1,%xmm3 | |
1729 movdqu %xmm2,64(%rdi) | |
1730 movdqu %xmm3,80(%rdi) | |
1731  | |
/* Same two-level select for r->X: a inf -> in2_x (416), b inf ->
 * in1_x (320), else X3 (224). */
1732 movdqa %xmm5,%xmm0 | |
1733 movdqa %xmm5,%xmm1 | |
1734 pandn 224(%rsp),%xmm0 | |
1735 movdqa %xmm5,%xmm2 | |
1736 pandn 224+16(%rsp),%xmm1 | |
1737 movdqa %xmm5,%xmm3 | |
1738 pand 416(%rsp),%xmm2 | |
1739 pand 416+16(%rsp),%xmm3 | |
1740 por %xmm0,%xmm2 | |
1741 por %xmm1,%xmm3 | |
1742  | |
1743 movdqa %xmm4,%xmm0 | |
1744 movdqa %xmm4,%xmm1 | |
1745 pandn %xmm2,%xmm0 | |
1746 movdqa %xmm4,%xmm2 | |
1747 pandn %xmm3,%xmm1 | |
1748 movdqa %xmm4,%xmm3 | |
1749 pand 320(%rsp),%xmm2 | |
1750 pand 320+16(%rsp),%xmm3 | |
1751 por %xmm0,%xmm2 | |
1752 por %xmm1,%xmm3 | |
1753 movdqu %xmm2,0(%rdi) | |
1754 movdqu %xmm3,16(%rdi) | |
1755  | |
/* And for r->Y: a inf -> in2_y (448), b inf -> in1_y (352),
 * else Y3 (256). */
1756 movdqa %xmm5,%xmm0 | |
1757 movdqa %xmm5,%xmm1 | |
1758 pandn 256(%rsp),%xmm0 | |
1759 movdqa %xmm5,%xmm2 | |
1760 pandn 256+16(%rsp),%xmm1 | |
1761 movdqa %xmm5,%xmm3 | |
1762 pand 448(%rsp),%xmm2 | |
1763 pand 448+16(%rsp),%xmm3 | |
1764 por %xmm0,%xmm2 | |
1765 por %xmm1,%xmm3 | |
1766  | |
1767 movdqa %xmm4,%xmm0 | |
1768 movdqa %xmm4,%xmm1 | |
1769 pandn %xmm2,%xmm0 | |
1770 movdqa %xmm4,%xmm2 | |
1771 pandn %xmm3,%xmm1 | |
1772 movdqa %xmm4,%xmm3 | |
1773 pand 352(%rsp),%xmm2 | |
1774 pand 352+16(%rsp),%xmm3 | |
1775 por %xmm0,%xmm2 | |
1776 por %xmm1,%xmm3 | |
1777 movdqu %xmm2,32(%rdi) | |
1778 movdqu %xmm3,48(%rdi) | |
1779  | |
/* Epilogue: release the frame, restore callee-saved regs;
 * 0xf3,0xc3 encodes "rep ret" (AMD branch-predictor-friendly return). */
1780 addq $480+8,%rsp | |
1781 popq %r15 | |
1782 popq %r14 | |
1783 popq %r13 | |
1784 popq %r12 | |
1785 popq %rbx | |
1786 popq %rbp | |
1787 .byte 0xf3,0xc3 | |
1788 .size ecp_nistz256_point_add_affine,.-ecp_nistz256_point_add_affine | |
1789 #endif | |
OLD | NEW |