#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
.text
.extern OPENSSL_ia32cap_P
.hidden OPENSSL_ia32cap_P

.align 64
.Lpoly:
.quad 0xffffffffffffffff, 0x00000000ffffffff, 0x0000000000000000, 0xffffffff00000001

.LOne:
.long 1,1,1,1,1,1,1,1
.LTwo:
.long 2,2,2,2,2,2,2,2
.LThree:
.long 3,3,3,3,3,3,3,3
.LONE_mont:
.quad 0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe
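/* .Lpoly above is the P-256 prime p = 2^256 - 2^224 + 2^192 + 2^96 - 1 in four
   little-endian 64-bit limbs. .LOne/.LTwo/.LThree are dword broadcasts used by
   the constant-time table lookups below, and .LONE_mont is 1 in Montgomery
   form, i.e. 2^256 mod p. */

/* ecp_nistz256_neg(res, a): res = -a mod p. Computes 0 - a, adds p back, and
   selects via cmovz on the borrow, so the sequence runs in constant time. */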
.globl ecp_nistz256_neg
.hidden ecp_nistz256_neg
.type ecp_nistz256_neg,@function
.align 32
ecp_nistz256_neg:
pushq %r12
pushq %r13

xorq %r8,%r8
xorq %r9,%r9
xorq %r10,%r10
xorq %r11,%r11
xorq %r13,%r13

subq 0(%rsi),%r8
sbbq 8(%rsi),%r9
sbbq 16(%rsi),%r10
movq %r8,%rax
sbbq 24(%rsi),%r11
leaq .Lpoly(%rip),%rsi
movq %r9,%rdx
sbbq $0,%r13

addq 0(%rsi),%r8
movq %r10,%rcx
adcq 8(%rsi),%r9
adcq 16(%rsi),%r10
movq %r11,%r12
adcq 24(%rsi),%r11
testq %r13,%r13

cmovzq %rax,%r8
cmovzq %rdx,%r9
movq %r8,0(%rdi)
cmovzq %rcx,%r10
movq %r9,8(%rdi)
cmovzq %r12,%r11
movq %r10,16(%rdi)
movq %r11,24(%rdi)

popq %r13
popq %r12
.byte 0xf3,0xc3
.size ecp_nistz256_neg,.-ecp_nistz256_neg
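/* ecp_nistz256_mul_mont(res, a, b): Montgomery multiplication,
   res = a * b * 2^-256 mod p, with arguments in %rdi, %rsi, %rdx per the
   System V ABI. */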
.globl ecp_nistz256_mul_mont
.hidden ecp_nistz256_mul_mont
.type ecp_nistz256_mul_mont,@function
.align 32
ecp_nistz256_mul_mont:
.Lmul_mont:
pushq %rbp
pushq %rbx
pushq %r12
pushq %r13
pushq %r14
pushq %r15
movq %rdx,%rbx
movq 0(%rdx),%rax
movq 0(%rsi),%r9
movq 8(%rsi),%r10
movq 16(%rsi),%r11
movq 24(%rsi),%r12

call __ecp_nistz256_mul_montq
.Lmul_mont_done:
popq %r15
popq %r14
popq %r13
popq %r12
popq %rbx
popq %rbp
.byte 0xf3,0xc3
.size ecp_nistz256_mul_mont,.-ecp_nistz256_mul_mont
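/* Inner multiply: expects b[0] in %rax, a[0..3] in %r9-%r12, and pointers to
   a and b in %rsi and %rbx; stores the reduced product at (%rdi) and leaves
   it in %r12,%r13,%r8,%r9. */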
.type __ecp_nistz256_mul_montq,@function
.align 32
__ecp_nistz256_mul_montq:

movq %rax,%rbp
mulq %r9
movq .Lpoly+8(%rip),%r14
movq %rax,%r8
movq %rbp,%rax
movq %rdx,%r9

mulq %r10
movq .Lpoly+24(%rip),%r15
addq %rax,%r9
movq %rbp,%rax
adcq $0,%rdx
movq %rdx,%r10

mulq %r11
addq %rax,%r10
movq %rbp,%rax
adcq $0,%rdx
movq %rdx,%r11

mulq %r12
addq %rax,%r11
movq %r8,%rax
adcq $0,%rdx
xorq %r13,%r13
movq %rdx,%r12
142 | |
143 movq %r8,%rbp | |
144 shlq $32,%r8 | |
145 mulq %r15 | |
146 shrq $32,%rbp | |
147 addq %r8,%r9 | |
148 adcq %rbp,%r10 | |
149 adcq %rax,%r11 | |
150 movq 8(%rbx),%rax | |
151 adcq %rdx,%r12 | |
152 adcq $0,%r13 | |
153 xorq %r8,%r8 | |
154 | |
155 | |
156 | |
157 movq %rax,%rbp | |
158 mulq 0(%rsi) | |
159 addq %rax,%r9 | |
160 movq %rbp,%rax | |
161 adcq $0,%rdx | |
162 movq %rdx,%rcx | |
163 | |
164 mulq 8(%rsi) | |
165 addq %rcx,%r10 | |
166 adcq $0,%rdx | |
167 addq %rax,%r10 | |
168 movq %rbp,%rax | |
169 adcq $0,%rdx | |
170 movq %rdx,%rcx | |
171 | |
172 mulq 16(%rsi) | |
173 addq %rcx,%r11 | |
174 adcq $0,%rdx | |
175 addq %rax,%r11 | |
176 movq %rbp,%rax | |
177 adcq $0,%rdx | |
178 movq %rdx,%rcx | |
179 | |
180 mulq 24(%rsi) | |
181 addq %rcx,%r12 | |
182 adcq $0,%rdx | |
183 addq %rax,%r12 | |
184 movq %r9,%rax | |
185 adcq %rdx,%r13 | |
186 adcq $0,%r8 | |
187 | |
188 | |
189 | |
190 movq %r9,%rbp | |
191 shlq $32,%r9 | |
192 mulq %r15 | |
193 shrq $32,%rbp | |
194 addq %r9,%r10 | |
195 adcq %rbp,%r11 | |
196 adcq %rax,%r12 | |
197 movq 16(%rbx),%rax | |
198 adcq %rdx,%r13 | |
199 adcq $0,%r8 | |
200 xorq %r9,%r9 | |
201 | |
202 | |
203 | |
204 movq %rax,%rbp | |
205 mulq 0(%rsi) | |
206 addq %rax,%r10 | |
207 movq %rbp,%rax | |
208 adcq $0,%rdx | |
209 movq %rdx,%rcx | |
210 | |
211 mulq 8(%rsi) | |
212 addq %rcx,%r11 | |
213 adcq $0,%rdx | |
214 addq %rax,%r11 | |
215 movq %rbp,%rax | |
216 adcq $0,%rdx | |
217 movq %rdx,%rcx | |
218 | |
219 mulq 16(%rsi) | |
220 addq %rcx,%r12 | |
221 adcq $0,%rdx | |
222 addq %rax,%r12 | |
223 movq %rbp,%rax | |
224 adcq $0,%rdx | |
225 movq %rdx,%rcx | |
226 | |
227 mulq 24(%rsi) | |
228 addq %rcx,%r13 | |
229 adcq $0,%rdx | |
230 addq %rax,%r13 | |
231 movq %r10,%rax | |
232 adcq %rdx,%r8 | |
233 adcq $0,%r9 | |
234 | |
235 | |
236 | |
237 movq %r10,%rbp | |
238 shlq $32,%r10 | |
239 mulq %r15 | |
240 shrq $32,%rbp | |
241 addq %r10,%r11 | |
242 adcq %rbp,%r12 | |
243 adcq %rax,%r13 | |
244 movq 24(%rbx),%rax | |
245 adcq %rdx,%r8 | |
246 adcq $0,%r9 | |
247 xorq %r10,%r10 | |
248 | |
249 | |
250 | |
251 movq %rax,%rbp | |
252 mulq 0(%rsi) | |
253 addq %rax,%r11 | |
254 movq %rbp,%rax | |
255 adcq $0,%rdx | |
256 movq %rdx,%rcx | |
257 | |
258 mulq 8(%rsi) | |
259 addq %rcx,%r12 | |
260 adcq $0,%rdx | |
261 addq %rax,%r12 | |
262 movq %rbp,%rax | |
263 adcq $0,%rdx | |
264 movq %rdx,%rcx | |
265 | |
266 mulq 16(%rsi) | |
267 addq %rcx,%r13 | |
268 adcq $0,%rdx | |
269 addq %rax,%r13 | |
270 movq %rbp,%rax | |
271 adcq $0,%rdx | |
272 movq %rdx,%rcx | |
273 | |
274 mulq 24(%rsi) | |
275 addq %rcx,%r8 | |
276 adcq $0,%rdx | |
277 addq %rax,%r8 | |
278 movq %r11,%rax | |
279 adcq %rdx,%r9 | |
280 adcq $0,%r10 | |
281 | |
282 | |
283 | |
284 movq %r11,%rbp | |
285 shlq $32,%r11 | |
286 mulq %r15 | |
287 shrq $32,%rbp | |
288 addq %r11,%r12 | |
289 adcq %rbp,%r13 | |
290 movq %r12,%rcx | |
291 adcq %rax,%r8 | |
292 adcq %rdx,%r9 | |
293 movq %r13,%rbp | |
294 adcq $0,%r10 | |
295 | |
296 | |
297 | |
298 subq $-1,%r12 | |
299 movq %r8,%rbx | |
300 sbbq %r14,%r13 | |
301 sbbq $0,%r8 | |
302 movq %r9,%rdx | |
303 sbbq %r15,%r9 | |
304 sbbq $0,%r10 | |
305 | |
306 cmovcq %rcx,%r12 | |
307 cmovcq %rbp,%r13 | |
308 movq %r12,0(%rdi) | |
309 cmovcq %rbx,%r8 | |
310 movq %r13,8(%rdi) | |
311 cmovcq %rdx,%r9 | |
312 movq %r8,16(%rdi) | |
313 movq %r9,24(%rdi) | |
314 | |
315 .byte 0xf3,0xc3 | |
316 .size __ecp_nistz256_mul_montq,.-__ecp_nistz256_mul_montq | |
317 | |
318 | |
319 | |
320 | |
321 | |
322 | |
323 | |
324 | |
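/* ecp_nistz256_sqr_mont(res, a): Montgomery squaring,
   res = a * a * 2^-256 mod p. */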
.globl ecp_nistz256_sqr_mont
.hidden ecp_nistz256_sqr_mont
.type ecp_nistz256_sqr_mont,@function
.align 32
ecp_nistz256_sqr_mont:
pushq %rbp
pushq %rbx
pushq %r12
pushq %r13
pushq %r14
pushq %r15
movq 0(%rsi),%rax
movq 8(%rsi),%r14
movq 16(%rsi),%r15
movq 24(%rsi),%r8

call __ecp_nistz256_sqr_montq
.Lsqr_mont_done:
popq %r15
popq %r14
popq %r13
popq %r12
popq %rbx
popq %rbp
.byte 0xf3,0xc3
.size ecp_nistz256_sqr_mont,.-ecp_nistz256_sqr_mont
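/* Inner square: forms the six cross products a[i]*a[j] (i < j), doubles them
   with an add/adc chain, adds the diagonal squares a[i]^2, then runs the four
   reduction steps and a final conditional subtraction of p. */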
.type __ecp_nistz256_sqr_montq,@function
.align 32
__ecp_nistz256_sqr_montq:
movq %rax,%r13
mulq %r14
movq %rax,%r9
movq %r15,%rax
movq %rdx,%r10

mulq %r13
addq %rax,%r10
movq %r8,%rax
adcq $0,%rdx
movq %rdx,%r11

mulq %r13
addq %rax,%r11
movq %r15,%rax
adcq $0,%rdx
movq %rdx,%r12

mulq %r14
addq %rax,%r11
movq %r8,%rax
adcq $0,%rdx
movq %rdx,%rbp

mulq %r14
addq %rax,%r12
movq %r8,%rax
adcq $0,%rdx
addq %rbp,%r12
movq %rdx,%r13
adcq $0,%r13

mulq %r15
xorq %r15,%r15
addq %rax,%r13
movq 0(%rsi),%rax
movq %rdx,%r14
adcq $0,%r14

addq %r9,%r9
adcq %r10,%r10
adcq %r11,%r11
adcq %r12,%r12
adcq %r13,%r13
adcq %r14,%r14
adcq $0,%r15

mulq %rax
movq %rax,%r8
movq 8(%rsi),%rax
movq %rdx,%rcx

mulq %rax
addq %rcx,%r9
adcq %rax,%r10
movq 16(%rsi),%rax
adcq $0,%rdx
movq %rdx,%rcx

mulq %rax
addq %rcx,%r11
adcq %rax,%r12
movq 24(%rsi),%rax
adcq $0,%rdx
movq %rdx,%rcx

mulq %rax
addq %rcx,%r13
adcq %rax,%r14
movq %r8,%rax
adcq %rdx,%r15

movq .Lpoly+8(%rip),%rsi
movq .Lpoly+24(%rip),%rbp

movq %r8,%rcx
shlq $32,%r8
mulq %rbp
shrq $32,%rcx
addq %r8,%r9
adcq %rcx,%r10
adcq %rax,%r11
movq %r9,%rax
adcq $0,%rdx

movq %r9,%rcx
shlq $32,%r9
movq %rdx,%r8
mulq %rbp
shrq $32,%rcx
addq %r9,%r10
adcq %rcx,%r11
adcq %rax,%r8
movq %r10,%rax
adcq $0,%rdx

movq %r10,%rcx
shlq $32,%r10
movq %rdx,%r9
mulq %rbp
shrq $32,%rcx
addq %r10,%r11
adcq %rcx,%r8
adcq %rax,%r9
movq %r11,%rax
adcq $0,%rdx

movq %r11,%rcx
shlq $32,%r11
movq %rdx,%r10
mulq %rbp
shrq $32,%rcx
addq %r11,%r8
adcq %rcx,%r9
adcq %rax,%r10
adcq $0,%rdx
xorq %r11,%r11

addq %r8,%r12
adcq %r9,%r13
movq %r12,%r8
adcq %r10,%r14
adcq %rdx,%r15
movq %r13,%r9
adcq $0,%r11

subq $-1,%r12
movq %r14,%r10
sbbq %rsi,%r13
sbbq $0,%r14
movq %r15,%rcx
sbbq %rbp,%r15
sbbq $0,%r11

cmovcq %r8,%r12
cmovcq %r9,%r13
movq %r12,0(%rdi)
cmovcq %r10,%r14
movq %r13,8(%rdi)
cmovcq %rcx,%r15
movq %r14,16(%rdi)
movq %r15,24(%rdi)

.byte 0xf3,0xc3
.size __ecp_nistz256_sqr_montq,.-__ecp_nistz256_sqr_montq
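/* ecp_nistz256_select_w5(val, in_t, index): constant-time lookup of one
   96-byte Jacobian point from a table of 16. Every entry is read and masked
   against the pcmpeqd result, so the memory access pattern is independent of
   index. */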
.globl ecp_nistz256_select_w5
.hidden ecp_nistz256_select_w5
.type ecp_nistz256_select_w5,@function
.align 32
ecp_nistz256_select_w5:
movdqa .LOne(%rip),%xmm0
movd %edx,%xmm1

pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
pxor %xmm7,%xmm7

movdqa %xmm0,%xmm8
pshufd $0,%xmm1,%xmm1

movq $16,%rax
.Lselect_loop_sse_w5:

movdqa %xmm8,%xmm15
paddd %xmm0,%xmm8
pcmpeqd %xmm1,%xmm15

movdqa 0(%rsi),%xmm9
movdqa 16(%rsi),%xmm10
movdqa 32(%rsi),%xmm11
movdqa 48(%rsi),%xmm12
movdqa 64(%rsi),%xmm13
movdqa 80(%rsi),%xmm14
leaq 96(%rsi),%rsi

pand %xmm15,%xmm9
pand %xmm15,%xmm10
por %xmm9,%xmm2
pand %xmm15,%xmm11
por %xmm10,%xmm3
pand %xmm15,%xmm12
por %xmm11,%xmm4
pand %xmm15,%xmm13
por %xmm12,%xmm5
pand %xmm15,%xmm14
por %xmm13,%xmm6
por %xmm14,%xmm7

decq %rax
jnz .Lselect_loop_sse_w5

movdqu %xmm2,0(%rdi)
movdqu %xmm3,16(%rdi)
movdqu %xmm4,32(%rdi)
movdqu %xmm5,48(%rdi)
movdqu %xmm6,64(%rdi)
movdqu %xmm7,80(%rdi)
.byte 0xf3,0xc3
.size ecp_nistz256_select_w5,.-ecp_nistz256_select_w5
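/* ecp_nistz256_select_w7(val, in_t, index): same constant-time scan, but over
   64 entries of 64-byte affine points. */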
.globl ecp_nistz256_select_w7
.hidden ecp_nistz256_select_w7
.type ecp_nistz256_select_w7,@function
.align 32
ecp_nistz256_select_w7:
movdqa .LOne(%rip),%xmm8
movd %edx,%xmm1

pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5

movdqa %xmm8,%xmm0
pshufd $0,%xmm1,%xmm1
movq $64,%rax

.Lselect_loop_sse_w7:
movdqa %xmm8,%xmm15
paddd %xmm0,%xmm8
movdqa 0(%rsi),%xmm9
movdqa 16(%rsi),%xmm10
pcmpeqd %xmm1,%xmm15
movdqa 32(%rsi),%xmm11
movdqa 48(%rsi),%xmm12
leaq 64(%rsi),%rsi

pand %xmm15,%xmm9
pand %xmm15,%xmm10
por %xmm9,%xmm2
pand %xmm15,%xmm11
por %xmm10,%xmm3
pand %xmm15,%xmm12
por %xmm11,%xmm4
prefetcht0 255(%rsi)
por %xmm12,%xmm5

decq %rax
jnz .Lselect_loop_sse_w7

movdqu %xmm2,0(%rdi)
movdqu %xmm3,16(%rdi)
movdqu %xmm4,32(%rdi)
movdqu %xmm5,48(%rdi)
.byte 0xf3,0xc3
.size ecp_nistz256_select_w7,.-ecp_nistz256_select_w7
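/* AVX2 variant is not provided in this build; the stub is ud2 (0x0f,0x0b) so
   any stray call traps immediately. */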
.globl ecp_nistz256_avx2_select_w7
.hidden ecp_nistz256_avx2_select_w7
.type ecp_nistz256_avx2_select_w7,@function
.align 32
ecp_nistz256_avx2_select_w7:
.byte 0x0f,0x0b
.byte 0xf3,0xc3
.size ecp_nistz256_avx2_select_w7,.-ecp_nistz256_avx2_select_w7
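/* __ecp_nistz256_add_toq: res = a + b mod p, with a in %r12,%r13,%r8,%r9 and
   b at (%rbx); reduced by a conditional subtraction of p. */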
.type __ecp_nistz256_add_toq,@function
.align 32
__ecp_nistz256_add_toq:
xorq %r11,%r11
addq 0(%rbx),%r12
adcq 8(%rbx),%r13
movq %r12,%rax
adcq 16(%rbx),%r8
adcq 24(%rbx),%r9
movq %r13,%rbp
adcq $0,%r11

subq $-1,%r12
movq %r8,%rcx
sbbq %r14,%r13
sbbq $0,%r8
movq %r9,%r10
sbbq %r15,%r9
sbbq $0,%r11

cmovcq %rax,%r12
cmovcq %rbp,%r13
movq %r12,0(%rdi)
cmovcq %rcx,%r8
movq %r13,8(%rdi)
cmovcq %r10,%r9
movq %r8,16(%rdi)
movq %r9,24(%rdi)

.byte 0xf3,0xc3
.size __ecp_nistz256_add_toq,.-__ecp_nistz256_add_toq
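/* __ecp_nistz256_sub_fromq: res = a - b mod p; the borrow captured by
   sbbq %r11,%r11 selects the variant with p added back. */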
.type __ecp_nistz256_sub_fromq,@function
.align 32
__ecp_nistz256_sub_fromq:
subq 0(%rbx),%r12
sbbq 8(%rbx),%r13
movq %r12,%rax
sbbq 16(%rbx),%r8
sbbq 24(%rbx),%r9
movq %r13,%rbp
sbbq %r11,%r11

addq $-1,%r12
movq %r8,%rcx
adcq %r14,%r13
adcq $0,%r8
movq %r9,%r10
adcq %r15,%r9
testq %r11,%r11

cmovzq %rax,%r12
cmovzq %rbp,%r13
movq %r12,0(%rdi)
cmovzq %rcx,%r8
movq %r13,8(%rdi)
cmovzq %r10,%r9
movq %r8,16(%rdi)
movq %r9,24(%rdi)

.byte 0xf3,0xc3
.size __ecp_nistz256_sub_fromq,.-__ecp_nistz256_sub_fromq
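/* __ecp_nistz256_subq: %rax,%rbp,%rcx,%r10 minus %r12,%r13,%r8,%r9 mod p; the
   result stays in %r12,%r13,%r8,%r9 and is not stored (callers write it out). */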
.type __ecp_nistz256_subq,@function
.align 32
__ecp_nistz256_subq:
subq %r12,%rax
sbbq %r13,%rbp
movq %rax,%r12
sbbq %r8,%rcx
sbbq %r9,%r10
movq %rbp,%r13
sbbq %r11,%r11

addq $-1,%rax
movq %rcx,%r8
adcq %r14,%rbp
adcq $0,%rcx
movq %r10,%r9
adcq %r15,%r10
testq %r11,%r11

cmovnzq %rax,%r12
cmovnzq %rbp,%r13
cmovnzq %rcx,%r8
cmovnzq %r10,%r9

.byte 0xf3,0xc3
.size __ecp_nistz256_subq,.-__ecp_nistz256_subq
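/* __ecp_nistz256_mul_by_2q: res = 2*a mod p via an add-with-carry of the
   operand to itself, then the usual conditional subtraction of p. */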
.type __ecp_nistz256_mul_by_2q,@function
.align 32
__ecp_nistz256_mul_by_2q:
xorq %r11,%r11
addq %r12,%r12
adcq %r13,%r13
movq %r12,%rax
adcq %r8,%r8
adcq %r9,%r9
movq %r13,%rbp
adcq $0,%r11

subq $-1,%r12
movq %r8,%rcx
sbbq %r14,%r13
sbbq $0,%r8
movq %r9,%r10
sbbq %r15,%r9
sbbq $0,%r11

cmovcq %rax,%r12
cmovcq %rbp,%r13
movq %r12,0(%rdi)
cmovcq %rcx,%r8
movq %r13,8(%rdi)
cmovcq %r10,%r9
movq %r8,16(%rdi)
movq %r9,24(%rdi)

.byte 0xf3,0xc3
.size __ecp_nistz256_mul_by_2q,.-__ecp_nistz256_mul_by_2q
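/* ecp_nistz256_point_double(r, a): doubles a Jacobian point, staging
   intermediate field elements in a 168-byte stack frame. The raw
   .byte 102,7x,15,110/126 sequences are perlasm-emitted encodings of movq
   between general-purpose and %xmm registers, used to park pointers across
   the helper calls. */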
.globl ecp_nistz256_point_double
.hidden ecp_nistz256_point_double
.type ecp_nistz256_point_double,@function
.align 32
ecp_nistz256_point_double:
pushq %rbp
pushq %rbx
pushq %r12
pushq %r13
pushq %r14
pushq %r15
subq $160+8,%rsp

.Lpoint_double_shortcutq:
movdqu 0(%rsi),%xmm0
movq %rsi,%rbx
movdqu 16(%rsi),%xmm1
movq 32+0(%rsi),%r12
movq 32+8(%rsi),%r13
movq 32+16(%rsi),%r8
movq 32+24(%rsi),%r9
movq .Lpoly+8(%rip),%r14
movq .Lpoly+24(%rip),%r15
movdqa %xmm0,96(%rsp)
movdqa %xmm1,96+16(%rsp)
leaq 32(%rdi),%r10
leaq 64(%rdi),%r11
.byte 102,72,15,110,199
.byte 102,73,15,110,202
.byte 102,73,15,110,211

leaq 0(%rsp),%rdi
call __ecp_nistz256_mul_by_2q

movq 64+0(%rsi),%rax
movq 64+8(%rsi),%r14
movq 64+16(%rsi),%r15
movq 64+24(%rsi),%r8
leaq 64-0(%rsi),%rsi
leaq 64(%rsp),%rdi
call __ecp_nistz256_sqr_montq

movq 0+0(%rsp),%rax
movq 8+0(%rsp),%r14
leaq 0+0(%rsp),%rsi
movq 16+0(%rsp),%r15
movq 24+0(%rsp),%r8
leaq 0(%rsp),%rdi
call __ecp_nistz256_sqr_montq

movq 32(%rbx),%rax
movq 64+0(%rbx),%r9
movq 64+8(%rbx),%r10
movq 64+16(%rbx),%r11
movq 64+24(%rbx),%r12
leaq 64-0(%rbx),%rsi
leaq 32(%rbx),%rbx
.byte 102,72,15,126,215
call __ecp_nistz256_mul_montq
call __ecp_nistz256_mul_by_2q

movq 96+0(%rsp),%r12
movq 96+8(%rsp),%r13
leaq 64(%rsp),%rbx
movq 96+16(%rsp),%r8
movq 96+24(%rsp),%r9
leaq 32(%rsp),%rdi
call __ecp_nistz256_add_toq

movq 96+0(%rsp),%r12
movq 96+8(%rsp),%r13
leaq 64(%rsp),%rbx
movq 96+16(%rsp),%r8
movq 96+24(%rsp),%r9
leaq 64(%rsp),%rdi
call __ecp_nistz256_sub_fromq

movq 0+0(%rsp),%rax
movq 8+0(%rsp),%r14
leaq 0+0(%rsp),%rsi
movq 16+0(%rsp),%r15
movq 24+0(%rsp),%r8
.byte 102,72,15,126,207
call __ecp_nistz256_sqr_montq
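/* Constant-time halving mod p: unconditionally compute a + p, keep the
   original value via cmovz when it was already even, then shift the 5-limb
   result right by one bit. */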
xorq %r9,%r9
movq %r12,%rax
addq $-1,%r12
movq %r13,%r10
adcq %rsi,%r13
movq %r14,%rcx
adcq $0,%r14
movq %r15,%r8
adcq %rbp,%r15
adcq $0,%r9
xorq %rsi,%rsi
testq $1,%rax

cmovzq %rax,%r12
cmovzq %r10,%r13
cmovzq %rcx,%r14
cmovzq %r8,%r15
cmovzq %rsi,%r9

movq %r13,%rax
shrq $1,%r12
shlq $63,%rax
movq %r14,%r10
shrq $1,%r13
orq %rax,%r12
shlq $63,%r10
movq %r15,%rcx
shrq $1,%r14
orq %r10,%r13
shlq $63,%rcx
movq %r12,0(%rdi)
shrq $1,%r15
movq %r13,8(%rdi)
shlq $63,%r9
orq %rcx,%r14
orq %r9,%r15
movq %r14,16(%rdi)
movq %r15,24(%rdi)
movq 64(%rsp),%rax
leaq 64(%rsp),%rbx
movq 0+32(%rsp),%r9
movq 8+32(%rsp),%r10
leaq 0+32(%rsp),%rsi
movq 16+32(%rsp),%r11
movq 24+32(%rsp),%r12
leaq 32(%rsp),%rdi
call __ecp_nistz256_mul_montq

leaq 128(%rsp),%rdi
call __ecp_nistz256_mul_by_2q

leaq 32(%rsp),%rbx
leaq 32(%rsp),%rdi
call __ecp_nistz256_add_toq

movq 96(%rsp),%rax
leaq 96(%rsp),%rbx
movq 0+0(%rsp),%r9
movq 8+0(%rsp),%r10
leaq 0+0(%rsp),%rsi
movq 16+0(%rsp),%r11
movq 24+0(%rsp),%r12
leaq 0(%rsp),%rdi
call __ecp_nistz256_mul_montq

leaq 128(%rsp),%rdi
call __ecp_nistz256_mul_by_2q

movq 0+32(%rsp),%rax
movq 8+32(%rsp),%r14
leaq 0+32(%rsp),%rsi
movq 16+32(%rsp),%r15
movq 24+32(%rsp),%r8
.byte 102,72,15,126,199
call __ecp_nistz256_sqr_montq

leaq 128(%rsp),%rbx
movq %r14,%r8
movq %r15,%r9
movq %rsi,%r14
movq %rbp,%r15
call __ecp_nistz256_sub_fromq

movq 0+0(%rsp),%rax
movq 0+8(%rsp),%rbp
movq 0+16(%rsp),%rcx
movq 0+24(%rsp),%r10
leaq 0(%rsp),%rdi
call __ecp_nistz256_subq

movq 32(%rsp),%rax
leaq 32(%rsp),%rbx
movq %r12,%r14
xorl %ecx,%ecx
movq %r12,0+0(%rsp)
movq %r13,%r10
movq %r13,0+8(%rsp)
cmovzq %r8,%r11
movq %r8,0+16(%rsp)
leaq 0-0(%rsp),%rsi
cmovzq %r9,%r12
movq %r9,0+24(%rsp)
movq %r14,%r9
leaq 0(%rsp),%rdi
call __ecp_nistz256_mul_montq

.byte 102,72,15,126,203
.byte 102,72,15,126,207
call __ecp_nistz256_sub_fromq

addq $160+8,%rsp
popq %r15
popq %r14
popq %r13
popq %r12
popq %rbx
popq %rbp
.byte 0xf3,0xc3
.size ecp_nistz256_point_double,.-ecp_nistz256_point_double
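/* ecp_nistz256_point_add(r, a, b): full Jacobian point addition. %xmm5 and
   %xmm4 are built into all-ones masks recording whether a or b is the point
   at infinity; when U1 == U2 and S1 == S2 the inputs are equal and control
   transfers to the doubling shortcut, and the final pand/pandn blend picks
   the computed sum, b (when a is infinity) or a (when b is infinity). */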
.globl ecp_nistz256_point_add
.hidden ecp_nistz256_point_add
.type ecp_nistz256_point_add,@function
.align 32
ecp_nistz256_point_add:
pushq %rbp
pushq %rbx
pushq %r12
pushq %r13
pushq %r14
pushq %r15
subq $576+8,%rsp

movdqu 0(%rsi),%xmm0
movdqu 16(%rsi),%xmm1
movdqu 32(%rsi),%xmm2
movdqu 48(%rsi),%xmm3
movdqu 64(%rsi),%xmm4
movdqu 80(%rsi),%xmm5
movq %rsi,%rbx
movq %rdx,%rsi
movdqa %xmm0,384(%rsp)
movdqa %xmm1,384+16(%rsp)
movdqa %xmm2,416(%rsp)
movdqa %xmm3,416+16(%rsp)
movdqa %xmm4,448(%rsp)
movdqa %xmm5,448+16(%rsp)
por %xmm4,%xmm5

movdqu 0(%rsi),%xmm0
pshufd $0xb1,%xmm5,%xmm3
movdqu 16(%rsi),%xmm1
movdqu 32(%rsi),%xmm2
por %xmm3,%xmm5
movdqu 48(%rsi),%xmm3
movq 64+0(%rsi),%rax
movq 64+8(%rsi),%r14
movq 64+16(%rsi),%r15
movq 64+24(%rsi),%r8
movdqa %xmm0,480(%rsp)
pshufd $0x1e,%xmm5,%xmm4
movdqa %xmm1,480+16(%rsp)
movdqu 64(%rsi),%xmm0
movdqu 80(%rsi),%xmm1
movdqa %xmm2,512(%rsp)
movdqa %xmm3,512+16(%rsp)
por %xmm4,%xmm5
pxor %xmm4,%xmm4
por %xmm0,%xmm1
.byte 102,72,15,110,199

leaq 64-0(%rsi),%rsi
movq %rax,544+0(%rsp)
movq %r14,544+8(%rsp)
movq %r15,544+16(%rsp)
movq %r8,544+24(%rsp)
leaq 96(%rsp),%rdi
call __ecp_nistz256_sqr_montq

pcmpeqd %xmm4,%xmm5
pshufd $0xb1,%xmm1,%xmm4
por %xmm1,%xmm4
pshufd $0,%xmm5,%xmm5
pshufd $0x1e,%xmm4,%xmm3
por %xmm3,%xmm4
pxor %xmm3,%xmm3
pcmpeqd %xmm3,%xmm4
pshufd $0,%xmm4,%xmm4
movq 64+0(%rbx),%rax
movq 64+8(%rbx),%r14
movq 64+16(%rbx),%r15
movq 64+24(%rbx),%r8
.byte 102,72,15,110,203

leaq 64-0(%rbx),%rsi
leaq 32(%rsp),%rdi
call __ecp_nistz256_sqr_montq

movq 544(%rsp),%rax
leaq 544(%rsp),%rbx
movq 0+96(%rsp),%r9
movq 8+96(%rsp),%r10
leaq 0+96(%rsp),%rsi
movq 16+96(%rsp),%r11
movq 24+96(%rsp),%r12
leaq 224(%rsp),%rdi
call __ecp_nistz256_mul_montq

movq 448(%rsp),%rax
leaq 448(%rsp),%rbx
movq 0+32(%rsp),%r9
movq 8+32(%rsp),%r10
leaq 0+32(%rsp),%rsi
movq 16+32(%rsp),%r11
movq 24+32(%rsp),%r12
leaq 256(%rsp),%rdi
call __ecp_nistz256_mul_montq

movq 416(%rsp),%rax
leaq 416(%rsp),%rbx
movq 0+224(%rsp),%r9
movq 8+224(%rsp),%r10
leaq 0+224(%rsp),%rsi
movq 16+224(%rsp),%r11
movq 24+224(%rsp),%r12
leaq 224(%rsp),%rdi
call __ecp_nistz256_mul_montq

movq 512(%rsp),%rax
leaq 512(%rsp),%rbx
movq 0+256(%rsp),%r9
movq 8+256(%rsp),%r10
leaq 0+256(%rsp),%rsi
movq 16+256(%rsp),%r11
movq 24+256(%rsp),%r12
leaq 256(%rsp),%rdi
call __ecp_nistz256_mul_montq

leaq 224(%rsp),%rbx
leaq 64(%rsp),%rdi
call __ecp_nistz256_sub_fromq

orq %r13,%r12
movdqa %xmm4,%xmm2
orq %r8,%r12
orq %r9,%r12
por %xmm5,%xmm2
.byte 102,73,15,110,220

movq 384(%rsp),%rax
leaq 384(%rsp),%rbx
movq 0+96(%rsp),%r9
movq 8+96(%rsp),%r10
leaq 0+96(%rsp),%rsi
movq 16+96(%rsp),%r11
movq 24+96(%rsp),%r12
leaq 160(%rsp),%rdi
call __ecp_nistz256_mul_montq

movq 480(%rsp),%rax
leaq 480(%rsp),%rbx
movq 0+32(%rsp),%r9
movq 8+32(%rsp),%r10
leaq 0+32(%rsp),%rsi
movq 16+32(%rsp),%r11
movq 24+32(%rsp),%r12
leaq 192(%rsp),%rdi
call __ecp_nistz256_mul_montq

leaq 160(%rsp),%rbx
leaq 0(%rsp),%rdi
call __ecp_nistz256_sub_fromq

orq %r13,%r12
orq %r8,%r12
orq %r9,%r12

.byte 0x3e
jnz .Ladd_proceedq
.byte 102,73,15,126,208
.byte 102,73,15,126,217
testq %r8,%r8
jnz .Ladd_proceedq
testq %r9,%r9
jz .Ladd_doubleq

.byte 102,72,15,126,199
pxor %xmm0,%xmm0
movdqu %xmm0,0(%rdi)
movdqu %xmm0,16(%rdi)
movdqu %xmm0,32(%rdi)
movdqu %xmm0,48(%rdi)
movdqu %xmm0,64(%rdi)
movdqu %xmm0,80(%rdi)
jmp .Ladd_doneq

.align 32
.Ladd_doubleq:
.byte 102,72,15,126,206
.byte 102,72,15,126,199
addq $416,%rsp
jmp .Lpoint_double_shortcutq

.align 32
.Ladd_proceedq:
movq 0+64(%rsp),%rax
movq 8+64(%rsp),%r14
leaq 0+64(%rsp),%rsi
movq 16+64(%rsp),%r15
movq 24+64(%rsp),%r8
leaq 96(%rsp),%rdi
call __ecp_nistz256_sqr_montq

movq 448(%rsp),%rax
leaq 448(%rsp),%rbx
movq 0+0(%rsp),%r9
movq 8+0(%rsp),%r10
leaq 0+0(%rsp),%rsi
movq 16+0(%rsp),%r11
movq 24+0(%rsp),%r12
leaq 352(%rsp),%rdi
call __ecp_nistz256_mul_montq

movq 0+0(%rsp),%rax
movq 8+0(%rsp),%r14
leaq 0+0(%rsp),%rsi
movq 16+0(%rsp),%r15
movq 24+0(%rsp),%r8
leaq 32(%rsp),%rdi
call __ecp_nistz256_sqr_montq

movq 544(%rsp),%rax
leaq 544(%rsp),%rbx
movq 0+352(%rsp),%r9
movq 8+352(%rsp),%r10
leaq 0+352(%rsp),%rsi
movq 16+352(%rsp),%r11
movq 24+352(%rsp),%r12
leaq 352(%rsp),%rdi
call __ecp_nistz256_mul_montq

movq 0(%rsp),%rax
leaq 0(%rsp),%rbx
movq 0+32(%rsp),%r9
movq 8+32(%rsp),%r10
leaq 0+32(%rsp),%rsi
movq 16+32(%rsp),%r11
movq 24+32(%rsp),%r12
leaq 128(%rsp),%rdi
call __ecp_nistz256_mul_montq

movq 160(%rsp),%rax
leaq 160(%rsp),%rbx
movq 0+32(%rsp),%r9
movq 8+32(%rsp),%r10
leaq 0+32(%rsp),%rsi
movq 16+32(%rsp),%r11
movq 24+32(%rsp),%r12
leaq 192(%rsp),%rdi
call __ecp_nistz256_mul_montq

xorq %r11,%r11
addq %r12,%r12
leaq 96(%rsp),%rsi
adcq %r13,%r13
movq %r12,%rax
adcq %r8,%r8
adcq %r9,%r9
movq %r13,%rbp
adcq $0,%r11

subq $-1,%r12
movq %r8,%rcx
sbbq %r14,%r13
sbbq $0,%r8
movq %r9,%r10
sbbq %r15,%r9
sbbq $0,%r11

cmovcq %rax,%r12
movq 0(%rsi),%rax
cmovcq %rbp,%r13
movq 8(%rsi),%rbp
cmovcq %rcx,%r8
movq 16(%rsi),%rcx
cmovcq %r10,%r9
movq 24(%rsi),%r10

call __ecp_nistz256_subq

leaq 128(%rsp),%rbx
leaq 288(%rsp),%rdi
call __ecp_nistz256_sub_fromq

movq 192+0(%rsp),%rax
movq 192+8(%rsp),%rbp
movq 192+16(%rsp),%rcx
movq 192+24(%rsp),%r10
leaq 320(%rsp),%rdi

call __ecp_nistz256_subq

movq %r12,0(%rdi)
movq %r13,8(%rdi)
movq %r8,16(%rdi)
movq %r9,24(%rdi)
movq 128(%rsp),%rax
leaq 128(%rsp),%rbx
movq 0+224(%rsp),%r9
movq 8+224(%rsp),%r10
leaq 0+224(%rsp),%rsi
movq 16+224(%rsp),%r11
movq 24+224(%rsp),%r12
leaq 256(%rsp),%rdi
call __ecp_nistz256_mul_montq

movq 320(%rsp),%rax
leaq 320(%rsp),%rbx
movq 0+64(%rsp),%r9
movq 8+64(%rsp),%r10
leaq 0+64(%rsp),%rsi
movq 16+64(%rsp),%r11
movq 24+64(%rsp),%r12
leaq 320(%rsp),%rdi
call __ecp_nistz256_mul_montq

leaq 256(%rsp),%rbx
leaq 320(%rsp),%rdi
call __ecp_nistz256_sub_fromq

.byte 102,72,15,126,199

movdqa %xmm5,%xmm0
movdqa %xmm5,%xmm1
pandn 352(%rsp),%xmm0
movdqa %xmm5,%xmm2
pandn 352+16(%rsp),%xmm1
movdqa %xmm5,%xmm3
pand 544(%rsp),%xmm2
pand 544+16(%rsp),%xmm3
por %xmm0,%xmm2
por %xmm1,%xmm3

movdqa %xmm4,%xmm0
movdqa %xmm4,%xmm1
pandn %xmm2,%xmm0
movdqa %xmm4,%xmm2
pandn %xmm3,%xmm1
movdqa %xmm4,%xmm3
pand 448(%rsp),%xmm2
pand 448+16(%rsp),%xmm3
por %xmm0,%xmm2
por %xmm1,%xmm3
movdqu %xmm2,64(%rdi)
movdqu %xmm3,80(%rdi)

movdqa %xmm5,%xmm0
movdqa %xmm5,%xmm1
pandn 288(%rsp),%xmm0
movdqa %xmm5,%xmm2
pandn 288+16(%rsp),%xmm1
movdqa %xmm5,%xmm3
pand 480(%rsp),%xmm2
pand 480+16(%rsp),%xmm3
por %xmm0,%xmm2
por %xmm1,%xmm3

movdqa %xmm4,%xmm0
movdqa %xmm4,%xmm1
pandn %xmm2,%xmm0
movdqa %xmm4,%xmm2
pandn %xmm3,%xmm1
movdqa %xmm4,%xmm3
pand 384(%rsp),%xmm2
pand 384+16(%rsp),%xmm3
por %xmm0,%xmm2
por %xmm1,%xmm3
movdqu %xmm2,0(%rdi)
movdqu %xmm3,16(%rdi)

movdqa %xmm5,%xmm0
movdqa %xmm5,%xmm1
pandn 320(%rsp),%xmm0
movdqa %xmm5,%xmm2
pandn 320+16(%rsp),%xmm1
movdqa %xmm5,%xmm3
pand 512(%rsp),%xmm2
pand 512+16(%rsp),%xmm3
por %xmm0,%xmm2
por %xmm1,%xmm3

movdqa %xmm4,%xmm0
movdqa %xmm4,%xmm1
pandn %xmm2,%xmm0
movdqa %xmm4,%xmm2
pandn %xmm3,%xmm1
movdqa %xmm4,%xmm3
pand 416(%rsp),%xmm2
pand 416+16(%rsp),%xmm3
por %xmm0,%xmm2
por %xmm1,%xmm3
movdqu %xmm2,32(%rdi)
movdqu %xmm3,48(%rdi)

.Ladd_doneq:
addq $576+8,%rsp
popq %r15
popq %r14
popq %r13
popq %r12
popq %rbx
popq %rbp
.byte 0xf3,0xc3
.size ecp_nistz256_point_add,.-ecp_nistz256_point_add
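/* ecp_nistz256_point_add_affine(r, a, b): mixed addition of a Jacobian point
   and an affine point whose Z coordinate is implicitly 1; .LONE_mont supplies
   that Z in the final blend when the Jacobian input is the point at
   infinity. */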
.globl ecp_nistz256_point_add_affine
.hidden ecp_nistz256_point_add_affine
.type ecp_nistz256_point_add_affine,@function
.align 32
ecp_nistz256_point_add_affine:
pushq %rbp
pushq %rbx
pushq %r12
pushq %r13
pushq %r14
pushq %r15
subq $480+8,%rsp

movdqu 0(%rsi),%xmm0
movq %rdx,%rbx
movdqu 16(%rsi),%xmm1
movdqu 32(%rsi),%xmm2
movdqu 48(%rsi),%xmm3
movdqu 64(%rsi),%xmm4
movdqu 80(%rsi),%xmm5
movq 64+0(%rsi),%rax
movq 64+8(%rsi),%r14
movq 64+16(%rsi),%r15
movq 64+24(%rsi),%r8
movdqa %xmm0,320(%rsp)
movdqa %xmm1,320+16(%rsp)
movdqa %xmm2,352(%rsp)
movdqa %xmm3,352+16(%rsp)
movdqa %xmm4,384(%rsp)
movdqa %xmm5,384+16(%rsp)
por %xmm4,%xmm5

movdqu 0(%rbx),%xmm0
pshufd $0xb1,%xmm5,%xmm3
movdqu 16(%rbx),%xmm1
movdqu 32(%rbx),%xmm2
por %xmm3,%xmm5
movdqu 48(%rbx),%xmm3
movdqa %xmm0,416(%rsp)
pshufd $0x1e,%xmm5,%xmm4
movdqa %xmm1,416+16(%rsp)
por %xmm0,%xmm1
.byte 102,72,15,110,199
movdqa %xmm2,448(%rsp)
movdqa %xmm3,448+16(%rsp)
por %xmm2,%xmm3
por %xmm4,%xmm5
pxor %xmm4,%xmm4
por %xmm1,%xmm3

leaq 64-0(%rsi),%rsi
leaq 32(%rsp),%rdi
call __ecp_nistz256_sqr_montq

pcmpeqd %xmm4,%xmm5
pshufd $0xb1,%xmm3,%xmm4
movq 0(%rbx),%rax

movq %r12,%r9
por %xmm3,%xmm4
pshufd $0,%xmm5,%xmm5
pshufd $0x1e,%xmm4,%xmm3
movq %r13,%r10
por %xmm3,%xmm4
pxor %xmm3,%xmm3
movq %r14,%r11
pcmpeqd %xmm3,%xmm4
pshufd $0,%xmm4,%xmm4

leaq 32-0(%rsp),%rsi
movq %r15,%r12
leaq 0(%rsp),%rdi
call __ecp_nistz256_mul_montq

leaq 320(%rsp),%rbx
leaq 64(%rsp),%rdi
call __ecp_nistz256_sub_fromq

movq 384(%rsp),%rax
leaq 384(%rsp),%rbx
movq 0+32(%rsp),%r9
movq 8+32(%rsp),%r10
leaq 0+32(%rsp),%rsi
movq 16+32(%rsp),%r11
movq 24+32(%rsp),%r12
leaq 32(%rsp),%rdi
call __ecp_nistz256_mul_montq

movq 384(%rsp),%rax
leaq 384(%rsp),%rbx
movq 0+64(%rsp),%r9
movq 8+64(%rsp),%r10
leaq 0+64(%rsp),%rsi
movq 16+64(%rsp),%r11
movq 24+64(%rsp),%r12
leaq 288(%rsp),%rdi
call __ecp_nistz256_mul_montq

movq 448(%rsp),%rax
leaq 448(%rsp),%rbx
movq 0+32(%rsp),%r9
movq 8+32(%rsp),%r10
leaq 0+32(%rsp),%rsi
movq 16+32(%rsp),%r11
movq 24+32(%rsp),%r12
leaq 32(%rsp),%rdi
call __ecp_nistz256_mul_montq

leaq 352(%rsp),%rbx
leaq 96(%rsp),%rdi
call __ecp_nistz256_sub_fromq

movq 0+64(%rsp),%rax
movq 8+64(%rsp),%r14
leaq 0+64(%rsp),%rsi
movq 16+64(%rsp),%r15
movq 24+64(%rsp),%r8
leaq 128(%rsp),%rdi
call __ecp_nistz256_sqr_montq

movq 0+96(%rsp),%rax
movq 8+96(%rsp),%r14
leaq 0+96(%rsp),%rsi
movq 16+96(%rsp),%r15
movq 24+96(%rsp),%r8
leaq 192(%rsp),%rdi
call __ecp_nistz256_sqr_montq

movq 128(%rsp),%rax
leaq 128(%rsp),%rbx
movq 0+64(%rsp),%r9
movq 8+64(%rsp),%r10
leaq 0+64(%rsp),%rsi
movq 16+64(%rsp),%r11
movq 24+64(%rsp),%r12
leaq 160(%rsp),%rdi
call __ecp_nistz256_mul_montq

movq 320(%rsp),%rax
leaq 320(%rsp),%rbx
movq 0+128(%rsp),%r9
movq 8+128(%rsp),%r10
leaq 0+128(%rsp),%rsi
movq 16+128(%rsp),%r11
movq 24+128(%rsp),%r12
leaq 0(%rsp),%rdi
call __ecp_nistz256_mul_montq

xorq %r11,%r11
addq %r12,%r12
leaq 192(%rsp),%rsi
adcq %r13,%r13
movq %r12,%rax
adcq %r8,%r8
adcq %r9,%r9
movq %r13,%rbp
adcq $0,%r11

subq $-1,%r12
movq %r8,%rcx
sbbq %r14,%r13
sbbq $0,%r8
movq %r9,%r10
sbbq %r15,%r9
sbbq $0,%r11

cmovcq %rax,%r12
movq 0(%rsi),%rax
cmovcq %rbp,%r13
movq 8(%rsi),%rbp
cmovcq %rcx,%r8
movq 16(%rsi),%rcx
cmovcq %r10,%r9
movq 24(%rsi),%r10

call __ecp_nistz256_subq

leaq 160(%rsp),%rbx
leaq 224(%rsp),%rdi
call __ecp_nistz256_sub_fromq

movq 0+0(%rsp),%rax
movq 0+8(%rsp),%rbp
movq 0+16(%rsp),%rcx
movq 0+24(%rsp),%r10
leaq 64(%rsp),%rdi

call __ecp_nistz256_subq

movq %r12,0(%rdi)
movq %r13,8(%rdi)
movq %r8,16(%rdi)
movq %r9,24(%rdi)
movq 352(%rsp),%rax
leaq 352(%rsp),%rbx
movq 0+160(%rsp),%r9
movq 8+160(%rsp),%r10
leaq 0+160(%rsp),%rsi
movq 16+160(%rsp),%r11
movq 24+160(%rsp),%r12
leaq 32(%rsp),%rdi
call __ecp_nistz256_mul_montq

movq 96(%rsp),%rax
leaq 96(%rsp),%rbx
movq 0+64(%rsp),%r9
movq 8+64(%rsp),%r10
leaq 0+64(%rsp),%rsi
movq 16+64(%rsp),%r11
movq 24+64(%rsp),%r12
leaq 64(%rsp),%rdi
call __ecp_nistz256_mul_montq

leaq 32(%rsp),%rbx
leaq 256(%rsp),%rdi
call __ecp_nistz256_sub_fromq

.byte 102,72,15,126,199

movdqa %xmm5,%xmm0
movdqa %xmm5,%xmm1
pandn 288(%rsp),%xmm0
movdqa %xmm5,%xmm2
pandn 288+16(%rsp),%xmm1
movdqa %xmm5,%xmm3
pand .LONE_mont(%rip),%xmm2
pand .LONE_mont+16(%rip),%xmm3
por %xmm0,%xmm2
por %xmm1,%xmm3

movdqa %xmm4,%xmm0
movdqa %xmm4,%xmm1
pandn %xmm2,%xmm0
movdqa %xmm4,%xmm2
pandn %xmm3,%xmm1
movdqa %xmm4,%xmm3
pand 384(%rsp),%xmm2
pand 384+16(%rsp),%xmm3
por %xmm0,%xmm2
por %xmm1,%xmm3
movdqu %xmm2,64(%rdi)
movdqu %xmm3,80(%rdi)

movdqa %xmm5,%xmm0
movdqa %xmm5,%xmm1
pandn 224(%rsp),%xmm0
movdqa %xmm5,%xmm2
pandn 224+16(%rsp),%xmm1
movdqa %xmm5,%xmm3
pand 416(%rsp),%xmm2
pand 416+16(%rsp),%xmm3
por %xmm0,%xmm2
por %xmm1,%xmm3

movdqa %xmm4,%xmm0
movdqa %xmm4,%xmm1
pandn %xmm2,%xmm0
movdqa %xmm4,%xmm2
pandn %xmm3,%xmm1
movdqa %xmm4,%xmm3
pand 320(%rsp),%xmm2
pand 320+16(%rsp),%xmm3
por %xmm0,%xmm2
por %xmm1,%xmm3
movdqu %xmm2,0(%rdi)
movdqu %xmm3,16(%rdi)

movdqa %xmm5,%xmm0
movdqa %xmm5,%xmm1
pandn 256(%rsp),%xmm0
movdqa %xmm5,%xmm2
pandn 256+16(%rsp),%xmm1
movdqa %xmm5,%xmm3
pand 448(%rsp),%xmm2
pand 448+16(%rsp),%xmm3
por %xmm0,%xmm2
por %xmm1,%xmm3

movdqa %xmm4,%xmm0
movdqa %xmm4,%xmm1
pandn %xmm2,%xmm0
movdqa %xmm4,%xmm2
pandn %xmm3,%xmm1
movdqa %xmm4,%xmm3
pand 352(%rsp),%xmm2
pand 352+16(%rsp),%xmm3
por %xmm0,%xmm2
por %xmm1,%xmm3
movdqu %xmm2,32(%rdi)
movdqu %xmm3,48(%rdi)

addq $480+8,%rsp
popq %r15
popq %r14
popq %r13
popq %r12
popq %rbx
popq %rbp
.byte 0xf3,0xc3
.size ecp_nistz256_point_add_affine,.-ecp_nistz256_point_add_affine
#endif