OLD | NEW |
1 #if defined(__x86_64__) | 1 #if defined(__x86_64__) |
2 .text | 2 .text |
3 | 3 |
4 | 4 |
5 | 5 |
6 .globl _bn_mul_mont_gather5 | 6 .globl _bn_mul_mont_gather5 |
7 .private_extern _bn_mul_mont_gather5 | 7 .private_extern _bn_mul_mont_gather5 |
8 | 8 |
9 .p2align 6 | 9 .p2align 6 |
10 _bn_mul_mont_gather5: | 10 _bn_mul_mont_gather5: |
| 11 movl %r9d,%r9d |
| 12 movq %rsp,%rax |
11 testl $7,%r9d | 13 testl $7,%r9d |
12 jnz L$mul_enter | 14 jnz L$mul_enter |
13 jmp L$mul4x_enter | 15 jmp L$mul4x_enter |
14 | 16 |
15 .p2align 4 | 17 .p2align 4 |
16 L$mul_enter: | 18 L$mul_enter: |
17 movl %r9d,%r9d | |
18 movq %rsp,%rax | |
19 movd 8(%rsp),%xmm5 | 19 movd 8(%rsp),%xmm5 |
20 leaq L$inc(%rip),%r10 | |
21 pushq %rbx | 20 pushq %rbx |
22 pushq %rbp | 21 pushq %rbp |
23 pushq %r12 | 22 pushq %r12 |
24 pushq %r13 | 23 pushq %r13 |
25 pushq %r14 | 24 pushq %r14 |
26 pushq %r15 | 25 pushq %r15 |
27 | 26 |
28 » leaq» 2(%r9),%r11 | 27 » negq» %r9 |
29 » negq» %r11 | 28 » movq» %rsp,%r11 |
30 » leaq» -264(%rsp,%r11,8),%rsp | 29 » leaq» -280(%rsp,%r9,8),%r10 |
31 » andq» $-1024,%rsp | 30 » negq» %r9 |
| 31 » andq» $-1024,%r10 |
32 | 32 |
| 33 |
| 34 |
| 35 |
| 36 |
| 37 |
| 38 |
| 39 |
| 40 |
| 41 subq %r10,%r11 |
| 42 andq $-4096,%r11 |
| 43 leaq (%r10,%r11,1),%rsp |
| 44 movq (%rsp),%r11 |
| 45 cmpq %r10,%rsp |
| 46 ja L$mul_page_walk |
| 47 jmp L$mul_page_walk_done |
| 48 |
| 49 L$mul_page_walk: |
| 50 leaq -4096(%rsp),%rsp |
| 51 movq (%rsp),%r11 |
| 52 cmpq %r10,%rsp |
| 53 ja L$mul_page_walk |
| 54 L$mul_page_walk_done: |
| 55 |
| 56 leaq L$inc(%rip),%r10 |
33 movq %rax,8(%rsp,%r9,8) | 57 movq %rax,8(%rsp,%r9,8) |
34 L$mul_body: | 58 L$mul_body: |
| 59 |
35 leaq 128(%rdx),%r12 | 60 leaq 128(%rdx),%r12 |
36 movdqa 0(%r10),%xmm0 | 61 movdqa 0(%r10),%xmm0 |
37 movdqa 16(%r10),%xmm1 | 62 movdqa 16(%r10),%xmm1 |
38 leaq 24-112(%rsp,%r9,8),%r10 | 63 leaq 24-112(%rsp,%r9,8),%r10 |
39 andq $-16,%r10 | 64 andq $-16,%r10 |
40 | 65 |
41 pshufd $0,%xmm5,%xmm5 | 66 pshufd $0,%xmm5,%xmm5 |
42 movdqa %xmm1,%xmm4 | 67 movdqa %xmm1,%xmm4 |
43 movdqa %xmm1,%xmm2 | 68 movdqa %xmm1,%xmm2 |
44 paddd %xmm0,%xmm1 | 69 paddd %xmm0,%xmm1 |
(...skipping 318 matching lines...) |
363 .p2align 4 | 388 .p2align 4 |
364 L$sub: sbbq (%rcx,%r14,8),%rax | 389 L$sub: sbbq (%rcx,%r14,8),%rax |
365 movq %rax,(%rdi,%r14,8) | 390 movq %rax,(%rdi,%r14,8) |
366 movq 8(%rsi,%r14,8),%rax | 391 movq 8(%rsi,%r14,8),%rax |
367 leaq 1(%r14),%r14 | 392 leaq 1(%r14),%r14 |
368 decq %r15 | 393 decq %r15 |
369 jnz L$sub | 394 jnz L$sub |
370 | 395 |
371 sbbq $0,%rax | 396 sbbq $0,%rax |
372 xorq %r14,%r14 | 397 xorq %r14,%r14 |
| 398 andq %rax,%rsi |
| 399 notq %rax |
| 400 movq %rdi,%rcx |
| 401 andq %rax,%rcx |
373 movq %r9,%r15 | 402 movq %r9,%r15 |
| 403 orq %rcx,%rsi |
374 .p2align 4 | 404 .p2align 4 |
375 L$copy: | 405 L$copy: |
376 » movq» (%rsp,%r14,8),%rsi | 406 » movq» (%rsi,%r14,8),%rax |
377 » movq» (%rdi,%r14,8),%rcx | |
378 » xorq» %rcx,%rsi | |
379 » andq» %rax,%rsi | |
380 » xorq» %rcx,%rsi | |
381 movq %r14,(%rsp,%r14,8) | 407 movq %r14,(%rsp,%r14,8) |
382 » movq» %rsi,(%rdi,%r14,8) | 408 » movq» %rax,(%rdi,%r14,8) |
383 leaq 1(%r14),%r14 | 409 leaq 1(%r14),%r14 |
384 subq $1,%r15 | 410 subq $1,%r15 |
385 jnz L$copy | 411 jnz L$copy |
386 | 412 |
387 movq 8(%rsp,%r9,8),%rsi | 413 movq 8(%rsp,%r9,8),%rsi |
388 movq $1,%rax | 414 movq $1,%rax |
389 | 415 |
390 movq -48(%rsi),%r15 | 416 movq -48(%rsi),%r15 |
391 movq -40(%rsi),%r14 | 417 movq -40(%rsi),%r14 |
392 movq -32(%rsi),%r13 | 418 movq -32(%rsi),%r13 |
393 movq -24(%rsi),%r12 | 419 movq -24(%rsi),%r12 |
394 movq -16(%rsi),%rbp | 420 movq -16(%rsi),%rbp |
395 movq -8(%rsi),%rbx | 421 movq -8(%rsi),%rbx |
396 leaq (%rsi),%rsp | 422 leaq (%rsi),%rsp |
397 L$mul_epilogue: | 423 L$mul_epilogue: |
398 .byte 0xf3,0xc3 | 424 .byte 0xf3,0xc3 |
399 | 425 |
400 | 426 |
401 .p2align 5 | 427 .p2align 5 |
402 bn_mul4x_mont_gather5: | 428 bn_mul4x_mont_gather5: |
403 L$mul4x_enter: | |
404 .byte 0x67 | 429 .byte 0x67 |
405 movq %rsp,%rax | 430 movq %rsp,%rax |
| 431 L$mul4x_enter: |
406 pushq %rbx | 432 pushq %rbx |
407 pushq %rbp | 433 pushq %rbp |
408 pushq %r12 | 434 pushq %r12 |
409 pushq %r13 | 435 pushq %r13 |
410 pushq %r14 | 436 pushq %r14 |
411 pushq %r15 | 437 pushq %r15 |
| 438 L$mul4x_prologue: |
412 | 439 |
413 .byte 0x67 | 440 .byte 0x67 |
414 shll $3,%r9d | 441 shll $3,%r9d |
415 leaq (%r9,%r9,2),%r10 | 442 leaq (%r9,%r9,2),%r10 |
416 negq %r9 | 443 negq %r9 |
417 | 444 |
418 | 445 |
419 | 446 |
420 | 447 |
421 | 448 |
422 | 449 |
423 | 450 |
424 | 451 |
425 | 452 |
426 | 453 |
427 leaq -320(%rsp,%r9,2),%r11 | 454 leaq -320(%rsp,%r9,2),%r11 |
| 455 movq %rsp,%rbp |
428 subq %rdi,%r11 | 456 subq %rdi,%r11 |
429 andq $4095,%r11 | 457 andq $4095,%r11 |
430 cmpq %r11,%r10 | 458 cmpq %r11,%r10 |
431 jb L$mul4xsp_alt | 459 jb L$mul4xsp_alt |
432 » subq» %r11,%rsp | 460 » subq» %r11,%rbp |
433 » leaq» -320(%rsp,%r9,2),%rsp | 461 » leaq» -320(%rbp,%r9,2),%rbp |
434 jmp L$mul4xsp_done | 462 jmp L$mul4xsp_done |
435 | 463 |
436 .p2align 5 | 464 .p2align 5 |
437 L$mul4xsp_alt: | 465 L$mul4xsp_alt: |
438 leaq 4096-320(,%r9,2),%r10 | 466 leaq 4096-320(,%r9,2),%r10 |
439 » leaq» -320(%rsp,%r9,2),%rsp | 467 » leaq» -320(%rbp,%r9,2),%rbp |
440 subq %r10,%r11 | 468 subq %r10,%r11 |
441 movq $0,%r10 | 469 movq $0,%r10 |
442 cmovcq %r10,%r11 | 470 cmovcq %r10,%r11 |
443 » subq» %r11,%rsp | 471 » subq» %r11,%rbp |
444 L$mul4xsp_done: | 472 L$mul4xsp_done: |
445 » andq» $-64,%rsp | 473 » andq» $-64,%rbp |
| 474 » movq» %rsp,%r11 |
| 475 » subq» %rbp,%r11 |
| 476 » andq» $-4096,%r11 |
| 477 » leaq» (%r11,%rbp,1),%rsp |
| 478 » movq» (%rsp),%r10 |
| 479 » cmpq» %rbp,%rsp |
| 480 » ja» L$mul4x_page_walk |
| 481 » jmp» L$mul4x_page_walk_done |
| 482 |
| 483 L$mul4x_page_walk: |
| 484 » leaq» -4096(%rsp),%rsp |
| 485 » movq» (%rsp),%r10 |
| 486 » cmpq» %rbp,%rsp |
| 487 » ja» L$mul4x_page_walk |
| 488 L$mul4x_page_walk_done: |
| 489 |
446 negq %r9 | 490 negq %r9 |
447 | 491 |
448 movq %rax,40(%rsp) | 492 movq %rax,40(%rsp) |
449 L$mul4x_body: | 493 L$mul4x_body: |
450 | 494 |
451 call mul4x_internal | 495 call mul4x_internal |
452 | 496 |
453 movq 40(%rsp),%rsi | 497 movq 40(%rsp),%rsi |
454 movq $1,%rax | 498 movq $1,%rax |
455 | 499 |
(...skipping 538 matching lines...) |
994 | 1038 |
995 .p2align 5 | 1039 .p2align 5 |
996 _bn_power5: | 1040 _bn_power5: |
997 movq %rsp,%rax | 1041 movq %rsp,%rax |
998 pushq %rbx | 1042 pushq %rbx |
999 pushq %rbp | 1043 pushq %rbp |
1000 pushq %r12 | 1044 pushq %r12 |
1001 pushq %r13 | 1045 pushq %r13 |
1002 pushq %r14 | 1046 pushq %r14 |
1003 pushq %r15 | 1047 pushq %r15 |
| 1048 L$power5_prologue: |
1004 | 1049 |
1005 shll $3,%r9d | 1050 shll $3,%r9d |
1006 leal (%r9,%r9,2),%r10d | 1051 leal (%r9,%r9,2),%r10d |
1007 negq %r9 | 1052 negq %r9 |
1008 movq (%r8),%r8 | 1053 movq (%r8),%r8 |
1009 | 1054 |
1010 | 1055 |
1011 | 1056 |
1012 | 1057 |
1013 | 1058 |
1014 | 1059 |
1015 | 1060 |
1016 | 1061 |
1017 leaq -320(%rsp,%r9,2),%r11 | 1062 leaq -320(%rsp,%r9,2),%r11 |
| 1063 movq %rsp,%rbp |
1018 subq %rdi,%r11 | 1064 subq %rdi,%r11 |
1019 andq $4095,%r11 | 1065 andq $4095,%r11 |
1020 cmpq %r11,%r10 | 1066 cmpq %r11,%r10 |
1021 jb L$pwr_sp_alt | 1067 jb L$pwr_sp_alt |
1022 » subq» %r11,%rsp | 1068 » subq» %r11,%rbp |
1023 » leaq» -320(%rsp,%r9,2),%rsp | 1069 » leaq» -320(%rbp,%r9,2),%rbp |
1024 jmp L$pwr_sp_done | 1070 jmp L$pwr_sp_done |
1025 | 1071 |
1026 .p2align 5 | 1072 .p2align 5 |
1027 L$pwr_sp_alt: | 1073 L$pwr_sp_alt: |
1028 leaq 4096-320(,%r9,2),%r10 | 1074 leaq 4096-320(,%r9,2),%r10 |
1029 » leaq» -320(%rsp,%r9,2),%rsp | 1075 » leaq» -320(%rbp,%r9,2),%rbp |
1030 subq %r10,%r11 | 1076 subq %r10,%r11 |
1031 movq $0,%r10 | 1077 movq $0,%r10 |
1032 cmovcq %r10,%r11 | 1078 cmovcq %r10,%r11 |
1033 » subq» %r11,%rsp | 1079 » subq» %r11,%rbp |
1034 L$pwr_sp_done: | 1080 L$pwr_sp_done: |
1035 » andq» $-64,%rsp | 1081 » andq» $-64,%rbp |
| 1082 » movq» %rsp,%r11 |
| 1083 » subq» %rbp,%r11 |
| 1084 » andq» $-4096,%r11 |
| 1085 » leaq» (%r11,%rbp,1),%rsp |
| 1086 » movq» (%rsp),%r10 |
| 1087 » cmpq» %rbp,%rsp |
| 1088 » ja» L$pwr_page_walk |
| 1089 » jmp» L$pwr_page_walk_done |
| 1090 |
| 1091 L$pwr_page_walk: |
| 1092 » leaq» -4096(%rsp),%rsp |
| 1093 » movq» (%rsp),%r10 |
| 1094 » cmpq» %rbp,%rsp |
| 1095 » ja» L$pwr_page_walk |
| 1096 L$pwr_page_walk_done: |
| 1097 |
1036 movq %r9,%r10 | 1098 movq %r9,%r10 |
1037 negq %r9 | 1099 negq %r9 |
1038 | 1100 |
1039 | 1101 |
1040 | 1102 |
1041 | 1103 |
1042 | 1104 |
1043 | 1105 |
1044 | 1106 |
1045 | 1107 |
(...skipping 890 matching lines...) |
1936 .p2align 5 | 1998 .p2align 5 |
1937 bn_from_mont8x: | 1999 bn_from_mont8x: |
1938 .byte 0x67 | 2000 .byte 0x67 |
1939 movq %rsp,%rax | 2001 movq %rsp,%rax |
1940 pushq %rbx | 2002 pushq %rbx |
1941 pushq %rbp | 2003 pushq %rbp |
1942 pushq %r12 | 2004 pushq %r12 |
1943 pushq %r13 | 2005 pushq %r13 |
1944 pushq %r14 | 2006 pushq %r14 |
1945 pushq %r15 | 2007 pushq %r15 |
| 2008 L$from_prologue: |
1946 | 2009 |
1947 shll $3,%r9d | 2010 shll $3,%r9d |
1948 leaq (%r9,%r9,2),%r10 | 2011 leaq (%r9,%r9,2),%r10 |
1949 negq %r9 | 2012 negq %r9 |
1950 movq (%r8),%r8 | 2013 movq (%r8),%r8 |
1951 | 2014 |
1952 | 2015 |
1953 | 2016 |
1954 | 2017 |
1955 | 2018 |
1956 | 2019 |
1957 | 2020 |
1958 | 2021 |
1959 leaq -320(%rsp,%r9,2),%r11 | 2022 leaq -320(%rsp,%r9,2),%r11 |
| 2023 movq %rsp,%rbp |
1960 subq %rdi,%r11 | 2024 subq %rdi,%r11 |
1961 andq $4095,%r11 | 2025 andq $4095,%r11 |
1962 cmpq %r11,%r10 | 2026 cmpq %r11,%r10 |
1963 jb L$from_sp_alt | 2027 jb L$from_sp_alt |
1964 » subq» %r11,%rsp | 2028 » subq» %r11,%rbp |
1965 » leaq» -320(%rsp,%r9,2),%rsp | 2029 » leaq» -320(%rbp,%r9,2),%rbp |
1966 jmp L$from_sp_done | 2030 jmp L$from_sp_done |
1967 | 2031 |
1968 .p2align 5 | 2032 .p2align 5 |
1969 L$from_sp_alt: | 2033 L$from_sp_alt: |
1970 leaq 4096-320(,%r9,2),%r10 | 2034 leaq 4096-320(,%r9,2),%r10 |
1971 » leaq» -320(%rsp,%r9,2),%rsp | 2035 » leaq» -320(%rbp,%r9,2),%rbp |
1972 subq %r10,%r11 | 2036 subq %r10,%r11 |
1973 movq $0,%r10 | 2037 movq $0,%r10 |
1974 cmovcq %r10,%r11 | 2038 cmovcq %r10,%r11 |
1975 » subq» %r11,%rsp | 2039 » subq» %r11,%rbp |
1976 L$from_sp_done: | 2040 L$from_sp_done: |
1977 » andq» $-64,%rsp | 2041 » andq» $-64,%rbp |
| 2042 » movq» %rsp,%r11 |
| 2043 » subq» %rbp,%r11 |
| 2044 » andq» $-4096,%r11 |
| 2045 » leaq» (%r11,%rbp,1),%rsp |
| 2046 » movq» (%rsp),%r10 |
| 2047 » cmpq» %rbp,%rsp |
| 2048 » ja» L$from_page_walk |
| 2049 » jmp» L$from_page_walk_done |
| 2050 |
| 2051 L$from_page_walk: |
| 2052 » leaq» -4096(%rsp),%rsp |
| 2053 » movq» (%rsp),%r10 |
| 2054 » cmpq» %rbp,%rsp |
| 2055 » ja» L$from_page_walk |
| 2056 L$from_page_walk_done: |
| 2057 |
1978 movq %r9,%r10 | 2058 movq %r9,%r10 |
1979 negq %r9 | 2059 negq %r9 |
1980 | 2060 |
1981 | 2061 |
1982 | 2062 |
1983 | 2063 |
1984 | 2064 |
1985 | 2065 |
1986 | 2066 |
1987 | 2067 |
(...skipping 242 matching lines...) |
2230 leaq (%r10),%rsp | 2310 leaq (%r10),%rsp |
2231 .byte 0xf3,0xc3 | 2311 .byte 0xf3,0xc3 |
2232 L$SEH_end_bn_gather5: | 2312 L$SEH_end_bn_gather5: |
2233 | 2313 |
2234 .p2align 6 | 2314 .p2align 6 |
2235 L$inc: | 2315 L$inc: |
2236 .long 0,0, 1,1 | 2316 .long 0,0, 1,1 |
2237 .long 2,2, 2,2 | 2317 .long 2,2, 2,2 |
2238 .byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 | 2318 .byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 |
2239 #endif | 2319 #endif |
OLD | NEW |