OLD | NEW |
1 #if defined(__x86_64__) | 1 #if defined(__x86_64__) |
2 .text | 2 .text |
3 | 3 |
4 .extern OPENSSL_ia32cap_P | 4 .extern OPENSSL_ia32cap_P |
5 .hidden OPENSSL_ia32cap_P | 5 .hidden OPENSSL_ia32cap_P |
6 | 6 |
7 .globl bn_mul_mont_gather5 | 7 .globl bn_mul_mont_gather5 |
8 .hidden bn_mul_mont_gather5 | 8 .hidden bn_mul_mont_gather5 |
9 .type bn_mul_mont_gather5,@function | 9 .type bn_mul_mont_gather5,@function |
10 .align 64 | 10 .align 64 |
11 bn_mul_mont_gather5: | 11 bn_mul_mont_gather5: |
| 12 movl %r9d,%r9d |
| 13 movq %rsp,%rax |
12 testl $7,%r9d | 14 testl $7,%r9d |
13 jnz .Lmul_enter | 15 jnz .Lmul_enter |
14 jmp .Lmul4x_enter | 16 jmp .Lmul4x_enter |
15 | 17 |
16 .align 16 | 18 .align 16 |
17 .Lmul_enter: | 19 .Lmul_enter: |
18 movl %r9d,%r9d | |
19 movq %rsp,%rax | |
20 movd 8(%rsp),%xmm5 | 20 movd 8(%rsp),%xmm5 |
21 leaq .Linc(%rip),%r10 | |
22 pushq %rbx | 21 pushq %rbx |
23 pushq %rbp | 22 pushq %rbp |
24 pushq %r12 | 23 pushq %r12 |
25 pushq %r13 | 24 pushq %r13 |
26 pushq %r14 | 25 pushq %r14 |
27 pushq %r15 | 26 pushq %r15 |
28 | 27 |
29 » leaq» 2(%r9),%r11 | 28 » negq» %r9 |
30 » negq» %r11 | 29 » movq» %rsp,%r11 |
31 » leaq» -264(%rsp,%r11,8),%rsp | 30 » leaq» -280(%rsp,%r9,8),%r10 |
32 » andq» $-1024,%rsp | 31 » negq» %r9 |
| 32 » andq» $-1024,%r10 |
33 | 33 |
| 34 |
| 35 |
| 36 |
| 37 |
| 38 |
| 39 |
| 40 |
| 41 |
| 42 subq %r10,%r11 |
| 43 andq $-4096,%r11 |
| 44 leaq (%r10,%r11,1),%rsp |
| 45 movq (%rsp),%r11 |
| 46 cmpq %r10,%rsp |
| 47 ja .Lmul_page_walk |
| 48 jmp .Lmul_page_walk_done |
| 49 |
| 50 .Lmul_page_walk: |
| 51 leaq -4096(%rsp),%rsp |
| 52 movq (%rsp),%r11 |
| 53 cmpq %r10,%rsp |
| 54 ja .Lmul_page_walk |
| 55 .Lmul_page_walk_done: |
| 56 |
| 57 leaq .Linc(%rip),%r10 |
34 movq %rax,8(%rsp,%r9,8) | 58 movq %rax,8(%rsp,%r9,8) |
35 .Lmul_body: | 59 .Lmul_body: |
| 60 |
36 leaq 128(%rdx),%r12 | 61 leaq 128(%rdx),%r12 |
37 movdqa 0(%r10),%xmm0 | 62 movdqa 0(%r10),%xmm0 |
38 movdqa 16(%r10),%xmm1 | 63 movdqa 16(%r10),%xmm1 |
39 leaq 24-112(%rsp,%r9,8),%r10 | 64 leaq 24-112(%rsp,%r9,8),%r10 |
40 andq $-16,%r10 | 65 andq $-16,%r10 |
41 | 66 |
42 pshufd $0,%xmm5,%xmm5 | 67 pshufd $0,%xmm5,%xmm5 |
43 movdqa %xmm1,%xmm4 | 68 movdqa %xmm1,%xmm4 |
44 movdqa %xmm1,%xmm2 | 69 movdqa %xmm1,%xmm2 |
45 paddd %xmm0,%xmm1 | 70 paddd %xmm0,%xmm1 |
(...skipping 318 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
364 .align 16 | 389 .align 16 |
365 .Lsub: sbbq (%rcx,%r14,8),%rax | 390 .Lsub: sbbq (%rcx,%r14,8),%rax |
366 movq %rax,(%rdi,%r14,8) | 391 movq %rax,(%rdi,%r14,8) |
367 movq 8(%rsi,%r14,8),%rax | 392 movq 8(%rsi,%r14,8),%rax |
368 leaq 1(%r14),%r14 | 393 leaq 1(%r14),%r14 |
369 decq %r15 | 394 decq %r15 |
370 jnz .Lsub | 395 jnz .Lsub |
371 | 396 |
372 sbbq $0,%rax | 397 sbbq $0,%rax |
373 xorq %r14,%r14 | 398 xorq %r14,%r14 |
| 399 andq %rax,%rsi |
| 400 notq %rax |
| 401 movq %rdi,%rcx |
| 402 andq %rax,%rcx |
374 movq %r9,%r15 | 403 movq %r9,%r15 |
| 404 orq %rcx,%rsi |
375 .align 16 | 405 .align 16 |
376 .Lcopy: | 406 .Lcopy: |
377 » movq» (%rsp,%r14,8),%rsi | 407 » movq» (%rsi,%r14,8),%rax |
378 » movq» (%rdi,%r14,8),%rcx | |
379 » xorq» %rcx,%rsi | |
380 » andq» %rax,%rsi | |
381 » xorq» %rcx,%rsi | |
382 movq %r14,(%rsp,%r14,8) | 408 movq %r14,(%rsp,%r14,8) |
383 » movq» %rsi,(%rdi,%r14,8) | 409 » movq» %rax,(%rdi,%r14,8) |
384 leaq 1(%r14),%r14 | 410 leaq 1(%r14),%r14 |
385 subq $1,%r15 | 411 subq $1,%r15 |
386 jnz .Lcopy | 412 jnz .Lcopy |
387 | 413 |
388 movq 8(%rsp,%r9,8),%rsi | 414 movq 8(%rsp,%r9,8),%rsi |
389 movq $1,%rax | 415 movq $1,%rax |
390 | 416 |
391 movq -48(%rsi),%r15 | 417 movq -48(%rsi),%r15 |
392 movq -40(%rsi),%r14 | 418 movq -40(%rsi),%r14 |
393 movq -32(%rsi),%r13 | 419 movq -32(%rsi),%r13 |
394 movq -24(%rsi),%r12 | 420 movq -24(%rsi),%r12 |
395 movq -16(%rsi),%rbp | 421 movq -16(%rsi),%rbp |
396 movq -8(%rsi),%rbx | 422 movq -8(%rsi),%rbx |
397 leaq (%rsi),%rsp | 423 leaq (%rsi),%rsp |
398 .Lmul_epilogue: | 424 .Lmul_epilogue: |
399 .byte 0xf3,0xc3 | 425 .byte 0xf3,0xc3 |
400 .size bn_mul_mont_gather5,.-bn_mul_mont_gather5 | 426 .size bn_mul_mont_gather5,.-bn_mul_mont_gather5 |
401 .type bn_mul4x_mont_gather5,@function | 427 .type bn_mul4x_mont_gather5,@function |
402 .align 32 | 428 .align 32 |
403 bn_mul4x_mont_gather5: | 429 bn_mul4x_mont_gather5: |
404 .Lmul4x_enter: | |
405 .byte 0x67 | 430 .byte 0x67 |
406 movq %rsp,%rax | 431 movq %rsp,%rax |
| 432 .Lmul4x_enter: |
407 pushq %rbx | 433 pushq %rbx |
408 pushq %rbp | 434 pushq %rbp |
409 pushq %r12 | 435 pushq %r12 |
410 pushq %r13 | 436 pushq %r13 |
411 pushq %r14 | 437 pushq %r14 |
412 pushq %r15 | 438 pushq %r15 |
| 439 .Lmul4x_prologue: |
413 | 440 |
414 .byte 0x67 | 441 .byte 0x67 |
415 shll $3,%r9d | 442 shll $3,%r9d |
416 leaq (%r9,%r9,2),%r10 | 443 leaq (%r9,%r9,2),%r10 |
417 negq %r9 | 444 negq %r9 |
418 | 445 |
419 | 446 |
420 | 447 |
421 | 448 |
422 | 449 |
423 | 450 |
424 | 451 |
425 | 452 |
426 | 453 |
427 | 454 |
428 leaq -320(%rsp,%r9,2),%r11 | 455 leaq -320(%rsp,%r9,2),%r11 |
| 456 movq %rsp,%rbp |
429 subq %rdi,%r11 | 457 subq %rdi,%r11 |
430 andq $4095,%r11 | 458 andq $4095,%r11 |
431 cmpq %r11,%r10 | 459 cmpq %r11,%r10 |
432 jb .Lmul4xsp_alt | 460 jb .Lmul4xsp_alt |
433 » subq» %r11,%rsp | 461 » subq» %r11,%rbp |
434 » leaq» -320(%rsp,%r9,2),%rsp | 462 » leaq» -320(%rbp,%r9,2),%rbp |
435 jmp .Lmul4xsp_done | 463 jmp .Lmul4xsp_done |
436 | 464 |
437 .align 32 | 465 .align 32 |
438 .Lmul4xsp_alt: | 466 .Lmul4xsp_alt: |
439 leaq 4096-320(,%r9,2),%r10 | 467 leaq 4096-320(,%r9,2),%r10 |
440 » leaq» -320(%rsp,%r9,2),%rsp | 468 » leaq» -320(%rbp,%r9,2),%rbp |
441 subq %r10,%r11 | 469 subq %r10,%r11 |
442 movq $0,%r10 | 470 movq $0,%r10 |
443 cmovcq %r10,%r11 | 471 cmovcq %r10,%r11 |
444 » subq» %r11,%rsp | 472 » subq» %r11,%rbp |
445 .Lmul4xsp_done: | 473 .Lmul4xsp_done: |
446 » andq» $-64,%rsp | 474 » andq» $-64,%rbp |
| 475 » movq» %rsp,%r11 |
| 476 » subq» %rbp,%r11 |
| 477 » andq» $-4096,%r11 |
| 478 » leaq» (%r11,%rbp,1),%rsp |
| 479 » movq» (%rsp),%r10 |
| 480 » cmpq» %rbp,%rsp |
| 481 » ja» .Lmul4x_page_walk |
| 482 » jmp» .Lmul4x_page_walk_done |
| 483 |
| 484 .Lmul4x_page_walk: |
| 485 » leaq» -4096(%rsp),%rsp |
| 486 » movq» (%rsp),%r10 |
| 487 » cmpq» %rbp,%rsp |
| 488 » ja» .Lmul4x_page_walk |
| 489 .Lmul4x_page_walk_done: |
| 490 |
447 negq %r9 | 491 negq %r9 |
448 | 492 |
449 movq %rax,40(%rsp) | 493 movq %rax,40(%rsp) |
450 .Lmul4x_body: | 494 .Lmul4x_body: |
451 | 495 |
452 call mul4x_internal | 496 call mul4x_internal |
453 | 497 |
454 movq 40(%rsp),%rsi | 498 movq 40(%rsp),%rsi |
455 movq $1,%rax | 499 movq $1,%rax |
456 | 500 |
(...skipping 538 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
995 .type bn_power5,@function | 1039 .type bn_power5,@function |
996 .align 32 | 1040 .align 32 |
997 bn_power5: | 1041 bn_power5: |
998 movq %rsp,%rax | 1042 movq %rsp,%rax |
999 pushq %rbx | 1043 pushq %rbx |
1000 pushq %rbp | 1044 pushq %rbp |
1001 pushq %r12 | 1045 pushq %r12 |
1002 pushq %r13 | 1046 pushq %r13 |
1003 pushq %r14 | 1047 pushq %r14 |
1004 pushq %r15 | 1048 pushq %r15 |
| 1049 .Lpower5_prologue: |
1005 | 1050 |
1006 shll $3,%r9d | 1051 shll $3,%r9d |
1007 leal (%r9,%r9,2),%r10d | 1052 leal (%r9,%r9,2),%r10d |
1008 negq %r9 | 1053 negq %r9 |
1009 movq (%r8),%r8 | 1054 movq (%r8),%r8 |
1010 | 1055 |
1011 | 1056 |
1012 | 1057 |
1013 | 1058 |
1014 | 1059 |
1015 | 1060 |
1016 | 1061 |
1017 | 1062 |
1018 leaq -320(%rsp,%r9,2),%r11 | 1063 leaq -320(%rsp,%r9,2),%r11 |
| 1064 movq %rsp,%rbp |
1019 subq %rdi,%r11 | 1065 subq %rdi,%r11 |
1020 andq $4095,%r11 | 1066 andq $4095,%r11 |
1021 cmpq %r11,%r10 | 1067 cmpq %r11,%r10 |
1022 jb .Lpwr_sp_alt | 1068 jb .Lpwr_sp_alt |
1023 » subq» %r11,%rsp | 1069 » subq» %r11,%rbp |
1024 » leaq» -320(%rsp,%r9,2),%rsp | 1070 » leaq» -320(%rbp,%r9,2),%rbp |
1025 jmp .Lpwr_sp_done | 1071 jmp .Lpwr_sp_done |
1026 | 1072 |
1027 .align 32 | 1073 .align 32 |
1028 .Lpwr_sp_alt: | 1074 .Lpwr_sp_alt: |
1029 leaq 4096-320(,%r9,2),%r10 | 1075 leaq 4096-320(,%r9,2),%r10 |
1030 » leaq» -320(%rsp,%r9,2),%rsp | 1076 » leaq» -320(%rbp,%r9,2),%rbp |
1031 subq %r10,%r11 | 1077 subq %r10,%r11 |
1032 movq $0,%r10 | 1078 movq $0,%r10 |
1033 cmovcq %r10,%r11 | 1079 cmovcq %r10,%r11 |
1034 » subq» %r11,%rsp | 1080 » subq» %r11,%rbp |
1035 .Lpwr_sp_done: | 1081 .Lpwr_sp_done: |
1036 » andq» $-64,%rsp | 1082 » andq» $-64,%rbp |
| 1083 » movq» %rsp,%r11 |
| 1084 » subq» %rbp,%r11 |
| 1085 » andq» $-4096,%r11 |
| 1086 » leaq» (%r11,%rbp,1),%rsp |
| 1087 » movq» (%rsp),%r10 |
| 1088 » cmpq» %rbp,%rsp |
| 1089 » ja» .Lpwr_page_walk |
| 1090 » jmp» .Lpwr_page_walk_done |
| 1091 |
| 1092 .Lpwr_page_walk: |
| 1093 » leaq» -4096(%rsp),%rsp |
| 1094 » movq» (%rsp),%r10 |
| 1095 » cmpq» %rbp,%rsp |
| 1096 » ja» .Lpwr_page_walk |
| 1097 .Lpwr_page_walk_done: |
| 1098 |
1037 movq %r9,%r10 | 1099 movq %r9,%r10 |
1038 negq %r9 | 1100 negq %r9 |
1039 | 1101 |
1040 | 1102 |
1041 | 1103 |
1042 | 1104 |
1043 | 1105 |
1044 | 1106 |
1045 | 1107 |
1046 | 1108 |
(...skipping 890 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1937 .align 32 | 1999 .align 32 |
1938 bn_from_mont8x: | 2000 bn_from_mont8x: |
1939 .byte 0x67 | 2001 .byte 0x67 |
1940 movq %rsp,%rax | 2002 movq %rsp,%rax |
1941 pushq %rbx | 2003 pushq %rbx |
1942 pushq %rbp | 2004 pushq %rbp |
1943 pushq %r12 | 2005 pushq %r12 |
1944 pushq %r13 | 2006 pushq %r13 |
1945 pushq %r14 | 2007 pushq %r14 |
1946 pushq %r15 | 2008 pushq %r15 |
| 2009 .Lfrom_prologue: |
1947 | 2010 |
1948 shll $3,%r9d | 2011 shll $3,%r9d |
1949 leaq (%r9,%r9,2),%r10 | 2012 leaq (%r9,%r9,2),%r10 |
1950 negq %r9 | 2013 negq %r9 |
1951 movq (%r8),%r8 | 2014 movq (%r8),%r8 |
1952 | 2015 |
1953 | 2016 |
1954 | 2017 |
1955 | 2018 |
1956 | 2019 |
1957 | 2020 |
1958 | 2021 |
1959 | 2022 |
1960 leaq -320(%rsp,%r9,2),%r11 | 2023 leaq -320(%rsp,%r9,2),%r11 |
| 2024 movq %rsp,%rbp |
1961 subq %rdi,%r11 | 2025 subq %rdi,%r11 |
1962 andq $4095,%r11 | 2026 andq $4095,%r11 |
1963 cmpq %r11,%r10 | 2027 cmpq %r11,%r10 |
1964 jb .Lfrom_sp_alt | 2028 jb .Lfrom_sp_alt |
1965 » subq» %r11,%rsp | 2029 » subq» %r11,%rbp |
1966 » leaq» -320(%rsp,%r9,2),%rsp | 2030 » leaq» -320(%rbp,%r9,2),%rbp |
1967 jmp .Lfrom_sp_done | 2031 jmp .Lfrom_sp_done |
1968 | 2032 |
1969 .align 32 | 2033 .align 32 |
1970 .Lfrom_sp_alt: | 2034 .Lfrom_sp_alt: |
1971 leaq 4096-320(,%r9,2),%r10 | 2035 leaq 4096-320(,%r9,2),%r10 |
1972 » leaq» -320(%rsp,%r9,2),%rsp | 2036 » leaq» -320(%rbp,%r9,2),%rbp |
1973 subq %r10,%r11 | 2037 subq %r10,%r11 |
1974 movq $0,%r10 | 2038 movq $0,%r10 |
1975 cmovcq %r10,%r11 | 2039 cmovcq %r10,%r11 |
1976 » subq» %r11,%rsp | 2040 » subq» %r11,%rbp |
1977 .Lfrom_sp_done: | 2041 .Lfrom_sp_done: |
1978 » andq» $-64,%rsp | 2042 » andq» $-64,%rbp |
| 2043 » movq» %rsp,%r11 |
| 2044 » subq» %rbp,%r11 |
| 2045 » andq» $-4096,%r11 |
| 2046 » leaq» (%r11,%rbp,1),%rsp |
| 2047 » movq» (%rsp),%r10 |
| 2048 » cmpq» %rbp,%rsp |
| 2049 » ja» .Lfrom_page_walk |
| 2050 » jmp» .Lfrom_page_walk_done |
| 2051 |
| 2052 .Lfrom_page_walk: |
| 2053 » leaq» -4096(%rsp),%rsp |
| 2054 » movq» (%rsp),%r10 |
| 2055 » cmpq» %rbp,%rsp |
| 2056 » ja» .Lfrom_page_walk |
| 2057 .Lfrom_page_walk_done: |
| 2058 |
1979 movq %r9,%r10 | 2059 movq %r9,%r10 |
1980 negq %r9 | 2060 negq %r9 |
1981 | 2061 |
1982 | 2062 |
1983 | 2063 |
1984 | 2064 |
1985 | 2065 |
1986 | 2066 |
1987 | 2067 |
1988 | 2068 |
(...skipping 242 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2231 leaq (%r10),%rsp | 2311 leaq (%r10),%rsp |
2232 .byte 0xf3,0xc3 | 2312 .byte 0xf3,0xc3 |
2233 .LSEH_end_bn_gather5: | 2313 .LSEH_end_bn_gather5: |
2234 .size bn_gather5,.-bn_gather5 | 2314 .size bn_gather5,.-bn_gather5 |
2235 .align 64 | 2315 .align 64 |
2236 .Linc: | 2316 .Linc: |
2237 .long 0,0, 1,1 | 2317 .long 0,0, 1,1 |
2238 .long 2,2, 2,2 | 2318 .long 2,2, 2,2 |
2239 .byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105
,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97
,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71
,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,1
11,114,103,62,0 | 2319 .byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105
,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97
,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71
,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,1
11,114,103,62,0 |
2240 #endif | 2320 #endif |
OLD | NEW |