| OLD | NEW |
| 1 #if defined(__x86_64__) | 1 #if defined(__x86_64__) |
| 2 .text | 2 .text |
| 3 | 3 |
| 4 .extern OPENSSL_ia32cap_P | 4 .extern OPENSSL_ia32cap_P |
| 5 .hidden OPENSSL_ia32cap_P | 5 .hidden OPENSSL_ia32cap_P |
| 6 | 6 |
| 7 .globl bn_mul_mont_gather5 | 7 .globl bn_mul_mont_gather5 |
| 8 .hidden bn_mul_mont_gather5 | 8 .hidden bn_mul_mont_gather5 |
| 9 .type bn_mul_mont_gather5,@function | 9 .type bn_mul_mont_gather5,@function |
| 10 .align 64 | 10 .align 64 |
| 11 bn_mul_mont_gather5: | 11 bn_mul_mont_gather5: |
| 12 movl %r9d,%r9d |
| 13 movq %rsp,%rax |
| 12 testl $7,%r9d | 14 testl $7,%r9d |
| 13 jnz .Lmul_enter | 15 jnz .Lmul_enter |
| 14 jmp .Lmul4x_enter | 16 jmp .Lmul4x_enter |
| 15 | 17 |
| 16 .align 16 | 18 .align 16 |
| 17 .Lmul_enter: | 19 .Lmul_enter: |
| 18 movl %r9d,%r9d | |
| 19 movq %rsp,%rax | |
| 20 movd 8(%rsp),%xmm5 | 20 movd 8(%rsp),%xmm5 |
| 21 leaq .Linc(%rip),%r10 | |
| 22 pushq %rbx | 21 pushq %rbx |
| 23 pushq %rbp | 22 pushq %rbp |
| 24 pushq %r12 | 23 pushq %r12 |
| 25 pushq %r13 | 24 pushq %r13 |
| 26 pushq %r14 | 25 pushq %r14 |
| 27 pushq %r15 | 26 pushq %r15 |
| 28 | 27 |
| 29 » leaq» 2(%r9),%r11 | 28 » negq» %r9 |
| 30 » negq» %r11 | 29 » movq» %rsp,%r11 |
| 31 » leaq» -264(%rsp,%r11,8),%rsp | 30 » leaq» -280(%rsp,%r9,8),%r10 |
| 32 » andq» $-1024,%rsp | 31 » negq» %r9 |
| 32 » andq» $-1024,%r10 |
| 33 | 33 |
| 34 |
| 35 |
| 36 |
| 37 |
| 38 |
| 39 |
| 40 |
| 41 |
| 42 subq %r10,%r11 |
| 43 andq $-4096,%r11 |
| 44 leaq (%r10,%r11,1),%rsp |
| 45 movq (%rsp),%r11 |
| 46 cmpq %r10,%rsp |
| 47 ja .Lmul_page_walk |
| 48 jmp .Lmul_page_walk_done |
| 49 |
| 50 .Lmul_page_walk: |
| 51 leaq -4096(%rsp),%rsp |
| 52 movq (%rsp),%r11 |
| 53 cmpq %r10,%rsp |
| 54 ja .Lmul_page_walk |
| 55 .Lmul_page_walk_done: |
| 56 |
| 57 leaq .Linc(%rip),%r10 |
| 34 movq %rax,8(%rsp,%r9,8) | 58 movq %rax,8(%rsp,%r9,8) |
| 35 .Lmul_body: | 59 .Lmul_body: |
| 60 |
| 36 leaq 128(%rdx),%r12 | 61 leaq 128(%rdx),%r12 |
| 37 movdqa 0(%r10),%xmm0 | 62 movdqa 0(%r10),%xmm0 |
| 38 movdqa 16(%r10),%xmm1 | 63 movdqa 16(%r10),%xmm1 |
| 39 leaq 24-112(%rsp,%r9,8),%r10 | 64 leaq 24-112(%rsp,%r9,8),%r10 |
| 40 andq $-16,%r10 | 65 andq $-16,%r10 |
| 41 | 66 |
| 42 pshufd $0,%xmm5,%xmm5 | 67 pshufd $0,%xmm5,%xmm5 |
| 43 movdqa %xmm1,%xmm4 | 68 movdqa %xmm1,%xmm4 |
| 44 movdqa %xmm1,%xmm2 | 69 movdqa %xmm1,%xmm2 |
| 45 paddd %xmm0,%xmm1 | 70 paddd %xmm0,%xmm1 |
| (...skipping 318 matching lines...) |
| 364 .align 16 | 389 .align 16 |
| 365 .Lsub: sbbq (%rcx,%r14,8),%rax | 390 .Lsub: sbbq (%rcx,%r14,8),%rax |
| 366 movq %rax,(%rdi,%r14,8) | 391 movq %rax,(%rdi,%r14,8) |
| 367 movq 8(%rsi,%r14,8),%rax | 392 movq 8(%rsi,%r14,8),%rax |
| 368 leaq 1(%r14),%r14 | 393 leaq 1(%r14),%r14 |
| 369 decq %r15 | 394 decq %r15 |
| 370 jnz .Lsub | 395 jnz .Lsub |
| 371 | 396 |
| 372 sbbq $0,%rax | 397 sbbq $0,%rax |
| 373 xorq %r14,%r14 | 398 xorq %r14,%r14 |
| 399 andq %rax,%rsi |
| 400 notq %rax |
| 401 movq %rdi,%rcx |
| 402 andq %rax,%rcx |
| 374 movq %r9,%r15 | 403 movq %r9,%r15 |
| 404 orq %rcx,%rsi |
| 375 .align 16 | 405 .align 16 |
| 376 .Lcopy: | 406 .Lcopy: |
| 377 » movq» (%rsp,%r14,8),%rsi | 407 » movq» (%rsi,%r14,8),%rax |
| 378 » movq» (%rdi,%r14,8),%rcx | |
| 379 » xorq» %rcx,%rsi | |
| 380 » andq» %rax,%rsi | |
| 381 » xorq» %rcx,%rsi | |
| 382 movq %r14,(%rsp,%r14,8) | 408 movq %r14,(%rsp,%r14,8) |
| 383 » movq» %rsi,(%rdi,%r14,8) | 409 » movq» %rax,(%rdi,%r14,8) |
| 384 leaq 1(%r14),%r14 | 410 leaq 1(%r14),%r14 |
| 385 subq $1,%r15 | 411 subq $1,%r15 |
| 386 jnz .Lcopy | 412 jnz .Lcopy |
| 387 | 413 |
| 388 movq 8(%rsp,%r9,8),%rsi | 414 movq 8(%rsp,%r9,8),%rsi |
| 389 movq $1,%rax | 415 movq $1,%rax |
| 390 | 416 |
| 391 movq -48(%rsi),%r15 | 417 movq -48(%rsi),%r15 |
| 392 movq -40(%rsi),%r14 | 418 movq -40(%rsi),%r14 |
| 393 movq -32(%rsi),%r13 | 419 movq -32(%rsi),%r13 |
| 394 movq -24(%rsi),%r12 | 420 movq -24(%rsi),%r12 |
| 395 movq -16(%rsi),%rbp | 421 movq -16(%rsi),%rbp |
| 396 movq -8(%rsi),%rbx | 422 movq -8(%rsi),%rbx |
| 397 leaq (%rsi),%rsp | 423 leaq (%rsi),%rsp |
| 398 .Lmul_epilogue: | 424 .Lmul_epilogue: |
| 399 .byte 0xf3,0xc3 | 425 .byte 0xf3,0xc3 |
| 400 .size bn_mul_mont_gather5,.-bn_mul_mont_gather5 | 426 .size bn_mul_mont_gather5,.-bn_mul_mont_gather5 |
| 401 .type bn_mul4x_mont_gather5,@function | 427 .type bn_mul4x_mont_gather5,@function |
| 402 .align 32 | 428 .align 32 |
| 403 bn_mul4x_mont_gather5: | 429 bn_mul4x_mont_gather5: |
| 404 .Lmul4x_enter: | |
| 405 .byte 0x67 | 430 .byte 0x67 |
| 406 movq %rsp,%rax | 431 movq %rsp,%rax |
| 432 .Lmul4x_enter: |
| 407 pushq %rbx | 433 pushq %rbx |
| 408 pushq %rbp | 434 pushq %rbp |
| 409 pushq %r12 | 435 pushq %r12 |
| 410 pushq %r13 | 436 pushq %r13 |
| 411 pushq %r14 | 437 pushq %r14 |
| 412 pushq %r15 | 438 pushq %r15 |
| 439 .Lmul4x_prologue: |
| 413 | 440 |
| 414 .byte 0x67 | 441 .byte 0x67 |
| 415 shll $3,%r9d | 442 shll $3,%r9d |
| 416 leaq (%r9,%r9,2),%r10 | 443 leaq (%r9,%r9,2),%r10 |
| 417 negq %r9 | 444 negq %r9 |
| 418 | 445 |
| 419 | 446 |
| 420 | 447 |
| 421 | 448 |
| 422 | 449 |
| 423 | 450 |
| 424 | 451 |
| 425 | 452 |
| 426 | 453 |
| 427 | 454 |
| 428 leaq -320(%rsp,%r9,2),%r11 | 455 leaq -320(%rsp,%r9,2),%r11 |
| 456 movq %rsp,%rbp |
| 429 subq %rdi,%r11 | 457 subq %rdi,%r11 |
| 430 andq $4095,%r11 | 458 andq $4095,%r11 |
| 431 cmpq %r11,%r10 | 459 cmpq %r11,%r10 |
| 432 jb .Lmul4xsp_alt | 460 jb .Lmul4xsp_alt |
| 433 » subq» %r11,%rsp | 461 » subq» %r11,%rbp |
| 434 » leaq» -320(%rsp,%r9,2),%rsp | 462 » leaq» -320(%rbp,%r9,2),%rbp |
| 435 jmp .Lmul4xsp_done | 463 jmp .Lmul4xsp_done |
| 436 | 464 |
| 437 .align 32 | 465 .align 32 |
| 438 .Lmul4xsp_alt: | 466 .Lmul4xsp_alt: |
| 439 leaq 4096-320(,%r9,2),%r10 | 467 leaq 4096-320(,%r9,2),%r10 |
| 440 » leaq» -320(%rsp,%r9,2),%rsp | 468 » leaq» -320(%rbp,%r9,2),%rbp |
| 441 subq %r10,%r11 | 469 subq %r10,%r11 |
| 442 movq $0,%r10 | 470 movq $0,%r10 |
| 443 cmovcq %r10,%r11 | 471 cmovcq %r10,%r11 |
| 444 » subq» %r11,%rsp | 472 » subq» %r11,%rbp |
| 445 .Lmul4xsp_done: | 473 .Lmul4xsp_done: |
| 446 » andq» $-64,%rsp | 474 » andq» $-64,%rbp |
| 475 » movq» %rsp,%r11 |
| 476 » subq» %rbp,%r11 |
| 477 » andq» $-4096,%r11 |
| 478 » leaq» (%r11,%rbp,1),%rsp |
| 479 » movq» (%rsp),%r10 |
| 480 » cmpq» %rbp,%rsp |
| 481 » ja» .Lmul4x_page_walk |
| 482 » jmp» .Lmul4x_page_walk_done |
| 483 |
| 484 .Lmul4x_page_walk: |
| 485 » leaq» -4096(%rsp),%rsp |
| 486 » movq» (%rsp),%r10 |
| 487 » cmpq» %rbp,%rsp |
| 488 » ja» .Lmul4x_page_walk |
| 489 .Lmul4x_page_walk_done: |
| 490 |
| 447 negq %r9 | 491 negq %r9 |
| 448 | 492 |
| 449 movq %rax,40(%rsp) | 493 movq %rax,40(%rsp) |
| 450 .Lmul4x_body: | 494 .Lmul4x_body: |
| 451 | 495 |
| 452 call mul4x_internal | 496 call mul4x_internal |
| 453 | 497 |
| 454 movq 40(%rsp),%rsi | 498 movq 40(%rsp),%rsi |
| 455 movq $1,%rax | 499 movq $1,%rax |
| 456 | 500 |
| (...skipping 538 matching lines...) |
| 995 .type bn_power5,@function | 1039 .type bn_power5,@function |
| 996 .align 32 | 1040 .align 32 |
| 997 bn_power5: | 1041 bn_power5: |
| 998 movq %rsp,%rax | 1042 movq %rsp,%rax |
| 999 pushq %rbx | 1043 pushq %rbx |
| 1000 pushq %rbp | 1044 pushq %rbp |
| 1001 pushq %r12 | 1045 pushq %r12 |
| 1002 pushq %r13 | 1046 pushq %r13 |
| 1003 pushq %r14 | 1047 pushq %r14 |
| 1004 pushq %r15 | 1048 pushq %r15 |
| 1049 .Lpower5_prologue: |
| 1005 | 1050 |
| 1006 shll $3,%r9d | 1051 shll $3,%r9d |
| 1007 leal (%r9,%r9,2),%r10d | 1052 leal (%r9,%r9,2),%r10d |
| 1008 negq %r9 | 1053 negq %r9 |
| 1009 movq (%r8),%r8 | 1054 movq (%r8),%r8 |
| 1010 | 1055 |
| 1011 | 1056 |
| 1012 | 1057 |
| 1013 | 1058 |
| 1014 | 1059 |
| 1015 | 1060 |
| 1016 | 1061 |
| 1017 | 1062 |
| 1018 leaq -320(%rsp,%r9,2),%r11 | 1063 leaq -320(%rsp,%r9,2),%r11 |
| 1064 movq %rsp,%rbp |
| 1019 subq %rdi,%r11 | 1065 subq %rdi,%r11 |
| 1020 andq $4095,%r11 | 1066 andq $4095,%r11 |
| 1021 cmpq %r11,%r10 | 1067 cmpq %r11,%r10 |
| 1022 jb .Lpwr_sp_alt | 1068 jb .Lpwr_sp_alt |
| 1023 » subq» %r11,%rsp | 1069 » subq» %r11,%rbp |
| 1024 » leaq» -320(%rsp,%r9,2),%rsp | 1070 » leaq» -320(%rbp,%r9,2),%rbp |
| 1025 jmp .Lpwr_sp_done | 1071 jmp .Lpwr_sp_done |
| 1026 | 1072 |
| 1027 .align 32 | 1073 .align 32 |
| 1028 .Lpwr_sp_alt: | 1074 .Lpwr_sp_alt: |
| 1029 leaq 4096-320(,%r9,2),%r10 | 1075 leaq 4096-320(,%r9,2),%r10 |
| 1030 » leaq» -320(%rsp,%r9,2),%rsp | 1076 » leaq» -320(%rbp,%r9,2),%rbp |
| 1031 subq %r10,%r11 | 1077 subq %r10,%r11 |
| 1032 movq $0,%r10 | 1078 movq $0,%r10 |
| 1033 cmovcq %r10,%r11 | 1079 cmovcq %r10,%r11 |
| 1034 » subq» %r11,%rsp | 1080 » subq» %r11,%rbp |
| 1035 .Lpwr_sp_done: | 1081 .Lpwr_sp_done: |
| 1036 » andq» $-64,%rsp | 1082 » andq» $-64,%rbp |
| 1083 » movq» %rsp,%r11 |
| 1084 » subq» %rbp,%r11 |
| 1085 » andq» $-4096,%r11 |
| 1086 » leaq» (%r11,%rbp,1),%rsp |
| 1087 » movq» (%rsp),%r10 |
| 1088 » cmpq» %rbp,%rsp |
| 1089 » ja» .Lpwr_page_walk |
| 1090 » jmp» .Lpwr_page_walk_done |
| 1091 |
| 1092 .Lpwr_page_walk: |
| 1093 » leaq» -4096(%rsp),%rsp |
| 1094 » movq» (%rsp),%r10 |
| 1095 » cmpq» %rbp,%rsp |
| 1096 » ja» .Lpwr_page_walk |
| 1097 .Lpwr_page_walk_done: |
| 1098 |
| 1037 movq %r9,%r10 | 1099 movq %r9,%r10 |
| 1038 negq %r9 | 1100 negq %r9 |
| 1039 | 1101 |
| 1040 | 1102 |
| 1041 | 1103 |
| 1042 | 1104 |
| 1043 | 1105 |
| 1044 | 1106 |
| 1045 | 1107 |
| 1046 | 1108 |
| (...skipping 890 matching lines...) |
| 1937 .align 32 | 1999 .align 32 |
| 1938 bn_from_mont8x: | 2000 bn_from_mont8x: |
| 1939 .byte 0x67 | 2001 .byte 0x67 |
| 1940 movq %rsp,%rax | 2002 movq %rsp,%rax |
| 1941 pushq %rbx | 2003 pushq %rbx |
| 1942 pushq %rbp | 2004 pushq %rbp |
| 1943 pushq %r12 | 2005 pushq %r12 |
| 1944 pushq %r13 | 2006 pushq %r13 |
| 1945 pushq %r14 | 2007 pushq %r14 |
| 1946 pushq %r15 | 2008 pushq %r15 |
| 2009 .Lfrom_prologue: |
| 1947 | 2010 |
| 1948 shll $3,%r9d | 2011 shll $3,%r9d |
| 1949 leaq (%r9,%r9,2),%r10 | 2012 leaq (%r9,%r9,2),%r10 |
| 1950 negq %r9 | 2013 negq %r9 |
| 1951 movq (%r8),%r8 | 2014 movq (%r8),%r8 |
| 1952 | 2015 |
| 1953 | 2016 |
| 1954 | 2017 |
| 1955 | 2018 |
| 1956 | 2019 |
| 1957 | 2020 |
| 1958 | 2021 |
| 1959 | 2022 |
| 1960 leaq -320(%rsp,%r9,2),%r11 | 2023 leaq -320(%rsp,%r9,2),%r11 |
| 2024 movq %rsp,%rbp |
| 1961 subq %rdi,%r11 | 2025 subq %rdi,%r11 |
| 1962 andq $4095,%r11 | 2026 andq $4095,%r11 |
| 1963 cmpq %r11,%r10 | 2027 cmpq %r11,%r10 |
| 1964 jb .Lfrom_sp_alt | 2028 jb .Lfrom_sp_alt |
| 1965 » subq» %r11,%rsp | 2029 » subq» %r11,%rbp |
| 1966 » leaq» -320(%rsp,%r9,2),%rsp | 2030 » leaq» -320(%rbp,%r9,2),%rbp |
| 1967 jmp .Lfrom_sp_done | 2031 jmp .Lfrom_sp_done |
| 1968 | 2032 |
| 1969 .align 32 | 2033 .align 32 |
| 1970 .Lfrom_sp_alt: | 2034 .Lfrom_sp_alt: |
| 1971 leaq 4096-320(,%r9,2),%r10 | 2035 leaq 4096-320(,%r9,2),%r10 |
| 1972 » leaq» -320(%rsp,%r9,2),%rsp | 2036 » leaq» -320(%rbp,%r9,2),%rbp |
| 1973 subq %r10,%r11 | 2037 subq %r10,%r11 |
| 1974 movq $0,%r10 | 2038 movq $0,%r10 |
| 1975 cmovcq %r10,%r11 | 2039 cmovcq %r10,%r11 |
| 1976 » subq» %r11,%rsp | 2040 » subq» %r11,%rbp |
| 1977 .Lfrom_sp_done: | 2041 .Lfrom_sp_done: |
| 1978 » andq» $-64,%rsp | 2042 » andq» $-64,%rbp |
| 2043 » movq» %rsp,%r11 |
| 2044 » subq» %rbp,%r11 |
| 2045 » andq» $-4096,%r11 |
| 2046 » leaq» (%r11,%rbp,1),%rsp |
| 2047 » movq» (%rsp),%r10 |
| 2048 » cmpq» %rbp,%rsp |
| 2049 » ja» .Lfrom_page_walk |
| 2050 » jmp» .Lfrom_page_walk_done |
| 2051 |
| 2052 .Lfrom_page_walk: |
| 2053 » leaq» -4096(%rsp),%rsp |
| 2054 » movq» (%rsp),%r10 |
| 2055 » cmpq» %rbp,%rsp |
| 2056 » ja» .Lfrom_page_walk |
| 2057 .Lfrom_page_walk_done: |
| 2058 |
| 1979 movq %r9,%r10 | 2059 movq %r9,%r10 |
| 1980 negq %r9 | 2060 negq %r9 |
| 1981 | 2061 |
| 1982 | 2062 |
| 1983 | 2063 |
| 1984 | 2064 |
| 1985 | 2065 |
| 1986 | 2066 |
| 1987 | 2067 |
| 1988 | 2068 |
| (...skipping 242 matching lines...) |
| 2231 leaq (%r10),%rsp | 2311 leaq (%r10),%rsp |
| 2232 .byte 0xf3,0xc3 | 2312 .byte 0xf3,0xc3 |
| 2233 .LSEH_end_bn_gather5: | 2313 .LSEH_end_bn_gather5: |
| 2234 .size bn_gather5,.-bn_gather5 | 2314 .size bn_gather5,.-bn_gather5 |
| 2235 .align 64 | 2315 .align 64 |
| 2236 .Linc: | 2316 .Linc: |
| 2237 .long 0,0, 1,1 | 2317 .long 0,0, 1,1 |
| 2238 .long 2,2, 2,2 | 2318 .long 2,2, 2,2 |
| 2239 .byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 | 2319 .byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 |
| 2240 #endif | 2320 #endif |
| OLD | NEW |