OLD | NEW |
(Empty) | |
; Assembler preamble.
; "default rel" selects RIP-relative addressing for static data references.
; XMMWORD/YMMWORD/ZMMWORD are defined as empty, so those size keywords
; disappear from the memory operands written further down.
| 1 default rel |
| 2 %define XMMWORD |
| 3 %define YMMWORD |
| 4 %define ZMMWORD |
| 5 section .text code align=64 |
| 6 |
| 7 |
; Imported symbol; the sqr8x path reads the dword at OPENSSL_ia32cap_P+8.
| 8 EXTERN OPENSSL_ia32cap_P |
| 9 |
; bn_mul_mont is the only label declared global in the visible part of the file.
| 10 global bn_mul_mont |
| 11 |
| 12 ALIGN 16 |
; bn_mul_mont: Win64 entry point of the Montgomery multiplication (see the
; identification string stored after bn_sqr8x_mont) and dispatcher to the
; word-at-a-time, 4x-unrolled and 8x-squaring code paths.
; Arguments, as the code uses them after the register shuffle below:
;   rdi (from rcx)      result vector, one 64-bit word per element
;   rsi (from rdx)      first operand vector
;   rdx (from r8)       second operand vector (kept in r12)
;   rcx (from r9)       vector that is subtracted from the result at the end
;   r8  (from [40+rsp]) pointer to a single 64-bit multiplier word
;   r9  (from [48+rsp]) word count, consumed as a 32-bit value
; Presumably these are rp, ap, bp, np, n0 and num of OpenSSL's bn_mul_mont;
; TODO confirm against the C prototype.
; Returns rax = 1.
| 13 bn_mul_mont: |
; Spill rdi/rsi into the caller-provided slots above the return address,
; keep the entry rsp in rax, then move the arguments into rdi/rsi/rdx/rcx/r8/r9.
| 14 mov QWORD[8+rsp],rdi ;WIN64 prologue |
| 15 mov QWORD[16+rsp],rsi |
| 16 mov rax,rsp |
| 17 $L$SEH_begin_bn_mul_mont: |
| 18 mov rdi,rcx |
| 19 mov rsi,rdx |
| 20 mov rdx,r8 |
| 21 mov rcx,r9 |
| 22 mov r8,QWORD[40+rsp] |
| 23 mov r9,QWORD[48+rsp] |
| 24 |
| 25 |
; Dispatch on the word count in r9d:
;   not a multiple of 4, or below 8      -> generic path ($L$mul_enter)
;   operand pointers differ (rdx != rsi) -> 4x path ($L$mul4x_enter)
;   same pointer and multiple of 8       -> squaring path ($L$sqr8x_enter)
;   same pointer otherwise               -> 4x path
| 26 test r9d,3 |
| 27 jnz NEAR $L$mul_enter |
| 28 cmp r9d,8 |
| 29 jb NEAR $L$mul_enter |
| 30 cmp rdx,rsi |
| 31 jne NEAR $L$mul4x_enter |
| 32 test r9d,7 |
| 33 jz NEAR $L$sqr8x_enter |
| 34 jmp NEAR $L$mul4x_enter |
| 35 |
| 36 ALIGN 16 |
; Generic path: save the callee-saved registers, then carve a scratch
; vector of (count + 2) qwords below them.
| 37 $L$mul_enter: |
| 38 push rbx |
| 39 push rbp |
| 40 push r12 |
| 41 push r13 |
| 42 push r14 |
| 43 push r15 |
| 44 |
; Zero-extend the count, remember rsp (pointing at the six pushes) in r11,
; lower rsp by (count + 2) * 8 bytes and round it down to a 1024-byte boundary.
| 45 mov r9d,r9d |
| 46 lea r10,[2+r9] |
| 47 mov r11,rsp |
| 48 neg r10 |
| 49 lea rsp,[r10*8+rsp] |
| 50 and rsp,-1024 |
| 51 |
; The pre-allocation rsp is parked in scratch slot count+1; the epilogue and
; mul_handler both recover it from there.
| 52 mov QWORD[8+r9*8+rsp],r11 |
| 53 $L$mul_body: |
; r12 = second operand pointer, r8 = the multiplier word itself,
; rbx = word 0 of the second operand, rax = word 0 of the first operand.
| 54 mov r12,rdx |
| 55 mov r8,QWORD[r8] |
| 56 mov rbx,QWORD[r12] |
| 57 mov rax,QWORD[rsi] |
| 58 |
; r14 = outer index, r15 = inner index.
| 59 xor r14,r14 |
| 60 xor r15,r15 |
| 61 |
; rbp = low 64 bits of (r8 * low word of the first product); it is the
; per-pass multiplier applied to the [rcx] vector.
| 62 mov rbp,r8 |
| 63 mul rbx |
| 64 mov r10,rax |
| 65 mov rax,QWORD[rcx] |
| 66 |
| 67 imul rbp,r10 |
| 68 mov r11,rdx |
| 69 |
; Only the carry of this addition is kept (r13); the low word in r10 is
; overwritten before it is ever stored.
| 70 mul rbp |
| 71 add r10,rax |
| 72 mov rax,QWORD[8+rsi] |
| 73 adc rdx,0 |
| 74 mov r13,rdx |
| 75 |
| 76 lea r15,[1+r15] |
| 77 jmp NEAR $L$1st_enter |
| 78 |
| 79 ALIGN 16 |
; First pass: for each index r15, multiply [rsi+r15*8] by rbx and
; [rcx+r15*8] by rbp, fold both products into the running carries and store
; one word of the scratch vector at [rsp+r15*8-16].
| 80 $L$1st: |
| 81 add r13,rax |
| 82 mov rax,QWORD[r15*8+rsi] |
| 83 adc rdx,0 |
| 84 add r13,r11 |
| 85 mov r11,r10 |
| 86 adc rdx,0 |
| 87 mov QWORD[((-16))+r15*8+rsp],r13 |
| 88 mov r13,rdx |
| 89 |
| 90 $L$1st_enter: |
| 91 mul rbx |
| 92 add r11,rax |
| 93 mov rax,QWORD[r15*8+rcx] |
| 94 adc rdx,0 |
| 95 lea r15,[1+r15] |
| 96 mov r10,rdx |
| 97 |
| 98 mul rbp |
| 99 cmp r15,r9 |
| 100 jne NEAR $L$1st |
| 101 |
; Loop tail: finish the last column and reload word 0 of the first operand
; into rax for the next outer pass.
| 102 add r13,rax |
| 103 mov rax,QWORD[rsi] |
| 104 adc rdx,0 |
| 105 add r13,r11 |
| 106 adc rdx,0 |
| 107 mov QWORD[((-16))+r15*8+rsp],r13 |
| 108 mov r13,rdx |
| 109 mov r11,r10 |
| 110 |
; Store the two top words: scratch[count-1] and the carry-out in scratch[count].
| 111 xor rdx,rdx |
| 112 add r13,r11 |
| 113 adc rdx,0 |
| 114 mov QWORD[((-8))+r9*8+rsp],r13 |
| 115 mov QWORD[r9*8+rsp],rdx |
| 116 |
| 117 lea r14,[1+r14] |
| 118 jmp NEAR $L$outer |
| 119 ALIGN 16 |
; Remaining passes (r14 = 1 .. count-1): same multiply-accumulate as the
; first pass, but the previous scratch vector is added in as well.
| 120 $L$outer: |
| 121 mov rbx,QWORD[r14*8+r12] |
| 122 xor r15,r15 |
| 123 mov rbp,r8 |
| 124 mov r10,QWORD[rsp] |
| 125 mul rbx |
| 126 add r10,rax |
| 127 mov rax,QWORD[rcx] |
| 128 adc rdx,0 |
| 129 |
| 130 imul rbp,r10 |
| 131 mov r11,rdx |
| 132 |
| 133 mul rbp |
| 134 add r10,rax |
| 135 mov rax,QWORD[8+rsi] |
| 136 adc rdx,0 |
| 137 mov r10,QWORD[8+rsp] |
| 138 mov r13,rdx |
| 139 |
| 140 lea r15,[1+r15] |
| 141 jmp NEAR $L$inner_enter |
| 142 |
| 143 ALIGN 16 |
; Inner loop: r10 carries the previous scratch word for the current column,
; r11 and r13 carry the two product chains.
| 144 $L$inner: |
| 145 add r13,rax |
| 146 mov rax,QWORD[r15*8+rsi] |
| 147 adc rdx,0 |
| 148 add r13,r10 |
| 149 mov r10,QWORD[r15*8+rsp] |
| 150 adc rdx,0 |
| 151 mov QWORD[((-16))+r15*8+rsp],r13 |
| 152 mov r13,rdx |
| 153 |
| 154 $L$inner_enter: |
| 155 mul rbx |
| 156 add r11,rax |
| 157 mov rax,QWORD[r15*8+rcx] |
| 158 adc rdx,0 |
| 159 add r10,r11 |
| 160 mov r11,rdx |
| 161 adc r11,0 |
| 162 lea r15,[1+r15] |
| 163 |
| 164 mul rbp |
| 165 cmp r15,r9 |
| 166 jne NEAR $L$inner |
| 167 |
| 168 add r13,rax |
| 169 mov rax,QWORD[rsi] |
| 170 adc rdx,0 |
| 171 add r13,r10 |
| 172 mov r10,QWORD[r15*8+rsp] |
| 173 adc rdx,0 |
| 174 mov QWORD[((-16))+r15*8+rsp],r13 |
| 175 mov r13,rdx |
| 176 |
; Top words of this pass; r10 holds the previous carry word scratch[count].
| 177 xor rdx,rdx |
| 178 add r13,r11 |
| 179 adc rdx,0 |
| 180 add r13,r10 |
| 181 adc rdx,0 |
| 182 mov QWORD[((-8))+r9*8+rsp],r13 |
| 183 mov QWORD[r9*8+rsp],rdx |
| 184 |
| 185 lea r14,[1+r14] |
| 186 cmp r14,r9 |
| 187 jb NEAR $L$outer |
| 188 |
; Final subtraction: result = scratch - [rcx], word by word.
; "xor r14,r14" clears CF, so the first sbb starts without a borrow; the
; mov/lea/dec instructions in the loop leave CF untouched between sbb's.
| 189 xor r14,r14 |
| 190 mov rax,QWORD[rsp] |
| 191 lea rsi,[rsp] |
| 192 mov r15,r9 |
| 193 jmp NEAR $L$sub |
| 194 ALIGN 16 |
| 195 $L$sub: sbb rax,QWORD[r14*8+rcx] |
| 196 mov QWORD[r14*8+rdi],rax |
| 197 mov rax,QWORD[8+r14*8+rsi] |
| 198 lea r14,[1+r14] |
| 199 dec r15 |
| 200 jnz NEAR $L$sub |
| 201 |
; rax = scratch[count] minus the final borrow. It is used below as a
; word-wide select mask (presumably always 0 or all-ones; the copy loop
; relies on that).
| 202 sbb rax,0 |
| 203 xor r14,r14 |
| 204 mov r15,r9 |
| 205 ALIGN 16 |
; Branch-free select per word: result[i] = ((scratch[i] ^ result[i]) & rax) ^ result[i],
; i.e. the scratch word when rax is all-ones and the subtracted word when rax is 0.
; Each scratch word is overwritten with the loop index once it has been read.
| 206 $L$copy: |
| 207 mov rsi,QWORD[r14*8+rsp] |
| 208 mov rcx,QWORD[r14*8+rdi] |
| 209 xor rsi,rcx |
| 210 and rsi,rax |
| 211 xor rsi,rcx |
| 212 mov QWORD[r14*8+rsp],r14 |
| 213 mov QWORD[r14*8+rdi],rsi |
| 214 lea r14,[1+r14] |
| 215 sub r15,1 |
| 216 jnz NEAR $L$copy |
| 217 |
; Reload the parked rsp, set the return value to 1, restore the six pushed
; registers in reverse push order and drop the frame.
| 218 mov rsi,QWORD[8+r9*8+rsp] |
| 219 mov rax,1 |
| 220 mov r15,QWORD[rsi] |
| 221 mov r14,QWORD[8+rsi] |
| 222 mov r13,QWORD[16+rsi] |
| 223 mov r12,QWORD[24+rsi] |
| 224 mov rbp,QWORD[32+rsi] |
| 225 mov rbx,QWORD[40+rsi] |
| 226 lea rsp,[48+rsi] |
| 227 $L$mul_epilogue: |
; rsp is back at its entry value, so the rdi/rsi spilled by the prologue
; are reloaded from the same two slots.
| 228 mov rdi,QWORD[8+rsp] ;WIN64 epilogue |
| 229 mov rsi,QWORD[16+rsp] |
| 230 DB 0F3h,0C3h ;repret |
| 231 $L$SEH_end_bn_mul_mont: |
| 232 |
| 233 ALIGN 16 |
; bn_mul4x_mont: same computation as the generic path of bn_mul_mont, with
; the inner loops unrolled four words per iteration.
; The label is not declared global in the visible part of the file; the
; dispatcher in bn_mul_mont jumps straight to $L$mul4x_enter with the
; arguments already in rdi/rsi/rdx/rcx/r8/r9 and a word count that is a
; multiple of 4 and at least 8.
; Returns rax = 1.
| 234 bn_mul4x_mont: |
| 235 mov QWORD[8+rsp],rdi ;WIN64 prologue |
| 236 mov QWORD[16+rsp],rsi |
| 237 mov rax,rsp |
| 238 $L$SEH_begin_bn_mul4x_mont: |
| 239 mov rdi,rcx |
| 240 mov rsi,rdx |
| 241 mov rdx,r8 |
| 242 mov rcx,r9 |
| 243 mov r8,QWORD[40+rsp] |
| 244 mov r9,QWORD[48+rsp] |
| 245 |
| 246 |
| 247 $L$mul4x_enter: |
| 248 push rbx |
| 249 push rbp |
| 250 push r12 |
| 251 push r13 |
| 252 push r14 |
| 253 push r15 |
| 254 |
; Scratch area of (count + 4) qwords, rounded down to a 1024-byte boundary.
; The pre-allocation rsp goes to slot count+1, as in the generic path.
| 255 mov r9d,r9d |
| 256 lea r10,[4+r9] |
| 257 mov r11,rsp |
| 258 neg r10 |
| 259 lea rsp,[r10*8+rsp] |
| 260 and rsp,-1024 |
| 261 |
| 262 mov QWORD[8+r9*8+rsp],r11 |
| 263 $L$mul4x_body: |
; rdi is reused as a carry register in the loops, so the result pointer is
; parked in slot count+2 and reloaded before the subtraction.
| 264 mov QWORD[16+r9*8+rsp],rdi |
| 265 mov r12,rdx |
| 266 mov r8,QWORD[r8] |
| 267 mov rbx,QWORD[r12] |
| 268 mov rax,QWORD[rsi] |
| 269 |
| 270 xor r14,r14 |
| 271 xor r15,r15 |
| 272 |
; rbp = low 64 bits of (r8 * low word of the first product): the multiplier
; applied to the [rcx] vector during this pass.
| 273 mov rbp,r8 |
| 274 mul rbx |
| 275 mov r10,rax |
| 276 mov rax,QWORD[rcx] |
| 277 |
| 278 imul rbp,r10 |
| 279 mov r11,rdx |
| 280 |
| 281 mul rbp |
| 282 add r10,rax |
| 283 mov rax,QWORD[8+rsi] |
| 284 adc rdx,0 |
| 285 mov rdi,rdx |
| 286 |
| 287 mul rbx |
| 288 add r11,rax |
| 289 mov rax,QWORD[8+rcx] |
| 290 adc rdx,0 |
| 291 mov r10,rdx |
| 292 |
| 293 mul rbp |
| 294 add rdi,rax |
| 295 mov rax,QWORD[16+rsi] |
| 296 adc rdx,0 |
| 297 add rdi,r11 |
| 298 lea r15,[4+r15] |
| 299 adc rdx,0 |
| 300 mov QWORD[rsp],rdi |
| 301 mov r13,rdx |
| 302 jmp NEAR $L$1st4x |
| 303 ALIGN 16 |
; First pass, four columns per iteration. r10/r11 alternate as carries of
; the [rsi]*rbx chain and r13/rdi as carries of the [rcx]*rbp chain; each
; group of instructions stores one scratch word.
| 304 $L$1st4x: |
| 305 mul rbx |
| 306 add r10,rax |
| 307 mov rax,QWORD[((-16))+r15*8+rcx] |
| 308 adc rdx,0 |
| 309 mov r11,rdx |
| 310 |
| 311 mul rbp |
| 312 add r13,rax |
| 313 mov rax,QWORD[((-8))+r15*8+rsi] |
| 314 adc rdx,0 |
| 315 add r13,r10 |
| 316 adc rdx,0 |
| 317 mov QWORD[((-24))+r15*8+rsp],r13 |
| 318 mov rdi,rdx |
| 319 |
| 320 mul rbx |
| 321 add r11,rax |
| 322 mov rax,QWORD[((-8))+r15*8+rcx] |
| 323 adc rdx,0 |
| 324 mov r10,rdx |
| 325 |
| 326 mul rbp |
| 327 add rdi,rax |
| 328 mov rax,QWORD[r15*8+rsi] |
| 329 adc rdx,0 |
| 330 add rdi,r11 |
| 331 adc rdx,0 |
| 332 mov QWORD[((-16))+r15*8+rsp],rdi |
| 333 mov r13,rdx |
| 334 |
| 335 mul rbx |
| 336 add r10,rax |
| 337 mov rax,QWORD[r15*8+rcx] |
| 338 adc rdx,0 |
| 339 mov r11,rdx |
| 340 |
| 341 mul rbp |
| 342 add r13,rax |
| 343 mov rax,QWORD[8+r15*8+rsi] |
| 344 adc rdx,0 |
| 345 add r13,r10 |
| 346 adc rdx,0 |
| 347 mov QWORD[((-8))+r15*8+rsp],r13 |
| 348 mov rdi,rdx |
| 349 |
| 350 mul rbx |
| 351 add r11,rax |
| 352 mov rax,QWORD[8+r15*8+rcx] |
| 353 adc rdx,0 |
| 354 lea r15,[4+r15] |
| 355 mov r10,rdx |
| 356 |
| 357 mul rbp |
| 358 add rdi,rax |
| 359 mov rax,QWORD[((-16))+r15*8+rsi] |
| 360 adc rdx,0 |
| 361 add rdi,r11 |
| 362 adc rdx,0 |
| 363 mov QWORD[((-32))+r15*8+rsp],rdi |
| 364 mov r13,rdx |
| 365 cmp r15,r9 |
| 366 jb NEAR $L$1st4x |
| 367 |
; Tail of the first pass: the last two columns, then the two top words.
| 368 mul rbx |
| 369 add r10,rax |
| 370 mov rax,QWORD[((-16))+r15*8+rcx] |
| 371 adc rdx,0 |
| 372 mov r11,rdx |
| 373 |
| 374 mul rbp |
| 375 add r13,rax |
| 376 mov rax,QWORD[((-8))+r15*8+rsi] |
| 377 adc rdx,0 |
| 378 add r13,r10 |
| 379 adc rdx,0 |
| 380 mov QWORD[((-24))+r15*8+rsp],r13 |
| 381 mov rdi,rdx |
| 382 |
| 383 mul rbx |
| 384 add r11,rax |
| 385 mov rax,QWORD[((-8))+r15*8+rcx] |
| 386 adc rdx,0 |
| 387 mov r10,rdx |
| 388 |
| 389 mul rbp |
| 390 add rdi,rax |
| 391 mov rax,QWORD[rsi] |
| 392 adc rdx,0 |
| 393 add rdi,r11 |
| 394 adc rdx,0 |
| 395 mov QWORD[((-16))+r15*8+rsp],rdi |
| 396 mov r13,rdx |
| 397 |
| 398 xor rdi,rdi |
| 399 add r13,r10 |
| 400 adc rdi,0 |
| 401 mov QWORD[((-8))+r15*8+rsp],r13 |
| 402 mov QWORD[r15*8+rsp],rdi |
| 403 |
| 404 lea r14,[1+r14] |
| 405 ALIGN 4 |
; Remaining passes (r14 = 1 .. count-1): as above, but every column also
; adds the scratch word left by the previous pass.
| 406 $L$outer4x: |
| 407 mov rbx,QWORD[r14*8+r12] |
| 408 xor r15,r15 |
| 409 mov r10,QWORD[rsp] |
| 410 mov rbp,r8 |
| 411 mul rbx |
| 412 add r10,rax |
| 413 mov rax,QWORD[rcx] |
| 414 adc rdx,0 |
| 415 |
| 416 imul rbp,r10 |
| 417 mov r11,rdx |
| 418 |
| 419 mul rbp |
| 420 add r10,rax |
| 421 mov rax,QWORD[8+rsi] |
| 422 adc rdx,0 |
| 423 mov rdi,rdx |
| 424 |
| 425 mul rbx |
| 426 add r11,rax |
| 427 mov rax,QWORD[8+rcx] |
| 428 adc rdx,0 |
| 429 add r11,QWORD[8+rsp] |
| 430 adc rdx,0 |
| 431 mov r10,rdx |
| 432 |
| 433 mul rbp |
| 434 add rdi,rax |
| 435 mov rax,QWORD[16+rsi] |
| 436 adc rdx,0 |
| 437 add rdi,r11 |
| 438 lea r15,[4+r15] |
| 439 adc rdx,0 |
| 440 mov QWORD[rsp],rdi |
| 441 mov r13,rdx |
| 442 jmp NEAR $L$inner4x |
| 443 ALIGN 16 |
| 444 $L$inner4x: |
| 445 mul rbx |
| 446 add r10,rax |
| 447 mov rax,QWORD[((-16))+r15*8+rcx] |
| 448 adc rdx,0 |
| 449 add r10,QWORD[((-16))+r15*8+rsp] |
| 450 adc rdx,0 |
| 451 mov r11,rdx |
| 452 |
| 453 mul rbp |
| 454 add r13,rax |
| 455 mov rax,QWORD[((-8))+r15*8+rsi] |
| 456 adc rdx,0 |
| 457 add r13,r10 |
| 458 adc rdx,0 |
| 459 mov QWORD[((-24))+r15*8+rsp],r13 |
| 460 mov rdi,rdx |
| 461 |
| 462 mul rbx |
| 463 add r11,rax |
| 464 mov rax,QWORD[((-8))+r15*8+rcx] |
| 465 adc rdx,0 |
| 466 add r11,QWORD[((-8))+r15*8+rsp] |
| 467 adc rdx,0 |
| 468 mov r10,rdx |
| 469 |
| 470 mul rbp |
| 471 add rdi,rax |
| 472 mov rax,QWORD[r15*8+rsi] |
| 473 adc rdx,0 |
| 474 add rdi,r11 |
| 475 adc rdx,0 |
| 476 mov QWORD[((-16))+r15*8+rsp],rdi |
| 477 mov r13,rdx |
| 478 |
| 479 mul rbx |
| 480 add r10,rax |
| 481 mov rax,QWORD[r15*8+rcx] |
| 482 adc rdx,0 |
| 483 add r10,QWORD[r15*8+rsp] |
| 484 adc rdx,0 |
| 485 mov r11,rdx |
| 486 |
| 487 mul rbp |
| 488 add r13,rax |
| 489 mov rax,QWORD[8+r15*8+rsi] |
| 490 adc rdx,0 |
| 491 add r13,r10 |
| 492 adc rdx,0 |
| 493 mov QWORD[((-8))+r15*8+rsp],r13 |
| 494 mov rdi,rdx |
| 495 |
| 496 mul rbx |
| 497 add r11,rax |
| 498 mov rax,QWORD[8+r15*8+rcx] |
| 499 adc rdx,0 |
| 500 add r11,QWORD[8+r15*8+rsp] |
| 501 adc rdx,0 |
| 502 lea r15,[4+r15] |
| 503 mov r10,rdx |
| 504 |
| 505 mul rbp |
| 506 add rdi,rax |
| 507 mov rax,QWORD[((-16))+r15*8+rsi] |
| 508 adc rdx,0 |
| 509 add rdi,r11 |
| 510 adc rdx,0 |
| 511 mov QWORD[((-32))+r15*8+rsp],rdi |
| 512 mov r13,rdx |
| 513 cmp r15,r9 |
| 514 jb NEAR $L$inner4x |
| 515 |
; Tail of an outer pass; the outer index r14 is advanced in the middle of it.
| 516 mul rbx |
| 517 add r10,rax |
| 518 mov rax,QWORD[((-16))+r15*8+rcx] |
| 519 adc rdx,0 |
| 520 add r10,QWORD[((-16))+r15*8+rsp] |
| 521 adc rdx,0 |
| 522 mov r11,rdx |
| 523 |
| 524 mul rbp |
| 525 add r13,rax |
| 526 mov rax,QWORD[((-8))+r15*8+rsi] |
| 527 adc rdx,0 |
| 528 add r13,r10 |
| 529 adc rdx,0 |
| 530 mov QWORD[((-24))+r15*8+rsp],r13 |
| 531 mov rdi,rdx |
| 532 |
| 533 mul rbx |
| 534 add r11,rax |
| 535 mov rax,QWORD[((-8))+r15*8+rcx] |
| 536 adc rdx,0 |
| 537 add r11,QWORD[((-8))+r15*8+rsp] |
| 538 adc rdx,0 |
| 539 lea r14,[1+r14] |
| 540 mov r10,rdx |
| 541 |
| 542 mul rbp |
| 543 add rdi,rax |
| 544 mov rax,QWORD[rsi] |
| 545 adc rdx,0 |
| 546 add rdi,r11 |
| 547 adc rdx,0 |
| 548 mov QWORD[((-16))+r15*8+rsp],rdi |
| 549 mov r13,rdx |
| 550 |
; Top words: add the previous carry word scratch[count] and store the new one.
| 551 xor rdi,rdi |
| 552 add r13,r10 |
| 553 adc rdi,0 |
| 554 add r13,QWORD[r9*8+rsp] |
| 555 adc rdi,0 |
| 556 mov QWORD[((-8))+r15*8+rsp],r13 |
| 557 mov QWORD[r15*8+rsp],rdi |
| 558 |
| 559 cmp r14,r9 |
| 560 jb NEAR $L$outer4x |
; Final subtraction, four words per iteration. rdi is reloaded with the
; parked result pointer and r9 becomes count/4 until it is shifted back
; just before the frame is released.
| 561 mov rdi,QWORD[16+r9*8+rsp] |
| 562 mov rax,QWORD[rsp] |
| 563 mov rdx,QWORD[8+rsp] |
| 564 shr r9,2 |
| 565 lea rsi,[rsp] |
| 566 xor r14,r14 |
| 567 |
; "sub" starts the borrow chain; everything between the following sbb's
; (mov, lea, dec) leaves CF untouched.
| 568 sub rax,QWORD[rcx] |
| 569 mov rbx,QWORD[16+rsi] |
| 570 mov rbp,QWORD[24+rsi] |
| 571 sbb rdx,QWORD[8+rcx] |
; r15 = count/4 - 1 loop iterations; the last group of four is finished
; after the loop.
| 572 lea r15,[((-1))+r9] |
| 573 jmp NEAR $L$sub4x |
| 574 ALIGN 16 |
| 575 $L$sub4x: |
| 576 mov QWORD[r14*8+rdi],rax |
| 577 mov QWORD[8+r14*8+rdi],rdx |
| 578 sbb rbx,QWORD[16+r14*8+rcx] |
| 579 mov rax,QWORD[32+r14*8+rsi] |
| 580 mov rdx,QWORD[40+r14*8+rsi] |
| 581 sbb rbp,QWORD[24+r14*8+rcx] |
| 582 mov QWORD[16+r14*8+rdi],rbx |
| 583 mov QWORD[24+r14*8+rdi],rbp |
| 584 sbb rax,QWORD[32+r14*8+rcx] |
| 585 mov rbx,QWORD[48+r14*8+rsi] |
| 586 mov rbp,QWORD[56+r14*8+rsi] |
| 587 sbb rdx,QWORD[40+r14*8+rcx] |
| 588 lea r14,[4+r14] |
| 589 dec r15 |
| 590 jnz NEAR $L$sub4x |
| 591 |
| 592 mov QWORD[r14*8+rdi],rax |
| 593 mov rax,QWORD[32+r14*8+rsi] |
| 594 sbb rbx,QWORD[16+r14*8+rcx] |
| 595 mov QWORD[8+r14*8+rdi],rdx |
| 596 sbb rbp,QWORD[24+r14*8+rcx] |
| 597 mov QWORD[16+r14*8+rdi],rbx |
| 598 |
; rax = scratch[count] minus the final borrow, used as the select mask
; (presumably always 0 or all-ones). The DB bytes encode "movq xmm0,rax";
; punpcklqdq then duplicates the mask into both halves of xmm0.
| 599 sbb rax,0 |
| 600 DB 66h, 48h, 0fh, 6eh, 0c0h |
| 601 punpcklqdq xmm0,xmm0 |
| 602 mov QWORD[24+r14*8+rdi],rbp |
| 603 xor r14,r14 |
| 604 |
| 605 mov r15,r9 |
| 606 pxor xmm5,xmm5 |
| 607 jmp NEAR $L$copy4x |
| 608 ALIGN 16 |
; Branch-free select, 32 bytes per iteration (r14 is a byte offset here):
; result = ((scratch ^ result) & mask) ^ result. The scratch words are
; overwritten with zeros from xmm5 once they have been read.
| 609 $L$copy4x: |
| 610 movdqu xmm2,XMMWORD[r14*1+rsp] |
| 611 movdqu xmm4,XMMWORD[16+r14*1+rsp] |
| 612 movdqu xmm1,XMMWORD[r14*1+rdi] |
| 613 movdqu xmm3,XMMWORD[16+r14*1+rdi] |
| 614 pxor xmm2,xmm1 |
| 615 pxor xmm4,xmm3 |
| 616 pand xmm2,xmm0 |
| 617 pand xmm4,xmm0 |
| 618 pxor xmm2,xmm1 |
| 619 pxor xmm4,xmm3 |
| 620 movdqu XMMWORD[r14*1+rdi],xmm2 |
| 621 movdqu XMMWORD[16+r14*1+rdi],xmm4 |
| 622 movdqa XMMWORD[r14*1+rsp],xmm5 |
| 623 movdqa XMMWORD[16+r14*1+rsp],xmm5 |
| 624 |
| 625 lea r14,[32+r14] |
| 626 dec r15 |
| 627 jnz NEAR $L$copy4x |
| 628 |
; Restore r9 to the word count so the parked rsp can be found in slot
; count+1, then return 1 and restore the callee-saved registers.
| 629 shl r9,2 |
| 630 mov rsi,QWORD[8+r9*8+rsp] |
| 631 mov rax,1 |
| 632 mov r15,QWORD[rsi] |
| 633 mov r14,QWORD[8+rsi] |
| 634 mov r13,QWORD[16+rsi] |
| 635 mov r12,QWORD[24+rsi] |
| 636 mov rbp,QWORD[32+rsi] |
| 637 mov rbx,QWORD[40+rsi] |
| 638 lea rsp,[48+rsi] |
| 639 $L$mul4x_epilogue: |
| 640 mov rdi,QWORD[8+rsp] ;WIN64 epilogue |
| 641 mov rsi,QWORD[16+rsp] |
| 642 DB 0F3h,0C3h ;repret |
| 643 $L$SEH_end_bn_mul4x_mont: |
| 644 EXTERN bn_sqr8x_internal |
| 645 |
| 646 |
| 647 ALIGN 32 |
; bn_sqr8x_mont: squaring path. It builds a 64-byte-aligned stack frame,
; copies the [rcx] vector into it, calls the external bn_sqr8x_internal,
; then zeroes the frame and returns rax = 1.
; The dispatcher in bn_mul_mont reaches $L$sqr8x_enter only when both operand
; pointers are equal and the word count is a multiple of 8. What
; bn_sqr8x_internal computes and which registers it consumes cannot be seen
; from this file.
| 648 bn_sqr8x_mont: |
| 649 mov QWORD[8+rsp],rdi ;WIN64 prologue |
| 650 mov QWORD[16+rsp],rsi |
| 651 mov rax,rsp |
| 652 $L$SEH_begin_bn_sqr8x_mont: |
| 653 mov rdi,rcx |
| 654 mov rsi,rdx |
| 655 mov rdx,r8 |
| 656 mov rcx,r9 |
| 657 mov r8,QWORD[40+rsp] |
| 658 mov r9,QWORD[48+rsp] |
| 659 |
| 660 |
| 661 $L$sqr8x_enter: |
; rax keeps the rsp seen before the pushes; it is stored in the frame below
; and used by the epilogue and by sqr_handler.
| 662 mov rax,rsp |
| 663 push rbx |
| 664 push rbp |
| 665 push r12 |
| 666 push r13 |
| 667 push r14 |
| 668 push r15 |
| 669 |
; r9 = -(count * 8), i.e. minus the vector size in bytes; r10 = count * 32.
| 670 mov r10d,r9d |
| 671 shl r9d,3 |
| 672 shl r10,3+2 |
| 673 neg r9 |
| 674 |
| 675 |
| 676 |
| 677 |
| 678 |
| 679 |
; Choose the frame position. r11 = (candidate frame address - rsi) mod 4096,
; where the candidate is rsp - 64 - count*32. Depending on how r11 compares
; with count*32, rsp is lowered by r11 or by a clamped difference.
; NOTE(review): this looks like it controls the frame's offset from the
; input vector within a 4 KiB page -- confirm against the upstream generator.
| 680 lea r11,[((-64))+r9*4+rsp] |
| 681 mov r8,QWORD[r8] |
| 682 sub r11,rsi |
| 683 and r11,4095 |
| 684 cmp r10,r11 |
| 685 jb NEAR $L$sqr8x_sp_alt |
| 686 sub rsp,r11 |
| 687 lea rsp,[((-64))+r9*4+rsp] |
| 688 jmp NEAR $L$sqr8x_sp_done |
| 689 |
| 690 ALIGN 32 |
| 691 $L$sqr8x_sp_alt: |
; "mov r10,0" leaves the flags alone, so cmovc still sees the borrow of the
; preceding sub and clamps r11 to 0 when that subtraction wrapped.
| 692 lea r10,[((4096-64))+r9*4] |
| 693 lea rsp,[((-64))+r9*4+rsp] |
| 694 sub r11,r10 |
| 695 mov r10,0 |
| 696 cmovc r11,r10 |
| 697 sub rsp,r11 |
| 698 $L$sqr8x_sp_done: |
; Align the frame to 64 bytes. r10 = -(byte size), r9 = +(byte size).
| 699 and rsp,-64 |
| 700 mov r10,r9 |
| 701 neg r9 |
| 702 |
; r11 = destination for the copy loop below. The multiplier word is kept at
; [32+rsp] and the pre-push rsp at [40+rsp].
| 703 lea r11,[64+r9*2+rsp] |
| 704 mov QWORD[32+rsp],r8 |
| 705 mov QWORD[40+rsp],rax |
| 706 $L$sqr8x_body: |
| 707 |
; The DB bytes encode "movq xmm2,r11". rbp = byte size / 32 = count / 4,
; the iteration count of the copy loop.
; NOTE(review): eax is loaded from OPENSSL_ia32cap_P+8 but not read again in
; this function before the call; presumably bn_sqr8x_internal consumes it.
| 708 mov rbp,r9 |
| 709 DB 102,73,15,110,211 |
| 710 shr rbp,3+2 |
| 711 mov eax,DWORD[((OPENSSL_ia32cap_P+8))] |
| 712 jmp NEAR $L$sqr8x_copy_n |
| 713 |
| 714 ALIGN 32 |
; Copy four words of the [rcx] vector per iteration; each 64-bit word is
; zero-extended by movq and written to its own 16-byte slot at [r11].
| 715 $L$sqr8x_copy_n: |
| 716 movq xmm0,QWORD[rcx] |
| 717 movq xmm1,QWORD[8+rcx] |
| 718 movq xmm3,QWORD[16+rcx] |
| 719 movq xmm4,QWORD[24+rcx] |
| 720 lea rcx,[32+rcx] |
| 721 movdqa XMMWORD[r11],xmm0 |
| 722 movdqa XMMWORD[16+r11],xmm1 |
| 723 movdqa XMMWORD[32+r11],xmm3 |
| 724 movdqa XMMWORD[48+r11],xmm4 |
| 725 lea r11,[64+r11] |
| 726 dec rbp |
| 727 jnz NEAR $L$sqr8x_copy_n |
| 728 |
; The DB bytes encode "movq xmm1,rdi" and "movq xmm3,r10".
| 729 pxor xmm0,xmm0 |
| 730 DB 102,72,15,110,207 |
| 731 DB 102,73,15,110,218 |
| 732 call bn_sqr8x_internal |
| 733 |
; Wipe the frame: two regions starting at rsp+48 and rsp+64+2*size, 64 bytes
; of each per iteration, for size/32 iterations. rsi = saved pre-push rsp.
; This relies on r9 still holding the byte size after the call.
| 734 pxor xmm0,xmm0 |
| 735 lea rax,[48+rsp] |
| 736 lea rdx,[64+r9*2+rsp] |
| 737 shr r9,3+2 |
| 738 mov rsi,QWORD[40+rsp] |
| 739 jmp NEAR $L$sqr8x_zero |
| 740 |
| 741 ALIGN 32 |
| 742 $L$sqr8x_zero: |
| 743 movdqa XMMWORD[rax],xmm0 |
| 744 movdqa XMMWORD[16+rax],xmm0 |
| 745 movdqa XMMWORD[32+rax],xmm0 |
| 746 movdqa XMMWORD[48+rax],xmm0 |
| 747 lea rax,[64+rax] |
| 748 movdqa XMMWORD[rdx],xmm0 |
| 749 movdqa XMMWORD[16+rdx],xmm0 |
| 750 movdqa XMMWORD[32+rdx],xmm0 |
| 751 movdqa XMMWORD[48+rdx],xmm0 |
| 752 lea rdx,[64+rdx] |
| 753 dec r9 |
| 754 jnz NEAR $L$sqr8x_zero |
| 755 |
; Return 1; the six pushed registers sit directly below the saved rsp.
| 756 mov rax,1 |
| 757 mov r15,QWORD[((-48))+rsi] |
| 758 mov r14,QWORD[((-40))+rsi] |
| 759 mov r13,QWORD[((-32))+rsi] |
| 760 mov r12,QWORD[((-24))+rsi] |
| 761 mov rbp,QWORD[((-16))+rsi] |
| 762 mov rbx,QWORD[((-8))+rsi] |
| 763 lea rsp,[rsi] |
| 764 $L$sqr8x_epilogue: |
| 765 mov rdi,QWORD[8+rsp] ;WIN64 epilogue |
| 766 mov rsi,QWORD[16+rsp] |
| 767 DB 0F3h,0C3h ;repret |
| 768 $L$SEH_end_bn_sqr8x_mont: |
; NUL-terminated ASCII identification string embedded in the code section:
; "Montgomery Multiplication for x86_64, CRYPTOGAMS by <appro@openssl.org>"
| 769 DB 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105 |
| 770 DB 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56 |
| 771 DB 54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83 |
| 772 DB 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115 |
| 773 DB 115,108,46,111,114,103,62,0 |
| 774 ALIGN 16 |
| 775 EXTERN __imp_RtlVirtualUnwind |
| 776 |
| 777 ALIGN 16 |
; mul_handler: exception handler named in the unwind info of bn_mul_mont and
; bn_mul4x_mont. It works out which stack pointer and callee-saved register
; values the interrupted function's caller expects, patches them into the
; context record at r8, and finishes in $L$common_seh_tail.
; r8 and r9 are the 3rd and 4th handler arguments; the offsets used below
; presumably are CONTEXT.Rax (120), .Rsp (152), .R9 (192), .Rip (248) and
; DISPATCHER_CONTEXT.ImageBase (8), .HandlerData (56) -- confirm against winnt.h.
| 778 mul_handler: |
| 779 push rsi |
| 780 push rdi |
| 781 push rbx |
| 782 push rbp |
| 783 push r12 |
| 784 push r13 |
| 785 push r14 |
| 786 push r15 |
| 787 pushfq |
| 788 sub rsp,64 |
| 789 |
; rax = the context's rax (both mul prologues copy the entry rsp into rax),
; rbx = faulting instruction pointer.
| 790 mov rax,QWORD[120+r8] |
| 791 mov rbx,QWORD[248+r8] |
| 792 |
| 793 mov rsi,QWORD[8+r9] |
| 794 mov r11,QWORD[56+r9] |
| 795 |
; HandlerData[0] is the image-relative address of the body label. A fault
; before it means the frame is not set up yet, so rax is used as the frame as is.
| 796 mov r10d,DWORD[r11] |
| 797 lea r10,[r10*1+rsi] |
| 798 cmp rbx,r10 |
| 799 jb NEAR $L$common_seh_tail |
| 800 |
| 801 mov rax,QWORD[152+r8] |
| 802 |
; HandlerData[1] is the image-relative address of the epilogue label. At or
; past it rsp is already restored, so the context's rsp is used as is.
| 803 mov r10d,DWORD[4+r11] |
| 804 lea r10,[r10*1+rsi] |
| 805 cmp rbx,r10 |
| 806 jae NEAR $L$common_seh_tail |
| 807 |
; Inside the body: fetch the rsp that the function parked in scratch slot
; count+1, taking the count from the context's r9, and add 48 to step over
; the six pushed registers.
; NOTE(review): bn_mul4x_mont keeps count/4 in r9 between its "shr r9,2" and
; "shl r9,2"; a fault in that window would index the wrong slot -- confirm
; whether that range needs handling.
| 808 mov r10,QWORD[192+r8] |
| 809 mov rax,QWORD[8+r10*8+rax] |
| 810 lea rax,[48+rax] |
| 811 |
; Reload the six pushed registers from just below that address and write
; them into the context record.
| 812 mov rbx,QWORD[((-8))+rax] |
| 813 mov rbp,QWORD[((-16))+rax] |
| 814 mov r12,QWORD[((-24))+rax] |
| 815 mov r13,QWORD[((-32))+rax] |
| 816 mov r14,QWORD[((-40))+rax] |
| 817 mov r15,QWORD[((-48))+rax] |
| 818 mov QWORD[144+r8],rbx |
| 819 mov QWORD[160+r8],rbp |
| 820 mov QWORD[216+r8],r12 |
| 821 mov QWORD[224+r8],r13 |
| 822 mov QWORD[232+r8],r14 |
| 823 mov QWORD[240+r8],r15 |
| 824 |
| 825 jmp NEAR $L$common_seh_tail |
| 826 |
| 827 |
| 828 |
| 829 ALIGN 16 |
; sqr_handler: exception handler named in the unwind info of bn_sqr8x_mont.
; Same structure as mul_handler, except that inside the body the original
; stack pointer is read from [40+rsp] of the interrupted frame, where
; bn_sqr8x_mont stores it. Falls through into $L$common_seh_tail, which
; mul_handler also jumps to.
; Context and dispatcher offsets presumably follow the Win64 CONTEXT and
; DISPATCHER_CONTEXT layouts -- confirm against winnt.h.
| 830 sqr_handler: |
| 831 push rsi |
| 832 push rdi |
| 833 push rbx |
| 834 push rbp |
| 835 push r12 |
| 836 push r13 |
| 837 push r14 |
| 838 push r15 |
| 839 pushfq |
| 840 sub rsp,64 |
| 841 |
; rax = the context's rax, rbx = faulting instruction pointer.
| 842 mov rax,QWORD[120+r8] |
| 843 mov rbx,QWORD[248+r8] |
| 844 |
| 845 mov rsi,QWORD[8+r9] |
| 846 mov r11,QWORD[56+r9] |
| 847 |
; Fault before the body label (HandlerData[0]): use rax as the frame.
| 848 mov r10d,DWORD[r11] |
| 849 lea r10,[r10*1+rsi] |
| 850 cmp rbx,r10 |
| 851 jb NEAR $L$common_seh_tail |
| 852 |
| 853 mov rax,QWORD[152+r8] |
| 854 |
; Fault at or past the epilogue label (HandlerData[1]): use the context's rsp.
| 855 mov r10d,DWORD[4+r11] |
| 856 lea r10,[r10*1+rsi] |
| 857 cmp rbx,r10 |
| 858 jae NEAR $L$common_seh_tail |
| 859 |
; Inside the body: the pre-push rsp was stored at [40+rsp] of the frame.
| 860 mov rax,QWORD[40+rax] |
| 861 |
| 862 mov rbx,QWORD[((-8))+rax] |
| 863 mov rbp,QWORD[((-16))+rax] |
| 864 mov r12,QWORD[((-24))+rax] |
| 865 mov r13,QWORD[((-32))+rax] |
| 866 mov r14,QWORD[((-40))+rax] |
| 867 mov r15,QWORD[((-48))+rax] |
| 868 mov QWORD[144+r8],rbx |
| 869 mov QWORD[160+r8],rbp |
| 870 mov QWORD[216+r8],r12 |
| 871 mov QWORD[224+r8],r13 |
| 872 mov QWORD[232+r8],r14 |
| 873 mov QWORD[240+r8],r15 |
| 874 |
; Shared tail. rax = stack pointer at function entry, so [8+rax] and [16+rax]
; are the rdi/rsi values spilled by the WIN64 prologue. Record rsp, rsi and
; rdi in the context.
| 875 $L$common_seh_tail: |
| 876 mov rdi,QWORD[8+rax] |
| 877 mov rsi,QWORD[16+rax] |
| 878 mov QWORD[152+r8],rax |
| 879 mov QWORD[168+r8],rsi |
| 880 mov QWORD[176+r8],rdi |
| 881 |
; Copy 154 qwords from the context at r8 to the record pointed to by [40+r9].
; The DD encodes the bytes FC F3 48 A5, i.e. "cld" followed by "rep movsq".
| 882 mov rdi,QWORD[40+r9] |
| 883 mov rsi,r8 |
| 884 mov ecx,154 |
| 885 DD 0xa548f3fc |
| 886 |
; Call RtlVirtualUnwind with rcx = 0 and the remaining arguments taken from
; the dispatcher record at r9: [8], [0] and [16] in rdx/r8/r9, then on the
; stack [40], the addresses of fields 56 and 24, and a final 0.
| 887 mov rsi,r9 |
| 888 xor rcx,rcx |
| 889 mov rdx,QWORD[8+rsi] |
| 890 mov r8,QWORD[rsi] |
| 891 mov r9,QWORD[16+rsi] |
| 892 mov r10,QWORD[40+rsi] |
| 893 lea r11,[56+rsi] |
| 894 lea r12,[24+rsi] |
| 895 mov QWORD[32+rsp],r10 |
| 896 mov QWORD[40+rsp],r11 |
| 897 mov QWORD[48+rsp],r12 |
| 898 mov QWORD[56+rsp],rcx |
| 899 call QWORD[__imp_RtlVirtualUnwind] |
| 900 |
; Return 1 (presumably ExceptionContinueSearch -- confirm) and undo the
; handler's own pushes in reverse order.
| 901 mov eax,1 |
| 902 add rsp,64 |
| 903 popfq |
| 904 pop r15 |
| 905 pop r14 |
| 906 pop r13 |
| 907 pop r12 |
| 908 pop rbp |
| 909 pop rbx |
| 910 pop rdi |
| 911 pop rsi |
| 912 DB 0F3h,0C3h ;repret |
| 913 |
| 914 |
; Exception tables for the three functions.
; .pdata holds one triple per function: image-relative begin address, end
; address and address of its info record in .xdata.
| 915 section .pdata rdata align=4 |
| 916 ALIGN 4 |
| 917 DD $L$SEH_begin_bn_mul_mont wrt ..imagebase |
| 918 DD $L$SEH_end_bn_mul_mont wrt ..imagebase |
| 919 DD $L$SEH_info_bn_mul_mont wrt ..imagebase |
| 920 |
| 921 DD $L$SEH_begin_bn_mul4x_mont wrt ..imagebase |
| 922 DD $L$SEH_end_bn_mul4x_mont wrt ..imagebase |
| 923 DD $L$SEH_info_bn_mul4x_mont wrt ..imagebase |
| 924 |
| 925 DD $L$SEH_begin_bn_sqr8x_mont wrt ..imagebase |
| 926 DD $L$SEH_end_bn_sqr8x_mont wrt ..imagebase |
| 927 DD $L$SEH_info_bn_sqr8x_mont wrt ..imagebase |
; .xdata info records: a 4-byte header (9,0,0,0), the handler's
; image-relative address, then two image-relative labels (body, epilogue)
; that the handler reads as HandlerData[0] and HandlerData[1].
; NOTE(review): header byte 9 presumably means UNWIND_INFO version 1 with
; the exception-handler flag set and no unwind codes -- confirm.
| 928 section .xdata rdata align=8 |
| 929 ALIGN 8 |
| 930 $L$SEH_info_bn_mul_mont: |
| 931 DB 9,0,0,0 |
| 932 DD mul_handler wrt ..imagebase |
| 933 DD $L$mul_body wrt ..imagebase,$L$mul_epilogue wrt ..imagebase |
| 934 $L$SEH_info_bn_mul4x_mont: |
| 935 DB 9,0,0,0 |
| 936 DD mul_handler wrt ..imagebase |
| 937 DD $L$mul4x_body wrt ..imagebase,$L$mul4x_epilogue wrt ..imagebase |
| 938 $L$SEH_info_bn_sqr8x_mont: |
| 939 DB 9,0,0,0 |
| 940 DD sqr_handler wrt ..imagebase |
| 941 DD $L$sqr8x_body wrt ..imagebase,$L$sqr8x_epilogue wrt ..imagebase |
OLD | NEW |