OLD | NEW |
(Empty) | |
; Assembler setup. OPTION DOTNAME permits identifiers that begin with a dot,
; which the code segment name '.text$' below relies on.
| 1 OPTION DOTNAME |
| 2 .text$ SEGMENT ALIGN(64) 'CODE' |
| 3 |
; External symbol; a dword at offset +8 from it is loaded in bn_sqr8x_mont.
| 4 EXTERN OPENSSL_ia32cap_P:NEAR |
| 5 |
; bn_mul_mont is the only procedure in this file declared PUBLIC.
| 6 PUBLIC bn_mul_mont |
| 7 |
| 8 ALIGN 16 |
;-----------------------------------------------------------------------
; bn_mul_mont -- public entry point and generic (one word per iteration)
; Montgomery multiplication path.
; ABI:  Win64. Arguments arrive in rcx, rdx, r8, r9 plus two stack slots
;       and are remapped below to rdi, rsi, rdx, rcx, r8, r9.
;       NOTE(review): presumably (rp, ap, bp, np, n0, num) as in the C
;       prototype of bn_mul_mont -- confirm against the caller's header.
; After remapping:
;       rdi = destination vector
;       rsi = first operand vector
;       rdx = second operand vector (moved to r12 before the first mul)
;       rcx = vector that is multiplied by rbp and subtracted at the end
;       r8  = pointer to a single qword (dereferenced into r8)
;       r9d = word count (zero-extended into r9)
; Out:  rax = 1
; Stack: (r9+2) qwords of scratch below rsp, rounded down to 1024 bytes.
;-----------------------------------------------------------------------
| 9 bn_mul_mont PROC PUBLIC |
; Save rdi/rsi (callee-saved on Win64) into the caller-provided home slots;
; the epilogue and the SEH handler reload them from [8+rsp]/[16+rsp].
| 10 mov QWORD PTR[8+rsp],rdi ;WIN64 prologue |
| 11 mov QWORD PTR[16+rsp],rsi |
| 12 mov rax,rsp |
| 13 $L$SEH_begin_bn_mul_mont:: |
; Remap the Win64 argument registers and load the two stack arguments.
| 14 mov rdi,rcx |
| 15 mov rsi,rdx |
| 16 mov rdx,r8 |
| 17 mov rcx,r9 |
| 18 mov r8,QWORD PTR[40+rsp] |
| 19 mov r9,QWORD PTR[48+rsp] |
| 20 |
| 21 |
; Dispatch on the word count and operand aliasing:
;   count not a multiple of 4, or count < 8  -> generic path below
;   rdx != rsi (distinct operands)           -> $L$mul4x_enter
;   rdx == rsi and count a multiple of 8     -> $L$sqr8x_enter
;   otherwise                                -> $L$mul4x_enter
| 22 test r9d,3 |
| 23 jnz $L$mul_enter |
| 24 cmp r9d,8 |
| 25 jb $L$mul_enter |
| 26 cmp rdx,rsi |
| 27 jne $L$mul4x_enter |
| 28 test r9d,7 |
| 29 jz $L$sqr8x_enter |
| 30 jmp $L$mul4x_enter |
| 31 |
| 32 ALIGN 16 |
| 33 $L$mul_enter:: |
; Save the remaining callee-saved registers (6 pushes = 48 bytes).
| 34 push rbx |
| 35 push rbp |
| 36 push r12 |
| 37 push r13 |
| 38 push r14 |
| 39 push r15 |
| 40 |
; Zero-extend the count, reserve (r9+2) qwords and round rsp down to a
; 1024-byte boundary. r11 keeps the rsp value from just after the pushes.
; NOTE(review): rsp can move down by more than one page here without the
; intermediate pages being touched -- confirm whether stack probing is
; required on the target platform.
| 41 mov r9d,r9d |
| 42 lea r10,QWORD PTR[2+r9] |
| 43 mov r11,rsp |
| 44 neg r10 |
| 45 lea rsp,QWORD PTR[r10*8+rsp] |
| 46 and rsp,-1024 |
| 47 |
; Stash the post-push rsp at scratch[r9+1]; the epilogue and mul_handler
; read it back from this slot to locate the saved registers.
| 48 mov QWORD PTR[8+r9*8+rsp],r11 |
| 49 $L$mul_body:: |
; r12 = second operand pointer (rdx is clobbered by mul), r8 = *r8,
; rbx = first word of the r12 vector, rax = first word of the rsi vector.
| 50 mov r12,rdx |
| 51 mov r8,QWORD PTR[r8] |
| 52 mov rbx,QWORD PTR[r12] |
| 53 mov rax,QWORD PTR[rsi] |
| 54 |
; r14 = outer index, r15 = inner index.
| 55 xor r14,r14 |
| 56 xor r15,r15 |
| 57 |
; rbp = low 64 bits of (rsi[0]*rbx) * r8; this is the multiplier applied
; to the rcx vector for the whole first pass.
| 58 mov rbp,r8 |
| 59 mul rbx |
| 60 mov r10,rax |
| 61 mov rax,QWORD PTR[rcx] |
| 62 |
| 63 imul rbp,r10 |
| 64 mov r11,rdx |
| 65 |
; The low word of rcx[0]*rbp + r10 is discarded; only the carry (r13) is kept.
| 66 mul rbp |
| 67 add r10,rax |
| 68 mov rax,QWORD PTR[8+rsi] |
| 69 adc rdx,0 |
| 70 mov r13,rdx |
| 71 |
| 72 lea r15,QWORD PTR[1+r15] |
| 73 jmp $L$1st_enter |
| 74 |
| 75 ALIGN 16 |
; First pass inner loop: for each word index r15, accumulate rsi[r15]*rbx
; and rcx[r15]*rbp with carry propagation and store the result word into
; the scratch vector one slot behind the index.
| 76 $L$1st:: |
| 77 add r13,rax |
| 78 mov rax,QWORD PTR[r15*8+rsi] |
| 79 adc rdx,0 |
| 80 add r13,r11 |
| 81 mov r11,r10 |
| 82 adc rdx,0 |
| 83 mov QWORD PTR[((-16))+r15*8+rsp],r13 |
| 84 mov r13,rdx |
| 85 |
| 86 $L$1st_enter:: |
| 87 mul rbx |
| 88 add r11,rax |
| 89 mov rax,QWORD PTR[r15*8+rcx] |
| 90 adc rdx,0 |
| 91 lea r15,QWORD PTR[1+r15] |
| 92 mov r10,rdx |
| 93 |
| 94 mul rbp |
| 95 cmp r15,r9 |
| 96 jne $L$1st |
| 97 |
; Loop tail: finish the last word, reload rax = rsi[0] for the next pass,
; and write the two top words scratch[r9-1] and scratch[r9] (carry-out).
| 98 add r13,rax |
| 99 mov rax,QWORD PTR[rsi] |
| 100 adc rdx,0 |
| 101 add r13,r11 |
| 102 adc rdx,0 |
| 103 mov QWORD PTR[((-16))+r15*8+rsp],r13 |
| 104 mov r13,rdx |
| 105 mov r11,r10 |
| 106 |
| 107 xor rdx,rdx |
| 108 add r13,r11 |
| 109 adc rdx,0 |
| 110 mov QWORD PTR[((-8))+r9*8+rsp],r13 |
| 111 mov QWORD PTR[r9*8+rsp],rdx |
| 112 |
| 113 lea r14,QWORD PTR[1+r14] |
| 114 jmp $L$outer |
| 115 ALIGN 16 |
; Outer loop: r14 walks the words of the r12 vector. Each pass recomputes
; rbp from (scratch[0] + rsi[0]*rbx) * r8 and adds rsi[]*rbx and rcx[]*rbp
; on top of the existing scratch contents.
| 116 $L$outer:: |
| 117 mov rbx,QWORD PTR[r14*8+r12] |
| 118 xor r15,r15 |
| 119 mov rbp,r8 |
| 120 mov r10,QWORD PTR[rsp] |
| 121 mul rbx |
| 122 add r10,rax |
| 123 mov rax,QWORD PTR[rcx] |
| 124 adc rdx,0 |
| 125 |
| 126 imul rbp,r10 |
| 127 mov r11,rdx |
| 128 |
| 129 mul rbp |
| 130 add r10,rax |
| 131 mov rax,QWORD PTR[8+rsi] |
| 132 adc rdx,0 |
| 133 mov r10,QWORD PTR[8+rsp] |
| 134 mov r13,rdx |
| 135 |
| 136 lea r15,QWORD PTR[1+r15] |
| 137 jmp $L$inner_enter |
| 138 |
| 139 ALIGN 16 |
; Inner loop: same shape as $L$1st, but r10 additionally carries the
; previous scratch word so the pass accumulates into the scratch vector.
| 140 $L$inner:: |
| 141 add r13,rax |
| 142 mov rax,QWORD PTR[r15*8+rsi] |
| 143 adc rdx,0 |
| 144 add r13,r10 |
| 145 mov r10,QWORD PTR[r15*8+rsp] |
| 146 adc rdx,0 |
| 147 mov QWORD PTR[((-16))+r15*8+rsp],r13 |
| 148 mov r13,rdx |
| 149 |
| 150 $L$inner_enter:: |
| 151 mul rbx |
| 152 add r11,rax |
| 153 mov rax,QWORD PTR[r15*8+rcx] |
| 154 adc rdx,0 |
| 155 add r10,r11 |
| 156 mov r11,rdx |
| 157 adc r11,0 |
| 158 lea r15,QWORD PTR[1+r15] |
| 159 |
| 160 mul rbp |
| 161 cmp r15,r9 |
| 162 jne $L$inner |
| 163 |
; Pass tail: r10 picks up scratch[r9] (previous carry-out word) and the two
; top words are rewritten, as in the first pass.
| 164 add r13,rax |
| 165 mov rax,QWORD PTR[rsi] |
| 166 adc rdx,0 |
| 167 add r13,r10 |
| 168 mov r10,QWORD PTR[r15*8+rsp] |
| 169 adc rdx,0 |
| 170 mov QWORD PTR[((-16))+r15*8+rsp],r13 |
| 171 mov r13,rdx |
| 172 |
| 173 xor rdx,rdx |
| 174 add r13,r11 |
| 175 adc rdx,0 |
| 176 add r13,r10 |
| 177 adc rdx,0 |
| 178 mov QWORD PTR[((-8))+r9*8+rsp],r13 |
| 179 mov QWORD PTR[r9*8+rsp],rdx |
| 180 |
| 181 lea r14,QWORD PTR[1+r14] |
| 182 cmp r14,r9 |
| 183 jb $L$outer |
| 184 |
; Final subtraction: rdi[i] = scratch[i] - rcx[i] with borrow. The xor
; clears CF for the first sbb; mov/lea/dec/jnz inside the loop leave CF
; untouched, so the borrow chains from one iteration to the next.
| 185 xor r14,r14 |
| 186 mov rax,QWORD PTR[rsp] |
| 187 lea rsi,QWORD PTR[rsp] |
| 188 mov r15,r9 |
| 189 jmp $L$sub |
| 190 ALIGN 16 |
| 191 $L$sub:: sbb rax,QWORD PTR[r14*8+rcx] |
| 192 mov QWORD PTR[r14*8+rdi],rax |
| 193 mov rax,QWORD PTR[8+r14*8+rsi] |
| 194 lea r14,QWORD PTR[1+r14] |
| 195 dec r15 |
| 196 jnz $L$sub |
| 197 |
; rax holds scratch[r9] here; subtracting the final borrow from it yields
; the AND mask used by the select loop below.
| 198 sbb rax,0 |
| 199 xor r14,r14 |
| 200 mov r15,r9 |
| 201 ALIGN 16 |
; Branch-free select per word:
;   rdi[i] = ((scratch[i] ^ rdi[i]) & rax) ^ rdi[i]
; i.e. the scratch word where mask bits are set, the subtracted word where
; they are clear. Each scratch slot is then overwritten with the loop index.
| 202 $L$copy:: |
| 203 mov rsi,QWORD PTR[r14*8+rsp] |
| 204 mov rcx,QWORD PTR[r14*8+rdi] |
| 205 xor rsi,rcx |
| 206 and rsi,rax |
| 207 xor rsi,rcx |
| 208 mov QWORD PTR[r14*8+rsp],r14 |
| 209 mov QWORD PTR[r14*8+rdi],rsi |
| 210 lea r14,QWORD PTR[1+r14] |
| 211 sub r15,1 |
| 212 jnz $L$copy |
| 213 |
; Epilogue: rsi = rsp value saved at $L$mul_body, return value 1, restore
; the six pushed registers and release the frame.
| 214 mov rsi,QWORD PTR[8+r9*8+rsp] |
| 215 mov rax,1 |
| 216 mov r15,QWORD PTR[rsi] |
| 217 mov r14,QWORD PTR[8+rsi] |
| 218 mov r13,QWORD PTR[16+rsi] |
| 219 mov r12,QWORD PTR[24+rsi] |
| 220 mov rbp,QWORD PTR[32+rsi] |
| 221 mov rbx,QWORD PTR[40+rsi] |
| 222 lea rsp,QWORD PTR[48+rsi] |
| 223 $L$mul_epilogue:: |
| 224 mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue |
| 225 mov rsi,QWORD PTR[16+rsp] |
| 226 DB 0F3h,0C3h ;repret |
| 227 $L$SEH_end_bn_mul_mont:: |
| 228 bn_mul_mont ENDP |
| 229 |
| 230 ALIGN 16 |
;-----------------------------------------------------------------------
; bn_mul4x_mont -- Montgomery multiplication, inner loops unrolled to four
; words per iteration. Reached either through this PROC's own Win64
; prologue or by a jump from bn_mul_mont to $L$mul4x_enter (registers are
; already remapped in that case).
; At $L$mul4x_enter:
;       rdi = destination vector
;       rsi = first operand vector
;       rdx = second operand vector (moved to r12)
;       rcx = vector multiplied by rbp and subtracted at the end
;       r8  = pointer to a single qword (dereferenced into r8)
;       r9d = word count; bn_mul_mont only jumps here when it is a
;             multiple of 4 and at least 8
; Out:  rax = 1
; Stack: (r9+4) qwords of scratch below rsp, rounded down to 1024 bytes.
;-----------------------------------------------------------------------
| 231 bn_mul4x_mont PROC PRIVATE |
| 232 mov QWORD PTR[8+rsp],rdi ;WIN64 prologue |
| 233 mov QWORD PTR[16+rsp],rsi |
| 234 mov rax,rsp |
| 235 $L$SEH_begin_bn_mul4x_mont:: |
; Remap the Win64 argument registers and load the two stack arguments.
| 236 mov rdi,rcx |
| 237 mov rsi,rdx |
| 238 mov rdx,r8 |
| 239 mov rcx,r9 |
| 240 mov r8,QWORD PTR[40+rsp] |
| 241 mov r9,QWORD PTR[48+rsp] |
| 242 |
| 243 |
| 244 $L$mul4x_enter:: |
; Save callee-saved registers (6 pushes = 48 bytes).
| 245 push rbx |
| 246 push rbp |
| 247 push r12 |
| 248 push r13 |
| 249 push r14 |
| 250 push r15 |
| 251 |
; Zero-extend the count, reserve (r9+4) qwords and round rsp down to a
; 1024-byte boundary; r11 keeps the post-push rsp.
; NOTE(review): as in bn_mul_mont, the allocation is not probed page by
; page -- confirm whether that is required on the target platform.
| 252 mov r9d,r9d |
| 253 lea r10,QWORD PTR[4+r9] |
| 254 mov r11,rsp |
| 255 neg r10 |
| 256 lea rsp,QWORD PTR[r10*8+rsp] |
| 257 and rsp,-1024 |
| 258 |
; scratch[r9+1] = saved rsp (read by the epilogue and by mul_handler).
| 259 mov QWORD PTR[8+r9*8+rsp],r11 |
| 260 $L$mul4x_body:: |
; scratch[r9+2] = destination pointer: rdi is reused as an accumulator in
; the loops below and reloaded from this slot before the subtraction.
| 261 mov QWORD PTR[16+r9*8+rsp],rdi |
| 262 mov r12,rdx |
| 263 mov r8,QWORD PTR[r8] |
| 264 mov rbx,QWORD PTR[r12] |
| 265 mov rax,QWORD PTR[rsi] |
| 266 |
; r14 = outer index, r15 = inner index (advanced by 4).
| 267 xor r14,r14 |
| 268 xor r15,r15 |
| 269 |
; rbp = low 64 bits of (rsi[0]*rbx) * r8, the multiplier for the rcx vector.
| 270 mov rbp,r8 |
| 271 mul rbx |
| 272 mov r10,rax |
| 273 mov rax,QWORD PTR[rcx] |
| 274 |
| 275 imul rbp,r10 |
| 276 mov r11,rdx |
| 277 |
| 278 mul rbp |
| 279 add r10,rax |
| 280 mov rax,QWORD PTR[8+rsi] |
| 281 adc rdx,0 |
| 282 mov rdi,rdx |
| 283 |
| 284 mul rbx |
| 285 add r11,rax |
| 286 mov rax,QWORD PTR[8+rcx] |
| 287 adc rdx,0 |
| 288 mov r10,rdx |
| 289 |
| 290 mul rbp |
| 291 add rdi,rax |
| 292 mov rax,QWORD PTR[16+rsi] |
| 293 adc rdx,0 |
| 294 add rdi,r11 |
| 295 lea r15,QWORD PTR[4+r15] |
| 296 adc rdx,0 |
| 297 mov QWORD PTR[rsp],rdi |
| 298 mov r13,rdx |
| 299 jmp $L$1st4x |
| 300 ALIGN 16 |
; First pass, four words per iteration. r10/r11 alternate as the carry of
; the rsi[]*rbx products, r13/rdi alternate as the carry of the rcx[]*rbp
; products; result words are stored to the scratch vector.
| 301 $L$1st4x:: |
| 302 mul rbx |
| 303 add r10,rax |
| 304 mov rax,QWORD PTR[((-16))+r15*8+rcx] |
| 305 adc rdx,0 |
| 306 mov r11,rdx |
| 307 |
| 308 mul rbp |
| 309 add r13,rax |
| 310 mov rax,QWORD PTR[((-8))+r15*8+rsi] |
| 311 adc rdx,0 |
| 312 add r13,r10 |
| 313 adc rdx,0 |
| 314 mov QWORD PTR[((-24))+r15*8+rsp],r13 |
| 315 mov rdi,rdx |
| 316 |
| 317 mul rbx |
| 318 add r11,rax |
| 319 mov rax,QWORD PTR[((-8))+r15*8+rcx] |
| 320 adc rdx,0 |
| 321 mov r10,rdx |
| 322 |
| 323 mul rbp |
| 324 add rdi,rax |
| 325 mov rax,QWORD PTR[r15*8+rsi] |
| 326 adc rdx,0 |
| 327 add rdi,r11 |
| 328 adc rdx,0 |
| 329 mov QWORD PTR[((-16))+r15*8+rsp],rdi |
| 330 mov r13,rdx |
| 331 |
| 332 mul rbx |
| 333 add r10,rax |
| 334 mov rax,QWORD PTR[r15*8+rcx] |
| 335 adc rdx,0 |
| 336 mov r11,rdx |
| 337 |
| 338 mul rbp |
| 339 add r13,rax |
| 340 mov rax,QWORD PTR[8+r15*8+rsi] |
| 341 adc rdx,0 |
| 342 add r13,r10 |
| 343 adc rdx,0 |
| 344 mov QWORD PTR[((-8))+r15*8+rsp],r13 |
| 345 mov rdi,rdx |
| 346 |
| 347 mul rbx |
| 348 add r11,rax |
| 349 mov rax,QWORD PTR[8+r15*8+rcx] |
| 350 adc rdx,0 |
| 351 lea r15,QWORD PTR[4+r15] |
| 352 mov r10,rdx |
| 353 |
| 354 mul rbp |
| 355 add rdi,rax |
| 356 mov rax,QWORD PTR[((-16))+r15*8+rsi] |
| 357 adc rdx,0 |
| 358 add rdi,r11 |
| 359 adc rdx,0 |
| 360 mov QWORD PTR[((-32))+r15*8+rsp],rdi |
| 361 mov r13,rdx |
| 362 cmp r15,r9 |
| 363 jb $L$1st4x |
| 364 |
; First-pass tail: the last two word pairs, then the two top scratch words
; (scratch[r9-1] and the carry-out in scratch[r9]). rax is reloaded with
; rsi[0] for the next pass.
| 365 mul rbx |
| 366 add r10,rax |
| 367 mov rax,QWORD PTR[((-16))+r15*8+rcx] |
| 368 adc rdx,0 |
| 369 mov r11,rdx |
| 370 |
| 371 mul rbp |
| 372 add r13,rax |
| 373 mov rax,QWORD PTR[((-8))+r15*8+rsi] |
| 374 adc rdx,0 |
| 375 add r13,r10 |
| 376 adc rdx,0 |
| 377 mov QWORD PTR[((-24))+r15*8+rsp],r13 |
| 378 mov rdi,rdx |
| 379 |
| 380 mul rbx |
| 381 add r11,rax |
| 382 mov rax,QWORD PTR[((-8))+r15*8+rcx] |
| 383 adc rdx,0 |
| 384 mov r10,rdx |
| 385 |
| 386 mul rbp |
| 387 add rdi,rax |
| 388 mov rax,QWORD PTR[rsi] |
| 389 adc rdx,0 |
| 390 add rdi,r11 |
| 391 adc rdx,0 |
| 392 mov QWORD PTR[((-16))+r15*8+rsp],rdi |
| 393 mov r13,rdx |
| 394 |
| 395 xor rdi,rdi |
| 396 add r13,r10 |
| 397 adc rdi,0 |
| 398 mov QWORD PTR[((-8))+r15*8+rsp],r13 |
| 399 mov QWORD PTR[r15*8+rsp],rdi |
| 400 |
| 401 lea r14,QWORD PTR[1+r14] |
| 402 ALIGN 4 |
; Outer loop: r14 walks the words of the r12 vector. Each pass recomputes
; rbp from (scratch[0] + rsi[0]*rbx) * r8 and accumulates into the scratch
; vector (note the extra "add ..., [..+rsp]" steps versus the first pass).
| 403 $L$outer4x:: |
| 404 mov rbx,QWORD PTR[r14*8+r12] |
| 405 xor r15,r15 |
| 406 mov r10,QWORD PTR[rsp] |
| 407 mov rbp,r8 |
| 408 mul rbx |
| 409 add r10,rax |
| 410 mov rax,QWORD PTR[rcx] |
| 411 adc rdx,0 |
| 412 |
| 413 imul rbp,r10 |
| 414 mov r11,rdx |
| 415 |
| 416 mul rbp |
| 417 add r10,rax |
| 418 mov rax,QWORD PTR[8+rsi] |
| 419 adc rdx,0 |
| 420 mov rdi,rdx |
| 421 |
| 422 mul rbx |
| 423 add r11,rax |
| 424 mov rax,QWORD PTR[8+rcx] |
| 425 adc rdx,0 |
| 426 add r11,QWORD PTR[8+rsp] |
| 427 adc rdx,0 |
| 428 mov r10,rdx |
| 429 |
| 430 mul rbp |
| 431 add rdi,rax |
| 432 mov rax,QWORD PTR[16+rsi] |
| 433 adc rdx,0 |
| 434 add rdi,r11 |
| 435 lea r15,QWORD PTR[4+r15] |
| 436 adc rdx,0 |
| 437 mov QWORD PTR[rsp],rdi |
| 438 mov r13,rdx |
| 439 jmp $L$inner4x |
| 440 ALIGN 16 |
; Inner loop, four words per iteration, accumulating into scratch.
| 441 $L$inner4x:: |
| 442 mul rbx |
| 443 add r10,rax |
| 444 mov rax,QWORD PTR[((-16))+r15*8+rcx] |
| 445 adc rdx,0 |
| 446 add r10,QWORD PTR[((-16))+r15*8+rsp] |
| 447 adc rdx,0 |
| 448 mov r11,rdx |
| 449 |
| 450 mul rbp |
| 451 add r13,rax |
| 452 mov rax,QWORD PTR[((-8))+r15*8+rsi] |
| 453 adc rdx,0 |
| 454 add r13,r10 |
| 455 adc rdx,0 |
| 456 mov QWORD PTR[((-24))+r15*8+rsp],r13 |
| 457 mov rdi,rdx |
| 458 |
| 459 mul rbx |
| 460 add r11,rax |
| 461 mov rax,QWORD PTR[((-8))+r15*8+rcx] |
| 462 adc rdx,0 |
| 463 add r11,QWORD PTR[((-8))+r15*8+rsp] |
| 464 adc rdx,0 |
| 465 mov r10,rdx |
| 466 |
| 467 mul rbp |
| 468 add rdi,rax |
| 469 mov rax,QWORD PTR[r15*8+rsi] |
| 470 adc rdx,0 |
| 471 add rdi,r11 |
| 472 adc rdx,0 |
| 473 mov QWORD PTR[((-16))+r15*8+rsp],rdi |
| 474 mov r13,rdx |
| 475 |
| 476 mul rbx |
| 477 add r10,rax |
| 478 mov rax,QWORD PTR[r15*8+rcx] |
| 479 adc rdx,0 |
| 480 add r10,QWORD PTR[r15*8+rsp] |
| 481 adc rdx,0 |
| 482 mov r11,rdx |
| 483 |
| 484 mul rbp |
| 485 add r13,rax |
| 486 mov rax,QWORD PTR[8+r15*8+rsi] |
| 487 adc rdx,0 |
| 488 add r13,r10 |
| 489 adc rdx,0 |
| 490 mov QWORD PTR[((-8))+r15*8+rsp],r13 |
| 491 mov rdi,rdx |
| 492 |
| 493 mul rbx |
| 494 add r11,rax |
| 495 mov rax,QWORD PTR[8+r15*8+rcx] |
| 496 adc rdx,0 |
| 497 add r11,QWORD PTR[8+r15*8+rsp] |
| 498 adc rdx,0 |
| 499 lea r15,QWORD PTR[4+r15] |
| 500 mov r10,rdx |
| 501 |
| 502 mul rbp |
| 503 add rdi,rax |
| 504 mov rax,QWORD PTR[((-16))+r15*8+rsi] |
| 505 adc rdx,0 |
| 506 add rdi,r11 |
| 507 adc rdx,0 |
| 508 mov QWORD PTR[((-32))+r15*8+rsp],rdi |
| 509 mov r13,rdx |
| 510 cmp r15,r9 |
| 511 jb $L$inner4x |
| 512 |
; Pass tail: last two word pairs; r14 is advanced here (lea keeps flags
; intact between the add/adc pairs). The previous carry-out word
; scratch[r9] is folded into the new top words.
| 513 mul rbx |
| 514 add r10,rax |
| 515 mov rax,QWORD PTR[((-16))+r15*8+rcx] |
| 516 adc rdx,0 |
| 517 add r10,QWORD PTR[((-16))+r15*8+rsp] |
| 518 adc rdx,0 |
| 519 mov r11,rdx |
| 520 |
| 521 mul rbp |
| 522 add r13,rax |
| 523 mov rax,QWORD PTR[((-8))+r15*8+rsi] |
| 524 adc rdx,0 |
| 525 add r13,r10 |
| 526 adc rdx,0 |
| 527 mov QWORD PTR[((-24))+r15*8+rsp],r13 |
| 528 mov rdi,rdx |
| 529 |
| 530 mul rbx |
| 531 add r11,rax |
| 532 mov rax,QWORD PTR[((-8))+r15*8+rcx] |
| 533 adc rdx,0 |
| 534 add r11,QWORD PTR[((-8))+r15*8+rsp] |
| 535 adc rdx,0 |
| 536 lea r14,QWORD PTR[1+r14] |
| 537 mov r10,rdx |
| 538 |
| 539 mul rbp |
| 540 add rdi,rax |
| 541 mov rax,QWORD PTR[rsi] |
| 542 adc rdx,0 |
| 543 add rdi,r11 |
| 544 adc rdx,0 |
| 545 mov QWORD PTR[((-16))+r15*8+rsp],rdi |
| 546 mov r13,rdx |
| 547 |
| 548 xor rdi,rdi |
| 549 add r13,r10 |
| 550 adc rdi,0 |
| 551 add r13,QWORD PTR[r9*8+rsp] |
| 552 adc rdi,0 |
| 553 mov QWORD PTR[((-8))+r15*8+rsp],r13 |
| 554 mov QWORD PTR[r15*8+rsp],rdi |
| 555 |
| 556 cmp r14,r9 |
| 557 jb $L$outer4x |
; Final subtraction, four words per iteration. rdi = destination pointer
; reloaded from scratch[r9+2]; r9 becomes count/4 and r15 = count/4 - 1
; loop iterations, with the last group of four handled after the loop.
; "sub" starts the borrow chain; only mov/lea/dec/jnz sit between the
; following sbb instructions, so CF carries through.
| 558 mov rdi,QWORD PTR[16+r9*8+rsp] |
| 559 mov rax,QWORD PTR[rsp] |
| 560 mov rdx,QWORD PTR[8+rsp] |
| 561 shr r9,2 |
| 562 lea rsi,QWORD PTR[rsp] |
| 563 xor r14,r14 |
| 564 |
| 565 sub rax,QWORD PTR[rcx] |
| 566 mov rbx,QWORD PTR[16+rsi] |
| 567 mov rbp,QWORD PTR[24+rsi] |
| 568 sbb rdx,QWORD PTR[8+rcx] |
| 569 lea r15,QWORD PTR[((-1))+r9] |
| 570 jmp $L$sub4x |
| 571 ALIGN 16 |
| 572 $L$sub4x:: |
| 573 mov QWORD PTR[r14*8+rdi],rax |
| 574 mov QWORD PTR[8+r14*8+rdi],rdx |
| 575 sbb rbx,QWORD PTR[16+r14*8+rcx] |
| 576 mov rax,QWORD PTR[32+r14*8+rsi] |
| 577 mov rdx,QWORD PTR[40+r14*8+rsi] |
| 578 sbb rbp,QWORD PTR[24+r14*8+rcx] |
| 579 mov QWORD PTR[16+r14*8+rdi],rbx |
| 580 mov QWORD PTR[24+r14*8+rdi],rbp |
| 581 sbb rax,QWORD PTR[32+r14*8+rcx] |
| 582 mov rbx,QWORD PTR[48+r14*8+rsi] |
| 583 mov rbp,QWORD PTR[56+r14*8+rsi] |
| 584 sbb rdx,QWORD PTR[40+r14*8+rcx] |
| 585 lea r14,QWORD PTR[4+r14] |
| 586 dec r15 |
| 587 jnz $L$sub4x |
| 588 |
; Last group: store the pending words, then load the scratch word just
; past the result (the carry-out word) into rax for the mask computation.
| 589 mov QWORD PTR[r14*8+rdi],rax |
| 590 mov rax,QWORD PTR[32+r14*8+rsi] |
| 591 sbb rbx,QWORD PTR[16+r14*8+rcx] |
| 592 mov QWORD PTR[8+r14*8+rdi],rdx |
| 593 sbb rbp,QWORD PTR[24+r14*8+rcx] |
| 594 mov QWORD PTR[16+r14*8+rdi],rbx |
| 595 |
; rax = carry-out word minus final borrow -> select mask. The DB bytes
; encode "movq xmm0,rax"; punpcklqdq then duplicates it into both halves.
| 596 sbb rax,0 |
| 597 DB 66h, 48h, 0fh, 6eh, 0c0h |
| 598 punpcklqdq xmm0,xmm0 |
| 599 mov QWORD PTR[24+r14*8+rdi],rbp |
| 600 xor r14,r14 |
| 601 |
; r15 = count/4 iterations; xmm5 = 0 is used to wipe the scratch vector.
| 602 mov r15,r9 |
| 603 pxor xmm5,xmm5 |
| 604 jmp $L$copy4x |
| 605 ALIGN 16 |
; Branch-free select, 32 bytes per iteration (r14 is a byte offset here):
;   dst = ((scratch ^ dst) & mask) ^ dst
; followed by zeroing the scratch words. The aligned stores to rsp are
; valid because rsp was rounded down to a 1024-byte boundary.
| 606 $L$copy4x:: |
| 607 movdqu xmm2,XMMWORD PTR[r14*1+rsp] |
| 608 movdqu xmm4,XMMWORD PTR[16+r14*1+rsp] |
| 609 movdqu xmm1,XMMWORD PTR[r14*1+rdi] |
| 610 movdqu xmm3,XMMWORD PTR[16+r14*1+rdi] |
| 611 pxor xmm2,xmm1 |
| 612 pxor xmm4,xmm3 |
| 613 pand xmm2,xmm0 |
| 614 pand xmm4,xmm0 |
| 615 pxor xmm2,xmm1 |
| 616 pxor xmm4,xmm3 |
| 617 movdqu XMMWORD PTR[r14*1+rdi],xmm2 |
| 618 movdqu XMMWORD PTR[16+r14*1+rdi],xmm4 |
| 619 movdqa XMMWORD PTR[r14*1+rsp],xmm5 |
| 620 movdqa XMMWORD PTR[16+r14*1+rsp],xmm5 |
| 621 |
| 622 lea r14,QWORD PTR[32+r14] |
| 623 dec r15 |
| 624 jnz $L$copy4x |
| 625 |
; Epilogue: restore r9 to the word count so the saved-rsp slot can be
; addressed, return 1, restore the pushed registers and release the frame.
| 626 shl r9,2 |
| 627 mov rsi,QWORD PTR[8+r9*8+rsp] |
| 628 mov rax,1 |
| 629 mov r15,QWORD PTR[rsi] |
| 630 mov r14,QWORD PTR[8+rsi] |
| 631 mov r13,QWORD PTR[16+rsi] |
| 632 mov r12,QWORD PTR[24+rsi] |
| 633 mov rbp,QWORD PTR[32+rsi] |
| 634 mov rbx,QWORD PTR[40+rsi] |
| 635 lea rsp,QWORD PTR[48+rsi] |
| 636 $L$mul4x_epilogue:: |
| 637 mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue |
| 638 mov rsi,QWORD PTR[16+rsp] |
| 639 DB 0F3h,0C3h ;repret |
| 640 $L$SEH_end_bn_mul4x_mont:: |
| 641 bn_mul4x_mont ENDP |
; External helpers. Only bn_sqr8x_internal is called in the code below.
; NOTE(review): bn_sqrx8x_internal is declared but not referenced in the
; visible code -- confirm whether the declaration is still needed.
| 642 EXTERN bn_sqr8x_internal:NEAR |
| 643 EXTERN bn_sqrx8x_internal:NEAR |
| 644 |
| 645 |
| 646 ALIGN 32 |
;-----------------------------------------------------------------------
; bn_sqr8x_mont -- wrapper that sets up a stack frame, copies the rcx
; vector into it, calls bn_sqr8x_internal and then wipes the frame.
; Reached through this PROC's own Win64 prologue or by a jump from
; bn_mul_mont to $L$sqr8x_enter (taken when both operand pointers are
; equal and the word count is a multiple of 8).
; At $L$sqr8x_enter: rdi, rsi, rcx, r8, r9d as remapped by the prologue.
; Out:  rax = 1
; Frame: [32+rsp] = *r8, [40+rsp] = rsp value captured before the pushes
;        (sqr_handler and the epilogue restore registers relative to it).
;-----------------------------------------------------------------------
| 647 bn_sqr8x_mont PROC PRIVATE |
| 648 mov QWORD PTR[8+rsp],rdi ;WIN64 prologue |
| 649 mov QWORD PTR[16+rsp],rsi |
| 650 mov rax,rsp |
| 651 $L$SEH_begin_bn_sqr8x_mont:: |
; Remap the Win64 argument registers and load the two stack arguments.
| 652 mov rdi,rcx |
| 653 mov rsi,rdx |
| 654 mov rdx,r8 |
| 655 mov rcx,r9 |
| 656 mov r8,QWORD PTR[40+rsp] |
| 657 mov r9,QWORD PTR[48+rsp] |
| 658 |
| 659 |
| 660 $L$sqr8x_enter:: |
; rax = rsp before the pushes; the saved registers end up at rax-8..rax-48.
| 661 mov rax,rsp |
| 662 push rbx |
| 663 push rbp |
| 664 push r12 |
| 665 push r13 |
| 666 push r14 |
| 667 push r15 |
| 668 |
; r10 = count*32, r9 = -(count*8)  (count = zero-extended r9d).
| 669 mov r10d,r9d |
| 670 shl r9d,3 |
| 671 shl r10,3+2 |
| 672 neg r9 |
| 673 |
| 674 |
| 675 |
| 676 |
| 677 |
| 678 |
; Frame placement: r11 = low 12 bits of (candidate frame - rsi), where the
; candidate frame is rsp - 64 - count*32. Depending on how r11 compares
; with count*32, rsp is lowered by an extra amount before the allocation.
; NOTE(review): presumably this keeps the frame from sharing page offsets
; with the input at rsi -- confirm the intent with the generator source.
| 679 lea r11,QWORD PTR[((-64))+r9*4+rsp] |
| 680 mov r8,QWORD PTR[r8] |
| 681 sub r11,rsi |
| 682 and r11,4095 |
| 683 cmp r10,r11 |
| 684 jb $L$sqr8x_sp_alt |
| 685 sub rsp,r11 |
| 686 lea rsp,QWORD PTR[((-64))+r9*4+rsp] |
| 687 jmp $L$sqr8x_sp_done |
| 688 |
| 689 ALIGN 32 |
; Alternative placement: the extra adjustment is
; max(r11 - (4096 - 64 - count*32), 0); cmovc clamps a negative result to 0.
| 690 $L$sqr8x_sp_alt:: |
| 691 lea r10,QWORD PTR[((4096-64))+r9*4] |
| 692 lea rsp,QWORD PTR[((-64))+r9*4+rsp] |
| 693 sub r11,r10 |
| 694 mov r10,0 |
| 695 cmovc r11,r10 |
| 696 sub rsp,r11 |
| 697 $L$sqr8x_sp_done:: |
; Align the frame to 64 bytes; r10 = -(count*8), r9 = count*8.
| 698 and rsp,-64 |
| 699 mov r10,r9 |
| 700 neg r9 |
| 701 |
; r11 = rsp + 64 + 2*(count*8): destination of the copy loop below.
| 702 lea r11,QWORD PTR[64+r9*2+rsp] |
| 703 mov QWORD PTR[32+rsp],r8 |
| 704 mov QWORD PTR[40+rsp],rax |
| 705 $L$sqr8x_body:: |
| 706 |
; rbp = count/4 loop iterations. The DB bytes encode "movq xmm2,r11".
; eax is loaded from OPENSSL_ia32cap_P+8 but is not read again in this
; procedure. NOTE(review): the load looks vestigial unless
; bn_sqr8x_internal consumes eax -- confirm.
| 707 mov rbp,r9 |
| 708 DB 102,73,15,110,211 |
| 709 shr rbp,3+2 |
| 710 mov eax,DWORD PTR[((OPENSSL_ia32cap_P+8))] |
| 711 jmp $L$sqr8x_copy_n |
| 712 |
| 713 ALIGN 32 |
; Copy the rcx vector into the frame, four qwords per iteration. Each
; qword is loaded with movq (upper half of the xmm register zeroed) and
; stored as a full 16-byte slot, so every source word occupies 16 bytes.
| 714 $L$sqr8x_copy_n:: |
| 715 movq xmm0,QWORD PTR[rcx] |
| 716 movq xmm1,QWORD PTR[8+rcx] |
| 717 movq xmm3,QWORD PTR[16+rcx] |
| 718 movq xmm4,QWORD PTR[24+rcx] |
| 719 lea rcx,QWORD PTR[32+rcx] |
| 720 movdqa XMMWORD PTR[r11],xmm0 |
| 721 movdqa XMMWORD PTR[16+r11],xmm1 |
| 722 movdqa XMMWORD PTR[32+r11],xmm3 |
| 723 movdqa XMMWORD PTR[48+r11],xmm4 |
| 724 lea r11,QWORD PTR[64+r11] |
| 725 dec rbp |
| 726 jnz $L$sqr8x_copy_n |
| 727 |
; xmm0 = 0; the DB lines encode "movq xmm1,rdi" and "movq xmm3,r10".
; The register contract of bn_sqr8x_internal is defined outside this file.
| 728 pxor xmm0,xmm0 |
| 729 DB 102,72,15,110,207 |
| 730 DB 102,73,15,110,218 |
| 731 call bn_sqr8x_internal |
| 732 |
; Prepare to wipe the frame: rax walks up from rsp+48, rdx walks up from
; rsp+64+2*r9; r9 becomes the iteration count; rsi = saved original rsp.
| 733 pxor xmm0,xmm0 |
| 734 lea rax,QWORD PTR[48+rsp] |
| 735 lea rdx,QWORD PTR[64+r9*2+rsp] |
| 736 shr r9,3+2 |
| 737 mov rsi,QWORD PTR[40+rsp] |
| 738 jmp $L$sqr8x_zero |
| 739 |
| 740 ALIGN 32 |
; Zero 64 bytes at rax and 64 bytes at rdx per iteration.
| 741 $L$sqr8x_zero:: |
| 742 movdqa XMMWORD PTR[rax],xmm0 |
| 743 movdqa XMMWORD PTR[16+rax],xmm0 |
| 744 movdqa XMMWORD PTR[32+rax],xmm0 |
| 745 movdqa XMMWORD PTR[48+rax],xmm0 |
| 746 lea rax,QWORD PTR[64+rax] |
| 747 movdqa XMMWORD PTR[rdx],xmm0 |
| 748 movdqa XMMWORD PTR[16+rdx],xmm0 |
| 749 movdqa XMMWORD PTR[32+rdx],xmm0 |
| 750 movdqa XMMWORD PTR[48+rdx],xmm0 |
| 751 lea rdx,QWORD PTR[64+rdx] |
| 752 dec r9 |
| 753 jnz $L$sqr8x_zero |
| 754 |
; Epilogue: return 1, restore the six pushed registers from below the
; original rsp (rsi) and release the frame.
| 755 mov rax,1 |
| 756 mov r15,QWORD PTR[((-48))+rsi] |
| 757 mov r14,QWORD PTR[((-40))+rsi] |
| 758 mov r13,QWORD PTR[((-32))+rsi] |
| 759 mov r12,QWORD PTR[((-24))+rsi] |
| 760 mov rbp,QWORD PTR[((-16))+rsi] |
| 761 mov rbx,QWORD PTR[((-8))+rsi] |
| 762 lea rsp,QWORD PTR[rsi] |
| 763 $L$sqr8x_epilogue:: |
| 764 mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue |
| 765 mov rsi,QWORD PTR[16+rsp] |
| 766 DB 0F3h,0C3h ;repret |
| 767 $L$SEH_end_bn_sqr8x_mont:: |
| 768 bn_sqr8x_mont ENDP |
; NUL-terminated ASCII identification string embedded in the code segment:
; "Montgomery Multiplication for x86_64, CRYPTOGAMS by <appro@openssl.org>"
| 769 DB 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105 |
| 770 DB 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56 |
| 771 DB 54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83 |
| 772 DB 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115 |
| 773 DB 115,108,46,111,114,103,62,0 |
| 774 ALIGN 16 |
; Import-table slot for RtlVirtualUnwind, called indirectly from the
; shared tail of the exception handlers.
| 775 EXTERN __imp_RtlVirtualUnwind:NEAR |
| 776 |
| 777 ALIGN 16 |
;-----------------------------------------------------------------------
; mul_handler -- exception handler referenced from the unwind records of
; bn_mul_mont and bn_mul4x_mont. It works out the faulting function's
; entry-time rsp, patches the saved callee-saved registers into the
; context record, and jumps to $L$common_seh_tail (inside sqr_handler).
; In:   r8 = context record, r9 = dispatcher context.
;       NOTE(review): the numeric offsets below presumably map to Win64
;       CONTEXT fields (120=Rax, 144=Rbx, 152=Rsp, 160=Rbp, 192=R9,
;       216..240=R12..R15, 248=Rip) and DISPATCHER_CONTEXT fields
;       (8=ImageBase, 56=HandlerData) -- verify against winnt.h.
;-----------------------------------------------------------------------
| 778 mul_handler PROC PRIVATE |
; Preserve every register this handler touches, plus flags, and reserve
; 64 bytes for the outgoing call made in the shared tail.
| 779 push rsi |
| 780 push rdi |
| 781 push rbx |
| 782 push rbp |
| 783 push r12 |
| 784 push r13 |
| 785 push r14 |
| 786 push r15 |
| 787 pushfq |
| 788 sub rsp,64 |
| 789 |
; rax = context value at +120, rbx = context value at +248 (fault address).
| 790 mov rax,QWORD PTR[120+r8] |
| 791 mov rbx,QWORD PTR[248+r8] |
| 792 |
; rsi = base address, r11 = pointer to the two image-relative dwords that
; follow the handler reference in .xdata (body label, epilogue label).
| 793 mov rsi,QWORD PTR[8+r9] |
| 794 mov r11,QWORD PTR[56+r9] |
| 795 |
; Fault before the *_body label: the frame is not set up yet and rax still
; holds the entry rsp copied by the prologue's "mov rax,rsp".
| 796 mov r10d,DWORD PTR[r11] |
| 797 lea r10,QWORD PTR[r10*1+rsi] |
| 798 cmp rbx,r10 |
| 799 jb $L$common_seh_tail |
| 800 |
; From here on rax = context value at +152 (stack pointer at the fault).
| 801 mov rax,QWORD PTR[152+r8] |
| 802 |
; Fault at or after the *_epilogue label: registers are already restored.
| 803 mov r10d,DWORD PTR[4+r11] |
| 804 lea r10,QWORD PTR[r10*1+rsi] |
| 805 cmp rbx,r10 |
| 806 jae $L$common_seh_tail |
| 807 |
; Fault inside the body: r10 = context value at +192 (the word count kept
; in r9 by the multiply routines); the post-push rsp was stored at
; [8+r9*8+rsp], and adding 48 skips the six pushed registers.
| 808 mov r10,QWORD PTR[192+r8] |
| 809 mov rax,QWORD PTR[8+r10*8+rax] |
| 810 lea rax,QWORD PTR[48+rax] |
| 811 |
; Read the pushed rbx, rbp, r12..r15 and write them into the context.
| 812 mov rbx,QWORD PTR[((-8))+rax] |
| 813 mov rbp,QWORD PTR[((-16))+rax] |
| 814 mov r12,QWORD PTR[((-24))+rax] |
| 815 mov r13,QWORD PTR[((-32))+rax] |
| 816 mov r14,QWORD PTR[((-40))+rax] |
| 817 mov r15,QWORD PTR[((-48))+rax] |
| 818 mov QWORD PTR[144+r8],rbx |
| 819 mov QWORD PTR[160+r8],rbp |
| 820 mov QWORD PTR[216+r8],r12 |
| 821 mov QWORD PTR[224+r8],r13 |
| 822 mov QWORD PTR[232+r8],r14 |
| 823 mov QWORD PTR[240+r8],r15 |
| 824 |
| 825 jmp $L$common_seh_tail |
| 826 mul_handler ENDP |
| 827 |
| 828 |
| 829 ALIGN 16 |
;-----------------------------------------------------------------------
; sqr_handler -- exception handler referenced from the unwind record of
; bn_sqr8x_mont. Same structure as mul_handler, except that the original
; rsp is read from [40+rsp] of the faulting frame. It also contains
; $L$common_seh_tail, the tail shared with mul_handler.
; In:   r8 = context record, r9 = dispatcher context.
;       NOTE(review): offsets presumably map to Win64 CONTEXT and
;       DISPATCHER_CONTEXT fields -- verify against winnt.h.
; Out:  eax = 1
;-----------------------------------------------------------------------
| 830 sqr_handler PROC PRIVATE |
; Preserve registers and flags; reserve 64 bytes for the outgoing call.
| 831 push rsi |
| 832 push rdi |
| 833 push rbx |
| 834 push rbp |
| 835 push r12 |
| 836 push r13 |
| 837 push r14 |
| 838 push r15 |
| 839 pushfq |
| 840 sub rsp,64 |
| 841 |
; rax = context value at +120, rbx = context value at +248 (fault address).
| 842 mov rax,QWORD PTR[120+r8] |
| 843 mov rbx,QWORD PTR[248+r8] |
| 844 |
; rsi = base address, r11 = pointer to the (body, epilogue) dword pair.
| 845 mov rsi,QWORD PTR[8+r9] |
| 846 mov r11,QWORD PTR[56+r9] |
| 847 |
; Fault before $L$sqr8x_body: rax still holds the entry rsp.
| 848 mov r10d,DWORD PTR[r11] |
| 849 lea r10,QWORD PTR[r10*1+rsi] |
| 850 cmp rbx,r10 |
| 851 jb $L$common_seh_tail |
| 852 |
| 853 mov rax,QWORD PTR[152+r8] |
| 854 |
; Fault at or after $L$sqr8x_epilogue: registers are already restored.
| 855 mov r10d,DWORD PTR[4+r11] |
| 856 lea r10,QWORD PTR[r10*1+rsi] |
| 857 cmp rbx,r10 |
| 858 jae $L$common_seh_tail |
| 859 |
; Fault inside the body: bn_sqr8x_mont stored its pre-push rsp at [40+rsp].
| 860 mov rax,QWORD PTR[40+rax] |
| 861 |
; Read the pushed rbx, rbp, r12..r15 and write them into the context.
| 862 mov rbx,QWORD PTR[((-8))+rax] |
| 863 mov rbp,QWORD PTR[((-16))+rax] |
| 864 mov r12,QWORD PTR[((-24))+rax] |
| 865 mov r13,QWORD PTR[((-32))+rax] |
| 866 mov r14,QWORD PTR[((-40))+rax] |
| 867 mov r15,QWORD PTR[((-48))+rax] |
| 868 mov QWORD PTR[144+r8],rbx |
| 869 mov QWORD PTR[160+r8],rbp |
| 870 mov QWORD PTR[216+r8],r12 |
| 871 mov QWORD PTR[224+r8],r13 |
| 872 mov QWORD PTR[232+r8],r14 |
| 873 mov QWORD PTR[240+r8],r15 |
| 874 |
; Shared tail (also reached from mul_handler). rax = the faulting
; function's entry-time rsp: reload rdi/rsi from the home slots where the
; WIN64 prologue saved them, and record rsp/rsi/rdi in the context.
| 875 $L$common_seh_tail:: |
| 876 mov rdi,QWORD PTR[8+rax] |
| 877 mov rsi,QWORD PTR[16+rax] |
| 878 mov QWORD PTR[152+r8],rax |
| 879 mov QWORD PTR[168+r8],rsi |
| 880 mov QWORD PTR[176+r8],rdi |
| 881 |
; Copy 154 qwords from the context record (r8) to the buffer at [40+r9].
; The DD constant is the byte sequence FC F3 48 A5 = "cld; rep movsq".
| 882 mov rdi,QWORD PTR[40+r9] |
| 883 mov rsi,r8 |
| 884 mov ecx,154 |
| 885 DD 0a548f3fch |
| 886 |
; Call RtlVirtualUnwind with rcx = 0, rdx = [8+r9], r8 = [r9],
; r9 = [16+r9] and four stack arguments: [40+r9], &[56+r9], &[24+r9], 0.
; NOTE(review): argument meanings follow the documented RtlVirtualUnwind
; prototype -- confirm the field mapping against winnt.h.
| 887 mov rsi,r9 |
| 888 xor rcx,rcx |
| 889 mov rdx,QWORD PTR[8+rsi] |
| 890 mov r8,QWORD PTR[rsi] |
| 891 mov r9,QWORD PTR[16+rsi] |
| 892 mov r10,QWORD PTR[40+rsi] |
| 893 lea r11,QWORD PTR[56+rsi] |
| 894 lea r12,QWORD PTR[24+rsi] |
| 895 mov QWORD PTR[32+rsp],r10 |
| 896 mov QWORD PTR[40+rsp],r11 |
| 897 mov QWORD PTR[48+rsp],r12 |
| 898 mov QWORD PTR[56+rsp],rcx |
| 899 call QWORD PTR[__imp_RtlVirtualUnwind] |
| 900 |
; Return 1 (NOTE(review): presumably ExceptionContinueSearch -- confirm)
; and undo the handler's own prologue in reverse order.
| 901 mov eax,1 |
| 902 add rsp,64 |
| 903 popfq |
| 904 pop r15 |
| 905 pop r14 |
| 906 pop r13 |
| 907 pop r12 |
| 908 pop rbp |
| 909 pop rbx |
| 910 pop rdi |
| 911 pop rsi |
| 912 DB 0F3h,0C3h ;repret |
| 913 sqr_handler ENDP |
| 914 |
| 915 .text$ ENDS |
; Function table. One three-dword entry per procedure, each holding the
; image-relative addresses of its $L$SEH_begin_* label, its $L$SEH_end_*
; label and the matching $L$SEH_info_* record in .xdata.
| 916 .pdata SEGMENT READONLY ALIGN(4) |
| 917 ALIGN 4 |
| 918 DD imagerel $L$SEH_begin_bn_mul_mont |
| 919 DD imagerel $L$SEH_end_bn_mul_mont |
| 920 DD imagerel $L$SEH_info_bn_mul_mont |
| 921 |
| 922 DD imagerel $L$SEH_begin_bn_mul4x_mont |
| 923 DD imagerel $L$SEH_end_bn_mul4x_mont |
| 924 DD imagerel $L$SEH_info_bn_mul4x_mont |
| 925 |
| 926 DD imagerel $L$SEH_begin_bn_sqr8x_mont |
| 927 DD imagerel $L$SEH_end_bn_sqr8x_mont |
| 928 DD imagerel $L$SEH_info_bn_sqr8x_mont |
| 929 .pdata ENDS |
; Unwind records referenced from .pdata. Each record is a 4-byte header,
; the image-relative address of the handler procedure, and two
; image-relative labels (body start, epilogue start) that the handler
; reads as a pair of dwords to classify the faulting address.
; NOTE(review): header bytes 9,0,0,0 presumably mean UNWIND_INFO version 1
; with the exception-handler flag and no unwind codes -- verify against
; the documented UNWIND_INFO layout.
| 930 .xdata SEGMENT READONLY ALIGN(8) |
| 931 ALIGN 8 |
| 932 $L$SEH_info_bn_mul_mont:: |
| 933 DB 9,0,0,0 |
| 934 DD imagerel mul_handler |
| 935 DD imagerel $L$mul_body,imagerel $L$mul_epilogue |
| 936 $L$SEH_info_bn_mul4x_mont:: |
| 937 DB 9,0,0,0 |
| 938 DD imagerel mul_handler |
| 939 DD imagerel $L$mul4x_body,imagerel $L$mul4x_epilogue |
| 940 $L$SEH_info_bn_sqr8x_mont:: |
| 941 DB 9,0,0,0 |
| 942 DD imagerel sqr_handler |
| 943 DD imagerel $L$sqr8x_body,imagerel $L$sqr8x_epilogue |
| 944 |
| 945 .xdata ENDS |
| 946 END |
OLD | NEW |