OLD | NEW |
(Empty) | |
| 1 #if defined(__x86_64__) |
| 2 .text |
| 3 |
| 4 |
| 5 |
# _rsaz_512_sqr
# Squares the 8-qword (512-bit) value at (%rsi), reduces it with
# __rsaz_512_reduce / __rsaz_512_subtract, stores the 8-qword result at
# (%rdi), and repeats with the result as the new input until the counter
# taken from %r8d reaches zero.
#
# Inputs, as read by the code below (System V argument registers):
#   %rdi  destination buffer, 8 qwords
#   %rsi  source buffer, 8 qwords
#   %rdx  pointer copied to %rbp and dereferenced by reduce/subtract
#         (presumably the modulus -- confirm against the C prototype)
#   %rcx  qword parked at 128(%rsp); __rsaz_512_reduce multiplies by it
#         (presumably the Montgomery constant n0 -- confirm)
#   %r8d  iteration count; the loop is bottom-tested, so a count of zero
#         is not special-cased
#
# Frame: six callee-saved pushes plus 128+24 bytes.  0..127(%rsp) receives
# the 16-qword square, 128(%rsp) the %rcx constant, 128+8(%rsp) the counter.
# Below, a[i] denotes qword i of the buffer at (%rsi).
#
# NOTE(review): in each doubling step below the top bit of a word is kept
# via shrq $63, but the carry out of the trailing adcq $0 that follows the
# diagonal square is not propagated anywhere.  This looks like the pre-fix
# code described in CVE-2019-1551 (rsaz_512_sqr overflow) -- confirm
# against upstream OpenSSL before relying on this routine.
| 6 .globl _rsaz_512_sqr |
| 7 .private_extern _rsaz_512_sqr |
| 8 |
| 9 .p2align 5 |
| 10 _rsaz_512_sqr: |
| 11 pushq %rbx |
| 12 pushq %rbp |
| 13 pushq %r12 |
| 14 pushq %r13 |
| 15 pushq %r14 |
| 16 pushq %r15 |
| 17 |
| 18 subq $128+24,%rsp |
| 19 L$sqr_body: |
# %rbp = third argument; %rdx:%rax are preloaded with a[0], a[1] because the
# loop body expects them there (the loop tail reloads them from the result).
| 20 movq %rdx,%rbp |
| 21 movq (%rsi),%rdx |
| 22 movq 8(%rsi),%rax |
| 23 movq %rcx,128(%rsp) |
| 24 jmp L$oop_sqr |
| 25 |
| 26 .p2align 5 |
| 27 L$oop_sqr: |
# Spill the remaining iteration count; %r8 is reused as an accumulator.
| 28 movl %r8d,128+8(%rsp) |
| 29 |
# Row 0: cross products a[0]*a[1..7] into %r8..%r15 (least significant first).
| 30 movq %rdx,%rbx |
| 31 mulq %rdx |
| 32 movq %rax,%r8 |
| 33 movq 16(%rsi),%rax |
| 34 movq %rdx,%r9 |
| 35 |
| 36 mulq %rbx |
| 37 addq %rax,%r9 |
| 38 movq 24(%rsi),%rax |
| 39 movq %rdx,%r10 |
| 40 adcq $0,%r10 |
| 41 |
| 42 mulq %rbx |
| 43 addq %rax,%r10 |
| 44 movq 32(%rsi),%rax |
| 45 movq %rdx,%r11 |
| 46 adcq $0,%r11 |
| 47 |
| 48 mulq %rbx |
| 49 addq %rax,%r11 |
| 50 movq 40(%rsi),%rax |
| 51 movq %rdx,%r12 |
| 52 adcq $0,%r12 |
| 53 |
| 54 mulq %rbx |
| 55 addq %rax,%r12 |
| 56 movq 48(%rsi),%rax |
| 57 movq %rdx,%r13 |
| 58 adcq $0,%r13 |
| 59 |
| 60 mulq %rbx |
| 61 addq %rax,%r13 |
| 62 movq 56(%rsi),%rax |
| 63 movq %rdx,%r14 |
| 64 adcq $0,%r14 |
| 65 |
| 66 mulq %rbx |
| 67 addq %rax,%r14 |
| 68 movq %rbx,%rax |
| 69 movq %rdx,%r15 |
| 70 adcq $0,%r15 |
| 71 |
# Double %r8 and %r9 (cross terms count twice); %rcx keeps the pre-doubling
# %r9 so that its top bit can be fed into the next word later.
| 72 addq %r8,%r8 |
| 73 movq %r9,%rcx |
| 74 adcq %r9,%r9 |
| 75 |
# Add the diagonal term a[0]*a[0]; result words 0 and 1 go to the stack.
| 76 mulq %rax |
| 77 movq %rax,(%rsp) |
| 78 addq %rdx,%r8 |
| 79 adcq $0,%r9 |
| 80 |
| 81 movq %r8,8(%rsp) |
| 82 shrq $63,%rcx |
| 83 |
| 84 |
# Row 1: cross products a[1]*a[2..7] added into %r10..%r15; new top word in %r8.
| 85 movq 8(%rsi),%r8 |
| 86 movq 16(%rsi),%rax |
| 87 mulq %r8 |
| 88 addq %rax,%r10 |
| 89 movq 24(%rsi),%rax |
| 90 movq %rdx,%rbx |
| 91 adcq $0,%rbx |
| 92 |
| 93 mulq %r8 |
| 94 addq %rax,%r11 |
| 95 movq 32(%rsi),%rax |
| 96 adcq $0,%rdx |
| 97 addq %rbx,%r11 |
| 98 movq %rdx,%rbx |
| 99 adcq $0,%rbx |
| 100 |
| 101 mulq %r8 |
| 102 addq %rax,%r12 |
| 103 movq 40(%rsi),%rax |
| 104 adcq $0,%rdx |
| 105 addq %rbx,%r12 |
| 106 movq %rdx,%rbx |
| 107 adcq $0,%rbx |
| 108 |
| 109 mulq %r8 |
| 110 addq %rax,%r13 |
| 111 movq 48(%rsi),%rax |
| 112 adcq $0,%rdx |
| 113 addq %rbx,%r13 |
| 114 movq %rdx,%rbx |
| 115 adcq $0,%rbx |
| 116 |
| 117 mulq %r8 |
| 118 addq %rax,%r14 |
| 119 movq 56(%rsi),%rax |
| 120 adcq $0,%rdx |
| 121 addq %rbx,%r14 |
| 122 movq %rdx,%rbx |
| 123 adcq $0,%rbx |
| 124 |
| 125 mulq %r8 |
| 126 addq %rax,%r15 |
| 127 movq %r8,%rax |
| 128 adcq $0,%rdx |
| 129 addq %rbx,%r15 |
| 130 movq %rdx,%r8 |
| 131 movq %r10,%rdx |
| 132 adcq $0,%r8 |
| 133 |
# Double %r10 and %r11: the addq on the %rdx copy only produces CF = top bit
# of %r10, leaq forms 2*%r10 plus the bit saved in %rcx without touching
# flags, and adcq doubles %r11 while absorbing that CF.
| 134 addq %rdx,%rdx |
| 135 leaq (%rcx,%r10,2),%r10 |
| 136 movq %r11,%rbx |
| 137 adcq %r11,%r11 |
| 138 |
# Add the diagonal term a[1]*a[1]; result words 2 and 3 go to the stack.
| 139 mulq %rax |
| 140 addq %rax,%r9 |
| 141 adcq %rdx,%r10 |
| 142 adcq $0,%r11 |
| 143 |
| 144 movq %r9,16(%rsp) |
| 145 movq %r10,24(%rsp) |
| 146 shrq $63,%rbx |
| 147 |
| 148 |
# Row 2: cross products a[2]*a[3..7] added into %r12..%r15 and %r8; new top
# word in %r9.  The doubling of %r12/%r13 is interleaved with the multiplies.
| 149 movq 16(%rsi),%r9 |
| 150 movq 24(%rsi),%rax |
| 151 mulq %r9 |
| 152 addq %rax,%r12 |
| 153 movq 32(%rsi),%rax |
| 154 movq %rdx,%rcx |
| 155 adcq $0,%rcx |
| 156 |
| 157 mulq %r9 |
| 158 addq %rax,%r13 |
| 159 movq 40(%rsi),%rax |
| 160 adcq $0,%rdx |
| 161 addq %rcx,%r13 |
| 162 movq %rdx,%rcx |
| 163 adcq $0,%rcx |
| 164 |
| 165 mulq %r9 |
| 166 addq %rax,%r14 |
| 167 movq 48(%rsi),%rax |
| 168 adcq $0,%rdx |
| 169 addq %rcx,%r14 |
| 170 movq %rdx,%rcx |
| 171 adcq $0,%rcx |
| 172 |
| 173 mulq %r9 |
| 174 movq %r12,%r10 |
| 175 leaq (%rbx,%r12,2),%r12 |
| 176 addq %rax,%r15 |
| 177 movq 56(%rsi),%rax |
| 178 adcq $0,%rdx |
| 179 addq %rcx,%r15 |
| 180 movq %rdx,%rcx |
| 181 adcq $0,%rcx |
| 182 |
| 183 mulq %r9 |
| 184 shrq $63,%r10 |
| 185 addq %rax,%r8 |
| 186 movq %r9,%rax |
| 187 adcq $0,%rdx |
| 188 addq %rcx,%r8 |
| 189 movq %rdx,%r9 |
| 190 adcq $0,%r9 |
| 191 |
| 192 movq %r13,%rcx |
| 193 leaq (%r10,%r13,2),%r13 |
| 194 |
# Add the diagonal term a[2]*a[2]; result words 4 and 5 go to the stack.
| 195 mulq %rax |
| 196 addq %rax,%r11 |
| 197 adcq %rdx,%r12 |
| 198 adcq $0,%r13 |
| 199 |
| 200 movq %r11,32(%rsp) |
| 201 movq %r12,40(%rsp) |
| 202 shrq $63,%rcx |
| 203 |
| 204 |
# Row 3: cross products a[3]*a[4..7] added into %r14, %r15, %r8, %r9; new top
# word in %r10.
| 205 movq 24(%rsi),%r10 |
| 206 movq 32(%rsi),%rax |
| 207 mulq %r10 |
| 208 addq %rax,%r14 |
| 209 movq 40(%rsi),%rax |
| 210 movq %rdx,%rbx |
| 211 adcq $0,%rbx |
| 212 |
| 213 mulq %r10 |
| 214 addq %rax,%r15 |
| 215 movq 48(%rsi),%rax |
| 216 adcq $0,%rdx |
| 217 addq %rbx,%r15 |
| 218 movq %rdx,%rbx |
| 219 adcq $0,%rbx |
| 220 |
| 221 mulq %r10 |
| 222 movq %r14,%r12 |
| 223 leaq (%rcx,%r14,2),%r14 |
| 224 addq %rax,%r8 |
| 225 movq 56(%rsi),%rax |
| 226 adcq $0,%rdx |
| 227 addq %rbx,%r8 |
| 228 movq %rdx,%rbx |
| 229 adcq $0,%rbx |
| 230 |
| 231 mulq %r10 |
| 232 shrq $63,%r12 |
| 233 addq %rax,%r9 |
| 234 movq %r10,%rax |
| 235 adcq $0,%rdx |
| 236 addq %rbx,%r9 |
| 237 movq %rdx,%r10 |
| 238 adcq $0,%r10 |
| 239 |
| 240 movq %r15,%rbx |
| 241 leaq (%r12,%r15,2),%r15 |
| 242 |
# Add the diagonal term a[3]*a[3]; result words 6 and 7 go to the stack.
| 243 mulq %rax |
| 244 addq %rax,%r13 |
| 245 adcq %rdx,%r14 |
| 246 adcq $0,%r15 |
| 247 |
| 248 movq %r13,48(%rsp) |
| 249 movq %r14,56(%rsp) |
| 250 shrq $63,%rbx |
| 251 |
| 252 |
# Row 4: cross products a[4]*a[5..7] added into %r8, %r9, %r10; new top word
# in %r11.
| 253 movq 32(%rsi),%r11 |
| 254 movq 40(%rsi),%rax |
| 255 mulq %r11 |
| 256 addq %rax,%r8 |
| 257 movq 48(%rsi),%rax |
| 258 movq %rdx,%rcx |
| 259 adcq $0,%rcx |
| 260 |
| 261 mulq %r11 |
| 262 addq %rax,%r9 |
| 263 movq 56(%rsi),%rax |
| 264 adcq $0,%rdx |
| 265 movq %r8,%r12 |
| 266 leaq (%rbx,%r8,2),%r8 |
| 267 addq %rcx,%r9 |
| 268 movq %rdx,%rcx |
| 269 adcq $0,%rcx |
| 270 |
| 271 mulq %r11 |
| 272 shrq $63,%r12 |
| 273 addq %rax,%r10 |
| 274 movq %r11,%rax |
| 275 adcq $0,%rdx |
| 276 addq %rcx,%r10 |
| 277 movq %rdx,%r11 |
| 278 adcq $0,%r11 |
| 279 |
| 280 movq %r9,%rcx |
| 281 leaq (%r12,%r9,2),%r9 |
| 282 |
# Add the diagonal term a[4]*a[4]; result words 8 and 9 go to the stack.
| 283 mulq %rax |
| 284 addq %rax,%r15 |
| 285 adcq %rdx,%r8 |
| 286 adcq $0,%r9 |
| 287 |
| 288 movq %r15,64(%rsp) |
| 289 movq %r8,72(%rsp) |
| 290 shrq $63,%rcx |
| 291 |
| 292 |
# Row 5: cross products a[5]*a[6..7] added into %r10, %r11; new top word in
# %r12.
| 293 movq 40(%rsi),%r12 |
| 294 movq 48(%rsi),%rax |
| 295 mulq %r12 |
| 296 addq %rax,%r10 |
| 297 movq 56(%rsi),%rax |
| 298 movq %rdx,%rbx |
| 299 adcq $0,%rbx |
| 300 |
| 301 mulq %r12 |
| 302 addq %rax,%r11 |
| 303 movq %r12,%rax |
| 304 movq %r10,%r15 |
| 305 leaq (%rcx,%r10,2),%r10 |
| 306 adcq $0,%rdx |
| 307 shrq $63,%r15 |
| 308 addq %rbx,%r11 |
| 309 movq %rdx,%r12 |
| 310 adcq $0,%r12 |
| 311 |
| 312 movq %r11,%rbx |
| 313 leaq (%r15,%r11,2),%r11 |
| 314 |
# Add the diagonal term a[5]*a[5]; result words 10 and 11 go to the stack.
| 315 mulq %rax |
| 316 addq %rax,%r9 |
| 317 adcq %rdx,%r10 |
| 318 adcq $0,%r11 |
| 319 |
| 320 movq %r9,80(%rsp) |
| 321 movq %r10,88(%rsp) |
| 322 |
| 323 |
# Row 6: the single cross product a[6]*a[7] added into %r12; top word in %r13.
| 324 movq 48(%rsi),%r13 |
| 325 movq 56(%rsi),%rax |
| 326 mulq %r13 |
| 327 addq %rax,%r12 |
| 328 movq %r13,%rax |
| 329 movq %rdx,%r13 |
| 330 adcq $0,%r13 |
| 331 |
# Double %r12:%r13 into %r12:%r13:%r14.  The shlq on %rbx (pre-doubling copy
# of %r11) supplies that word's top bit as the incoming carry.
| 332 xorq %r14,%r14 |
| 333 shlq $1,%rbx |
| 334 adcq %r12,%r12 |
| 335 adcq %r13,%r13 |
| 336 adcq %r14,%r14 |
| 337 |
# Add the diagonal term a[6]*a[6]; result words 12 and 13 go to the stack.
| 338 mulq %rax |
| 339 addq %rax,%r11 |
| 340 adcq %rdx,%r12 |
| 341 adcq $0,%r13 |
| 342 |
| 343 movq %r11,96(%rsp) |
| 344 movq %r12,104(%rsp) |
| 345 |
| 346 |
# Add the diagonal term a[7]*a[7]; result words 14 and 15 go to the stack.
| 347 movq 56(%rsi),%rax |
| 348 mulq %rax |
| 349 addq %rax,%r13 |
| 350 adcq $0,%rdx |
| 351 |
| 352 addq %rdx,%r14 |
| 353 |
| 354 movq %r13,112(%rsp) |
| 355 movq %r14,120(%rsp) |
| 356 |
# Reduce: the low eight qwords of the square are handed to
# __rsaz_512_reduce in %r8..%r15.
| 357 movq (%rsp),%r8 |
| 358 movq 8(%rsp),%r9 |
| 359 movq 16(%rsp),%r10 |
| 360 movq 24(%rsp),%r11 |
| 361 movq 32(%rsp),%r12 |
| 362 movq 40(%rsp),%r13 |
| 363 movq 48(%rsp),%r14 |
| 364 movq 56(%rsp),%r15 |
| 365 |
| 366 call __rsaz_512_reduce |
| 367 |
# Add the high eight qwords of the square; sbbq turns the final carry into
# an all-ones / all-zeros mask in %rcx for __rsaz_512_subtract.
| 368 addq 64(%rsp),%r8 |
| 369 adcq 72(%rsp),%r9 |
| 370 adcq 80(%rsp),%r10 |
| 371 adcq 88(%rsp),%r11 |
| 372 adcq 96(%rsp),%r12 |
| 373 adcq 104(%rsp),%r13 |
| 374 adcq 112(%rsp),%r14 |
| 375 adcq 120(%rsp),%r15 |
| 376 sbbq %rcx,%rcx |
| 377 |
| 378 call __rsaz_512_subtract |
| 379 |
# Set up the next pass: __rsaz_512_subtract leaves result words 0 and 1 in
# %r8/%r9, which become the preloaded a[0]/a[1]; the destination becomes the
# new source (reduce overwrote %rsi anyway).
| 380 movq %r8,%rdx |
| 381 movq %r9,%rax |
| 382 movl 128+8(%rsp),%r8d |
| 383 movq %rdi,%rsi |
| 384 |
| 385 decl %r8d |
| 386 jnz L$oop_sqr |
| 387 |
# Epilogue: %rax points just above the six saved registers; restore them and
# drop the frame.
| 388 leaq 128+24+48(%rsp),%rax |
| 389 movq -48(%rax),%r15 |
| 390 movq -40(%rax),%r14 |
| 391 movq -32(%rax),%r13 |
| 392 movq -24(%rax),%r12 |
| 393 movq -16(%rax),%rbp |
| 394 movq -8(%rax),%rbx |
| 395 leaq (%rax),%rsp |
| 396 L$sqr_epilogue: |
# Bytes f3 c3 encode: rep ret
| 397 .byte 0xf3,0xc3 |
| 398 |
# _rsaz_512_mul
# Multiplies the 8-qword operands at (%rsi) and (%rdx) via __rsaz_512_mul,
# then applies __rsaz_512_reduce and the masked __rsaz_512_subtract.  The
# 8-qword result is written to (%rdi).
#
# Inputs, as read by the code below:
#   %rdi  destination buffer, 8 qwords
#   %rsi  first operand, 8 qwords
#   %rdx  second operand, 8 qwords
#   %rcx  pointer restored into %rbp for reduce/subtract
#         (presumably the modulus -- confirm against the C prototype)
#   %r8   qword stored at 128(%rsp), the slot __rsaz_512_reduce multiplies by
#         (presumably the Montgomery constant n0 -- confirm)
# %xmm0 and %xmm1 are used as scratch to carry %rdi and %rcx across the call.
| 399 .globl _rsaz_512_mul |
| 400 .private_extern _rsaz_512_mul |
| 401 |
| 402 .p2align 5 |
| 403 _rsaz_512_mul: |
| 404 pushq %rbx |
| 405 pushq %rbp |
| 406 pushq %r12 |
| 407 pushq %r13 |
| 408 pushq %r14 |
| 409 pushq %r15 |
| 410 |
| 411 subq $128+24,%rsp |
| 412 L$mul_body: |
# Bytes encode: movq %rdi,%xmm0 (the helper overwrites %rdi)
| 413 .byte 102,72,15,110,199 |
# Bytes encode: movq %rcx,%xmm1 (the helper advances %rbp, so the fourth
# argument is parked here until after the multiply)
| 414 .byte 102,72,15,110,201 |
| 415 movq %r8,128(%rsp) |
# Helper contract: %rbx = first qword of the second operand, %rbp = pointer
# to the second operand.
| 416 movq (%rdx),%rbx |
| 417 movq %rdx,%rbp |
| 418 call __rsaz_512_mul |
| 419 |
# Bytes encode: movq %xmm0,%rdi
| 420 .byte 102,72,15,126,199 |
# Bytes encode: movq %xmm1,%rbp
| 421 .byte 102,72,15,126,205 |
| 422 |
# Low eight qwords of the 16-qword product go to the reduction in %r8..%r15.
| 423 movq (%rsp),%r8 |
| 424 movq 8(%rsp),%r9 |
| 425 movq 16(%rsp),%r10 |
| 426 movq 24(%rsp),%r11 |
| 427 movq 32(%rsp),%r12 |
| 428 movq 40(%rsp),%r13 |
| 429 movq 48(%rsp),%r14 |
| 430 movq 56(%rsp),%r15 |
| 431 |
| 432 call __rsaz_512_reduce |
# Add the high eight qwords; sbbq turns the final carry into the subtract mask.
| 433 addq 64(%rsp),%r8 |
| 434 adcq 72(%rsp),%r9 |
| 435 adcq 80(%rsp),%r10 |
| 436 adcq 88(%rsp),%r11 |
| 437 adcq 96(%rsp),%r12 |
| 438 adcq 104(%rsp),%r13 |
| 439 adcq 112(%rsp),%r14 |
| 440 adcq 120(%rsp),%r15 |
| 441 sbbq %rcx,%rcx |
| 442 |
| 443 call __rsaz_512_subtract |
| 444 |
# Epilogue: restore the six saved registers and drop the frame.
| 445 leaq 128+24+48(%rsp),%rax |
| 446 movq -48(%rax),%r15 |
| 447 movq -40(%rax),%r14 |
| 448 movq -32(%rax),%r13 |
| 449 movq -24(%rax),%r12 |
| 450 movq -16(%rax),%rbp |
| 451 movq -8(%rax),%rbx |
| 452 leaq (%rax),%rsp |
| 453 L$mul_epilogue: |
# Bytes f3 c3 encode: rep ret
| 454 .byte 0xf3,0xc3 |
| 455 |
# _rsaz_512_mul_gather4
# Multiplies the 8-qword operand at (%rsi) by an 8-qword value gathered from
# a table, then applies __rsaz_512_reduce and the masked
# __rsaz_512_subtract.  The 8-qword result is written to (%rdi).
#
# Inputs, as read by the code below:
#   %rdi  destination buffer, 8 qwords
#   %rsi  multiplicand, 8 qwords
#   %rdx  table base
#   %rcx  pointer restored into %rbp for reduce/subtract
#         (presumably the modulus -- confirm against the C prototype)
#   %r8   qword stored at 128(%rsp), the slot __rsaz_512_reduce multiplies by
#         (presumably the Montgomery constant n0 -- confirm)
#   %r9d  table index (zero-extended below)
#
# Table layout implied by the addressing: qword k of entry idx has its low
# dword at tbl + 128*k + 4*idx and its high dword 64 bytes after that.
#
# NOTE(review): every table address is derived directly from the index, so
# the memory access pattern depends on it.  Should the index be secret, this
# is a cache-timing concern (compare CVE-2016-0702) -- confirm threat model.
# NOTE(review): the last pass of the loop still loads a ninth row, at
# 1024(tbl,idx,4) and 1088(tbl,idx,4), whose value is never used.  Whether
# that read stays inside the table cannot be determined from this file.
| 456 .globl _rsaz_512_mul_gather4 |
| 457 .private_extern _rsaz_512_mul_gather4 |
| 458 |
| 459 .p2align 5 |
| 460 _rsaz_512_mul_gather4: |
| 461 pushq %rbx |
| 462 pushq %rbp |
| 463 pushq %r12 |
| 464 pushq %r13 |
| 465 pushq %r14 |
| 466 pushq %r15 |
| 467 |
# Zero-extend the 32-bit index so it can be used in 64-bit addressing.
| 468 movl %r9d,%r9d |
| 469 subq $128+24,%rsp |
| 470 L$mul_gather4_body: |
# Gather qword 0 of the multiplier into %rbx (high dword first, then low).
| 471 movl 64(%rdx,%r9,4),%eax |
# Bytes encode: movq %rdi,%xmm0
| 472 .byte 102,72,15,110,199 |
| 473 movl (%rdx,%r9,4),%ebx |
# Bytes encode: movq %rcx,%xmm1
| 474 .byte 102,72,15,110,201 |
| 475 movq %r8,128(%rsp) |
| 476 |
| 477 shlq $32,%rax |
| 478 orq %rax,%rbx |
| 479 movq (%rsi),%rax |
| 480 movq 8(%rsi),%rcx |
# %rbp walks the table rows, starting at the row holding qword 1.
| 481 leaq 128(%rdx,%r9,4),%rbp |
# First row of the product: a[0..7] times multiplier qword 0.  The lowest
# word goes to (%rsp); the rest stay in %r8..%r15.  The SSE instructions
# interleaved below gather the next multiplier qword into %xmm4.
| 482 mulq %rbx |
| 483 movq %rax,(%rsp) |
| 484 movq %rcx,%rax |
| 485 movq %rdx,%r8 |
| 486 |
| 487 mulq %rbx |
| 488 movd (%rbp),%xmm4 |
| 489 addq %rax,%r8 |
| 490 movq 16(%rsi),%rax |
| 491 movq %rdx,%r9 |
| 492 adcq $0,%r9 |
| 493 |
| 494 mulq %rbx |
| 495 movd 64(%rbp),%xmm5 |
| 496 addq %rax,%r9 |
| 497 movq 24(%rsi),%rax |
| 498 movq %rdx,%r10 |
| 499 adcq $0,%r10 |
| 500 |
| 501 mulq %rbx |
| 502 pslldq $4,%xmm5 |
| 503 addq %rax,%r10 |
| 504 movq 32(%rsi),%rax |
| 505 movq %rdx,%r11 |
| 506 adcq $0,%r11 |
| 507 |
| 508 mulq %rbx |
| 509 por %xmm5,%xmm4 |
| 510 addq %rax,%r11 |
| 511 movq 40(%rsi),%rax |
| 512 movq %rdx,%r12 |
| 513 adcq $0,%r12 |
| 514 |
| 515 mulq %rbx |
| 516 addq %rax,%r12 |
| 517 movq 48(%rsi),%rax |
| 518 movq %rdx,%r13 |
| 519 adcq $0,%r13 |
| 520 |
| 521 mulq %rbx |
| 522 leaq 128(%rbp),%rbp |
| 523 addq %rax,%r13 |
| 524 movq 56(%rsi),%rax |
| 525 movq %rdx,%r14 |
| 526 adcq $0,%r14 |
| 527 |
| 528 mulq %rbx |
# Bytes encode: movq %xmm4,%rbx (multiplier qword for the next row)
| 529 .byte 102,72,15,126,227 |
| 530 addq %rax,%r14 |
| 531 movq (%rsi),%rax |
| 532 movq %rdx,%r15 |
| 533 adcq $0,%r15 |
| 534 |
# %rdi now serves as the store cursor into the stack product buffer; seven
# more rows follow.
| 535 leaq 8(%rsp),%rdi |
| 536 movl $7,%ecx |
| 537 jmp L$oop_mul_gather |
| 538 |
| 539 .p2align 5 |
| 540 L$oop_mul_gather: |
# One row per pass: add a[0..7] * %rbx into the running window %r8..%r15,
# retire the lowest word to (%rdi), and gather the following multiplier
# qword through %xmm4/%xmm5.
| 541 mulq %rbx |
| 542 addq %rax,%r8 |
| 543 movq 8(%rsi),%rax |
| 544 movq %r8,(%rdi) |
| 545 movq %rdx,%r8 |
| 546 adcq $0,%r8 |
| 547 |
| 548 mulq %rbx |
| 549 movd (%rbp),%xmm4 |
| 550 addq %rax,%r9 |
| 551 movq 16(%rsi),%rax |
| 552 adcq $0,%rdx |
| 553 addq %r9,%r8 |
| 554 movq %rdx,%r9 |
| 555 adcq $0,%r9 |
| 556 |
| 557 mulq %rbx |
| 558 movd 64(%rbp),%xmm5 |
| 559 addq %rax,%r10 |
| 560 movq 24(%rsi),%rax |
| 561 adcq $0,%rdx |
| 562 addq %r10,%r9 |
| 563 movq %rdx,%r10 |
| 564 adcq $0,%r10 |
| 565 |
| 566 mulq %rbx |
| 567 pslldq $4,%xmm5 |
| 568 addq %rax,%r11 |
| 569 movq 32(%rsi),%rax |
| 570 adcq $0,%rdx |
| 571 addq %r11,%r10 |
| 572 movq %rdx,%r11 |
| 573 adcq $0,%r11 |
| 574 |
| 575 mulq %rbx |
| 576 por %xmm5,%xmm4 |
| 577 addq %rax,%r12 |
| 578 movq 40(%rsi),%rax |
| 579 adcq $0,%rdx |
| 580 addq %r12,%r11 |
| 581 movq %rdx,%r12 |
| 582 adcq $0,%r12 |
| 583 |
| 584 mulq %rbx |
| 585 addq %rax,%r13 |
| 586 movq 48(%rsi),%rax |
| 587 adcq $0,%rdx |
| 588 addq %r13,%r12 |
| 589 movq %rdx,%r13 |
| 590 adcq $0,%r13 |
| 591 |
| 592 mulq %rbx |
| 593 addq %rax,%r14 |
| 594 movq 56(%rsi),%rax |
| 595 adcq $0,%rdx |
| 596 addq %r14,%r13 |
| 597 movq %rdx,%r14 |
| 598 adcq $0,%r14 |
| 599 |
| 600 mulq %rbx |
# Bytes encode: movq %xmm4,%rbx
| 601 .byte 102,72,15,126,227 |
| 602 addq %rax,%r15 |
| 603 movq (%rsi),%rax |
| 604 adcq $0,%rdx |
| 605 addq %r15,%r14 |
| 606 movq %rdx,%r15 |
| 607 adcq $0,%r15 |
| 608 |
| 609 leaq 128(%rbp),%rbp |
| 610 leaq 8(%rdi),%rdi |
| 611 |
| 612 decl %ecx |
| 613 jnz L$oop_mul_gather |
| 614 |
# Flush the remaining eight words of the window: these are the high half of
# the 16-qword product.
| 615 movq %r8,(%rdi) |
| 616 movq %r9,8(%rdi) |
| 617 movq %r10,16(%rdi) |
| 618 movq %r11,24(%rdi) |
| 619 movq %r12,32(%rdi) |
| 620 movq %r13,40(%rdi) |
| 621 movq %r14,48(%rdi) |
| 622 movq %r15,56(%rdi) |
| 623 |
# Bytes encode: movq %xmm0,%rdi
| 624 .byte 102,72,15,126,199 |
# Bytes encode: movq %xmm1,%rbp
| 625 .byte 102,72,15,126,205 |
| 626 |
# Low eight qwords of the product go to the reduction in %r8..%r15.
| 627 movq (%rsp),%r8 |
| 628 movq 8(%rsp),%r9 |
| 629 movq 16(%rsp),%r10 |
| 630 movq 24(%rsp),%r11 |
| 631 movq 32(%rsp),%r12 |
| 632 movq 40(%rsp),%r13 |
| 633 movq 48(%rsp),%r14 |
| 634 movq 56(%rsp),%r15 |
| 635 |
| 636 call __rsaz_512_reduce |
# Add the high eight qwords; sbbq turns the final carry into the subtract mask.
| 637 addq 64(%rsp),%r8 |
| 638 adcq 72(%rsp),%r9 |
| 639 adcq 80(%rsp),%r10 |
| 640 adcq 88(%rsp),%r11 |
| 641 adcq 96(%rsp),%r12 |
| 642 adcq 104(%rsp),%r13 |
| 643 adcq 112(%rsp),%r14 |
| 644 adcq 120(%rsp),%r15 |
| 645 sbbq %rcx,%rcx |
| 646 |
| 647 call __rsaz_512_subtract |
| 648 |
# Epilogue: restore the six saved registers and drop the frame.
| 649 leaq 128+24+48(%rsp),%rax |
| 650 movq -48(%rax),%r15 |
| 651 movq -40(%rax),%r14 |
| 652 movq -32(%rax),%r13 |
| 653 movq -24(%rax),%r12 |
| 654 movq -16(%rax),%rbp |
| 655 movq -8(%rax),%rbx |
| 656 leaq (%rax),%rsp |
| 657 L$mul_gather4_epilogue: |
# Bytes f3 c3 encode: rep ret
| 658 .byte 0xf3,0xc3 |
| 659 |
# _rsaz_512_mul_scatter4
# Multiplies the 8-qword value at (%rdi) by the 8-qword value at (%rsi),
# applies __rsaz_512_reduce and the masked __rsaz_512_subtract, writes the
# 8-qword result back to (%rdi), and also scatters it into a table.
#
# Inputs, as read by the code below:
#   %rdi  in/out buffer, 8 qwords (one factor and the destination)
#   %rsi  other factor, 8 qwords
#   %rdx  pointer restored into %rbp for reduce/subtract
#         (presumably the modulus -- confirm against the C prototype)
#   %rcx  qword stored at 128(%rsp), the slot __rsaz_512_reduce multiplies by
#         (presumably the Montgomery constant n0 -- confirm)
#   %r8   table base
#   %r9d  table index (zero-extended below)
#
# Scatter layout: result qword k has its low dword written at
# tbl + 4*idx + 128*k and its high dword 64 bytes after that.
| 660 .globl _rsaz_512_mul_scatter4 |
| 661 .private_extern _rsaz_512_mul_scatter4 |
| 662 |
| 663 .p2align 5 |
| 664 _rsaz_512_mul_scatter4: |
| 665 pushq %rbx |
| 666 pushq %rbp |
| 667 pushq %r12 |
| 668 pushq %r13 |
| 669 pushq %r14 |
| 670 pushq %r15 |
| 671 |
# Zero-extend the 32-bit index so it can be used in 64-bit addressing.
| 672 movl %r9d,%r9d |
| 673 subq $128+24,%rsp |
| 674 L$mul_scatter4_body: |
# %r8 = address of the first dword slot for this index.
| 675 leaq (%r8,%r9,4),%r8 |
# Bytes encode: movq %rdi,%xmm0
| 676 .byte 102,72,15,110,199 |
# Bytes encode: movq %rdx,%xmm1
| 677 .byte 102,72,15,110,202 |
# Bytes encode: movq %r8,%xmm2
| 678 .byte 102,73,15,110,208 |
| 679 movq %rcx,128(%rsp) |
| 680 |
# Helper contract: %rbp = pointer to the second factor (here the in/out
# buffer), %rbx = its first qword.
| 681 movq %rdi,%rbp |
| 682 movq (%rdi),%rbx |
| 683 call __rsaz_512_mul |
| 684 |
# Bytes encode: movq %xmm0,%rdi
| 685 .byte 102,72,15,126,199 |
# Bytes encode: movq %xmm1,%rbp
| 686 .byte 102,72,15,126,205 |
| 687 |
# Low eight qwords of the product go to the reduction in %r8..%r15.
| 688 movq (%rsp),%r8 |
| 689 movq 8(%rsp),%r9 |
| 690 movq 16(%rsp),%r10 |
| 691 movq 24(%rsp),%r11 |
| 692 movq 32(%rsp),%r12 |
| 693 movq 40(%rsp),%r13 |
| 694 movq 48(%rsp),%r14 |
| 695 movq 56(%rsp),%r15 |
| 696 |
| 697 call __rsaz_512_reduce |
# Add the high eight qwords of the product.
| 698 addq 64(%rsp),%r8 |
| 699 adcq 72(%rsp),%r9 |
| 700 adcq 80(%rsp),%r10 |
| 701 adcq 88(%rsp),%r11 |
| 702 adcq 96(%rsp),%r12 |
| 703 adcq 104(%rsp),%r13 |
| 704 adcq 112(%rsp),%r14 |
| 705 adcq 120(%rsp),%r15 |
# Bytes encode: movq %xmm2,%rsi (table slot address).  This move leaves the
# flags alone, so the carry from the adcq chain is still live for the sbbq.
| 706 .byte 102,72,15,126,214 |
| 707 sbbq %rcx,%rcx |
| 708 |
| 709 call __rsaz_512_subtract |
| 710 |
# Scatter the result, still held in %r8..%r15: low dwords first, then the
# registers are shifted down and the high dwords are written 64 bytes on.
| 711 movl %r8d,0(%rsi) |
| 712 shrq $32,%r8 |
| 713 movl %r9d,128(%rsi) |
| 714 shrq $32,%r9 |
| 715 movl %r10d,256(%rsi) |
| 716 shrq $32,%r10 |
| 717 movl %r11d,384(%rsi) |
| 718 shrq $32,%r11 |
| 719 movl %r12d,512(%rsi) |
| 720 shrq $32,%r12 |
| 721 movl %r13d,640(%rsi) |
| 722 shrq $32,%r13 |
| 723 movl %r14d,768(%rsi) |
| 724 shrq $32,%r14 |
| 725 movl %r15d,896(%rsi) |
| 726 shrq $32,%r15 |
| 727 movl %r8d,64(%rsi) |
| 728 movl %r9d,192(%rsi) |
| 729 movl %r10d,320(%rsi) |
| 730 movl %r11d,448(%rsi) |
| 731 movl %r12d,576(%rsi) |
| 732 movl %r13d,704(%rsi) |
| 733 movl %r14d,832(%rsi) |
| 734 movl %r15d,960(%rsi) |
| 735 |
# Epilogue: restore the six saved registers and drop the frame.
| 736 leaq 128+24+48(%rsp),%rax |
| 737 movq -48(%rax),%r15 |
| 738 movq -40(%rax),%r14 |
| 739 movq -32(%rax),%r13 |
| 740 movq -24(%rax),%r12 |
| 741 movq -16(%rax),%rbp |
| 742 movq -8(%rax),%rbx |
| 743 leaq (%rax),%rsp |
| 744 L$mul_scatter4_epilogue: |
# Bytes f3 c3 encode: rep ret
| 745 .byte 0xf3,0xc3 |
| 746 |
# _rsaz_512_mul_by_one
# Loads the 8-qword value at (%rsi), runs it through __rsaz_512_reduce alone
# (no multiply, no high-half add, no final subtract) and stores the 8-qword
# result at (%rdi).
#
# Inputs, as read by the code below:
#   %rdi  destination buffer, 8 qwords
#   %rsi  source buffer, 8 qwords
#   %rdx  pointer copied to %rbp for __rsaz_512_reduce
#         (presumably the modulus -- confirm against the C prototype)
#   %rcx  qword stored at 128(%rsp), the slot __rsaz_512_reduce multiplies by
#         (presumably the Montgomery constant n0 -- confirm)
# %xmm0 is used as a zero source.
| 747 .globl _rsaz_512_mul_by_one |
| 748 .private_extern _rsaz_512_mul_by_one |
| 749 |
| 750 .p2align 5 |
| 751 _rsaz_512_mul_by_one: |
| 752 pushq %rbx |
| 753 pushq %rbp |
| 754 pushq %r12 |
| 755 pushq %r13 |
| 756 pushq %r14 |
| 757 pushq %r15 |
| 758 |
| 759 subq $128+24,%rsp |
| 760 L$mul_by_one_body: |
| 761 movq %rdx,%rbp |
| 762 movq %rcx,128(%rsp) |
| 763 |
| 764 movq (%rsi),%r8 |
| 765 pxor %xmm0,%xmm0 |
| 766 movq 8(%rsi),%r9 |
| 767 movq 16(%rsi),%r10 |
| 768 movq 24(%rsi),%r11 |
| 769 movq 32(%rsi),%r12 |
| 770 movq 40(%rsi),%r13 |
| 771 movq 48(%rsi),%r14 |
| 772 movq 56(%rsi),%r15 |
| 773 |
# Zero 0..111(%rsp).  movdqa needs a 16-byte aligned address: with the ABI
# entry alignment, six pushes and the 128+24 byte frame leave %rsp a
# multiple of 16.  The reduce helper in this file does not read these slots.
| 774 movdqa %xmm0,(%rsp) |
| 775 movdqa %xmm0,16(%rsp) |
| 776 movdqa %xmm0,32(%rsp) |
| 777 movdqa %xmm0,48(%rsp) |
| 778 movdqa %xmm0,64(%rsp) |
| 779 movdqa %xmm0,80(%rsp) |
| 780 movdqa %xmm0,96(%rsp) |
| 781 call __rsaz_512_reduce |
| 782 movq %r8,(%rdi) |
| 783 movq %r9,8(%rdi) |
| 784 movq %r10,16(%rdi) |
| 785 movq %r11,24(%rdi) |
| 786 movq %r12,32(%rdi) |
| 787 movq %r13,40(%rdi) |
| 788 movq %r14,48(%rdi) |
| 789 movq %r15,56(%rdi) |
| 790 |
# Epilogue: restore the six saved registers and drop the frame.
| 791 leaq 128+24+48(%rsp),%rax |
| 792 movq -48(%rax),%r15 |
| 793 movq -40(%rax),%r14 |
| 794 movq -32(%rax),%r13 |
| 795 movq -24(%rax),%r12 |
| 796 movq -16(%rax),%rbp |
| 797 movq -8(%rax),%rbx |
| 798 leaq (%rax),%rsp |
| 799 L$mul_by_one_epilogue: |
# Bytes f3 c3 encode: rep ret
| 800 .byte 0xf3,0xc3 |
| 801 |
| 802 |
# __rsaz_512_reduce (file-local helper, register-based interface)
# Eight passes; each pass forms m = %r8 * k (low 64 bits), adds m times the
# 8-qword value at (%rbp) to the window %r8..%r15, and shifts the window
# down by one word.  This has the shape of a word-serial Montgomery
# reduction; k is presumably -1/mod[0] mod 2^64 -- confirm with the caller.
#
# In:   %r8..%r15   eight qwords to reduce, least significant in %r8
#       %rbp        pointer to eight qwords (presumably the modulus)
#       128+8(%rsp) the constant k.  Seen from the caller this is the
#                   128(%rsp) slot, since the call pushed an 8-byte
#                   return address.
# Out:  %r8..%r15   reduced window
# Clobbers: %rax, %rbx, %rcx, %rdx, %rsi, flags
| 803 .p2align 5 |
| 804 __rsaz_512_reduce: |
# m for the first pass.
| 805 movq %r8,%rbx |
| 806 imulq 128+8(%rsp),%rbx |
| 807 movq 0(%rbp),%rax |
| 808 movl $8,%ecx |
| 809 jmp L$reduction_loop |
| 810 |
| 811 .p2align 5 |
| 812 L$reduction_loop: |
# Word 0: only the carry matters.  negq sets CF exactly when %r8 is
# non-zero, standing in for the carry out of %r8 + low(m * mod[0]); this
# relies on that sum being zero modulo 2^64 (presumably guaranteed by the
# choice of k -- confirm).
| 813 mulq %rbx |
| 814 movq 8(%rbp),%rax |
| 815 negq %r8 |
| 816 movq %rdx,%r8 |
| 817 adcq $0,%r8 |
| 818 |
| 819 mulq %rbx |
| 820 addq %rax,%r9 |
| 821 movq 16(%rbp),%rax |
| 822 adcq $0,%rdx |
| 823 addq %r9,%r8 |
| 824 movq %rdx,%r9 |
| 825 adcq $0,%r9 |
| 826 |
| 827 mulq %rbx |
| 828 addq %rax,%r10 |
| 829 movq 24(%rbp),%rax |
| 830 adcq $0,%rdx |
| 831 addq %r10,%r9 |
| 832 movq %rdx,%r10 |
| 833 adcq $0,%r10 |
| 834 |
| 835 mulq %rbx |
| 836 addq %rax,%r11 |
| 837 movq 32(%rbp),%rax |
| 838 adcq $0,%rdx |
| 839 addq %r11,%r10 |
# Fetch k for the next pass; a plain load keeps the pending carry intact.
| 840 movq 128+8(%rsp),%rsi |
| 841 |
| 842 |
| 843 adcq $0,%rdx |
| 844 movq %rdx,%r11 |
| 845 |
| 846 mulq %rbx |
| 847 addq %rax,%r12 |
| 848 movq 40(%rbp),%rax |
| 849 adcq $0,%rdx |
# %rsi = k * (new %r8): m for the next pass, computed early.
| 850 imulq %r8,%rsi |
| 851 addq %r12,%r11 |
| 852 movq %rdx,%r12 |
| 853 adcq $0,%r12 |
| 854 |
| 855 mulq %rbx |
| 856 addq %rax,%r13 |
| 857 movq 48(%rbp),%rax |
| 858 adcq $0,%rdx |
| 859 addq %r13,%r12 |
| 860 movq %rdx,%r13 |
| 861 adcq $0,%r13 |
| 862 |
| 863 mulq %rbx |
| 864 addq %rax,%r14 |
| 865 movq 56(%rbp),%rax |
| 866 adcq $0,%rdx |
| 867 addq %r14,%r13 |
| 868 movq %rdx,%r14 |
| 869 adcq $0,%r14 |
| 870 |
| 871 mulq %rbx |
# Switch to the next m now that the last multiply of this pass has issued.
| 872 movq %rsi,%rbx |
| 873 addq %rax,%r15 |
| 874 movq 0(%rbp),%rax |
| 875 adcq $0,%rdx |
| 876 addq %r15,%r14 |
| 877 movq %rdx,%r15 |
| 878 adcq $0,%r15 |
| 879 |
| 880 decl %ecx |
| 881 jne L$reduction_loop |
| 882 |
# Bytes f3 c3 encode: rep ret
| 883 .byte 0xf3,0xc3 |
| 884 |
| 885 |
# __rsaz_512_subtract (file-local helper, register-based interface)
# Branch-free conditional subtraction: stores %r8..%r15 at (%rdi), then adds
# (mask AND negated value at (%rbp)) and stores the sum again.
#
# In:   %r8..%r15  eight-qword value, least significant in %r8
#       %rdi       destination buffer, 8 qwords
#       %rbp       pointer to eight qwords (presumably the modulus)
#       %rcx       mask; callers in this file pass all-ones or zero
# Out:  (%rdi) and %r8..%r15 hold value - [%rbp] when the mask is all-ones,
#       or the unchanged value when it is zero
# Clobbers: flags
| 886 .p2align 5 |
| 887 __rsaz_512_subtract: |
| 888 movq %r8,(%rdi) |
| 889 movq %r9,8(%rdi) |
| 890 movq %r10,16(%rdi) |
| 891 movq %r11,24(%rdi) |
| 892 movq %r12,32(%rdi) |
| 893 movq %r13,40(%rdi) |
| 894 movq %r14,48(%rdi) |
| 895 movq %r15,56(%rdi) |
| 896 |
# Build the masked negation.  negq on word 0 with notq on words 1..7 equals
# the 512-bit two-complement negation only when word 0 is non-zero
# (presumably guaranteed by an odd modulus -- confirm).
| 897 movq 0(%rbp),%r8 |
| 898 movq 8(%rbp),%r9 |
| 899 negq %r8 |
| 900 notq %r9 |
| 901 andq %rcx,%r8 |
| 902 movq 16(%rbp),%r10 |
| 903 andq %rcx,%r9 |
| 904 notq %r10 |
| 905 movq 24(%rbp),%r11 |
| 906 andq %rcx,%r10 |
| 907 notq %r11 |
| 908 movq 32(%rbp),%r12 |
| 909 andq %rcx,%r11 |
| 910 notq %r12 |
| 911 movq 40(%rbp),%r13 |
| 912 andq %rcx,%r12 |
| 913 notq %r13 |
| 914 movq 48(%rbp),%r14 |
| 915 andq %rcx,%r13 |
| 916 notq %r14 |
| 917 movq 56(%rbp),%r15 |
| 918 andq %rcx,%r14 |
| 919 notq %r15 |
| 920 andq %rcx,%r15 |
| 921 |
# Add the value saved at (%rdi); the carry out of the top word is discarded.
| 922 addq (%rdi),%r8 |
| 923 adcq 8(%rdi),%r9 |
| 924 adcq 16(%rdi),%r10 |
| 925 adcq 24(%rdi),%r11 |
| 926 adcq 32(%rdi),%r12 |
| 927 adcq 40(%rdi),%r13 |
| 928 adcq 48(%rdi),%r14 |
| 929 adcq 56(%rdi),%r15 |
| 930 |
| 931 movq %r8,(%rdi) |
| 932 movq %r9,8(%rdi) |
| 933 movq %r10,16(%rdi) |
| 934 movq %r11,24(%rdi) |
| 935 movq %r12,32(%rdi) |
| 936 movq %r13,40(%rdi) |
| 937 movq %r14,48(%rdi) |
| 938 movq %r15,56(%rdi) |
| 939 |
# Bytes f3 c3 encode: rep ret
| 940 .byte 0xf3,0xc3 |
| 941 |
| 942 |
# __rsaz_512_mul (file-local helper, register-based interface)
# Schoolbook 8x8-qword multiply.  The 16-qword product is written to the
# caller stack frame at the caller 0..127(%rsp); inside this routine that
# region starts at 8(%rsp) because the call pushed a return address.
#
# In:   %rsi  pointer to multiplicand a, 8 qwords
#       %rbp  pointer to multiplier b, 8 qwords
#       %rbx  b[0], preloaded by the caller
# Out:  16 qwords in the caller frame as described above
# Clobbers: %rax, %rbx, %rcx, %rdx, %rdi, %rbp, %r8..%r15, flags.
#       Callers in this file park %rdi and their modulus pointer in
#       %xmm0/%xmm1 for that reason.
| 943 .p2align 5 |
| 944 __rsaz_512_mul: |
# %rdi = store cursor, starting at the caller (%rsp).
| 945 leaq 8(%rsp),%rdi |
| 946 |
# Row 0: a[0..7] * b[0].  The lowest word is stored at once; the other
# eight stay in %r8..%r15 as the running window.
| 947 movq (%rsi),%rax |
| 948 mulq %rbx |
| 949 movq %rax,(%rdi) |
| 950 movq 8(%rsi),%rax |
| 951 movq %rdx,%r8 |
| 952 |
| 953 mulq %rbx |
| 954 addq %rax,%r8 |
| 955 movq 16(%rsi),%rax |
| 956 movq %rdx,%r9 |
| 957 adcq $0,%r9 |
| 958 |
| 959 mulq %rbx |
| 960 addq %rax,%r9 |
| 961 movq 24(%rsi),%rax |
| 962 movq %rdx,%r10 |
| 963 adcq $0,%r10 |
| 964 |
| 965 mulq %rbx |
| 966 addq %rax,%r10 |
| 967 movq 32(%rsi),%rax |
| 968 movq %rdx,%r11 |
| 969 adcq $0,%r11 |
| 970 |
| 971 mulq %rbx |
| 972 addq %rax,%r11 |
| 973 movq 40(%rsi),%rax |
| 974 movq %rdx,%r12 |
| 975 adcq $0,%r12 |
| 976 |
| 977 mulq %rbx |
| 978 addq %rax,%r12 |
| 979 movq 48(%rsi),%rax |
| 980 movq %rdx,%r13 |
| 981 adcq $0,%r13 |
| 982 |
| 983 mulq %rbx |
| 984 addq %rax,%r13 |
| 985 movq 56(%rsi),%rax |
| 986 movq %rdx,%r14 |
| 987 adcq $0,%r14 |
| 988 |
| 989 mulq %rbx |
| 990 addq %rax,%r14 |
| 991 movq (%rsi),%rax |
| 992 movq %rdx,%r15 |
| 993 adcq $0,%r15 |
| 994 |
# Advance to b[1] and to the next output slot; seven rows remain.
| 995 leaq 8(%rbp),%rbp |
| 996 leaq 8(%rdi),%rdi |
| 997 |
| 998 movl $7,%ecx |
| 999 jmp L$oop_mul |
| 1000 |
| 1001 .p2align 5 |
| 1002 L$oop_mul: |
# One row per pass: add a[0..7] * b[i] into the window, retire the lowest
# word to (%rdi), and slide the window down by one word.
| 1003 movq (%rbp),%rbx |
| 1004 mulq %rbx |
| 1005 addq %rax,%r8 |
| 1006 movq 8(%rsi),%rax |
| 1007 movq %r8,(%rdi) |
| 1008 movq %rdx,%r8 |
| 1009 adcq $0,%r8 |
| 1010 |
| 1011 mulq %rbx |
| 1012 addq %rax,%r9 |
| 1013 movq 16(%rsi),%rax |
| 1014 adcq $0,%rdx |
| 1015 addq %r9,%r8 |
| 1016 movq %rdx,%r9 |
| 1017 adcq $0,%r9 |
| 1018 |
| 1019 mulq %rbx |
| 1020 addq %rax,%r10 |
| 1021 movq 24(%rsi),%rax |
| 1022 adcq $0,%rdx |
| 1023 addq %r10,%r9 |
| 1024 movq %rdx,%r10 |
| 1025 adcq $0,%r10 |
| 1026 |
| 1027 mulq %rbx |
| 1028 addq %rax,%r11 |
| 1029 movq 32(%rsi),%rax |
| 1030 adcq $0,%rdx |
| 1031 addq %r11,%r10 |
| 1032 movq %rdx,%r11 |
| 1033 adcq $0,%r11 |
| 1034 |
| 1035 mulq %rbx |
| 1036 addq %rax,%r12 |
| 1037 movq 40(%rsi),%rax |
| 1038 adcq $0,%rdx |
| 1039 addq %r12,%r11 |
| 1040 movq %rdx,%r12 |
| 1041 adcq $0,%r12 |
| 1042 |
| 1043 mulq %rbx |
| 1044 addq %rax,%r13 |
| 1045 movq 48(%rsi),%rax |
| 1046 adcq $0,%rdx |
| 1047 addq %r13,%r12 |
| 1048 movq %rdx,%r13 |
| 1049 adcq $0,%r13 |
| 1050 |
| 1051 mulq %rbx |
| 1052 addq %rax,%r14 |
| 1053 movq 56(%rsi),%rax |
| 1054 adcq $0,%rdx |
| 1055 addq %r14,%r13 |
| 1056 movq %rdx,%r14 |
# leaq leaves the flags alone, so it can sit between the addq and its adcq.
| 1057 leaq 8(%rbp),%rbp |
| 1058 adcq $0,%r14 |
| 1059 |
| 1060 mulq %rbx |
| 1061 addq %rax,%r15 |
| 1062 movq (%rsi),%rax |
| 1063 adcq $0,%rdx |
| 1064 addq %r15,%r14 |
| 1065 movq %rdx,%r15 |
| 1066 adcq $0,%r15 |
| 1067 |
| 1068 leaq 8(%rdi),%rdi |
| 1069 |
| 1070 decl %ecx |
| 1071 jnz L$oop_mul |
| 1072 |
# Flush the window: these are the top eight words of the product.
| 1073 movq %r8,(%rdi) |
| 1074 movq %r9,8(%rdi) |
| 1075 movq %r10,16(%rdi) |
| 1076 movq %r11,24(%rdi) |
| 1077 movq %r12,32(%rdi) |
| 1078 movq %r13,40(%rdi) |
| 1079 movq %r14,48(%rdi) |
| 1080 movq %r15,56(%rdi) |
| 1081 |
# Bytes f3 c3 encode: rep ret
| 1082 .byte 0xf3,0xc3 |
| 1083 |
# _rsaz_512_scatter4
# Copies eight qwords from (%rsi) into a table.  Qword k has its low dword
# written at tbl + 4*idx + 128*k and its high dword 64 bytes after that.
#
# In:   %rdi  table base
#       %rsi  source buffer, 8 qwords
#       %rdx  table index
# Clobbers: %rax, %rdi, %rsi, %r9, flags
#
# NOTE(review): the index is used as a full 64-bit register with no
# zero-extension, unlike %r9d in the mul_gather4 / mul_scatter4 routines.
# Should the C prototype pass a 32-bit int, the upper half of %rdx is not
# guaranteed to be clear -- confirm against the prototype.
| 1084 .globl _rsaz_512_scatter4 |
| 1085 .private_extern _rsaz_512_scatter4 |
| 1086 |
| 1087 .p2align 4 |
| 1088 _rsaz_512_scatter4: |
| 1089 leaq (%rdi,%rdx,4),%rdi |
| 1090 movl $8,%r9d |
| 1091 jmp L$oop_scatter |
| 1092 .p2align 4 |
| 1093 L$oop_scatter: |
# One qword per pass: low dword, then high dword, then step one 128-byte row.
| 1094 movq (%rsi),%rax |
| 1095 leaq 8(%rsi),%rsi |
| 1096 movl %eax,(%rdi) |
| 1097 shrq $32,%rax |
| 1098 movl %eax,64(%rdi) |
| 1099 leaq 128(%rdi),%rdi |
| 1100 decl %r9d |
| 1101 jnz L$oop_scatter |
# Bytes f3 c3 encode: rep ret
| 1102 .byte 0xf3,0xc3 |
| 1103 |
| 1104 |
# _rsaz_512_gather4
# Reassembles eight qwords from a table into (%rdi).  Qword k is built from
# the dword at tbl + 4*idx + 128*k (low half) and the dword 64 bytes after
# that (high half).
#
# In:   %rdi  destination buffer, 8 qwords
#       %rsi  table base
#       %rdx  table index
# Clobbers: %rax, %rdi, %rsi, %r8, %r9, flags
#
# NOTE(review): the index is used as a full 64-bit register with no
# zero-extension -- confirm the C prototype guarantees a clean upper half.
# NOTE(review): the load addresses depend directly on the index.  Should the
# index be secret, this is a cache-timing concern (compare CVE-2016-0702)
# -- confirm threat model.
| 1105 .globl _rsaz_512_gather4 |
| 1106 .private_extern _rsaz_512_gather4 |
| 1107 |
| 1108 .p2align 4 |
| 1109 _rsaz_512_gather4: |
| 1110 leaq (%rsi,%rdx,4),%rsi |
| 1111 movl $8,%r9d |
| 1112 jmp L$oop_gather |
| 1113 .p2align 4 |
| 1114 L$oop_gather: |
# The 32-bit loads zero-extend, so shift-and-or yields the full qword.
| 1115 movl (%rsi),%eax |
| 1116 movl 64(%rsi),%r8d |
| 1117 leaq 128(%rsi),%rsi |
| 1118 shlq $32,%r8 |
| 1119 orq %r8,%rax |
| 1120 movq %rax,(%rdi) |
| 1121 leaq 8(%rdi),%rdi |
| 1122 decl %r9d |
| 1123 jnz L$oop_gather |
# Bytes f3 c3 encode: rep ret
| 1124 .byte 0xf3,0xc3 |
| 1125 |
| 1126 #endif |
OLD | NEW |