OLD | NEW |
(Empty) | |
| 1 #if defined(__x86_64__) |
| 2 .text |
| 3 |
| 4 |
| 5 |
# rsaz_512_sqr
# Repeated 512-bit squaring, each pass followed by __rsaz_512_reduce and
# __rsaz_512_subtract. Register roles as used by the code below:
#   %rdi  destination, 8 qwords (written by __rsaz_512_subtract)
#   %rsi  source, 8 qwords; replaced by %rdi after every pass
#   %rdx  pointer kept in %rbp for the reduce/subtract helpers
#         (presumably the modulus; TODO confirm against the C prototype)
#   %rcx  64-bit factor saved at 128(%rsp) and read by __rsaz_512_reduce
#         (presumably the Montgomery constant n0; TODO confirm)
#   %r8d  pass counter; it is tested after the body, so the body always
#         runs at least once
# Frame: 0..127(%rsp) holds the 16-qword square, 128(%rsp) the saved %rcx,
# 128+8(%rsp) the pass counter.
# Each product a[i]*a[j] with i<j is computed once and doubled, then the
# squares a[i]*a[i] are added on the diagonal.
# NOTE(review): after several of the doubling steps an adcq with $0 is the
# last link of the chain and its carry out is not consumed (the next
# flag-writing instruction overwrites CF). TODO confirm that the register
# cannot wrap at those points.
| 6 .globl rsaz_512_sqr |
| 7 .type rsaz_512_sqr,@function |
| 8 .align 32 |
| 9 rsaz_512_sqr: |
# Save the six registers restored in the epilogue, then reserve the frame.
| 10 pushq %rbx |
| 11 pushq %rbp |
| 12 pushq %r12 |
| 13 pushq %r13 |
| 14 pushq %r14 |
| 15 pushq %r15 |
| 16 |
| 17 subq $128+24,%rsp |
| 18 .Lsqr_body: |
# Loop entry contract: %rdx = a[0], %rax = a[1].
| 19 movq %rdx,%rbp |
| 20 movq (%rsi),%rdx |
| 21 movq 8(%rsi),%rax |
| 22 movq %rcx,128(%rsp) |
| 23 jmp .Loop_sqr |
| 24 |
| 25 .align 32 |
| 26 .Loop_sqr: |
# Spill the pass counter; %r8 is reused as an accumulator below.
| 27 movl %r8d,128+8(%rsp) |
| 28 |
# Row 0: a[0]*a[1..7] accumulated into %r8..%r15.
| 29 movq %rdx,%rbx |
| 30 mulq %rdx |
| 31 movq %rax,%r8 |
| 32 movq 16(%rsi),%rax |
| 33 movq %rdx,%r9 |
| 34 |
| 35 mulq %rbx |
| 36 addq %rax,%r9 |
| 37 movq 24(%rsi),%rax |
| 38 movq %rdx,%r10 |
| 39 adcq $0,%r10 |
| 40 |
| 41 mulq %rbx |
| 42 addq %rax,%r10 |
| 43 movq 32(%rsi),%rax |
| 44 movq %rdx,%r11 |
| 45 adcq $0,%r11 |
| 46 |
| 47 mulq %rbx |
| 48 addq %rax,%r11 |
| 49 movq 40(%rsi),%rax |
| 50 movq %rdx,%r12 |
| 51 adcq $0,%r12 |
| 52 |
| 53 mulq %rbx |
| 54 addq %rax,%r12 |
| 55 movq 48(%rsi),%rax |
| 56 movq %rdx,%r13 |
| 57 adcq $0,%r13 |
| 58 |
| 59 mulq %rbx |
| 60 addq %rax,%r13 |
| 61 movq 56(%rsi),%rax |
| 62 movq %rdx,%r14 |
| 63 adcq $0,%r14 |
| 64 |
| 65 mulq %rbx |
| 66 addq %rax,%r14 |
| 67 movq %rbx,%rax |
| 68 movq %rdx,%r15 |
| 69 adcq $0,%r15 |
| 70 |
# Double %r8:%r9; the pre-doubling %r9 is kept in %rcx so that its top bit
# (extracted by the shrq below) can be fed into the next doubling.
| 71 addq %r8,%r8 |
| 72 movq %r9,%rcx |
| 73 adcq %r9,%r9 |
| 74 |
# Diagonal term a[0]*a[0]; result qwords 0 and 1 are stored.
| 75 mulq %rax |
| 76 movq %rax,(%rsp) |
| 77 addq %rdx,%r8 |
# NOTE(review): CF produced by the next adcq is overwritten by the shrq
# further down; TODO confirm %r9 cannot wrap here.
| 78 adcq $0,%r9 |
| 79 |
| 80 movq %r8,8(%rsp) |
| 81 shrq $63,%rcx |
| 82 |
| 83 |
# Row 1: a[1]*a[2..7] added into %r10..%r15, new top qword in %r8.
| 84 movq 8(%rsi),%r8 |
| 85 movq 16(%rsi),%rax |
| 86 mulq %r8 |
| 87 addq %rax,%r10 |
| 88 movq 24(%rsi),%rax |
| 89 movq %rdx,%rbx |
| 90 adcq $0,%rbx |
| 91 |
| 92 mulq %r8 |
| 93 addq %rax,%r11 |
| 94 movq 32(%rsi),%rax |
| 95 adcq $0,%rdx |
| 96 addq %rbx,%r11 |
| 97 movq %rdx,%rbx |
| 98 adcq $0,%rbx |
| 99 |
| 100 mulq %r8 |
| 101 addq %rax,%r12 |
| 102 movq 40(%rsi),%rax |
| 103 adcq $0,%rdx |
| 104 addq %rbx,%r12 |
| 105 movq %rdx,%rbx |
| 106 adcq $0,%rbx |
| 107 |
| 108 mulq %r8 |
| 109 addq %rax,%r13 |
| 110 movq 48(%rsi),%rax |
| 111 adcq $0,%rdx |
| 112 addq %rbx,%r13 |
| 113 movq %rdx,%rbx |
| 114 adcq $0,%rbx |
| 115 |
| 116 mulq %r8 |
| 117 addq %rax,%r14 |
| 118 movq 56(%rsi),%rax |
| 119 adcq $0,%rdx |
| 120 addq %rbx,%r14 |
| 121 movq %rdx,%rbx |
| 122 adcq $0,%rbx |
| 123 |
| 124 mulq %r8 |
| 125 addq %rax,%r15 |
| 126 movq %r8,%rax |
| 127 adcq $0,%rdx |
| 128 addq %rbx,%r15 |
| 129 movq %rdx,%r8 |
| 130 movq %r10,%rdx |
| 131 adcq $0,%r8 |
| 132 |
# The addq on the copy in %rdx only produces CF = top bit of %r10 for the
# adcq that doubles %r11; %r10 itself is doubled by the flag-neutral leaq,
# which also adds the bit saved in %rcx.
| 133 addq %rdx,%rdx |
| 134 leaq (%rcx,%r10,2),%r10 |
| 135 movq %r11,%rbx |
| 136 adcq %r11,%r11 |
| 137 |
# Diagonal term a[1]*a[1]; result qwords 2 and 3 are stored.
| 138 mulq %rax |
| 139 addq %rax,%r9 |
| 140 adcq %rdx,%r10 |
| 141 adcq $0,%r11 |
| 142 |
| 143 movq %r9,16(%rsp) |
| 144 movq %r10,24(%rsp) |
| 145 shrq $63,%rbx |
| 146 |
| 147 |
# Row 2: a[2]*a[3..7] added into %r12..%r15 and %r8, new top qword in %r9.
| 148 movq 16(%rsi),%r9 |
| 149 movq 24(%rsi),%rax |
| 150 mulq %r9 |
| 151 addq %rax,%r12 |
| 152 movq 32(%rsi),%rax |
| 153 movq %rdx,%rcx |
| 154 adcq $0,%rcx |
| 155 |
| 156 mulq %r9 |
| 157 addq %rax,%r13 |
| 158 movq 40(%rsi),%rax |
| 159 adcq $0,%rdx |
| 160 addq %rcx,%r13 |
| 161 movq %rdx,%rcx |
| 162 adcq $0,%rcx |
| 163 |
| 164 mulq %r9 |
| 165 addq %rax,%r14 |
| 166 movq 48(%rsi),%rax |
| 167 adcq $0,%rdx |
| 168 addq %rcx,%r14 |
| 169 movq %rdx,%rcx |
| 170 adcq $0,%rcx |
| 171 |
| 172 mulq %r9 |
| 173 movq %r12,%r10 |
| 174 leaq (%rbx,%r12,2),%r12 |
| 175 addq %rax,%r15 |
| 176 movq 56(%rsi),%rax |
| 177 adcq $0,%rdx |
| 178 addq %rcx,%r15 |
| 179 movq %rdx,%rcx |
| 180 adcq $0,%rcx |
| 181 |
| 182 mulq %r9 |
| 183 shrq $63,%r10 |
| 184 addq %rax,%r8 |
| 185 movq %r9,%rax |
| 186 adcq $0,%rdx |
| 187 addq %rcx,%r8 |
| 188 movq %rdx,%r9 |
| 189 adcq $0,%r9 |
| 190 |
| 191 movq %r13,%rcx |
| 192 leaq (%r10,%r13,2),%r13 |
| 193 |
# Diagonal term a[2]*a[2]; result qwords 4 and 5 are stored.
| 194 mulq %rax |
| 195 addq %rax,%r11 |
| 196 adcq %rdx,%r12 |
| 197 adcq $0,%r13 |
| 198 |
| 199 movq %r11,32(%rsp) |
| 200 movq %r12,40(%rsp) |
| 201 shrq $63,%rcx |
| 202 |
| 203 |
# Row 3: a[3]*a[4..7] added into %r14, %r15, %r8, %r9; new top in %r10.
| 204 movq 24(%rsi),%r10 |
| 205 movq 32(%rsi),%rax |
| 206 mulq %r10 |
| 207 addq %rax,%r14 |
| 208 movq 40(%rsi),%rax |
| 209 movq %rdx,%rbx |
| 210 adcq $0,%rbx |
| 211 |
| 212 mulq %r10 |
| 213 addq %rax,%r15 |
| 214 movq 48(%rsi),%rax |
| 215 adcq $0,%rdx |
| 216 addq %rbx,%r15 |
| 217 movq %rdx,%rbx |
| 218 adcq $0,%rbx |
| 219 |
| 220 mulq %r10 |
| 221 movq %r14,%r12 |
| 222 leaq (%rcx,%r14,2),%r14 |
| 223 addq %rax,%r8 |
| 224 movq 56(%rsi),%rax |
| 225 adcq $0,%rdx |
| 226 addq %rbx,%r8 |
| 227 movq %rdx,%rbx |
| 228 adcq $0,%rbx |
| 229 |
| 230 mulq %r10 |
| 231 shrq $63,%r12 |
| 232 addq %rax,%r9 |
| 233 movq %r10,%rax |
| 234 adcq $0,%rdx |
| 235 addq %rbx,%r9 |
| 236 movq %rdx,%r10 |
| 237 adcq $0,%r10 |
| 238 |
| 239 movq %r15,%rbx |
| 240 leaq (%r12,%r15,2),%r15 |
| 241 |
# Diagonal term a[3]*a[3]; result qwords 6 and 7 are stored.
| 242 mulq %rax |
| 243 addq %rax,%r13 |
| 244 adcq %rdx,%r14 |
| 245 adcq $0,%r15 |
| 246 |
| 247 movq %r13,48(%rsp) |
| 248 movq %r14,56(%rsp) |
| 249 shrq $63,%rbx |
| 250 |
| 251 |
# Row 4: a[4]*a[5..7] added into %r8, %r9, %r10; new top in %r11.
| 252 movq 32(%rsi),%r11 |
| 253 movq 40(%rsi),%rax |
| 254 mulq %r11 |
| 255 addq %rax,%r8 |
| 256 movq 48(%rsi),%rax |
| 257 movq %rdx,%rcx |
| 258 adcq $0,%rcx |
| 259 |
| 260 mulq %r11 |
| 261 addq %rax,%r9 |
| 262 movq 56(%rsi),%rax |
| 263 adcq $0,%rdx |
| 264 movq %r8,%r12 |
| 265 leaq (%rbx,%r8,2),%r8 |
| 266 addq %rcx,%r9 |
| 267 movq %rdx,%rcx |
| 268 adcq $0,%rcx |
| 269 |
| 270 mulq %r11 |
| 271 shrq $63,%r12 |
| 272 addq %rax,%r10 |
| 273 movq %r11,%rax |
| 274 adcq $0,%rdx |
| 275 addq %rcx,%r10 |
| 276 movq %rdx,%r11 |
| 277 adcq $0,%r11 |
| 278 |
| 279 movq %r9,%rcx |
| 280 leaq (%r12,%r9,2),%r9 |
| 281 |
# Diagonal term a[4]*a[4]; result qwords 8 and 9 are stored.
| 282 mulq %rax |
| 283 addq %rax,%r15 |
| 284 adcq %rdx,%r8 |
| 285 adcq $0,%r9 |
| 286 |
| 287 movq %r15,64(%rsp) |
| 288 movq %r8,72(%rsp) |
| 289 shrq $63,%rcx |
| 290 |
| 291 |
# Row 5: a[5]*a[6..7] added into %r10, %r11; new top in %r12.
| 292 movq 40(%rsi),%r12 |
| 293 movq 48(%rsi),%rax |
| 294 mulq %r12 |
| 295 addq %rax,%r10 |
| 296 movq 56(%rsi),%rax |
| 297 movq %rdx,%rbx |
| 298 adcq $0,%rbx |
| 299 |
| 300 mulq %r12 |
| 301 addq %rax,%r11 |
| 302 movq %r12,%rax |
| 303 movq %r10,%r15 |
| 304 leaq (%rcx,%r10,2),%r10 |
| 305 adcq $0,%rdx |
| 306 shrq $63,%r15 |
| 307 addq %rbx,%r11 |
| 308 movq %rdx,%r12 |
| 309 adcq $0,%r12 |
| 310 |
| 311 movq %r11,%rbx |
| 312 leaq (%r15,%r11,2),%r11 |
| 313 |
# Diagonal term a[5]*a[5]; result qwords 10 and 11 are stored.
| 314 mulq %rax |
| 315 addq %rax,%r9 |
| 316 adcq %rdx,%r10 |
| 317 adcq $0,%r11 |
| 318 |
| 319 movq %r9,80(%rsp) |
| 320 movq %r10,88(%rsp) |
| 321 |
| 322 |
# Row 6: a[6]*a[7] added into %r12, high part in %r13.
| 323 movq 48(%rsi),%r13 |
| 324 movq 56(%rsi),%rax |
| 325 mulq %r13 |
| 326 addq %rax,%r12 |
| 327 movq %r13,%rax |
| 328 movq %rdx,%r13 |
| 329 adcq $0,%r13 |
| 330 |
# shlq moves the top bit of the pre-doubling %r11 (kept in %rbx) into CF so
# that the adcq chain doubles %r12:%r13 and collects the overflow in %r14.
| 331 xorq %r14,%r14 |
| 332 shlq $1,%rbx |
| 333 adcq %r12,%r12 |
| 334 adcq %r13,%r13 |
| 335 adcq %r14,%r14 |
| 336 |
# Diagonal term a[6]*a[6]; result qwords 12 and 13 are stored.
| 337 mulq %rax |
| 338 addq %rax,%r11 |
| 339 adcq %rdx,%r12 |
| 340 adcq $0,%r13 |
| 341 |
| 342 movq %r11,96(%rsp) |
| 343 movq %r12,104(%rsp) |
| 344 |
| 345 |
# Diagonal term a[7]*a[7]; result qwords 14 and 15 are stored.
| 346 movq 56(%rsi),%rax |
| 347 mulq %rax |
| 348 addq %rax,%r13 |
| 349 adcq $0,%rdx |
| 350 |
| 351 addq %rdx,%r14 |
| 352 |
| 353 movq %r13,112(%rsp) |
| 354 movq %r14,120(%rsp) |
| 355 |
# Low eight qwords of the square go to the reduce helper in %r8..%r15.
| 356 movq (%rsp),%r8 |
| 357 movq 8(%rsp),%r9 |
| 358 movq 16(%rsp),%r10 |
| 359 movq 24(%rsp),%r11 |
| 360 movq 32(%rsp),%r12 |
| 361 movq 40(%rsp),%r13 |
| 362 movq 48(%rsp),%r14 |
| 363 movq 56(%rsp),%r15 |
| 364 |
| 365 call __rsaz_512_reduce |
| 366 |
# Add the high eight qwords; sbbq turns the final carry into an all-ones or
# all-zero mask in %rcx for __rsaz_512_subtract.
| 367 addq 64(%rsp),%r8 |
| 368 adcq 72(%rsp),%r9 |
| 369 adcq 80(%rsp),%r10 |
| 370 adcq 88(%rsp),%r11 |
| 371 adcq 96(%rsp),%r12 |
| 372 adcq 104(%rsp),%r13 |
| 373 adcq 112(%rsp),%r14 |
| 374 adcq 120(%rsp),%r15 |
| 375 sbbq %rcx,%rcx |
| 376 |
| 377 call __rsaz_512_subtract |
| 378 |
# Set up the next pass: %rdx/%rax take result qwords 0 and 1 (left in
# %r8/%r9 by the helper), the counter is reloaded, and the destination
# buffer becomes the source.
| 379 movq %r8,%rdx |
| 380 movq %r9,%rax |
| 381 movl 128+8(%rsp),%r8d |
| 382 movq %rdi,%rsi |
| 383 |
| 384 decl %r8d |
| 385 jnz .Loop_sqr |
| 386 |
# Epilogue: %rax = stack pointer value before the six pushes; reload the
# saved registers relative to it and release the frame.
| 387 leaq 128+24+48(%rsp),%rax |
| 388 movq -48(%rax),%r15 |
| 389 movq -40(%rax),%r14 |
| 390 movq -32(%rax),%r13 |
| 391 movq -24(%rax),%r12 |
| 392 movq -16(%rax),%rbp |
| 393 movq -8(%rax),%rbx |
| 394 leaq (%rax),%rsp |
| 395 .Lsqr_epilogue: |
# Bytes F3 C3 encode "rep ret".
| 396 .byte 0xf3,0xc3 |
| 397 .size rsaz_512_sqr,.-rsaz_512_sqr |
# rsaz_512_mul
# 512x512-bit multiply via __rsaz_512_mul, followed by __rsaz_512_reduce
# and __rsaz_512_subtract. Register roles as used by the code below:
#   %rdi  destination, 8 qwords (parked in %xmm0 across the multiply)
#   %rsi  first operand, 8 qwords
#   %rdx  second operand, 8 qwords (walked through %rbp by the helper)
#   %rcx  pointer parked in %xmm1 and moved to %rbp for reduce/subtract
#         (presumably the modulus; TODO confirm against the C prototype)
#   %r8   64-bit factor saved at 128(%rsp) for __rsaz_512_reduce
#         (presumably the Montgomery constant n0; TODO confirm)
# Frame: 0..127(%rsp) holds the 16-qword product.
| 398 .globl rsaz_512_mul |
| 399 .type rsaz_512_mul,@function |
| 400 .align 32 |
| 401 rsaz_512_mul: |
| 402 pushq %rbx |
| 403 pushq %rbp |
| 404 pushq %r12 |
| 405 pushq %r13 |
| 406 pushq %r14 |
| 407 pushq %r15 |
| 408 |
| 409 subq $128+24,%rsp |
| 410 .Lmul_body: |
# The two .byte rows encode movq %rdi,%xmm0 and movq %rcx,%xmm1.
| 411 .byte 102,72,15,110,199 |
| 412 .byte 102,72,15,110,201 |
| 413 movq %r8,128(%rsp) |
# Helper contract: %rbx = first qword of the second operand, %rbp = its base.
| 414 movq (%rdx),%rbx |
| 415 movq %rdx,%rbp |
| 416 call __rsaz_512_mul |
| 417 |
# These encode movq %xmm0,%rdi and movq %xmm1,%rbp (the helper changed both
# %rdi and %rbp).
| 418 .byte 102,72,15,126,199 |
| 419 .byte 102,72,15,126,205 |
| 420 |
# Low eight product qwords go to the reduce helper in %r8..%r15.
| 421 movq (%rsp),%r8 |
| 422 movq 8(%rsp),%r9 |
| 423 movq 16(%rsp),%r10 |
| 424 movq 24(%rsp),%r11 |
| 425 movq 32(%rsp),%r12 |
| 426 movq 40(%rsp),%r13 |
| 427 movq 48(%rsp),%r14 |
| 428 movq 56(%rsp),%r15 |
| 429 |
| 430 call __rsaz_512_reduce |
# Add the high eight qwords; sbbq turns the final carry into the mask that
# __rsaz_512_subtract expects in %rcx.
| 431 addq 64(%rsp),%r8 |
| 432 adcq 72(%rsp),%r9 |
| 433 adcq 80(%rsp),%r10 |
| 434 adcq 88(%rsp),%r11 |
| 435 adcq 96(%rsp),%r12 |
| 436 adcq 104(%rsp),%r13 |
| 437 adcq 112(%rsp),%r14 |
| 438 adcq 120(%rsp),%r15 |
| 439 sbbq %rcx,%rcx |
| 440 |
| 441 call __rsaz_512_subtract |
| 442 |
# Epilogue: reload the six saved registers and release the frame.
| 443 leaq 128+24+48(%rsp),%rax |
| 444 movq -48(%rax),%r15 |
| 445 movq -40(%rax),%r14 |
| 446 movq -32(%rax),%r13 |
| 447 movq -24(%rax),%r12 |
| 448 movq -16(%rax),%rbp |
| 449 movq -8(%rax),%rbx |
| 450 leaq (%rax),%rsp |
| 451 .Lmul_epilogue: |
# Bytes F3 C3 encode "rep ret".
| 452 .byte 0xf3,0xc3 |
| 453 .size rsaz_512_mul,.-rsaz_512_mul |
# rsaz_512_mul_gather4
# Multiplies the 8-qword operand at %rsi by a value gathered from a table,
# then runs __rsaz_512_reduce and __rsaz_512_subtract.
# Register roles as used by the code below:
#   %rdi  destination, 8 qwords (parked in %xmm0; %rdi is reused as the
#         product write pointer)
#   %rsi  first operand, 8 qwords
#   %rdx  table base
#   %rcx  pointer parked in %xmm1 and moved to %rbp for reduce/subtract
#         (presumably the modulus; TODO confirm against the C prototype)
#   %r8   64-bit factor saved at 128(%rsp) for __rsaz_512_reduce
#         (presumably the Montgomery constant n0; TODO confirm)
#   %r9d  table index, zero-extended on entry
# Table layout read here: qword k of entry idx is stored as two dwords, the
# low half at table + 128*k + 4*idx and the high half 64 bytes after it.
# NOTE(review): the load addresses depend on the index; TODO confirm whether
# callers need index-independent memory access.
# NOTE(review): the last loop pass also loads the dword pair of a ninth row
# (offset 1024 + 4*idx and 64 bytes beyond); the value is never used.
# TODO confirm the table buffer is readable that far.
| 454 .globl rsaz_512_mul_gather4 |
| 455 .type rsaz_512_mul_gather4,@function |
| 456 .align 32 |
| 457 rsaz_512_mul_gather4: |
| 458 pushq %rbx |
| 459 pushq %rbp |
| 460 pushq %r12 |
| 461 pushq %r13 |
| 462 pushq %r14 |
| 463 pushq %r15 |
| 464 |
# A 32-bit move to itself clears the upper half of %r9.
| 465 movl %r9d,%r9d |
| 466 subq $128+24,%rsp |
| 467 .Lmul_gather4_body: |
# Gather qword 0 of the table entry into %rbx (high half comes from +64).
# The .byte rows encode movq %rdi,%xmm0 and movq %rcx,%xmm1.
| 468 movl 64(%rdx,%r9,4),%eax |
| 469 .byte 102,72,15,110,199 |
| 470 movl (%rdx,%r9,4),%ebx |
| 471 .byte 102,72,15,110,201 |
| 472 movq %r8,128(%rsp) |
| 473 |
| 474 shlq $32,%rax |
| 475 orq %rax,%rbx |
| 476 movq (%rsi),%rax |
| 477 movq 8(%rsi),%rcx |
# %rbp = address of the low dword of the next table qword.
| 478 leaq 128(%rdx,%r9,4),%rbp |
# First row: a[0..7] times gathered qword 0. Product qword 0 is stored, the
# rest stays in %r8..%r15. The next table qword is assembled in %xmm4
# between the multiplications.
| 479 mulq %rbx |
| 480 movq %rax,(%rsp) |
| 481 movq %rcx,%rax |
| 482 movq %rdx,%r8 |
| 483 |
| 484 mulq %rbx |
| 485 movd (%rbp),%xmm4 |
| 486 addq %rax,%r8 |
| 487 movq 16(%rsi),%rax |
| 488 movq %rdx,%r9 |
| 489 adcq $0,%r9 |
| 490 |
| 491 mulq %rbx |
| 492 movd 64(%rbp),%xmm5 |
| 493 addq %rax,%r9 |
| 494 movq 24(%rsi),%rax |
| 495 movq %rdx,%r10 |
| 496 adcq $0,%r10 |
| 497 |
| 498 mulq %rbx |
# Byte shift by 4 places the high dword above the low one before the por.
| 499 pslldq $4,%xmm5 |
| 500 addq %rax,%r10 |
| 501 movq 32(%rsi),%rax |
| 502 movq %rdx,%r11 |
| 503 adcq $0,%r11 |
| 504 |
| 505 mulq %rbx |
| 506 por %xmm5,%xmm4 |
| 507 addq %rax,%r11 |
| 508 movq 40(%rsi),%rax |
| 509 movq %rdx,%r12 |
| 510 adcq $0,%r12 |
| 511 |
| 512 mulq %rbx |
| 513 addq %rax,%r12 |
| 514 movq 48(%rsi),%rax |
| 515 movq %rdx,%r13 |
| 516 adcq $0,%r13 |
| 517 |
| 518 mulq %rbx |
| 519 leaq 128(%rbp),%rbp |
| 520 addq %rax,%r13 |
| 521 movq 56(%rsi),%rax |
| 522 movq %rdx,%r14 |
| 523 adcq $0,%r14 |
| 524 |
| 525 mulq %rbx |
# Encodes movq %xmm4,%rbx: the multiplier for the next row.
| 526 .byte 102,72,15,126,227 |
| 527 addq %rax,%r14 |
| 528 movq (%rsi),%rax |
| 529 movq %rdx,%r15 |
| 530 adcq $0,%r15 |
| 531 |
# %rdi walks the product buffer; seven more rows follow.
| 532 leaq 8(%rsp),%rdi |
| 533 movl $7,%ecx |
| 534 jmp .Loop_mul_gather |
| 535 |
| 536 .align 32 |
| 537 .Loop_mul_gather: |
# One row per pass: add a[0..7] * %rbx to the running window %r8..%r15,
# store the lowest qword at (%rdi) and slide the window down by one.
| 538 mulq %rbx |
| 539 addq %rax,%r8 |
| 540 movq 8(%rsi),%rax |
| 541 movq %r8,(%rdi) |
| 542 movq %rdx,%r8 |
| 543 adcq $0,%r8 |
| 544 |
| 545 mulq %rbx |
| 546 movd (%rbp),%xmm4 |
| 547 addq %rax,%r9 |
| 548 movq 16(%rsi),%rax |
| 549 adcq $0,%rdx |
| 550 addq %r9,%r8 |
| 551 movq %rdx,%r9 |
| 552 adcq $0,%r9 |
| 553 |
| 554 mulq %rbx |
| 555 movd 64(%rbp),%xmm5 |
| 556 addq %rax,%r10 |
| 557 movq 24(%rsi),%rax |
| 558 adcq $0,%rdx |
| 559 addq %r10,%r9 |
| 560 movq %rdx,%r10 |
| 561 adcq $0,%r10 |
| 562 |
| 563 mulq %rbx |
| 564 pslldq $4,%xmm5 |
| 565 addq %rax,%r11 |
| 566 movq 32(%rsi),%rax |
| 567 adcq $0,%rdx |
| 568 addq %r11,%r10 |
| 569 movq %rdx,%r11 |
| 570 adcq $0,%r11 |
| 571 |
| 572 mulq %rbx |
| 573 por %xmm5,%xmm4 |
| 574 addq %rax,%r12 |
| 575 movq 40(%rsi),%rax |
| 576 adcq $0,%rdx |
| 577 addq %r12,%r11 |
| 578 movq %rdx,%r12 |
| 579 adcq $0,%r12 |
| 580 |
| 581 mulq %rbx |
| 582 addq %rax,%r13 |
| 583 movq 48(%rsi),%rax |
| 584 adcq $0,%rdx |
| 585 addq %r13,%r12 |
| 586 movq %rdx,%r13 |
| 587 adcq $0,%r13 |
| 588 |
| 589 mulq %rbx |
| 590 addq %rax,%r14 |
| 591 movq 56(%rsi),%rax |
| 592 adcq $0,%rdx |
| 593 addq %r14,%r13 |
| 594 movq %rdx,%r14 |
| 595 adcq $0,%r14 |
| 596 |
| 597 mulq %rbx |
# Encodes movq %xmm4,%rbx: the multiplier for the next pass.
| 598 .byte 102,72,15,126,227 |
| 599 addq %rax,%r15 |
| 600 movq (%rsi),%rax |
| 601 adcq $0,%rdx |
| 602 addq %r15,%r14 |
| 603 movq %rdx,%r15 |
| 604 adcq $0,%r15 |
| 605 |
| 606 leaq 128(%rbp),%rbp |
| 607 leaq 8(%rdi),%rdi |
| 608 |
| 609 decl %ecx |
| 610 jnz .Loop_mul_gather |
| 611 |
# Store the remaining eight qwords (product qwords 8..15).
| 612 movq %r8,(%rdi) |
| 613 movq %r9,8(%rdi) |
| 614 movq %r10,16(%rdi) |
| 615 movq %r11,24(%rdi) |
| 616 movq %r12,32(%rdi) |
| 617 movq %r13,40(%rdi) |
| 618 movq %r14,48(%rdi) |
| 619 movq %r15,56(%rdi) |
| 620 |
# These encode movq %xmm0,%rdi and movq %xmm1,%rbp.
| 621 .byte 102,72,15,126,199 |
| 622 .byte 102,72,15,126,205 |
| 623 |
# Low eight product qwords go to the reduce helper in %r8..%r15.
| 624 movq (%rsp),%r8 |
| 625 movq 8(%rsp),%r9 |
| 626 movq 16(%rsp),%r10 |
| 627 movq 24(%rsp),%r11 |
| 628 movq 32(%rsp),%r12 |
| 629 movq 40(%rsp),%r13 |
| 630 movq 48(%rsp),%r14 |
| 631 movq 56(%rsp),%r15 |
| 632 |
| 633 call __rsaz_512_reduce |
# Add the high eight qwords; sbbq turns the final carry into the mask that
# __rsaz_512_subtract expects in %rcx.
| 634 addq 64(%rsp),%r8 |
| 635 adcq 72(%rsp),%r9 |
| 636 adcq 80(%rsp),%r10 |
| 637 adcq 88(%rsp),%r11 |
| 638 adcq 96(%rsp),%r12 |
| 639 adcq 104(%rsp),%r13 |
| 640 adcq 112(%rsp),%r14 |
| 641 adcq 120(%rsp),%r15 |
| 642 sbbq %rcx,%rcx |
| 643 |
| 644 call __rsaz_512_subtract |
| 645 |
# Epilogue: reload the six saved registers and release the frame.
| 646 leaq 128+24+48(%rsp),%rax |
| 647 movq -48(%rax),%r15 |
| 648 movq -40(%rax),%r14 |
| 649 movq -32(%rax),%r13 |
| 650 movq -24(%rax),%r12 |
| 651 movq -16(%rax),%rbp |
| 652 movq -8(%rax),%rbx |
| 653 leaq (%rax),%rsp |
| 654 .Lmul_gather4_epilogue: |
# Bytes F3 C3 encode "rep ret".
| 655 .byte 0xf3,0xc3 |
| 656 .size rsaz_512_mul_gather4,.-rsaz_512_mul_gather4 |
# rsaz_512_mul_scatter4
# Multiplies the 8 qwords at %rsi by the 8 qwords currently stored at %rdi,
# reduces, writes the result back to %rdi and also scatters it into a table.
# Register roles as used by the code below:
#   %rdi  second operand on entry and destination on exit (parked in %xmm0)
#   %rsi  first operand, 8 qwords
#   %rdx  pointer parked in %xmm1 and moved to %rbp for reduce/subtract
#         (presumably the modulus; TODO confirm against the C prototype)
#   %rcx  64-bit factor saved at 128(%rsp) for __rsaz_512_reduce
#         (presumably the Montgomery constant n0; TODO confirm)
#   %r8   table base
#   %r9d  table index, zero-extended on entry
# Table layout written here: qword k goes out as two dwords, the low half
# at table + 128*k + 4*idx and the high half 64 bytes after it.
| 657 .globl rsaz_512_mul_scatter4 |
| 658 .type rsaz_512_mul_scatter4,@function |
| 659 .align 32 |
| 660 rsaz_512_mul_scatter4: |
| 661 pushq %rbx |
| 662 pushq %rbp |
| 663 pushq %r12 |
| 664 pushq %r13 |
| 665 pushq %r14 |
| 666 pushq %r15 |
| 667 |
# A 32-bit move to itself clears the upper half of %r9.
| 668 movl %r9d,%r9d |
| 669 subq $128+24,%rsp |
| 670 .Lmul_scatter4_body: |
# %r8 = address of the selected table slot. The .byte rows encode
# movq %rdi,%xmm0, movq %rdx,%xmm1 and movq %r8,%xmm2.
| 671 leaq (%r8,%r9,4),%r8 |
| 672 .byte 102,72,15,110,199 |
| 673 .byte 102,72,15,110,202 |
| 674 .byte 102,73,15,110,208 |
| 675 movq %rcx,128(%rsp) |
| 676 |
# Helper contract: %rbp = base of the second operand, %rbx = its first qword.
| 677 movq %rdi,%rbp |
| 678 movq (%rdi),%rbx |
| 679 call __rsaz_512_mul |
| 680 |
# These encode movq %xmm0,%rdi and movq %xmm1,%rbp.
| 681 .byte 102,72,15,126,199 |
| 682 .byte 102,72,15,126,205 |
| 683 |
# Low eight product qwords go to the reduce helper in %r8..%r15.
| 684 movq (%rsp),%r8 |
| 685 movq 8(%rsp),%r9 |
| 686 movq 16(%rsp),%r10 |
| 687 movq 24(%rsp),%r11 |
| 688 movq 32(%rsp),%r12 |
| 689 movq 40(%rsp),%r13 |
| 690 movq 48(%rsp),%r14 |
| 691 movq 56(%rsp),%r15 |
| 692 |
| 693 call __rsaz_512_reduce |
| 694 addq 64(%rsp),%r8 |
| 695 adcq 72(%rsp),%r9 |
| 696 adcq 80(%rsp),%r10 |
| 697 adcq 88(%rsp),%r11 |
| 698 adcq 96(%rsp),%r12 |
| 699 adcq 104(%rsp),%r13 |
| 700 adcq 112(%rsp),%r14 |
| 701 adcq 120(%rsp),%r15 |
# Encodes movq %xmm2,%rsi (table slot); the move leaves CF intact for the
# sbbq that builds the mask for __rsaz_512_subtract.
| 702 .byte 102,72,15,126,214 |
| 703 sbbq %rcx,%rcx |
| 704 |
| 705 call __rsaz_512_subtract |
| 706 |
# The helper leaves the result in %r8..%r15. Store the low dwords at a
# 128-byte stride, shifting each register so that its high dword is ready
# for the second group of stores at +64.
| 707 movl %r8d,0(%rsi) |
| 708 shrq $32,%r8 |
| 709 movl %r9d,128(%rsi) |
| 710 shrq $32,%r9 |
| 711 movl %r10d,256(%rsi) |
| 712 shrq $32,%r10 |
| 713 movl %r11d,384(%rsi) |
| 714 shrq $32,%r11 |
| 715 movl %r12d,512(%rsi) |
| 716 shrq $32,%r12 |
| 717 movl %r13d,640(%rsi) |
| 718 shrq $32,%r13 |
| 719 movl %r14d,768(%rsi) |
| 720 shrq $32,%r14 |
| 721 movl %r15d,896(%rsi) |
| 722 shrq $32,%r15 |
| 723 movl %r8d,64(%rsi) |
| 724 movl %r9d,192(%rsi) |
| 725 movl %r10d,320(%rsi) |
| 726 movl %r11d,448(%rsi) |
| 727 movl %r12d,576(%rsi) |
| 728 movl %r13d,704(%rsi) |
| 729 movl %r14d,832(%rsi) |
| 730 movl %r15d,960(%rsi) |
| 731 |
# Epilogue: reload the six saved registers and release the frame.
| 732 leaq 128+24+48(%rsp),%rax |
| 733 movq -48(%rax),%r15 |
| 734 movq -40(%rax),%r14 |
| 735 movq -32(%rax),%r13 |
| 736 movq -24(%rax),%r12 |
| 737 movq -16(%rax),%rbp |
| 738 movq -8(%rax),%rbx |
| 739 leaq (%rax),%rsp |
| 740 .Lmul_scatter4_epilogue: |
# Bytes F3 C3 encode "rep ret".
| 741 .byte 0xf3,0xc3 |
| 742 .size rsaz_512_mul_scatter4,.-rsaz_512_mul_scatter4 |
# rsaz_512_mul_by_one
# Runs __rsaz_512_reduce directly on the 8-qword input and stores the
# helper output; no multiplication and no final __rsaz_512_subtract.
# Register roles as used by the code below:
#   %rdi  destination, 8 qwords
#   %rsi  source, 8 qwords
#   %rdx  pointer moved to %rbp for the reduce helper
#         (presumably the modulus; TODO confirm against the C prototype)
#   %rcx  64-bit factor saved at 128(%rsp) for __rsaz_512_reduce
#         (presumably the Montgomery constant n0; TODO confirm)
| 743 .globl rsaz_512_mul_by_one |
| 744 .type rsaz_512_mul_by_one,@function |
| 745 .align 32 |
| 746 rsaz_512_mul_by_one: |
| 747 pushq %rbx |
| 748 pushq %rbp |
| 749 pushq %r12 |
| 750 pushq %r13 |
| 751 pushq %r14 |
| 752 pushq %r15 |
| 753 |
| 754 subq $128+24,%rsp |
| 755 .Lmul_by_one_body: |
| 756 movq %rdx,%rbp |
| 757 movq %rcx,128(%rsp) |
| 758 |
# Load the input into %r8..%r15, the registers the reduce helper works on.
| 759 movq (%rsi),%r8 |
| 760 pxor %xmm0,%xmm0 |
| 761 movq 8(%rsi),%r9 |
| 762 movq 16(%rsi),%r10 |
| 763 movq 24(%rsi),%r11 |
| 764 movq 32(%rsi),%r12 |
| 765 movq 40(%rsi),%r13 |
| 766 movq 48(%rsi),%r14 |
| 767 movq 56(%rsi),%r15 |
| 768 |
# Zero 112 bytes of the frame. movdqa needs a 16-byte aligned %rsp, which
# holds if %rsp was 8 mod 16 on entry (48 bytes pushed plus 152 reserved).
# NOTE(review): __rsaz_512_reduce in this file reads only the 128(%rsp)
# slot, so the purpose of the clearing is not evident here; TODO confirm.
| 769 movdqa %xmm0,(%rsp) |
| 770 movdqa %xmm0,16(%rsp) |
| 771 movdqa %xmm0,32(%rsp) |
| 772 movdqa %xmm0,48(%rsp) |
| 773 movdqa %xmm0,64(%rsp) |
| 774 movdqa %xmm0,80(%rsp) |
| 775 movdqa %xmm0,96(%rsp) |
| 776 call __rsaz_512_reduce |
| 777 movq %r8,(%rdi) |
| 778 movq %r9,8(%rdi) |
| 779 movq %r10,16(%rdi) |
| 780 movq %r11,24(%rdi) |
| 781 movq %r12,32(%rdi) |
| 782 movq %r13,40(%rdi) |
| 783 movq %r14,48(%rdi) |
| 784 movq %r15,56(%rdi) |
| 785 |
# Epilogue: reload the six saved registers and release the frame.
| 786 leaq 128+24+48(%rsp),%rax |
| 787 movq -48(%rax),%r15 |
| 788 movq -40(%rax),%r14 |
| 789 movq -32(%rax),%r13 |
| 790 movq -24(%rax),%r12 |
| 791 movq -16(%rax),%rbp |
| 792 movq -8(%rax),%rbx |
| 793 leaq (%rax),%rsp |
| 794 .Lmul_by_one_epilogue: |
# Bytes F3 C3 encode "rep ret".
| 795 .byte 0xf3,0xc3 |
| 796 .size rsaz_512_mul_by_one,.-rsaz_512_mul_by_one |
# __rsaz_512_reduce (local helper, non-standard calling convention)
# In:   %r8..%r15   eight-qword value, lowest qword in %r8
#       %rbp        pointer to eight qwords (presumably the modulus; TODO
#                   confirm)
#       128+8(%rsp) 64-bit factor; with the return address on the stack
#                   this is the slot the callers write as 128(%rsp)
# Out:  %r8..%r15   value after eight reduction passes
# Clobbers %rax, %rbx, %rcx, %rdx, %rsi and the flags.
# Each pass adds %rbx times the eight qwords at %rbp to the value and
# slides the window down by one qword; %rbx is the lowest qword times the
# factor from the stack.
| 797 .type __rsaz_512_reduce,@function |
| 798 .align 32 |
| 799 __rsaz_512_reduce: |
| 800 movq %r8,%rbx |
| 801 imulq 128+8(%rsp),%rbx |
| 802 movq 0(%rbp),%rax |
| 803 movl $8,%ecx |
| 804 jmp .Lreduction_loop |
| 805 |
| 806 .align 32 |
| 807 .Lreduction_loop: |
# The low product qword is not added explicitly: negq sets CF when %r8 is
# non-zero, and that flag is used as the carry into the next qword
# (presumably because the low qword of the sum is zero by construction;
# TODO confirm).
| 808 mulq %rbx |
| 809 movq 8(%rbp),%rax |
| 810 negq %r8 |
| 811 movq %rdx,%r8 |
| 812 adcq $0,%r8 |
| 813 |
| 814 mulq %rbx |
| 815 addq %rax,%r9 |
| 816 movq 16(%rbp),%rax |
| 817 adcq $0,%rdx |
| 818 addq %r9,%r8 |
| 819 movq %rdx,%r9 |
| 820 adcq $0,%r9 |
| 821 |
| 822 mulq %rbx |
| 823 addq %rax,%r10 |
| 824 movq 24(%rbp),%rax |
| 825 adcq $0,%rdx |
| 826 addq %r10,%r9 |
| 827 movq %rdx,%r10 |
| 828 adcq $0,%r10 |
| 829 |
| 830 mulq %rbx |
| 831 addq %rax,%r11 |
| 832 movq 32(%rbp),%rax |
| 833 adcq $0,%rdx |
| 834 addq %r11,%r10 |
# Reload the factor; the move does not touch CF, which the adcq below
# still needs.
| 835 movq 128+8(%rsp),%rsi |
| 836 |
| 837 |
| 838 adcq $0,%rdx |
| 839 movq %rdx,%r11 |
| 840 |
| 841 mulq %rbx |
| 842 addq %rax,%r12 |
| 843 movq 40(%rbp),%rax |
| 844 adcq $0,%rdx |
# Multiplier for the next pass, computed from the new lowest qword in %r8.
| 845 imulq %r8,%rsi |
| 846 addq %r12,%r11 |
| 847 movq %rdx,%r12 |
| 848 adcq $0,%r12 |
| 849 |
| 850 mulq %rbx |
| 851 addq %rax,%r13 |
| 852 movq 48(%rbp),%rax |
| 853 adcq $0,%rdx |
| 854 addq %r13,%r12 |
| 855 movq %rdx,%r13 |
| 856 adcq $0,%r13 |
| 857 |
| 858 mulq %rbx |
| 859 addq %rax,%r14 |
| 860 movq 56(%rbp),%rax |
| 861 adcq $0,%rdx |
| 862 addq %r14,%r13 |
| 863 movq %rdx,%r14 |
| 864 adcq $0,%r14 |
| 865 |
| 866 mulq %rbx |
# Switch to the next multiplier once the last product of this pass is done.
| 867 movq %rsi,%rbx |
| 868 addq %rax,%r15 |
| 869 movq 0(%rbp),%rax |
| 870 adcq $0,%rdx |
| 871 addq %r15,%r14 |
| 872 movq %rdx,%r15 |
| 873 adcq $0,%r15 |
| 874 |
| 875 decl %ecx |
| 876 jne .Lreduction_loop |
| 877 |
# Bytes F3 C3 encode "rep ret".
| 878 .byte 0xf3,0xc3 |
| 879 .size __rsaz_512_reduce,.-__rsaz_512_reduce |
# __rsaz_512_subtract (local helper, non-standard calling convention)
# In:   %r8..%r15  eight-qword value, lowest qword in %r8
#       %rdi       destination, 8 qwords
#       %rbp       pointer to eight qwords (presumably the modulus; TODO
#                  confirm)
#       %rcx       mask, all ones or zero (callers build it with sbbq)
# Out:  (%rdi) and %r8..%r15 hold value + (mask AND negated qwords at %rbp)
# The negation is negq on the lowest qword and notq on the rest, which is
# the 512-bit two's complement only when the lowest qword is non-zero.
# The same instructions run whatever the mask is; there is no branch.
| 880 .type __rsaz_512_subtract,@function |
| 881 .align 32 |
| 882 __rsaz_512_subtract: |
# Park the value in the destination so %r8..%r15 can hold the masked operand.
| 883 movq %r8,(%rdi) |
| 884 movq %r9,8(%rdi) |
| 885 movq %r10,16(%rdi) |
| 886 movq %r11,24(%rdi) |
| 887 movq %r12,32(%rdi) |
| 888 movq %r13,40(%rdi) |
| 889 movq %r14,48(%rdi) |
| 890 movq %r15,56(%rdi) |
| 891 |
| 892 movq 0(%rbp),%r8 |
| 893 movq 8(%rbp),%r9 |
| 894 negq %r8 |
| 895 notq %r9 |
| 896 andq %rcx,%r8 |
| 897 movq 16(%rbp),%r10 |
| 898 andq %rcx,%r9 |
| 899 notq %r10 |
| 900 movq 24(%rbp),%r11 |
| 901 andq %rcx,%r10 |
| 902 notq %r11 |
| 903 movq 32(%rbp),%r12 |
| 904 andq %rcx,%r11 |
| 905 notq %r12 |
| 906 movq 40(%rbp),%r13 |
| 907 andq %rcx,%r12 |
| 908 notq %r13 |
| 909 movq 48(%rbp),%r14 |
| 910 andq %rcx,%r13 |
| 911 notq %r14 |
| 912 movq 56(%rbp),%r15 |
| 913 andq %rcx,%r14 |
| 914 notq %r15 |
| 915 andq %rcx,%r15 |
| 916 |
# Add the parked value back with a full carry chain.
| 917 addq (%rdi),%r8 |
| 918 adcq 8(%rdi),%r9 |
| 919 adcq 16(%rdi),%r10 |
| 920 adcq 24(%rdi),%r11 |
| 921 adcq 32(%rdi),%r12 |
| 922 adcq 40(%rdi),%r13 |
| 923 adcq 48(%rdi),%r14 |
| 924 adcq 56(%rdi),%r15 |
| 925 |
| 926 movq %r8,(%rdi) |
| 927 movq %r9,8(%rdi) |
| 928 movq %r10,16(%rdi) |
| 929 movq %r11,24(%rdi) |
| 930 movq %r12,32(%rdi) |
| 931 movq %r13,40(%rdi) |
| 932 movq %r14,48(%rdi) |
| 933 movq %r15,56(%rdi) |
| 934 |
# Bytes F3 C3 encode "rep ret".
| 935 .byte 0xf3,0xc3 |
| 936 .size __rsaz_512_subtract,.-__rsaz_512_subtract |
# __rsaz_512_mul (local helper, non-standard calling convention)
# In:   %rsi  first operand, 8 qwords
#       %rbp  second operand, 8 qwords
#       %rbx  first qword of the second operand (preloaded by the caller)
# Out:  16-qword product stored from 8(%rsp) upward; with the return
#       address on the stack this is the caller frame starting at (%rsp)
# Clobbers %rax, %rbx, %rcx, %rdx, %rdi, %rbp, %r8..%r15 and the flags.
| 937 .type __rsaz_512_mul,@function |
| 938 .align 32 |
| 939 __rsaz_512_mul: |
| 940 leaq 8(%rsp),%rdi |
| 941 |
# First row: a[0..7] times %rbx. Product qword 0 is stored, the rest stays
# in %r8..%r15.
| 942 movq (%rsi),%rax |
| 943 mulq %rbx |
| 944 movq %rax,(%rdi) |
| 945 movq 8(%rsi),%rax |
| 946 movq %rdx,%r8 |
| 947 |
| 948 mulq %rbx |
| 949 addq %rax,%r8 |
| 950 movq 16(%rsi),%rax |
| 951 movq %rdx,%r9 |
| 952 adcq $0,%r9 |
| 953 |
| 954 mulq %rbx |
| 955 addq %rax,%r9 |
| 956 movq 24(%rsi),%rax |
| 957 movq %rdx,%r10 |
| 958 adcq $0,%r10 |
| 959 |
| 960 mulq %rbx |
| 961 addq %rax,%r10 |
| 962 movq 32(%rsi),%rax |
| 963 movq %rdx,%r11 |
| 964 adcq $0,%r11 |
| 965 |
| 966 mulq %rbx |
| 967 addq %rax,%r11 |
| 968 movq 40(%rsi),%rax |
| 969 movq %rdx,%r12 |
| 970 adcq $0,%r12 |
| 971 |
| 972 mulq %rbx |
| 973 addq %rax,%r12 |
| 974 movq 48(%rsi),%rax |
| 975 movq %rdx,%r13 |
| 976 adcq $0,%r13 |
| 977 |
| 978 mulq %rbx |
| 979 addq %rax,%r13 |
| 980 movq 56(%rsi),%rax |
| 981 movq %rdx,%r14 |
| 982 adcq $0,%r14 |
| 983 |
| 984 mulq %rbx |
| 985 addq %rax,%r14 |
| 986 movq (%rsi),%rax |
| 987 movq %rdx,%r15 |
| 988 adcq $0,%r15 |
| 989 |
# Advance to the next qword of the second operand and the next output slot.
| 990 leaq 8(%rbp),%rbp |
| 991 leaq 8(%rdi),%rdi |
| 992 |
| 993 movl $7,%ecx |
| 994 jmp .Loop_mul |
| 995 |
| 996 .align 32 |
| 997 .Loop_mul: |
# One row per pass: add a[0..7] * (%rbp) to the running window %r8..%r15,
# store the lowest qword at (%rdi) and slide the window down by one.
| 998 movq (%rbp),%rbx |
| 999 mulq %rbx |
| 1000 addq %rax,%r8 |
| 1001 movq 8(%rsi),%rax |
| 1002 movq %r8,(%rdi) |
| 1003 movq %rdx,%r8 |
| 1004 adcq $0,%r8 |
| 1005 |
| 1006 mulq %rbx |
| 1007 addq %rax,%r9 |
| 1008 movq 16(%rsi),%rax |
| 1009 adcq $0,%rdx |
| 1010 addq %r9,%r8 |
| 1011 movq %rdx,%r9 |
| 1012 adcq $0,%r9 |
| 1013 |
| 1014 mulq %rbx |
| 1015 addq %rax,%r10 |
| 1016 movq 24(%rsi),%rax |
| 1017 adcq $0,%rdx |
| 1018 addq %r10,%r9 |
| 1019 movq %rdx,%r10 |
| 1020 adcq $0,%r10 |
| 1021 |
| 1022 mulq %rbx |
| 1023 addq %rax,%r11 |
| 1024 movq 32(%rsi),%rax |
| 1025 adcq $0,%rdx |
| 1026 addq %r11,%r10 |
| 1027 movq %rdx,%r11 |
| 1028 adcq $0,%r11 |
| 1029 |
| 1030 mulq %rbx |
| 1031 addq %rax,%r12 |
| 1032 movq 40(%rsi),%rax |
| 1033 adcq $0,%rdx |
| 1034 addq %r12,%r11 |
| 1035 movq %rdx,%r12 |
| 1036 adcq $0,%r12 |
| 1037 |
| 1038 mulq %rbx |
| 1039 addq %rax,%r13 |
| 1040 movq 48(%rsi),%rax |
| 1041 adcq $0,%rdx |
| 1042 addq %r13,%r12 |
| 1043 movq %rdx,%r13 |
| 1044 adcq $0,%r13 |
| 1045 |
| 1046 mulq %rbx |
| 1047 addq %rax,%r14 |
| 1048 movq 56(%rsi),%rax |
| 1049 adcq $0,%rdx |
| 1050 addq %r14,%r13 |
| 1051 movq %rdx,%r14 |
# leaq advances the operand pointer without disturbing CF for the adcq.
| 1052 leaq 8(%rbp),%rbp |
| 1053 adcq $0,%r14 |
| 1054 |
| 1055 mulq %rbx |
| 1056 addq %rax,%r15 |
| 1057 movq (%rsi),%rax |
| 1058 adcq $0,%rdx |
| 1059 addq %r15,%r14 |
| 1060 movq %rdx,%r15 |
| 1061 adcq $0,%r15 |
| 1062 |
| 1063 leaq 8(%rdi),%rdi |
| 1064 |
| 1065 decl %ecx |
| 1066 jnz .Loop_mul |
| 1067 |
# Store the remaining eight qwords (product qwords 8..15).
| 1068 movq %r8,(%rdi) |
| 1069 movq %r9,8(%rdi) |
| 1070 movq %r10,16(%rdi) |
| 1071 movq %r11,24(%rdi) |
| 1072 movq %r12,32(%rdi) |
| 1073 movq %r13,40(%rdi) |
| 1074 movq %r14,48(%rdi) |
| 1075 movq %r15,56(%rdi) |
| 1076 |
# Bytes F3 C3 encode "rep ret".
| 1077 .byte 0xf3,0xc3 |
| 1078 .size __rsaz_512_mul,.-__rsaz_512_mul |
# rsaz_512_scatter4
# Copies eight qwords from %rsi into the table at %rdi, slot index %rdx.
# Qword k is written as two dwords: the low half at table + 128*k + 4*idx
# and the high half 64 bytes after it.
# Uses only %rax and %r9 as scratch, so nothing is saved on the stack.
# NOTE(review): the index is taken from all 64 bits of %rdx without
# zero-extension; TODO confirm callers pass a clean 64-bit value.
| 1079 .globl rsaz_512_scatter4 |
| 1080 .type rsaz_512_scatter4,@function |
| 1081 .align 16 |
| 1082 rsaz_512_scatter4: |
| 1083 leaq (%rdi,%rdx,4),%rdi |
| 1084 movl $8,%r9d |
| 1085 jmp .Loop_scatter |
| 1086 .align 16 |
| 1087 .Loop_scatter: |
| 1088 movq (%rsi),%rax |
| 1089 leaq 8(%rsi),%rsi |
| 1090 movl %eax,(%rdi) |
| 1091 shrq $32,%rax |
| 1092 movl %eax,64(%rdi) |
| 1093 leaq 128(%rdi),%rdi |
| 1094 decl %r9d |
| 1095 jnz .Loop_scatter |
# Bytes F3 C3 encode "rep ret".
| 1096 .byte 0xf3,0xc3 |
| 1097 .size rsaz_512_scatter4,.-rsaz_512_scatter4 |
| 1098 |
# rsaz_512_gather4
# Rebuilds eight qwords at %rdi from the table at %rsi, slot index %rdx.
# Qword k is assembled from two dwords: the low half at
# table + 128*k + 4*idx and the high half 64 bytes after it.
# Uses only %rax, %r8 and %r9 as scratch, so nothing is saved on the stack.
# NOTE(review): the index is taken from all 64 bits of %rdx without
# zero-extension, and the load addresses depend on it; TODO confirm callers
# pass a clean value and do not need index-independent memory access.
| 1099 .globl rsaz_512_gather4 |
| 1100 .type rsaz_512_gather4,@function |
| 1101 .align 16 |
| 1102 rsaz_512_gather4: |
| 1103 leaq (%rsi,%rdx,4),%rsi |
| 1104 movl $8,%r9d |
| 1105 jmp .Loop_gather |
| 1106 .align 16 |
| 1107 .Loop_gather: |
# The 32-bit loads zero the upper halves of %rax and %r8, so the shifted
# high dword can simply be ORed in.
| 1108 movl (%rsi),%eax |
| 1109 movl 64(%rsi),%r8d |
| 1110 leaq 128(%rsi),%rsi |
| 1111 shlq $32,%r8 |
| 1112 orq %r8,%rax |
| 1113 movq %rax,(%rdi) |
| 1114 leaq 8(%rdi),%rdi |
| 1115 decl %r9d |
| 1116 jnz .Loop_gather |
# Bytes F3 C3 encode "rep ret".
| 1117 .byte 0xf3,0xc3 |
| 1118 .size rsaz_512_gather4,.-rsaz_512_gather4 |
| 1119 #endif |
OLD | NEW |