| OLD | NEW |
| (Empty) |
| 1 #if defined(__x86_64__) | |
| 2 .text | |
| 3 | |
| 4 .extern OPENSSL_ia32cap_P | |
| 5 .hidden OPENSSL_ia32cap_P | |
| 6 | |
/*
 * rsaz_512_sqr: repeated 512-bit squaring followed by reduction.
 *
 * Register use as read from the code below (System V argument order is
 * assumed; the C prototype is not in this file - TODO confirm):
 *   rdi = output, 8 qwords
 *   rsi = input, 8 qwords
 *   rdx = pointer to 8-qword modulus, kept in rbp for the helpers
 *   rcx = per-modulus constant, saved at 128(rsp) for __rsaz_512_reduce
 *   r8d = pass count, saved at 128+8(rsp)
 *
 * Each pass builds the 16-qword square at 0..120(rsp), reduces the low
 * 8 qwords, adds the high 8 qwords, passes the carry as a mask to
 * __rsaz_512_subtract (which also stores to (rdi)), and then feeds the
 * output back in as the next input.
 *
 * The pass count is only tested after a pass has run (decl, jnz), so a
 * count of 0 wraps to 0xffffffff instead of skipping the loop.
 *
 * NOTE(review): the way doubled cross products carry between limbs here
 * (top bit saved with shrq 63, re-added with leaq) resembles the code
 * that upstream OpenSSL replaced when fixing CVE-2019-1551 (overflow in
 * rsaz_512_sqr). Confirm whether this copy predates that fix.
 */
| 7 .globl rsaz_512_sqr | |
| 8 .hidden rsaz_512_sqr | |
| 9 .type rsaz_512_sqr,@function | |
| 10 .align 32 | |
| 11 rsaz_512_sqr: | |
/* Save the six callee-saved registers used below, then reserve 152 bytes. */
| 12 pushq %rbx | |
| 13 pushq %rbp | |
| 14 pushq %r12 | |
| 15 pushq %r13 | |
| 16 pushq %r14 | |
| 17 pushq %r15 | |
| 18 | |
| 19 subq $128+24,%rsp | |
| 20 .Lsqr_body: | |
/* rbp = modulus pointer; preload a[0] into rdx and a[1] into rax. */
| 21 movq %rdx,%rbp | |
| 22 movq (%rsi),%rdx | |
| 23 movq 8(%rsi),%rax | |
| 24 movq %rcx,128(%rsp) | |
| 25 jmp .Loop_sqr | |
| 26 | |
| 27 .align 32 | |
| 28 .Loop_sqr: | |
/* Save the remaining pass count; r8 is reused as an accumulator. */
| 29 movl %r8d,128+8(%rsp) | |
| 30 | |
/* Row 0: a[0] times a[1..7], accumulated into r8..r15. */
| 31 movq %rdx,%rbx | |
| 32 mulq %rdx | |
| 33 movq %rax,%r8 | |
| 34 movq 16(%rsi),%rax | |
| 35 movq %rdx,%r9 | |
| 36 | |
| 37 mulq %rbx | |
| 38 addq %rax,%r9 | |
| 39 movq 24(%rsi),%rax | |
| 40 movq %rdx,%r10 | |
| 41 adcq $0,%r10 | |
| 42 | |
| 43 mulq %rbx | |
| 44 addq %rax,%r10 | |
| 45 movq 32(%rsi),%rax | |
| 46 movq %rdx,%r11 | |
| 47 adcq $0,%r11 | |
| 48 | |
| 49 mulq %rbx | |
| 50 addq %rax,%r11 | |
| 51 movq 40(%rsi),%rax | |
| 52 movq %rdx,%r12 | |
| 53 adcq $0,%r12 | |
| 54 | |
| 55 mulq %rbx | |
| 56 addq %rax,%r12 | |
| 57 movq 48(%rsi),%rax | |
| 58 movq %rdx,%r13 | |
| 59 adcq $0,%r13 | |
| 60 | |
| 61 mulq %rbx | |
| 62 addq %rax,%r13 | |
| 63 movq 56(%rsi),%rax | |
| 64 movq %rdx,%r14 | |
| 65 adcq $0,%r14 | |
| 66 | |
| 67 mulq %rbx | |
| 68 addq %rax,%r14 | |
| 69 movq %rbx,%rax | |
| 70 movq %rdx,%r15 | |
| 71 adcq $0,%r15 | |
| 72 | |
/*
 * Double r8 and r9 (the cross products count twice in a square). rcx keeps
 * the undoubled r9 so its top bit can be carried into the next limb.
 */
| 73 addq %r8,%r8 | |
| 74 movq %r9,%rcx | |
| 75 adcq %r9,%r9 | |
| 76 | |
/* a[0] squared: low qword goes to (rsp), high qword is added into r8. */
| 77 mulq %rax | |
| 78 movq %rax,(%rsp) | |
| 79 addq %rdx,%r8 | |
| 80 adcq $0,%r9 | |
| 81 | |
| 82 movq %r8,8(%rsp) | |
| 83 shrq $63,%rcx | |
| 84 | |
| 85 | |
/* Row 1: a[1] times a[2..7], accumulated into r10..r15 and r8. */
| 86 movq 8(%rsi),%r8 | |
| 87 movq 16(%rsi),%rax | |
| 88 mulq %r8 | |
| 89 addq %rax,%r10 | |
| 90 movq 24(%rsi),%rax | |
| 91 movq %rdx,%rbx | |
| 92 adcq $0,%rbx | |
| 93 | |
| 94 mulq %r8 | |
| 95 addq %rax,%r11 | |
| 96 movq 32(%rsi),%rax | |
| 97 adcq $0,%rdx | |
| 98 addq %rbx,%r11 | |
| 99 movq %rdx,%rbx | |
| 100 adcq $0,%rbx | |
| 101 | |
| 102 mulq %r8 | |
| 103 addq %rax,%r12 | |
| 104 movq 40(%rsi),%rax | |
| 105 adcq $0,%rdx | |
| 106 addq %rbx,%r12 | |
| 107 movq %rdx,%rbx | |
| 108 adcq $0,%rbx | |
| 109 | |
| 110 mulq %r8 | |
| 111 addq %rax,%r13 | |
| 112 movq 48(%rsi),%rax | |
| 113 adcq $0,%rdx | |
| 114 addq %rbx,%r13 | |
| 115 movq %rdx,%rbx | |
| 116 adcq $0,%rbx | |
| 117 | |
| 118 mulq %r8 | |
| 119 addq %rax,%r14 | |
| 120 movq 56(%rsi),%rax | |
| 121 adcq $0,%rdx | |
| 122 addq %rbx,%r14 | |
| 123 movq %rdx,%rbx | |
| 124 adcq $0,%rbx | |
| 125 | |
| 126 mulq %r8 | |
| 127 addq %rax,%r15 | |
| 128 movq %r8,%rax | |
| 129 adcq $0,%rdx | |
| 130 addq %rbx,%r15 | |
| 131 movq %rdx,%r8 | |
| 132 movq %r10,%rdx | |
| 133 adcq $0,%r8 | |
| 134 | |
/*
 * Double r10 and r11. The addq on the rdx copy only produces the top bit of
 * r10 as CF for the adcq; leaq doubles r10 and adds the bit saved in rcx
 * without touching flags. rbx keeps the undoubled r11 for its top bit.
 */
| 135 addq %rdx,%rdx | |
| 136 leaq (%rcx,%r10,2),%r10 | |
| 137 movq %r11,%rbx | |
| 138 adcq %r11,%r11 | |
| 139 | |
/* a[1] squared is added in; qwords 2 and 3 of the square are stored. */
| 140 mulq %rax | |
| 141 addq %rax,%r9 | |
| 142 adcq %rdx,%r10 | |
| 143 adcq $0,%r11 | |
| 144 | |
| 145 movq %r9,16(%rsp) | |
| 146 movq %r10,24(%rsp) | |
| 147 shrq $63,%rbx | |
| 148 | |
| 149 | |
/* Row 2: a[2] times a[3..7]; doubling of r12 and r13 is interleaved. */
| 150 movq 16(%rsi),%r9 | |
| 151 movq 24(%rsi),%rax | |
| 152 mulq %r9 | |
| 153 addq %rax,%r12 | |
| 154 movq 32(%rsi),%rax | |
| 155 movq %rdx,%rcx | |
| 156 adcq $0,%rcx | |
| 157 | |
| 158 mulq %r9 | |
| 159 addq %rax,%r13 | |
| 160 movq 40(%rsi),%rax | |
| 161 adcq $0,%rdx | |
| 162 addq %rcx,%r13 | |
| 163 movq %rdx,%rcx | |
| 164 adcq $0,%rcx | |
| 165 | |
| 166 mulq %r9 | |
| 167 addq %rax,%r14 | |
| 168 movq 48(%rsi),%rax | |
| 169 adcq $0,%rdx | |
| 170 addq %rcx,%r14 | |
| 171 movq %rdx,%rcx | |
| 172 adcq $0,%rcx | |
| 173 | |
| 174 mulq %r9 | |
| 175 movq %r12,%r10 | |
| 176 leaq (%rbx,%r12,2),%r12 | |
| 177 addq %rax,%r15 | |
| 178 movq 56(%rsi),%rax | |
| 179 adcq $0,%rdx | |
| 180 addq %rcx,%r15 | |
| 181 movq %rdx,%rcx | |
| 182 adcq $0,%rcx | |
| 183 | |
| 184 mulq %r9 | |
| 185 shrq $63,%r10 | |
| 186 addq %rax,%r8 | |
| 187 movq %r9,%rax | |
| 188 adcq $0,%rdx | |
| 189 addq %rcx,%r8 | |
| 190 movq %rdx,%r9 | |
| 191 adcq $0,%r9 | |
| 192 | |
| 193 movq %r13,%rcx | |
| 194 leaq (%r10,%r13,2),%r13 | |
| 195 | |
/* a[2] squared is added in; qwords 4 and 5 of the square are stored. */
| 196 mulq %rax | |
| 197 addq %rax,%r11 | |
| 198 adcq %rdx,%r12 | |
| 199 adcq $0,%r13 | |
| 200 | |
| 201 movq %r11,32(%rsp) | |
| 202 movq %r12,40(%rsp) | |
| 203 shrq $63,%rcx | |
| 204 | |
| 205 | |
/* Row 3: a[3] times a[4..7]; doubling of r14 and r15 is interleaved. */
| 206 movq 24(%rsi),%r10 | |
| 207 movq 32(%rsi),%rax | |
| 208 mulq %r10 | |
| 209 addq %rax,%r14 | |
| 210 movq 40(%rsi),%rax | |
| 211 movq %rdx,%rbx | |
| 212 adcq $0,%rbx | |
| 213 | |
| 214 mulq %r10 | |
| 215 addq %rax,%r15 | |
| 216 movq 48(%rsi),%rax | |
| 217 adcq $0,%rdx | |
| 218 addq %rbx,%r15 | |
| 219 movq %rdx,%rbx | |
| 220 adcq $0,%rbx | |
| 221 | |
| 222 mulq %r10 | |
| 223 movq %r14,%r12 | |
| 224 leaq (%rcx,%r14,2),%r14 | |
| 225 addq %rax,%r8 | |
| 226 movq 56(%rsi),%rax | |
| 227 adcq $0,%rdx | |
| 228 addq %rbx,%r8 | |
| 229 movq %rdx,%rbx | |
| 230 adcq $0,%rbx | |
| 231 | |
| 232 mulq %r10 | |
| 233 shrq $63,%r12 | |
| 234 addq %rax,%r9 | |
| 235 movq %r10,%rax | |
| 236 adcq $0,%rdx | |
| 237 addq %rbx,%r9 | |
| 238 movq %rdx,%r10 | |
| 239 adcq $0,%r10 | |
| 240 | |
| 241 movq %r15,%rbx | |
| 242 leaq (%r12,%r15,2),%r15 | |
| 243 | |
/* a[3] squared is added in; qwords 6 and 7 of the square are stored. */
| 244 mulq %rax | |
| 245 addq %rax,%r13 | |
| 246 adcq %rdx,%r14 | |
| 247 adcq $0,%r15 | |
| 248 | |
| 249 movq %r13,48(%rsp) | |
| 250 movq %r14,56(%rsp) | |
| 251 shrq $63,%rbx | |
| 252 | |
| 253 | |
/* Row 4: a[4] times a[5..7]; doubling of r8 and r9 is interleaved. */
| 254 movq 32(%rsi),%r11 | |
| 255 movq 40(%rsi),%rax | |
| 256 mulq %r11 | |
| 257 addq %rax,%r8 | |
| 258 movq 48(%rsi),%rax | |
| 259 movq %rdx,%rcx | |
| 260 adcq $0,%rcx | |
| 261 | |
| 262 mulq %r11 | |
| 263 addq %rax,%r9 | |
| 264 movq 56(%rsi),%rax | |
| 265 adcq $0,%rdx | |
| 266 movq %r8,%r12 | |
| 267 leaq (%rbx,%r8,2),%r8 | |
| 268 addq %rcx,%r9 | |
| 269 movq %rdx,%rcx | |
| 270 adcq $0,%rcx | |
| 271 | |
| 272 mulq %r11 | |
| 273 shrq $63,%r12 | |
| 274 addq %rax,%r10 | |
| 275 movq %r11,%rax | |
| 276 adcq $0,%rdx | |
| 277 addq %rcx,%r10 | |
| 278 movq %rdx,%r11 | |
| 279 adcq $0,%r11 | |
| 280 | |
| 281 movq %r9,%rcx | |
| 282 leaq (%r12,%r9,2),%r9 | |
| 283 | |
/* a[4] squared is added in; qwords 8 and 9 of the square are stored. */
| 284 mulq %rax | |
| 285 addq %rax,%r15 | |
| 286 adcq %rdx,%r8 | |
| 287 adcq $0,%r9 | |
| 288 | |
| 289 movq %r15,64(%rsp) | |
| 290 movq %r8,72(%rsp) | |
| 291 shrq $63,%rcx | |
| 292 | |
| 293 | |
/* Row 5: a[5] times a[6..7]; doubling of r10 and r11 is interleaved. */
| 294 movq 40(%rsi),%r12 | |
| 295 movq 48(%rsi),%rax | |
| 296 mulq %r12 | |
| 297 addq %rax,%r10 | |
| 298 movq 56(%rsi),%rax | |
| 299 movq %rdx,%rbx | |
| 300 adcq $0,%rbx | |
| 301 | |
| 302 mulq %r12 | |
| 303 addq %rax,%r11 | |
| 304 movq %r12,%rax | |
| 305 movq %r10,%r15 | |
| 306 leaq (%rcx,%r10,2),%r10 | |
| 307 adcq $0,%rdx | |
| 308 shrq $63,%r15 | |
| 309 addq %rbx,%r11 | |
| 310 movq %rdx,%r12 | |
| 311 adcq $0,%r12 | |
| 312 | |
| 313 movq %r11,%rbx | |
| 314 leaq (%r15,%r11,2),%r11 | |
| 315 | |
/* a[5] squared is added in; qwords 10 and 11 of the square are stored. */
| 316 mulq %rax | |
| 317 addq %rax,%r9 | |
| 318 adcq %rdx,%r10 | |
| 319 adcq $0,%r11 | |
| 320 | |
| 321 movq %r9,80(%rsp) | |
| 322 movq %r10,88(%rsp) | |
| 323 | |
| 324 | |
/* Row 6: a[6] times a[7]. */
| 325 movq 48(%rsi),%r13 | |
| 326 movq 56(%rsi),%rax | |
| 327 mulq %r13 | |
| 328 addq %rax,%r12 | |
| 329 movq %r13,%rax | |
| 330 movq %rdx,%r13 | |
| 331 adcq $0,%r13 | |
| 332 | |
/*
 * shlq moves the top bit of the undoubled r11 copy (rbx) into CF; the adcq
 * chain then doubles r12 and r13 and collects the final carry-out in r14.
 */
| 333 xorq %r14,%r14 | |
| 334 shlq $1,%rbx | |
| 335 adcq %r12,%r12 | |
| 336 adcq %r13,%r13 | |
| 337 adcq %r14,%r14 | |
| 338 | |
/* a[6] squared is added in; qwords 12 and 13 of the square are stored. */
| 339 mulq %rax | |
| 340 addq %rax,%r11 | |
| 341 adcq %rdx,%r12 | |
| 342 adcq $0,%r13 | |
| 343 | |
| 344 movq %r11,96(%rsp) | |
| 345 movq %r12,104(%rsp) | |
| 346 | |
| 347 | |
/* a[7] squared completes qwords 14 and 15 of the square. */
| 348 movq 56(%rsi),%rax | |
| 349 mulq %rax | |
| 350 addq %rax,%r13 | |
| 351 adcq $0,%rdx | |
| 352 | |
| 353 addq %rdx,%r14 | |
| 354 | |
| 355 movq %r13,112(%rsp) | |
| 356 movq %r14,120(%rsp) | |
| 357 | |
/* Load the low 8 qwords of the square and reduce them. */
| 358 movq (%rsp),%r8 | |
| 359 movq 8(%rsp),%r9 | |
| 360 movq 16(%rsp),%r10 | |
| 361 movq 24(%rsp),%r11 | |
| 362 movq 32(%rsp),%r12 | |
| 363 movq 40(%rsp),%r13 | |
| 364 movq 48(%rsp),%r14 | |
| 365 movq 56(%rsp),%r15 | |
| 366 | |
| 367 call __rsaz_512_reduce | |
| 368 | |
/* Add the high 8 qwords; sbbq turns the final carry into rcx = 0 or -1. */
| 369 addq 64(%rsp),%r8 | |
| 370 adcq 72(%rsp),%r9 | |
| 371 adcq 80(%rsp),%r10 | |
| 372 adcq 88(%rsp),%r11 | |
| 373 adcq 96(%rsp),%r12 | |
| 374 adcq 104(%rsp),%r13 | |
| 375 adcq 112(%rsp),%r14 | |
| 376 adcq 120(%rsp),%r15 | |
| 377 sbbq %rcx,%rcx | |
| 378 | |
| 379 call __rsaz_512_subtract | |
| 380 | |
/*
 * Set up the next pass: rdx and rax get result qwords 0 and 1 (the same
 * preload as at .Lsqr_body), and the output buffer becomes the input.
 */
| 381 movq %r8,%rdx | |
| 382 movq %r9,%rax | |
| 383 movl 128+8(%rsp),%r8d | |
| 384 movq %rdi,%rsi | |
| 385 | |
| 386 decl %r8d | |
| 387 jnz .Loop_sqr | |
| 388 | |
/* rax = address just above the six saved registers; restore and unwind. */
| 389 leaq 128+24+48(%rsp),%rax | |
| 390 movq -48(%rax),%r15 | |
| 391 movq -40(%rax),%r14 | |
| 392 movq -32(%rax),%r13 | |
| 393 movq -24(%rax),%r12 | |
| 394 movq -16(%rax),%rbp | |
| 395 movq -8(%rax),%rbx | |
| 396 leaq (%rax),%rsp | |
| 397 .Lsqr_epilogue: | |
/* 0xf3,0xc3 encodes "rep ret". */
| 398 .byte 0xf3,0xc3 | |
| 399 .size rsaz_512_sqr,.-rsaz_512_sqr | |
/*
 * rsaz_512_mul: 512-bit multiply followed by reduction.
 *
 * Register use as read from the code below (System V argument order is
 * assumed; the C prototype is not in this file - TODO confirm):
 *   rdi = output, 8 qwords
 *   rsi = first operand, 8 qwords
 *   rdx = second operand, 8 qwords
 *   rcx = pointer to 8-qword modulus
 *   r8  = per-modulus constant, saved at 128(rsp) for __rsaz_512_reduce
 *
 * __rsaz_512_mul writes the 16-qword product to 0..120(rsp). The low half
 * is reduced, the high half is added, and __rsaz_512_subtract stores the
 * result to (rdi) after a masked subtraction of the modulus.
 */
| 400 .globl rsaz_512_mul | |
| 401 .hidden rsaz_512_mul | |
| 402 .type rsaz_512_mul,@function | |
| 403 .align 32 | |
| 404 rsaz_512_mul: | |
| 405 pushq %rbx | |
| 406 pushq %rbp | |
| 407 pushq %r12 | |
| 408 pushq %r13 | |
| 409 pushq %r14 | |
| 410 pushq %r15 | |
| 411 | |
| 412 subq $128+24,%rsp | |
| 413 .Lmul_body: | |
/*
 * The two .byte rows encode movq rdi,xmm0 and movq rcx,xmm1: the output and
 * modulus pointers are parked in xmm registers across the helper call,
 * which overwrites rdi and advances rbp.
 */
| 414 .byte 102,72,15,110,199 | |
| 415 .byte 102,72,15,110,201 | |
| 416 movq %r8,128(%rsp) | |
/* rbx = first qword of the second operand, rbp = its address. */
| 417 movq (%rdx),%rbx | |
| 418 movq %rdx,%rbp | |
| 419 call __rsaz_512_mul | |
| 420 | |
/* These encode movq xmm0,rdi and movq xmm1,rbp (rbp = modulus pointer). */
| 421 .byte 102,72,15,126,199 | |
| 422 .byte 102,72,15,126,205 | |
| 423 | |
/* Load the low 8 qwords of the product and reduce them. */
| 424 movq (%rsp),%r8 | |
| 425 movq 8(%rsp),%r9 | |
| 426 movq 16(%rsp),%r10 | |
| 427 movq 24(%rsp),%r11 | |
| 428 movq 32(%rsp),%r12 | |
| 429 movq 40(%rsp),%r13 | |
| 430 movq 48(%rsp),%r14 | |
| 431 movq 56(%rsp),%r15 | |
| 432 | |
| 433 call __rsaz_512_reduce | |
/* Add the high 8 qwords; sbbq turns the final carry into rcx = 0 or -1. */
| 434 addq 64(%rsp),%r8 | |
| 435 adcq 72(%rsp),%r9 | |
| 436 adcq 80(%rsp),%r10 | |
| 437 adcq 88(%rsp),%r11 | |
| 438 adcq 96(%rsp),%r12 | |
| 439 adcq 104(%rsp),%r13 | |
| 440 adcq 112(%rsp),%r14 | |
| 441 adcq 120(%rsp),%r15 | |
| 442 sbbq %rcx,%rcx | |
| 443 | |
| 444 call __rsaz_512_subtract | |
| 445 | |
/* rax = address just above the six saved registers; restore and unwind. */
| 446 leaq 128+24+48(%rsp),%rax | |
| 447 movq -48(%rax),%r15 | |
| 448 movq -40(%rax),%r14 | |
| 449 movq -32(%rax),%r13 | |
| 450 movq -24(%rax),%r12 | |
| 451 movq -16(%rax),%rbp | |
| 452 movq -8(%rax),%rbx | |
| 453 leaq (%rax),%rsp | |
| 454 .Lmul_epilogue: | |
/* 0xf3,0xc3 encodes "rep ret". */
| 455 .byte 0xf3,0xc3 | |
| 456 .size rsaz_512_mul,.-rsaz_512_mul | |
/*
 * rsaz_512_mul_gather4: multiply by a value gathered from a table, then
 * reduce.
 *
 * Register use as read from the code below (System V argument order is
 * assumed; the C prototype is not in this file - TODO confirm):
 *   rdi = output, 8 qwords (saved at 128+8(rsp))
 *   rsi = multiplicand, 8 qwords
 *   rdx = table: 8 rows of 128 bytes, read with movdqa and therefore
 *         required to be 16-byte aligned
 *   rcx = pointer to 8-qword modulus (saved at 128+16(rsp))
 *   r8  = per-modulus constant, saved at 128(rsp) for __rsaz_512_reduce
 *   r9d = index of the table entry to use
 *
 * Qword k of the multiplier is column r9d of table row k. Every row is
 * read in full and the wanted column is picked with compare masks, so the
 * addresses touched do not depend on the index.
 * Clobbers xmm0-xmm15 in addition to the integer scratch registers.
 */
| 457 .globl rsaz_512_mul_gather4 | |
| 458 .hidden rsaz_512_mul_gather4 | |
| 459 .type rsaz_512_mul_gather4,@function | |
| 460 .align 32 | |
| 461 rsaz_512_mul_gather4: | |
| 462 pushq %rbx | |
| 463 pushq %rbp | |
| 464 pushq %r12 | |
| 465 pushq %r13 | |
| 466 pushq %r14 | |
| 467 pushq %r15 | |
| 468 | |
| 469 subq $152,%rsp | |
| 470 .Lmul_gather4_body: | |
/*
 * Build eight masks in xmm0..xmm7. Starting from the dword vectors
 * 0,0,1,1 and 2,2,2,2 at .Linc, repeated paddd produces the index pairs
 * (0,1), (2,3) ... (14,15); pcmpeqd against the broadcast index leaves
 * all-ones only in the 64-bit lane whose number equals r9d.
 */
| 471 movd %r9d,%xmm8 | |
| 472 movdqa .Linc+16(%rip),%xmm1 | |
| 473 movdqa .Linc(%rip),%xmm0 | |
| 474 | |
| 475 pshufd $0,%xmm8,%xmm8 | |
| 476 movdqa %xmm1,%xmm7 | |
| 477 movdqa %xmm1,%xmm2 | |
| 478 paddd %xmm0,%xmm1 | |
| 479 pcmpeqd %xmm8,%xmm0 | |
| 480 movdqa %xmm7,%xmm3 | |
| 481 paddd %xmm1,%xmm2 | |
| 482 pcmpeqd %xmm8,%xmm1 | |
| 483 movdqa %xmm7,%xmm4 | |
| 484 paddd %xmm2,%xmm3 | |
| 485 pcmpeqd %xmm8,%xmm2 | |
| 486 movdqa %xmm7,%xmm5 | |
| 487 paddd %xmm3,%xmm4 | |
| 488 pcmpeqd %xmm8,%xmm3 | |
| 489 movdqa %xmm7,%xmm6 | |
| 490 paddd %xmm4,%xmm5 | |
| 491 pcmpeqd %xmm8,%xmm4 | |
| 492 paddd %xmm5,%xmm6 | |
| 493 pcmpeqd %xmm8,%xmm5 | |
| 494 paddd %xmm6,%xmm7 | |
| 495 pcmpeqd %xmm8,%xmm6 | |
| 496 pcmpeqd %xmm8,%xmm7 | |
| 497 | |
/*
 * Gather multiplier qword 0: load all 128 bytes of row 0, mask, OR them
 * together, then fold the upper 64 bits onto the lower (pshufd 0x4e swaps
 * the two halves). rbp is left pointing at row 1.
 */
| 498 movdqa 0(%rdx),%xmm8 | |
| 499 movdqa 16(%rdx),%xmm9 | |
| 500 movdqa 32(%rdx),%xmm10 | |
| 501 movdqa 48(%rdx),%xmm11 | |
| 502 pand %xmm0,%xmm8 | |
| 503 movdqa 64(%rdx),%xmm12 | |
| 504 pand %xmm1,%xmm9 | |
| 505 movdqa 80(%rdx),%xmm13 | |
| 506 pand %xmm2,%xmm10 | |
| 507 movdqa 96(%rdx),%xmm14 | |
| 508 pand %xmm3,%xmm11 | |
| 509 movdqa 112(%rdx),%xmm15 | |
| 510 leaq 128(%rdx),%rbp | |
| 511 pand %xmm4,%xmm12 | |
| 512 pand %xmm5,%xmm13 | |
| 513 pand %xmm6,%xmm14 | |
| 514 pand %xmm7,%xmm15 | |
| 515 por %xmm10,%xmm8 | |
| 516 por %xmm11,%xmm9 | |
| 517 por %xmm12,%xmm8 | |
| 518 por %xmm13,%xmm9 | |
| 519 por %xmm14,%xmm8 | |
| 520 por %xmm15,%xmm9 | |
| 521 | |
| 522 por %xmm9,%xmm8 | |
| 523 pshufd $0x4e,%xmm8,%xmm9 | |
| 524 por %xmm9,%xmm8 | |
/* Encodes movq xmm8,rbx: rbx = gathered multiplier qword. */
| 525 .byte 102,76,15,126,195 | |
| 526 | |
/* Save the constant, the output pointer and the modulus pointer. */
| 527 movq %r8,128(%rsp) | |
| 528 movq %rdi,128+8(%rsp) | |
| 529 movq %rcx,128+16(%rsp) | |
| 530 | |
/*
 * First row: multiplicand times multiplier qword 0. The lowest product
 * qword goes to (rsp); the remaining eight stay in r8..r15.
 */
| 531 movq (%rsi),%rax | |
| 532 movq 8(%rsi),%rcx | |
| 533 mulq %rbx | |
| 534 movq %rax,(%rsp) | |
| 535 movq %rcx,%rax | |
| 536 movq %rdx,%r8 | |
| 537 | |
| 538 mulq %rbx | |
| 539 addq %rax,%r8 | |
| 540 movq 16(%rsi),%rax | |
| 541 movq %rdx,%r9 | |
| 542 adcq $0,%r9 | |
| 543 | |
| 544 mulq %rbx | |
| 545 addq %rax,%r9 | |
| 546 movq 24(%rsi),%rax | |
| 547 movq %rdx,%r10 | |
| 548 adcq $0,%r10 | |
| 549 | |
| 550 mulq %rbx | |
| 551 addq %rax,%r10 | |
| 552 movq 32(%rsi),%rax | |
| 553 movq %rdx,%r11 | |
| 554 adcq $0,%r11 | |
| 555 | |
| 556 mulq %rbx | |
| 557 addq %rax,%r11 | |
| 558 movq 40(%rsi),%rax | |
| 559 movq %rdx,%r12 | |
| 560 adcq $0,%r12 | |
| 561 | |
| 562 mulq %rbx | |
| 563 addq %rax,%r12 | |
| 564 movq 48(%rsi),%rax | |
| 565 movq %rdx,%r13 | |
| 566 adcq $0,%r13 | |
| 567 | |
| 568 mulq %rbx | |
| 569 addq %rax,%r13 | |
| 570 movq 56(%rsi),%rax | |
| 571 movq %rdx,%r14 | |
| 572 adcq $0,%r14 | |
| 573 | |
| 574 mulq %rbx | |
| 575 addq %rax,%r14 | |
| 576 movq (%rsi),%rax | |
| 577 movq %rdx,%r15 | |
| 578 adcq $0,%r15 | |
| 579 | |
/* rdi = next product slot; seven more multiplier qwords remain. */
| 580 leaq 8(%rsp),%rdi | |
| 581 movl $7,%ecx | |
| 582 jmp .Loop_mul_gather | |
| 583 | |
| 584 .align 32 | |
| 585 .Loop_mul_gather: | |
/* Gather the next multiplier qword from the row at rbp, as above. */
| 586 movdqa 0(%rbp),%xmm8 | |
| 587 movdqa 16(%rbp),%xmm9 | |
| 588 movdqa 32(%rbp),%xmm10 | |
| 589 movdqa 48(%rbp),%xmm11 | |
| 590 pand %xmm0,%xmm8 | |
| 591 movdqa 64(%rbp),%xmm12 | |
| 592 pand %xmm1,%xmm9 | |
| 593 movdqa 80(%rbp),%xmm13 | |
| 594 pand %xmm2,%xmm10 | |
| 595 movdqa 96(%rbp),%xmm14 | |
| 596 pand %xmm3,%xmm11 | |
| 597 movdqa 112(%rbp),%xmm15 | |
| 598 leaq 128(%rbp),%rbp | |
| 599 pand %xmm4,%xmm12 | |
| 600 pand %xmm5,%xmm13 | |
| 601 pand %xmm6,%xmm14 | |
| 602 pand %xmm7,%xmm15 | |
| 603 por %xmm10,%xmm8 | |
| 604 por %xmm11,%xmm9 | |
| 605 por %xmm12,%xmm8 | |
| 606 por %xmm13,%xmm9 | |
| 607 por %xmm14,%xmm8 | |
| 608 por %xmm15,%xmm9 | |
| 609 | |
| 610 por %xmm9,%xmm8 | |
| 611 pshufd $0x4e,%xmm8,%xmm9 | |
| 612 por %xmm9,%xmm8 | |
/* Encodes movq xmm8,rbx. */
| 613 .byte 102,76,15,126,195 | |
| 614 | |
/*
 * Multiply-accumulate row: rax enters holding multiplicand qword 0. The
 * finished low qword is stored at (rdi) and the accumulator shifts down
 * one register per step.
 */
| 615 mulq %rbx | |
| 616 addq %rax,%r8 | |
| 617 movq 8(%rsi),%rax | |
| 618 movq %r8,(%rdi) | |
| 619 movq %rdx,%r8 | |
| 620 adcq $0,%r8 | |
| 621 | |
| 622 mulq %rbx | |
| 623 addq %rax,%r9 | |
| 624 movq 16(%rsi),%rax | |
| 625 adcq $0,%rdx | |
| 626 addq %r9,%r8 | |
| 627 movq %rdx,%r9 | |
| 628 adcq $0,%r9 | |
| 629 | |
| 630 mulq %rbx | |
| 631 addq %rax,%r10 | |
| 632 movq 24(%rsi),%rax | |
| 633 adcq $0,%rdx | |
| 634 addq %r10,%r9 | |
| 635 movq %rdx,%r10 | |
| 636 adcq $0,%r10 | |
| 637 | |
| 638 mulq %rbx | |
| 639 addq %rax,%r11 | |
| 640 movq 32(%rsi),%rax | |
| 641 adcq $0,%rdx | |
| 642 addq %r11,%r10 | |
| 643 movq %rdx,%r11 | |
| 644 adcq $0,%r11 | |
| 645 | |
| 646 mulq %rbx | |
| 647 addq %rax,%r12 | |
| 648 movq 40(%rsi),%rax | |
| 649 adcq $0,%rdx | |
| 650 addq %r12,%r11 | |
| 651 movq %rdx,%r12 | |
| 652 adcq $0,%r12 | |
| 653 | |
| 654 mulq %rbx | |
| 655 addq %rax,%r13 | |
| 656 movq 48(%rsi),%rax | |
| 657 adcq $0,%rdx | |
| 658 addq %r13,%r12 | |
| 659 movq %rdx,%r13 | |
| 660 adcq $0,%r13 | |
| 661 | |
| 662 mulq %rbx | |
| 663 addq %rax,%r14 | |
| 664 movq 56(%rsi),%rax | |
| 665 adcq $0,%rdx | |
| 666 addq %r14,%r13 | |
| 667 movq %rdx,%r14 | |
| 668 adcq $0,%r14 | |
| 669 | |
| 670 mulq %rbx | |
| 671 addq %rax,%r15 | |
| 672 movq (%rsi),%rax | |
| 673 adcq $0,%rdx | |
| 674 addq %r15,%r14 | |
| 675 movq %rdx,%r15 | |
| 676 adcq $0,%r15 | |
| 677 | |
| 678 leaq 8(%rdi),%rdi | |
| 679 | |
| 680 decl %ecx | |
| 681 jnz .Loop_mul_gather | |
| 682 | |
/* Store the top eight qwords of the 16-qword product. */
| 683 movq %r8,(%rdi) | |
| 684 movq %r9,8(%rdi) | |
| 685 movq %r10,16(%rdi) | |
| 686 movq %r11,24(%rdi) | |
| 687 movq %r12,32(%rdi) | |
| 688 movq %r13,40(%rdi) | |
| 689 movq %r14,48(%rdi) | |
| 690 movq %r15,56(%rdi) | |
| 691 | |
/* Reload the output pointer into rdi and the modulus pointer into rbp. */
| 692 movq 128+8(%rsp),%rdi | |
| 693 movq 128+16(%rsp),%rbp | |
| 694 | |
/* Load the low 8 qwords of the product and reduce them. */
| 695 movq (%rsp),%r8 | |
| 696 movq 8(%rsp),%r9 | |
| 697 movq 16(%rsp),%r10 | |
| 698 movq 24(%rsp),%r11 | |
| 699 movq 32(%rsp),%r12 | |
| 700 movq 40(%rsp),%r13 | |
| 701 movq 48(%rsp),%r14 | |
| 702 movq 56(%rsp),%r15 | |
| 703 | |
| 704 call __rsaz_512_reduce | |
/* Add the high 8 qwords; sbbq turns the final carry into rcx = 0 or -1. */
| 705 addq 64(%rsp),%r8 | |
| 706 adcq 72(%rsp),%r9 | |
| 707 adcq 80(%rsp),%r10 | |
| 708 adcq 88(%rsp),%r11 | |
| 709 adcq 96(%rsp),%r12 | |
| 710 adcq 104(%rsp),%r13 | |
| 711 adcq 112(%rsp),%r14 | |
| 712 adcq 120(%rsp),%r15 | |
| 713 sbbq %rcx,%rcx | |
| 714 | |
| 715 call __rsaz_512_subtract | |
| 716 | |
/* rax = address just above the six saved registers; restore and unwind. */
| 717 leaq 128+24+48(%rsp),%rax | |
| 718 movq -48(%rax),%r15 | |
| 719 movq -40(%rax),%r14 | |
| 720 movq -32(%rax),%r13 | |
| 721 movq -24(%rax),%r12 | |
| 722 movq -16(%rax),%rbp | |
| 723 movq -8(%rax),%rbx | |
| 724 leaq (%rax),%rsp | |
| 725 .Lmul_gather4_epilogue: | |
/* 0xf3,0xc3 encodes "rep ret". */
| 726 .byte 0xf3,0xc3 | |
| 727 .size rsaz_512_mul_gather4,.-rsaz_512_mul_gather4 | |
/*
 * rsaz_512_mul_scatter4: multiply, reduce, write the result to the output
 * buffer and also scatter it into a table column.
 *
 * Register use as read from the code below (System V argument order is
 * assumed; the C prototype is not in this file - TODO confirm):
 *   rdi = output, 8 qwords; its current contents are also used as one of
 *         the two factors (rbp and rbx are loaded from it)
 *   rsi = the other factor, 8 qwords
 *   rdx = pointer to 8-qword modulus
 *   rcx = per-modulus constant, saved at 128(rsp) for __rsaz_512_reduce
 *   r8  = table base
 *   r9d = column index; the result is written to r8 + 8*r9d with a stride
 *         of 128 bytes between qwords
 */
| 728 .globl rsaz_512_mul_scatter4 | |
| 729 .hidden rsaz_512_mul_scatter4 | |
| 730 .type rsaz_512_mul_scatter4,@function | |
| 731 .align 32 | |
| 732 rsaz_512_mul_scatter4: | |
| 733 pushq %rbx | |
| 734 pushq %rbp | |
| 735 pushq %r12 | |
| 736 pushq %r13 | |
| 737 pushq %r14 | |
| 738 pushq %r15 | |
| 739 | |
/* Writing r9d to itself clears the upper 32 bits of r9 before indexing. */
| 740 movl %r9d,%r9d | |
| 741 subq $128+24,%rsp | |
| 742 .Lmul_scatter4_body: | |
/* r8 = address of the first destination qword in the table. */
| 743 leaq (%r8,%r9,8),%r8 | |
/*
 * The three .byte rows encode movq rdi,xmm0 / movq rdx,xmm1 /
 * movq r8,xmm2: output pointer, modulus pointer and table slot are parked
 * in xmm registers across the helper calls.
 */
| 744 .byte 102,72,15,110,199 | |
| 745 .byte 102,72,15,110,202 | |
| 746 .byte 102,73,15,110,208 | |
| 747 movq %rcx,128(%rsp) | |
| 748 | |
/* The second factor is read from the output buffer itself. */
| 749 movq %rdi,%rbp | |
| 750 movq (%rdi),%rbx | |
| 751 call __rsaz_512_mul | |
| 752 | |
/* These encode movq xmm0,rdi and movq xmm1,rbp (rbp = modulus pointer). */
| 753 .byte 102,72,15,126,199 | |
| 754 .byte 102,72,15,126,205 | |
| 755 | |
/* Load the low 8 qwords of the product and reduce them. */
| 756 movq (%rsp),%r8 | |
| 757 movq 8(%rsp),%r9 | |
| 758 movq 16(%rsp),%r10 | |
| 759 movq 24(%rsp),%r11 | |
| 760 movq 32(%rsp),%r12 | |
| 761 movq 40(%rsp),%r13 | |
| 762 movq 48(%rsp),%r14 | |
| 763 movq 56(%rsp),%r15 | |
| 764 | |
| 765 call __rsaz_512_reduce | |
| 766 addq 64(%rsp),%r8 | |
| 767 adcq 72(%rsp),%r9 | |
| 768 adcq 80(%rsp),%r10 | |
| 769 adcq 88(%rsp),%r11 | |
| 770 adcq 96(%rsp),%r12 | |
| 771 adcq 104(%rsp),%r13 | |
| 772 adcq 112(%rsp),%r14 | |
| 773 adcq 120(%rsp),%r15 | |
/*
 * Encodes movq xmm2,rsi (table slot). It does not modify flags, so the
 * carry from the adcq chain is still intact for the sbbq that follows.
 */
| 774 .byte 102,72,15,126,214 | |
| 775 sbbq %rcx,%rcx | |
| 776 | |
| 777 call __rsaz_512_subtract | |
| 778 | |
/* r8..r15 still hold the result: scatter it with a 128-byte stride. */
| 779 movq %r8,0(%rsi) | |
| 780 movq %r9,128(%rsi) | |
| 781 movq %r10,256(%rsi) | |
| 782 movq %r11,384(%rsi) | |
| 783 movq %r12,512(%rsi) | |
| 784 movq %r13,640(%rsi) | |
| 785 movq %r14,768(%rsi) | |
| 786 movq %r15,896(%rsi) | |
| 787 | |
/* rax = address just above the six saved registers; restore and unwind. */
| 788 leaq 128+24+48(%rsp),%rax | |
| 789 movq -48(%rax),%r15 | |
| 790 movq -40(%rax),%r14 | |
| 791 movq -32(%rax),%r13 | |
| 792 movq -24(%rax),%r12 | |
| 793 movq -16(%rax),%rbp | |
| 794 movq -8(%rax),%rbx | |
| 795 leaq (%rax),%rsp | |
| 796 .Lmul_scatter4_epilogue: | |
/* 0xf3,0xc3 encodes "rep ret". */
| 797 .byte 0xf3,0xc3 | |
| 798 .size rsaz_512_mul_scatter4,.-rsaz_512_mul_scatter4 | |
/*
 * rsaz_512_mul_by_one: run the reduction helper directly on an 8-qword
 * input and store the result.
 *
 * Register use as read from the code below (System V argument order is
 * assumed; the C prototype is not in this file - TODO confirm):
 *   rdi = output, 8 qwords
 *   rsi = input, 8 qwords
 *   rdx = pointer to 8-qword modulus (moved to rbp)
 *   rcx = per-modulus constant, saved at 128(rsp) for __rsaz_512_reduce
 *
 * Unlike the other entry points in this file, no high half is added and
 * __rsaz_512_subtract is not called.
 * Presumably this converts a value out of Montgomery form - confirm
 * against callers.
 */
| 799 .globl rsaz_512_mul_by_one | |
| 800 .hidden rsaz_512_mul_by_one | |
| 801 .type rsaz_512_mul_by_one,@function | |
| 802 .align 32 | |
| 803 rsaz_512_mul_by_one: | |
| 804 pushq %rbx | |
| 805 pushq %rbp | |
| 806 pushq %r12 | |
| 807 pushq %r13 | |
| 808 pushq %r14 | |
| 809 pushq %r15 | |
| 810 | |
| 811 subq $128+24,%rsp | |
| 812 .Lmul_by_one_body: | |
| 813 movq %rdx,%rbp | |
| 814 movq %rcx,128(%rsp) | |
| 815 | |
/* Load the input into r8..r15; xmm0 is zeroed for the stores below. */
| 816 movq (%rsi),%r8 | |
| 817 pxor %xmm0,%xmm0 | |
| 818 movq 8(%rsi),%r9 | |
| 819 movq 16(%rsi),%r10 | |
| 820 movq 24(%rsi),%r11 | |
| 821 movq 32(%rsi),%r12 | |
| 822 movq 40(%rsi),%r13 | |
| 823 movq 48(%rsi),%r14 | |
| 824 movq 56(%rsi),%r15 | |
| 825 | |
/*
 * Zero 0..111(rsp). movdqa needs 16-byte aligned addresses: with the
 * usual entry alignment (rsp = 8 mod 16, assumed) the 48 bytes of pushes
 * plus 152 bytes of locals leave rsp 16-byte aligned.
 * NOTE(review): nothing visible in this file reads these bytes back on
 * this path - confirm whether the clearing is still needed.
 */
| 826 movdqa %xmm0,(%rsp) | |
| 827 movdqa %xmm0,16(%rsp) | |
| 828 movdqa %xmm0,32(%rsp) | |
| 829 movdqa %xmm0,48(%rsp) | |
| 830 movdqa %xmm0,64(%rsp) | |
| 831 movdqa %xmm0,80(%rsp) | |
| 832 movdqa %xmm0,96(%rsp) | |
| 833 call __rsaz_512_reduce | |
/* Store the reduced value as is. */
| 834 movq %r8,(%rdi) | |
| 835 movq %r9,8(%rdi) | |
| 836 movq %r10,16(%rdi) | |
| 837 movq %r11,24(%rdi) | |
| 838 movq %r12,32(%rdi) | |
| 839 movq %r13,40(%rdi) | |
| 840 movq %r14,48(%rdi) | |
| 841 movq %r15,56(%rdi) | |
| 842 | |
/* rax = address just above the six saved registers; restore and unwind. */
| 843 leaq 128+24+48(%rsp),%rax | |
| 844 movq -48(%rax),%r15 | |
| 845 movq -40(%rax),%r14 | |
| 846 movq -32(%rax),%r13 | |
| 847 movq -24(%rax),%r12 | |
| 848 movq -16(%rax),%rbp | |
| 849 movq -8(%rax),%rbx | |
| 850 leaq (%rax),%rsp | |
| 851 .Lmul_by_one_epilogue: | |
/* 0xf3,0xc3 encodes "rep ret". */
| 852 .byte 0xf3,0xc3 | |
| 853 .size rsaz_512_mul_by_one,.-rsaz_512_mul_by_one | |
/*
 * __rsaz_512_reduce: internal helper with a private register interface.
 *
 * In:   r8..r15 = 8-qword value (r8 lowest)
 *       rbp     = pointer to 8-qword modulus
 *       128+8(rsp) as seen here = per-modulus constant. This is the slot
 *       the callers address as 128(rsp); the extra 8 bytes are the return
 *       address pushed by call.
 * Out:  r8..r15 = reduced value
 * Clobbers rax, rbx, rcx, rdx, rsi and flags.
 *
 * Runs 8 passes. Each pass multiplies the modulus by rbx (low qword of the
 * running value times the constant), adds the product to the running
 * value and drops the lowest qword.
 * Presumably this is word-serial Montgomery reduction with the constant
 * equal to -1/modulus mod 2^64 - confirm against callers.
 */
| 854 .type __rsaz_512_reduce,@function | |
| 855 .align 32 | |
| 856 __rsaz_512_reduce: | |
/* rbx = multiplier for the first pass. */
| 857 movq %r8,%rbx | |
| 858 imulq 128+8(%rsp),%rbx | |
| 859 movq 0(%rbp),%rax | |
| 860 movl $8,%ecx | |
| 861 jmp .Lreduction_loop | |
| 862 | |
| 863 .align 32 | |
| 864 .Lreduction_loop: | |
/*
 * The low qword of this product is never added to r8. Only the carry out
 * of that position is needed, and negq supplies it: CF = 1 exactly when
 * r8 is nonzero.
 */
| 865 mulq %rbx | |
| 866 movq 8(%rbp),%rax | |
| 867 negq %r8 | |
| 868 movq %rdx,%r8 | |
| 869 adcq $0,%r8 | |
| 870 | |
| 871 mulq %rbx | |
| 872 addq %rax,%r9 | |
| 873 movq 16(%rbp),%rax | |
| 874 adcq $0,%rdx | |
| 875 addq %r9,%r8 | |
| 876 movq %rdx,%r9 | |
| 877 adcq $0,%r9 | |
| 878 | |
| 879 mulq %rbx | |
| 880 addq %rax,%r10 | |
| 881 movq 24(%rbp),%rax | |
| 882 adcq $0,%rdx | |
| 883 addq %r10,%r9 | |
| 884 movq %rdx,%r10 | |
| 885 adcq $0,%r10 | |
| 886 | |
| 887 mulq %rbx | |
| 888 addq %rax,%r11 | |
| 889 movq 32(%rbp),%rax | |
| 890 adcq $0,%rdx | |
| 891 addq %r11,%r10 | |
/* Reload the constant; movq leaves CF from the addq above intact. */
| 892 movq 128+8(%rsp),%rsi | |
| 893 | |
| 894 | |
| 895 adcq $0,%rdx | |
| 896 movq %rdx,%r11 | |
| 897 | |
| 898 mulq %rbx | |
| 899 addq %rax,%r12 | |
| 900 movq 40(%rbp),%rax | |
| 901 adcq $0,%rdx | |
/* rsi = next pass multiplier, computed early from the new low qword r8. */
| 902 imulq %r8,%rsi | |
| 903 addq %r12,%r11 | |
| 904 movq %rdx,%r12 | |
| 905 adcq $0,%r12 | |
| 906 | |
| 907 mulq %rbx | |
| 908 addq %rax,%r13 | |
| 909 movq 48(%rbp),%rax | |
| 910 adcq $0,%rdx | |
| 911 addq %r13,%r12 | |
| 912 movq %rdx,%r13 | |
| 913 adcq $0,%r13 | |
| 914 | |
| 915 mulq %rbx | |
| 916 addq %rax,%r14 | |
| 917 movq 56(%rbp),%rax | |
| 918 adcq $0,%rdx | |
| 919 addq %r14,%r13 | |
| 920 movq %rdx,%r14 | |
| 921 adcq $0,%r14 | |
| 922 | |
/* Last product of the pass; rbx switches to the next multiplier. */
| 923 mulq %rbx | |
| 924 movq %rsi,%rbx | |
| 925 addq %rax,%r15 | |
| 926 movq 0(%rbp),%rax | |
| 927 adcq $0,%rdx | |
| 928 addq %r15,%r14 | |
| 929 movq %rdx,%r15 | |
| 930 adcq $0,%r15 | |
| 931 | |
| 932 decl %ecx | |
| 933 jne .Lreduction_loop | |
| 934 | |
/* 0xf3,0xc3 encodes "rep ret". */
| 935 .byte 0xf3,0xc3 | |
| 936 .size __rsaz_512_reduce,.-__rsaz_512_reduce | |
/*
 * __rsaz_512_subtract: internal helper with a private register interface.
 *
 * In:   r8..r15 = 8-qword value
 *       rdi     = output, 8 qwords
 *       rbp     = pointer to 8-qword modulus
 *       rcx     = mask; the callers in this file pass 0 or all-ones
 *                 produced by sbbq
 * Out:  (rdi) and r8..r15 = value plus (negated modulus AND mask), i.e.
 *       the value unchanged for mask 0 and value minus modulus (mod 2^512)
 *       for an all-ones mask.
 *
 * The selection is done with andq, with no branch on the mask.
 * The negation is formed as neg of qword 0 and not of qwords 1..7, which
 * equals the two's complement only when qword 0 of the modulus is nonzero
 * (true for any odd modulus).
 */
| 937 .type __rsaz_512_subtract,@function | |
| 938 .align 32 | |
| 939 __rsaz_512_subtract: | |
/* Store the value first; (rdi) doubles as scratch for the addition below. */
| 940 movq %r8,(%rdi) | |
| 941 movq %r9,8(%rdi) | |
| 942 movq %r10,16(%rdi) | |
| 943 movq %r11,24(%rdi) | |
| 944 movq %r12,32(%rdi) | |
| 945 movq %r13,40(%rdi) | |
| 946 movq %r14,48(%rdi) | |
| 947 movq %r15,56(%rdi) | |
| 948 | |
/* r8..r15 = negated modulus, masked by rcx. */
| 949 movq 0(%rbp),%r8 | |
| 950 movq 8(%rbp),%r9 | |
| 951 negq %r8 | |
| 952 notq %r9 | |
| 953 andq %rcx,%r8 | |
| 954 movq 16(%rbp),%r10 | |
| 955 andq %rcx,%r9 | |
| 956 notq %r10 | |
| 957 movq 24(%rbp),%r11 | |
| 958 andq %rcx,%r10 | |
| 959 notq %r11 | |
| 960 movq 32(%rbp),%r12 | |
| 961 andq %rcx,%r11 | |
| 962 notq %r12 | |
| 963 movq 40(%rbp),%r13 | |
| 964 andq %rcx,%r12 | |
| 965 notq %r13 | |
| 966 movq 48(%rbp),%r14 | |
| 967 andq %rcx,%r13 | |
| 968 notq %r14 | |
| 969 movq 56(%rbp),%r15 | |
| 970 andq %rcx,%r14 | |
| 971 notq %r15 | |
| 972 andq %rcx,%r15 | |
| 973 | |
/* Add the stored value; the carry out of the top qword is discarded. */
| 974 addq (%rdi),%r8 | |
| 975 adcq 8(%rdi),%r9 | |
| 976 adcq 16(%rdi),%r10 | |
| 977 adcq 24(%rdi),%r11 | |
| 978 adcq 32(%rdi),%r12 | |
| 979 adcq 40(%rdi),%r13 | |
| 980 adcq 48(%rdi),%r14 | |
| 981 adcq 56(%rdi),%r15 | |
| 982 | |
| 983 movq %r8,(%rdi) | |
| 984 movq %r9,8(%rdi) | |
| 985 movq %r10,16(%rdi) | |
| 986 movq %r11,24(%rdi) | |
| 987 movq %r12,32(%rdi) | |
| 988 movq %r13,40(%rdi) | |
| 989 movq %r14,48(%rdi) | |
| 990 movq %r15,56(%rdi) | |
| 991 | |
/* 0xf3,0xc3 encodes "rep ret". */
| 992 .byte 0xf3,0xc3 | |
| 993 .size __rsaz_512_subtract,.-__rsaz_512_subtract | |
/*
 * __rsaz_512_mul: internal helper with a private register interface.
 * 8x8 qword schoolbook multiplication.
 *
 * In:   rsi = first operand, 8 qwords
 *       rbp = second operand, 8 qwords
 *       rbx = qword 0 of the second operand (preloaded by the caller)
 * Out:  16-qword product at 8(rsp)..128(rsp) as seen here, which is
 *       0..120(rsp) in the caller frame (the extra 8 bytes are the return
 *       address pushed by call).
 * Clobbers rax, rbx, rcx, rdx, rdi, rbp, r8..r15 and flags. rbp is left
 * advanced by 64 bytes.
 */
| 994 .type __rsaz_512_mul,@function | |
| 995 .align 32 | |
| 996 __rsaz_512_mul: | |
/* rdi = write cursor into the product area in the caller frame. */
| 997 leaq 8(%rsp),%rdi | |
| 998 | |
/*
 * First row: first operand times rbx. The lowest product qword is stored;
 * the remaining eight stay in r8..r15.
 */
| 999 movq (%rsi),%rax | |
| 1000 mulq %rbx | |
| 1001 movq %rax,(%rdi) | |
| 1002 movq 8(%rsi),%rax | |
| 1003 movq %rdx,%r8 | |
| 1004 | |
| 1005 mulq %rbx | |
| 1006 addq %rax,%r8 | |
| 1007 movq 16(%rsi),%rax | |
| 1008 movq %rdx,%r9 | |
| 1009 adcq $0,%r9 | |
| 1010 | |
| 1011 mulq %rbx | |
| 1012 addq %rax,%r9 | |
| 1013 movq 24(%rsi),%rax | |
| 1014 movq %rdx,%r10 | |
| 1015 adcq $0,%r10 | |
| 1016 | |
| 1017 mulq %rbx | |
| 1018 addq %rax,%r10 | |
| 1019 movq 32(%rsi),%rax | |
| 1020 movq %rdx,%r11 | |
| 1021 adcq $0,%r11 | |
| 1022 | |
| 1023 mulq %rbx | |
| 1024 addq %rax,%r11 | |
| 1025 movq 40(%rsi),%rax | |
| 1026 movq %rdx,%r12 | |
| 1027 adcq $0,%r12 | |
| 1028 | |
| 1029 mulq %rbx | |
| 1030 addq %rax,%r12 | |
| 1031 movq 48(%rsi),%rax | |
| 1032 movq %rdx,%r13 | |
| 1033 adcq $0,%r13 | |
| 1034 | |
| 1035 mulq %rbx | |
| 1036 addq %rax,%r13 | |
| 1037 movq 56(%rsi),%rax | |
| 1038 movq %rdx,%r14 | |
| 1039 adcq $0,%r14 | |
| 1040 | |
| 1041 mulq %rbx | |
| 1042 addq %rax,%r14 | |
| 1043 movq (%rsi),%rax | |
| 1044 movq %rdx,%r15 | |
| 1045 adcq $0,%r15 | |
| 1046 | |
/* Advance to second-operand qword 1 and the next product slot. */
| 1047 leaq 8(%rbp),%rbp | |
| 1048 leaq 8(%rdi),%rdi | |
| 1049 | |
| 1050 movl $7,%ecx | |
| 1051 jmp .Loop_mul | |
| 1052 | |
| 1053 .align 32 | |
| 1054 .Loop_mul: | |
/*
 * Multiply-accumulate row: rax enters holding first-operand qword 0. The
 * finished low qword is stored at (rdi) and the accumulator shifts down
 * one register per step.
 */
| 1055 movq (%rbp),%rbx | |
| 1056 mulq %rbx | |
| 1057 addq %rax,%r8 | |
| 1058 movq 8(%rsi),%rax | |
| 1059 movq %r8,(%rdi) | |
| 1060 movq %rdx,%r8 | |
| 1061 adcq $0,%r8 | |
| 1062 | |
| 1063 mulq %rbx | |
| 1064 addq %rax,%r9 | |
| 1065 movq 16(%rsi),%rax | |
| 1066 adcq $0,%rdx | |
| 1067 addq %r9,%r8 | |
| 1068 movq %rdx,%r9 | |
| 1069 adcq $0,%r9 | |
| 1070 | |
| 1071 mulq %rbx | |
| 1072 addq %rax,%r10 | |
| 1073 movq 24(%rsi),%rax | |
| 1074 adcq $0,%rdx | |
| 1075 addq %r10,%r9 | |
| 1076 movq %rdx,%r10 | |
| 1077 adcq $0,%r10 | |
| 1078 | |
| 1079 mulq %rbx | |
| 1080 addq %rax,%r11 | |
| 1081 movq 32(%rsi),%rax | |
| 1082 adcq $0,%rdx | |
| 1083 addq %r11,%r10 | |
| 1084 movq %rdx,%r11 | |
| 1085 adcq $0,%r11 | |
| 1086 | |
| 1087 mulq %rbx | |
| 1088 addq %rax,%r12 | |
| 1089 movq 40(%rsi),%rax | |
| 1090 adcq $0,%rdx | |
| 1091 addq %r12,%r11 | |
| 1092 movq %rdx,%r12 | |
| 1093 adcq $0,%r12 | |
| 1094 | |
| 1095 mulq %rbx | |
| 1096 addq %rax,%r13 | |
| 1097 movq 48(%rsi),%rax | |
| 1098 adcq $0,%rdx | |
| 1099 addq %r13,%r12 | |
| 1100 movq %rdx,%r13 | |
| 1101 adcq $0,%r13 | |
| 1102 | |
| 1103 mulq %rbx | |
| 1104 addq %rax,%r14 | |
| 1105 movq 56(%rsi),%rax | |
| 1106 adcq $0,%rdx | |
| 1107 addq %r14,%r13 | |
| 1108 movq %rdx,%r14 | |
/* leaq does not modify flags, so it can sit between addq and adcq. */
| 1109 leaq 8(%rbp),%rbp | |
| 1110 adcq $0,%r14 | |
| 1111 | |
| 1112 mulq %rbx | |
| 1113 addq %rax,%r15 | |
| 1114 movq (%rsi),%rax | |
| 1115 adcq $0,%rdx | |
| 1116 addq %r15,%r14 | |
| 1117 movq %rdx,%r15 | |
| 1118 adcq $0,%r15 | |
| 1119 | |
| 1120 leaq 8(%rdi),%rdi | |
| 1121 | |
| 1122 decl %ecx | |
| 1123 jnz .Loop_mul | |
| 1124 | |
/* Store the top eight qwords of the product. */
| 1125 movq %r8,(%rdi) | |
| 1126 movq %r9,8(%rdi) | |
| 1127 movq %r10,16(%rdi) | |
| 1128 movq %r11,24(%rdi) | |
| 1129 movq %r12,32(%rdi) | |
| 1130 movq %r13,40(%rdi) | |
| 1131 movq %r14,48(%rdi) | |
| 1132 movq %r15,56(%rdi) | |
| 1133 | |
/* 0xf3,0xc3 encodes "rep ret". */
| 1134 .byte 0xf3,0xc3 | |
| 1135 .size __rsaz_512_mul,.-__rsaz_512_mul | |
/*
 * rsaz_512_scatter4: copy 8 consecutive qwords into one column of a table
 * whose rows are 128 bytes apart.
 *
 * Register use as read from the code below (System V argument order is
 * assumed; the C prototype is not in this file - TODO confirm):
 *   rdi = table base
 *   rsi = source, 8 qwords
 *   rdx = column index, used at full 64-bit width, so the caller must
 *         supply clean upper bits
 * Leaf function: no stack use. Clobbers rax, rsi, rdi, r9 and flags.
 */
| 1136 .globl rsaz_512_scatter4 | |
| 1137 .hidden rsaz_512_scatter4 | |
| 1138 .type rsaz_512_scatter4,@function | |
| 1139 .align 16 | |
| 1140 rsaz_512_scatter4: | |
/* rdi = address of the column entry in row 0. */
| 1141 leaq (%rdi,%rdx,8),%rdi | |
| 1142 movl $8,%r9d | |
| 1143 jmp .Loop_scatter | |
| 1144 .align 16 | |
| 1145 .Loop_scatter: | |
/* Copy one qword, then step the source by 8 and the table by one row. */
| 1146 movq (%rsi),%rax | |
| 1147 leaq 8(%rsi),%rsi | |
| 1148 movq %rax,(%rdi) | |
| 1149 leaq 128(%rdi),%rdi | |
| 1150 decl %r9d | |
| 1151 jnz .Loop_scatter | |
/* 0xf3,0xc3 encodes "rep ret". */
| 1152 .byte 0xf3,0xc3 | |
| 1153 .size rsaz_512_scatter4,.-rsaz_512_scatter4 | |
| 1154 | |
/*
 * rsaz_512_gather4: read one column of a table (8 rows of 128 bytes) into
 * 8 consecutive qwords.
 *
 * Register use as read from the code below (System V argument order is
 * assumed; the C prototype is not in this file - TODO confirm):
 *   rdi = destination, 8 qwords
 *   rsi = table base, read with movdqa and therefore required to be
 *         16-byte aligned
 *   edx = column index
 *
 * Every row is read in full and the wanted column is picked with compare
 * masks, so the addresses touched do not depend on the index.
 * Leaf function: no stack use. Clobbers xmm0-xmm15, rsi, rdi, r9, flags.
 */
| 1155 .globl rsaz_512_gather4 | |
| 1156 .hidden rsaz_512_gather4 | |
| 1157 .type rsaz_512_gather4,@function | |
| 1158 .align 16 | |
| 1159 rsaz_512_gather4: | |
/*
 * Build eight masks in xmm0..xmm7. Starting from the dword vectors
 * 0,0,1,1 and 2,2,2,2 at .Linc, repeated paddd produces the index pairs
 * (0,1), (2,3) ... (14,15); pcmpeqd against the broadcast index leaves
 * all-ones only in the 64-bit lane whose number equals edx.
 */
| 1160 movd %edx,%xmm8 | |
| 1161 movdqa .Linc+16(%rip),%xmm1 | |
| 1162 movdqa .Linc(%rip),%xmm0 | |
| 1163 | |
| 1164 pshufd $0,%xmm8,%xmm8 | |
| 1165 movdqa %xmm1,%xmm7 | |
| 1166 movdqa %xmm1,%xmm2 | |
| 1167 paddd %xmm0,%xmm1 | |
| 1168 pcmpeqd %xmm8,%xmm0 | |
| 1169 movdqa %xmm7,%xmm3 | |
| 1170 paddd %xmm1,%xmm2 | |
| 1171 pcmpeqd %xmm8,%xmm1 | |
| 1172 movdqa %xmm7,%xmm4 | |
| 1173 paddd %xmm2,%xmm3 | |
| 1174 pcmpeqd %xmm8,%xmm2 | |
| 1175 movdqa %xmm7,%xmm5 | |
| 1176 paddd %xmm3,%xmm4 | |
| 1177 pcmpeqd %xmm8,%xmm3 | |
| 1178 movdqa %xmm7,%xmm6 | |
| 1179 paddd %xmm4,%xmm5 | |
| 1180 pcmpeqd %xmm8,%xmm4 | |
| 1181 paddd %xmm5,%xmm6 | |
| 1182 pcmpeqd %xmm8,%xmm5 | |
| 1183 paddd %xmm6,%xmm7 | |
| 1184 pcmpeqd %xmm8,%xmm6 | |
| 1185 pcmpeqd %xmm8,%xmm7 | |
| 1186 movl $8,%r9d | |
| 1187 jmp .Loop_gather | |
| 1188 .align 16 | |
| 1189 .Loop_gather: | |
/*
 * Load all 128 bytes of the current row, mask, OR them together, then
 * fold the upper 64 bits onto the lower (pshufd 0x4e swaps the halves).
 */
| 1190 movdqa 0(%rsi),%xmm8 | |
| 1191 movdqa 16(%rsi),%xmm9 | |
| 1192 movdqa 32(%rsi),%xmm10 | |
| 1193 movdqa 48(%rsi),%xmm11 | |
| 1194 pand %xmm0,%xmm8 | |
| 1195 movdqa 64(%rsi),%xmm12 | |
| 1196 pand %xmm1,%xmm9 | |
| 1197 movdqa 80(%rsi),%xmm13 | |
| 1198 pand %xmm2,%xmm10 | |
| 1199 movdqa 96(%rsi),%xmm14 | |
| 1200 pand %xmm3,%xmm11 | |
| 1201 movdqa 112(%rsi),%xmm15 | |
| 1202 leaq 128(%rsi),%rsi | |
| 1203 pand %xmm4,%xmm12 | |
| 1204 pand %xmm5,%xmm13 | |
| 1205 pand %xmm6,%xmm14 | |
| 1206 pand %xmm7,%xmm15 | |
| 1207 por %xmm10,%xmm8 | |
| 1208 por %xmm11,%xmm9 | |
| 1209 por %xmm12,%xmm8 | |
| 1210 por %xmm13,%xmm9 | |
| 1211 por %xmm14,%xmm8 | |
| 1212 por %xmm15,%xmm9 | |
| 1213 | |
| 1214 por %xmm9,%xmm8 | |
| 1215 pshufd $0x4e,%xmm8,%xmm9 | |
| 1216 por %xmm9,%xmm8 | |
/* Store the selected qword and advance the destination. */
| 1217 movq %xmm8,(%rdi) | |
| 1218 leaq 8(%rdi),%rdi | |
| 1219 decl %r9d | |
| 1220 jnz .Loop_gather | |
/* 0xf3,0xc3 encodes "rep ret". */
| 1221 .byte 0xf3,0xc3 | |
/*
 * NOTE(review): the .LSEH_end label is not referenced in this file; it
 * looks like a leftover from the Windows SEH variant of the generator -
 * confirm.
 */
| 1222 .LSEH_end_rsaz_512_gather4: | |
| 1223 .size rsaz_512_gather4,.-rsaz_512_gather4 | |
| 1224 | |
/*
 * Seed constants for the gather masks: the first 16 bytes are the dwords
 * 0,0,1,1 (loaded via .Linc) and the next 16 bytes are 2,2,2,2 (loaded
 * via .Linc+16). Both are read with movdqa, which needs the 16-byte
 * alignment that .align 64 provides.
 */
| 1225 .align 64 | |
| 1226 .Linc: | |
| 1227 .long 0,0, 1,1 | |
| 1228 .long 2,2, 2,2 | |
| 1229 #endif | |
| OLD | NEW |