| OLD | NEW |
| (Empty) |
| 1 .text | |
| 2 | |
# bn_mul_mont_gather5 -- multiply/reduce loop in which one multiplicand is
# fetched word by word from a table laid out in 256-byte rows.
# x86-64, GAS / AT&T syntax (source first, destination last).
#
# Inputs, as the code below uses them:
#   %rdi      destination word array (written in .Lsub and .Lcopy)
#   %rsi      first multiplicand, read as (%rsi,%r15,8)
#   %rdx      table base; one 64-bit word is gathered per 256-byte row
#   %rcx      operand multiplied by the reduction factor and subtracted at
#             the end (presumably the modulus -- confirm against the caller)
#   %r8       pointer to a 64-bit constant, dereferenced once
#   %r9d      word count (zero-extended before use)
#   8(%rsp)   32-bit table index, read at entry before anything is pushed
# Returns 1 in %rax. %rbx, %rbp and %r12-%r15 are pushed and restored.
#
# Dispatch: a count that is not a multiple of 4, or is below 8, takes the
# generic path at .Lmul_enter; any other count jumps to .Lmul4x_enter.
| 3 .globl bn_mul_mont_gather5 | |
| 4 .type bn_mul_mont_gather5,@function | |
| 5 .align 64 | |
| 6 bn_mul_mont_gather5: | |
| 7 testl $3,%r9d | |
| 8 jnz .Lmul_enter | |
| 9 cmpl $8,%r9d | |
| 10 jb .Lmul_enter | |
| 11 jmp .Lmul4x_enter | |
| 12 | |
| 13 .align 16 | |
| 14 .Lmul_enter: | |
# Zero-extend the count, fetch the stack-passed index, save callee-saved regs.
| 15 movl %r9d,%r9d | |
| 16 movl 8(%rsp),%r10d | |
| 17 pushq %rbx | |
| 18 pushq %rbp | |
| 19 pushq %r12 | |
| 20 pushq %r13 | |
| 21 pushq %r14 | |
| 22 pushq %r15 | |
# Reserve (count+2) qwords of scratch below the pushes, round %rsp down to a
# 1024-byte boundary, and keep the pre-allocation %rsp in scratch slot
# [count+1] so the epilogue can find the saved registers again.
| 23 movq %rsp,%rax | |
| 24 leaq 2(%r9),%r11 | |
| 25 negq %r11 | |
| 26 leaq (%rsp,%r11,8),%rsp | |
| 27 andq $-1024,%rsp | |
| 28 | |
| 29 movq %rax,8(%rsp,%r9,8) | |
| 30 .Lmul_body: | |
# Split the index. The low 3 bits become a qword offset folded into %r12
# (together with a +96 bias). Bits 3-4 are inverted and used to pick which
# four consecutive qwords of .Lmagic_masks are loaded into %xmm4-%xmm7.
| 31 movq %rdx,%r12 | |
| 32 movq %r10,%r11 | |
| 33 shrq $3,%r10 | |
| 34 andq $7,%r11 | |
| 35 notq %r10 | |
| 36 leaq .Lmagic_masks(%rip),%rax | |
| 37 andq $3,%r10 | |
| 38 leaq 96(%r12,%r11,8),%r12 | |
| 39 movq 0(%rax,%r10,8),%xmm4 | |
| 40 movq 8(%rax,%r10,8),%xmm5 | |
| 41 movq 16(%rax,%r10,8),%xmm6 | |
| 42 movq 24(%rax,%r10,8),%xmm7 | |
| 43 | |
# Gather table word 0: read the qword at the same position in each of the
# four 64-byte groups of the row, AND each with its mask, OR the results.
# %r12 then advances by one 256-byte row.
| 44 movq -96(%r12),%xmm0 | |
| 45 movq -32(%r12),%xmm1 | |
| 46 pand %xmm4,%xmm0 | |
| 47 movq 32(%r12),%xmm2 | |
| 48 pand %xmm5,%xmm1 | |
| 49 movq 96(%r12),%xmm3 | |
| 50 pand %xmm6,%xmm2 | |
| 51 por %xmm1,%xmm0 | |
| 52 pand %xmm7,%xmm3 | |
| 53 por %xmm2,%xmm0 | |
| 54 leaq 256(%r12),%r12 | |
| 55 por %xmm3,%xmm0 | |
| 56 | |
# Hand-encoded movq %xmm0,%rbx (66 48 0F 7E C3): gathered word -> %rbx.
| 57 .byte 102,72,15,126,195 | |
| 58 | |
# %r8 = the constant itself; %rax = word 0 of the %rsi operand.
| 59 movq (%r8),%r8 | |
| 60 movq (%rsi),%rax | |
| 61 | |
# %r14 = outer (table word) counter, %r15 = inner (operand word) counter.
| 62 xorq %r14,%r14 | |
| 63 xorq %r15,%r15 | |
| 64 | |
# The gather of the next table word is interleaved with the first multiply.
| 65 movq -96(%r12),%xmm0 | |
| 66 movq -32(%r12),%xmm1 | |
| 67 pand %xmm4,%xmm0 | |
| 68 movq 32(%r12),%xmm2 | |
| 69 pand %xmm5,%xmm1 | |
| 70 | |
| 71 movq %r8,%rbp | |
| 72 mulq %rbx | |
| 73 movq %rax,%r10 | |
| 74 movq (%rcx),%rax | |
| 75 | |
| 76 movq 96(%r12),%xmm3 | |
| 77 pand %xmm6,%xmm2 | |
| 78 por %xmm1,%xmm0 | |
| 79 pand %xmm7,%xmm3 | |
| 80 | |
# %rbp = constant * low word of the first product: the factor applied to
# every word of the %rcx operand during this pass.
| 81 imulq %r10,%rbp | |
| 82 movq %rdx,%r11 | |
| 83 | |
| 84 por %xmm2,%xmm0 | |
| 85 leaq 256(%r12),%r12 | |
| 86 por %xmm3,%xmm0 | |
| 87 | |
| 88 mulq %rbp | |
| 89 addq %rax,%r10 | |
| 90 movq 8(%rsi),%rax | |
| 91 adcq $0,%rdx | |
| 92 movq %rdx,%r13 | |
| 93 | |
| 94 leaq 1(%r15),%r15 | |
| 95 jmp .L1st_enter | |
| 96 | |
| 97 .align 16 | |
# First pass: no previous accumulator to add. Each iteration combines the
# %rbx product and the %rbp product for one word position and stores the
# sum one slot lower in the scratch area (-16 with %r15 already advanced).
| 98 .L1st: | |
| 99 addq %rax,%r13 | |
| 100 movq (%rsi,%r15,8),%rax | |
| 101 adcq $0,%rdx | |
| 102 addq %r11,%r13 | |
| 103 movq %r10,%r11 | |
| 104 adcq $0,%rdx | |
| 105 movq %r13,-16(%rsp,%r15,8) | |
| 106 movq %rdx,%r13 | |
| 107 | |
| 108 .L1st_enter: | |
| 109 mulq %rbx | |
| 110 addq %rax,%r11 | |
| 111 movq (%rcx,%r15,8),%rax | |
| 112 adcq $0,%rdx | |
| 113 leaq 1(%r15),%r15 | |
| 114 movq %rdx,%r10 | |
| 115 | |
| 116 mulq %rbp | |
| 117 cmpq %r9,%r15 | |
| 118 jne .L1st | |
| 119 | |
# Hand-encoded movq %xmm0,%rbx: table word for the next pass.
| 120 .byte 102,72,15,126,195 | |
| 121 | |
# Finish the last word position, then store the two top words of the pass
# in scratch slots [count-1] and [count].
| 122 addq %rax,%r13 | |
| 123 movq (%rsi),%rax | |
| 124 adcq $0,%rdx | |
| 125 addq %r11,%r13 | |
| 126 adcq $0,%rdx | |
| 127 movq %r13,-16(%rsp,%r15,8) | |
| 128 movq %rdx,%r13 | |
| 129 movq %r10,%r11 | |
| 130 | |
| 131 xorq %rdx,%rdx | |
| 132 addq %r11,%r13 | |
| 133 adcq $0,%rdx | |
| 134 movq %r13,-8(%rsp,%r9,8) | |
| 135 movq %rdx,(%rsp,%r9,8) | |
| 136 | |
| 137 leaq 1(%r14),%r14 | |
| 138 jmp .Louter | |
| 139 .align 16 | |
# Remaining passes: same shape as the first pass, but the scratch contents
# left by the previous pass are added in as well (%r10 carries them).
| 140 .Louter: | |
| 141 xorq %r15,%r15 | |
| 142 movq %r8,%rbp | |
| 143 movq (%rsp),%r10 | |
| 144 | |
| 145 movq -96(%r12),%xmm0 | |
| 146 movq -32(%r12),%xmm1 | |
| 147 pand %xmm4,%xmm0 | |
| 148 movq 32(%r12),%xmm2 | |
| 149 pand %xmm5,%xmm1 | |
| 150 | |
| 151 mulq %rbx | |
| 152 addq %rax,%r10 | |
| 153 movq (%rcx),%rax | |
| 154 adcq $0,%rdx | |
| 155 | |
| 156 movq 96(%r12),%xmm3 | |
| 157 pand %xmm6,%xmm2 | |
| 158 por %xmm1,%xmm0 | |
| 159 pand %xmm7,%xmm3 | |
| 160 | |
| 161 imulq %r10,%rbp | |
| 162 movq %rdx,%r11 | |
| 163 | |
| 164 por %xmm2,%xmm0 | |
| 165 leaq 256(%r12),%r12 | |
| 166 por %xmm3,%xmm0 | |
| 167 | |
| 168 mulq %rbp | |
| 169 addq %rax,%r10 | |
| 170 movq 8(%rsi),%rax | |
| 171 adcq $0,%rdx | |
| 172 movq 8(%rsp),%r10 | |
| 173 movq %rdx,%r13 | |
| 174 | |
| 175 leaq 1(%r15),%r15 | |
| 176 jmp .Linner_enter | |
| 177 | |
| 178 .align 16 | |
| 179 .Linner: | |
| 180 addq %rax,%r13 | |
| 181 movq (%rsi,%r15,8),%rax | |
| 182 adcq $0,%rdx | |
| 183 addq %r10,%r13 | |
| 184 movq (%rsp,%r15,8),%r10 | |
| 185 adcq $0,%rdx | |
| 186 movq %r13,-16(%rsp,%r15,8) | |
| 187 movq %rdx,%r13 | |
| 188 | |
| 189 .Linner_enter: | |
| 190 mulq %rbx | |
| 191 addq %rax,%r11 | |
| 192 movq (%rcx,%r15,8),%rax | |
| 193 adcq $0,%rdx | |
| 194 addq %r11,%r10 | |
| 195 movq %rdx,%r11 | |
| 196 adcq $0,%r11 | |
| 197 leaq 1(%r15),%r15 | |
| 198 | |
| 199 mulq %rbp | |
| 200 cmpq %r9,%r15 | |
| 201 jne .Linner | |
| 202 | |
# Hand-encoded movq %xmm0,%rbx: table word for the next pass.
| 203 .byte 102,72,15,126,195 | |
| 204 | |
| 205 addq %rax,%r13 | |
| 206 movq (%rsi),%rax | |
| 207 adcq $0,%rdx | |
| 208 addq %r10,%r13 | |
| 209 movq (%rsp,%r15,8),%r10 | |
| 210 adcq $0,%rdx | |
| 211 movq %r13,-16(%rsp,%r15,8) | |
| 212 movq %rdx,%r13 | |
| 213 | |
# Top of the pass: fold in the carry word and the previous top scratch word.
| 214 xorq %rdx,%rdx | |
| 215 addq %r11,%r13 | |
| 216 adcq $0,%rdx | |
| 217 addq %r10,%r13 | |
| 218 adcq $0,%rdx | |
| 219 movq %r13,-8(%rsp,%r9,8) | |
| 220 movq %rdx,(%rsp,%r9,8) | |
| 221 | |
| 222 leaq 1(%r14),%r14 | |
| 223 cmpq %r9,%r14 | |
| 224 jl .Louter | |
| 225 | |
# Final subtraction: %rdi[i] = scratch[i] - %rcx[i] with borrow propagation.
# The xorq clears CF for the first sbbq; the movq/leaq/jmp in between and
# the leaq/decq inside the loop leave CF untouched.
| 226 xorq %r14,%r14 | |
| 227 movq (%rsp),%rax | |
| 228 leaq (%rsp),%rsi | |
| 229 movq %r9,%r15 | |
| 230 jmp .Lsub | |
| 231 .align 16 | |
| 232 .Lsub: sbbq (%rcx,%r14,8),%rax | |
| 233 movq %rax,(%rdi,%r14,8) | |
| 234 movq 8(%rsi,%r14,8),%rax | |
| 235 leaq 1(%r14),%r14 | |
| 236 decq %r15 | |
| 237 jnz .Lsub | |
| 238 | |
# %rax = top scratch word minus the final borrow, used as a select mask
# (expected to be 0 or all-ones). %rsi = (scratch & mask) | (%rdi & ~mask),
# so the copy source is chosen without a branch.
| 239 sbbq $0,%rax | |
| 240 xorq %r14,%r14 | |
| 241 andq %rax,%rsi | |
| 242 notq %rax | |
| 243 movq %rdi,%rcx | |
| 244 andq %rax,%rcx | |
| 245 movq %r9,%r15 | |
| 246 orq %rcx,%rsi | |
| 247 .align 16 | |
# Copy the selected words to %rdi; each scratch word is overwritten with its
# own index (%r14) once it has been read.
| 248 .Lcopy: | |
| 249 movq (%rsi,%r14,8),%rax | |
| 250 movq %r14,(%rsp,%r14,8) | |
| 251 movq %rax,(%rdi,%r14,8) | |
| 252 leaq 1(%r14),%r14 | |
| 253 subq $1,%r15 | |
| 254 jnz .Lcopy | |
| 255 | |
# Epilogue: reload the %rsp saved in slot [count+1], set the return value
# to 1, restore the six pushed registers and drop the frame.
| 256 movq 8(%rsp,%r9,8),%rsi | |
| 257 movq $1,%rax | |
| 258 movq (%rsi),%r15 | |
| 259 movq 8(%rsi),%r14 | |
| 260 movq 16(%rsi),%r13 | |
| 261 movq 24(%rsi),%r12 | |
| 262 movq 32(%rsi),%rbp | |
| 263 movq 40(%rsi),%rbx | |
| 264 leaq 48(%rsi),%rsp | |
| 265 .Lmul_epilogue: | |
# Hand-encoded rep ret (F3 C3).
| 266 .byte 0xf3,0xc3 | |
| 267 .size bn_mul_mont_gather5,.-bn_mul_mont_gather5 | |
# bn_mul4x_mont_gather5 -- four-words-per-iteration variant of the
# multiply/reduce loop. It has no .globl directive in this listing and is
# entered through the .Lmul4x_enter label.
#
# Inputs, as the code below uses them:
#   %rdi      destination word array (saved to the frame, then reused as an
#             accumulator and reloaded before the final subtraction)
#   %rsi      first multiplicand
#   %rdx      table base; one 64-bit word is gathered per 256-byte row
#   %rcx      operand multiplied by the reduction factor and subtracted at
#             the end (presumably the modulus -- confirm against the caller)
#   %r8       pointer to a 64-bit constant, dereferenced once
#   %r9d      word count (zero-extended before use)
#   8(%rsp)   32-bit table index, read before anything is pushed
# Returns 1 in %rax. %rbx, %rbp and %r12-%r15 are pushed and restored.
| 268 .type bn_mul4x_mont_gather5,@function | |
| 269 .align 16 | |
| 270 bn_mul4x_mont_gather5: | |
| 271 .Lmul4x_enter: | |
| 272 movl %r9d,%r9d | |
| 273 movl 8(%rsp),%r10d | |
| 274 pushq %rbx | |
| 275 pushq %rbp | |
| 276 pushq %r12 | |
| 277 pushq %r13 | |
| 278 pushq %r14 | |
| 279 pushq %r15 | |
# Reserve (count+4) qwords of scratch, round %rsp down to a 1024-byte
# boundary, and keep the pre-allocation %rsp in scratch slot [count+1].
| 280 movq %rsp,%rax | |
| 281 leaq 4(%r9),%r11 | |
| 282 negq %r11 | |
| 283 leaq (%rsp,%r11,8),%rsp | |
| 284 andq $-1024,%rsp | |
| 285 | |
| 286 movq %rax,8(%rsp,%r9,8) | |
| 287 .Lmul4x_body: | |
# Park the destination pointer in slot [count+2]; %rdi is used as an
# accumulator from here on.
| 288 movq %rdi,16(%rsp,%r9,8) | |
# Index split and mask selection: low 3 bits -> qword offset in %r12,
# inverted bits 3-4 -> starting qword inside .Lmagic_masks.
| 289 movq %rdx,%r12 | |
| 290 movq %r10,%r11 | |
| 291 shrq $3,%r10 | |
| 292 andq $7,%r11 | |
| 293 notq %r10 | |
| 294 leaq .Lmagic_masks(%rip),%rax | |
| 295 andq $3,%r10 | |
| 296 leaq 96(%r12,%r11,8),%r12 | |
| 297 movq 0(%rax,%r10,8),%xmm4 | |
| 298 movq 8(%rax,%r10,8),%xmm5 | |
| 299 movq 16(%rax,%r10,8),%xmm6 | |
| 300 movq 24(%rax,%r10,8),%xmm7 | |
| 301 | |
# Gather table word 0 (four masked loads OR-ed together), advance one row.
| 302 movq -96(%r12),%xmm0 | |
| 303 movq -32(%r12),%xmm1 | |
| 304 pand %xmm4,%xmm0 | |
| 305 movq 32(%r12),%xmm2 | |
| 306 pand %xmm5,%xmm1 | |
| 307 movq 96(%r12),%xmm3 | |
| 308 pand %xmm6,%xmm2 | |
| 309 por %xmm1,%xmm0 | |
| 310 pand %xmm7,%xmm3 | |
| 311 por %xmm2,%xmm0 | |
| 312 leaq 256(%r12),%r12 | |
| 313 por %xmm3,%xmm0 | |
| 314 | |
# Hand-encoded movq %xmm0,%rbx (66 48 0F 7E C3).
| 315 .byte 102,72,15,126,195 | |
| 316 movq (%r8),%r8 | |
| 317 movq (%rsi),%rax | |
| 318 | |
# %r14 = outer (table word) counter, %r15 = inner (operand word) counter.
| 319 xorq %r14,%r14 | |
| 320 xorq %r15,%r15 | |
| 321 | |
# The gather of the next table word is interleaved with the first multiplies.
| 322 movq -96(%r12),%xmm0 | |
| 323 movq -32(%r12),%xmm1 | |
| 324 pand %xmm4,%xmm0 | |
| 325 movq 32(%r12),%xmm2 | |
| 326 pand %xmm5,%xmm1 | |
| 327 | |
| 328 movq %r8,%rbp | |
| 329 mulq %rbx | |
| 330 movq %rax,%r10 | |
| 331 movq (%rcx),%rax | |
| 332 | |
| 333 movq 96(%r12),%xmm3 | |
| 334 pand %xmm6,%xmm2 | |
| 335 por %xmm1,%xmm0 | |
| 336 pand %xmm7,%xmm3 | |
| 337 | |
# %rbp = constant * low word of the first product (per-pass factor).
| 338 imulq %r10,%rbp | |
| 339 movq %rdx,%r11 | |
| 340 | |
| 341 por %xmm2,%xmm0 | |
| 342 leaq 256(%r12),%r12 | |
| 343 por %xmm3,%xmm0 | |
| 344 | |
| 345 mulq %rbp | |
| 346 addq %rax,%r10 | |
| 347 movq 8(%rsi),%rax | |
| 348 adcq $0,%rdx | |
| 349 movq %rdx,%rdi | |
| 350 | |
| 351 mulq %rbx | |
| 352 addq %rax,%r11 | |
| 353 movq 8(%rcx),%rax | |
| 354 adcq $0,%rdx | |
| 355 movq %rdx,%r10 | |
| 356 | |
| 357 mulq %rbp | |
| 358 addq %rax,%rdi | |
| 359 movq 16(%rsi),%rax | |
| 360 adcq $0,%rdx | |
| 361 addq %r11,%rdi | |
| 362 leaq 4(%r15),%r15 | |
| 363 adcq $0,%rdx | |
| 364 movq %rdi,(%rsp) | |
| 365 movq %rdx,%r13 | |
| 366 jmp .L1st4x | |
| 367 .align 16 | |
# First pass, four word positions per iteration. %r10/%r11 alternate as the
# running word of the %rbx products; %r13/%rdi alternate as the running word
# of the %rbp products and are the values stored to the scratch area.
| 368 .L1st4x: | |
| 369 mulq %rbx | |
| 370 addq %rax,%r10 | |
| 371 movq -16(%rcx,%r15,8),%rax | |
| 372 adcq $0,%rdx | |
| 373 movq %rdx,%r11 | |
| 374 | |
| 375 mulq %rbp | |
| 376 addq %rax,%r13 | |
| 377 movq -8(%rsi,%r15,8),%rax | |
| 378 adcq $0,%rdx | |
| 379 addq %r10,%r13 | |
| 380 adcq $0,%rdx | |
| 381 movq %r13,-24(%rsp,%r15,8) | |
| 382 movq %rdx,%rdi | |
| 383 | |
| 384 mulq %rbx | |
| 385 addq %rax,%r11 | |
| 386 movq -8(%rcx,%r15,8),%rax | |
| 387 adcq $0,%rdx | |
| 388 movq %rdx,%r10 | |
| 389 | |
| 390 mulq %rbp | |
| 391 addq %rax,%rdi | |
| 392 movq (%rsi,%r15,8),%rax | |
| 393 adcq $0,%rdx | |
| 394 addq %r11,%rdi | |
| 395 adcq $0,%rdx | |
| 396 movq %rdi,-16(%rsp,%r15,8) | |
| 397 movq %rdx,%r13 | |
| 398 | |
| 399 mulq %rbx | |
| 400 addq %rax,%r10 | |
| 401 movq (%rcx,%r15,8),%rax | |
| 402 adcq $0,%rdx | |
| 403 movq %rdx,%r11 | |
| 404 | |
| 405 mulq %rbp | |
| 406 addq %rax,%r13 | |
| 407 movq 8(%rsi,%r15,8),%rax | |
| 408 adcq $0,%rdx | |
| 409 addq %r10,%r13 | |
| 410 adcq $0,%rdx | |
| 411 movq %r13,-8(%rsp,%r15,8) | |
| 412 movq %rdx,%rdi | |
| 413 | |
| 414 mulq %rbx | |
| 415 addq %rax,%r11 | |
| 416 movq 8(%rcx,%r15,8),%rax | |
| 417 adcq $0,%rdx | |
| 418 leaq 4(%r15),%r15 | |
| 419 movq %rdx,%r10 | |
| 420 | |
| 421 mulq %rbp | |
| 422 addq %rax,%rdi | |
| 423 movq -16(%rsi,%r15,8),%rax | |
| 424 adcq $0,%rdx | |
| 425 addq %r11,%rdi | |
| 426 adcq $0,%rdx | |
| 427 movq %rdi,-32(%rsp,%r15,8) | |
| 428 movq %rdx,%r13 | |
| 429 cmpq %r9,%r15 | |
| 430 jl .L1st4x | |
| 431 | |
# Tail of the first pass: the last two word positions, handled outside the
# loop, followed by the two top words of the pass.
| 432 mulq %rbx | |
| 433 addq %rax,%r10 | |
| 434 movq -16(%rcx,%r15,8),%rax | |
| 435 adcq $0,%rdx | |
| 436 movq %rdx,%r11 | |
| 437 | |
| 438 mulq %rbp | |
| 439 addq %rax,%r13 | |
| 440 movq -8(%rsi,%r15,8),%rax | |
| 441 adcq $0,%rdx | |
| 442 addq %r10,%r13 | |
| 443 adcq $0,%rdx | |
| 444 movq %r13,-24(%rsp,%r15,8) | |
| 445 movq %rdx,%rdi | |
| 446 | |
| 447 mulq %rbx | |
| 448 addq %rax,%r11 | |
| 449 movq -8(%rcx,%r15,8),%rax | |
| 450 adcq $0,%rdx | |
| 451 movq %rdx,%r10 | |
| 452 | |
| 453 mulq %rbp | |
| 454 addq %rax,%rdi | |
| 455 movq (%rsi),%rax | |
| 456 adcq $0,%rdx | |
| 457 addq %r11,%rdi | |
| 458 adcq $0,%rdx | |
| 459 movq %rdi,-16(%rsp,%r15,8) | |
| 460 movq %rdx,%r13 | |
| 461 | |
# Hand-encoded movq %xmm0,%rbx: table word for the next pass.
| 462 .byte 102,72,15,126,195 | |
| 463 | |
| 464 xorq %rdi,%rdi | |
| 465 addq %r10,%r13 | |
| 466 adcq $0,%rdi | |
| 467 movq %r13,-8(%rsp,%r15,8) | |
| 468 movq %rdi,(%rsp,%r15,8) | |
| 469 | |
| 470 leaq 1(%r14),%r14 | |
| 471 .align 4 | |
# Remaining passes: same structure as the first pass, with the scratch
# words from the previous pass added into the %rbx-product chain.
| 472 .Louter4x: | |
| 473 xorq %r15,%r15 | |
| 474 movq -96(%r12),%xmm0 | |
| 475 movq -32(%r12),%xmm1 | |
| 476 pand %xmm4,%xmm0 | |
| 477 movq 32(%r12),%xmm2 | |
| 478 pand %xmm5,%xmm1 | |
| 479 | |
| 480 movq (%rsp),%r10 | |
| 481 movq %r8,%rbp | |
| 482 mulq %rbx | |
| 483 addq %rax,%r10 | |
| 484 movq (%rcx),%rax | |
| 485 adcq $0,%rdx | |
| 486 | |
| 487 movq 96(%r12),%xmm3 | |
| 488 pand %xmm6,%xmm2 | |
| 489 por %xmm1,%xmm0 | |
| 490 pand %xmm7,%xmm3 | |
| 491 | |
| 492 imulq %r10,%rbp | |
| 493 movq %rdx,%r11 | |
| 494 | |
| 495 por %xmm2,%xmm0 | |
| 496 leaq 256(%r12),%r12 | |
| 497 por %xmm3,%xmm0 | |
| 498 | |
| 499 mulq %rbp | |
| 500 addq %rax,%r10 | |
| 501 movq 8(%rsi),%rax | |
| 502 adcq $0,%rdx | |
| 503 movq %rdx,%rdi | |
| 504 | |
| 505 mulq %rbx | |
| 506 addq %rax,%r11 | |
| 507 movq 8(%rcx),%rax | |
| 508 adcq $0,%rdx | |
| 509 addq 8(%rsp),%r11 | |
| 510 adcq $0,%rdx | |
| 511 movq %rdx,%r10 | |
| 512 | |
| 513 mulq %rbp | |
| 514 addq %rax,%rdi | |
| 515 movq 16(%rsi),%rax | |
| 516 adcq $0,%rdx | |
| 517 addq %r11,%rdi | |
| 518 leaq 4(%r15),%r15 | |
| 519 adcq $0,%rdx | |
| 520 movq %rdx,%r13 | |
| 521 jmp .Linner4x | |
| 522 .align 16 | |
# Note the stores here lag one step behind the first-pass loop: each
# accumulator is written after the following multiply has been issued.
| 523 .Linner4x: | |
| 524 mulq %rbx | |
| 525 addq %rax,%r10 | |
| 526 movq -16(%rcx,%r15,8),%rax | |
| 527 adcq $0,%rdx | |
| 528 addq -16(%rsp,%r15,8),%r10 | |
| 529 adcq $0,%rdx | |
| 530 movq %rdx,%r11 | |
| 531 | |
| 532 mulq %rbp | |
| 533 addq %rax,%r13 | |
| 534 movq -8(%rsi,%r15,8),%rax | |
| 535 adcq $0,%rdx | |
| 536 addq %r10,%r13 | |
| 537 adcq $0,%rdx | |
| 538 movq %rdi,-32(%rsp,%r15,8) | |
| 539 movq %rdx,%rdi | |
| 540 | |
| 541 mulq %rbx | |
| 542 addq %rax,%r11 | |
| 543 movq -8(%rcx,%r15,8),%rax | |
| 544 adcq $0,%rdx | |
| 545 addq -8(%rsp,%r15,8),%r11 | |
| 546 adcq $0,%rdx | |
| 547 movq %rdx,%r10 | |
| 548 | |
| 549 mulq %rbp | |
| 550 addq %rax,%rdi | |
| 551 movq (%rsi,%r15,8),%rax | |
| 552 adcq $0,%rdx | |
| 553 addq %r11,%rdi | |
| 554 adcq $0,%rdx | |
| 555 movq %r13,-24(%rsp,%r15,8) | |
| 556 movq %rdx,%r13 | |
| 557 | |
| 558 mulq %rbx | |
| 559 addq %rax,%r10 | |
| 560 movq (%rcx,%r15,8),%rax | |
| 561 adcq $0,%rdx | |
| 562 addq (%rsp,%r15,8),%r10 | |
| 563 adcq $0,%rdx | |
| 564 movq %rdx,%r11 | |
| 565 | |
| 566 mulq %rbp | |
| 567 addq %rax,%r13 | |
| 568 movq 8(%rsi,%r15,8),%rax | |
| 569 adcq $0,%rdx | |
| 570 addq %r10,%r13 | |
| 571 adcq $0,%rdx | |
| 572 movq %rdi,-16(%rsp,%r15,8) | |
| 573 movq %rdx,%rdi | |
| 574 | |
| 575 mulq %rbx | |
| 576 addq %rax,%r11 | |
| 577 movq 8(%rcx,%r15,8),%rax | |
| 578 adcq $0,%rdx | |
| 579 addq 8(%rsp,%r15,8),%r11 | |
| 580 adcq $0,%rdx | |
| 581 leaq 4(%r15),%r15 | |
| 582 movq %rdx,%r10 | |
| 583 | |
| 584 mulq %rbp | |
| 585 addq %rax,%rdi | |
| 586 movq -16(%rsi,%r15,8),%rax | |
| 587 adcq $0,%rdx | |
| 588 addq %r11,%rdi | |
| 589 adcq $0,%rdx | |
| 590 movq %r13,-40(%rsp,%r15,8) | |
| 591 movq %rdx,%r13 | |
| 592 cmpq %r9,%r15 | |
| 593 jl .Linner4x | |
| 594 | |
# Tail of the pass: last two word positions; the outer counter is bumped
# here, in the middle of the carry chain (leaq leaves the flags alone).
| 595 mulq %rbx | |
| 596 addq %rax,%r10 | |
| 597 movq -16(%rcx,%r15,8),%rax | |
| 598 adcq $0,%rdx | |
| 599 addq -16(%rsp,%r15,8),%r10 | |
| 600 adcq $0,%rdx | |
| 601 movq %rdx,%r11 | |
| 602 | |
| 603 mulq %rbp | |
| 604 addq %rax,%r13 | |
| 605 movq -8(%rsi,%r15,8),%rax | |
| 606 adcq $0,%rdx | |
| 607 addq %r10,%r13 | |
| 608 adcq $0,%rdx | |
| 609 movq %rdi,-32(%rsp,%r15,8) | |
| 610 movq %rdx,%rdi | |
| 611 | |
| 612 mulq %rbx | |
| 613 addq %rax,%r11 | |
| 614 movq -8(%rcx,%r15,8),%rax | |
| 615 adcq $0,%rdx | |
| 616 addq -8(%rsp,%r15,8),%r11 | |
| 617 adcq $0,%rdx | |
| 618 leaq 1(%r14),%r14 | |
| 619 movq %rdx,%r10 | |
| 620 | |
| 621 mulq %rbp | |
| 622 addq %rax,%rdi | |
| 623 movq (%rsi),%rax | |
| 624 adcq $0,%rdx | |
| 625 addq %r11,%rdi | |
| 626 adcq $0,%rdx | |
| 627 movq %r13,-24(%rsp,%r15,8) | |
| 628 movq %rdx,%r13 | |
| 629 | |
# Hand-encoded movq %xmm0,%rbx: table word for the next pass.
| 630 .byte 102,72,15,126,195 | |
| 631 movq %rdi,-16(%rsp,%r15,8) | |
| 632 | |
# Top words of the pass: carry word plus the previous top scratch word.
| 633 xorq %rdi,%rdi | |
| 634 addq %r10,%r13 | |
| 635 adcq $0,%rdi | |
| 636 addq (%rsp,%r9,8),%r13 | |
| 637 adcq $0,%rdi | |
| 638 movq %r13,-8(%rsp,%r15,8) | |
| 639 movq %rdi,(%rsp,%r15,8) | |
| 640 | |
| 641 cmpq %r9,%r14 | |
| 642 jl .Louter4x | |
# Final subtraction. Reload the destination pointer, zero %xmm0 for the
# scratch wipe below, and turn %r9 into count/4. The first two words are
# subtracted before the loop; the loop then handles four words per
# iteration and runs count/4 - 1 times.
| 643 movq 16(%rsp,%r9,8),%rdi | |
| 644 movq 0(%rsp),%rax | |
| 645 pxor %xmm0,%xmm0 | |
| 646 movq 8(%rsp),%rdx | |
| 647 shrq $2,%r9 | |
| 648 leaq (%rsp),%rsi | |
| 649 xorq %r14,%r14 | |
| 650 | |
| 651 subq 0(%rcx),%rax | |
| 652 movq 16(%rsi),%rbx | |
| 653 movq 24(%rsi),%rbp | |
| 654 sbbq 8(%rcx),%rdx | |
| 655 leaq -1(%r9),%r15 | |
| 656 jmp .Lsub4x | |
| 657 .align 16 | |
| 658 .Lsub4x: | |
| 659 movq %rax,0(%rdi,%r14,8) | |
| 660 movq %rdx,8(%rdi,%r14,8) | |
| 661 sbbq 16(%rcx,%r14,8),%rbx | |
| 662 movq 32(%rsi,%r14,8),%rax | |
| 663 movq 40(%rsi,%r14,8),%rdx | |
| 664 sbbq 24(%rcx,%r14,8),%rbp | |
| 665 movq %rbx,16(%rdi,%r14,8) | |
| 666 movq %rbp,24(%rdi,%r14,8) | |
| 667 sbbq 32(%rcx,%r14,8),%rax | |
| 668 movq 48(%rsi,%r14,8),%rbx | |
| 669 movq 56(%rsi,%r14,8),%rbp | |
| 670 sbbq 40(%rcx,%r14,8),%rdx | |
| 671 leaq 4(%r14),%r14 | |
| 672 decq %r15 | |
| 673 jnz .Lsub4x | |
| 674 | |
# Last group of the subtraction; %rax is reloaded with the top scratch word.
| 675 movq %rax,0(%rdi,%r14,8) | |
| 676 movq 32(%rsi,%r14,8),%rax | |
| 677 sbbq 16(%rcx,%r14,8),%rbx | |
| 678 movq %rdx,8(%rdi,%r14,8) | |
| 679 sbbq 24(%rcx,%r14,8),%rbp | |
| 680 movq %rbx,16(%rdi,%r14,8) | |
| 681 | |
# %rax = top scratch word minus the final borrow, used as a select mask
# (expected to be 0 or all-ones): %rsi = (scratch & mask) | (%rdi & ~mask).
| 682 sbbq $0,%rax | |
| 683 movq %rbp,24(%rdi,%r14,8) | |
| 684 xorq %r14,%r14 | |
| 685 andq %rax,%rsi | |
| 686 notq %rax | |
| 687 movq %rdi,%rcx | |
| 688 andq %rax,%rcx | |
| 689 leaq -1(%r9),%r15 | |
| 690 orq %rcx,%rsi | |
| 691 | |
# Copy the selected source to %rdi 16 bytes at a time while storing zeros
# (%xmm0) over the scratch area. %r14 is a byte offset in this loop.
| 692 movdqu (%rsi),%xmm1 | |
| 693 movdqa %xmm0,(%rsp) | |
| 694 movdqu %xmm1,(%rdi) | |
| 695 jmp .Lcopy4x | |
| 696 .align 16 | |
| 697 .Lcopy4x: | |
| 698 movdqu 16(%rsi,%r14,1),%xmm2 | |
| 699 movdqu 32(%rsi,%r14,1),%xmm1 | |
| 700 movdqa %xmm0,16(%rsp,%r14,1) | |
| 701 movdqu %xmm2,16(%rdi,%r14,1) | |
| 702 movdqa %xmm0,32(%rsp,%r14,1) | |
| 703 movdqu %xmm1,32(%rdi,%r14,1) | |
| 704 leaq 32(%r14),%r14 | |
| 705 decq %r15 | |
| 706 jnz .Lcopy4x | |
| 707 | |
# Restore %r9 to the word count so the saved %rsp slot can be addressed,
# finish the last 16 bytes of the copy, then run the epilogue.
| 708 shlq $2,%r9 | |
| 709 movdqu 16(%rsi,%r14,1),%xmm2 | |
| 710 movdqa %xmm0,16(%rsp,%r14,1) | |
| 711 movdqu %xmm2,16(%rdi,%r14,1) | |
| 712 movq 8(%rsp,%r9,8),%rsi | |
| 713 movq $1,%rax | |
| 714 movq (%rsi),%r15 | |
| 715 movq 8(%rsi),%r14 | |
| 716 movq 16(%rsi),%r13 | |
| 717 movq 24(%rsi),%r12 | |
| 718 movq 32(%rsi),%rbp | |
| 719 movq 40(%rsi),%rbx | |
| 720 leaq 48(%rsi),%rsp | |
| 721 .Lmul4x_epilogue: | |
# Hand-encoded rep ret (F3 C3).
| 722 .byte 0xf3,0xc3 | |
| 723 .size bn_mul4x_mont_gather5,.-bn_mul4x_mont_gather5 | |
# bn_scatter5 -- store a run of 64-bit words into a table with a 256-byte
# stride.
#
# Inputs, as the code below uses them:
#   %rdi   source words, read sequentially
#   %rsi   number of words; 0 returns immediately
#   %rdx   table base
#   %rcx   slot index; the first store goes to %rdx + %rcx*8
# Word k of the source lands at table + index*8 + k*256.
# Clobbers %rax, %rdx, %rdi, %rsi and the flags; no return value is set.
| 724 .globl bn_scatter5 | |
| 725 .type bn_scatter5,@function | |
| 726 .align 16 | |
| 727 bn_scatter5: | |
| 728 cmpq $0,%rsi | |
| 729 jz .Lscatter_epilogue | |
| 730 leaq (%rdx,%rcx,8),%rdx | |
| 731 .Lscatter: | |
| 732 movq (%rdi),%rax | |
| 733 leaq 8(%rdi),%rdi | |
| 734 movq %rax,(%rdx) | |
| 735 leaq 256(%rdx),%rdx | |
| 736 subq $1,%rsi | |
| 737 jnz .Lscatter | |
| 738 .Lscatter_epilogue: | |
# Hand-encoded rep ret (F3 C3).
| 739 .byte 0xf3,0xc3 | |
| 740 .size bn_scatter5,.-bn_scatter5 | |
| 741 | |
# bn_gather5 -- read a run of 64-bit words back out of a table with a
# 256-byte stride, using masked loads.
#
# Inputs, as the code below uses them:
#   %rdi   destination words, written sequentially
#   %rsi   number of words
#   %rdx   table base
#   %rcx   slot index; only bits 0-4 influence the result
# Clobbers %rax, %rcx, %rdx, %rdi, %rsi, %r11, %xmm0-%xmm7 and the flags.
#
# NOTE(review): unlike bn_scatter5 there is no zero check on %rsi, and the
# loop body runs before the count is tested, so a count of 0 would wrap the
# counter. Confirm that callers never pass 0.
| 742 .globl bn_gather5 | |
| 743 .type bn_gather5,@function | |
| 744 .align 16 | |
| 745 bn_gather5: | |
# Low 3 index bits -> qword offset folded into %rdx (with a +96 bias).
# Bits 3-4, inverted, choose which four consecutive qwords of
# .Lmagic_masks become the masks in %xmm4-%xmm7.
| 746 movq %rcx,%r11 | |
| 747 shrq $3,%rcx | |
| 748 andq $7,%r11 | |
| 749 notq %rcx | |
| 750 leaq .Lmagic_masks(%rip),%rax | |
| 751 andq $3,%rcx | |
| 752 leaq 96(%rdx,%r11,8),%rdx | |
| 753 movq 0(%rax,%rcx,8),%xmm4 | |
| 754 movq 8(%rax,%rcx,8),%xmm5 | |
| 755 movq 16(%rax,%rcx,8),%xmm6 | |
| 756 movq 24(%rax,%rcx,8),%xmm7 | |
| 757 jmp .Lgather | |
| 758 .align 16 | |
# Per word: load the qword at the same position in each of the four 64-byte
# groups of the current row, AND each with its mask, OR the four results,
# store the combined qword and step to the next 256-byte row. All four
# loads are issued whatever the value of index bits 3-4.
| 759 .Lgather: | |
| 760 movq -96(%rdx),%xmm0 | |
| 761 movq -32(%rdx),%xmm1 | |
| 762 pand %xmm4,%xmm0 | |
| 763 movq 32(%rdx),%xmm2 | |
| 764 pand %xmm5,%xmm1 | |
| 765 movq 96(%rdx),%xmm3 | |
| 766 pand %xmm6,%xmm2 | |
| 767 por %xmm1,%xmm0 | |
| 768 pand %xmm7,%xmm3 | |
| 769 por %xmm2,%xmm0 | |
| 770 leaq 256(%rdx),%rdx | |
| 771 por %xmm3,%xmm0 | |
| 772 | |
| 773 movq %xmm0,(%rdi) | |
| 774 leaq 8(%rdi),%rdi | |
| 775 subq $1,%rsi | |
| 776 jnz .Lgather | |
# Hand-encoded rep ret (F3 C3).
| 777 .byte 0xf3,0xc3 | |
| 778 .LSEH_end_bn_gather5: | |
| 779 .size bn_gather5,.-bn_gather5 | |
# .Lmagic_masks -- eight 64-bit values, 64-byte aligned: qword 3 is
# all-ones, every other qword is zero. The gather code reads four
# consecutive qwords starting at qword (~(index >> 3) & 3), so the all-ones
# qword lands in exactly one of %xmm4-%xmm7 and the remaining three masks
# are zero.
| 780 .align 64 | |
| 781 .Lmagic_masks: | |
| 782 .long 0,0, 0,0, 0,0, -1,-1 | |
| 783 .long 0,0, 0,0, 0,0, 0,0 | |
# NUL-terminated ASCII identification string: "Montgomery Multiplication
# with scatter/gather for x86_64, CRYPTOGAMS by <appro@openssl.org>".
# In this listing the directive is wrapped across several physical lines.
| 784 .byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105
,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97
,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71
,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,1
11,114,103,62,0 | |
| OLD | NEW |