| OLD | NEW |
| (Empty) |
| 1 #if defined(__x86_64__) | |
| 2 .text | |
| 3 | |
| 4 | |
| 5 | |
// _bn_mul_mont_gather5
// Presumably a Montgomery multiplication whose second factor is gathered from
// a table with masks (going by the name) - NOTE(review): confirm against the C
// prototype, which is not in this view.
// Register use as seen in the code below:
//   rdi      destination words (written in L$sub and L$copy)
//   rsi      first operand, read as (%rsi,%r15,8)
//   rdx      table base; sixteen 16-byte entries are read per 256-byte row
//   rcx      operand multiplied by the per-row factor rbp and subtracted in
//            L$sub (presumably the modulus)
//   r8       pointer to one qword that seeds the per-row factor rbp
//   r9d      word count; multiples of 8 are routed to L$mul4x_enter
//   8(%rsp)  32-bit index that is compared against counters to build the masks
// Returns rax = 1. rbx, rbp and r12-r15 are saved on entry and restored on exit.
| 6 .globl _bn_mul_mont_gather5 | |
| 7 .private_extern _bn_mul_mont_gather5 | |
| 8 | |
| 9 .p2align 6 | |
| 10 _bn_mul_mont_gather5: | |
// Word count not a multiple of 8 -> generic path below; otherwise the 4x path.
| 11 testl $7,%r9d | |
| 12 jnz L$mul_enter | |
| 13 jmp L$mul4x_enter | |
| 14 | |
| 15 .p2align 4 | |
| 16 L$mul_enter: | |
// Zero-extend the word count, remember the entry rsp in rax and fetch the
// 32-bit index that sits just above the return address.
| 17 movl %r9d,%r9d | |
| 18 movq %rsp,%rax | |
| 19 movd 8(%rsp),%xmm5 | |
| 20 leaq L$inc(%rip),%r10 | |
| 21 pushq %rbx | |
| 22 pushq %rbp | |
| 23 pushq %r12 | |
| 24 pushq %r13 | |
| 25 pushq %r14 | |
| 26 pushq %r15 | |
| 27 | |
// Reserve 264 bytes plus r9+2 qwords, then round rsp down to a 1024-byte
// boundary.
| 28 leaq 2(%r9),%r11 | |
| 29 negq %r11 | |
| 30 leaq -264(%rsp,%r11,8),%rsp | |
| 31 andq $-1024,%rsp | |
| 32 | |
// Keep the entry rsp in the qword after the r9+1 temporary words so the
// epilogue can find it again.
| 33 movq %rax,8(%rsp,%r9,8) | |
| 34 L$mul_body: | |
// r12 = table base + 128, so one row is addressed with displacements -128..112.
// xmm0/xmm1 are loaded from L$inc, which is defined outside this view.
// r10 is aligned so that the sixteen masks stored at 112(%r10)..352(%r10) can
// be accessed with movdqa.
| 35 leaq 128(%rdx),%r12 | |
| 36 movdqa 0(%r10),%xmm0 | |
| 37 movdqa 16(%r10),%xmm1 | |
| 38 leaq 24-112(%rsp,%r9,8),%r10 | |
| 39 andq $-16,%r10 | |
| 40 | |
// Broadcast the index to all four dword lanes, then produce sixteen pcmpeqd
// masks by comparing it with counters that are advanced with paddd.
| 41 pshufd $0,%xmm5,%xmm5 | |
| 42 movdqa %xmm1,%xmm4 | |
| 43 movdqa %xmm1,%xmm2 | |
| 44 paddd %xmm0,%xmm1 | |
| 45 pcmpeqd %xmm5,%xmm0 | |
// 0x67 is an address-size prefix byte placed in front of the next
// instruction; presumably padding - TODO confirm.
| 46 .byte 0x67 | |
| 47 movdqa %xmm4,%xmm3 | |
| 48 paddd %xmm1,%xmm2 | |
| 49 pcmpeqd %xmm5,%xmm1 | |
| 50 movdqa %xmm0,112(%r10) | |
| 51 movdqa %xmm4,%xmm0 | |
| 52 | |
| 53 paddd %xmm2,%xmm3 | |
| 54 pcmpeqd %xmm5,%xmm2 | |
| 55 movdqa %xmm1,128(%r10) | |
| 56 movdqa %xmm4,%xmm1 | |
| 57 | |
| 58 paddd %xmm3,%xmm0 | |
| 59 pcmpeqd %xmm5,%xmm3 | |
| 60 movdqa %xmm2,144(%r10) | |
| 61 movdqa %xmm4,%xmm2 | |
| 62 | |
| 63 paddd %xmm0,%xmm1 | |
| 64 pcmpeqd %xmm5,%xmm0 | |
| 65 movdqa %xmm3,160(%r10) | |
| 66 movdqa %xmm4,%xmm3 | |
| 67 paddd %xmm1,%xmm2 | |
| 68 pcmpeqd %xmm5,%xmm1 | |
| 69 movdqa %xmm0,176(%r10) | |
| 70 movdqa %xmm4,%xmm0 | |
| 71 | |
| 72 paddd %xmm2,%xmm3 | |
| 73 pcmpeqd %xmm5,%xmm2 | |
| 74 movdqa %xmm1,192(%r10) | |
| 75 movdqa %xmm4,%xmm1 | |
| 76 | |
| 77 paddd %xmm3,%xmm0 | |
| 78 pcmpeqd %xmm5,%xmm3 | |
| 79 movdqa %xmm2,208(%r10) | |
| 80 movdqa %xmm4,%xmm2 | |
| 81 | |
| 82 paddd %xmm0,%xmm1 | |
| 83 pcmpeqd %xmm5,%xmm0 | |
| 84 movdqa %xmm3,224(%r10) | |
| 85 movdqa %xmm4,%xmm3 | |
| 86 paddd %xmm1,%xmm2 | |
| 87 pcmpeqd %xmm5,%xmm1 | |
| 88 movdqa %xmm0,240(%r10) | |
| 89 movdqa %xmm4,%xmm0 | |
| 90 | |
| 91 paddd %xmm2,%xmm3 | |
| 92 pcmpeqd %xmm5,%xmm2 | |
| 93 movdqa %xmm1,256(%r10) | |
| 94 movdqa %xmm4,%xmm1 | |
| 95 | |
| 96 paddd %xmm3,%xmm0 | |
| 97 pcmpeqd %xmm5,%xmm3 | |
| 98 movdqa %xmm2,272(%r10) | |
| 99 movdqa %xmm4,%xmm2 | |
| 100 | |
| 101 paddd %xmm0,%xmm1 | |
| 102 pcmpeqd %xmm5,%xmm0 | |
| 103 movdqa %xmm3,288(%r10) | |
| 104 movdqa %xmm4,%xmm3 | |
| 105 paddd %xmm1,%xmm2 | |
| 106 pcmpeqd %xmm5,%xmm1 | |
| 107 movdqa %xmm0,304(%r10) | |
| 108 | |
| 109 paddd %xmm2,%xmm3 | |
| 110 .byte 0x67 | |
| 111 pcmpeqd %xmm5,%xmm2 | |
| 112 movdqa %xmm1,320(%r10) | |
| 113 | |
| 114 pcmpeqd %xmm5,%xmm3 | |
| 115 movdqa %xmm2,336(%r10) | |
// Gather for the first row: the last four masks are still live in xmm0-xmm3
// and are applied to entries 64..112(%r12) directly; the other twelve are
// re-read from the stack. All sixteen entries are loaded whatever the index is.
| 116 pand 64(%r12),%xmm0 | |
| 117 | |
| 118 pand 80(%r12),%xmm1 | |
| 119 pand 96(%r12),%xmm2 | |
| 120 movdqa %xmm3,352(%r10) | |
| 121 pand 112(%r12),%xmm3 | |
| 122 por %xmm2,%xmm0 | |
| 123 por %xmm3,%xmm1 | |
| 124 movdqa -128(%r12),%xmm4 | |
| 125 movdqa -112(%r12),%xmm5 | |
| 126 movdqa -96(%r12),%xmm2 | |
| 127 pand 112(%r10),%xmm4 | |
| 128 movdqa -80(%r12),%xmm3 | |
| 129 pand 128(%r10),%xmm5 | |
| 130 por %xmm4,%xmm0 | |
| 131 pand 144(%r10),%xmm2 | |
| 132 por %xmm5,%xmm1 | |
| 133 pand 160(%r10),%xmm3 | |
| 134 por %xmm2,%xmm0 | |
| 135 por %xmm3,%xmm1 | |
| 136 movdqa -64(%r12),%xmm4 | |
| 137 movdqa -48(%r12),%xmm5 | |
| 138 movdqa -32(%r12),%xmm2 | |
| 139 pand 176(%r10),%xmm4 | |
| 140 movdqa -16(%r12),%xmm3 | |
| 141 pand 192(%r10),%xmm5 | |
| 142 por %xmm4,%xmm0 | |
| 143 pand 208(%r10),%xmm2 | |
| 144 por %xmm5,%xmm1 | |
| 145 pand 224(%r10),%xmm3 | |
| 146 por %xmm2,%xmm0 | |
| 147 por %xmm3,%xmm1 | |
| 148 movdqa 0(%r12),%xmm4 | |
| 149 movdqa 16(%r12),%xmm5 | |
| 150 movdqa 32(%r12),%xmm2 | |
| 151 pand 240(%r10),%xmm4 | |
| 152 movdqa 48(%r12),%xmm3 | |
| 153 pand 256(%r10),%xmm5 | |
| 154 por %xmm4,%xmm0 | |
| 155 pand 272(%r10),%xmm2 | |
| 156 por %xmm5,%xmm1 | |
| 157 pand 288(%r10),%xmm3 | |
| 158 por %xmm2,%xmm0 | |
| 159 por %xmm3,%xmm1 | |
| 160 por %xmm1,%xmm0 | |
// pshufd $0x4e swaps the two qword halves; OR-ing the swapped copy folds the
// surviving qword into the low half. r12 then advances to the next row.
| 161 pshufd $0x4e,%xmm0,%xmm1 | |
| 162 por %xmm1,%xmm0 | |
| 163 leaq 256(%r12),%r12 | |
// Hand-encoded movq %xmm0,%rbx (66 48 0F 7E C3): rbx = gathered word.
| 164 .byte 102,72,15,126,195 | |
| 165 | |
// r8 = qword at (r8); rax = first word at (rsi).
| 166 movq (%r8),%r8 | |
| 167 movq (%rsi),%rax | |
| 168 | |
// r14 = pass counter, r15 = word index within a pass.
| 169 xorq %r14,%r14 | |
| 170 xorq %r15,%r15 | |
| 171 | |
// rbp = r8 * low64((rsi)[0] * rbx). Adding (rcx)[0] * rbp to that low word
// only feeds the carry: the low sum itself is never stored.
| 172 movq %r8,%rbp | |
| 173 mulq %rbx | |
| 174 movq %rax,%r10 | |
| 175 movq (%rcx),%rax | |
| 176 | |
| 177 imulq %r10,%rbp | |
| 178 movq %rdx,%r11 | |
| 179 | |
| 180 mulq %rbp | |
| 181 addq %rax,%r10 | |
| 182 movq 8(%rsi),%rax | |
| 183 adcq $0,%rdx | |
| 184 movq %rdx,%r13 | |
| 185 | |
| 186 leaq 1(%r15),%r15 | |
| 187 jmp L$1st_enter | |
| 188 | |
| 189 .p2align 4 | |
// First pass: for every remaining index, add (rsi)[j]*rbx and (rcx)[j]*rbp
// with their carries and store the sum one slot down, at -16(%rsp,%r15,8).
| 190 L$1st: | |
| 191 addq %rax,%r13 | |
| 192 movq (%rsi,%r15,8),%rax | |
| 193 adcq $0,%rdx | |
| 194 addq %r11,%r13 | |
| 195 movq %r10,%r11 | |
| 196 adcq $0,%rdx | |
| 197 movq %r13,-16(%rsp,%r15,8) | |
| 198 movq %rdx,%r13 | |
| 199 | |
| 200 L$1st_enter: | |
| 201 mulq %rbx | |
| 202 addq %rax,%r11 | |
| 203 movq (%rcx,%r15,8),%rax | |
| 204 adcq $0,%rdx | |
| 205 leaq 1(%r15),%r15 | |
| 206 movq %rdx,%r10 | |
| 207 | |
| 208 mulq %rbp | |
| 209 cmpq %r9,%r15 | |
| 210 jne L$1st | |
| 211 | |
| 212 | |
// Finish the first pass: store the last two sum words and the final carry at
// -16, -8 and 0 relative to (%rsp,%r9,8).
| 213 addq %rax,%r13 | |
| 214 adcq $0,%rdx | |
| 215 addq %r11,%r13 | |
| 216 adcq $0,%rdx | |
| 217 movq %r13,-16(%rsp,%r9,8) | |
| 218 movq %rdx,%r13 | |
| 219 movq %r10,%r11 | |
| 220 | |
| 221 xorq %rdx,%rdx | |
| 222 addq %r11,%r13 | |
| 223 adcq $0,%rdx | |
| 224 movq %r13,-8(%rsp,%r9,8) | |
| 225 movq %rdx,(%rsp,%r9,8) | |
| 226 | |
| 227 leaq 1(%r14),%r14 | |
| 228 jmp L$outer | |
| 229 .p2align 4 | |
// Remaining passes (r14 = 1 .. r9-1): gather the next row's word into rbx and
// accumulate on top of the temporary at (%rsp).
// rdx is biased by +128, so -128(%rdx)..112(%rdx) address the same sixteen
// masks that were stored through 112(%r10)..352(%r10) above.
| 230 L$outer: | |
| 231 leaq 24+128(%rsp,%r9,8),%rdx | |
| 232 andq $-16,%rdx | |
| 233 pxor %xmm4,%xmm4 | |
| 234 pxor %xmm5,%xmm5 | |
| 235 movdqa -128(%r12),%xmm0 | |
| 236 movdqa -112(%r12),%xmm1 | |
| 237 movdqa -96(%r12),%xmm2 | |
| 238 movdqa -80(%r12),%xmm3 | |
| 239 pand -128(%rdx),%xmm0 | |
| 240 pand -112(%rdx),%xmm1 | |
| 241 por %xmm0,%xmm4 | |
| 242 pand -96(%rdx),%xmm2 | |
| 243 por %xmm1,%xmm5 | |
| 244 pand -80(%rdx),%xmm3 | |
| 245 por %xmm2,%xmm4 | |
| 246 por %xmm3,%xmm5 | |
| 247 movdqa -64(%r12),%xmm0 | |
| 248 movdqa -48(%r12),%xmm1 | |
| 249 movdqa -32(%r12),%xmm2 | |
| 250 movdqa -16(%r12),%xmm3 | |
| 251 pand -64(%rdx),%xmm0 | |
| 252 pand -48(%rdx),%xmm1 | |
| 253 por %xmm0,%xmm4 | |
| 254 pand -32(%rdx),%xmm2 | |
| 255 por %xmm1,%xmm5 | |
| 256 pand -16(%rdx),%xmm3 | |
| 257 por %xmm2,%xmm4 | |
| 258 por %xmm3,%xmm5 | |
| 259 movdqa 0(%r12),%xmm0 | |
| 260 movdqa 16(%r12),%xmm1 | |
| 261 movdqa 32(%r12),%xmm2 | |
| 262 movdqa 48(%r12),%xmm3 | |
| 263 pand 0(%rdx),%xmm0 | |
| 264 pand 16(%rdx),%xmm1 | |
| 265 por %xmm0,%xmm4 | |
| 266 pand 32(%rdx),%xmm2 | |
| 267 por %xmm1,%xmm5 | |
| 268 pand 48(%rdx),%xmm3 | |
| 269 por %xmm2,%xmm4 | |
| 270 por %xmm3,%xmm5 | |
| 271 movdqa 64(%r12),%xmm0 | |
| 272 movdqa 80(%r12),%xmm1 | |
| 273 movdqa 96(%r12),%xmm2 | |
| 274 movdqa 112(%r12),%xmm3 | |
| 275 pand 64(%rdx),%xmm0 | |
| 276 pand 80(%rdx),%xmm1 | |
| 277 por %xmm0,%xmm4 | |
| 278 pand 96(%rdx),%xmm2 | |
| 279 por %xmm1,%xmm5 | |
| 280 pand 112(%rdx),%xmm3 | |
| 281 por %xmm2,%xmm4 | |
| 282 por %xmm3,%xmm5 | |
| 283 por %xmm5,%xmm4 | |
| 284 pshufd $0x4e,%xmm4,%xmm0 | |
| 285 por %xmm4,%xmm0 | |
| 286 leaq 256(%r12),%r12 | |
| 287 | |
| 288 movq (%rsi),%rax | |
// Hand-encoded movq %xmm0,%rbx: rbx = word gathered for this pass.
| 289 .byte 102,72,15,126,195 | |
| 290 | |
| 291 xorq %r15,%r15 | |
| 292 movq %r8,%rbp | |
| 293 movq (%rsp),%r10 | |
| 294 | |
// Same shape as the first pass, except that the previous temporary words
// (loaded into r10) are added in as well.
| 295 mulq %rbx | |
| 296 addq %rax,%r10 | |
| 297 movq (%rcx),%rax | |
| 298 adcq $0,%rdx | |
| 299 | |
| 300 imulq %r10,%rbp | |
| 301 movq %rdx,%r11 | |
| 302 | |
| 303 mulq %rbp | |
| 304 addq %rax,%r10 | |
| 305 movq 8(%rsi),%rax | |
| 306 adcq $0,%rdx | |
| 307 movq 8(%rsp),%r10 | |
| 308 movq %rdx,%r13 | |
| 309 | |
| 310 leaq 1(%r15),%r15 | |
| 311 jmp L$inner_enter | |
| 312 | |
| 313 .p2align 4 | |
| 314 L$inner: | |
| 315 addq %rax,%r13 | |
| 316 movq (%rsi,%r15,8),%rax | |
| 317 adcq $0,%rdx | |
| 318 addq %r10,%r13 | |
| 319 movq (%rsp,%r15,8),%r10 | |
| 320 adcq $0,%rdx | |
| 321 movq %r13,-16(%rsp,%r15,8) | |
| 322 movq %rdx,%r13 | |
| 323 | |
| 324 L$inner_enter: | |
| 325 mulq %rbx | |
| 326 addq %rax,%r11 | |
| 327 movq (%rcx,%r15,8),%rax | |
| 328 adcq $0,%rdx | |
| 329 addq %r11,%r10 | |
| 330 movq %rdx,%r11 | |
| 331 adcq $0,%r11 | |
| 332 leaq 1(%r15),%r15 | |
| 333 | |
| 334 mulq %rbp | |
| 335 cmpq %r9,%r15 | |
| 336 jne L$inner | |
| 337 | |
| 338 addq %rax,%r13 | |
| 339 adcq $0,%rdx | |
| 340 addq %r10,%r13 | |
| 341 movq (%rsp,%r9,8),%r10 | |
| 342 adcq $0,%rdx | |
| 343 movq %r13,-16(%rsp,%r9,8) | |
| 344 movq %rdx,%r13 | |
| 345 | |
| 346 xorq %rdx,%rdx | |
| 347 addq %r11,%r13 | |
| 348 adcq $0,%rdx | |
| 349 addq %r10,%r13 | |
| 350 adcq $0,%rdx | |
| 351 movq %r13,-8(%rsp,%r9,8) | |
| 352 movq %rdx,(%rsp,%r9,8) | |
| 353 | |
| 354 leaq 1(%r14),%r14 | |
| 355 cmpq %r9,%r14 | |
| 356 jb L$outer | |
| 357 | |
// Subtraction step: xorq clears CF, then the sbbq chain writes
// temporary[i] - (rcx)[i] into (rdi)[i]. leaq and decq leave CF untouched, so
// the borrow carries across iterations.
| 358 xorq %r14,%r14 | |
| 359 movq (%rsp),%rax | |
| 360 leaq (%rsp),%rsi | |
| 361 movq %r9,%r15 | |
| 362 jmp L$sub | |
| 363 .p2align 4 | |
| 364 L$sub: sbbq (%rcx,%r14,8),%rax | |
| 365 movq %rax,(%rdi,%r14,8) | |
| 366 movq 8(%rsi,%r14,8),%rax | |
| 367 leaq 1(%r14),%r14 | |
| 368 decq %r15 | |
| 369 jnz L$sub | |
| 370 | |
// rax = top temporary word minus the final borrow; it is used below as a
// bitwise select mask (presumably always 0 or all-ones - TODO confirm).
| 371 sbbq $0,%rax | |
| 372 xorq %r14,%r14 | |
| 373 movq %r9,%r15 | |
| 374 .p2align 4 | |
// Branch-free select: rsi = ((tmp ^ dst) & rax) ^ dst keeps the temporary word
// where rax bits are set and the subtracted word already in (rdi) elsewhere.
// Each temporary slot is overwritten with the loop index once it has been read.
| 375 L$copy: | |
| 376 movq (%rsp,%r14,8),%rsi | |
| 377 movq (%rdi,%r14,8),%rcx | |
| 378 xorq %rcx,%rsi | |
| 379 andq %rax,%rsi | |
| 380 xorq %rcx,%rsi | |
| 381 movq %r14,(%rsp,%r14,8) | |
| 382 movq %rsi,(%rdi,%r14,8) | |
| 383 leaq 1(%r14),%r14 | |
| 384 subq $1,%r15 | |
| 385 jnz L$copy | |
| 386 | |
// Epilogue: reload the entry rsp saved in the prologue, set the return value
// to 1 and restore the six callee-saved registers pushed below it.
| 387 movq 8(%rsp,%r9,8),%rsi | |
| 388 movq $1,%rax | |
| 389 | |
| 390 movq -48(%rsi),%r15 | |
| 391 movq -40(%rsi),%r14 | |
| 392 movq -32(%rsi),%r13 | |
| 393 movq -24(%rsi),%r12 | |
| 394 movq -16(%rsi),%rbp | |
| 395 movq -8(%rsi),%rbx | |
| 396 leaq (%rsi),%rsp | |
| 397 L$mul_epilogue: | |
// 0xf3,0xc3 is the encoding of "rep ret".
| 398 .byte 0xf3,0xc3 | |
| 399 | |
| 400 | |
// bn_mul4x_mont_gather5 / L$mul4x_enter
// Entry used by _bn_mul_mont_gather5 when the word count in r9d is a multiple
// of 8. It builds a stack frame, calls mul4x_internal and returns rax = 1.
// rdi, rsi, rdx, rcx and r8 are passed through to mul4x_internal unchanged.
| 401 .p2align 5 | |
| 402 bn_mul4x_mont_gather5: | |
| 403 L$mul4x_enter: | |
// 0x67 is an address-size prefix byte placed in front of the next
// instruction; presumably padding - TODO confirm.
| 404 .byte 0x67 | |
// rax keeps the entry rsp; mul4x_internal reads the index from 8(%rax).
| 405 movq %rsp,%rax | |
| 406 pushq %rbx | |
| 407 pushq %rbp | |
| 408 pushq %r12 | |
| 409 pushq %r13 | |
| 410 pushq %r14 | |
| 411 pushq %r15 | |
| 412 | |
| 413 .byte 0x67 | |
// r9 = word count * 8 (size in bytes, zero-extended), r10 = 3 * that size,
// then r9 is negated for the address arithmetic below.
| 414 shll $3,%r9d | |
| 415 leaq (%r9,%r9,2),%r10 | |
| 416 negq %r9 | |
| 417 | |
| 418 | |
| 419 | |
| 420 | |
| 421 | |
| 422 | |
| 423 | |
| 424 | |
| 425 | |
| 426 | |
// Frame size is 320 bytes plus twice the operand size. r11 is the distance
// between the candidate frame and rdi modulo 4096; rsp is moved down by an
// extra amount derived from it (presumably so the frame and the destination
// buffer do not alias modulo the page size - TODO confirm).
| 427 leaq -320(%rsp,%r9,2),%r11 | |
| 428 subq %rdi,%r11 | |
| 429 andq $4095,%r11 | |
| 430 cmpq %r11,%r10 | |
| 431 jb L$mul4xsp_alt | |
| 432 subq %r11,%rsp | |
| 433 leaq -320(%rsp,%r9,2),%rsp | |
| 434 jmp L$mul4xsp_done | |
| 435 | |
| 436 .p2align 5 | |
// Alternative placement: the extra adjustment is r11 - (4096 - frame size),
// clamped to zero when that subtraction borrows.
| 437 L$mul4xsp_alt: | |
| 438 leaq 4096-320(,%r9,2),%r10 | |
| 439 leaq -320(%rsp,%r9,2),%rsp | |
| 440 subq %r10,%r11 | |
| 441 movq $0,%r10 | |
| 442 cmovcq %r10,%r11 | |
| 443 subq %r11,%rsp | |
| 444 L$mul4xsp_done: | |
// Align the frame to 64 bytes, make r9 positive again and keep the entry rsp
// at 40(%rsp) for the epilogue.
| 445 andq $-64,%rsp | |
| 446 negq %r9 | |
| 447 | |
| 448 movq %rax,40(%rsp) | |
| 449 L$mul4x_body: | |
| 450 | |
| 451 call mul4x_internal | |
| 452 | |
// Epilogue: reload the entry rsp, return 1 and restore the callee-saved
// registers that were pushed just below it.
| 453 movq 40(%rsp),%rsi | |
| 454 movq $1,%rax | |
| 455 | |
| 456 movq -48(%rsi),%r15 | |
| 457 movq -40(%rsi),%r14 | |
| 458 movq -32(%rsi),%r13 | |
| 459 movq -24(%rsi),%r12 | |
| 460 movq -16(%rsi),%rbp | |
| 461 movq -8(%rsi),%rbx | |
| 462 leaq (%rsi),%rsp | |
| 463 L$mul4x_epilogue: | |
// 0xf3,0xc3 is the encoding of "rep ret".
| 464 .byte 0xf3,0xc3 | |
| 465 | |
| 466 | |
| 467 | |
// mul4x_internal
// Worker shared by bn_mul4x_mont_gather5 and _bn_power5 (both call it).
// Inputs as used below:
//   rax  caller's entry rsp; the 32-bit index is read from 8(%rax)
//   rdx  table base; rows of sixteen 16-byte entries, 256 bytes apart
//   rsi  first operand, rcx second operand, r9 operand size in bytes
//   r8   pointer to one qword that seeds the per-row factor rbp
//   rdi  saved to 56+8(%rsp) and reloaded just before the final jump
// The routine does not return itself: it ends by jumping to
// L$sqr4x_sub_entry, which is defined outside this view.
| 468 .p2align 5 | |
| 469 mul4x_internal: | |
// r13 = rdx + 128 + 32*r9: the value of r12 at which L$outer4x stops.
// r10 places the sixteen masks at 112(%r10)..352(%r10).
// xmm0/xmm1 are loaded from L$inc, which is defined outside this view.
| 470 shlq $5,%r9 | |
| 471 movd 8(%rax),%xmm5 | |
| 472 leaq L$inc(%rip),%rax | |
| 473 leaq 128(%rdx,%r9,1),%r13 | |
| 474 shrq $5,%r9 | |
| 475 movdqa 0(%rax),%xmm0 | |
| 476 movdqa 16(%rax),%xmm1 | |
| 477 leaq 88-112(%rsp,%r9,1),%r10 | |
| 478 leaq 128(%rdx),%r12 | |
| 479 | |
// Broadcast the index to all four dword lanes, then produce sixteen pcmpeqd
// masks by comparing it with counters that are advanced with paddd.
| 480 pshufd $0,%xmm5,%xmm5 | |
| 481 movdqa %xmm1,%xmm4 | |
| 482 .byte 0x67,0x67 | |
| 483 movdqa %xmm1,%xmm2 | |
| 484 paddd %xmm0,%xmm1 | |
| 485 pcmpeqd %xmm5,%xmm0 | |
| 486 .byte 0x67 | |
| 487 movdqa %xmm4,%xmm3 | |
| 488 paddd %xmm1,%xmm2 | |
| 489 pcmpeqd %xmm5,%xmm1 | |
| 490 movdqa %xmm0,112(%r10) | |
| 491 movdqa %xmm4,%xmm0 | |
| 492 | |
| 493 paddd %xmm2,%xmm3 | |
| 494 pcmpeqd %xmm5,%xmm2 | |
| 495 movdqa %xmm1,128(%r10) | |
| 496 movdqa %xmm4,%xmm1 | |
| 497 | |
| 498 paddd %xmm3,%xmm0 | |
| 499 pcmpeqd %xmm5,%xmm3 | |
| 500 movdqa %xmm2,144(%r10) | |
| 501 movdqa %xmm4,%xmm2 | |
| 502 | |
| 503 paddd %xmm0,%xmm1 | |
| 504 pcmpeqd %xmm5,%xmm0 | |
| 505 movdqa %xmm3,160(%r10) | |
| 506 movdqa %xmm4,%xmm3 | |
| 507 paddd %xmm1,%xmm2 | |
| 508 pcmpeqd %xmm5,%xmm1 | |
| 509 movdqa %xmm0,176(%r10) | |
| 510 movdqa %xmm4,%xmm0 | |
| 511 | |
| 512 paddd %xmm2,%xmm3 | |
| 513 pcmpeqd %xmm5,%xmm2 | |
| 514 movdqa %xmm1,192(%r10) | |
| 515 movdqa %xmm4,%xmm1 | |
| 516 | |
| 517 paddd %xmm3,%xmm0 | |
| 518 pcmpeqd %xmm5,%xmm3 | |
| 519 movdqa %xmm2,208(%r10) | |
| 520 movdqa %xmm4,%xmm2 | |
| 521 | |
| 522 paddd %xmm0,%xmm1 | |
| 523 pcmpeqd %xmm5,%xmm0 | |
| 524 movdqa %xmm3,224(%r10) | |
| 525 movdqa %xmm4,%xmm3 | |
| 526 paddd %xmm1,%xmm2 | |
| 527 pcmpeqd %xmm5,%xmm1 | |
| 528 movdqa %xmm0,240(%r10) | |
| 529 movdqa %xmm4,%xmm0 | |
| 530 | |
| 531 paddd %xmm2,%xmm3 | |
| 532 pcmpeqd %xmm5,%xmm2 | |
| 533 movdqa %xmm1,256(%r10) | |
| 534 movdqa %xmm4,%xmm1 | |
| 535 | |
| 536 paddd %xmm3,%xmm0 | |
| 537 pcmpeqd %xmm5,%xmm3 | |
| 538 movdqa %xmm2,272(%r10) | |
| 539 movdqa %xmm4,%xmm2 | |
| 540 | |
| 541 paddd %xmm0,%xmm1 | |
| 542 pcmpeqd %xmm5,%xmm0 | |
| 543 movdqa %xmm3,288(%r10) | |
| 544 movdqa %xmm4,%xmm3 | |
| 545 paddd %xmm1,%xmm2 | |
| 546 pcmpeqd %xmm5,%xmm1 | |
| 547 movdqa %xmm0,304(%r10) | |
| 548 | |
| 549 paddd %xmm2,%xmm3 | |
| 550 .byte 0x67 | |
| 551 pcmpeqd %xmm5,%xmm2 | |
| 552 movdqa %xmm1,320(%r10) | |
| 553 | |
| 554 pcmpeqd %xmm5,%xmm3 | |
| 555 movdqa %xmm2,336(%r10) | |
// Gather for the first row: AND every one of the sixteen entries with its
// mask and OR the results together; all entries are loaded whatever the
// index is.
| 556 pand 64(%r12),%xmm0 | |
| 557 | |
| 558 pand 80(%r12),%xmm1 | |
| 559 pand 96(%r12),%xmm2 | |
| 560 movdqa %xmm3,352(%r10) | |
| 561 pand 112(%r12),%xmm3 | |
| 562 por %xmm2,%xmm0 | |
| 563 por %xmm3,%xmm1 | |
| 564 movdqa -128(%r12),%xmm4 | |
| 565 movdqa -112(%r12),%xmm5 | |
| 566 movdqa -96(%r12),%xmm2 | |
| 567 pand 112(%r10),%xmm4 | |
| 568 movdqa -80(%r12),%xmm3 | |
| 569 pand 128(%r10),%xmm5 | |
| 570 por %xmm4,%xmm0 | |
| 571 pand 144(%r10),%xmm2 | |
| 572 por %xmm5,%xmm1 | |
| 573 pand 160(%r10),%xmm3 | |
| 574 por %xmm2,%xmm0 | |
| 575 por %xmm3,%xmm1 | |
| 576 movdqa -64(%r12),%xmm4 | |
| 577 movdqa -48(%r12),%xmm5 | |
| 578 movdqa -32(%r12),%xmm2 | |
| 579 pand 176(%r10),%xmm4 | |
| 580 movdqa -16(%r12),%xmm3 | |
| 581 pand 192(%r10),%xmm5 | |
| 582 por %xmm4,%xmm0 | |
| 583 pand 208(%r10),%xmm2 | |
| 584 por %xmm5,%xmm1 | |
| 585 pand 224(%r10),%xmm3 | |
| 586 por %xmm2,%xmm0 | |
| 587 por %xmm3,%xmm1 | |
| 588 movdqa 0(%r12),%xmm4 | |
| 589 movdqa 16(%r12),%xmm5 | |
| 590 movdqa 32(%r12),%xmm2 | |
| 591 pand 240(%r10),%xmm4 | |
| 592 movdqa 48(%r12),%xmm3 | |
| 593 pand 256(%r10),%xmm5 | |
| 594 por %xmm4,%xmm0 | |
| 595 pand 272(%r10),%xmm2 | |
| 596 por %xmm5,%xmm1 | |
| 597 pand 288(%r10),%xmm3 | |
| 598 por %xmm2,%xmm0 | |
| 599 por %xmm3,%xmm1 | |
| 600 por %xmm1,%xmm0 | |
// pshufd $0x4e swaps the two qword halves; OR-ing the swapped copy folds the
// surviving qword into the low half. r12 then advances to the next row.
| 601 pshufd $0x4e,%xmm0,%xmm1 | |
| 602 por %xmm1,%xmm0 | |
| 603 leaq 256(%r12),%r12 | |
// Hand-encoded movq %xmm0,%rbx (66 48 0F 7E C3): rbx = gathered word.
| 604 .byte 102,72,15,126,195 | |
| 605 | |
// Spill the table end pointer (compared with r12 after each pass) and the
// destination pointer.
| 606 movq %r13,16+8(%rsp) | |
| 607 movq %rdi,56+8(%rsp) | |
| 608 | |
// r8 = qword at (r8). rsi is moved to the end of its operand and r9 negated,
// so (%rsi,%r9,1) addresses the operand's first word from here on.
| 609 movq (%r8),%r8 | |
| 610 movq (%rsi),%rax | |
| 611 leaq (%rsi,%r9,1),%rsi | |
| 612 negq %r9 | |
| 613 | |
// rbp = r8 * low64(first word * rbx); r14 = start of the temporary at
// 64+8(%rsp).
| 614 movq %r8,%rbp | |
| 615 mulq %rbx | |
| 616 movq %rax,%r10 | |
| 617 movq (%rcx),%rax | |
| 618 | |
| 619 imulq %r10,%rbp | |
| 620 leaq 64+8(%rsp),%r14 | |
| 621 movq %rdx,%r11 | |
| 622 | |
| 623 mulq %rbp | |
| 624 addq %rax,%r10 | |
| 625 movq 8(%rsi,%r9,1),%rax | |
| 626 adcq $0,%rdx | |
| 627 movq %rdx,%rdi | |
| 628 | |
| 629 mulq %rbx | |
| 630 addq %rax,%r11 | |
| 631 movq 8(%rcx),%rax | |
| 632 adcq $0,%rdx | |
| 633 movq %rdx,%r10 | |
| 634 | |
| 635 mulq %rbp | |
| 636 addq %rax,%rdi | |
| 637 movq 16(%rsi,%r9,1),%rax | |
| 638 adcq $0,%rdx | |
| 639 addq %r11,%rdi | |
| 640 leaq 32(%r9),%r15 | |
| 641 leaq 32(%rcx),%rcx | |
| 642 adcq $0,%rdx | |
| 643 movq %rdi,(%r14) | |
| 644 movq %rdx,%r13 | |
| 645 jmp L$1st4x | |
| 646 | |
| 647 .p2align 5 | |
// First pass, four words per iteration. r15 starts at 32 - size and is
// stepped by 32 until it reaches zero; rcx and r14 advance by 32 bytes each
// round.
| 648 L$1st4x: | |
| 649 mulq %rbx | |
| 650 addq %rax,%r10 | |
| 651 movq -16(%rcx),%rax | |
| 652 leaq 32(%r14),%r14 | |
| 653 adcq $0,%rdx | |
| 654 movq %rdx,%r11 | |
| 655 | |
| 656 mulq %rbp | |
| 657 addq %rax,%r13 | |
| 658 movq -8(%rsi,%r15,1),%rax | |
| 659 adcq $0,%rdx | |
| 660 addq %r10,%r13 | |
| 661 adcq $0,%rdx | |
| 662 movq %r13,-24(%r14) | |
| 663 movq %rdx,%rdi | |
| 664 | |
| 665 mulq %rbx | |
| 666 addq %rax,%r11 | |
| 667 movq -8(%rcx),%rax | |
| 668 adcq $0,%rdx | |
| 669 movq %rdx,%r10 | |
| 670 | |
| 671 mulq %rbp | |
| 672 addq %rax,%rdi | |
| 673 movq (%rsi,%r15,1),%rax | |
| 674 adcq $0,%rdx | |
| 675 addq %r11,%rdi | |
| 676 adcq $0,%rdx | |
| 677 movq %rdi,-16(%r14) | |
| 678 movq %rdx,%r13 | |
| 679 | |
| 680 mulq %rbx | |
| 681 addq %rax,%r10 | |
| 682 movq 0(%rcx),%rax | |
| 683 adcq $0,%rdx | |
| 684 movq %rdx,%r11 | |
| 685 | |
| 686 mulq %rbp | |
| 687 addq %rax,%r13 | |
| 688 movq 8(%rsi,%r15,1),%rax | |
| 689 adcq $0,%rdx | |
| 690 addq %r10,%r13 | |
| 691 adcq $0,%rdx | |
| 692 movq %r13,-8(%r14) | |
| 693 movq %rdx,%rdi | |
| 694 | |
| 695 mulq %rbx | |
| 696 addq %rax,%r11 | |
| 697 movq 8(%rcx),%rax | |
| 698 adcq $0,%rdx | |
| 699 movq %rdx,%r10 | |
| 700 | |
| 701 mulq %rbp | |
| 702 addq %rax,%rdi | |
| 703 movq 16(%rsi,%r15,1),%rax | |
| 704 adcq $0,%rdx | |
| 705 addq %r11,%rdi | |
| 706 leaq 32(%rcx),%rcx | |
| 707 adcq $0,%rdx | |
| 708 movq %rdi,(%r14) | |
| 709 movq %rdx,%r13 | |
| 710 | |
| 711 addq $32,%r15 | |
| 712 jnz L$1st4x | |
| 713 | |
// Tail of the first pass: the last words are handled outside the loop, rax is
// reloaded with the first word at (%rsi,%r9,1) for the next pass and rcx is
// rewound by the operand size.
| 714 mulq %rbx | |
| 715 addq %rax,%r10 | |
| 716 movq -16(%rcx),%rax | |
| 717 leaq 32(%r14),%r14 | |
| 718 adcq $0,%rdx | |
| 719 movq %rdx,%r11 | |
| 720 | |
| 721 mulq %rbp | |
| 722 addq %rax,%r13 | |
| 723 movq -8(%rsi),%rax | |
| 724 adcq $0,%rdx | |
| 725 addq %r10,%r13 | |
| 726 adcq $0,%rdx | |
| 727 movq %r13,-24(%r14) | |
| 728 movq %rdx,%rdi | |
| 729 | |
| 730 mulq %rbx | |
| 731 addq %rax,%r11 | |
| 732 movq -8(%rcx),%rax | |
| 733 adcq $0,%rdx | |
| 734 movq %rdx,%r10 | |
| 735 | |
| 736 mulq %rbp | |
| 737 addq %rax,%rdi | |
| 738 movq (%rsi,%r9,1),%rax | |
| 739 adcq $0,%rdx | |
| 740 addq %r11,%rdi | |
| 741 adcq $0,%rdx | |
| 742 movq %rdi,-16(%r14) | |
| 743 movq %rdx,%r13 | |
| 744 | |
| 745 leaq (%rcx,%r9,1),%rcx | |
| 746 | |
// rdi = carry out of the top word; it is written to (%r14) at the start of
// the next pass.
| 747 xorq %rdi,%rdi | |
| 748 addq %r10,%r13 | |
| 749 adcq $0,%rdi | |
| 750 movq %r13,-8(%r14) | |
| 751 | |
| 752 jmp L$outer4x | |
| 753 | |
| 754 .p2align 5 | |
// Subsequent passes: gather the next row's word into rbx, then accumulate on
// top of the temporary. r14 points just past the last temporary word here, so
// -128(%rdx)..112(%rdx) address the sixteen masks stored during setup.
| 755 L$outer4x: | |
| 756 leaq 16+128(%r14),%rdx | |
| 757 pxor %xmm4,%xmm4 | |
| 758 pxor %xmm5,%xmm5 | |
| 759 movdqa -128(%r12),%xmm0 | |
| 760 movdqa -112(%r12),%xmm1 | |
| 761 movdqa -96(%r12),%xmm2 | |
| 762 movdqa -80(%r12),%xmm3 | |
| 763 pand -128(%rdx),%xmm0 | |
| 764 pand -112(%rdx),%xmm1 | |
| 765 por %xmm0,%xmm4 | |
| 766 pand -96(%rdx),%xmm2 | |
| 767 por %xmm1,%xmm5 | |
| 768 pand -80(%rdx),%xmm3 | |
| 769 por %xmm2,%xmm4 | |
| 770 por %xmm3,%xmm5 | |
| 771 movdqa -64(%r12),%xmm0 | |
| 772 movdqa -48(%r12),%xmm1 | |
| 773 movdqa -32(%r12),%xmm2 | |
| 774 movdqa -16(%r12),%xmm3 | |
| 775 pand -64(%rdx),%xmm0 | |
| 776 pand -48(%rdx),%xmm1 | |
| 777 por %xmm0,%xmm4 | |
| 778 pand -32(%rdx),%xmm2 | |
| 779 por %xmm1,%xmm5 | |
| 780 pand -16(%rdx),%xmm3 | |
| 781 por %xmm2,%xmm4 | |
| 782 por %xmm3,%xmm5 | |
| 783 movdqa 0(%r12),%xmm0 | |
| 784 movdqa 16(%r12),%xmm1 | |
| 785 movdqa 32(%r12),%xmm2 | |
| 786 movdqa 48(%r12),%xmm3 | |
| 787 pand 0(%rdx),%xmm0 | |
| 788 pand 16(%rdx),%xmm1 | |
| 789 por %xmm0,%xmm4 | |
| 790 pand 32(%rdx),%xmm2 | |
| 791 por %xmm1,%xmm5 | |
| 792 pand 48(%rdx),%xmm3 | |
| 793 por %xmm2,%xmm4 | |
| 794 por %xmm3,%xmm5 | |
| 795 movdqa 64(%r12),%xmm0 | |
| 796 movdqa 80(%r12),%xmm1 | |
| 797 movdqa 96(%r12),%xmm2 | |
| 798 movdqa 112(%r12),%xmm3 | |
| 799 pand 64(%rdx),%xmm0 | |
| 800 pand 80(%rdx),%xmm1 | |
| 801 por %xmm0,%xmm4 | |
| 802 pand 96(%rdx),%xmm2 | |
| 803 por %xmm1,%xmm5 | |
| 804 pand 112(%rdx),%xmm3 | |
| 805 por %xmm2,%xmm4 | |
| 806 por %xmm3,%xmm5 | |
| 807 por %xmm5,%xmm4 | |
| 808 pshufd $0x4e,%xmm4,%xmm0 | |
| 809 por %xmm4,%xmm0 | |
| 810 leaq 256(%r12),%r12 | |
// Hand-encoded movq %xmm0,%rbx: rbx = word gathered for this pass.
| 811 .byte 102,72,15,126,195 | |
| 812 | |
// r10 = first temporary word. The carry left in rdi by the previous pass is
// stored at (%r14) before r14 is rewound to the start of the temporary.
| 813 movq (%r14,%r9,1),%r10 | |
| 814 movq %r8,%rbp | |
| 815 mulq %rbx | |
| 816 addq %rax,%r10 | |
| 817 movq (%rcx),%rax | |
| 818 adcq $0,%rdx | |
| 819 | |
| 820 imulq %r10,%rbp | |
| 821 movq %rdx,%r11 | |
| 822 movq %rdi,(%r14) | |
| 823 | |
| 824 leaq (%r14,%r9,1),%r14 | |
| 825 | |
| 826 mulq %rbp | |
| 827 addq %rax,%r10 | |
| 828 movq 8(%rsi,%r9,1),%rax | |
| 829 adcq $0,%rdx | |
| 830 movq %rdx,%rdi | |
| 831 | |
| 832 mulq %rbx | |
| 833 addq %rax,%r11 | |
| 834 movq 8(%rcx),%rax | |
| 835 adcq $0,%rdx | |
| 836 addq 8(%r14),%r11 | |
| 837 adcq $0,%rdx | |
| 838 movq %rdx,%r10 | |
| 839 | |
| 840 mulq %rbp | |
| 841 addq %rax,%rdi | |
| 842 movq 16(%rsi,%r9,1),%rax | |
| 843 adcq $0,%rdx | |
| 844 addq %r11,%rdi | |
| 845 leaq 32(%r9),%r15 | |
| 846 leaq 32(%rcx),%rcx | |
| 847 adcq $0,%rdx | |
| 848 movq %rdx,%r13 | |
| 849 jmp L$inner4x | |
| 850 | |
| 851 .p2align 5 | |
// Inner loop, four words per iteration: same products as L$1st4x plus the
// previous temporary words read through r14; results are stored one slot
// behind.
| 852 L$inner4x: | |
| 853 mulq %rbx | |
| 854 addq %rax,%r10 | |
| 855 movq -16(%rcx),%rax | |
| 856 adcq $0,%rdx | |
| 857 addq 16(%r14),%r10 | |
| 858 leaq 32(%r14),%r14 | |
| 859 adcq $0,%rdx | |
| 860 movq %rdx,%r11 | |
| 861 | |
| 862 mulq %rbp | |
| 863 addq %rax,%r13 | |
| 864 movq -8(%rsi,%r15,1),%rax | |
| 865 adcq $0,%rdx | |
| 866 addq %r10,%r13 | |
| 867 adcq $0,%rdx | |
| 868 movq %rdi,-32(%r14) | |
| 869 movq %rdx,%rdi | |
| 870 | |
| 871 mulq %rbx | |
| 872 addq %rax,%r11 | |
| 873 movq -8(%rcx),%rax | |
| 874 adcq $0,%rdx | |
| 875 addq -8(%r14),%r11 | |
| 876 adcq $0,%rdx | |
| 877 movq %rdx,%r10 | |
| 878 | |
| 879 mulq %rbp | |
| 880 addq %rax,%rdi | |
| 881 movq (%rsi,%r15,1),%rax | |
| 882 adcq $0,%rdx | |
| 883 addq %r11,%rdi | |
| 884 adcq $0,%rdx | |
| 885 movq %r13,-24(%r14) | |
| 886 movq %rdx,%r13 | |
| 887 | |
| 888 mulq %rbx | |
| 889 addq %rax,%r10 | |
| 890 movq 0(%rcx),%rax | |
| 891 adcq $0,%rdx | |
| 892 addq (%r14),%r10 | |
| 893 adcq $0,%rdx | |
| 894 movq %rdx,%r11 | |
| 895 | |
| 896 mulq %rbp | |
| 897 addq %rax,%r13 | |
| 898 movq 8(%rsi,%r15,1),%rax | |
| 899 adcq $0,%rdx | |
| 900 addq %r10,%r13 | |
| 901 adcq $0,%rdx | |
| 902 movq %rdi,-16(%r14) | |
| 903 movq %rdx,%rdi | |
| 904 | |
| 905 mulq %rbx | |
| 906 addq %rax,%r11 | |
| 907 movq 8(%rcx),%rax | |
| 908 adcq $0,%rdx | |
| 909 addq 8(%r14),%r11 | |
| 910 adcq $0,%rdx | |
| 911 movq %rdx,%r10 | |
| 912 | |
| 913 mulq %rbp | |
| 914 addq %rax,%rdi | |
| 915 movq 16(%rsi,%r15,1),%rax | |
| 916 adcq $0,%rdx | |
| 917 addq %r11,%rdi | |
| 918 leaq 32(%rcx),%rcx | |
| 919 adcq $0,%rdx | |
| 920 movq %r13,-8(%r14) | |
| 921 movq %rdx,%r13 | |
| 922 | |
| 923 addq $32,%r15 | |
| 924 jnz L$inner4x | |
| 925 | |
// Tail of the pass. For the last product the roles of rax and rbp are
// swapped: rax takes the factor and rbp is loaded with the top word
// -8(%rcx), which the final comparison after the loop reuses.
| 926 mulq %rbx | |
| 927 addq %rax,%r10 | |
| 928 movq -16(%rcx),%rax | |
| 929 adcq $0,%rdx | |
| 930 addq 16(%r14),%r10 | |
| 931 leaq 32(%r14),%r14 | |
| 932 adcq $0,%rdx | |
| 933 movq %rdx,%r11 | |
| 934 | |
| 935 mulq %rbp | |
| 936 addq %rax,%r13 | |
| 937 movq -8(%rsi),%rax | |
| 938 adcq $0,%rdx | |
| 939 addq %r10,%r13 | |
| 940 adcq $0,%rdx | |
| 941 movq %rdi,-32(%r14) | |
| 942 movq %rdx,%rdi | |
| 943 | |
| 944 mulq %rbx | |
| 945 addq %rax,%r11 | |
| 946 movq %rbp,%rax | |
| 947 movq -8(%rcx),%rbp | |
| 948 adcq $0,%rdx | |
| 949 addq -8(%r14),%r11 | |
| 950 adcq $0,%rdx | |
| 951 movq %rdx,%r10 | |
| 952 | |
| 953 mulq %rbp | |
| 954 addq %rax,%rdi | |
| 955 movq (%rsi,%r9,1),%rax | |
| 956 adcq $0,%rdx | |
| 957 addq %r11,%rdi | |
| 958 adcq $0,%rdx | |
| 959 movq %r13,-24(%r14) | |
| 960 movq %rdx,%r13 | |
| 961 | |
| 962 movq %rdi,-16(%r14) | |
| 963 leaq (%rcx,%r9,1),%rcx | |
| 964 | |
| 965 xorq %rdi,%rdi | |
| 966 addq %r10,%r13 | |
| 967 adcq $0,%rdi | |
| 968 addq (%r14),%r13 | |
| 969 adcq $0,%rdi | |
| 970 movq %r13,-8(%r14) | |
| 971 | |
// Loop until r12 reaches the table end pointer spilled during setup.
| 972 cmpq 16+8(%rsp),%r12 | |
| 973 jb L$outer4x | |
// r15 is zero here (L$inner4x exits when it reaches zero), so adcq turns it
// into the borrow of rbp - r13; rax = -(rdi | borrow).
| 974 xorq %rax,%rax | |
| 975 subq %r13,%rbp | |
| 976 adcq %r15,%r15 | |
| 977 orq %r15,%rdi | |
| 978 subq %rdi,%rax | |
// Register setup for L$sqr4x_sub_entry (outside this view): rbx = start of
// the temporary, rbp = rcx operand pointer, rcx = r9 shifted right
// arithmetically by 5, rdi = destination reloaded from the spill slot,
// r12 = first rcx word minus 1, r13-r15 = the next three words, r10 = 0.
| 979 leaq (%r14,%r9,1),%rbx | |
| 980 movq (%rcx),%r12 | |
| 981 leaq (%rcx),%rbp | |
| 982 movq %r9,%rcx | |
| 983 sarq $3+2,%rcx | |
| 984 movq 56+8(%rsp),%rdi | |
| 985 decq %r12 | |
| 986 xorq %r10,%r10 | |
| 987 movq 8(%rbp),%r13 | |
| 988 movq 16(%rbp),%r14 | |
| 989 movq 24(%rbp),%r15 | |
| 990 jmp L$sqr4x_sub_entry | |
| 991 | |
// _bn_power5
// Runs five rounds of __bn_sqr8x_internal + __bn_post4x_internal and then one
// call to mul4x_internal; presumably five modular squarings followed by a
// multiplication with a gathered table entry (going by the names) -
// NOTE(review): confirm against the C prototype, which is not in this view.
// Inputs as used below: rdi, rsi, rdx, rcx as pointers that are passed on,
// r8 = pointer to one qword, r9d = word count, 8(%rsp) = index read later by
// mul4x_internal through rax. Returns rax = 1; rbx, rbp and r12-r15 are saved
// on entry and restored on exit.
| 992 .globl _bn_power5 | |
| 993 .private_extern _bn_power5 | |
| 994 | |
| 995 .p2align 5 | |
| 996 _bn_power5: | |
// rax keeps the entry rsp for the epilogue and for mul4x_internal.
| 997 movq %rsp,%rax | |
| 998 pushq %rbx | |
| 999 pushq %rbp | |
| 1000 pushq %r12 | |
| 1001 pushq %r13 | |
| 1002 pushq %r14 | |
| 1003 pushq %r15 | |
| 1004 | |
// r9 = word count * 8 (size in bytes, zero-extended), r10 = 3 * that size,
// r9 is negated for the address arithmetic below, and r8 is replaced by the
// qword it points to.
| 1005 shll $3,%r9d | |
| 1006 leal (%r9,%r9,2),%r10d | |
| 1007 negq %r9 | |
| 1008 movq (%r8),%r8 | |
| 1009 | |
| 1010 | |
| 1011 | |
| 1012 | |
| 1013 | |
| 1014 | |
| 1015 | |
| 1016 | |
// Frame size is 320 bytes plus twice the operand size. r11 is the distance
// between the candidate frame and rdi modulo 4096; rsp is moved down by an
// extra amount derived from it (presumably so the frame and the destination
// buffer do not alias modulo the page size - TODO confirm).
| 1017 leaq -320(%rsp,%r9,2),%r11 | |
| 1018 subq %rdi,%r11 | |
| 1019 andq $4095,%r11 | |
| 1020 cmpq %r11,%r10 | |
| 1021 jb L$pwr_sp_alt | |
| 1022 subq %r11,%rsp | |
| 1023 leaq -320(%rsp,%r9,2),%rsp | |
| 1024 jmp L$pwr_sp_done | |
| 1025 | |
| 1026 .p2align 5 | |
// Alternative placement: the extra adjustment is r11 - (4096 - frame size),
// clamped to zero when that subtraction borrows.
| 1027 L$pwr_sp_alt: | |
| 1028 leaq 4096-320(,%r9,2),%r10 | |
| 1029 leaq -320(%rsp,%r9,2),%rsp | |
| 1030 subq %r10,%r11 | |
| 1031 movq $0,%r10 | |
| 1032 cmovcq %r10,%r11 | |
| 1033 subq %r11,%rsp | |
| 1034 L$pwr_sp_done: | |
// Align the frame to 64 bytes; r10 keeps the negated size while r9 becomes
// positive again.
| 1035 andq $-64,%rsp | |
| 1036 movq %r9,%r10 | |
| 1037 negq %r9 | |
| 1038 | |
| 1039 | |
| 1040 | |
| 1041 | |
| 1042 | |
| 1043 | |
| 1044 | |
| 1045 | |
| 1046 | |
| 1047 | |
// Frame slots: 32(%rsp) = the qword loaded from (r8), 40(%rsp) = entry rsp.
| 1048 movq %r8,32(%rsp) | |
| 1049 movq %rax,40(%rsp) | |
| 1050 L$power5_body: | |
// Hand-encoded 64-bit movq GPR -> XMM (66 REX.W 0F 6E /r), parking values in
// vector registers across the calls below:
//   movq %rdi,%xmm1 ; movq %rcx,%xmm2 ; movq %r10,%xmm3 ; movq %rdx,%xmm4
| 1051 .byte 102,72,15,110,207 | |
| 1052 .byte 102,72,15,110,209 | |
| 1053 .byte 102,73,15,110,218 | |
| 1054 .byte 102,72,15,110,226 | |
| 1055 | |
// Five rounds of the two helpers. __bn_post4x_internal is not defined in this
// view, so how the helpers use the registers is not visible here.
| 1056 call __bn_sqr8x_internal | |
| 1057 call __bn_post4x_internal | |
| 1058 call __bn_sqr8x_internal | |
| 1059 call __bn_post4x_internal | |
| 1060 call __bn_sqr8x_internal | |
| 1061 call __bn_post4x_internal | |
| 1062 call __bn_sqr8x_internal | |
| 1063 call __bn_post4x_internal | |
| 1064 call __bn_sqr8x_internal | |
| 1065 call __bn_post4x_internal | |
| 1066 | |
// Hand-encoded movq %xmm2,%rcx and movq %xmm4,%rdx restore the two pointers
// parked above. Then set up mul4x_internal: rdi = rsi, rax = entry rsp (its
// index argument is read from 8(%rax)), r8 = address of the qword saved at
// 32(%rsp), because mul4x_internal dereferences r8.
| 1067 .byte 102,72,15,126,209 | |
| 1068 .byte 102,72,15,126,226 | |
| 1069 movq %rsi,%rdi | |
| 1070 movq 40(%rsp),%rax | |
| 1071 leaq 32(%rsp),%r8 | |
| 1072 | |
| 1073 call mul4x_internal | |
| 1074 | |
// Epilogue: reload the entry rsp, return 1 and restore the callee-saved
// registers that were pushed just below it.
| 1075 movq 40(%rsp),%rsi | |
| 1076 movq $1,%rax | |
| 1077 movq -48(%rsi),%r15 | |
| 1078 movq -40(%rsi),%r14 | |
| 1079 movq -32(%rsi),%r13 | |
| 1080 movq -24(%rsi),%r12 | |
| 1081 movq -16(%rsi),%rbp | |
| 1082 movq -8(%rsi),%rbx | |
| 1083 leaq (%rsi),%rsp | |
| 1084 L$power5_epilogue: | |
// 0xf3,0xc3 is the encoding of "rep ret".
| 1085 .byte 0xf3,0xc3 | |
| 1086 | |
| 1087 | |
| 1088 .globl _bn_sqr8x_internal | |
| 1089 .private_extern _bn_sqr8x_internal | |
| 1090 .private_extern _bn_sqr8x_internal | |
| 1091 | |
| 1092 .p2align 5 | |
| 1093 _bn_sqr8x_internal: | |
| 1094 __bn_sqr8x_internal: | |
| 1095 | |
| 1096 | |
| 1097 | |
| 1098 | |
| 1099 | |
| 1100 | |
| 1101 | |
| 1102 | |
| 1103 | |
| 1104 | |
| 1105 | |
| 1106 | |
| 1107 | |
| 1108 | |
| 1109 | |
| 1110 | |
| 1111 | |
| 1112 | |
| 1113 | |
| 1114 | |
| 1115 | |
| 1116 | |
| 1117 | |
| 1118 | |
| 1119 | |
| 1120 | |
| 1121 | |
| 1122 | |
| 1123 | |
| 1124 | |
| 1125 | |
| 1126 | |
| 1127 | |
| 1128 | |
| 1129 | |
| 1130 | |
| 1131 | |
| 1132 | |
| 1133 | |
| 1134 | |
| 1135 | |
| 1136 | |
| 1137 | |
| 1138 | |
| 1139 | |
| 1140 | |
| 1141 | |
| 1142 | |
| 1143 | |
| 1144 | |
| 1145 | |
| 1146 | |
| 1147 | |
| 1148 | |
| 1149 | |
| 1150 | |
| 1151 | |
| 1152 | |
| 1153 | |
| 1154 | |
| 1155 | |
| 1156 | |
| 1157 | |
| 1158 | |
| 1159 | |
| 1160 | |
| 1161 | |
| 1162 | |
| 1163 | |
| 1164 | |
| 1165 | |
| 1166 | |
| 1167 | |
| 1168 leaq 32(%r10),%rbp | |
| 1169 leaq (%rsi,%r9,1),%rsi | |
| 1170 | |
| 1171 movq %r9,%rcx | |
| 1172 | |
| 1173 | |
| 1174 movq -32(%rsi,%rbp,1),%r14 | |
| 1175 leaq 48+8(%rsp,%r9,2),%rdi | |
| 1176 movq -24(%rsi,%rbp,1),%rax | |
| 1177 leaq -32(%rdi,%rbp,1),%rdi | |
| 1178 movq -16(%rsi,%rbp,1),%rbx | |
| 1179 movq %rax,%r15 | |
| 1180 | |
| 1181 mulq %r14 | |
| 1182 movq %rax,%r10 | |
| 1183 movq %rbx,%rax | |
| 1184 movq %rdx,%r11 | |
| 1185 movq %r10,-24(%rdi,%rbp,1) | |
| 1186 | |
| 1187 mulq %r14 | |
| 1188 addq %rax,%r11 | |
| 1189 movq %rbx,%rax | |
| 1190 adcq $0,%rdx | |
| 1191 movq %r11,-16(%rdi,%rbp,1) | |
| 1192 movq %rdx,%r10 | |
| 1193 | |
| 1194 | |
| 1195 movq -8(%rsi,%rbp,1),%rbx | |
| 1196 mulq %r15 | |
| 1197 movq %rax,%r12 | |
| 1198 movq %rbx,%rax | |
| 1199 movq %rdx,%r13 | |
| 1200 | |
| 1201 leaq (%rbp),%rcx | |
| 1202 mulq %r14 | |
| 1203 addq %rax,%r10 | |
| 1204 movq %rbx,%rax | |
| 1205 movq %rdx,%r11 | |
| 1206 adcq $0,%r11 | |
| 1207 addq %r12,%r10 | |
| 1208 adcq $0,%r11 | |
| 1209 movq %r10,-8(%rdi,%rcx,1) | |
| 1210 jmp L$sqr4x_1st | |
| 1211 | |
| 1212 .p2align 5 | |
| 1213 L$sqr4x_1st: | |
| 1214 movq (%rsi,%rcx,1),%rbx | |
| 1215 mulq %r15 | |
| 1216 addq %rax,%r13 | |
| 1217 movq %rbx,%rax | |
| 1218 movq %rdx,%r12 | |
| 1219 adcq $0,%r12 | |
| 1220 | |
| 1221 mulq %r14 | |
| 1222 addq %rax,%r11 | |
| 1223 movq %rbx,%rax | |
| 1224 movq 8(%rsi,%rcx,1),%rbx | |
| 1225 movq %rdx,%r10 | |
| 1226 adcq $0,%r10 | |
| 1227 addq %r13,%r11 | |
| 1228 adcq $0,%r10 | |
| 1229 | |
| 1230 | |
| 1231 mulq %r15 | |
| 1232 addq %rax,%r12 | |
| 1233 movq %rbx,%rax | |
| 1234 movq %r11,(%rdi,%rcx,1) | |
| 1235 movq %rdx,%r13 | |
| 1236 adcq $0,%r13 | |
| 1237 | |
| 1238 mulq %r14 | |
| 1239 addq %rax,%r10 | |
| 1240 movq %rbx,%rax | |
| 1241 movq 16(%rsi,%rcx,1),%rbx | |
| 1242 movq %rdx,%r11 | |
| 1243 adcq $0,%r11 | |
| 1244 addq %r12,%r10 | |
| 1245 adcq $0,%r11 | |
| 1246 | |
| 1247 mulq %r15 | |
| 1248 addq %rax,%r13 | |
| 1249 movq %rbx,%rax | |
| 1250 movq %r10,8(%rdi,%rcx,1) | |
| 1251 movq %rdx,%r12 | |
| 1252 adcq $0,%r12 | |
| 1253 | |
| 1254 mulq %r14 | |
| 1255 addq %rax,%r11 | |
| 1256 movq %rbx,%rax | |
| 1257 movq 24(%rsi,%rcx,1),%rbx | |
| 1258 movq %rdx,%r10 | |
| 1259 adcq $0,%r10 | |
| 1260 addq %r13,%r11 | |
| 1261 adcq $0,%r10 | |
| 1262 | |
| 1263 | |
| 1264 mulq %r15 | |
| 1265 addq %rax,%r12 | |
| 1266 movq %rbx,%rax | |
| 1267 movq %r11,16(%rdi,%rcx,1) | |
| 1268 movq %rdx,%r13 | |
| 1269 adcq $0,%r13 | |
| 1270 leaq 32(%rcx),%rcx | |
| 1271 | |
| 1272 mulq %r14 | |
| 1273 addq %rax,%r10 | |
| 1274 movq %rbx,%rax | |
| 1275 movq %rdx,%r11 | |
| 1276 adcq $0,%r11 | |
| 1277 addq %r12,%r10 | |
| 1278 adcq $0,%r11 | |
| 1279 movq %r10,-8(%rdi,%rcx,1) | |
| 1280 | |
| 1281 cmpq $0,%rcx | |
| 1282 jne L$sqr4x_1st | |
| 1283 | |
| 1284 mulq %r15 | |
| 1285 addq %rax,%r13 | |
| 1286 leaq 16(%rbp),%rbp | |
| 1287 adcq $0,%rdx | |
| 1288 addq %r11,%r13 | |
| 1289 adcq $0,%rdx | |
| 1290 | |
| 1291 movq %r13,(%rdi) | |
| 1292 movq %rdx,%r12 | |
| 1293 movq %rdx,8(%rdi) | |
| 1294 jmp L$sqr4x_outer | |
| 1295 | |
| 1296 .p2align 5 | |
| 1297 L$sqr4x_outer: | |
| 1298 movq -32(%rsi,%rbp,1),%r14 | |
| 1299 leaq 48+8(%rsp,%r9,2),%rdi | |
| 1300 movq -24(%rsi,%rbp,1),%rax | |
| 1301 leaq -32(%rdi,%rbp,1),%rdi | |
| 1302 movq -16(%rsi,%rbp,1),%rbx | |
| 1303 movq %rax,%r15 | |
| 1304 | |
| 1305 mulq %r14 | |
| 1306 movq -24(%rdi,%rbp,1),%r10 | |
| 1307 addq %rax,%r10 | |
| 1308 movq %rbx,%rax | |
| 1309 adcq $0,%rdx | |
| 1310 movq %r10,-24(%rdi,%rbp,1) | |
| 1311 movq %rdx,%r11 | |
| 1312 | |
| 1313 mulq %r14 | |
| 1314 addq %rax,%r11 | |
| 1315 movq %rbx,%rax | |
| 1316 adcq $0,%rdx | |
| 1317 addq -16(%rdi,%rbp,1),%r11 | |
| 1318 movq %rdx,%r10 | |
| 1319 adcq $0,%r10 | |
| 1320 movq %r11,-16(%rdi,%rbp,1) | |
| 1321 | |
| 1322 xorq %r12,%r12 | |
| 1323 | |
| 1324 movq -8(%rsi,%rbp,1),%rbx | |
| 1325 mulq %r15 | |
| 1326 addq %rax,%r12 | |
| 1327 movq %rbx,%rax | |
| 1328 adcq $0,%rdx | |
| 1329 addq -8(%rdi,%rbp,1),%r12 | |
| 1330 movq %rdx,%r13 | |
| 1331 adcq $0,%r13 | |
| 1332 | |
| 1333 mulq %r14 | |
| 1334 addq %rax,%r10 | |
| 1335 movq %rbx,%rax | |
| 1336 adcq $0,%rdx | |
| 1337 addq %r12,%r10 | |
| 1338 movq %rdx,%r11 | |
| 1339 adcq $0,%r11 | |
| 1340 movq %r10,-8(%rdi,%rbp,1) | |
| 1341 | |
| 1342 leaq (%rbp),%rcx | |
| 1343 jmp L$sqr4x_inner | |
| 1344 | |
| 1345 .p2align 5 | |
| 1346 L$sqr4x_inner: | |
| 1347 movq (%rsi,%rcx,1),%rbx | |
| 1348 mulq %r15 | |
| 1349 addq %rax,%r13 | |
| 1350 movq %rbx,%rax | |
| 1351 movq %rdx,%r12 | |
| 1352 adcq $0,%r12 | |
| 1353 addq (%rdi,%rcx,1),%r13 | |
| 1354 adcq $0,%r12 | |
| 1355 | |
| 1356 .byte 0x67 | |
| 1357 mulq %r14 | |
| 1358 addq %rax,%r11 | |
| 1359 movq %rbx,%rax | |
| 1360 movq 8(%rsi,%rcx,1),%rbx | |
| 1361 movq %rdx,%r10 | |
| 1362 adcq $0,%r10 | |
| 1363 addq %r13,%r11 | |
| 1364 adcq $0,%r10 | |
| 1365 | |
| 1366 mulq %r15 | |
| 1367 addq %rax,%r12 | |
| 1368 movq %r11,(%rdi,%rcx,1) | |
| 1369 movq %rbx,%rax | |
| 1370 movq %rdx,%r13 | |
| 1371 adcq $0,%r13 | |
| 1372 addq 8(%rdi,%rcx,1),%r12 | |
| 1373 leaq 16(%rcx),%rcx | |
| 1374 adcq $0,%r13 | |
| 1375 | |
| 1376 mulq %r14 | |
| 1377 addq %rax,%r10 | |
| 1378 movq %rbx,%rax | |
| 1379 adcq $0,%rdx | |
| 1380 addq %r12,%r10 | |
| 1381 movq %rdx,%r11 | |
| 1382 adcq $0,%r11 | |
| 1383 movq %r10,-8(%rdi,%rcx,1) | |
| 1384 | |
| 1385 cmpq $0,%rcx | |
| 1386 jne L$sqr4x_inner | |
| 1387 | |
| 1388 .byte 0x67 | |
| 1389 mulq %r15 | |
| 1390 addq %rax,%r13 | |
| 1391 adcq $0,%rdx | |
| 1392 addq %r11,%r13 | |
| 1393 adcq $0,%rdx | |
| 1394 | |
| 1395 movq %r13,(%rdi) | |
| 1396 movq %rdx,%r12 | |
| 1397 movq %rdx,8(%rdi) | |
| 1398 | |
| 1399 addq $16,%rbp | |
| 1400 jnz L$sqr4x_outer | |
| 1401 | |
| 1402 | |
| 1403 movq -32(%rsi),%r14 | |
| 1404 leaq 48+8(%rsp,%r9,2),%rdi | |
| 1405 movq -24(%rsi),%rax | |
| 1406 leaq -32(%rdi,%rbp,1),%rdi | |
| 1407 movq -16(%rsi),%rbx | |
| 1408 movq %rax,%r15 | |
| 1409 | |
| 1410 mulq %r14 | |
| 1411 addq %rax,%r10 | |
| 1412 movq %rbx,%rax | |
| 1413 movq %rdx,%r11 | |
| 1414 adcq $0,%r11 | |
| 1415 | |
| 1416 mulq %r14 | |
| 1417 addq %rax,%r11 | |
| 1418 movq %rbx,%rax | |
| 1419 movq %r10,-24(%rdi) | |
| 1420 movq %rdx,%r10 | |
| 1421 adcq $0,%r10 | |
| 1422 addq %r13,%r11 | |
| 1423 movq -8(%rsi),%rbx | |
| 1424 adcq $0,%r10 | |
| 1425 | |
| 1426 mulq %r15 | |
| 1427 addq %rax,%r12 | |
| 1428 movq %rbx,%rax | |
| 1429 movq %r11,-16(%rdi) | |
| 1430 movq %rdx,%r13 | |
| 1431 adcq $0,%r13 | |
| 1432 | |
| 1433 mulq %r14 | |
| 1434 addq %rax,%r10 | |
| 1435 movq %rbx,%rax | |
| 1436 movq %rdx,%r11 | |
| 1437 adcq $0,%r11 | |
| 1438 addq %r12,%r10 | |
| 1439 adcq $0,%r11 | |
| 1440 movq %r10,-8(%rdi) | |
| 1441 | |
| 1442 mulq %r15 | |
| 1443 addq %rax,%r13 | |
| 1444 movq -16(%rsi),%rax | |
| 1445 adcq $0,%rdx | |
| 1446 addq %r11,%r13 | |
| 1447 adcq $0,%rdx | |
| 1448 | |
| 1449 movq %r13,(%rdi) | |
| 1450 movq %rdx,%r12 | |
| 1451 movq %rdx,8(%rdi) | |
| 1452 | |
| 1453 mulq %rbx | |
| 1454 addq $16,%rbp | |
| 1455 xorq %r14,%r14 | |
| 1456 subq %r9,%rbp | |
| 1457 xorq %r15,%r15 | |
| 1458 | |
| 1459 addq %r12,%rax | |
| 1460 adcq $0,%rdx | |
| 1461 movq %rax,8(%rdi) | |
| 1462 movq %rdx,16(%rdi) | |
| 1463 movq %r15,24(%rdi) | |
| 1464 | |
| 1465 movq -16(%rsi,%rbp,1),%rax | |
| 1466 leaq 48+8(%rsp),%rdi | |
| 1467 xorq %r10,%r10 | |
| 1468 movq 8(%rdi),%r11 | |
| 1469 | |
| 1470 leaq (%r14,%r10,2),%r12 | |
| 1471 shrq $63,%r10 | |
| 1472 leaq (%rcx,%r11,2),%r13 | |
| 1473 shrq $63,%r11 | |
| 1474 orq %r10,%r13 | |
| 1475 movq 16(%rdi),%r10 | |
| 1476 movq %r11,%r14 | |
| 1477 mulq %rax | |
| 1478 negq %r15 | |
| 1479 movq 24(%rdi),%r11 | |
| 1480 adcq %rax,%r12 | |
| 1481 movq -8(%rsi,%rbp,1),%rax | |
| 1482 movq %r12,(%rdi) | |
| 1483 adcq %rdx,%r13 | |
| 1484 | |
| 1485 leaq (%r14,%r10,2),%rbx | |
| 1486 movq %r13,8(%rdi) | |
| 1487 sbbq %r15,%r15 | |
| 1488 shrq $63,%r10 | |
| 1489 leaq (%rcx,%r11,2),%r8 | |
| 1490 shrq $63,%r11 | |
| 1491 orq %r10,%r8 | |
| 1492 movq 32(%rdi),%r10 | |
| 1493 movq %r11,%r14 | |
| 1494 mulq %rax | |
| 1495 negq %r15 | |
| 1496 movq 40(%rdi),%r11 | |
| 1497 adcq %rax,%rbx | |
| 1498 movq 0(%rsi,%rbp,1),%rax | |
| 1499 movq %rbx,16(%rdi) | |
| 1500 adcq %rdx,%r8 | |
| 1501 leaq 16(%rbp),%rbp | |
| 1502 movq %r8,24(%rdi) | |
| 1503 sbbq %r15,%r15 | |
| 1504 leaq 64(%rdi),%rdi | |
| 1505 jmp L$sqr4x_shift_n_add | |
| 1506 | |
| 1507 .p2align 5 | |
| 1508 L$sqr4x_shift_n_add: | |
| 1509 leaq (%r14,%r10,2),%r12 | |
| 1510 shrq $63,%r10 | |
| 1511 leaq (%rcx,%r11,2),%r13 | |
| 1512 shrq $63,%r11 | |
| 1513 orq %r10,%r13 | |
| 1514 movq -16(%rdi),%r10 | |
| 1515 movq %r11,%r14 | |
| 1516 mulq %rax | |
| 1517 negq %r15 | |
| 1518 movq -8(%rdi),%r11 | |
| 1519 adcq %rax,%r12 | |
| 1520 movq -8(%rsi,%rbp,1),%rax | |
| 1521 movq %r12,-32(%rdi) | |
| 1522 adcq %rdx,%r13 | |
| 1523 | |
| 1524 leaq (%r14,%r10,2),%rbx | |
| 1525 movq %r13,-24(%rdi) | |
| 1526 sbbq %r15,%r15 | |
| 1527 shrq $63,%r10 | |
| 1528 leaq (%rcx,%r11,2),%r8 | |
| 1529 shrq $63,%r11 | |
| 1530 orq %r10,%r8 | |
| 1531 movq 0(%rdi),%r10 | |
| 1532 movq %r11,%r14 | |
| 1533 mulq %rax | |
| 1534 negq %r15 | |
| 1535 movq 8(%rdi),%r11 | |
| 1536 adcq %rax,%rbx | |
| 1537 movq 0(%rsi,%rbp,1),%rax | |
| 1538 movq %rbx,-16(%rdi) | |
| 1539 adcq %rdx,%r8 | |
| 1540 | |
| 1541 leaq (%r14,%r10,2),%r12 | |
| 1542 movq %r8,-8(%rdi) | |
| 1543 sbbq %r15,%r15 | |
| 1544 shrq $63,%r10 | |
| 1545 leaq (%rcx,%r11,2),%r13 | |
| 1546 shrq $63,%r11 | |
| 1547 orq %r10,%r13 | |
| 1548 movq 16(%rdi),%r10 | |
| 1549 movq %r11,%r14 | |
| 1550 mulq %rax | |
| 1551 negq %r15 | |
| 1552 movq 24(%rdi),%r11 | |
| 1553 adcq %rax,%r12 | |
| 1554 movq 8(%rsi,%rbp,1),%rax | |
| 1555 movq %r12,0(%rdi) | |
| 1556 adcq %rdx,%r13 | |
| 1557 | |
| 1558 leaq (%r14,%r10,2),%rbx | |
| 1559 movq %r13,8(%rdi) | |
| 1560 sbbq %r15,%r15 | |
| 1561 shrq $63,%r10 | |
| 1562 leaq (%rcx,%r11,2),%r8 | |
| 1563 shrq $63,%r11 | |
| 1564 orq %r10,%r8 | |
| 1565 movq 32(%rdi),%r10 | |
| 1566 movq %r11,%r14 | |
| 1567 mulq %rax | |
| 1568 negq %r15 | |
| 1569 movq 40(%rdi),%r11 | |
| 1570 adcq %rax,%rbx | |
| 1571 movq 16(%rsi,%rbp,1),%rax | |
| 1572 movq %rbx,16(%rdi) | |
| 1573 adcq %rdx,%r8 | |
| 1574 movq %r8,24(%rdi) | |
| 1575 sbbq %r15,%r15 | |
| 1576 leaq 64(%rdi),%rdi | |
| 1577 addq $32,%rbp | |
| 1578 jnz L$sqr4x_shift_n_add | |
| 1579 | |
| 1580 leaq (%r14,%r10,2),%r12 | |
| 1581 .byte 0x67 | |
| 1582 shrq $63,%r10 | |
| 1583 leaq (%rcx,%r11,2),%r13 | |
| 1584 shrq $63,%r11 | |
| 1585 orq %r10,%r13 | |
| 1586 movq -16(%rdi),%r10 | |
| 1587 movq %r11,%r14 | |
| 1588 mulq %rax | |
| 1589 negq %r15 | |
| 1590 movq -8(%rdi),%r11 | |
| 1591 adcq %rax,%r12 | |
| 1592 movq -8(%rsi),%rax | |
| 1593 movq %r12,-32(%rdi) | |
| 1594 adcq %rdx,%r13 | |
| 1595 | |
| 1596 leaq (%r14,%r10,2),%rbx | |
| 1597 movq %r13,-24(%rdi) | |
| 1598 sbbq %r15,%r15 | |
| 1599 shrq $63,%r10 | |
| 1600 leaq (%rcx,%r11,2),%r8 | |
| 1601 shrq $63,%r11 | |
| 1602 orq %r10,%r8 | |
| 1603 mulq %rax | |
| 1604 negq %r15 | |
| 1605 adcq %rax,%rbx | |
| 1606 adcq %rdx,%r8 | |
| 1607 movq %rbx,-16(%rdi) | |
| 1608 movq %r8,-8(%rdi) | |
| 1609 .byte 102,72,15,126,213 | |
# __bn_sqr8x_reduction -- Montgomery-reduce the double-width value held in
# the scratch area at 48+8(%rsp), eight 64-bit limbs per outer pass.
#
# Reached both by call and by fall-through from the code above. Stack
# offsets carry an extra +8 compared with the frame of the caller because
# of the return address pushed by the call.
#
# Inputs, as read by the code below:
#   %r9         byte length of the modulus (positive on entry)
#   %rbp        pointer to the modulus limbs
#   32+8(%rsp)  constant multiplied into each low limb to get the multiplier
#               (assumed to be -1/n[0] mod 2^64 -- TODO confirm with callers)
#   %xmm2       copy of the modulus pointer, reloaded into %rbp every pass
#   %xmm3       negated byte length, reloaded into %r9 every pass
# Frame slots written here:
#   0+8(%rsp)   end of the modulus (%rbp + byte length)
#   8+8(%rsp)   end of the scratch area (start + twice the byte length)
# On return %rax holds the carry out of the last pass and %rdi has reached
# the end of the scratch area. Clobbers %rbx, %rcx, %rdx, %rsi, %r8-%r15.
#
# Change in this revision: at L$8x_tail_done the carry produced by folding
# the parked carry word into %r8..%r15 used to be wiped by a later
# xorq %rax,%rax. %rax is now cleared before the addition and the carry is
# captured with adcq $0,%rax, so it ends up in the carry that is returned.
| 1610 __bn_sqr8x_reduction: | |
| 1611 xorq %rax,%rax | |
| 1612 leaq (%r9,%rbp,1),%rcx | |
| 1613 leaq 48+8(%rsp,%r9,2),%rdx | |
| 1614 movq %rcx,0+8(%rsp) | |
| 1615 leaq 48+8(%rsp,%r9,1),%rdi | |
| 1616 movq %rdx,8+8(%rsp) | |
| 1617 negq %r9 | |
| 1618 jmp L$8x_reduction_loop | |
| 1619 | |
| 1620 .p2align 5 | |
# Outer pass: step %rdi back by the byte length, load the next eight limbs
# of the value into %rbx and %r9-%r15, and park the carry of the previous
# pass (zero on the first pass) in the word at the end of the scratch area.
| 1621 L$8x_reduction_loop: | |
| 1622 leaq (%rdi,%r9,1),%rdi | |
# Lone 0x66 / 0x67 bytes in this routine act as prefixes on the following
# instruction and do not change its result (presumably alignment padding).
| 1623 .byte 0x66 | |
| 1624 movq 0(%rdi),%rbx | |
| 1625 movq 8(%rdi),%r9 | |
| 1626 movq 16(%rdi),%r10 | |
| 1627 movq 24(%rdi),%r11 | |
| 1628 movq 32(%rdi),%r12 | |
| 1629 movq 40(%rdi),%r13 | |
| 1630 movq 48(%rdi),%r14 | |
| 1631 movq 56(%rdi),%r15 | |
| 1632 movq %rax,(%rdx) | |
| 1633 leaq 64(%rdi),%rdi | |
| 1634 | |
| 1635 .byte 0x67 | |
# %r8 = lowest limb, %rbx = first multiplier, %rax = first modulus limb.
| 1636 movq %rbx,%r8 | |
| 1637 imulq 32+8(%rsp),%rbx | |
| 1638 movq 0(%rbp),%rax | |
| 1639 movl $8,%ecx | |
| 1640 jmp L$8x_reduce | |
| 1641 | |
| 1642 .p2align 5 | |
# Eight rounds. Each round adds multiplier * (first eight modulus limbs) to
# %r8..%r15 and shifts the window down by one limb.
| 1643 L$8x_reduce: | |
| 1644 mulq %rbx | |
| 1645 movq 8(%rbp),%rax | |
# negq only serves to set CF = (%r8 != 0). The low product word is not
# added explicitly; under the assumption stated in the header the sum of
# that word and %r8 is zero and only its carry matters.
| 1646 negq %r8 | |
| 1647 movq %rdx,%r8 | |
| 1648 adcq $0,%r8 | |
| 1649 | |
| 1650 mulq %rbx | |
| 1651 addq %rax,%r9 | |
| 1652 movq 16(%rbp),%rax | |
| 1653 adcq $0,%rdx | |
| 1654 addq %r9,%r8 | |
# Save this multiplier in qword slot (%rcx - 1) at 48+8(%rsp); the tail
# loop replays the eight multipliers starting from slot 7.
| 1655 movq %rbx,48-8+8(%rsp,%rcx,8) | |
| 1656 movq %rdx,%r9 | |
| 1657 adcq $0,%r9 | |
| 1658 | |
| 1659 mulq %rbx | |
| 1660 addq %rax,%r10 | |
| 1661 movq 24(%rbp),%rax | |
| 1662 adcq $0,%rdx | |
| 1663 addq %r10,%r9 | |
| 1664 movq 32+8(%rsp),%rsi | |
| 1665 movq %rdx,%r10 | |
| 1666 adcq $0,%r10 | |
| 1667 | |
| 1668 mulq %rbx | |
| 1669 addq %rax,%r11 | |
| 1670 movq 32(%rbp),%rax | |
| 1671 adcq $0,%rdx | |
# Multiplier for the next round, computed from the new lowest limb.
| 1672 imulq %r8,%rsi | |
| 1673 addq %r11,%r10 | |
| 1674 movq %rdx,%r11 | |
| 1675 adcq $0,%r11 | |
| 1676 | |
| 1677 mulq %rbx | |
| 1678 addq %rax,%r12 | |
| 1679 movq 40(%rbp),%rax | |
| 1680 adcq $0,%rdx | |
| 1681 addq %r12,%r11 | |
| 1682 movq %rdx,%r12 | |
| 1683 adcq $0,%r12 | |
| 1684 | |
| 1685 mulq %rbx | |
| 1686 addq %rax,%r13 | |
| 1687 movq 48(%rbp),%rax | |
| 1688 adcq $0,%rdx | |
| 1689 addq %r13,%r12 | |
| 1690 movq %rdx,%r13 | |
| 1691 adcq $0,%r13 | |
| 1692 | |
| 1693 mulq %rbx | |
| 1694 addq %rax,%r14 | |
| 1695 movq 56(%rbp),%rax | |
| 1696 adcq $0,%rdx | |
| 1697 addq %r14,%r13 | |
| 1698 movq %rdx,%r14 | |
| 1699 adcq $0,%r14 | |
| 1700 | |
| 1701 mulq %rbx | |
| 1702 movq %rsi,%rbx | |
| 1703 addq %rax,%r15 | |
| 1704 movq 0(%rbp),%rax | |
| 1705 adcq $0,%rdx | |
| 1706 addq %r15,%r14 | |
| 1707 movq %rdx,%r15 | |
| 1708 adcq $0,%r15 | |
| 1709 | |
| 1710 decl %ecx | |
| 1711 jnz L$8x_reduce | |
| 1712 | |
# Advance to the next eight modulus limbs. When the modulus is exhausted
# (eight-limb modulus) skip the tail; xorq leaves %rax = 0 and CF = 0 for
# the additions at L$8x_no_tail.
| 1713 leaq 64(%rbp),%rbp | |
| 1714 xorq %rax,%rax | |
| 1715 movq 8+8(%rsp),%rdx | |
| 1716 cmpq 0+8(%rsp),%rbp | |
| 1717 jae L$8x_no_tail | |
| 1718 | |
# Add the next eight limbs of the value; sbbq keeps the carry in %rsi as
# 0 or -1 so that it survives the multiplications below.
| 1719 .byte 0x66 | |
| 1720 addq 0(%rdi),%r8 | |
| 1721 adcq 8(%rdi),%r9 | |
| 1722 adcq 16(%rdi),%r10 | |
| 1723 adcq 24(%rdi),%r11 | |
| 1724 adcq 32(%rdi),%r12 | |
| 1725 adcq 40(%rdi),%r13 | |
| 1726 adcq 48(%rdi),%r14 | |
| 1727 adcq 56(%rdi),%r15 | |
| 1728 sbbq %rsi,%rsi | |
| 1729 | |
| 1730 movq 48+56+8(%rsp),%rbx | |
| 1731 movl $8,%ecx | |
| 1732 movq 0(%rbp),%rax | |
| 1733 jmp L$8x_tail | |
| 1734 | |
| 1735 .p2align 5 | |
# Tail: replay the eight saved multipliers against the next eight modulus
# limbs. Each round stores one finished limb at (%rdi) and moves %rdi up
# by eight bytes.
| 1736 L$8x_tail: | |
| 1737 mulq %rbx | |
| 1738 addq %rax,%r8 | |
| 1739 movq 8(%rbp),%rax | |
| 1740 movq %r8,(%rdi) | |
| 1741 movq %rdx,%r8 | |
| 1742 adcq $0,%r8 | |
| 1743 | |
| 1744 mulq %rbx | |
| 1745 addq %rax,%r9 | |
| 1746 movq 16(%rbp),%rax | |
| 1747 adcq $0,%rdx | |
| 1748 addq %r9,%r8 | |
| 1749 leaq 8(%rdi),%rdi | |
| 1750 movq %rdx,%r9 | |
| 1751 adcq $0,%r9 | |
| 1752 | |
| 1753 mulq %rbx | |
| 1754 addq %rax,%r10 | |
| 1755 movq 24(%rbp),%rax | |
| 1756 adcq $0,%rdx | |
| 1757 addq %r10,%r9 | |
| 1758 movq %rdx,%r10 | |
| 1759 adcq $0,%r10 | |
| 1760 | |
| 1761 mulq %rbx | |
| 1762 addq %rax,%r11 | |
| 1763 movq 32(%rbp),%rax | |
| 1764 adcq $0,%rdx | |
| 1765 addq %r11,%r10 | |
| 1766 movq %rdx,%r11 | |
| 1767 adcq $0,%r11 | |
| 1768 | |
| 1769 mulq %rbx | |
| 1770 addq %rax,%r12 | |
| 1771 movq 40(%rbp),%rax | |
| 1772 adcq $0,%rdx | |
| 1773 addq %r12,%r11 | |
| 1774 movq %rdx,%r12 | |
| 1775 adcq $0,%r12 | |
| 1776 | |
| 1777 mulq %rbx | |
| 1778 addq %rax,%r13 | |
| 1779 movq 48(%rbp),%rax | |
| 1780 adcq $0,%rdx | |
| 1781 addq %r13,%r12 | |
| 1782 movq %rdx,%r13 | |
| 1783 adcq $0,%r13 | |
| 1784 | |
| 1785 mulq %rbx | |
| 1786 addq %rax,%r14 | |
| 1787 movq 56(%rbp),%rax | |
| 1788 adcq $0,%rdx | |
| 1789 addq %r14,%r13 | |
| 1790 movq %rdx,%r14 | |
| 1791 adcq $0,%r14 | |
| 1792 | |
| 1793 mulq %rbx | |
# Fetch the saved multiplier for the next round (slot %rcx - 2).
| 1794 movq 48-16+8(%rsp,%rcx,8),%rbx | |
| 1795 addq %rax,%r15 | |
| 1796 adcq $0,%rdx | |
| 1797 addq %r15,%r14 | |
| 1798 movq 0(%rbp),%rax | |
| 1799 movq %rdx,%r15 | |
| 1800 adcq $0,%r15 | |
| 1801 | |
| 1802 decl %ecx | |
| 1803 jnz L$8x_tail | |
| 1804 | |
| 1805 leaq 64(%rbp),%rbp | |
| 1806 movq 8+8(%rsp),%rdx | |
| 1807 cmpq 0+8(%rsp),%rbp | |
| 1808 jae L$8x_tail_done | |
| 1809 | |
# More modulus limbs remain: restart from the first saved multiplier, turn
# the saved 0/-1 in %rsi back into CF, add the next eight limbs of the
# value and save the new carry.
| 1810 movq 48+56+8(%rsp),%rbx | |
| 1811 negq %rsi | |
| 1812 movq 0(%rbp),%rax | |
| 1813 adcq 0(%rdi),%r8 | |
| 1814 adcq 8(%rdi),%r9 | |
| 1815 adcq 16(%rdi),%r10 | |
| 1816 adcq 24(%rdi),%r11 | |
| 1817 adcq 32(%rdi),%r12 | |
| 1818 adcq 40(%rdi),%r13 | |
| 1819 adcq 48(%rdi),%r14 | |
| 1820 adcq 56(%rdi),%r15 | |
| 1821 sbbq %rsi,%rsi | |
| 1822 | |
| 1823 movl $8,%ecx | |
| 1824 jmp L$8x_tail | |
| 1825 | |
| 1826 .p2align 5 | |
# Fold in the carry word parked at (%rdx) by the loop head. %rax is cleared
# first and then collects the carry out of %r15, so that carry is kept and
# joins the one added at L$8x_no_tail.
| 1827 L$8x_tail_done: | |
| 1828 xorq %rax,%rax | |
| 1829 addq (%rdx),%r8 | |
| 1830 adcq $0,%r9 | |
| 1831 adcq $0,%r10 | |
| 1832 adcq $0,%r11 | |
| 1833 adcq $0,%r12 | |
| 1834 adcq $0,%r13 | |
| 1835 adcq $0,%r14 | |
| 1836 adcq $0,%r15 | |
| 1837 adcq $0,%rax | |
| 1838 | |
| 1839 | |
# Restore the carry saved in %rsi (0 or -1) into CF.
| 1840 negq %rsi | |
| 1841 L$8x_no_tail: | |
| 1842 adcq 0(%rdi),%r8 | |
| 1843 adcq 8(%rdi),%r9 | |
| 1844 adcq 16(%rdi),%r10 | |
| 1845 adcq 24(%rdi),%r11 | |
| 1846 adcq 32(%rdi),%r12 | |
| 1847 adcq 40(%rdi),%r13 | |
| 1848 adcq 48(%rdi),%r14 | |
| 1849 adcq 56(%rdi),%r15 | |
# %rax = carry of this pass; parked by the loop head or returned.
| 1850 adcq $0,%rax | |
| 1851 movq -8(%rbp),%rcx | |
| 1852 xorq %rsi,%rsi | |
| 1853 | |
# Encoded movq %xmm2,%rbp -- rewind the modulus pointer.
| 1854 .byte 102,72,15,126,213 | |
| 1855 | |
| 1856 movq %r8,0(%rdi) | |
| 1857 movq %r9,8(%rdi) | |
# Encoded movq %xmm3,%r9 -- reload the negated byte length that the loop
# head adds to %rdi.
| 1858 .byte 102,73,15,126,217 | |
| 1859 movq %r10,16(%rdi) | |
| 1860 movq %r11,24(%rdi) | |
| 1861 movq %r12,32(%rdi) | |
| 1862 movq %r13,40(%rdi) | |
| 1863 movq %r14,48(%rdi) | |
| 1864 movq %r15,56(%rdi) | |
| 1865 leaq 64(%rdi),%rdi | |
| 1866 | |
# Keep going until %rdi reaches the end of the scratch area.
| 1867 cmpq %rdx,%rdi | |
| 1868 jb L$8x_reduction_loop | |
# 0xf3,0xc3 is the two-byte rep ret encoding.
| 1869 .byte 0xf3,0xc3 | |
| 1870 | |
| 1871 | |
# __bn_post4x_internal -- final correction step: adds either zero or the
# two-complement negation of the modulus to a multi-limb value, four limbs
# per loop pass, and writes the sum to the output buffer.
#
# Inputs, as read below:
#   %rax   expected to be 0 or 1; it is negated into an all-zero or all-one
#          mask, so the modulus is subtracted only when %rax was 1
#          (presumably the carry returned by the reduction -- confirm)
#   %rbp   pointer to the modulus limbs
#   %rdi   pointer just past the value to correct
#   %r9    negated byte length; %rdi + %r9 is the first limb to read, and
#          %r9 >> 5 is the negative count of four-limb groups
#   %xmm1  output pointer, copied into both %rdi and %rsi
# On return %r10 holds the entry value of %r9 and %r9 holds its negation.
# The same instruction sequence runs for either mask value; there is no
# branch on %rax. Clobbers %rbx, %rcx, %r12-%r15.
| 1872 .p2align 5 | |
| 1873 __bn_post4x_internal: | |
| 1874 movq 0(%rbp),%r12 | |
| 1875 leaq (%rdi,%r9,1),%rbx | |
| 1876 movq %r9,%rcx | |
# Encoded movq %xmm1,%rdi -- output pointer.
| 1877 .byte 102,72,15,126,207 | |
| 1878 negq %rax | |
# Encoded movq %xmm1,%rsi -- second copy of the output pointer; this
# routine does not read it.
| 1879 .byte 102,72,15,126,206 | |
| 1880 sarq $3+2,%rcx | |
# n[0] - 1, so that the NOT below yields -n[0]; together with the plain
# NOT of the higher limbs this forms the negated modulus.
| 1881 decq %r12 | |
# %r10 carries CF between groups as 0 or -1; no carry at the start.
| 1882 xorq %r10,%r10 | |
| 1883 movq 8(%rbp),%r13 | |
| 1884 movq 16(%rbp),%r14 | |
| 1885 movq 24(%rbp),%r15 | |
| 1886 jmp L$sqr4x_sub_entry | |
| 1887 | |
| 1888 .p2align 4 | |
| 1889 L$sqr4x_sub: | |
| 1890 movq 0(%rbp),%r12 | |
| 1891 movq 8(%rbp),%r13 | |
| 1892 movq 16(%rbp),%r14 | |
| 1893 movq 24(%rbp),%r15 | |
| 1894 L$sqr4x_sub_entry: | |
| 1895 leaq 32(%rbp),%rbp | |
| 1896 notq %r12 | |
| 1897 notq %r13 | |
| 1898 notq %r14 | |
| 1899 notq %r15 | |
# Keep the complemented limbs only when the mask is all ones.
| 1900 andq %rax,%r12 | |
| 1901 andq %rax,%r13 | |
| 1902 andq %rax,%r14 | |
| 1903 andq %rax,%r15 | |
| 1904 | |
# Restore CF from the previous group, add four limbs of the value, and
# save the outgoing carry with sbbq.
| 1905 negq %r10 | |
| 1906 adcq 0(%rbx),%r12 | |
| 1907 adcq 8(%rbx),%r13 | |
| 1908 adcq 16(%rbx),%r14 | |
| 1909 adcq 24(%rbx),%r15 | |
| 1910 movq %r12,0(%rdi) | |
| 1911 leaq 32(%rbx),%rbx | |
| 1912 movq %r13,8(%rdi) | |
| 1913 sbbq %r10,%r10 | |
| 1914 movq %r14,16(%rdi) | |
| 1915 movq %r15,24(%rdi) | |
| 1916 leaq 32(%rdi),%rdi | |
| 1917 | |
# %rcx counts up from its negative start value to zero.
| 1918 incq %rcx | |
| 1919 jnz L$sqr4x_sub | |
| 1920 | |
| 1921 movq %r9,%r10 | |
| 1922 negq %r9 | |
# 0xf3,0xc3 is the two-byte rep ret encoding.
| 1923 .byte 0xf3,0xc3 | |
| 1924 | |
# _bn_from_montgomery -- copies the input into the low half of a stack
# scratch area, zeroes the high half (label L$mul_by_1), runs
# __bn_sqr8x_reduction on it and finishes with __bn_post4x_internal.
#
# Register arguments as used below (the C-level names are not visible in
# this file section):
#   %rdi  output buffer, handed to __bn_post4x_internal through %xmm1
#   %rsi  input limbs, read 64 bytes at a time
#   %rdx  not read
#   %rcx  modulus limbs, handed to the helpers in %rbp and %xmm2
#   %r8   pointer to the reduction constant; only its first qword is loaded
#   %r9d  limb count
# Returns %eax = 0 without doing any work when the limb count is not a
# multiple of 8, and %rax = 1 otherwise.
| 1925 .globl _bn_from_montgomery | |
| 1926 .private_extern _bn_from_montgomery | |
| 1927 | |
| 1928 .p2align 5 | |
| 1929 _bn_from_montgomery: | |
| 1930 testl $7,%r9d | |
| 1931 jz bn_from_mont8x | |
| 1932 xorl %eax,%eax | |
| 1933 .byte 0xf3,0xc3 | |
| 1934 | |
| 1935 | |
| 1936 | |
| 1937 .p2align 5 | |
| 1938 bn_from_mont8x: | |
# The 0x67 byte is a prefix on the following register move and does not
# change its result (presumably alignment padding).
| 1939 .byte 0x67 | |
# %rax keeps the entry stack pointer; it is stored in the frame below and
# used by the epilogue to find the six saved registers.
| 1940 movq %rsp,%rax | |
| 1941 pushq %rbx | |
| 1942 pushq %rbp | |
| 1943 pushq %r12 | |
| 1944 pushq %r13 | |
| 1945 pushq %r14 | |
| 1946 pushq %r15 | |
| 1947 | |
# %r9 = limb count * 8 (bytes), %r10 = three times that, then %r9 is
# negated; %r8 = the reduction constant itself.
| 1948 shll $3,%r9d | |
| 1949 leaq (%r9,%r9,2),%r10 | |
| 1950 negq %r9 | |
| 1951 movq (%r8),%r8 | |
| 1952 | |
| 1953 | |
| 1954 | |
| 1955 | |
| 1956 | |
| 1957 | |
| 1958 | |
| 1959 | |
# Frame placement. %r11 = (candidate frame address - %rdi) mod 4096, where
# the candidate lies 320 bytes plus twice the byte length below %rsp. The
# two paths lower %rsp by different extra amounts derived from %r11.
# NOTE(review): this looks intended to control how the frame and the
# output buffer overlap modulo 4096 bytes -- confirm against upstream.
| 1960 leaq -320(%rsp,%r9,2),%r11 | |
| 1961 subq %rdi,%r11 | |
| 1962 andq $4095,%r11 | |
| 1963 cmpq %r11,%r10 | |
| 1964 jb L$from_sp_alt | |
| 1965 subq %r11,%rsp | |
| 1966 leaq -320(%rsp,%r9,2),%rsp | |
| 1967 jmp L$from_sp_done | |
| 1968 | |
| 1969 .p2align 5 | |
| 1970 L$from_sp_alt: | |
| 1971 leaq 4096-320(,%r9,2),%r10 | |
| 1972 leaq -320(%rsp,%r9,2),%rsp | |
| 1973 subq %r10,%r11 | |
| 1974 movq $0,%r10 | |
# Clamp the extra adjustment to zero when the subtraction borrowed.
| 1975 cmovcq %r10,%r11 | |
| 1976 subq %r11,%rsp | |
| 1977 L$from_sp_done: | |
# 64-byte align the frame; %r10 = negated byte length, %r9 = byte length.
| 1978 andq $-64,%rsp | |
| 1979 movq %r9,%r10 | |
| 1980 negq %r9 | |
| 1981 | |
| 1982 | |
| 1983 | |
| 1984 | |
| 1985 | |
| 1986 | |
| 1987 | |
| 1988 | |
| 1989 | |
| 1990 | |
# Frame slots: 32(%rsp) = reduction constant (read by the helper as
# 32+8(%rsp)), 40(%rsp) = entry stack pointer, 48(%rsp) = scratch area.
| 1991 movq %r8,32(%rsp) | |
| 1992 movq %rax,40(%rsp) | |
| 1993 L$from_body: | |
| 1994 movq %r9,%r11 | |
| 1995 leaq 48(%rsp),%rax | |
| 1996 pxor %xmm0,%xmm0 | |
| 1997 jmp L$mul_by_1 | |
| 1998 | |
| 1999 .p2align 5 | |
# Per pass: copy 64 input bytes to the low half of the scratch area and
# zero the matching 64 bytes of the high half (offset %r9 = byte length).
# %r11 counts the remaining input bytes.
| 2000 L$mul_by_1: | |
| 2001 movdqu (%rsi),%xmm1 | |
| 2002 movdqu 16(%rsi),%xmm2 | |
| 2003 movdqu 32(%rsi),%xmm3 | |
| 2004 movdqa %xmm0,(%rax,%r9,1) | |
| 2005 movdqu 48(%rsi),%xmm4 | |
| 2006 movdqa %xmm0,16(%rax,%r9,1) | |
# Encoded leaq 64(%rsi),%rsi with a four-byte displacement.
| 2007 .byte 0x48,0x8d,0xb6,0x40,0x00,0x00,0x00 | |
| 2008 movdqa %xmm1,(%rax) | |
| 2009 movdqa %xmm0,32(%rax,%r9,1) | |
| 2010 movdqa %xmm2,16(%rax) | |
| 2011 movdqa %xmm0,48(%rax,%r9,1) | |
| 2012 movdqa %xmm3,32(%rax) | |
| 2013 movdqa %xmm4,48(%rax) | |
| 2014 leaq 64(%rax),%rax | |
| 2015 subq $64,%r11 | |
| 2016 jnz L$mul_by_1 | |
| 2017 | |
# Hand-encoded moves that stash values for the helpers:
#   movq %rdi,%xmm1   output pointer
#   movq %rcx,%xmm2   modulus pointer
#   movq %r10,%xmm3   negated byte length
| 2018 .byte 102,72,15,110,207 | |
| 2019 .byte 102,72,15,110,209 | |
| 2020 .byte 0x67 | |
| 2021 movq %rcx,%rbp | |
| 2022 .byte 102,73,15,110,218 | |
| 2023 call __bn_sqr8x_reduction | |
| 2024 call __bn_post4x_internal | |
| 2025 | |
| 2026 pxor %xmm0,%xmm0 | |
| 2027 leaq 48(%rsp),%rax | |
| 2028 movq 40(%rsp),%rsi | |
| 2029 jmp L$from_mont_zero | |
| 2030 | |
| 2031 .p2align 5 | |
# Wipe the scratch area: 64 bytes are cleared per pass while %r9 (the byte
# length, positive again after __bn_post4x_internal) drops by 32, so twice
# the byte length is cleared in total.
| 2032 L$from_mont_zero: | |
| 2033 movdqa %xmm0,0(%rax) | |
| 2034 movdqa %xmm0,16(%rax) | |
| 2035 movdqa %xmm0,32(%rax) | |
| 2036 movdqa %xmm0,48(%rax) | |
| 2037 leaq 64(%rax),%rax | |
| 2038 subq $32,%r9 | |
| 2039 jnz L$from_mont_zero | |
| 2040 | |
# Return 1, restore the six pushed registers relative to the entry stack
# pointer held in %rsi, and switch %rsp back to it.
| 2041 movq $1,%rax | |
| 2042 movq -48(%rsi),%r15 | |
| 2043 movq -40(%rsi),%r14 | |
| 2044 movq -32(%rsi),%r13 | |
| 2045 movq -24(%rsi),%r12 | |
| 2046 movq -16(%rsi),%rbp | |
| 2047 movq -8(%rsi),%rbx | |
| 2048 leaq (%rsi),%rsp | |
| 2049 L$from_epilogue: | |
# 0xf3,0xc3 is the two-byte rep ret encoding.
| 2050 .byte 0xf3,0xc3 | |
| 2051 | |
# _bn_scatter5 -- strided store of a run of 64-bit words into a table.
#
#   %rdi  source words
#   %esi  number of words to store; zero returns immediately
#   %rdx  table base
#   %rcx  slot index; word i is written to table + 8*index + 256*i
# Clobbers %rax, %rdx, %rdi, %esi and the flags.
| 2052 .globl _bn_scatter5 | |
| 2053 .private_extern _bn_scatter5 | |
| 2054 | |
| 2055 .p2align 4 | |
| 2056 _bn_scatter5: | |
# Nothing to do for an empty run.
| 2057 testl %esi,%esi | |
| 2058 je L$scatter_epilogue | |
# Point %rdx at the chosen slot inside the first 256-byte row.
| 2059 leaq (%rdx,%rcx,8),%rdx | |
| 2060 L$scatter: | |
# Move one word, then step the source by one word and the table by one row.
| 2061 movq (%rdi),%rax | |
| 2062 movq %rax,(%rdx) | |
| 2063 addq $8,%rdi | |
| 2064 addq $256,%rdx | |
| 2065 subl $1,%esi | |
| 2066 jne L$scatter | |
| 2067 L$scatter_epilogue: | |
# 0xf3,0xc3 is the two-byte rep ret encoding.
| 2068 .byte 0xf3,0xc3 | |
| 2069 | |
| 2070 | |
# _bn_gather5 -- reads one slot out of a table laid out in 256-byte rows
# (32 qword slots per row) and writes one 64-bit word per row to the
# output buffer.
#
#   %rdi  output buffer, one qword written per loop pass
#   %esi  number of rows to process; there is no check for zero, the loop
#         body always runs at least once
#   %rdx  table base; rows are read with movdqa, so the table has to be
#         16-byte aligned
#   %ecx  slot index; compared against 0..31, any other value matches no
#         mask and yields zero words
# Every pass loads the whole 256-byte row and selects the slot with AND/OR
# masks, so the addresses that are read do not depend on %ecx.
# Uses 264 bytes of stack (realigned to 16) for the 16 masks; %r10 keeps
# the entry stack pointer. Clobbers %rax, %r10, %r11, %xmm0-%xmm5.
| 2071 .globl _bn_gather5 | |
| 2072 .private_extern _bn_gather5 | |
| 2073 | |
| 2074 .p2align 5 | |
| 2075 _bn_gather5: | |
| 2076 L$SEH_begin_bn_gather5: | |
| 2077 | |
# Hand-encoded leaq (%rsp),%r10 followed by subq $264,%rsp.
| 2078 .byte 0x4c,0x8d,0x14,0x24 | |
| 2079 .byte 0x48,0x81,0xec,0x08,0x01,0x00,0x00 | |
| 2080 leaq L$inc(%rip),%rax | |
| 2081 andq $-16,%rsp | |
| 2082 | |
# %xmm5 = index broadcast to all four lanes (after pshufd below),
# %xmm0 = {0,0,1,1}, %xmm1 = %xmm4 = {2,2,2,2} from L$inc.
# %r11 and %rax are biased by 128 so that all sixteen 16-byte accesses use
# displacements in the range -128..112.
| 2083 movd %ecx,%xmm5 | |
| 2084 movdqa 0(%rax),%xmm0 | |
| 2085 movdqa 16(%rax),%xmm1 | |
| 2086 leaq 128(%rdx),%r11 | |
| 2087 leaq 128(%rsp),%rax | |
| 2088 | |
# Mask generation: each step adds {2,2,2,2} to get the next pair of slot
# numbers and compares the previous pair with the index. The 16 results
# (all-ones qword where slot number == index) are stored at -128..112(%rax).
| 2089 pshufd $0,%xmm5,%xmm5 | |
| 2090 movdqa %xmm1,%xmm4 | |
| 2091 movdqa %xmm1,%xmm2 | |
| 2092 paddd %xmm0,%xmm1 | |
| 2093 pcmpeqd %xmm5,%xmm0 | |
| 2094 movdqa %xmm4,%xmm3 | |
| 2095 | |
| 2096 paddd %xmm1,%xmm2 | |
| 2097 pcmpeqd %xmm5,%xmm1 | |
| 2098 movdqa %xmm0,-128(%rax) | |
| 2099 movdqa %xmm4,%xmm0 | |
| 2100 | |
| 2101 paddd %xmm2,%xmm3 | |
| 2102 pcmpeqd %xmm5,%xmm2 | |
| 2103 movdqa %xmm1,-112(%rax) | |
| 2104 movdqa %xmm4,%xmm1 | |
| 2105 | |
| 2106 paddd %xmm3,%xmm0 | |
| 2107 pcmpeqd %xmm5,%xmm3 | |
| 2108 movdqa %xmm2,-96(%rax) | |
| 2109 movdqa %xmm4,%xmm2 | |
| 2110 paddd %xmm0,%xmm1 | |
| 2111 pcmpeqd %xmm5,%xmm0 | |
| 2112 movdqa %xmm3,-80(%rax) | |
| 2113 movdqa %xmm4,%xmm3 | |
| 2114 | |
| 2115 paddd %xmm1,%xmm2 | |
| 2116 pcmpeqd %xmm5,%xmm1 | |
| 2117 movdqa %xmm0,-64(%rax) | |
| 2118 movdqa %xmm4,%xmm0 | |
| 2119 | |
| 2120 paddd %xmm2,%xmm3 | |
| 2121 pcmpeqd %xmm5,%xmm2 | |
| 2122 movdqa %xmm1,-48(%rax) | |
| 2123 movdqa %xmm4,%xmm1 | |
| 2124 | |
| 2125 paddd %xmm3,%xmm0 | |
| 2126 pcmpeqd %xmm5,%xmm3 | |
| 2127 movdqa %xmm2,-32(%rax) | |
| 2128 movdqa %xmm4,%xmm2 | |
| 2129 paddd %xmm0,%xmm1 | |
| 2130 pcmpeqd %xmm5,%xmm0 | |
| 2131 movdqa %xmm3,-16(%rax) | |
| 2132 movdqa %xmm4,%xmm3 | |
| 2133 | |
| 2134 paddd %xmm1,%xmm2 | |
| 2135 pcmpeqd %xmm5,%xmm1 | |
| 2136 movdqa %xmm0,0(%rax) | |
| 2137 movdqa %xmm4,%xmm0 | |
| 2138 | |
| 2139 paddd %xmm2,%xmm3 | |
| 2140 pcmpeqd %xmm5,%xmm2 | |
| 2141 movdqa %xmm1,16(%rax) | |
| 2142 movdqa %xmm4,%xmm1 | |
| 2143 | |
| 2144 paddd %xmm3,%xmm0 | |
| 2145 pcmpeqd %xmm5,%xmm3 | |
| 2146 movdqa %xmm2,32(%rax) | |
| 2147 movdqa %xmm4,%xmm2 | |
| 2148 paddd %xmm0,%xmm1 | |
| 2149 pcmpeqd %xmm5,%xmm0 | |
| 2150 movdqa %xmm3,48(%rax) | |
| 2151 movdqa %xmm4,%xmm3 | |
| 2152 | |
| 2153 paddd %xmm1,%xmm2 | |
| 2154 pcmpeqd %xmm5,%xmm1 | |
| 2155 movdqa %xmm0,64(%rax) | |
| 2156 movdqa %xmm4,%xmm0 | |
| 2157 | |
| 2158 paddd %xmm2,%xmm3 | |
| 2159 pcmpeqd %xmm5,%xmm2 | |
| 2160 movdqa %xmm1,80(%rax) | |
| 2161 movdqa %xmm4,%xmm1 | |
| 2162 | |
| 2163 paddd %xmm3,%xmm0 | |
| 2164 pcmpeqd %xmm5,%xmm3 | |
| 2165 movdqa %xmm2,96(%rax) | |
| 2166 movdqa %xmm4,%xmm2 | |
| 2167 movdqa %xmm3,112(%rax) | |
| 2168 jmp L$gather | |
| 2169 | |
| 2170 .p2align 5 | |
# One pass per row: AND each 16-byte chunk of the row with its mask and OR
# the results into two accumulators (%xmm4, %xmm5).
| 2171 L$gather: | |
| 2172 pxor %xmm4,%xmm4 | |
| 2173 pxor %xmm5,%xmm5 | |
| 2174 movdqa -128(%r11),%xmm0 | |
| 2175 movdqa -112(%r11),%xmm1 | |
| 2176 movdqa -96(%r11),%xmm2 | |
| 2177 pand -128(%rax),%xmm0 | |
| 2178 movdqa -80(%r11),%xmm3 | |
| 2179 pand -112(%rax),%xmm1 | |
| 2180 por %xmm0,%xmm4 | |
| 2181 pand -96(%rax),%xmm2 | |
| 2182 por %xmm1,%xmm5 | |
| 2183 pand -80(%rax),%xmm3 | |
| 2184 por %xmm2,%xmm4 | |
| 2185 por %xmm3,%xmm5 | |
| 2186 movdqa -64(%r11),%xmm0 | |
| 2187 movdqa -48(%r11),%xmm1 | |
| 2188 movdqa -32(%r11),%xmm2 | |
| 2189 pand -64(%rax),%xmm0 | |
| 2190 movdqa -16(%r11),%xmm3 | |
| 2191 pand -48(%rax),%xmm1 | |
| 2192 por %xmm0,%xmm4 | |
| 2193 pand -32(%rax),%xmm2 | |
| 2194 por %xmm1,%xmm5 | |
| 2195 pand -16(%rax),%xmm3 | |
| 2196 por %xmm2,%xmm4 | |
| 2197 por %xmm3,%xmm5 | |
| 2198 movdqa 0(%r11),%xmm0 | |
| 2199 movdqa 16(%r11),%xmm1 | |
| 2200 movdqa 32(%r11),%xmm2 | |
| 2201 pand 0(%rax),%xmm0 | |
| 2202 movdqa 48(%r11),%xmm3 | |
| 2203 pand 16(%rax),%xmm1 | |
| 2204 por %xmm0,%xmm4 | |
| 2205 pand 32(%rax),%xmm2 | |
| 2206 por %xmm1,%xmm5 | |
| 2207 pand 48(%rax),%xmm3 | |
| 2208 por %xmm2,%xmm4 | |
| 2209 por %xmm3,%xmm5 | |
| 2210 movdqa 64(%r11),%xmm0 | |
| 2211 movdqa 80(%r11),%xmm1 | |
| 2212 movdqa 96(%r11),%xmm2 | |
| 2213 pand 64(%rax),%xmm0 | |
| 2214 movdqa 112(%r11),%xmm3 | |
| 2215 pand 80(%rax),%xmm1 | |
| 2216 por %xmm0,%xmm4 | |
| 2217 pand 96(%rax),%xmm2 | |
| 2218 por %xmm1,%xmm5 | |
| 2219 pand 112(%rax),%xmm3 | |
| 2220 por %xmm2,%xmm4 | |
| 2221 por %xmm3,%xmm5 | |
# Merge the two accumulators, then fold the high qword onto the low one
# (pshufd $0x4e swaps the qword halves) and store the selected word.
| 2222 por %xmm5,%xmm4 | |
| 2223 leaq 256(%r11),%r11 | |
| 2224 pshufd $0x4e,%xmm4,%xmm0 | |
| 2225 por %xmm4,%xmm0 | |
| 2226 movq %xmm0,(%rdi) | |
| 2227 leaq 8(%rdi),%rdi | |
| 2228 subl $1,%esi | |
| 2229 jnz L$gather | |
| 2230 | |
# Restore the entry stack pointer saved in %r10 and return (rep ret).
| 2231 leaq (%r10),%rsp | |
| 2232 .byte 0xf3,0xc3 | |
| 2233 L$SEH_end_bn_gather5: | |
| 2234 | |
# L$inc -- two 16-byte constants, 64-byte aligned: {0,0,1,1} and {2,2,2,2}.
# _bn_gather5 loads them as the starting slot numbers and the step used to
# build its comparison masks. The .byte data that follows is a
# NUL-terminated ASCII string naming this module and its author.
| 2235 .p2align 6 | |
| 2236 L$inc: | |
| 2237 .long 0,0, 1,1 | |
| 2238 .long 2,2, 2,2 | |
| 2239 .byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105
,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97
,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71
,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,1
11,114,103,62,0 | |
| 2240 #endif | |
| OLD | NEW |