OLD | NEW |
(Empty) | |
; Select the code section according to the assembler output format:
;   obj   -> 32-bit "code" section, class=code, 64-byte aligned
;   win32 -> .text, 64-byte aligned; additionally Yasm older than 1.1.0 is
;            rejected, and for non-Yasm assemblers the symbol $@feat.00 is
;            defined as 1 (Yasm defines it on its own -- see the SafeSEH
;            manual page linked below)
;   other -> plain .text code section
| 1 %ifidn __OUTPUT_FORMAT__,obj |
| 2 section code use32 class=code align=64 |
| 3 %elifidn __OUTPUT_FORMAT__,win32 |
| 4 %ifdef __YASM_VERSION_ID__ |
| 5 %if __YASM_VERSION_ID__ < 01010000h |
| 6 %error yasm version 1.1.0 or later needed. |
| 7 %endif |
| 8 ; Yasm automatically includes .00 and complains about redefining it. |
| 9 ; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html |
| 10 %else |
| 11 $@feat.00 equ 1 |
| 12 %endif |
| 13 section .text code align=64 |
| 14 %else |
| 15 section .text code |
| 16 %endif |
; ---------------------------------------------------------------------
; _bn_mul_comba8: 8-word by 8-word multiply, one result column at a time.
;
; Stack arguments (32-bit, read relative to esp after the pushes below):
;   1st = r  -> receives 16 dwords, r[0]..r[15]
;   2nd = a  -> 8 dwords, read through esi
;   3rd = b  -> 8 dwords, read through edi
;
; For each result word k, every product a[i]*b[j] with i+j == k is added
; into a three-register accumulator. ebx, ecx and ebp take turns as the
; low / middle / high word: the low word is stored to r[k], and the same
; register is then cleared with xor to become the high word of the next
; column.
;
; eax and edx are consumed by every `mul`, so the r pointer is re-read
; from the stack ([20+esp]) each time a word is stored. The `mov` loads
; placed between add/adc do not modify flags, so the carry chain
; survives the interleaving.
;
; esi, edi, ebp and ebx are saved and restored; eax, ecx and edx are
; overwritten and not restored.
; ---------------------------------------------------------------------
| 17 global _bn_mul_comba8 |
| 18 align 16 |
| 19 _bn_mul_comba8: |
| 20 L$_bn_mul_comba8_begin: |
| 21 push esi |
; esi = a: one push so far, so [12+esp] is the 2nd stack argument
| 22 mov esi,DWORD [12+esp] |
| 23 push edi |
; edi = b: two pushes so far, so [20+esp] is the 3rd stack argument
| 24 mov edi,DWORD [20+esp] |
| 25 push ebp |
| 26 push ebx |
; From here on (four pushes) [20+esp] is the 1st stack argument, r.
| 27 xor ebx,ebx |
| 28 mov eax,DWORD [esi] |
| 29 xor ecx,ecx |
| 30 mov edx,DWORD [edi] |
| 31 ; ################## Calculate word 0 |
| 32 xor ebp,ebp |
| 33 ; mul a[0]*b[0] |
| 34 mul edx |
| 35 add ebx,eax |
| 36 mov eax,DWORD [20+esp] |
| 37 adc ecx,edx |
| 38 mov edx,DWORD [edi] |
| 39 adc ebp,0 |
| 40 mov DWORD [eax],ebx |
| 41 mov eax,DWORD [4+esi] |
| 42 ; saved r[0] |
| 43 ; ################## Calculate word 1 |
| 44 xor ebx,ebx |
| 45 ; mul a[1]*b[0] |
| 46 mul edx |
| 47 add ecx,eax |
| 48 mov eax,DWORD [esi] |
| 49 adc ebp,edx |
| 50 mov edx,DWORD [4+edi] |
| 51 adc ebx,0 |
| 52 ; mul a[0]*b[1] |
| 53 mul edx |
| 54 add ecx,eax |
| 55 mov eax,DWORD [20+esp] |
| 56 adc ebp,edx |
| 57 mov edx,DWORD [edi] |
| 58 adc ebx,0 |
| 59 mov DWORD [4+eax],ecx |
| 60 mov eax,DWORD [8+esi] |
| 61 ; saved r[1] |
| 62 ; ################## Calculate word 2 |
| 63 xor ecx,ecx |
| 64 ; mul a[2]*b[0] |
| 65 mul edx |
| 66 add ebp,eax |
| 67 mov eax,DWORD [4+esi] |
| 68 adc ebx,edx |
| 69 mov edx,DWORD [4+edi] |
| 70 adc ecx,0 |
| 71 ; mul a[1]*b[1] |
| 72 mul edx |
| 73 add ebp,eax |
| 74 mov eax,DWORD [esi] |
| 75 adc ebx,edx |
| 76 mov edx,DWORD [8+edi] |
| 77 adc ecx,0 |
| 78 ; mul a[0]*b[2] |
| 79 mul edx |
| 80 add ebp,eax |
| 81 mov eax,DWORD [20+esp] |
| 82 adc ebx,edx |
| 83 mov edx,DWORD [edi] |
| 84 adc ecx,0 |
| 85 mov DWORD [8+eax],ebp |
| 86 mov eax,DWORD [12+esi] |
| 87 ; saved r[2] |
| 88 ; ################## Calculate word 3 |
| 89 xor ebp,ebp |
| 90 ; mul a[3]*b[0] |
| 91 mul edx |
| 92 add ebx,eax |
| 93 mov eax,DWORD [8+esi] |
| 94 adc ecx,edx |
| 95 mov edx,DWORD [4+edi] |
| 96 adc ebp,0 |
| 97 ; mul a[2]*b[1] |
| 98 mul edx |
| 99 add ebx,eax |
| 100 mov eax,DWORD [4+esi] |
| 101 adc ecx,edx |
| 102 mov edx,DWORD [8+edi] |
| 103 adc ebp,0 |
| 104 ; mul a[1]*b[2] |
| 105 mul edx |
| 106 add ebx,eax |
| 107 mov eax,DWORD [esi] |
| 108 adc ecx,edx |
| 109 mov edx,DWORD [12+edi] |
| 110 adc ebp,0 |
| 111 ; mul a[0]*b[3] |
| 112 mul edx |
| 113 add ebx,eax |
| 114 mov eax,DWORD [20+esp] |
| 115 adc ecx,edx |
| 116 mov edx,DWORD [edi] |
| 117 adc ebp,0 |
| 118 mov DWORD [12+eax],ebx |
| 119 mov eax,DWORD [16+esi] |
| 120 ; saved r[3] |
| 121 ; ################## Calculate word 4 |
| 122 xor ebx,ebx |
| 123 ; mul a[4]*b[0] |
| 124 mul edx |
| 125 add ecx,eax |
| 126 mov eax,DWORD [12+esi] |
| 127 adc ebp,edx |
| 128 mov edx,DWORD [4+edi] |
| 129 adc ebx,0 |
| 130 ; mul a[3]*b[1] |
| 131 mul edx |
| 132 add ecx,eax |
| 133 mov eax,DWORD [8+esi] |
| 134 adc ebp,edx |
| 135 mov edx,DWORD [8+edi] |
| 136 adc ebx,0 |
| 137 ; mul a[2]*b[2] |
| 138 mul edx |
| 139 add ecx,eax |
| 140 mov eax,DWORD [4+esi] |
| 141 adc ebp,edx |
| 142 mov edx,DWORD [12+edi] |
| 143 adc ebx,0 |
| 144 ; mul a[1]*b[3] |
| 145 mul edx |
| 146 add ecx,eax |
| 147 mov eax,DWORD [esi] |
| 148 adc ebp,edx |
| 149 mov edx,DWORD [16+edi] |
| 150 adc ebx,0 |
| 151 ; mul a[0]*b[4] |
| 152 mul edx |
| 153 add ecx,eax |
| 154 mov eax,DWORD [20+esp] |
| 155 adc ebp,edx |
| 156 mov edx,DWORD [edi] |
| 157 adc ebx,0 |
| 158 mov DWORD [16+eax],ecx |
| 159 mov eax,DWORD [20+esi] |
| 160 ; saved r[4] |
| 161 ; ################## Calculate word 5 |
| 162 xor ecx,ecx |
| 163 ; mul a[5]*b[0] |
| 164 mul edx |
| 165 add ebp,eax |
| 166 mov eax,DWORD [16+esi] |
| 167 adc ebx,edx |
| 168 mov edx,DWORD [4+edi] |
| 169 adc ecx,0 |
| 170 ; mul a[4]*b[1] |
| 171 mul edx |
| 172 add ebp,eax |
| 173 mov eax,DWORD [12+esi] |
| 174 adc ebx,edx |
| 175 mov edx,DWORD [8+edi] |
| 176 adc ecx,0 |
| 177 ; mul a[3]*b[2] |
| 178 mul edx |
| 179 add ebp,eax |
| 180 mov eax,DWORD [8+esi] |
| 181 adc ebx,edx |
| 182 mov edx,DWORD [12+edi] |
| 183 adc ecx,0 |
| 184 ; mul a[2]*b[3] |
| 185 mul edx |
| 186 add ebp,eax |
| 187 mov eax,DWORD [4+esi] |
| 188 adc ebx,edx |
| 189 mov edx,DWORD [16+edi] |
| 190 adc ecx,0 |
| 191 ; mul a[1]*b[4] |
| 192 mul edx |
| 193 add ebp,eax |
| 194 mov eax,DWORD [esi] |
| 195 adc ebx,edx |
| 196 mov edx,DWORD [20+edi] |
| 197 adc ecx,0 |
| 198 ; mul a[0]*b[5] |
| 199 mul edx |
| 200 add ebp,eax |
| 201 mov eax,DWORD [20+esp] |
| 202 adc ebx,edx |
| 203 mov edx,DWORD [edi] |
| 204 adc ecx,0 |
| 205 mov DWORD [20+eax],ebp |
| 206 mov eax,DWORD [24+esi] |
| 207 ; saved r[5] |
| 208 ; ################## Calculate word 6 |
| 209 xor ebp,ebp |
| 210 ; mul a[6]*b[0] |
| 211 mul edx |
| 212 add ebx,eax |
| 213 mov eax,DWORD [20+esi] |
| 214 adc ecx,edx |
| 215 mov edx,DWORD [4+edi] |
| 216 adc ebp,0 |
| 217 ; mul a[5]*b[1] |
| 218 mul edx |
| 219 add ebx,eax |
| 220 mov eax,DWORD [16+esi] |
| 221 adc ecx,edx |
| 222 mov edx,DWORD [8+edi] |
| 223 adc ebp,0 |
| 224 ; mul a[4]*b[2] |
| 225 mul edx |
| 226 add ebx,eax |
| 227 mov eax,DWORD [12+esi] |
| 228 adc ecx,edx |
| 229 mov edx,DWORD [12+edi] |
| 230 adc ebp,0 |
| 231 ; mul a[3]*b[3] |
| 232 mul edx |
| 233 add ebx,eax |
| 234 mov eax,DWORD [8+esi] |
| 235 adc ecx,edx |
| 236 mov edx,DWORD [16+edi] |
| 237 adc ebp,0 |
| 238 ; mul a[2]*b[4] |
| 239 mul edx |
| 240 add ebx,eax |
| 241 mov eax,DWORD [4+esi] |
| 242 adc ecx,edx |
| 243 mov edx,DWORD [20+edi] |
| 244 adc ebp,0 |
| 245 ; mul a[1]*b[5] |
| 246 mul edx |
| 247 add ebx,eax |
| 248 mov eax,DWORD [esi] |
| 249 adc ecx,edx |
| 250 mov edx,DWORD [24+edi] |
| 251 adc ebp,0 |
| 252 ; mul a[0]*b[6] |
| 253 mul edx |
| 254 add ebx,eax |
| 255 mov eax,DWORD [20+esp] |
| 256 adc ecx,edx |
| 257 mov edx,DWORD [edi] |
| 258 adc ebp,0 |
| 259 mov DWORD [24+eax],ebx |
| 260 mov eax,DWORD [28+esi] |
| 261 ; saved r[6] |
| 262 ; ################## Calculate word 7 |
| 263 xor ebx,ebx |
| 264 ; mul a[7]*b[0] |
| 265 mul edx |
| 266 add ecx,eax |
| 267 mov eax,DWORD [24+esi] |
| 268 adc ebp,edx |
| 269 mov edx,DWORD [4+edi] |
| 270 adc ebx,0 |
| 271 ; mul a[6]*b[1] |
| 272 mul edx |
| 273 add ecx,eax |
| 274 mov eax,DWORD [20+esi] |
| 275 adc ebp,edx |
| 276 mov edx,DWORD [8+edi] |
| 277 adc ebx,0 |
| 278 ; mul a[5]*b[2] |
| 279 mul edx |
| 280 add ecx,eax |
| 281 mov eax,DWORD [16+esi] |
| 282 adc ebp,edx |
| 283 mov edx,DWORD [12+edi] |
| 284 adc ebx,0 |
| 285 ; mul a[4]*b[3] |
| 286 mul edx |
| 287 add ecx,eax |
| 288 mov eax,DWORD [12+esi] |
| 289 adc ebp,edx |
| 290 mov edx,DWORD [16+edi] |
| 291 adc ebx,0 |
| 292 ; mul a[3]*b[4] |
| 293 mul edx |
| 294 add ecx,eax |
| 295 mov eax,DWORD [8+esi] |
| 296 adc ebp,edx |
| 297 mov edx,DWORD [20+edi] |
| 298 adc ebx,0 |
| 299 ; mul a[2]*b[5] |
| 300 mul edx |
| 301 add ecx,eax |
| 302 mov eax,DWORD [4+esi] |
| 303 adc ebp,edx |
| 304 mov edx,DWORD [24+edi] |
| 305 adc ebx,0 |
| 306 ; mul a[1]*b[6] |
| 307 mul edx |
| 308 add ecx,eax |
| 309 mov eax,DWORD [esi] |
| 310 adc ebp,edx |
| 311 mov edx,DWORD [28+edi] |
| 312 adc ebx,0 |
| 313 ; mul a[0]*b[7] |
| 314 mul edx |
| 315 add ecx,eax |
| 316 mov eax,DWORD [20+esp] |
| 317 adc ebp,edx |
| 318 mov edx,DWORD [4+edi] |
| 319 adc ebx,0 |
| 320 mov DWORD [28+eax],ecx |
| 321 mov eax,DWORD [28+esi] |
| 322 ; saved r[7] |
; From word 8 on, each column starts at a[7] and the first b index
; rises by one per column.
| 323 ; ################## Calculate word 8 |
| 324 xor ecx,ecx |
| 325 ; mul a[7]*b[1] |
| 326 mul edx |
| 327 add ebp,eax |
| 328 mov eax,DWORD [24+esi] |
| 329 adc ebx,edx |
| 330 mov edx,DWORD [8+edi] |
| 331 adc ecx,0 |
| 332 ; mul a[6]*b[2] |
| 333 mul edx |
| 334 add ebp,eax |
| 335 mov eax,DWORD [20+esi] |
| 336 adc ebx,edx |
| 337 mov edx,DWORD [12+edi] |
| 338 adc ecx,0 |
| 339 ; mul a[5]*b[3] |
| 340 mul edx |
| 341 add ebp,eax |
| 342 mov eax,DWORD [16+esi] |
| 343 adc ebx,edx |
| 344 mov edx,DWORD [16+edi] |
| 345 adc ecx,0 |
| 346 ; mul a[4]*b[4] |
| 347 mul edx |
| 348 add ebp,eax |
| 349 mov eax,DWORD [12+esi] |
| 350 adc ebx,edx |
| 351 mov edx,DWORD [20+edi] |
| 352 adc ecx,0 |
| 353 ; mul a[3]*b[5] |
| 354 mul edx |
| 355 add ebp,eax |
| 356 mov eax,DWORD [8+esi] |
| 357 adc ebx,edx |
| 358 mov edx,DWORD [24+edi] |
| 359 adc ecx,0 |
| 360 ; mul a[2]*b[6] |
| 361 mul edx |
| 362 add ebp,eax |
| 363 mov eax,DWORD [4+esi] |
| 364 adc ebx,edx |
| 365 mov edx,DWORD [28+edi] |
| 366 adc ecx,0 |
| 367 ; mul a[1]*b[7] |
| 368 mul edx |
| 369 add ebp,eax |
| 370 mov eax,DWORD [20+esp] |
| 371 adc ebx,edx |
| 372 mov edx,DWORD [8+edi] |
| 373 adc ecx,0 |
| 374 mov DWORD [32+eax],ebp |
| 375 mov eax,DWORD [28+esi] |
| 376 ; saved r[8] |
| 377 ; ################## Calculate word 9 |
| 378 xor ebp,ebp |
| 379 ; mul a[7]*b[2] |
| 380 mul edx |
| 381 add ebx,eax |
| 382 mov eax,DWORD [24+esi] |
| 383 adc ecx,edx |
| 384 mov edx,DWORD [12+edi] |
| 385 adc ebp,0 |
| 386 ; mul a[6]*b[3] |
| 387 mul edx |
| 388 add ebx,eax |
| 389 mov eax,DWORD [20+esi] |
| 390 adc ecx,edx |
| 391 mov edx,DWORD [16+edi] |
| 392 adc ebp,0 |
| 393 ; mul a[5]*b[4] |
| 394 mul edx |
| 395 add ebx,eax |
| 396 mov eax,DWORD [16+esi] |
| 397 adc ecx,edx |
| 398 mov edx,DWORD [20+edi] |
| 399 adc ebp,0 |
| 400 ; mul a[4]*b[5] |
| 401 mul edx |
| 402 add ebx,eax |
| 403 mov eax,DWORD [12+esi] |
| 404 adc ecx,edx |
| 405 mov edx,DWORD [24+edi] |
| 406 adc ebp,0 |
| 407 ; mul a[3]*b[6] |
| 408 mul edx |
| 409 add ebx,eax |
| 410 mov eax,DWORD [8+esi] |
| 411 adc ecx,edx |
| 412 mov edx,DWORD [28+edi] |
| 413 adc ebp,0 |
| 414 ; mul a[2]*b[7] |
| 415 mul edx |
| 416 add ebx,eax |
| 417 mov eax,DWORD [20+esp] |
| 418 adc ecx,edx |
| 419 mov edx,DWORD [12+edi] |
| 420 adc ebp,0 |
| 421 mov DWORD [36+eax],ebx |
| 422 mov eax,DWORD [28+esi] |
| 423 ; saved r[9] |
| 424 ; ################## Calculate word 10 |
| 425 xor ebx,ebx |
| 426 ; mul a[7]*b[3] |
| 427 mul edx |
| 428 add ecx,eax |
| 429 mov eax,DWORD [24+esi] |
| 430 adc ebp,edx |
| 431 mov edx,DWORD [16+edi] |
| 432 adc ebx,0 |
| 433 ; mul a[6]*b[4] |
| 434 mul edx |
| 435 add ecx,eax |
| 436 mov eax,DWORD [20+esi] |
| 437 adc ebp,edx |
| 438 mov edx,DWORD [20+edi] |
| 439 adc ebx,0 |
| 440 ; mul a[5]*b[5] |
| 441 mul edx |
| 442 add ecx,eax |
| 443 mov eax,DWORD [16+esi] |
| 444 adc ebp,edx |
| 445 mov edx,DWORD [24+edi] |
| 446 adc ebx,0 |
| 447 ; mul a[4]*b[6] |
| 448 mul edx |
| 449 add ecx,eax |
| 450 mov eax,DWORD [12+esi] |
| 451 adc ebp,edx |
| 452 mov edx,DWORD [28+edi] |
| 453 adc ebx,0 |
| 454 ; mul a[3]*b[7] |
| 455 mul edx |
| 456 add ecx,eax |
| 457 mov eax,DWORD [20+esp] |
| 458 adc ebp,edx |
| 459 mov edx,DWORD [16+edi] |
| 460 adc ebx,0 |
| 461 mov DWORD [40+eax],ecx |
| 462 mov eax,DWORD [28+esi] |
| 463 ; saved r[10] |
| 464 ; ################## Calculate word 11 |
| 465 xor ecx,ecx |
| 466 ; mul a[7]*b[4] |
| 467 mul edx |
| 468 add ebp,eax |
| 469 mov eax,DWORD [24+esi] |
| 470 adc ebx,edx |
| 471 mov edx,DWORD [20+edi] |
| 472 adc ecx,0 |
| 473 ; mul a[6]*b[5] |
| 474 mul edx |
| 475 add ebp,eax |
| 476 mov eax,DWORD [20+esi] |
| 477 adc ebx,edx |
| 478 mov edx,DWORD [24+edi] |
| 479 adc ecx,0 |
| 480 ; mul a[5]*b[6] |
| 481 mul edx |
| 482 add ebp,eax |
| 483 mov eax,DWORD [16+esi] |
| 484 adc ebx,edx |
| 485 mov edx,DWORD [28+edi] |
| 486 adc ecx,0 |
| 487 ; mul a[4]*b[7] |
| 488 mul edx |
| 489 add ebp,eax |
| 490 mov eax,DWORD [20+esp] |
| 491 adc ebx,edx |
| 492 mov edx,DWORD [20+edi] |
| 493 adc ecx,0 |
| 494 mov DWORD [44+eax],ebp |
| 495 mov eax,DWORD [28+esi] |
| 496 ; saved r[11] |
| 497 ; ################## Calculate word 12 |
| 498 xor ebp,ebp |
| 499 ; mul a[7]*b[5] |
| 500 mul edx |
| 501 add ebx,eax |
| 502 mov eax,DWORD [24+esi] |
| 503 adc ecx,edx |
| 504 mov edx,DWORD [24+edi] |
| 505 adc ebp,0 |
| 506 ; mul a[6]*b[6] |
| 507 mul edx |
| 508 add ebx,eax |
| 509 mov eax,DWORD [20+esi] |
| 510 adc ecx,edx |
| 511 mov edx,DWORD [28+edi] |
| 512 adc ebp,0 |
| 513 ; mul a[5]*b[7] |
| 514 mul edx |
| 515 add ebx,eax |
| 516 mov eax,DWORD [20+esp] |
| 517 adc ecx,edx |
| 518 mov edx,DWORD [24+edi] |
| 519 adc ebp,0 |
| 520 mov DWORD [48+eax],ebx |
| 521 mov eax,DWORD [28+esi] |
| 522 ; saved r[12] |
| 523 ; ################## Calculate word 13 |
| 524 xor ebx,ebx |
| 525 ; mul a[7]*b[6] |
| 526 mul edx |
| 527 add ecx,eax |
| 528 mov eax,DWORD [24+esi] |
| 529 adc ebp,edx |
| 530 mov edx,DWORD [28+edi] |
| 531 adc ebx,0 |
| 532 ; mul a[6]*b[7] |
| 533 mul edx |
| 534 add ecx,eax |
| 535 mov eax,DWORD [20+esp] |
| 536 adc ebp,edx |
| 537 mov edx,DWORD [28+edi] |
| 538 adc ebx,0 |
| 539 mov DWORD [52+eax],ecx |
| 540 mov eax,DWORD [28+esi] |
| 541 ; saved r[13] |
| 542 ; ################## Calculate word 14 |
| 543 xor ecx,ecx |
| 544 ; mul a[7]*b[7] |
| 545 mul edx |
| 546 add ebp,eax |
| 547 mov eax,DWORD [20+esp] |
| 548 adc ebx,edx |
; NOTE(review): the carry collected into ecx here is never stored or
; read again before ret.
| 549 adc ecx,0 |
| 550 mov DWORD [56+eax],ebp |
| 551 ; saved r[14] |
| 552 ; save r[15] |
| 553 mov DWORD [60+eax],ebx |
; Restore the saved registers in reverse push order.
| 554 pop ebx |
| 555 pop ebp |
| 556 pop edi |
| 557 pop esi |
| 558 ret |
; ---------------------------------------------------------------------
; _bn_mul_comba4: 4-word by 4-word multiply, one result column at a time.
;
; Stack arguments (32-bit, read relative to esp after the pushes below):
;   1st = r  -> receives 8 dwords, r[0]..r[7]
;   2nd = a  -> 4 dwords, read through esi
;   3rd = b  -> 4 dwords, read through edi
;
; For each result word k, every product a[i]*b[j] with i+j == k is added
; into a three-register accumulator. ebx, ecx and ebp take turns as the
; low / middle / high word; after the low word is stored to r[k] the
; same register is cleared with xor to become the next high word.
;
; eax is consumed by every `mul`, so the r pointer is re-read from
; [20+esp] before each store. The `mov` loads between add/adc leave the
; flags untouched, so the carry chain is preserved.
;
; esi, edi, ebp and ebx are saved and restored; eax, ecx and edx are
; overwritten and not restored.
; ---------------------------------------------------------------------
| 559 global _bn_mul_comba4 |
| 560 align 16 |
| 561 _bn_mul_comba4: |
| 562 L$_bn_mul_comba4_begin: |
| 563 push esi |
; esi = a: one push so far, so [12+esp] is the 2nd stack argument
| 564 mov esi,DWORD [12+esp] |
| 565 push edi |
; edi = b: two pushes so far, so [20+esp] is the 3rd stack argument
| 566 mov edi,DWORD [20+esp] |
| 567 push ebp |
| 568 push ebx |
; From here on (four pushes) [20+esp] is the 1st stack argument, r.
| 569 xor ebx,ebx |
| 570 mov eax,DWORD [esi] |
| 571 xor ecx,ecx |
| 572 mov edx,DWORD [edi] |
| 573 ; ################## Calculate word 0 |
| 574 xor ebp,ebp |
| 575 ; mul a[0]*b[0] |
| 576 mul edx |
| 577 add ebx,eax |
| 578 mov eax,DWORD [20+esp] |
| 579 adc ecx,edx |
| 580 mov edx,DWORD [edi] |
| 581 adc ebp,0 |
| 582 mov DWORD [eax],ebx |
| 583 mov eax,DWORD [4+esi] |
| 584 ; saved r[0] |
| 585 ; ################## Calculate word 1 |
| 586 xor ebx,ebx |
| 587 ; mul a[1]*b[0] |
| 588 mul edx |
| 589 add ecx,eax |
| 590 mov eax,DWORD [esi] |
| 591 adc ebp,edx |
| 592 mov edx,DWORD [4+edi] |
| 593 adc ebx,0 |
| 594 ; mul a[0]*b[1] |
| 595 mul edx |
| 596 add ecx,eax |
| 597 mov eax,DWORD [20+esp] |
| 598 adc ebp,edx |
| 599 mov edx,DWORD [edi] |
| 600 adc ebx,0 |
| 601 mov DWORD [4+eax],ecx |
| 602 mov eax,DWORD [8+esi] |
| 603 ; saved r[1] |
| 604 ; ################## Calculate word 2 |
| 605 xor ecx,ecx |
| 606 ; mul a[2]*b[0] |
| 607 mul edx |
| 608 add ebp,eax |
| 609 mov eax,DWORD [4+esi] |
| 610 adc ebx,edx |
| 611 mov edx,DWORD [4+edi] |
| 612 adc ecx,0 |
| 613 ; mul a[1]*b[1] |
| 614 mul edx |
| 615 add ebp,eax |
| 616 mov eax,DWORD [esi] |
| 617 adc ebx,edx |
| 618 mov edx,DWORD [8+edi] |
| 619 adc ecx,0 |
| 620 ; mul a[0]*b[2] |
| 621 mul edx |
| 622 add ebp,eax |
| 623 mov eax,DWORD [20+esp] |
| 624 adc ebx,edx |
| 625 mov edx,DWORD [edi] |
| 626 adc ecx,0 |
| 627 mov DWORD [8+eax],ebp |
| 628 mov eax,DWORD [12+esi] |
| 629 ; saved r[2] |
| 630 ; ################## Calculate word 3 |
| 631 xor ebp,ebp |
| 632 ; mul a[3]*b[0] |
| 633 mul edx |
| 634 add ebx,eax |
| 635 mov eax,DWORD [8+esi] |
| 636 adc ecx,edx |
| 637 mov edx,DWORD [4+edi] |
| 638 adc ebp,0 |
| 639 ; mul a[2]*b[1] |
| 640 mul edx |
| 641 add ebx,eax |
| 642 mov eax,DWORD [4+esi] |
| 643 adc ecx,edx |
| 644 mov edx,DWORD [8+edi] |
| 645 adc ebp,0 |
| 646 ; mul a[1]*b[2] |
| 647 mul edx |
| 648 add ebx,eax |
| 649 mov eax,DWORD [esi] |
| 650 adc ecx,edx |
| 651 mov edx,DWORD [12+edi] |
| 652 adc ebp,0 |
| 653 ; mul a[0]*b[3] |
| 654 mul edx |
| 655 add ebx,eax |
| 656 mov eax,DWORD [20+esp] |
| 657 adc ecx,edx |
| 658 mov edx,DWORD [4+edi] |
| 659 adc ebp,0 |
| 660 mov DWORD [12+eax],ebx |
| 661 mov eax,DWORD [12+esi] |
| 662 ; saved r[3] |
; From word 4 on, each column starts at a[3] and the first b index
; rises by one per column.
| 663 ; ################## Calculate word 4 |
| 664 xor ebx,ebx |
| 665 ; mul a[3]*b[1] |
| 666 mul edx |
| 667 add ecx,eax |
| 668 mov eax,DWORD [8+esi] |
| 669 adc ebp,edx |
| 670 mov edx,DWORD [8+edi] |
| 671 adc ebx,0 |
| 672 ; mul a[2]*b[2] |
| 673 mul edx |
| 674 add ecx,eax |
| 675 mov eax,DWORD [4+esi] |
| 676 adc ebp,edx |
| 677 mov edx,DWORD [12+edi] |
| 678 adc ebx,0 |
| 679 ; mul a[1]*b[3] |
| 680 mul edx |
| 681 add ecx,eax |
| 682 mov eax,DWORD [20+esp] |
| 683 adc ebp,edx |
| 684 mov edx,DWORD [8+edi] |
| 685 adc ebx,0 |
| 686 mov DWORD [16+eax],ecx |
| 687 mov eax,DWORD [12+esi] |
| 688 ; saved r[4] |
| 689 ; ################## Calculate word 5 |
| 690 xor ecx,ecx |
| 691 ; mul a[3]*b[2] |
| 692 mul edx |
| 693 add ebp,eax |
| 694 mov eax,DWORD [8+esi] |
| 695 adc ebx,edx |
| 696 mov edx,DWORD [12+edi] |
| 697 adc ecx,0 |
| 698 ; mul a[2]*b[3] |
| 699 mul edx |
| 700 add ebp,eax |
| 701 mov eax,DWORD [20+esp] |
| 702 adc ebx,edx |
| 703 mov edx,DWORD [12+edi] |
| 704 adc ecx,0 |
| 705 mov DWORD [20+eax],ebp |
| 706 mov eax,DWORD [12+esi] |
| 707 ; saved r[5] |
| 708 ; ################## Calculate word 6 |
| 709 xor ebp,ebp |
| 710 ; mul a[3]*b[3] |
| 711 mul edx |
| 712 add ebx,eax |
| 713 mov eax,DWORD [20+esp] |
| 714 adc ecx,edx |
; NOTE(review): the carry collected into ebp here is never stored or
; read again before it is overwritten by pop ebp.
| 715 adc ebp,0 |
| 716 mov DWORD [24+eax],ebx |
| 717 ; saved r[6] |
| 718 ; save r[7] |
| 719 mov DWORD [28+eax],ecx |
; Restore the saved registers in reverse push order.
| 720 pop ebx |
| 721 pop ebp |
| 722 pop edi |
| 723 pop esi |
| 724 ret |
; ---------------------------------------------------------------------
; _bn_sqr_comba8: square of an 8-word number, one result column at a time.
;
; Stack arguments (32-bit, read after the four pushes below):
;   1st = r  -> loaded into edi; receives 16 dwords, r[0]..r[15]
;   2nd = a  -> loaded into esi; 8 dwords are read
;
; For each result word k the products a[i]*a[j] with i+j == k, i >= j,
; are accumulated into three registers (ebx, ecx, ebp rotating as the
; low / middle / high word):
;   * i != j : the 64-bit product in edx:eax is doubled first
;              (add eax,eax / adc edx,edx / adc <high>,0), then added,
;              so each off-diagonal pair is multiplied only once;
;   * i == j : `mul eax` squares the word and it is added once.
; The low word is stored to r[k] and that register is then cleared to
; become the high word of the next column.
;
; esi, edi, ebp and ebx are saved and restored; eax, ecx and edx are
; overwritten and not restored.
; ---------------------------------------------------------------------
| 725 global _bn_sqr_comba8 |
| 726 align 16 |
| 727 _bn_sqr_comba8: |
| 728 L$_bn_sqr_comba8_begin: |
| 729 push esi |
| 730 push edi |
| 731 push ebp |
| 732 push ebx |
; Four pushes + return address: [20+esp] is the 1st argument (r),
; [24+esp] the 2nd (a).
| 733 mov edi,DWORD [20+esp] |
| 734 mov esi,DWORD [24+esp] |
| 735 xor ebx,ebx |
| 736 xor ecx,ecx |
| 737 mov eax,DWORD [esi] |
| 738 ; ############### Calculate word 0 |
| 739 xor ebp,ebp |
| 740 ; sqr a[0]*a[0] |
| 741 mul eax |
| 742 add ebx,eax |
| 743 adc ecx,edx |
| 744 mov edx,DWORD [esi] |
| 745 adc ebp,0 |
| 746 mov DWORD [edi],ebx |
| 747 mov eax,DWORD [4+esi] |
| 748 ; saved r[0] |
| 749 ; ############### Calculate word 1 |
| 750 xor ebx,ebx |
| 751 ; sqr a[1]*a[0] |
| 752 mul edx |
| 753 add eax,eax |
| 754 adc edx,edx |
| 755 adc ebx,0 |
| 756 add ecx,eax |
| 757 adc ebp,edx |
| 758 mov eax,DWORD [8+esi] |
| 759 adc ebx,0 |
| 760 mov DWORD [4+edi],ecx |
| 761 mov edx,DWORD [esi] |
| 762 ; saved r[1] |
| 763 ; ############### Calculate word 2 |
| 764 xor ecx,ecx |
| 765 ; sqr a[2]*a[0] |
| 766 mul edx |
| 767 add eax,eax |
| 768 adc edx,edx |
| 769 adc ecx,0 |
| 770 add ebp,eax |
| 771 adc ebx,edx |
| 772 mov eax,DWORD [4+esi] |
| 773 adc ecx,0 |
| 774 ; sqr a[1]*a[1] |
| 775 mul eax |
| 776 add ebp,eax |
| 777 adc ebx,edx |
| 778 mov edx,DWORD [esi] |
| 779 adc ecx,0 |
| 780 mov DWORD [8+edi],ebp |
| 781 mov eax,DWORD [12+esi] |
| 782 ; saved r[2] |
| 783 ; ############### Calculate word 3 |
| 784 xor ebp,ebp |
| 785 ; sqr a[3]*a[0] |
| 786 mul edx |
| 787 add eax,eax |
| 788 adc edx,edx |
| 789 adc ebp,0 |
| 790 add ebx,eax |
| 791 adc ecx,edx |
| 792 mov eax,DWORD [8+esi] |
| 793 adc ebp,0 |
| 794 mov edx,DWORD [4+esi] |
| 795 ; sqr a[2]*a[1] |
| 796 mul edx |
| 797 add eax,eax |
| 798 adc edx,edx |
| 799 adc ebp,0 |
| 800 add ebx,eax |
| 801 adc ecx,edx |
| 802 mov eax,DWORD [16+esi] |
| 803 adc ebp,0 |
| 804 mov DWORD [12+edi],ebx |
| 805 mov edx,DWORD [esi] |
| 806 ; saved r[3] |
| 807 ; ############### Calculate word 4 |
| 808 xor ebx,ebx |
| 809 ; sqr a[4]*a[0] |
| 810 mul edx |
| 811 add eax,eax |
| 812 adc edx,edx |
| 813 adc ebx,0 |
| 814 add ecx,eax |
| 815 adc ebp,edx |
| 816 mov eax,DWORD [12+esi] |
| 817 adc ebx,0 |
| 818 mov edx,DWORD [4+esi] |
| 819 ; sqr a[3]*a[1] |
| 820 mul edx |
| 821 add eax,eax |
| 822 adc edx,edx |
| 823 adc ebx,0 |
| 824 add ecx,eax |
| 825 adc ebp,edx |
| 826 mov eax,DWORD [8+esi] |
| 827 adc ebx,0 |
| 828 ; sqr a[2]*a[2] |
| 829 mul eax |
| 830 add ecx,eax |
| 831 adc ebp,edx |
| 832 mov edx,DWORD [esi] |
| 833 adc ebx,0 |
| 834 mov DWORD [16+edi],ecx |
| 835 mov eax,DWORD [20+esi] |
| 836 ; saved r[4] |
| 837 ; ############### Calculate word 5 |
| 838 xor ecx,ecx |
| 839 ; sqr a[5]*a[0] |
| 840 mul edx |
| 841 add eax,eax |
| 842 adc edx,edx |
| 843 adc ecx,0 |
| 844 add ebp,eax |
| 845 adc ebx,edx |
| 846 mov eax,DWORD [16+esi] |
| 847 adc ecx,0 |
| 848 mov edx,DWORD [4+esi] |
| 849 ; sqr a[4]*a[1] |
| 850 mul edx |
| 851 add eax,eax |
| 852 adc edx,edx |
| 853 adc ecx,0 |
| 854 add ebp,eax |
| 855 adc ebx,edx |
| 856 mov eax,DWORD [12+esi] |
| 857 adc ecx,0 |
| 858 mov edx,DWORD [8+esi] |
| 859 ; sqr a[3]*a[2] |
| 860 mul edx |
| 861 add eax,eax |
| 862 adc edx,edx |
| 863 adc ecx,0 |
| 864 add ebp,eax |
| 865 adc ebx,edx |
| 866 mov eax,DWORD [24+esi] |
| 867 adc ecx,0 |
| 868 mov DWORD [20+edi],ebp |
| 869 mov edx,DWORD [esi] |
| 870 ; saved r[5] |
| 871 ; ############### Calculate word 6 |
| 872 xor ebp,ebp |
| 873 ; sqr a[6]*a[0] |
| 874 mul edx |
| 875 add eax,eax |
| 876 adc edx,edx |
| 877 adc ebp,0 |
| 878 add ebx,eax |
| 879 adc ecx,edx |
| 880 mov eax,DWORD [20+esi] |
| 881 adc ebp,0 |
| 882 mov edx,DWORD [4+esi] |
| 883 ; sqr a[5]*a[1] |
| 884 mul edx |
| 885 add eax,eax |
| 886 adc edx,edx |
| 887 adc ebp,0 |
| 888 add ebx,eax |
| 889 adc ecx,edx |
| 890 mov eax,DWORD [16+esi] |
| 891 adc ebp,0 |
| 892 mov edx,DWORD [8+esi] |
| 893 ; sqr a[4]*a[2] |
| 894 mul edx |
| 895 add eax,eax |
| 896 adc edx,edx |
| 897 adc ebp,0 |
| 898 add ebx,eax |
| 899 adc ecx,edx |
| 900 mov eax,DWORD [12+esi] |
| 901 adc ebp,0 |
| 902 ; sqr a[3]*a[3] |
| 903 mul eax |
| 904 add ebx,eax |
| 905 adc ecx,edx |
| 906 mov edx,DWORD [esi] |
| 907 adc ebp,0 |
| 908 mov DWORD [24+edi],ebx |
| 909 mov eax,DWORD [28+esi] |
| 910 ; saved r[6] |
| 911 ; ############### Calculate word 7 |
| 912 xor ebx,ebx |
| 913 ; sqr a[7]*a[0] |
| 914 mul edx |
| 915 add eax,eax |
| 916 adc edx,edx |
| 917 adc ebx,0 |
| 918 add ecx,eax |
| 919 adc ebp,edx |
| 920 mov eax,DWORD [24+esi] |
| 921 adc ebx,0 |
| 922 mov edx,DWORD [4+esi] |
| 923 ; sqr a[6]*a[1] |
| 924 mul edx |
| 925 add eax,eax |
| 926 adc edx,edx |
| 927 adc ebx,0 |
| 928 add ecx,eax |
| 929 adc ebp,edx |
| 930 mov eax,DWORD [20+esi] |
| 931 adc ebx,0 |
| 932 mov edx,DWORD [8+esi] |
| 933 ; sqr a[5]*a[2] |
| 934 mul edx |
| 935 add eax,eax |
| 936 adc edx,edx |
| 937 adc ebx,0 |
| 938 add ecx,eax |
| 939 adc ebp,edx |
| 940 mov eax,DWORD [16+esi] |
| 941 adc ebx,0 |
| 942 mov edx,DWORD [12+esi] |
| 943 ; sqr a[4]*a[3] |
| 944 mul edx |
| 945 add eax,eax |
| 946 adc edx,edx |
| 947 adc ebx,0 |
| 948 add ecx,eax |
| 949 adc ebp,edx |
| 950 mov eax,DWORD [28+esi] |
| 951 adc ebx,0 |
| 952 mov DWORD [28+edi],ecx |
| 953 mov edx,DWORD [4+esi] |
| 954 ; saved r[7] |
| 955 ; ############### Calculate word 8 |
| 956 xor ecx,ecx |
| 957 ; sqr a[7]*a[1] |
| 958 mul edx |
| 959 add eax,eax |
| 960 adc edx,edx |
| 961 adc ecx,0 |
| 962 add ebp,eax |
| 963 adc ebx,edx |
| 964 mov eax,DWORD [24+esi] |
| 965 adc ecx,0 |
| 966 mov edx,DWORD [8+esi] |
| 967 ; sqr a[6]*a[2] |
| 968 mul edx |
| 969 add eax,eax |
| 970 adc edx,edx |
| 971 adc ecx,0 |
| 972 add ebp,eax |
| 973 adc ebx,edx |
| 974 mov eax,DWORD [20+esi] |
| 975 adc ecx,0 |
| 976 mov edx,DWORD [12+esi] |
| 977 ; sqr a[5]*a[3] |
| 978 mul edx |
| 979 add eax,eax |
| 980 adc edx,edx |
| 981 adc ecx,0 |
| 982 add ebp,eax |
| 983 adc ebx,edx |
| 984 mov eax,DWORD [16+esi] |
| 985 adc ecx,0 |
| 986 ; sqr a[4]*a[4] |
| 987 mul eax |
| 988 add ebp,eax |
| 989 adc ebx,edx |
| 990 mov edx,DWORD [8+esi] |
| 991 adc ecx,0 |
| 992 mov DWORD [32+edi],ebp |
| 993 mov eax,DWORD [28+esi] |
| 994 ; saved r[8] |
| 995 ; ############### Calculate word 9 |
| 996 xor ebp,ebp |
| 997 ; sqr a[7]*a[2] |
| 998 mul edx |
| 999 add eax,eax |
| 1000 adc edx,edx |
| 1001 adc ebp,0 |
| 1002 add ebx,eax |
| 1003 adc ecx,edx |
| 1004 mov eax,DWORD [24+esi] |
| 1005 adc ebp,0 |
| 1006 mov edx,DWORD [12+esi] |
| 1007 ; sqr a[6]*a[3] |
| 1008 mul edx |
| 1009 add eax,eax |
| 1010 adc edx,edx |
| 1011 adc ebp,0 |
| 1012 add ebx,eax |
| 1013 adc ecx,edx |
| 1014 mov eax,DWORD [20+esi] |
| 1015 adc ebp,0 |
| 1016 mov edx,DWORD [16+esi] |
| 1017 ; sqr a[5]*a[4] |
| 1018 mul edx |
| 1019 add eax,eax |
| 1020 adc edx,edx |
| 1021 adc ebp,0 |
| 1022 add ebx,eax |
| 1023 adc ecx,edx |
| 1024 mov eax,DWORD [28+esi] |
| 1025 adc ebp,0 |
| 1026 mov DWORD [36+edi],ebx |
| 1027 mov edx,DWORD [12+esi] |
| 1028 ; saved r[9] |
| 1029 ; ############### Calculate word 10 |
| 1030 xor ebx,ebx |
| 1031 ; sqr a[7]*a[3] |
| 1032 mul edx |
| 1033 add eax,eax |
| 1034 adc edx,edx |
| 1035 adc ebx,0 |
| 1036 add ecx,eax |
| 1037 adc ebp,edx |
| 1038 mov eax,DWORD [24+esi] |
| 1039 adc ebx,0 |
| 1040 mov edx,DWORD [16+esi] |
| 1041 ; sqr a[6]*a[4] |
| 1042 mul edx |
| 1043 add eax,eax |
| 1044 adc edx,edx |
| 1045 adc ebx,0 |
| 1046 add ecx,eax |
| 1047 adc ebp,edx |
| 1048 mov eax,DWORD [20+esi] |
| 1049 adc ebx,0 |
| 1050 ; sqr a[5]*a[5] |
| 1051 mul eax |
| 1052 add ecx,eax |
| 1053 adc ebp,edx |
| 1054 mov edx,DWORD [16+esi] |
| 1055 adc ebx,0 |
| 1056 mov DWORD [40+edi],ecx |
| 1057 mov eax,DWORD [28+esi] |
| 1058 ; saved r[10] |
| 1059 ; ############### Calculate word 11 |
| 1060 xor ecx,ecx |
| 1061 ; sqr a[7]*a[4] |
| 1062 mul edx |
| 1063 add eax,eax |
| 1064 adc edx,edx |
| 1065 adc ecx,0 |
| 1066 add ebp,eax |
| 1067 adc ebx,edx |
| 1068 mov eax,DWORD [24+esi] |
| 1069 adc ecx,0 |
| 1070 mov edx,DWORD [20+esi] |
| 1071 ; sqr a[6]*a[5] |
| 1072 mul edx |
| 1073 add eax,eax |
| 1074 adc edx,edx |
| 1075 adc ecx,0 |
| 1076 add ebp,eax |
| 1077 adc ebx,edx |
| 1078 mov eax,DWORD [28+esi] |
| 1079 adc ecx,0 |
| 1080 mov DWORD [44+edi],ebp |
| 1081 mov edx,DWORD [20+esi] |
| 1082 ; saved r[11] |
| 1083 ; ############### Calculate word 12 |
| 1084 xor ebp,ebp |
| 1085 ; sqr a[7]*a[5] |
| 1086 mul edx |
| 1087 add eax,eax |
| 1088 adc edx,edx |
| 1089 adc ebp,0 |
| 1090 add ebx,eax |
| 1091 adc ecx,edx |
| 1092 mov eax,DWORD [24+esi] |
| 1093 adc ebp,0 |
| 1094 ; sqr a[6]*a[6] |
| 1095 mul eax |
| 1096 add ebx,eax |
| 1097 adc ecx,edx |
| 1098 mov edx,DWORD [24+esi] |
| 1099 adc ebp,0 |
| 1100 mov DWORD [48+edi],ebx |
| 1101 mov eax,DWORD [28+esi] |
| 1102 ; saved r[12] |
| 1103 ; ############### Calculate word 13 |
| 1104 xor ebx,ebx |
| 1105 ; sqr a[7]*a[6] |
| 1106 mul edx |
| 1107 add eax,eax |
| 1108 adc edx,edx |
| 1109 adc ebx,0 |
| 1110 add ecx,eax |
| 1111 adc ebp,edx |
| 1112 mov eax,DWORD [28+esi] |
| 1113 adc ebx,0 |
| 1114 mov DWORD [52+edi],ecx |
| 1115 ; saved r[13] |
| 1116 ; ############### Calculate word 14 |
| 1117 xor ecx,ecx |
| 1118 ; sqr a[7]*a[7] |
| 1119 mul eax |
| 1120 add ebp,eax |
| 1121 adc ebx,edx |
; NOTE(review): the carry collected into ecx here is never stored or
; read again before ret.
| 1122 adc ecx,0 |
| 1123 mov DWORD [56+edi],ebp |
| 1124 ; saved r[14] |
; ebx holds the final high word and is written to r[15].
| 1125 mov DWORD [60+edi],ebx |
; Restore the saved registers in reverse push order.
| 1126 pop ebx |
| 1127 pop ebp |
| 1128 pop edi |
| 1129 pop esi |
| 1130 ret |
; ---------------------------------------------------------------------
; _bn_sqr_comba4: square of a 4-word number, one result column at a time.
;
; Stack arguments (32-bit, read after the four pushes below):
;   1st = r  -> loaded into edi; receives 8 dwords, r[0]..r[7]
;   2nd = a  -> loaded into esi; 4 dwords are read
;
; For each result word k the products a[i]*a[j] with i+j == k, i >= j,
; are accumulated into three registers (ebx, ecx, ebp rotating as the
; low / middle / high word):
;   * i != j : the 64-bit product in edx:eax is doubled first
;              (add eax,eax / adc edx,edx / adc <high>,0), then added,
;              so each off-diagonal pair is multiplied only once;
;   * i == j : `mul eax` squares the word and it is added once.
; The low word is stored to r[k] and that register is then cleared to
; become the high word of the next column.
;
; esi, edi, ebp and ebx are saved and restored; eax, ecx and edx are
; overwritten and not restored.
; ---------------------------------------------------------------------
| 1131 global _bn_sqr_comba4 |
| 1132 align 16 |
| 1133 _bn_sqr_comba4: |
| 1134 L$_bn_sqr_comba4_begin: |
| 1135 push esi |
| 1136 push edi |
| 1137 push ebp |
| 1138 push ebx |
; Four pushes + return address: [20+esp] is the 1st argument (r),
; [24+esp] the 2nd (a).
| 1139 mov edi,DWORD [20+esp] |
| 1140 mov esi,DWORD [24+esp] |
| 1141 xor ebx,ebx |
| 1142 xor ecx,ecx |
| 1143 mov eax,DWORD [esi] |
| 1144 ; ############### Calculate word 0 |
| 1145 xor ebp,ebp |
| 1146 ; sqr a[0]*a[0] |
| 1147 mul eax |
| 1148 add ebx,eax |
| 1149 adc ecx,edx |
| 1150 mov edx,DWORD [esi] |
| 1151 adc ebp,0 |
| 1152 mov DWORD [edi],ebx |
| 1153 mov eax,DWORD [4+esi] |
| 1154 ; saved r[0] |
| 1155 ; ############### Calculate word 1 |
| 1156 xor ebx,ebx |
| 1157 ; sqr a[1]*a[0] |
| 1158 mul edx |
| 1159 add eax,eax |
| 1160 adc edx,edx |
| 1161 adc ebx,0 |
| 1162 add ecx,eax |
| 1163 adc ebp,edx |
| 1164 mov eax,DWORD [8+esi] |
| 1165 adc ebx,0 |
| 1166 mov DWORD [4+edi],ecx |
| 1167 mov edx,DWORD [esi] |
| 1168 ; saved r[1] |
| 1169 ; ############### Calculate word 2 |
| 1170 xor ecx,ecx |
| 1171 ; sqr a[2]*a[0] |
| 1172 mul edx |
| 1173 add eax,eax |
| 1174 adc edx,edx |
| 1175 adc ecx,0 |
| 1176 add ebp,eax |
| 1177 adc ebx,edx |
| 1178 mov eax,DWORD [4+esi] |
| 1179 adc ecx,0 |
| 1180 ; sqr a[1]*a[1] |
| 1181 mul eax |
| 1182 add ebp,eax |
| 1183 adc ebx,edx |
| 1184 mov edx,DWORD [esi] |
| 1185 adc ecx,0 |
| 1186 mov DWORD [8+edi],ebp |
| 1187 mov eax,DWORD [12+esi] |
| 1188 ; saved r[2] |
| 1189 ; ############### Calculate word 3 |
| 1190 xor ebp,ebp |
| 1191 ; sqr a[3]*a[0] |
| 1192 mul edx |
| 1193 add eax,eax |
| 1194 adc edx,edx |
| 1195 adc ebp,0 |
| 1196 add ebx,eax |
| 1197 adc ecx,edx |
| 1198 mov eax,DWORD [8+esi] |
| 1199 adc ebp,0 |
| 1200 mov edx,DWORD [4+esi] |
| 1201 ; sqr a[2]*a[1] |
| 1202 mul edx |
| 1203 add eax,eax |
| 1204 adc edx,edx |
| 1205 adc ebp,0 |
| 1206 add ebx,eax |
| 1207 adc ecx,edx |
| 1208 mov eax,DWORD [12+esi] |
| 1209 adc ebp,0 |
| 1210 mov DWORD [12+edi],ebx |
| 1211 mov edx,DWORD [4+esi] |
| 1212 ; saved r[3] |
| 1213 ; ############### Calculate word 4 |
| 1214 xor ebx,ebx |
| 1215 ; sqr a[3]*a[1] |
| 1216 mul edx |
| 1217 add eax,eax |
| 1218 adc edx,edx |
| 1219 adc ebx,0 |
| 1220 add ecx,eax |
| 1221 adc ebp,edx |
| 1222 mov eax,DWORD [8+esi] |
| 1223 adc ebx,0 |
| 1224 ; sqr a[2]*a[2] |
| 1225 mul eax |
| 1226 add ecx,eax |
| 1227 adc ebp,edx |
| 1228 mov edx,DWORD [8+esi] |
| 1229 adc ebx,0 |
| 1230 mov DWORD [16+edi],ecx |
| 1231 mov eax,DWORD [12+esi] |
| 1232 ; saved r[4] |
| 1233 ; ############### Calculate word 5 |
| 1234 xor ecx,ecx |
| 1235 ; sqr a[3]*a[2] |
| 1236 mul edx |
| 1237 add eax,eax |
| 1238 adc edx,edx |
| 1239 adc ecx,0 |
| 1240 add ebp,eax |
| 1241 adc ebx,edx |
| 1242 mov eax,DWORD [12+esi] |
| 1243 adc ecx,0 |
| 1244 mov DWORD [20+edi],ebp |
| 1245 ; saved r[5] |
| 1246 ; ############### Calculate word 6 |
| 1247 xor ebp,ebp |
| 1248 ; sqr a[3]*a[3] |
| 1249 mul eax |
| 1250 add ebx,eax |
| 1251 adc ecx,edx |
; NOTE(review): the carry collected into ebp here is never stored or
; read again before it is overwritten by pop ebp.
| 1252 adc ebp,0 |
| 1253 mov DWORD [24+edi],ebx |
| 1254 ; saved r[6] |
; ecx holds the final high word and is written to r[7].
| 1255 mov DWORD [28+edi],ecx |
; Restore the saved registers in reverse push order.
| 1256 pop ebx |
| 1257 pop ebp |
| 1258 pop edi |
| 1259 pop esi |
| 1260 ret |
OLD | NEW |