| OLD | NEW |
| (Empty) |
| 1 .set mips2 | |
| 2 .rdata | |
| 3 .asciiz "mips3.s, Version 1.2" | |
| 4 .asciiz "MIPS II/III/IV ISA artwork by Andy Polyakov <appro@fy.chalmers.se>" | |
| 5 | |
| 6 .text | |
| 7 .set noat | |
| 8 | |
| 9 .align 5 | |
# bn_mul_add_words: exported entry point. Checks the word count in $6 and
# either returns 0 immediately or continues in bn_mul_add_words_internal.
#   In:  $4 = result words (read and written), $5 = source words,
#        $6 = word count, $7 = multiplier word (as consumed by the
#        _internal routine).
#   Out: $2 = 0 when $6 <= 0; otherwise the value produced by
#        bn_mul_add_words_internal. $4 receives a copy of $2.
# NOTE(review): presumably implements the C-level bn_mul_add_words(rp, ap,
# num, w) bignum primitive -- confirm against the C prototype.
| 10 .globl bn_mul_add_words | |
| 11 .ent bn_mul_add_words | |
| 12 bn_mul_add_words: | |
| 13 .set noreorder | |
| 14 bgtz $6,bn_mul_add_words_internal | |
# Branch delay slot (runs whether or not the branch is taken): clears the
# carry accumulator $2 that the internal routine starts from.
| 15 move $2,$0 | |
| 16 jr $31 | |
# Return delay slot: mirror the result into $4.
| 17 move $4,$2 | |
| 18 .end bn_mul_add_words | |
| 19 | |
| 20 .align 5 | |
# bn_mul_add_words_internal: multiply-accumulate over arrays of 32-bit words.
# For every word i:  {carry, r[i]} = r[i] + a[i] * w + carry.
#   In:  $4 = r (loaded and stored), $5 = a (loaded only), $6 = word count,
#        $7 = w, $2 = incoming carry (the bn_mul_add_words stub zeroes it in
#        its branch delay slot).
#   Out: $2 = final carry word, copied to $4 in the return delay slot.
#   Scratch: $1 (the file is assembled under .set noat), $3, $8-$15, HI/LO.
# $4, $5 and $6 are modified (pointers advanced, count decremented).
| 21 .ent bn_mul_add_words_internal | |
| 22 bn_mul_add_words_internal: | |
| 23 .set reorder | |
# $3 = -4 is a mask clearing the two low bits; $8 = count rounded down to a
# multiple of four, i.e. non-zero only if a full unrolled iteration remains.
| 24 li $3,-4 | |
| 25 and $8,$6,$3 | |
| 26 lw $12,0($5) | |
# Fewer than four words: skip the unrolled loop and use the tail.
| 27 beqz $8,.L_bn_mul_add_words_tail | |
| 28 | |
# Main loop, unrolled four times. Each word follows the same pattern:
#   r += carry;  carry  = (r < carry)      -- carry-out of the first add
#   r += LO;     carry += HI
#   carry += (r < LO)                      -- carry-out of the second add
# The next multu is issued before the previous word is stored (presumably to
# hide multiplier latency).
| 29 .L_bn_mul_add_words_loop: | |
| 30 multu $12,$7 | |
| 31 lw $13,0($4) | |
| 32 lw $14,4($5) | |
| 33 lw $15,4($4) | |
| 34 lw $8,2*4($5) | |
| 35 lw $9,2*4($4) | |
| 36 addu $13,$2 | |
| 37 sltu $2,$13,$2 # All manuals say it "compares 32-bit | |
| 38 # values", but it seems to work fine | |
| 39 # even on 64-bit registers. | |
| 40 mflo $1 | |
| 41 mfhi $12 | |
| 42 addu $13,$1 | |
| 43 addu $2,$12 | |
| 44 multu $14,$7 | |
| 45 sltu $1,$13,$1 | |
| 46 sw $13,0($4) | |
| 47 addu $2,$1 | |
| 48 | |
# Second word (a[1], r[1]).
| 49 lw $10,3*4($5) | |
| 50 lw $11,3*4($4) | |
| 51 addu $15,$2 | |
| 52 sltu $2,$15,$2 | |
| 53 mflo $1 | |
| 54 mfhi $14 | |
| 55 addu $15,$1 | |
| 56 addu $2,$14 | |
| 57 multu $8,$7 | |
| 58 sltu $1,$15,$1 | |
| 59 sw $15,4($4) | |
| 60 addu $2,$1 | |
| 61 | |
# Third word. The count and both pointers are advanced here, in the middle of
# the iteration, so the remaining stores use negative offsets from $4.
| 62 subu $6,4 | |
| 63 addu $4,4*4 | |
| 64 addu $5,4*4 | |
| 65 addu $9,$2 | |
| 66 sltu $2,$9,$2 | |
| 67 mflo $1 | |
| 68 mfhi $8 | |
| 69 addu $9,$1 | |
| 70 addu $2,$8 | |
| 71 multu $10,$7 | |
| 72 sltu $1,$9,$1 | |
| 73 sw $9,-2*4($4) | |
| 74 addu $2,$1 | |
| 75 | |
| 76 | |
# Fourth word; $8 is recomputed as the loop condition (remaining count with
# the two low bits cleared).
| 77 and $8,$6,$3 | |
| 78 addu $11,$2 | |
| 79 sltu $2,$11,$2 | |
| 80 mflo $1 | |
| 81 mfhi $10 | |
| 82 addu $11,$1 | |
| 83 addu $2,$10 | |
| 84 sltu $1,$11,$1 | |
| 85 sw $11,-4($4) | |
| 86 addu $2,$1 | |
| 87 .set noreorder | |
# Branch-likely: the delay-slot load of the next a[0] is executed only when
# the branch back to the loop is taken.
| 88 bgtzl $8,.L_bn_mul_add_words_loop | |
| 89 lw $12,0($5) | |
| 90 | |
# Nothing left after the unrolled loop: return.
| 91 beqz $6,.L_bn_mul_add_words_return | |
| 92 nop | |
| 93 | |
# Tail: handles the remaining words one at a time (at most three blocks are
# provided). The count is decremented after the first and second word and the
# routine returns as soon as it reaches zero; the third word is processed
# without a further count check.
| 94 .L_bn_mul_add_words_tail: | |
| 95 .set reorder | |
| 96 lw $12,0($5) | |
| 97 multu $12,$7 | |
| 98 lw $13,0($4) | |
| 99 subu $6,1 | |
| 100 addu $13,$2 | |
| 101 sltu $2,$13,$2 | |
| 102 mflo $1 | |
| 103 mfhi $12 | |
| 104 addu $13,$1 | |
| 105 addu $2,$12 | |
| 106 sltu $1,$13,$1 | |
| 107 sw $13,0($4) | |
| 108 addu $2,$1 | |
| 109 beqz $6,.L_bn_mul_add_words_return | |
| 110 | |
| 111 lw $12,4($5) | |
| 112 multu $12,$7 | |
| 113 lw $13,4($4) | |
| 114 subu $6,1 | |
| 115 addu $13,$2 | |
| 116 sltu $2,$13,$2 | |
| 117 mflo $1 | |
| 118 mfhi $12 | |
| 119 addu $13,$1 | |
| 120 addu $2,$12 | |
| 121 sltu $1,$13,$1 | |
| 122 sw $13,4($4) | |
| 123 addu $2,$1 | |
| 124 beqz $6,.L_bn_mul_add_words_return | |
| 125 | |
| 126 lw $12,2*4($5) | |
| 127 multu $12,$7 | |
| 128 lw $13,2*4($4) | |
| 129 addu $13,$2 | |
| 130 sltu $2,$13,$2 | |
| 131 mflo $1 | |
| 132 mfhi $12 | |
| 133 addu $13,$1 | |
| 134 addu $2,$12 | |
| 135 sltu $1,$13,$1 | |
| 136 sw $13,2*4($4) | |
| 137 addu $2,$1 | |
| 138 | |
# Common exit: $2 holds the final carry; it is mirrored into $4 in the
# return delay slot.
| 139 .L_bn_mul_add_words_return: | |
| 140 .set noreorder | |
| 141 jr $31 | |
| 142 move $4,$2 | |
| 143 .end bn_mul_add_words_internal | |
| 144 | |
| 145 .align 5 | |
# bn_mul_words: exported entry point. Checks the word count in $6 and either
# returns 0 immediately or continues in bn_mul_words_internal.
#   In:  $4 = destination words, $5 = source words, $6 = word count,
#        $7 = multiplier word (as consumed by the _internal routine).
#   Out: $2 = 0 when $6 <= 0; otherwise the value produced by
#        bn_mul_words_internal. $4 receives a copy of $2.
# NOTE(review): presumably implements the C-level bn_mul_words(rp, ap, num, w)
# bignum primitive -- confirm against the C prototype.
| 146 .globl bn_mul_words | |
| 147 .ent bn_mul_words | |
| 148 bn_mul_words: | |
| 149 .set noreorder | |
| 150 bgtz $6,bn_mul_words_internal | |
# Branch delay slot (always executed): start with a zero carry in $2.
| 151 move $2,$0 | |
| 152 jr $31 | |
# Return delay slot: mirror the result into $4.
| 153 move $4,$2 | |
| 154 .end bn_mul_words | |
| 155 | |
| 156 .align 5 | |
# bn_mul_words_internal: multiplies an array of 32-bit words by a single word.
# For every word i:  {carry, r[i]} = a[i] * w + carry.
#   In:  $4 = r (stored only), $5 = a (loaded only), $6 = word count,
#        $7 = w, $2 = incoming carry (zeroed by the bn_mul_words stub).
#   Out: $2 = final carry word, copied to $4 in the return delay slot.
#   Scratch: $1 (file is under .set noat), $3, $8-$15, HI/LO.
# $4, $5 and $6 are modified (pointers advanced, count decremented).
| 157 .ent bn_mul_words_internal | |
| 158 bn_mul_words_internal: | |
| 159 .set reorder | |
# $3 = mask clearing the two low bits; $8 = count rounded down to a multiple
# of four. Zero means no full unrolled iteration is available.
| 160 li $3,-4 | |
| 161 and $8,$6,$3 | |
| 162 lw $12,0($5) | |
| 163 beqz $8,.L_bn_mul_words_tail | |
| 164 | |
# Main loop, unrolled four times. Per word:
#   $2 = carry + LO;  t = ($2 < LO)  -- carry-out of the add
#   store $2;  carry = t + HI
| 165 .L_bn_mul_words_loop: | |
| 166 multu $12,$7 | |
| 167 lw $14,4($5) | |
| 168 lw $8,2*4($5) | |
| 169 lw $10,3*4($5) | |
| 170 mflo $1 | |
| 171 mfhi $12 | |
| 172 addu $2,$1 | |
| 173 sltu $13,$2,$1 | |
| 174 multu $14,$7 | |
| 175 sw $2,0($4) | |
| 176 addu $2,$13,$12 | |
| 177 | |
# Count and pointers are advanced here, so the remaining three stores use
# negative offsets from $4.
| 178 subu $6,4 | |
| 179 addu $4,4*4 | |
| 180 addu $5,4*4 | |
| 181 mflo $1 | |
| 182 mfhi $14 | |
| 183 addu $2,$1 | |
| 184 sltu $15,$2,$1 | |
| 185 multu $8,$7 | |
| 186 sw $2,-3*4($4) | |
| 187 addu $2,$15,$14 | |
| 188 | |
| 189 mflo $1 | |
| 190 mfhi $8 | |
| 191 addu $2,$1 | |
| 192 sltu $9,$2,$1 | |
| 193 multu $10,$7 | |
| 194 sw $2,-2*4($4) | |
| 195 addu $2,$9,$8 | |
| 196 | |
# $8 is recomputed as the loop condition for the next iteration.
| 197 and $8,$6,$3 | |
| 198 mflo $1 | |
| 199 mfhi $10 | |
| 200 addu $2,$1 | |
| 201 sltu $11,$2,$1 | |
| 202 sw $2,-4($4) | |
| 203 addu $2,$11,$10 | |
| 204 .set noreorder | |
# Branch-likely: the delay-slot load of the next a[0] runs only when the
# branch is taken.
| 205 bgtzl $8,.L_bn_mul_words_loop | |
| 206 lw $12,0($5) | |
| 207 | |
| 208 beqz $6,.L_bn_mul_words_return | |
| 209 nop | |
| 210 | |
# Tail: up to three remaining words, one at a time. Returns as soon as the
# count reaches zero after the first or second word; the third word is
# processed without a further count check.
| 211 .L_bn_mul_words_tail: | |
| 212 .set reorder | |
| 213 lw $12,0($5) | |
| 214 multu $12,$7 | |
| 215 subu $6,1 | |
| 216 mflo $1 | |
| 217 mfhi $12 | |
| 218 addu $2,$1 | |
| 219 sltu $13,$2,$1 | |
| 220 sw $2,0($4) | |
| 221 addu $2,$13,$12 | |
| 222 beqz $6,.L_bn_mul_words_return | |
| 223 | |
| 224 lw $12,4($5) | |
| 225 multu $12,$7 | |
| 226 subu $6,1 | |
| 227 mflo $1 | |
| 228 mfhi $12 | |
| 229 addu $2,$1 | |
| 230 sltu $13,$2,$1 | |
| 231 sw $2,4($4) | |
| 232 addu $2,$13,$12 | |
| 233 beqz $6,.L_bn_mul_words_return | |
| 234 | |
| 235 lw $12,2*4($5) | |
| 236 multu $12,$7 | |
| 237 mflo $1 | |
| 238 mfhi $12 | |
| 239 addu $2,$1 | |
| 240 sltu $13,$2,$1 | |
| 241 sw $2,2*4($4) | |
| 242 addu $2,$13,$12 | |
| 243 | |
# Common exit: $2 = final carry, mirrored into $4 in the return delay slot.
| 244 .L_bn_mul_words_return: | |
| 245 .set noreorder | |
| 246 jr $31 | |
| 247 move $4,$2 | |
| 248 .end bn_mul_words_internal | |
| 249 | |
| 250 .align 5 | |
# bn_sqr_words: exported entry point. Checks the word count in $6 and either
# returns immediately or continues in bn_sqr_words_internal.
#   In:  $4 = destination words, $5 = source words, $6 = source word count
#        (as consumed by the _internal routine).
#   Out: $2 = 0 (set in the branch delay slot), copied to $4 on the
#        early-return path.
# NOTE(review): presumably implements the C-level bn_sqr_words(rp, ap, num)
# bignum primitive -- confirm against the C prototype.
| 251 .globl bn_sqr_words | |
| 252 .ent bn_sqr_words | |
| 253 bn_sqr_words: | |
| 254 .set noreorder | |
| 255 bgtz $6,bn_sqr_words_internal | |
# Branch delay slot (always executed): $2 = 0.
| 256 move $2,$0 | |
| 257 jr $31 | |
# Return delay slot: mirror the result into $4.
| 258 move $4,$2 | |
| 259 .end bn_sqr_words | |
| 260 | |
| 261 .align 5 | |
# bn_sqr_words_internal: squares each 32-bit source word independently.
# For every word i:  r[2*i] = low word of a[i]^2,  r[2*i+1] = high word.
# No carry is propagated between words.
#   In:  $4 = r (two words stored per source word), $5 = a, $6 = word count.
#   Out: $2 is not written in this routine, so the value left there by the
#        caller is returned (the bn_sqr_words stub sets it to 0); it is
#        copied to $4 in the return delay slot.
#   Scratch: $3, $8-$15, HI/LO. $4, $5 and $6 are modified.
| 262 .ent bn_sqr_words_internal | |
| 263 bn_sqr_words_internal: | |
| 264 .set reorder | |
# $3 = mask clearing the two low bits; $8 = count rounded down to a multiple
# of four. Zero means no full unrolled iteration is available.
| 265 li $3,-4 | |
| 266 and $8,$6,$3 | |
| 267 lw $12,0($5) | |
| 268 beqz $8,.L_bn_sqr_words_tail | |
| 269 | |
# Main loop: four source words in, eight result words out per iteration.
| 270 .L_bn_sqr_words_loop: | |
| 271 multu $12,$12 | |
| 272 lw $14,4($5) | |
| 273 lw $8,2*4($5) | |
| 274 lw $10,3*4($5) | |
| 275 mflo $13 | |
| 276 mfhi $12 | |
| 277 sw $13,0($4) | |
| 278 sw $12,4($4) | |
| 279 | |
# The destination pointer advances by eight words and the source pointer by
# four, so the remaining stores use negative offsets from $4.
| 280 multu $14,$14 | |
| 281 subu $6,4 | |
| 282 addu $4,8*4 | |
| 283 addu $5,4*4 | |
| 284 mflo $15 | |
| 285 mfhi $14 | |
| 286 sw $15,-6*4($4) | |
| 287 sw $14,-5*4($4) | |
| 288 | |
| 289 multu $8,$8 | |
| 290 mflo $9 | |
| 291 mfhi $8 | |
| 292 sw $9,-4*4($4) | |
| 293 sw $8,-3*4($4) | |
| 294 | |
| 295 | |
# $8 is recomputed as the loop condition while the last multiply runs.
| 296 multu $10,$10 | |
| 297 and $8,$6,$3 | |
| 298 mflo $11 | |
| 299 mfhi $10 | |
| 300 sw $11,-2*4($4) | |
| 301 sw $10,-4($4) | |
| 302 | |
| 303 .set noreorder | |
# Branch-likely: the delay-slot load of the next a[0] runs only when the
# branch is taken.
| 304 bgtzl $8,.L_bn_sqr_words_loop | |
| 305 lw $12,0($5) | |
| 306 | |
| 307 beqz $6,.L_bn_sqr_words_return | |
| 308 nop | |
| 309 | |
# Tail: up to three remaining source words, one at a time. Returns as soon as
# the count reaches zero after the first or second word; the third word is
# processed without a further count check.
| 310 .L_bn_sqr_words_tail: | |
| 311 .set reorder | |
| 312 lw $12,0($5) | |
| 313 multu $12,$12 | |
| 314 subu $6,1 | |
| 315 mflo $13 | |
| 316 mfhi $12 | |
| 317 sw $13,0($4) | |
| 318 sw $12,4($4) | |
| 319 beqz $6,.L_bn_sqr_words_return | |
| 320 | |
| 321 lw $12,4($5) | |
| 322 multu $12,$12 | |
| 323 subu $6,1 | |
| 324 mflo $13 | |
| 325 mfhi $12 | |
| 326 sw $13,2*4($4) | |
| 327 sw $12,3*4($4) | |
| 328 beqz $6,.L_bn_sqr_words_return | |
| 329 | |
| 330 lw $12,2*4($5) | |
| 331 multu $12,$12 | |
| 332 mflo $13 | |
| 333 mfhi $12 | |
| 334 sw $13,4*4($4) | |
| 335 sw $12,5*4($4) | |
| 336 | |
# Common exit: $2 is returned unchanged and mirrored into $4.
| 337 .L_bn_sqr_words_return: | |
| 338 .set noreorder | |
| 339 jr $31 | |
| 340 move $4,$2 | |
| 341 | |
| 342 .end bn_sqr_words_internal | |
| 343 | |
| 344 .align 5 | |
# bn_add_words: exported entry point. Checks the word count in $7 and either
# returns 0 immediately or continues in bn_add_words_internal.
#   In:  $4 = destination words, $5 = first operand words,
#        $6 = second operand words, $7 = word count (as consumed by the
#        _internal routine).
#   Out: $2 = 0 when $7 <= 0; otherwise the value produced by
#        bn_add_words_internal. $4 receives a copy of $2.
# NOTE(review): presumably implements the C-level bn_add_words(rp, ap, bp,
# num) bignum primitive -- confirm against the C prototype.
| 345 .globl bn_add_words | |
| 346 .ent bn_add_words | |
| 347 bn_add_words: | |
| 348 .set noreorder | |
| 349 bgtz $7,bn_add_words_internal | |
# Branch delay slot (always executed): start with a zero carry in $2.
| 350 move $2,$0 | |
| 351 jr $31 | |
# Return delay slot: mirror the result into $4.
| 352 move $4,$2 | |
| 353 .end bn_add_words | |
| 354 | |
| 355 .align 5 | |
# bn_add_words_internal: word-wise addition of two arrays of 32-bit words with
# carry propagation.  For every word i:  {carry, r[i]} = a[i] + b[i] + carry.
#   In:  $4 = r (stored only), $5 = a, $6 = b, $7 = word count,
#        $2 = incoming carry (zeroed by the bn_add_words stub).
#   Out: $2 = final carry, copied to $4 in the return delay slot.
#   Scratch: $1 (file is under .set noat), $3, $8-$15, $24, $25.
# $4-$7 are modified (pointers advanced, count decremented).
| 356 .ent bn_add_words_internal | |
| 357 bn_add_words_internal: | |
| 358 .set reorder | |
# $3 = mask clearing the two low bits; $1 = count rounded down to a multiple
# of four. Zero means no full unrolled iteration is available.
| 359 li $3,-4 | |
| 360 and $1,$7,$3 | |
| 361 lw $12,0($5) | |
| 362 beqz $1,.L_bn_add_words_tail | |
| 363 | |
# Main loop, unrolled four times. All eight operand words are loaded first,
# interleaved with the count/pointer updates; $1 already holds the loop
# condition for the next iteration. Because the pointers are advanced up
# front, later loads and all stores use negative offsets.
| 364 .L_bn_add_words_loop: | |
| 365 lw $8,0($6) | |
| 366 subu $7,4 | |
| 367 lw $13,4($5) | |
| 368 and $1,$7,$3 | |
| 369 lw $14,2*4($5) | |
| 370 addu $6,4*4 | |
| 371 lw $15,3*4($5) | |
| 372 addu $4,4*4 | |
| 373 lw $9,-3*4($6) | |
| 374 addu $5,4*4 | |
| 375 lw $10,-2*4($6) | |
| 376 lw $11,-4($6) | |
# Per word:  s = b + a;  c1 = (s < a);  r = s + carry;  c2 = (r < s);
#            store r;  carry = c1 + c2.
| 377 addu $8,$12 | |
| 378 sltu $24,$8,$12 | |
| 379 addu $12,$8,$2 | |
| 380 sltu $2,$12,$8 | |
| 381 sw $12,-4*4($4) | |
| 382 addu $2,$24 | |
| 383 | |
| 384 addu $9,$13 | |
| 385 sltu $25,$9,$13 | |
| 386 addu $13,$9,$2 | |
| 387 sltu $2,$13,$9 | |
| 388 sw $13,-3*4($4) | |
| 389 addu $2,$25 | |
| 390 | |
| 391 addu $10,$14 | |
| 392 sltu $24,$10,$14 | |
| 393 addu $14,$10,$2 | |
| 394 sltu $2,$14,$10 | |
| 395 sw $14,-2*4($4) | |
| 396 addu $2,$24 | |
| 397 | |
| 398 addu $11,$15 | |
| 399 sltu $25,$11,$15 | |
| 400 addu $15,$11,$2 | |
| 401 sltu $2,$15,$11 | |
| 402 sw $15,-4($4) | |
| 403 addu $2,$25 | |
| 404 | |
| 405 .set noreorder | |
# Branch-likely: the delay-slot load of the next a[0] runs only when the
# branch is taken.
| 406 bgtzl $1,.L_bn_add_words_loop | |
| 407 lw $12,0($5) | |
| 408 | |
| 409 beqz $7,.L_bn_add_words_return | |
| 410 nop | |
| 411 | |
# Tail: up to three remaining words, one at a time. Returns as soon as the
# count reaches zero after the first or second word; the third word is
# processed without a further count check.
| 412 .L_bn_add_words_tail: | |
| 413 .set reorder | |
| 414 lw $12,0($5) | |
| 415 lw $8,0($6) | |
| 416 addu $8,$12 | |
| 417 subu $7,1 | |
| 418 sltu $24,$8,$12 | |
| 419 addu $12,$8,$2 | |
| 420 sltu $2,$12,$8 | |
| 421 sw $12,0($4) | |
| 422 addu $2,$24 | |
| 423 beqz $7,.L_bn_add_words_return | |
| 424 | |
| 425 lw $13,4($5) | |
| 426 lw $9,4($6) | |
| 427 addu $9,$13 | |
| 428 subu $7,1 | |
| 429 sltu $25,$9,$13 | |
| 430 addu $13,$9,$2 | |
| 431 sltu $2,$13,$9 | |
| 432 sw $13,4($4) | |
| 433 addu $2,$25 | |
| 434 beqz $7,.L_bn_add_words_return | |
| 435 | |
| 436 lw $14,2*4($5) | |
| 437 lw $10,2*4($6) | |
| 438 addu $10,$14 | |
| 439 sltu $24,$10,$14 | |
| 440 addu $14,$10,$2 | |
| 441 sltu $2,$14,$10 | |
| 442 sw $14,2*4($4) | |
| 443 addu $2,$24 | |
| 444 | |
# Common exit: $2 = final carry, mirrored into $4 in the return delay slot.
| 445 .L_bn_add_words_return: | |
| 446 .set noreorder | |
| 447 jr $31 | |
| 448 move $4,$2 | |
| 449 | |
| 450 .end bn_add_words_internal | |
| 451 | |
| 452 .align 5 | |
# bn_sub_words: exported entry point. Checks the word count in $7 and either
# returns 0 immediately or continues in bn_sub_words_internal.
#   In:  $4 = destination words, $5 = minuend words, $6 = subtrahend words,
#        $7 = word count (as consumed by the _internal routine).
#   Out: $2 = 0 when $7 <= 0; otherwise the value produced by
#        bn_sub_words_internal. On the early-return path $4 is set to 0.
# NOTE(review): presumably implements the C-level bn_sub_words(rp, ap, bp,
# num) bignum primitive -- confirm against the C prototype.
| 453 .globl bn_sub_words | |
| 454 .ent bn_sub_words | |
| 455 bn_sub_words: | |
| 456 .set noreorder | |
| 457 bgtz $7,bn_sub_words_internal | |
# Branch delay slot (always executed): start with a zero borrow in $2.
| 458 move $2,$0 | |
| 459 jr $31 | |
# Return delay slot: $4 = 0. The sibling stubs copy $2 here instead; the
# value is the same because $2 was just cleared.
| 460 move $4,$0 | |
| 461 .end bn_sub_words | |
| 462 | |
| 463 .align 5 | |
# bn_sub_words_internal: word-wise subtraction of two arrays of 32-bit words
# with borrow propagation.  For every word i:  r[i] = a[i] - b[i] - borrow.
#   In:  $4 = r (stored only), $5 = a, $6 = b, $7 = word count,
#        $2 = incoming borrow (zeroed by the bn_sub_words stub).
#   Out: $2 = final borrow, copied to $4 in the return delay slot.
#   Scratch: $1 (file is under .set noat), $3, $8-$15, $24, $25.
# $4-$7 are modified (pointers advanced, count decremented).
| 464 .ent bn_sub_words_internal | |
| 465 bn_sub_words_internal: | |
| 466 .set reorder | |
# $3 = mask clearing the two low bits; $1 = count rounded down to a multiple
# of four. Zero means no full unrolled iteration is available.
| 467 li $3,-4 | |
| 468 and $1,$7,$3 | |
| 469 lw $12,0($5) | |
| 470 beqz $1,.L_bn_sub_words_tail | |
| 471 | |
# Main loop, unrolled four times. All eight operand words are loaded first,
# interleaved with the count/pointer updates; $1 already holds the loop
# condition for the next iteration. Because the pointers are advanced up
# front, later loads and all stores use negative offsets.
| 472 .L_bn_sub_words_loop: | |
| 473 lw $8,0($6) | |
| 474 subu $7,4 | |
| 475 lw $13,4($5) | |
| 476 and $1,$7,$3 | |
| 477 lw $14,2*4($5) | |
| 478 addu $6,4*4 | |
| 479 lw $15,3*4($5) | |
| 480 addu $4,4*4 | |
| 481 lw $9,-3*4($6) | |
| 482 addu $5,4*4 | |
| 483 lw $10,-2*4($6) | |
| 484 lw $11,-4($6) | |
# Per word:  b1 = (a < b);  d = a - b;  r = d - borrow;  b2 = (r > d);
#            store r;  borrow = b1 + b2.
| 485 sltu $24,$12,$8 | |
| 486 subu $8,$12,$8 | |
| 487 subu $12,$8,$2 | |
| 488 sgtu $2,$12,$8 | |
| 489 sw $12,-4*4($4) | |
| 490 addu $2,$24 | |
| 491 | |
| 492 sltu $25,$13,$9 | |
| 493 subu $9,$13,$9 | |
| 494 subu $13,$9,$2 | |
| 495 sgtu $2,$13,$9 | |
| 496 sw $13,-3*4($4) | |
| 497 addu $2,$25 | |
| 498 | |
| 499 | |
| 500 sltu $24,$14,$10 | |
| 501 subu $10,$14,$10 | |
| 502 subu $14,$10,$2 | |
| 503 sgtu $2,$14,$10 | |
| 504 sw $14,-2*4($4) | |
| 505 addu $2,$24 | |
| 506 | |
| 507 sltu $25,$15,$11 | |
| 508 subu $11,$15,$11 | |
| 509 subu $15,$11,$2 | |
| 510 sgtu $2,$15,$11 | |
| 511 sw $15,-4($4) | |
| 512 addu $2,$25 | |
| 513 | |
| 514 .set noreorder | |
# Branch-likely: the delay-slot load of the next a[0] runs only when the
# branch is taken.
| 515 bgtzl $1,.L_bn_sub_words_loop | |
| 516 lw $12,0($5) | |
| 517 | |
| 518 beqz $7,.L_bn_sub_words_return | |
| 519 nop | |
| 520 | |
# Tail: up to three remaining words, one at a time. Returns as soon as the
# count reaches zero after the first or second word; the third word is
# processed without a further count check.
| 521 .L_bn_sub_words_tail: | |
| 522 .set reorder | |
| 523 lw $12,0($5) | |
| 524 lw $8,0($6) | |
| 525 subu $7,1 | |
| 526 sltu $24,$12,$8 | |
| 527 subu $8,$12,$8 | |
| 528 subu $12,$8,$2 | |
| 529 sgtu $2,$12,$8 | |
| 530 sw $12,0($4) | |
| 531 addu $2,$24 | |
| 532 beqz $7,.L_bn_sub_words_return | |
| 533 | |
| 534 lw $13,4($5) | |
| 535 subu $7,1 | |
| 536 lw $9,4($6) | |
| 537 sltu $25,$13,$9 | |
| 538 subu $9,$13,$9 | |
| 539 subu $13,$9,$2 | |
| 540 sgtu $2,$13,$9 | |
| 541 sw $13,4($4) | |
| 542 addu $2,$25 | |
| 543 beqz $7,.L_bn_sub_words_return | |
| 544 | |
| 545 lw $14,2*4($5) | |
| 546 lw $10,2*4($6) | |
| 547 sltu $24,$14,$10 | |
| 548 subu $10,$14,$10 | |
| 549 subu $14,$10,$2 | |
| 550 sgtu $2,$14,$10 | |
| 551 sw $14,2*4($4) | |
| 552 addu $2,$24 | |
| 553 | |
# Common exit: $2 = final borrow, mirrored into $4 in the return delay slot.
| 554 .L_bn_sub_words_return: | |
| 555 .set noreorder | |
| 556 jr $31 | |
| 557 move $4,$2 | |
| 558 .end bn_sub_words_internal | |
| 559 | |
| 560 .align 5 | |
# bn_div_3_words: exported entry point of a quotient-estimation helper.
#   In:  $4 = pointer m; the words m[0], m[-1] and m[-2] are read (m[-2] by
#        the _internal routine), $5 = word saved in $10 and used as the
#        multiplier of the quotient later, $6 = word used as the divisor.
#   Out: $2 = 0xffffffff (-1) when m[0] == $6; otherwise the value computed by
#        bn_div_3_words_internal. $4 receives a copy of $2.
# Sets up the register arguments of bn_div_words_internal: $4 = m[0] (high
# word), $5 = m[-1] (low word), $6 = divisor.
# NOTE(review): presumably implements the C-level bn_div_3_words(m, d1, d0)
# helper -- confirm against the C prototype.
| 561 .globl bn_div_3_words | |
| 562 .ent bn_div_3_words | |
| 563 bn_div_3_words: | |
| 564 .set noreorder | |
| 565 move $7,$4 # we know that bn_div_words does not | |
| 566 # touch $7, $10, $11 and preserves $6 | |
| 567 # so that we can save two arguments | |
| 568 # and return address in registers | |
| 569 # instead of stack:-) | |
| 570 | |
| 571 lw $4,($7) | |
| 572 move $10,$5 | |
| 573 bne $4,$6,bn_div_3_words_internal | |
# Branch delay slot (always executed): $5 = m[-1].
| 574 lw $5,-4($7) | |
# m[0] equals the divisor word: return the all-ones quotient.
| 575 li $2,-1 | |
| 576 jr $31 | |
# Return delay slot: mirror the result into $4.
| 577 move $4,$2 | |
| 578 .end bn_div_3_words | |
| 579 | |
| 580 .align 5 | |
# bn_div_3_words_internal: computes a first quotient q = ($4:$5) / $6 via
# bn_div_words_internal, then decrements q while q * $10 is too large compared
# with the running remainder and the word m[-2].
#   In:  $4 = m[0], $5 = m[-1], $6 = divisor word (d0), $7 = pointer m,
#        $10 = second word (d1) -- all prepared by the bn_div_3_words stub.
#   Out: $2 = corrected quotient, copied to $4 in the return delay slot.
#   Scratch: $1, $8, $11-$15, $24, $25, HI/LO, plus everything
#        bn_div_words_internal clobbers.
| 581 .ent bn_div_3_words_internal | |
| 582 bn_div_3_words_internal: | |
| 583 .set reorder | |
# The return address is parked in $11 across the nested call instead of on
# the stack; bn_div_words_internal does not write $11.
| 584 move $11,$31 | |
| 585 bal bn_div_words_internal | |
| 586 move $31,$11 | |
# After the call: $2 = quotient q, $5 = remainder (bn_div_words_internal
# copies its remainder into $5 before returning).
# $13:$12 = q * d1 (HI:LO), $14 = m[-2].
| 587 multu $10,$2 | |
| 588 lw $14,-2*4($7) | |
| 589 move $8,$0 | |
| 590 mfhi $13 | |
| 591 mflo $12 | |
# $24 = (HI < remainder): the estimate is already small enough.
| 592 sltu $24,$13,$5 | |
| 593 .L_bn_div_3_words_inner_loop: | |
| 594 bnez $24,.L_bn_div_3_words_inner_loop_done | |
# $1 = (HI == remainder) && (m[-2] >= LO): also acceptable, stop after this
# pass without decrementing q.
| 595 sgeu $1,$14,$12 | |
| 596 seq $25,$13,$5 | |
| 597 and $1,$25 | |
# Prepare the next candidate: remainder += d0, product (HI:LO) -= d1, with
# $15 carrying the borrow from LO into HI.
| 598 sltu $15,$12,$10 | |
| 599 addu $5,$6 | |
| 600 subu $13,$15 | |
| 601 subu $12,$10 | |
# Stop condition for the next pass: HI < remainder, or the remainder
# addition wrapped around ($8 = carry-out of remainder += d0).
| 602 sltu $24,$13,$5 | |
| 603 sltu $8,$5,$6 | |
| 604 or $24,$8 | |
| 605 .set noreorder | |
# Branch-likely: q is decremented in the delay slot only when the loop is
# actually repeated.
| 606 beqzl $1,.L_bn_div_3_words_inner_loop | |
| 607 subu $2,1 | |
| 608 .set reorder | |
| 609 .L_bn_div_3_words_inner_loop_done: | |
| 610 .set noreorder | |
| 611 jr $31 | |
# Return delay slot: mirror the result into $4.
| 612 move $4,$2 | |
| 613 .end bn_div_3_words_internal | |
| 614 | |
| 615 .align 5 | |
# bn_div_words: exported entry point. Rejects a zero divisor and otherwise
# continues in bn_div_words_internal.
#   In:  $4 = high word of the dividend, $5 = low word of the dividend,
#        $6 = divisor (as consumed by the _internal routine).
#   Out: $2 = 0xffffffff (-1) when $6 == 0, copied to $4; otherwise the value
#        produced by bn_div_words_internal.
# NOTE(review): presumably implements the C-level bn_div_words(h, l, d)
# primitive -- confirm against the C prototype.
| 616 .globl bn_div_words | |
| 617 .ent bn_div_words | |
| 618 bn_div_words: | |
| 619 .set noreorder | |
| 620 bnez $6,bn_div_words_internal | |
# Branch delay slot (always executed): preload the divide-by-zero result.
| 621 li $2,-1 # I would rather signal div-by-zero | |
| 622 # which can be done with 'break 7' | |
| 623 jr $31 | |
# Return delay slot: mirror the result into $4.
| 624 move $4,$2 | |
| 625 .end bn_div_words | |
| 626 | |
| 627 .align 5 | |
# bn_div_words_internal: divides the two-word value $4:$5 by the 32-bit
# divisor $6, producing the 32-bit quotient in two 16-bit halves.
#   In:  $4 = high word (h), $5 = low word (l), $6 = divisor (d).
#   Out: $2 = quotient (also copied to $4), $3 = remainder (also copied to
#        $5). $6 is shifted back to its original position before returning.
#   Scratch: $1, $8, $9, $12-$15, $24, $25, HI/LO.
#   Executes 'break 6' if h has bits that would be shifted out during
#   normalisation (signalled as overflow).
# No .set directive is given at entry: the code up to the first ".set reorder"
# relies on the ".set noreorder" state left active by bn_div_words directly
# above, so the instructions after each branch there are delay slots.
| 628 .ent bn_div_words_internal | |
| 629 bn_div_words_internal: | |
| 630 move $3,$0 | |
# Top bit of d already set: no normalisation needed. The delay slot sets the
# shift count $25 to 0 either way.
| 631 bltz $6,.L_bn_div_words_body | |
| 632 move $25,$3 | |
# Normalisation loop: shift d left until its top bit is set. The branch
# target ".-4" is the sll just above it; the delay slot counts the shifts
# in $25.
| 633 sll $6,1 | |
| 634 bgtz $6,.-4 | |
| 635 addu $25,1 | |
| 636 | |
| 637 .set reorder | |
# $13 = -shift. Variable shifts use only the low five bits of the count, so
# shifting by $13 behaves as shifting by (32 - shift).
# $14 = the top 'shift' bits of h; $1 = the top 'shift' bits of l moved to
# the bottom of the word.
| 638 negu $13,$25 | |
| 639 li $14,-1 | |
| 640 sll $14,$13 | |
| 641 and $14,$4 | |
| 642 srl $1,$5,$13 | |
| 643 .set noreorder | |
# Branch-likely over nothing: the 'break 6' in the delay slot executes only
# when $14 != 0, i.e. when normalising h would lose significant bits.
| 644 bnezl $14,.+8 | |
| 645 break 6 # signal overflow | |
| 646 .set reorder | |
# Normalise the dividend by the same shift: h:l <<= shift.
| 647 sll $4,$25 | |
| 648 sll $5,$25 | |
| 649 or $4,$1 | |
| 650 .L_bn_div_words_body: | |
# $3 = dh, the upper 16 bits of the normalised divisor.
| 651 srl $3,$6,4*4 # bits | |
# if (h >= d) h -= d; the subtraction sits in a branch-likely delay slot and
# therefore only runs when the condition holds.
| 652 sgeu $1,$4,$6 | |
| 653 .set noreorder | |
| 654 bnezl $1,.+8 | |
| 655 subu $4,$6 | |
| 656 .set reorder | |
| 657 | |
# First (upper) quotient half. Start from q = 0xffff; if the upper half of h
# differs from dh, use q = h / dh instead.
| 658 li $8,-1 | |
| 659 srl $9,$4,4*4 # bits | |
| 660 srl $8,4*4 # q=0xffff | |
| 661 beq $3,$9,.L_bn_div_words_skip_div1 | |
| 662 divu $0,$4,$3 | |
| 663 mflo $8 | |
| 664 .L_bn_div_words_skip_div1: | |
# $13:$12 = d * q;  $9:$15 = current dividend window, where
# $15 = (h << 16) | (l >> 16) and $9 = h >> 16.
| 665 multu $6,$8 | |
| 666 sll $15,$4,4*4 # bits | |
| 667 srl $1,$5,4*4 # bits | |
| 668 or $15,$1 | |
| 669 mflo $12 | |
| 670 mfhi $13 | |
# Correction loop: while d*q > $9:$15, decrement q and subtract d from the
# product. $1 = ($9 < HI) || ($9 == HI && $15 < LO); $2 = borrow out of
# LO - d.
| 671 .L_bn_div_words_inner_loop1: | |
| 672 sltu $14,$15,$12 | |
| 673 seq $24,$9,$13 | |
| 674 sltu $1,$9,$13 | |
| 675 and $14,$24 | |
| 676 sltu $2,$12,$6 | |
| 677 or $1,$14 | |
| 678 .set noreorder | |
# Delay slot of beqz: HI -= borrow is executed on both paths; HI is reloaded
# by a later mfhi before it is read again on the exit path.
| 679 beqz $1,.L_bn_div_words_inner_loop1_done | |
| 680 subu $13,$2 | |
| 681 subu $12,$6 | |
# Delay slot of the unconditional branch: q--.
| 682 b .L_bn_div_words_inner_loop1 | |
| 683 subu $8,1 | |
| 684 .set reorder | |
| 685 .L_bn_div_words_inner_loop1_done: | |
| 686 | |
# Advance to the lower half: l <<= 16, h = partial remainder, and the first
# quotient half moves into the upper 16 bits of the result.
| 687 sll $5,4*4 # bits | |
| 688 subu $4,$15,$12 | |
| 689 sll $2,$8,4*4 # bits | |
| 690 | |
# Second (lower) quotient half: same estimate-and-correct scheme as above.
| 691 li $8,-1 | |
| 692 srl $9,$4,4*4 # bits | |
| 693 srl $8,4*4 # q=0xffff | |
| 694 beq $3,$9,.L_bn_div_words_skip_div2 | |
| 695 divu $0,$4,$3 | |
| 696 mflo $8 | |
| 697 .L_bn_div_words_skip_div2: | |
| 698 multu $6,$8 | |
| 699 sll $15,$4,4*4 # bits | |
| 700 srl $1,$5,4*4 # bits | |
| 701 or $15,$1 | |
| 702 mflo $12 | |
| 703 mfhi $13 | |
# Same correction loop; $3 (dh) is no longer needed and is reused as the
# borrow scratch register here.
| 704 .L_bn_div_words_inner_loop2: | |
| 705 sltu $14,$15,$12 | |
| 706 seq $24,$9,$13 | |
| 707 sltu $1,$9,$13 | |
| 708 and $14,$24 | |
| 709 sltu $3,$12,$6 | |
| 710 or $1,$14 | |
| 711 .set noreorder | |
| 712 beqz $1,.L_bn_div_words_inner_loop2_done | |
| 713 subu $13,$3 | |
| 714 subu $12,$6 | |
| 715 b .L_bn_div_words_inner_loop2 | |
| 716 subu $8,1 | |
| 717 .set reorder | |
| 718 .L_bn_div_words_inner_loop2_done: | |
| 719 | |
# Final remainder (still normalised) in $4; merge the lower quotient half;
# undo the normalisation shift on the remainder and on the divisor.
| 720 subu $4,$15,$12 | |
| 721 or $2,$8 | |
| 722 srl $3,$4,$25 # $3 contains remainder if anybody wants it | |
| 723 srl $6,$25 # restore $6 | |
| 724 | |
| 725 .set noreorder | |
# The remainder is also handed back in $5 (bn_div_3_words_internal reads it
# there); the quotient is mirrored into $4 in the return delay slot.
| 726 move $5,$3 | |
| 727 jr $31 | |
| 728 move $4,$2 | |
| 729 .end bn_div_words_internal | |
| 730 | |
| 731 .align 5 | |
| 732 .globl bn_mul_comba8 | |
| 733 .ent bn_mul_comba8 | |
| 734 bn_mul_comba8: | |
| 735 .set noreorder | |
| 736 .frame $29,6*4,$31 | |
| 737 .mask 0x003f0000,-4 | |
| 738 subu $29,6*4 | |
| 739 sw $21,5*4($29) | |
| 740 sw $20,4*4($29) | |
| 741 sw $19,3*4($29) | |
| 742 sw $18,2*4($29) | |
| 743 sw $17,1*4($29) | |
| 744 sw $16,0*4($29) | |
| 745 | |
| 746 .set reorder | |
| 747 lw $12,0($5) # If compiled with -mips3 option on | |
| 748 # R5000 box assembler barks on this | |
| 749 # line with "should not have mult/div | |
| 750 # as last instruction in bb (R10K | |
| 751 # bug)" warning. If anybody out there | |
| 752 # has a clue about how to circumvent | |
| 753 # this do send me a note. | |
| 754 # <appro@fy.chalmers.se> | |
| 755 | |
| 756 lw $8,0($6) | |
| 757 lw $13,4($5) | |
| 758 lw $14,2*4($5) | |
| 759 multu $12,$8 # mul_add_c(a[0],b[0],c1,c2,c3); | |
| 760 lw $15,3*4($5) | |
| 761 lw $9,4($6) | |
| 762 lw $10,2*4($6) | |
| 763 lw $11,3*4($6) | |
| 764 mflo $2 | |
| 765 mfhi $3 | |
| 766 | |
| 767 lw $16,4*4($5) | |
| 768 lw $18,5*4($5) | |
| 769 multu $12,$9 # mul_add_c(a[0],b[1],c2,c3,c1); | |
| 770 lw $20,6*4($5) | |
| 771 lw $5,7*4($5) | |
| 772 lw $17,4*4($6) | |
| 773 lw $19,5*4($6) | |
| 774 mflo $24 | |
| 775 mfhi $25 | |
| 776 addu $3,$24 | |
| 777 sltu $1,$3,$24 | |
| 778 multu $13,$8 # mul_add_c(a[1],b[0],c2,c3,c1); | |
| 779 addu $7,$25,$1 | |
| 780 lw $21,6*4($6) | |
| 781 lw $6,7*4($6) | |
| 782 sw $2,0($4) # r[0]=c1; | |
| 783 mflo $24 | |
| 784 mfhi $25 | |
| 785 addu $3,$24 | |
| 786 sltu $1,$3,$24 | |
| 787 multu $14,$8 # mul_add_c(a[2],b[0],c3,c1,c2); | |
| 788 addu $25,$1 | |
| 789 addu $7,$25 | |
| 790 sltu $2,$7,$25 | |
| 791 sw $3,4($4) # r[1]=c2; | |
| 792 | |
| 793 mflo $24 | |
| 794 mfhi $25 | |
| 795 addu $7,$24 | |
| 796 sltu $1,$7,$24 | |
| 797 multu $13,$9 # mul_add_c(a[1],b[1],c3,c1,c2); | |
| 798 addu $25,$1 | |
| 799 addu $2,$25 | |
| 800 mflo $24 | |
| 801 mfhi $25 | |
| 802 addu $7,$24 | |
| 803 sltu $1,$7,$24 | |
| 804 multu $12,$10 # mul_add_c(a[0],b[2],c3,c1,c2); | |
| 805 addu $25,$1 | |
| 806 addu $2,$25 | |
| 807 sltu $3,$2,$25 | |
| 808 mflo $24 | |
| 809 mfhi $25 | |
| 810 addu $7,$24 | |
| 811 sltu $1,$7,$24 | |
| 812 multu $12,$11 # mul_add_c(a[0],b[3],c1,c2,c3); | |
| 813 addu $25,$1 | |
| 814 addu $2,$25 | |
| 815 sltu $1,$2,$25 | |
| 816 addu $3,$1 | |
| 817 sw $7,2*4($4) # r[2]=c3; | |
| 818 | |
| 819 mflo $24 | |
| 820 mfhi $25 | |
| 821 addu $2,$24 | |
| 822 sltu $1,$2,$24 | |
| 823 multu $13,$10 # mul_add_c(a[1],b[2],c1,c2,c3); | |
| 824 addu $25,$1 | |
| 825 addu $3,$25 | |
| 826 sltu $7,$3,$25 | |
| 827 mflo $24 | |
| 828 mfhi $25 | |
| 829 addu $2,$24 | |
| 830 sltu $1,$2,$24 | |
| 831 multu $14,$9 # mul_add_c(a[2],b[1],c1,c2,c3); | |
| 832 addu $25,$1 | |
| 833 addu $3,$25 | |
| 834 sltu $1,$3,$25 | |
| 835 addu $7,$1 | |
| 836 mflo $24 | |
| 837 mfhi $25 | |
| 838 addu $2,$24 | |
| 839 sltu $1,$2,$24 | |
| 840 multu $15,$8 # mul_add_c(a[3],b[0],c1,c2,c3); | |
| 841 addu $25,$1 | |
| 842 addu $3,$25 | |
| 843 sltu $1,$3,$25 | |
| 844 addu $7,$1 | |
| 845 mflo $24 | |
| 846 mfhi $25 | |
| 847 addu $2,$24 | |
| 848 sltu $1,$2,$24 | |
| 849 multu $16,$8 # mul_add_c(a[4],b[0],c2,c3,c1); | |
| 850 addu $25,$1 | |
| 851 addu $3,$25 | |
| 852 sltu $1,$3,$25 | |
| 853 addu $7,$1 | |
| 854 sw $2,3*4($4) # r[3]=c1; | |
| 855 | |
| 856 mflo $24 | |
| 857 mfhi $25 | |
| 858 addu $3,$24 | |
| 859 sltu $1,$3,$24 | |
| 860 multu $15,$9 # mul_add_c(a[3],b[1],c2,c3,c1); | |
| 861 addu $25,$1 | |
| 862 addu $7,$25 | |
| 863 sltu $2,$7,$25 | |
| 864 mflo $24 | |
| 865 mfhi $25 | |
| 866 addu $3,$24 | |
| 867 sltu $1,$3,$24 | |
| 868 multu $14,$10 # mul_add_c(a[2],b[2],c2,c3,c1); | |
| 869 addu $25,$1 | |
| 870 addu $7,$25 | |
| 871 sltu $1,$7,$25 | |
| 872 addu $2,$1 | |
| 873 mflo $24 | |
| 874 mfhi $25 | |
| 875 addu $3,$24 | |
| 876 sltu $1,$3,$24 | |
| 877 multu $13,$11 # mul_add_c(a[1],b[3],c2,c3,c1); | |
| 878 addu $25,$1 | |
| 879 addu $7,$25 | |
| 880 sltu $1,$7,$25 | |
| 881 addu $2,$1 | |
| 882 mflo $24 | |
| 883 mfhi $25 | |
| 884 addu $3,$24 | |
| 885 sltu $1,$3,$24 | |
| 886 multu $12,$17 # mul_add_c(a[0],b[4],c2,c3,c1); | |
| 887 addu $25,$1 | |
| 888 addu $7,$25 | |
| 889 sltu $1,$7,$25 | |
| 890 addu $2,$1 | |
| 891 mflo $24 | |
| 892 mfhi $25 | |
| 893 addu $3,$24 | |
| 894 sltu $1,$3,$24 | |
| 895 multu $12,$19 # mul_add_c(a[0],b[5],c3,c1,c2); | |
| 896 addu $25,$1 | |
| 897 addu $7,$25 | |
| 898 sltu $1,$7,$25 | |
| 899 addu $2,$1 | |
| 900 sw $3,4*4($4) # r[4]=c2; | |
| 901 | |
| 902 mflo $24 | |
| 903 mfhi $25 | |
| 904 addu $7,$24 | |
| 905 sltu $1,$7,$24 | |
| 906 multu $13,$17 # mul_add_c(a[1],b[4],c3,c1,c2); | |
| 907 addu $25,$1 | |
| 908 addu $2,$25 | |
| 909 sltu $3,$2,$25 | |
| 910 mflo $24 | |
| 911 mfhi $25 | |
| 912 addu $7,$24 | |
| 913 sltu $1,$7,$24 | |
| 914 multu $14,$11 # mul_add_c(a[2],b[3],c3,c1,c2); | |
| 915 addu $25,$1 | |
| 916 addu $2,$25 | |
| 917 sltu $1,$2,$25 | |
| 918 addu $3,$1 | |
| 919 mflo $24 | |
| 920 mfhi $25 | |
| 921 addu $7,$24 | |
| 922 sltu $1,$7,$24 | |
| 923 multu $15,$10 # mul_add_c(a[3],b[2],c3,c1,c2); | |
| 924 addu $25,$1 | |
| 925 addu $2,$25 | |
| 926 sltu $1,$2,$25 | |
| 927 addu $3,$1 | |
| 928 mflo $24 | |
| 929 mfhi $25 | |
| 930 addu $7,$24 | |
| 931 sltu $1,$7,$24 | |
| 932 multu $16,$9 # mul_add_c(a[4],b[1],c3,c1,c2); | |
| 933 addu $25,$1 | |
| 934 addu $2,$25 | |
| 935 sltu $1,$2,$25 | |
| 936 addu $3,$1 | |
| 937 mflo $24 | |
| 938 mfhi $25 | |
| 939 addu $7,$24 | |
| 940 sltu $1,$7,$24 | |
| 941 multu $18,$8 # mul_add_c(a[5],b[0],c3,c1,c2); | |
| 942 addu $25,$1 | |
| 943 addu $2,$25 | |
| 944 sltu $1,$2,$25 | |
| 945 addu $3,$1 | |
| 946 mflo $24 | |
| 947 mfhi $25 | |
| 948 addu $7,$24 | |
| 949 sltu $1,$7,$24 | |
| 950 multu $20,$8 # mul_add_c(a[6],b[0],c1,c2,c3); | |
| 951 addu $25,$1 | |
| 952 addu $2,$25 | |
| 953 sltu $1,$2,$25 | |
| 954 addu $3,$1 | |
| 955 sw $7,5*4($4) # r[5]=c3; | |
| 956 | |
| 957 mflo $24 | |
| 958 mfhi $25 | |
| 959 addu $2,$24 | |
| 960 sltu $1,$2,$24 | |
| 961 multu $18,$9 # mul_add_c(a[5],b[1],c1,c2,c3); | |
| 962 addu $25,$1 | |
| 963 addu $3,$25 | |
| 964 sltu $7,$3,$25 | |
| 965 mflo $24 | |
| 966 mfhi $25 | |
| 967 addu $2,$24 | |
| 968 sltu $1,$2,$24 | |
| 969 multu $16,$10 # mul_add_c(a[4],b[2],c1,c2,c3); | |
| 970 addu $25,$1 | |
| 971 addu $3,$25 | |
| 972 sltu $1,$3,$25 | |
| 973 addu $7,$1 | |
| 974 mflo $24 | |
| 975 mfhi $25 | |
| 976 addu $2,$24 | |
| 977 sltu $1,$2,$24 | |
| 978 multu $15,$11 # mul_add_c(a[3],b[3],c1,c2,c3); | |
| 979 addu $25,$1 | |
| 980 addu $3,$25 | |
| 981 sltu $1,$3,$25 | |
| 982 addu $7,$1 | |
| 983 mflo $24 | |
| 984 mfhi $25 | |
| 985 addu $2,$24 | |
| 986 sltu $1,$2,$24 | |
| 987 multu $14,$17 # mul_add_c(a[2],b[4],c1,c2,c3); | |
| 988 addu $25,$1 | |
| 989 addu $3,$25 | |
| 990 sltu $1,$3,$25 | |
| 991 addu $7,$1 | |
| 992 mflo $24 | |
| 993 mfhi $25 | |
| 994 addu $2,$24 | |
| 995 sltu $1,$2,$24 | |
| 996 multu $13,$19 # mul_add_c(a[1],b[5],c1,c2,c3); | |
| 997 addu $25,$1 | |
| 998 addu $3,$25 | |
| 999 sltu $1,$3,$25 | |
| 1000 addu $7,$1 | |
| 1001 mflo $24 | |
| 1002 mfhi $25 | |
| 1003 addu $2,$24 | |
| 1004 sltu $1,$2,$24 | |
| 1005 multu $12,$21 # mul_add_c(a[0],b[6],c1,c2,c3); | |
| 1006 addu $25,$1 | |
| 1007 addu $3,$25 | |
| 1008 sltu $1,$3,$25 | |
| 1009 addu $7,$1 | |
| 1010 mflo $24 | |
| 1011 mfhi $25 | |
| 1012 addu $2,$24 | |
| 1013 sltu $1,$2,$24 | |
| 1014 multu $12,$6 # mul_add_c(a[0],b[7],c2,c3,c1); | |
| 1015 addu $25,$1 | |
| 1016 addu $3,$25 | |
| 1017 sltu $1,$3,$25 | |
| 1018 addu $7,$1 | |
| 1019 sw $2,6*4($4) # r[6]=c1; | |
| 1020 | |
| 1021 mflo $24 | |
| 1022 mfhi $25 | |
| 1023 addu $3,$24 | |
| 1024 sltu $1,$3,$24 | |
| 1025 multu $13,$21 # mul_add_c(a[1],b[6],c2,c3,c1); | |
| 1026 addu $25,$1 | |
| 1027 addu $7,$25 | |
| 1028 sltu $2,$7,$25 | |
| 1029 mflo $24 | |
| 1030 mfhi $25 | |
| 1031 addu $3,$24 | |
| 1032 sltu $1,$3,$24 | |
| 1033 multu $14,$19 # mul_add_c(a[2],b[5],c2,c3,c1); | |
| 1034 addu $25,$1 | |
| 1035 addu $7,$25 | |
| 1036 sltu $1,$7,$25 | |
| 1037 addu $2,$1 | |
| 1038 mflo $24 | |
| 1039 mfhi $25 | |
| 1040 addu $3,$24 | |
| 1041 sltu $1,$3,$24 | |
| 1042 multu $15,$17 # mul_add_c(a[3],b[4],c2,c3,c1); | |
| 1043 addu $25,$1 | |
| 1044 addu $7,$25 | |
| 1045 sltu $1,$7,$25 | |
| 1046 addu $2,$1 | |
| 1047 mflo $24 | |
| 1048 mfhi $25 | |
| 1049 addu $3,$24 | |
| 1050 sltu $1,$3,$24 | |
| 1051 multu $16,$11 # mul_add_c(a[4],b[3],c2,c3,c1); | |
| 1052 addu $25,$1 | |
| 1053 addu $7,$25 | |
| 1054 sltu $1,$7,$25 | |
| 1055 addu $2,$1 | |
| 1056 mflo $24 | |
| 1057 mfhi $25 | |
| 1058 addu $3,$24 | |
| 1059 sltu $1,$3,$24 | |
| 1060 multu $18,$10 # mul_add_c(a[5],b[2],c2,c3,c1); | |
| 1061 addu $25,$1 | |
| 1062 addu $7,$25 | |
| 1063 sltu $1,$7,$25 | |
| 1064 addu $2,$1 | |
| 1065 mflo $24 | |
| 1066 mfhi $25 | |
| 1067 addu $3,$24 | |
| 1068 sltu $1,$3,$24 | |
| 1069 multu $20,$9 # mul_add_c(a[6],b[1],c2,c3,c1); | |
| 1070 addu $25,$1 | |
| 1071 addu $7,$25 | |
| 1072 sltu $1,$7,$25 | |
| 1073 addu $2,$1 | |
| 1074 mflo $24 | |
| 1075 mfhi $25 | |
| 1076 addu $3,$24 | |
| 1077 sltu $1,$3,$24 | |
| 1078 multu $5,$8 # mul_add_c(a[7],b[0],c2,c3,c1); | |
| 1079 addu $25,$1 | |
| 1080 addu $7,$25 | |
| 1081 sltu $1,$7,$25 | |
| 1082 addu $2,$1 | |
| 1083 mflo $24 | |
| 1084 mfhi $25 | |
| 1085 addu $3,$24 | |
| 1086 sltu $1,$3,$24 | |
| 1087 multu $5,$9 # mul_add_c(a[7],b[1],c3,c1,c2); | |
| 1088 addu $25,$1 | |
| 1089 addu $7,$25 | |
| 1090 sltu $1,$7,$25 | |
| 1091 addu $2,$1 | |
| 1092 sw $3,7*4($4) # r[7]=c2; | |
| 1093 | |
| 1094 mflo $24 | |
| 1095 mfhi $25 | |
| 1096 addu $7,$24 | |
| 1097 sltu $1,$7,$24 | |
| 1098 multu $20,$10 # mul_add_c(a[6],b[2],c3,c1,c2); | |
| 1099 addu $25,$1 | |
| 1100 addu $2,$25 | |
| 1101 sltu $3,$2,$25 | |
| 1102 mflo $24 | |
| 1103 mfhi $25 | |
| 1104 addu $7,$24 | |
| 1105 sltu $1,$7,$24 | |
| 1106 multu $18,$11 # mul_add_c(a[5],b[3],c3,c1,c2); | |
| 1107 addu $25,$1 | |
| 1108 addu $2,$25 | |
| 1109 sltu $1,$2,$25 | |
| 1110 addu $3,$1 | |
| 1111 mflo $24 | |
| 1112 mfhi $25 | |
| 1113 addu $7,$24 | |
| 1114 sltu $1,$7,$24 | |
| 1115 multu $16,$17 # mul_add_c(a[4],b[4],c3,c1,c2); | |
| 1116 addu $25,$1 | |
| 1117 addu $2,$25 | |
| 1118 sltu $1,$2,$25 | |
| 1119 addu $3,$1 | |
| 1120 mflo $24 | |
| 1121 mfhi $25 | |
| 1122 addu $7,$24 | |
| 1123 sltu $1,$7,$24 | |
| 1124 multu $15,$19 # mul_add_c(a[3],b[5],c3,c1,c2); | |
| 1125 addu $25,$1 | |
| 1126 addu $2,$25 | |
| 1127 sltu $1,$2,$25 | |
| 1128 addu $3,$1 | |
| 1129 mflo $24 | |
| 1130 mfhi $25 | |
| 1131 addu $7,$24 | |
| 1132 sltu $1,$7,$24 | |
| 1133 multu $14,$21 # mul_add_c(a[2],b[6],c3,c1,c2); | |
| 1134 addu $25,$1 | |
| 1135 addu $2,$25 | |
| 1136 sltu $1,$2,$25 | |
| 1137 addu $3,$1 | |
| 1138 mflo $24 | |
| 1139 mfhi $25 | |
| 1140 addu $7,$24 | |
| 1141 sltu $1,$7,$24 | |
| 1142 multu $13,$6 # mul_add_c(a[1],b[7],c3,c1,c2); | |
| 1143 addu $25,$1 | |
| 1144 addu $2,$25 | |
| 1145 sltu $1,$2,$25 | |
| 1146 addu $3,$1 | |
| 1147 mflo $24 | |
| 1148 mfhi $25 | |
| 1149 addu $7,$24 | |
| 1150 sltu $1,$7,$24 | |
| 1151 multu $14,$6 # mul_add_c(a[2],b[7],c1,c2,c3); | |
| 1152 addu $25,$1 | |
| 1153 addu $2,$25 | |
| 1154 sltu $1,$2,$25 | |
| 1155 addu $3,$1 | |
| 1156 sw $7,8*4($4) # r[8]=c3; | |
| 1157 | |
| 1158 mflo $24 | |
| 1159 mfhi $25 | |
| 1160 addu $2,$24 | |
| 1161 sltu $1,$2,$24 | |
| 1162 multu $15,$21 # mul_add_c(a[3],b[6],c1,c2,c3); | |
| 1163 addu $25,$1 | |
| 1164 addu $3,$25 | |
| 1165 sltu $7,$3,$25 | |
| 1166 mflo $24 | |
| 1167 mfhi $25 | |
| 1168 addu $2,$24 | |
| 1169 sltu $1,$2,$24 | |
| 1170 multu $16,$19 # mul_add_c(a[4],b[5],c1,c2,c3); | |
| 1171 addu $25,$1 | |
| 1172 addu $3,$25 | |
| 1173 sltu $1,$3,$25 | |
| 1174 addu $7,$1 | |
| 1175 mflo $24 | |
| 1176 mfhi $25 | |
| 1177 addu $2,$24 | |
| 1178 sltu $1,$2,$24 | |
| 1179 multu $18,$17 # mul_add_c(a[5],b[4],c1,c2,c3); | |
| 1180 addu $25,$1 | |
| 1181 addu $3,$25 | |
| 1182 sltu $1,$3,$25 | |
| 1183 addu $7,$1 | |
| 1184 mflo $24 | |
| 1185 mfhi $25 | |
| 1186 addu $2,$24 | |
| 1187 sltu $1,$2,$24 | |
| 1188 multu $20,$11 # mul_add_c(a[6],b[3],c1,c2,c3); | |
| 1189 addu $25,$1 | |
| 1190 addu $3,$25 | |
| 1191 sltu $1,$3,$25 | |
| 1192 addu $7,$1 | |
| 1193 mflo $24 | |
| 1194 mfhi $25 | |
| 1195 addu $2,$24 | |
| 1196 sltu $1,$2,$24 | |
| 1197 multu $5,$10 # mul_add_c(a[7],b[2],c1,c2,c3); | |
| 1198 addu $25,$1 | |
| 1199 addu $3,$25 | |
| 1200 sltu $1,$3,$25 | |
| 1201 addu $7,$1 | |
| 1202 mflo $24 | |
| 1203 mfhi $25 | |
| 1204 addu $2,$24 | |
| 1205 sltu $1,$2,$24 | |
| 1206 multu $5,$11 # mul_add_c(a[7],b[3],c2,c3,c1); | |
| 1207 addu $25,$1 | |
| 1208 addu $3,$25 | |
| 1209 sltu $1,$3,$25 | |
| 1210 addu $7,$1 | |
| 1211 sw $2,9*4($4) # r[9]=c1; | |
| 1212 | |
| 1213 mflo $24 | |
| 1214 mfhi $25 | |
| 1215 addu $3,$24 | |
| 1216 sltu $1,$3,$24 | |
| 1217 multu $20,$17 # mul_add_c(a[6],b[4],c2,c3,c1); | |
| 1218 addu $25,$1 | |
| 1219 addu $7,$25 | |
| 1220 sltu $2,$7,$25 | |
| 1221 mflo $24 | |
| 1222 mfhi $25 | |
| 1223 addu $3,$24 | |
| 1224 sltu $1,$3,$24 | |
| 1225 multu $18,$19 # mul_add_c(a[5],b[5],c2,c3,c1); | |
| 1226 addu $25,$1 | |
| 1227 addu $7,$25 | |
| 1228 sltu $1,$7,$25 | |
| 1229 addu $2,$1 | |
| 1230 mflo $24 | |
| 1231 mfhi $25 | |
| 1232 addu $3,$24 | |
| 1233 sltu $1,$3,$24 | |
| 1234 multu $16,$21 # mul_add_c(a[4],b[6],c2,c3,c1); | |
| 1235 addu $25,$1 | |
| 1236 addu $7,$25 | |
| 1237 sltu $1,$7,$25 | |
| 1238 addu $2,$1 | |
| 1239 mflo $24 | |
| 1240 mfhi $25 | |
| 1241 addu $3,$24 | |
| 1242 sltu $1,$3,$24 | |
| 1243 multu $15,$6 # mul_add_c(a[3],b[7],c2,c3,c1); | |
| 1244 addu $25,$1 | |
| 1245 addu $7,$25 | |
| 1246 sltu $1,$7,$25 | |
| 1247 addu $2,$1 | |
| 1248 mflo $24 | |
| 1249 mfhi $25 | |
| 1250 addu $3,$24 | |
| 1251 sltu $1,$3,$24 | |
| 1252 multu $16,$6 # mul_add_c(a[4],b[7],c3,c1,c2); | |
| 1253 addu $25,$1 | |
| 1254 addu $7,$25 | |
| 1255 sltu $1,$7,$25 | |
| 1256 addu $2,$1 | |
| 1257 sw $3,10*4($4) # r[10]=c2; | |
| 1258 | |
| 1259 mflo $24 | |
| 1260 mfhi $25 | |
| 1261 addu $7,$24 | |
| 1262 sltu $1,$7,$24 | |
| 1263 multu $18,$21 # mul_add_c(a[5],b[6],c3,c1,c2); | |
| 1264 addu $25,$1 | |
| 1265 addu $2,$25 | |
| 1266 sltu $3,$2,$25 | |
| 1267 mflo $24 | |
| 1268 mfhi $25 | |
| 1269 addu $7,$24 | |
| 1270 sltu $1,$7,$24 | |
| 1271 multu $20,$19 # mul_add_c(a[6],b[5],c3,c1,c2); | |
| 1272 addu $25,$1 | |
| 1273 addu $2,$25 | |
| 1274 sltu $1,$2,$25 | |
| 1275 addu $3,$1 | |
| 1276 mflo $24 | |
| 1277 mfhi $25 | |
| 1278 addu $7,$24 | |
| 1279 sltu $1,$7,$24 | |
| 1280 multu $5,$17 # mul_add_c(a[7],b[4],c3,c1,c2); | |
| 1281 addu $25,$1 | |
| 1282 addu $2,$25 | |
| 1283 sltu $1,$2,$25 | |
| 1284 addu $3,$1 | |
| 1285 mflo $24 | |
| 1286 mfhi $25 | |
| 1287 addu $7,$24 | |
| 1288 sltu $1,$7,$24 | |
| 1289 multu $5,$19 # mul_add_c(a[7],b[5],c1,c2,c3); | |
| 1290 addu $25,$1 | |
| 1291 addu $2,$25 | |
| 1292 sltu $1,$2,$25 | |
| 1293 addu $3,$1 | |
| 1294 sw $7,11*4($4) # r[11]=c3; | |
| 1295 | |
| 1296 mflo $24 | |
| 1297 mfhi $25 | |
| 1298 addu $2,$24 | |
| 1299 sltu $1,$2,$24 | |
| 1300 multu $20,$21 # mul_add_c(a[6],b[6],c1,c2,c3); | |
| 1301 addu $25,$1 | |
| 1302 addu $3,$25 | |
| 1303 sltu $7,$3,$25 | |
| 1304 mflo $24 | |
| 1305 mfhi $25 | |
| 1306 addu $2,$24 | |
| 1307 sltu $1,$2,$24 | |
| 1308 multu $18,$6 # mul_add_c(a[5],b[7],c1,c2,c3); | |
| 1309 addu $25,$1 | |
| 1310 addu $3,$25 | |
| 1311 sltu $1,$3,$25 | |
| 1312 addu $7,$1 | |
| 1313 mflo $24 | |
| 1314 mfhi $25 | |
| 1315 addu $2,$24 | |
| 1316 sltu $1,$2,$24 | |
| 1317 multu $20,$6 # mul_add_c(a[6],b[7],c2,c3,c1); | |
| 1318 addu $25,$1 | |
| 1319 addu $3,$25 | |
| 1320 sltu $1,$3,$25 | |
| 1321 addu $7,$1 | |
| 1322 sw $2,12*4($4) # r[12]=c1; | |
| 1323 | |
| 1324 mflo $24 | |
| 1325 mfhi $25 | |
| 1326 addu $3,$24 | |
| 1327 sltu $1,$3,$24 | |
| 1328 multu $5,$21 # mul_add_c(a[7],b[6],c2,c3,c1); | |
| 1329 addu $25,$1 | |
| 1330 addu $7,$25 | |
| 1331 sltu $2,$7,$25 | |
| 1332 mflo $24 | |
| 1333 mfhi $25 | |
| 1334 addu $3,$24 | |
| 1335 sltu $1,$3,$24 | |
| 1336 multu $5,$6 # mul_add_c(a[7],b[7],c3,c1,c2); | |
| 1337 addu $25,$1 | |
| 1338 addu $7,$25 | |
| 1339 sltu $1,$7,$25 | |
| 1340 addu $2,$1 | |
| 1341 sw $3,13*4($4) # r[13]=c2; | |
| 1342 | |
| 1343 mflo $24 | |
| 1344 mfhi $25 | |
| 1345 addu $7,$24 | |
| 1346 sltu $1,$7,$24 | |
| 1347 addu $25,$1 | |
| 1348 addu $2,$25 | |
| 1349 sw $7,14*4($4) # r[14]=c3; | |
| 1350 sw $2,15*4($4) # r[15]=c1; | |
| 1351 | |
| 1352 .set noreorder | |
| 1353 lw $21,5*4($29) | |
| 1354 lw $20,4*4($29) | |
| 1355 lw $19,3*4($29) | |
| 1356 lw $18,2*4($29) | |
| 1357 lw $17,1*4($29) | |
| 1358 lw $16,0*4($29) | |
| 1359 jr $31 | |
| 1360 addu $29,6*4 | |
| 1361 .end bn_mul_comba8 | |
| 1362 | |
# bn_mul_comba4: 4x4-word comba (column-by-column) multiply; stores the eight
# 32-bit words of a[0..3] * b[0..3] to r[0..7].
# Register roles as used below:
#   $4 = r (destination of the eight sw), $5 = a, $6 = b
#   $12-$15 = a[0..3], $8-$11 = b[0..3]
#   $2, $3, $7 = three-word column accumulator (c1, c2, c3); roles rotate per column
#   $24/$25 = LO/HI of the most recently completed multu, $1 = carry scratch
# Each multu is issued before the adds that fold in the previous product, so the
# mul_add_c(...) comment on a multu names the product being started on that line;
# its LO/HI are consumed by the mflo/mfhi group that follows.
| 1363 .align 5 | |
| 1364 .globl bn_mul_comba4 | |
| 1365 .ent bn_mul_comba4 | |
| 1366 bn_mul_comba4: | |
| 1367 .set reorder | |
| 1368 lw $12,0($5) | |
| 1369 lw $8,0($6) | |
| 1370 lw $13,4($5) | |
| 1371 lw $14,2*4($5) | |
| 1372 multu $12,$8 # mul_add_c(a[0],b[0],c1,c2,c3); | |
| 1373 lw $15,3*4($5) | |
| 1374 lw $9,4($6) | |
| 1375 lw $10,2*4($6) | |
| 1376 lw $11,3*4($6) | |
| 1377 mflo $2 | |
| 1378 mfhi $3 | |
# r[0]=c1;
| 1379 sw $2,0($4) | |
| 1380 | |
| 1381 multu $12,$9 # mul_add_c(a[0],b[1],c2,c3,c1); | |
| 1382 mflo $24 | |
| 1383 mfhi $25 | |
| 1384 addu $3,$24 | |
| 1385 sltu $1,$3,$24 | |
| 1386 multu $13,$8 # mul_add_c(a[1],b[0],c2,c3,c1); | |
# c3 is written fresh here (HI + carry) rather than accumulated.
| 1387 addu $7,$25,$1 | |
| 1388 mflo $24 | |
| 1389 mfhi $25 | |
| 1390 addu $3,$24 | |
| 1391 sltu $1,$3,$24 | |
| 1392 multu $14,$8 # mul_add_c(a[2],b[0],c3,c1,c2); | |
| 1393 addu $25,$1 | |
| 1394 addu $7,$25 | |
# c1 is written fresh with the carry out of c3.
| 1395 sltu $2,$7,$25 | |
# r[1]=c2;
| 1396 sw $3,4($4) | |
| 1397 | |
| 1398 mflo $24 | |
| 1399 mfhi $25 | |
| 1400 addu $7,$24 | |
| 1401 sltu $1,$7,$24 | |
| 1402 multu $13,$9 # mul_add_c(a[1],b[1],c3,c1,c2); | |
| 1403 addu $25,$1 | |
# NOTE(review): unlike the other steps, no sltu follows this addu; c2 ($3) is
# only written (fresh) by the sltu of the next step. Presumably c1 cannot
# overflow at this point - confirm.
| 1404 addu $2,$25 | |
| 1405 mflo $24 | |
| 1406 mfhi $25 | |
| 1407 addu $7,$24 | |
| 1408 sltu $1,$7,$24 | |
| 1409 multu $12,$10 # mul_add_c(a[0],b[2],c3,c1,c2); | |
| 1410 addu $25,$1 | |
| 1411 addu $2,$25 | |
| 1412 sltu $3,$2,$25 | |
| 1413 mflo $24 | |
| 1414 mfhi $25 | |
| 1415 addu $7,$24 | |
| 1416 sltu $1,$7,$24 | |
| 1417 multu $12,$11 # mul_add_c(a[0],b[3],c1,c2,c3); | |
| 1418 addu $25,$1 | |
| 1419 addu $2,$25 | |
| 1420 sltu $1,$2,$25 | |
| 1421 addu $3,$1 | |
# r[2]=c3;
| 1422 sw $7,2*4($4) | |
| 1423 | |
| 1424 mflo $24 | |
| 1425 mfhi $25 | |
| 1426 addu $2,$24 | |
| 1427 sltu $1,$2,$24 | |
| 1428 multu $13,$10 # mul_add_c(a[1],b[2],c1,c2,c3); | |
| 1429 addu $25,$1 | |
| 1430 addu $3,$25 | |
| 1431 sltu $7,$3,$25 | |
| 1432 mflo $24 | |
| 1433 mfhi $25 | |
| 1434 addu $2,$24 | |
| 1435 sltu $1,$2,$24 | |
| 1436 multu $14,$9 # mul_add_c(a[2],b[1],c1,c2,c3); | |
| 1437 addu $25,$1 | |
| 1438 addu $3,$25 | |
| 1439 sltu $1,$3,$25 | |
| 1440 addu $7,$1 | |
| 1441 mflo $24 | |
| 1442 mfhi $25 | |
| 1443 addu $2,$24 | |
| 1444 sltu $1,$2,$24 | |
| 1445 multu $15,$8 # mul_add_c(a[3],b[0],c1,c2,c3); | |
| 1446 addu $25,$1 | |
| 1447 addu $3,$25 | |
| 1448 sltu $1,$3,$25 | |
| 1449 addu $7,$1 | |
| 1450 mflo $24 | |
| 1451 mfhi $25 | |
| 1452 addu $2,$24 | |
| 1453 sltu $1,$2,$24 | |
| 1454 multu $15,$9 # mul_add_c(a[3],b[1],c2,c3,c1); | |
| 1455 addu $25,$1 | |
| 1456 addu $3,$25 | |
| 1457 sltu $1,$3,$25 | |
| 1458 addu $7,$1 | |
# r[3]=c1;
| 1459 sw $2,3*4($4) | |
| 1460 | |
| 1461 mflo $24 | |
| 1462 mfhi $25 | |
| 1463 addu $3,$24 | |
| 1464 sltu $1,$3,$24 | |
| 1465 multu $14,$10 # mul_add_c(a[2],b[2],c2,c3,c1); | |
| 1466 addu $25,$1 | |
| 1467 addu $7,$25 | |
| 1468 sltu $2,$7,$25 | |
| 1469 mflo $24 | |
| 1470 mfhi $25 | |
| 1471 addu $3,$24 | |
| 1472 sltu $1,$3,$24 | |
| 1473 multu $13,$11 # mul_add_c(a[1],b[3],c2,c3,c1); | |
| 1474 addu $25,$1 | |
| 1475 addu $7,$25 | |
| 1476 sltu $1,$7,$25 | |
| 1477 addu $2,$1 | |
| 1478 mflo $24 | |
| 1479 mfhi $25 | |
| 1480 addu $3,$24 | |
| 1481 sltu $1,$3,$24 | |
| 1482 multu $14,$11 # mul_add_c(a[2],b[3],c3,c1,c2); | |
| 1483 addu $25,$1 | |
| 1484 addu $7,$25 | |
| 1485 sltu $1,$7,$25 | |
| 1486 addu $2,$1 | |
# r[4]=c2;
| 1487 sw $3,4*4($4) | |
| 1488 | |
| 1489 mflo $24 | |
| 1490 mfhi $25 | |
| 1491 addu $7,$24 | |
| 1492 sltu $1,$7,$24 | |
| 1493 multu $15,$10 # mul_add_c(a[3],b[2],c3,c1,c2); | |
| 1494 addu $25,$1 | |
| 1495 addu $2,$25 | |
| 1496 sltu $3,$2,$25 | |
| 1497 mflo $24 | |
| 1498 mfhi $25 | |
| 1499 addu $7,$24 | |
| 1500 sltu $1,$7,$24 | |
| 1501 multu $15,$11 # mul_add_c(a[3],b[3],c1,c2,c3); | |
| 1502 addu $25,$1 | |
| 1503 addu $2,$25 | |
| 1504 sltu $1,$2,$25 | |
| 1505 addu $3,$1 | |
# r[5]=c3;
| 1506 sw $7,5*4($4) | |
| 1507 | |
| 1508 mflo $24 | |
| 1509 mfhi $25 | |
| 1510 addu $2,$24 | |
| 1511 sltu $1,$2,$24 | |
| 1512 addu $25,$1 | |
| 1513 addu $3,$25 | |
# r[6]=c1; r[7]=c2;
| 1514 sw $2,6*4($4) | |
| 1515 sw $3,7*4($4) | |
| 1516 | |
# Return; with noreorder in effect the explicit nop occupies the jr delay slot.
| 1517 .set noreorder | |
| 1518 jr $31 | |
| 1519 nop | |
| 1520 .end bn_mul_comba4 | |
| 1521 | |
# bn_sqr_comba8: 8-word comba (column-by-column) squaring; stores the sixteen
# 32-bit words of a[0..7]^2 to r[0..15].
# Register roles as used below:
#   $4 = r (destination of the sixteen sw), $5 = a
#   $12-$15 = a[0..3], $8-$11 = a[4..7]
#   $2, $3, $7 = three-word column accumulator (c1, c2, c3); roles rotate per column
#   $24/$25 = LO/HI of the most recently completed multu
#   $1 = carry scratch, $6 = scratch for the bit shifted out of LO
# Only a[] is loaded; "b[j]" in the per-product comments denotes a[j].
# The comment on a multu names the product being started on that line; its
# LO/HI are consumed by the mflo/mfhi group that follows.
# mul_add_c2 steps (off-diagonal products) double the 64-bit product in place
# before adding it:
#   slt <acc>,$25,$0  (or slt $1,$25,$0 ; addu <acc>,$1)
#         - top bit of HI, the bit lost by the shift, goes into the highest
#           accumulator word
#   sll $25,1 ; slt $6,$24,$0 ; addu $25,$6 ; sll $24,1
#         - HI:LO <<= 1, carrying the top bit of LO into HI
# mul_add_c steps (squares a[i]*a[i]) add the product once, without doubling.
| 1522 .align 5 | |
| 1523 .globl bn_sqr_comba8 | |
| 1524 .ent bn_sqr_comba8 | |
| 1525 bn_sqr_comba8: | |
| 1526 .set reorder | |
| 1527 lw $12,0($5) | |
| 1528 lw $13,4($5) | |
| 1529 lw $14,2*4($5) | |
| 1530 lw $15,3*4($5) | |
| 1531 | |
| 1532 multu $12,$12 # mul_add_c(a[0],b[0],c1,c2,c3); | |
| 1533 lw $8,4*4($5) | |
| 1534 lw $9,5*4($5) | |
| 1535 lw $10,6*4($5) | |
| 1536 lw $11,7*4($5) | |
| 1537 mflo $2 | |
| 1538 mfhi $3 | |
# r[0]=c1;
| 1539 sw $2,0($4) | |
| 1540 | |
| 1541 multu $12,$13 # mul_add_c2(a[0],b[1],c2,c3,c1); | |
| 1542 mflo $24 | |
| 1543 mfhi $25 | |
| 1544 slt $2,$25,$0 | |
| 1545 sll $25,1 | |
| 1546 multu $14,$12 # mul_add_c2(a[2],b[0],c3,c1,c2); | |
| 1547 slt $6,$24,$0 | |
| 1548 addu $25,$6 | |
| 1549 sll $24,1 | |
| 1550 addu $3,$24 | |
| 1551 sltu $1,$3,$24 | |
# c3 is written fresh here (doubled HI + carry) rather than accumulated.
| 1552 addu $7,$25,$1 | |
# r[1]=c2;
| 1553 sw $3,4($4) | |
| 1554 | |
| 1555 mflo $24 | |
| 1556 mfhi $25 | |
| 1557 slt $3,$25,$0 | |
| 1558 sll $25,1 | |
| 1559 multu $13,$13 # mul_add_c(a[1],b[1],c3,c1,c2); | |
| 1560 slt $6,$24,$0 | |
| 1561 addu $25,$6 | |
| 1562 sll $24,1 | |
| 1563 addu $7,$24 | |
| 1564 sltu $1,$7,$24 | |
| 1565 addu $25,$1 | |
| 1566 addu $2,$25 | |
| 1567 sltu $1,$2,$25 | |
| 1568 addu $3,$1 | |
| 1569 mflo $24 | |
| 1570 mfhi $25 | |
| 1571 addu $7,$24 | |
| 1572 sltu $1,$7,$24 | |
| 1573 multu $12,$15 # mul_add_c2(a[0],b[3],c1,c2,c3); | |
| 1574 addu $25,$1 | |
| 1575 addu $2,$25 | |
| 1576 sltu $1,$2,$25 | |
| 1577 addu $3,$1 | |
# r[2]=c3;
| 1578 sw $7,2*4($4) | |
| 1579 | |
| 1580 mflo $24 | |
| 1581 mfhi $25 | |
| 1582 slt $7,$25,$0 | |
| 1583 sll $25,1 | |
| 1584 multu $13,$14 # mul_add_c2(a[1],b[2],c1,c2,c3); | |
| 1585 slt $6,$24,$0 | |
| 1586 addu $25,$6 | |
| 1587 sll $24,1 | |
| 1588 addu $2,$24 | |
| 1589 sltu $1,$2,$24 | |
| 1590 addu $25,$1 | |
| 1591 addu $3,$25 | |
| 1592 sltu $1,$3,$25 | |
| 1593 addu $7,$1 | |
| 1594 mflo $24 | |
| 1595 mfhi $25 | |
| 1596 slt $1,$25,$0 | |
| 1597 addu $7,$1 | |
| 1598 multu $8,$12 # mul_add_c2(a[4],b[0],c2,c3,c1); | |
| 1599 sll $25,1 | |
| 1600 slt $6,$24,$0 | |
| 1601 addu $25,$6 | |
| 1602 sll $24,1 | |
| 1603 addu $2,$24 | |
| 1604 sltu $1,$2,$24 | |
| 1605 addu $25,$1 | |
| 1606 addu $3,$25 | |
| 1607 sltu $1,$3,$25 | |
| 1608 addu $7,$1 | |
# r[3]=c1;
| 1609 sw $2,3*4($4) | |
| 1610 | |
| 1611 mflo $24 | |
| 1612 mfhi $25 | |
| 1613 slt $2,$25,$0 | |
| 1614 sll $25,1 | |
| 1615 multu $15,$13 # mul_add_c2(a[3],b[1],c2,c3,c1); | |
| 1616 slt $6,$24,$0 | |
| 1617 addu $25,$6 | |
| 1618 sll $24,1 | |
| 1619 addu $3,$24 | |
| 1620 sltu $1,$3,$24 | |
| 1621 addu $25,$1 | |
| 1622 addu $7,$25 | |
| 1623 sltu $1,$7,$25 | |
| 1624 addu $2,$1 | |
| 1625 mflo $24 | |
| 1626 mfhi $25 | |
| 1627 slt $1,$25,$0 | |
| 1628 addu $2,$1 | |
| 1629 multu $14,$14 # mul_add_c(a[2],b[2],c2,c3,c1); | |
| 1630 sll $25,1 | |
| 1631 slt $6,$24,$0 | |
| 1632 addu $25,$6 | |
| 1633 sll $24,1 | |
| 1634 addu $3,$24 | |
| 1635 sltu $1,$3,$24 | |
| 1636 addu $25,$1 | |
| 1637 addu $7,$25 | |
| 1638 sltu $1,$7,$25 | |
| 1639 addu $2,$1 | |
| 1640 mflo $24 | |
| 1641 mfhi $25 | |
| 1642 addu $3,$24 | |
| 1643 sltu $1,$3,$24 | |
| 1644 multu $12,$9 # mul_add_c2(a[0],b[5],c3,c1,c2); | |
| 1645 addu $25,$1 | |
| 1646 addu $7,$25 | |
| 1647 sltu $1,$7,$25 | |
| 1648 addu $2,$1 | |
# r[4]=c2;
| 1649 sw $3,4*4($4) | |
| 1650 | |
| 1651 mflo $24 | |
| 1652 mfhi $25 | |
| 1653 slt $3,$25,$0 | |
| 1654 sll $25,1 | |
| 1655 multu $13,$8 # mul_add_c2(a[1],b[4],c3,c1,c2); | |
| 1656 slt $6,$24,$0 | |
| 1657 addu $25,$6 | |
| 1658 sll $24,1 | |
| 1659 addu $7,$24 | |
| 1660 sltu $1,$7,$24 | |
| 1661 addu $25,$1 | |
| 1662 addu $2,$25 | |
| 1663 sltu $1,$2,$25 | |
| 1664 addu $3,$1 | |
| 1665 mflo $24 | |
| 1666 mfhi $25 | |
| 1667 slt $1,$25,$0 | |
| 1668 addu $3,$1 | |
| 1669 multu $14,$15 # mul_add_c2(a[2],b[3],c3,c1,c2); | |
| 1670 sll $25,1 | |
| 1671 slt $6,$24,$0 | |
| 1672 addu $25,$6 | |
| 1673 sll $24,1 | |
| 1674 addu $7,$24 | |
| 1675 sltu $1,$7,$24 | |
| 1676 addu $25,$1 | |
| 1677 addu $2,$25 | |
| 1678 sltu $1,$2,$25 | |
| 1679 addu $3,$1 | |
| 1680 mflo $24 | |
| 1681 mfhi $25 | |
| 1682 slt $1,$25,$0 | |
| 1683 multu $10,$12 # mul_add_c2(a[6],b[0],c1,c2,c3); | |
| 1684 addu $3,$1 | |
| 1685 sll $25,1 | |
| 1686 slt $6,$24,$0 | |
| 1687 addu $25,$6 | |
| 1688 sll $24,1 | |
| 1689 addu $7,$24 | |
| 1690 sltu $1,$7,$24 | |
| 1691 addu $25,$1 | |
| 1692 addu $2,$25 | |
| 1693 sltu $1,$2,$25 | |
| 1694 addu $3,$1 | |
# r[5]=c3;
| 1695 sw $7,5*4($4) | |
| 1696 | |
| 1697 mflo $24 | |
| 1698 mfhi $25 | |
| 1699 slt $7,$25,$0 | |
| 1700 sll $25,1 | |
| 1701 multu $9,$13 # mul_add_c2(a[5],b[1],c1,c2,c3); | |
| 1702 slt $6,$24,$0 | |
| 1703 addu $25,$6 | |
| 1704 sll $24,1 | |
| 1705 addu $2,$24 | |
| 1706 sltu $1,$2,$24 | |
| 1707 addu $25,$1 | |
| 1708 addu $3,$25 | |
| 1709 sltu $1,$3,$25 | |
| 1710 addu $7,$1 | |
| 1711 mflo $24 | |
| 1712 mfhi $25 | |
| 1713 slt $1,$25,$0 | |
| 1714 addu $7,$1 | |
| 1715 multu $8,$14 # mul_add_c2(a[4],b[2],c1,c2,c3); | |
| 1716 sll $25,1 | |
| 1717 slt $6,$24,$0 | |
| 1718 addu $25,$6 | |
| 1719 sll $24,1 | |
| 1720 addu $2,$24 | |
| 1721 sltu $1,$2,$24 | |
| 1722 addu $25,$1 | |
| 1723 addu $3,$25 | |
| 1724 sltu $1,$3,$25 | |
| 1725 addu $7,$1 | |
| 1726 mflo $24 | |
| 1727 mfhi $25 | |
| 1728 slt $1,$25,$0 | |
| 1729 addu $7,$1 | |
| 1730 multu $15,$15 # mul_add_c(a[3],b[3],c1,c2,c3); | |
| 1731 sll $25,1 | |
| 1732 slt $6,$24,$0 | |
| 1733 addu $25,$6 | |
| 1734 sll $24,1 | |
| 1735 addu $2,$24 | |
| 1736 sltu $1,$2,$24 | |
| 1737 addu $25,$1 | |
| 1738 addu $3,$25 | |
| 1739 sltu $1,$3,$25 | |
| 1740 addu $7,$1 | |
| 1741 mflo $24 | |
| 1742 mfhi $25 | |
| 1743 addu $2,$24 | |
| 1744 sltu $1,$2,$24 | |
| 1745 multu $12,$11 # mul_add_c2(a[0],b[7],c2,c3,c1); | |
| 1746 addu $25,$1 | |
| 1747 addu $3,$25 | |
| 1748 sltu $1,$3,$25 | |
| 1749 addu $7,$1 | |
# r[6]=c1;
| 1750 sw $2,6*4($4) | |
| 1751 | |
| 1752 mflo $24 | |
| 1753 mfhi $25 | |
| 1754 slt $2,$25,$0 | |
| 1755 sll $25,1 | |
| 1756 multu $13,$10 # mul_add_c2(a[1],b[6],c2,c3,c1); | |
| 1757 slt $6,$24,$0 | |
| 1758 addu $25,$6 | |
| 1759 sll $24,1 | |
| 1760 addu $3,$24 | |
| 1761 sltu $1,$3,$24 | |
| 1762 addu $25,$1 | |
| 1763 addu $7,$25 | |
| 1764 sltu $1,$7,$25 | |
| 1765 addu $2,$1 | |
| 1766 mflo $24 | |
| 1767 mfhi $25 | |
| 1768 slt $1,$25,$0 | |
| 1769 addu $2,$1 | |
| 1770 multu $14,$9 # mul_add_c2(a[2],b[5],c2,c3,c1); | |
| 1771 sll $25,1 | |
| 1772 slt $6,$24,$0 | |
| 1773 addu $25,$6 | |
| 1774 sll $24,1 | |
| 1775 addu $3,$24 | |
| 1776 sltu $1,$3,$24 | |
| 1777 addu $25,$1 | |
| 1778 addu $7,$25 | |
| 1779 sltu $1,$7,$25 | |
| 1780 addu $2,$1 | |
| 1781 mflo $24 | |
| 1782 mfhi $25 | |
| 1783 slt $1,$25,$0 | |
| 1784 addu $2,$1 | |
| 1785 multu $15,$8 # mul_add_c2(a[3],b[4],c2,c3,c1); | |
| 1786 sll $25,1 | |
| 1787 slt $6,$24,$0 | |
| 1788 addu $25,$6 | |
| 1789 sll $24,1 | |
| 1790 addu $3,$24 | |
| 1791 sltu $1,$3,$24 | |
| 1792 addu $25,$1 | |
| 1793 addu $7,$25 | |
| 1794 sltu $1,$7,$25 | |
| 1795 addu $2,$1 | |
| 1796 mflo $24 | |
| 1797 mfhi $25 | |
| 1798 slt $1,$25,$0 | |
| 1799 addu $2,$1 | |
| 1800 multu $11,$13 # mul_add_c2(a[7],b[1],c3,c1,c2); | |
| 1801 sll $25,1 | |
| 1802 slt $6,$24,$0 | |
| 1803 addu $25,$6 | |
| 1804 sll $24,1 | |
| 1805 addu $3,$24 | |
| 1806 sltu $1,$3,$24 | |
| 1807 addu $25,$1 | |
| 1808 addu $7,$25 | |
| 1809 sltu $1,$7,$25 | |
| 1810 addu $2,$1 | |
# r[7]=c2;
| 1811 sw $3,7*4($4) | |
| 1812 | |
| 1813 mflo $24 | |
| 1814 mfhi $25 | |
| 1815 slt $3,$25,$0 | |
| 1816 sll $25,1 | |
| 1817 multu $10,$14 # mul_add_c2(a[6],b[2],c3,c1,c2); | |
| 1818 slt $6,$24,$0 | |
| 1819 addu $25,$6 | |
| 1820 sll $24,1 | |
| 1821 addu $7,$24 | |
| 1822 sltu $1,$7,$24 | |
| 1823 addu $25,$1 | |
| 1824 addu $2,$25 | |
| 1825 sltu $1,$2,$25 | |
| 1826 addu $3,$1 | |
| 1827 mflo $24 | |
| 1828 mfhi $25 | |
| 1829 slt $1,$25,$0 | |
| 1830 addu $3,$1 | |
| 1831 multu $9,$15 # mul_add_c2(a[5],b[3],c3,c1,c2); | |
| 1832 sll $25,1 | |
| 1833 slt $6,$24,$0 | |
| 1834 addu $25,$6 | |
| 1835 sll $24,1 | |
| 1836 addu $7,$24 | |
| 1837 sltu $1,$7,$24 | |
| 1838 addu $25,$1 | |
| 1839 addu $2,$25 | |
| 1840 sltu $1,$2,$25 | |
| 1841 addu $3,$1 | |
| 1842 mflo $24 | |
| 1843 mfhi $25 | |
| 1844 slt $1,$25,$0 | |
| 1845 addu $3,$1 | |
| 1846 multu $8,$8 # mul_add_c(a[4],b[4],c3,c1,c2); | |
| 1847 sll $25,1 | |
| 1848 slt $6,$24,$0 | |
| 1849 addu $25,$6 | |
| 1850 sll $24,1 | |
| 1851 addu $7,$24 | |
| 1852 sltu $1,$7,$24 | |
| 1853 addu $25,$1 | |
| 1854 addu $2,$25 | |
| 1855 sltu $1,$2,$25 | |
| 1856 addu $3,$1 | |
| 1857 mflo $24 | |
| 1858 mfhi $25 | |
| 1859 addu $7,$24 | |
| 1860 sltu $1,$7,$24 | |
| 1861 multu $14,$11 # mul_add_c2(a[2],b[7],c1,c2,c3); | |
| 1862 addu $25,$1 | |
| 1863 addu $2,$25 | |
| 1864 sltu $1,$2,$25 | |
| 1865 addu $3,$1 | |
# r[8]=c3;
| 1866 sw $7,8*4($4) | |
| 1867 | |
| 1868 mflo $24 | |
| 1869 mfhi $25 | |
| 1870 slt $7,$25,$0 | |
| 1871 sll $25,1 | |
| 1872 multu $15,$10 # mul_add_c2(a[3],b[6],c1,c2,c3); | |
| 1873 slt $6,$24,$0 | |
| 1874 addu $25,$6 | |
| 1875 sll $24,1 | |
| 1876 addu $2,$24 | |
| 1877 sltu $1,$2,$24 | |
| 1878 addu $25,$1 | |
| 1879 addu $3,$25 | |
| 1880 sltu $1,$3,$25 | |
| 1881 addu $7,$1 | |
| 1882 mflo $24 | |
| 1883 mfhi $25 | |
| 1884 slt $1,$25,$0 | |
| 1885 addu $7,$1 | |
| 1886 multu $8,$9 # mul_add_c2(a[4],b[5],c1,c2,c3); | |
| 1887 sll $25,1 | |
| 1888 slt $6,$24,$0 | |
| 1889 addu $25,$6 | |
| 1890 sll $24,1 | |
| 1891 addu $2,$24 | |
| 1892 sltu $1,$2,$24 | |
| 1893 addu $25,$1 | |
| 1894 addu $3,$25 | |
| 1895 sltu $1,$3,$25 | |
| 1896 addu $7,$1 | |
| 1897 mflo $24 | |
| 1898 mfhi $25 | |
| 1899 slt $1,$25,$0 | |
| 1900 addu $7,$1 | |
| 1901 multu $11,$15 # mul_add_c2(a[7],b[3],c2,c3,c1); | |
| 1902 sll $25,1 | |
| 1903 slt $6,$24,$0 | |
| 1904 addu $25,$6 | |
| 1905 sll $24,1 | |
| 1906 addu $2,$24 | |
| 1907 sltu $1,$2,$24 | |
| 1908 addu $25,$1 | |
| 1909 addu $3,$25 | |
| 1910 sltu $1,$3,$25 | |
| 1911 addu $7,$1 | |
# r[9]=c1;
| 1912 sw $2,9*4($4) | |
| 1913 | |
| 1914 mflo $24 | |
| 1915 mfhi $25 | |
| 1916 slt $2,$25,$0 | |
| 1917 sll $25,1 | |
| 1918 multu $10,$8 # mul_add_c2(a[6],b[4],c2,c3,c1); | |
| 1919 slt $6,$24,$0 | |
| 1920 addu $25,$6 | |
| 1921 sll $24,1 | |
| 1922 addu $3,$24 | |
| 1923 sltu $1,$3,$24 | |
| 1924 addu $25,$1 | |
| 1925 addu $7,$25 | |
| 1926 sltu $1,$7,$25 | |
| 1927 addu $2,$1 | |
| 1928 mflo $24 | |
| 1929 mfhi $25 | |
| 1930 slt $1,$25,$0 | |
| 1931 addu $2,$1 | |
| 1932 multu $9,$9 # mul_add_c(a[5],b[5],c2,c3,c1); | |
| 1933 sll $25,1 | |
| 1934 slt $6,$24,$0 | |
| 1935 addu $25,$6 | |
| 1936 sll $24,1 | |
| 1937 addu $3,$24 | |
| 1938 sltu $1,$3,$24 | |
| 1939 addu $25,$1 | |
| 1940 addu $7,$25 | |
| 1941 sltu $1,$7,$25 | |
| 1942 addu $2,$1 | |
| 1943 mflo $24 | |
| 1944 mfhi $25 | |
| 1945 addu $3,$24 | |
| 1946 sltu $1,$3,$24 | |
| 1947 multu $8,$11 # mul_add_c2(a[4],b[7],c3,c1,c2); | |
| 1948 addu $25,$1 | |
| 1949 addu $7,$25 | |
| 1950 sltu $1,$7,$25 | |
| 1951 addu $2,$1 | |
# r[10]=c2;
| 1952 sw $3,10*4($4) | |
| 1953 | |
| 1954 mflo $24 | |
| 1955 mfhi $25 | |
| 1956 slt $3,$25,$0 | |
| 1957 sll $25,1 | |
| 1958 multu $9,$10 # mul_add_c2(a[5],b[6],c3,c1,c2); | |
| 1959 slt $6,$24,$0 | |
| 1960 addu $25,$6 | |
| 1961 sll $24,1 | |
| 1962 addu $7,$24 | |
| 1963 sltu $1,$7,$24 | |
| 1964 addu $25,$1 | |
| 1965 addu $2,$25 | |
| 1966 sltu $1,$2,$25 | |
| 1967 addu $3,$1 | |
| 1968 mflo $24 | |
| 1969 mfhi $25 | |
| 1970 slt $1,$25,$0 | |
| 1971 addu $3,$1 | |
| 1972 multu $11,$9 # mul_add_c2(a[7],b[5],c1,c2,c3); | |
| 1973 sll $25,1 | |
| 1974 slt $6,$24,$0 | |
| 1975 addu $25,$6 | |
| 1976 sll $24,1 | |
| 1977 addu $7,$24 | |
| 1978 sltu $1,$7,$24 | |
| 1979 addu $25,$1 | |
| 1980 addu $2,$25 | |
| 1981 sltu $1,$2,$25 | |
| 1982 addu $3,$1 | |
# r[11]=c3;
| 1983 sw $7,11*4($4) | |
| 1984 | |
| 1985 mflo $24 | |
| 1986 mfhi $25 | |
| 1987 slt $7,$25,$0 | |
| 1988 sll $25,1 | |
| 1989 multu $10,$10 # mul_add_c(a[6],b[6],c1,c2,c3); | |
| 1990 slt $6,$24,$0 | |
| 1991 addu $25,$6 | |
| 1992 sll $24,1 | |
| 1993 addu $2,$24 | |
| 1994 sltu $1,$2,$24 | |
| 1995 addu $25,$1 | |
| 1996 addu $3,$25 | |
| 1997 sltu $1,$3,$25 | |
| 1998 addu $7,$1 | |
| 1999 mflo $24 | |
| 2000 mfhi $25 | |
| 2001 addu $2,$24 | |
| 2002 sltu $1,$2,$24 | |
| 2003 multu $10,$11 # mul_add_c2(a[6],b[7],c2,c3,c1); | |
| 2004 addu $25,$1 | |
| 2005 addu $3,$25 | |
| 2006 sltu $1,$3,$25 | |
| 2007 addu $7,$1 | |
# r[12]=c1;
| 2008 sw $2,12*4($4) | |
| 2009 | |
| 2010 mflo $24 | |
| 2011 mfhi $25 | |
| 2012 slt $2,$25,$0 | |
| 2013 sll $25,1 | |
| 2014 multu $11,$11 # mul_add_c(a[7],b[7],c3,c1,c2); | |
| 2015 slt $6,$24,$0 | |
| 2016 addu $25,$6 | |
| 2017 sll $24,1 | |
| 2018 addu $3,$24 | |
| 2019 sltu $1,$3,$24 | |
| 2020 addu $25,$1 | |
| 2021 addu $7,$25 | |
| 2022 sltu $1,$7,$25 | |
| 2023 addu $2,$1 | |
# r[13]=c2;
| 2024 sw $3,13*4($4) | |
| 2025 | |
| 2026 mflo $24 | |
| 2027 mfhi $25 | |
| 2028 addu $7,$24 | |
| 2029 sltu $1,$7,$24 | |
| 2030 addu $25,$1 | |
| 2031 addu $2,$25 | |
# r[14]=c3; r[15]=c1;
| 2032 sw $7,14*4($4) | |
| 2033 sw $2,15*4($4) | |
| 2034 | |
# Return; with noreorder in effect the explicit nop occupies the jr delay slot.
| 2035 .set noreorder | |
| 2036 jr $31 | |
| 2037 nop | |
| 2038 .end bn_sqr_comba8 | |
| 2039 | |
# bn_sqr_comba4: 4-word comba (column-by-column) squaring; stores the eight
# 32-bit words of a[0..3]^2 to r[0..7].
# Register roles as used below:
#   $4 = r (destination of the eight sw), $5 = a
#   $12-$15 = a[0..3]
#   $2, $3, $7 = three-word column accumulator (c1, c2, c3); roles rotate per column
#   $24/$25 = LO/HI of the most recently completed multu
#   $1 = carry scratch, $6 = scratch for the bit shifted out of LO
# Only a[] is loaded; "b[j]" in the per-product comments denotes a[j].
# The comment on a multu names the product being started on that line; its
# LO/HI are consumed by the mflo/mfhi group that follows.
# mul_add_c2 steps (off-diagonal products) double the 64-bit product in place
# before adding it: the top bit of HI is moved into the highest accumulator
# word (slt ...,$25,$0), then HI:LO is shifted left by one
# (sll $25,1 ; slt $6,$24,$0 ; addu $25,$6 ; sll $24,1).
# mul_add_c steps (squares a[i]*a[i]) add the product once, without doubling.
| 2040 .align 5 | |
| 2041 .globl bn_sqr_comba4 | |
| 2042 .ent bn_sqr_comba4 | |
| 2043 bn_sqr_comba4: | |
| 2044 .set reorder | |
| 2045 lw $12,0($5) | |
| 2046 lw $13,4($5) | |
| 2047 multu $12,$12 # mul_add_c(a[0],b[0],c1,c2,c3); | |
| 2048 lw $14,2*4($5) | |
| 2049 lw $15,3*4($5) | |
| 2050 mflo $2 | |
| 2051 mfhi $3 | |
# r[0]=c1;
| 2052 sw $2,0($4) | |
| 2053 | |
| 2054 multu $12,$13 # mul_add_c2(a[0],b[1],c2,c3,c1); | |
| 2055 mflo $24 | |
| 2056 mfhi $25 | |
| 2057 slt $2,$25,$0 | |
| 2058 sll $25,1 | |
| 2059 multu $14,$12 # mul_add_c2(a[2],b[0],c3,c1,c2); | |
| 2060 slt $6,$24,$0 | |
| 2061 addu $25,$6 | |
| 2062 sll $24,1 | |
| 2063 addu $3,$24 | |
| 2064 sltu $1,$3,$24 | |
# c3 is written fresh here (doubled HI + carry) rather than accumulated.
| 2065 addu $7,$25,$1 | |
# r[1]=c2;
| 2066 sw $3,4($4) | |
| 2067 | |
| 2068 mflo $24 | |
| 2069 mfhi $25 | |
| 2070 slt $3,$25,$0 | |
| 2071 sll $25,1 | |
| 2072 multu $13,$13 # mul_add_c(a[1],b[1],c3,c1,c2); | |
| 2073 slt $6,$24,$0 | |
| 2074 addu $25,$6 | |
| 2075 sll $24,1 | |
| 2076 addu $7,$24 | |
| 2077 sltu $1,$7,$24 | |
| 2078 addu $25,$1 | |
| 2079 addu $2,$25 | |
| 2080 sltu $1,$2,$25 | |
| 2081 addu $3,$1 | |
| 2082 mflo $24 | |
| 2083 mfhi $25 | |
| 2084 addu $7,$24 | |
| 2085 sltu $1,$7,$24 | |
| 2086 multu $12,$15 # mul_add_c2(a[0],b[3],c1,c2,c3); | |
| 2087 addu $25,$1 | |
| 2088 addu $2,$25 | |
| 2089 sltu $1,$2,$25 | |
| 2090 addu $3,$1 | |
# r[2]=c3;
| 2091 sw $7,2*4($4) | |
| 2092 | |
| 2093 mflo $24 | |
| 2094 mfhi $25 | |
| 2095 slt $7,$25,$0 | |
| 2096 sll $25,1 | |
| 2097 multu $13,$14 # mul_add_c2(a[1],b[2],c1,c2,c3); | |
| 2098 slt $6,$24,$0 | |
| 2099 addu $25,$6 | |
| 2100 sll $24,1 | |
| 2101 addu $2,$24 | |
| 2102 sltu $1,$2,$24 | |
| 2103 addu $25,$1 | |
| 2104 addu $3,$25 | |
| 2105 sltu $1,$3,$25 | |
| 2106 addu $7,$1 | |
| 2107 mflo $24 | |
| 2108 mfhi $25 | |
| 2109 slt $1,$25,$0 | |
| 2110 addu $7,$1 | |
| 2111 multu $15,$13 # mul_add_c2(a[3],b[1],c2,c3,c1); | |
| 2112 sll $25,1 | |
| 2113 slt $6,$24,$0 | |
| 2114 addu $25,$6 | |
| 2115 sll $24,1 | |
| 2116 addu $2,$24 | |
| 2117 sltu $1,$2,$24 | |
| 2118 addu $25,$1 | |
| 2119 addu $3,$25 | |
| 2120 sltu $1,$3,$25 | |
| 2121 addu $7,$1 | |
# r[3]=c1;
| 2122 sw $2,3*4($4) | |
| 2123 | |
| 2124 mflo $24 | |
| 2125 mfhi $25 | |
| 2126 slt $2,$25,$0 | |
| 2127 sll $25,1 | |
| 2128 multu $14,$14 # mul_add_c(a[2],b[2],c2,c3,c1); | |
| 2129 slt $6,$24,$0 | |
| 2130 addu $25,$6 | |
| 2131 sll $24,1 | |
| 2132 addu $3,$24 | |
| 2133 sltu $1,$3,$24 | |
| 2134 addu $25,$1 | |
| 2135 addu $7,$25 | |
| 2136 sltu $1,$7,$25 | |
| 2137 addu $2,$1 | |
| 2138 mflo $24 | |
| 2139 mfhi $25 | |
| 2140 addu $3,$24 | |
| 2141 sltu $1,$3,$24 | |
| 2142 multu $14,$15 # mul_add_c2(a[2],b[3],c3,c1,c2); | |
| 2143 addu $25,$1 | |
| 2144 addu $7,$25 | |
| 2145 sltu $1,$7,$25 | |
| 2146 addu $2,$1 | |
# r[4]=c2;
| 2147 sw $3,4*4($4) | |
| 2148 | |
| 2149 mflo $24 | |
| 2150 mfhi $25 | |
| 2151 slt $3,$25,$0 | |
| 2152 sll $25,1 | |
| 2153 multu $15,$15 # mul_add_c(a[3],b[3],c1,c2,c3); | |
| 2154 slt $6,$24,$0 | |
| 2155 addu $25,$6 | |
| 2156 sll $24,1 | |
| 2157 addu $7,$24 | |
| 2158 sltu $1,$7,$24 | |
| 2159 addu $25,$1 | |
| 2160 addu $2,$25 | |
| 2161 sltu $1,$2,$25 | |
| 2162 addu $3,$1 | |
# r[5]=c3;
| 2163 sw $7,5*4($4) | |
| 2164 | |
| 2165 mflo $24 | |
| 2166 mfhi $25 | |
| 2167 addu $2,$24 | |
| 2168 sltu $1,$2,$24 | |
| 2169 addu $25,$1 | |
| 2170 addu $3,$25 | |
# r[6]=c1; r[7]=c2;
| 2171 sw $2,6*4($4) | |
| 2172 sw $3,7*4($4) | |
| 2173 | |
# Return; with noreorder in effect the explicit nop occupies the jr delay slot.
| 2174 .set noreorder | |
| 2175 jr $31 | |
| 2176 nop | |
| 2177 .end bn_sqr_comba4 | |
| OLD | NEW |