| OLD | NEW |
| (Empty) |
| 1 .rdata /* identification strings placed in the read-only data section */ | |
| 2 .asciiz "mips3.s, Version 1.1" | |
| 3 .asciiz "MIPS III/IV ISA artwork by Andy Polyakov <appro@fy.chalmers.se>" | |
| 4 | |
| 5 /* | |
| 6 * ==================================================================== | |
| 7 * Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL | |
| 8 * project. | |
| 9 * | |
| 10 * Rights for redistribution and usage in source and binary forms are | |
| 11 * granted according to the OpenSSL license. Warranty of any kind is | |
| 12 * disclaimed. | |
| 13 * ==================================================================== | |
| 14 */ | |
| 15 | |
| 16 /* | |
| 17 * This is my modest contribution to the OpenSSL project (see | |
| 18 * http://www.openssl.org/ for more information about it) and is | |
| 19 * a drop-in MIPS III/IV ISA replacement for crypto/bn/bn_asm.c | |
| 20 * module. For updates see http://fy.chalmers.se/~appro/hpe/. | |
| 21 * | |
| 22 * The module is designed to work with either of the "new" MIPS ABI(5), | |
| 23 * namely N32 or N64, offered by IRIX 6.x. It's not meant to work under | |
| 24 * IRIX 5.x not only because it doesn't support new ABIs but also | |
| 25 * because 5.x kernels put R4x00 CPU into 32-bit mode and all those | |
| 26 * 64-bit instructions (daddu, dmultu, etc.) found below gonna only | |
| 27 * cause illegal instruction exception:-( | |
| 28 * | |
| 29 * In addition the code depends on preprocessor flags set up by MIPSpro | |
| 30 * compiler driver (either as or cc) and therefore (probably?) can't be | |
| 31 * compiled by the GNU assembler. GNU C driver manages fine though... | |
| 32 * I mean as long as -mmips-as is specified or is the default option, | |
| 33 * because then it simply invokes /usr/bin/as which in turn takes | |
| 34 * perfect care of the preprocessor definitions. Another neat feature | |
| 35 * offered by the MIPSpro assembler is an optimization pass. This gave | |
| 36 * me the opportunity to have the code looking more regular as all those | |
| 37 * architecture dependent instruction rescheduling details were left to | |
| 38 * the assembler. Cool, huh? | |
| 39 * | |
| 40 * Performance improvement is astonishing! 'apps/openssl speed rsa dsa' | |
| 41 * goes way over 3 times faster! | |
| 42 * | |
| 43 * <appro@fy.chalmers.se> | |
| 44 */ | |
| 45 #include <asm.h> | |
| 46 #include <regdef.h> | |
| 47 | |
| 48 #if _MIPS_ISA>=4 /* MOVNZ(cond,dst,src): dst = src when cond != 0, otherwise dst is left unchanged */ | |
| 49 #define MOVNZ(cond,dst,src) \ | |
| 50 movn dst,src,cond /* ISA level 4 and up: a single conditional move */ | |
| 51 #else /* older ISA: emulate with a branch-likely whose delay slot holds the move; the slot is annulled when cond == 0. The expansion leaves the assembler in reorder mode. */ | |
| 52 #define MOVNZ(cond,dst,src) \ | |
| 53 .set noreorder; \ | |
| 54 bnezl cond,.+8; \ | |
| 55 move dst,src; \ | |
| 56 .set reorder | |
| 57 #endif /* _MIPS_ISA>=4 */ | |
| 58 | |
| 59 .text | |
| 60 | |
| 61 .set noat /* AT is used explicitly as a scratch register throughout this file */ | |
| 62 .set reorder /* default mode: the assembler schedules instructions and fills delay slots */ | |
| 63 | |
| 64 #define MINUS4 v1 /* v1 is loaded with -4 by each routine and used to mask the word count down to a multiple of 4 */ | |
| 65 | |
| 66 .align 5 | |
| 67 LEAF(bn_mul_add_words) /* For each of a2 64-bit words: a0[i] += a1[i]*a3 with carry propagation. Returns the final carry word in v0; returns 0 at once when a2 <= 0. Four words per loop iteration, then an unrolled tail for 1..3 leftovers. */ | |
| 68 .set noreorder | |
| 69 bgtzl a2,.L_bn_mul_add_words_proceed /* count > 0: continue */ | |
| 70 ld t0,0(a1) /* delay slot of a branch-likely: runs only when the branch is taken; preloads a1[0] */ | |
| 71 jr ra | |
| 72 move v0,zero /* delay slot: nothing to do, return 0 */ | |
| 73 .set reorder | |
| 74 | |
| 75 .L_bn_mul_add_words_proceed: | |
| 76 li MINUS4,-4 | |
| 77 and ta0,a2,MINUS4 /* ta0 = count rounded down to a multiple of 4 */ | |
| 78 move v0,zero /* v0 = running carry */ | |
| 79 beqz ta0,.L_bn_mul_add_words_tail /* fewer than 4 words: tail only */ | |
| 80 | |
| 81 .L_bn_mul_add_words_loop: | |
| 82 dmultu t0,a3 /* word 0: hi:lo = a1[0]*a3 */ | |
| 83 ld t1,0(a0) | |
| 84 ld t2,8(a1) | |
| 85 ld t3,8(a0) | |
| 86 ld ta0,16(a1) | |
| 87 ld ta1,16(a0) | |
| 88 daddu t1,v0 | |
| 89 sltu v0,t1,v0 /* All manuals say it "compares 32-bit | |
| 90 * values", but it seems to work fine | |
| 91 * even on 64-bit registers. */ | |
| 92 mflo AT /* AT = low half of the product */ | |
| 93 mfhi t0 /* t0 = high half of the product */ | |
| 94 daddu t1,AT | |
| 95 daddu v0,t0 | |
| 96 sltu AT,t1,AT /* carry out of adding the low half */ | |
| 97 sd t1,0(a0) | |
| 98 daddu v0,AT /* v0 = carry into the next word */ | |
| 99 | |
| 100 dmultu t2,a3 /* word 1, same carry chain */ | |
| 101 ld ta2,24(a1) | |
| 102 ld ta3,24(a0) | |
| 103 daddu t3,v0 | |
| 104 sltu v0,t3,v0 | |
| 105 mflo AT | |
| 106 mfhi t2 | |
| 107 daddu t3,AT | |
| 108 daddu v0,t2 | |
| 109 sltu AT,t3,AT | |
| 110 sd t3,8(a0) | |
| 111 daddu v0,AT | |
| 112 | |
| 113 dmultu ta0,a3 /* word 2 */ | |
| 114 subu a2,4 | |
| 115 PTR_ADD a0,32 /* pointers advance here, so the remaining stores use negative offsets */ | |
| 116 PTR_ADD a1,32 | |
| 117 daddu ta1,v0 | |
| 118 sltu v0,ta1,v0 | |
| 119 mflo AT | |
| 120 mfhi ta0 | |
| 121 daddu ta1,AT | |
| 122 daddu v0,ta0 | |
| 123 sltu AT,ta1,AT | |
| 124 sd ta1,-16(a0) | |
| 125 daddu v0,AT | |
| 126 | |
| 127 | |
| 128 dmultu ta2,a3 /* word 3 */ | |
| 129 and ta0,a2,MINUS4 /* ta0 != 0 while at least 4 words remain */ | |
| 130 daddu ta3,v0 | |
| 131 sltu v0,ta3,v0 | |
| 132 mflo AT | |
| 133 mfhi ta2 | |
| 134 daddu ta3,AT | |
| 135 daddu v0,ta2 | |
| 136 sltu AT,ta3,AT | |
| 137 sd ta3,-8(a0) | |
| 138 daddu v0,AT | |
| 139 .set noreorder | |
| 140 bgtzl ta0,.L_bn_mul_add_words_loop /* another full group of 4 */ | |
| 141 ld t0,0(a1) /* delay slot (taken only): preload the next a1[0] */ | |
| 142 | |
| 143 bnezl a2,.L_bn_mul_add_words_tail /* 1..3 words left over */ | |
| 144 ld t0,0(a1) /* delay slot (taken only): preload a1[0] for the tail */ | |
| 145 .set reorder | |
| 146 | |
| 147 .L_bn_mul_add_words_return: | |
| 148 jr ra | |
| 149 | |
| 150 .L_bn_mul_add_words_tail: | |
| 151 dmultu t0,a3 /* first leftover word; t0 was preloaded by the caller of this label */ | |
| 152 ld t1,0(a0) | |
| 153 subu a2,1 | |
| 154 daddu t1,v0 | |
| 155 sltu v0,t1,v0 | |
| 156 mflo AT | |
| 157 mfhi t0 | |
| 158 daddu t1,AT | |
| 159 daddu v0,t0 | |
| 160 sltu AT,t1,AT | |
| 161 sd t1,0(a0) | |
| 162 daddu v0,AT | |
| 163 beqz a2,.L_bn_mul_add_words_return | |
| 164 | |
| 165 ld t0,8(a1) /* second leftover word */ | |
| 166 dmultu t0,a3 | |
| 167 ld t1,8(a0) | |
| 168 subu a2,1 | |
| 169 daddu t1,v0 | |
| 170 sltu v0,t1,v0 | |
| 171 mflo AT | |
| 172 mfhi t0 | |
| 173 daddu t1,AT | |
| 174 daddu v0,t0 | |
| 175 sltu AT,t1,AT | |
| 176 sd t1,8(a0) | |
| 177 daddu v0,AT | |
| 178 beqz a2,.L_bn_mul_add_words_return | |
| 179 | |
| 180 ld t0,16(a1) /* third and last possible leftover word */ | |
| 181 dmultu t0,a3 | |
| 182 ld t1,16(a0) | |
| 183 daddu t1,v0 | |
| 184 sltu v0,t1,v0 | |
| 185 mflo AT | |
| 186 mfhi t0 | |
| 187 daddu t1,AT | |
| 188 daddu v0,t0 | |
| 189 sltu AT,t1,AT | |
| 190 sd t1,16(a0) | |
| 191 daddu v0,AT | |
| 192 jr ra | |
| 193 END(bn_mul_add_words) | |
| 194 | |
| 195 .align 5 | |
| 196 LEAF(bn_mul_words) /* For each of a2 64-bit words: a0[i] = low half of a1[i]*a3 plus the incoming carry; the high half plus any overflow becomes the next carry. Returns the final carry in v0; returns 0 at once when a2 <= 0. */ | |
| 197 .set noreorder | |
| 198 bgtzl a2,.L_bn_mul_words_proceed /* count > 0: continue */ | |
| 199 ld t0,0(a1) /* delay slot (taken only): preload a1[0] */ | |
| 200 jr ra | |
| 201 move v0,zero /* delay slot: return 0 */ | |
| 202 .set reorder | |
| 203 | |
| 204 .L_bn_mul_words_proceed: | |
| 205 li MINUS4,-4 | |
| 206 and ta0,a2,MINUS4 /* ta0 = count rounded down to a multiple of 4 */ | |
| 207 move v0,zero /* v0 = running carry */ | |
| 208 beqz ta0,.L_bn_mul_words_tail /* fewer than 4 words: tail only */ | |
| 209 | |
| 210 .L_bn_mul_words_loop: | |
| 211 dmultu t0,a3 /* word 0 */ | |
| 212 ld t2,8(a1) | |
| 213 ld ta0,16(a1) | |
| 214 ld ta2,24(a1) | |
| 215 mflo AT | |
| 216 mfhi t0 | |
| 217 daddu v0,AT /* low half + carry in */ | |
| 218 sltu t1,v0,AT /* carry out of that addition */ | |
| 219 sd v0,0(a0) | |
| 220 daddu v0,t1,t0 /* next carry = high half + carry out */ | |
| 221 | |
| 222 dmultu t2,a3 /* word 1 */ | |
| 223 subu a2,4 | |
| 224 PTR_ADD a0,32 /* pointers advance here, so the remaining stores use negative offsets */ | |
| 225 PTR_ADD a1,32 | |
| 226 mflo AT | |
| 227 mfhi t2 | |
| 228 daddu v0,AT | |
| 229 sltu t3,v0,AT | |
| 230 sd v0,-24(a0) | |
| 231 daddu v0,t3,t2 | |
| 232 | |
| 233 dmultu ta0,a3 /* word 2 */ | |
| 234 mflo AT | |
| 235 mfhi ta0 | |
| 236 daddu v0,AT | |
| 237 sltu ta1,v0,AT | |
| 238 sd v0,-16(a0) | |
| 239 daddu v0,ta1,ta0 | |
| 240 | |
| 241 | |
| 242 dmultu ta2,a3 /* word 3 */ | |
| 243 and ta0,a2,MINUS4 /* ta0 != 0 while at least 4 words remain */ | |
| 244 mflo AT | |
| 245 mfhi ta2 | |
| 246 daddu v0,AT | |
| 247 sltu ta3,v0,AT | |
| 248 sd v0,-8(a0) | |
| 249 daddu v0,ta3,ta2 | |
| 250 .set noreorder | |
| 251 bgtzl ta0,.L_bn_mul_words_loop /* another full group of 4 */ | |
| 252 ld t0,0(a1) /* delay slot (taken only): preload the next a1[0] */ | |
| 253 | |
| 254 bnezl a2,.L_bn_mul_words_tail /* 1..3 words left over */ | |
| 255 ld t0,0(a1) /* delay slot (taken only): preload a1[0] for the tail */ | |
| 256 .set reorder | |
| 257 | |
| 258 .L_bn_mul_words_return: | |
| 259 jr ra | |
| 260 | |
| 261 .L_bn_mul_words_tail: | |
| 262 dmultu t0,a3 /* first leftover word; t0 already holds a1[0] */ | |
| 263 subu a2,1 | |
| 264 mflo AT | |
| 265 mfhi t0 | |
| 266 daddu v0,AT | |
| 267 sltu t1,v0,AT | |
| 268 sd v0,0(a0) | |
| 269 daddu v0,t1,t0 | |
| 270 beqz a2,.L_bn_mul_words_return | |
| 271 | |
| 272 ld t0,8(a1) /* second leftover word */ | |
| 273 dmultu t0,a3 | |
| 274 subu a2,1 | |
| 275 mflo AT | |
| 276 mfhi t0 | |
| 277 daddu v0,AT | |
| 278 sltu t1,v0,AT | |
| 279 sd v0,8(a0) | |
| 280 daddu v0,t1,t0 | |
| 281 beqz a2,.L_bn_mul_words_return | |
| 282 | |
| 283 ld t0,16(a1) /* third and last possible leftover word */ | |
| 284 dmultu t0,a3 | |
| 285 mflo AT | |
| 286 mfhi t0 | |
| 287 daddu v0,AT | |
| 288 sltu t1,v0,AT | |
| 289 sd v0,16(a0) | |
| 290 daddu v0,t1,t0 | |
| 291 jr ra | |
| 292 END(bn_mul_words) | |
| 293 | |
| 294 .align 5 | |
| 295 LEAF(bn_sqr_words) /* For each of a2 64-bit words at a1: stores the 128-bit square as two words at a0, low half first, so a0 advances twice as fast as a1. v0 is zero on every return path. Does nothing when a2 <= 0. */ | |
| 296 .set noreorder | |
| 297 bgtzl a2,.L_bn_sqr_words_proceed /* count > 0: continue */ | |
| 298 ld t0,0(a1) /* delay slot (taken only): preload a1[0] */ | |
| 299 jr ra | |
| 300 move v0,zero | |
| 301 .set reorder | |
| 302 | |
| 303 .L_bn_sqr_words_proceed: | |
| 304 li MINUS4,-4 | |
| 305 and ta0,a2,MINUS4 /* ta0 = count rounded down to a multiple of 4 */ | |
| 306 move v0,zero /* v0 is not written again below */ | |
| 307 beqz ta0,.L_bn_sqr_words_tail /* fewer than 4 words: tail only */ | |
| 308 | |
| 309 .L_bn_sqr_words_loop: | |
| 310 dmultu t0,t0 /* word 0 squared */ | |
| 311 ld t2,8(a1) | |
| 312 ld ta0,16(a1) | |
| 313 ld ta2,24(a1) | |
| 314 mflo t1 | |
| 315 mfhi t0 | |
| 316 sd t1,0(a0) /* low half first */ | |
| 317 sd t0,8(a0) /* then high half */ | |
| 318 | |
| 319 dmultu t2,t2 /* word 1 squared */ | |
| 320 subu a2,4 | |
| 321 PTR_ADD a0,64 /* 4 inputs produce 8 result words; later stores use negative offsets */ | |
| 322 PTR_ADD a1,32 | |
| 323 mflo t3 | |
| 324 mfhi t2 | |
| 325 sd t3,-48(a0) | |
| 326 sd t2,-40(a0) | |
| 327 | |
| 328 dmultu ta0,ta0 /* word 2 squared */ | |
| 329 mflo ta1 | |
| 330 mfhi ta0 | |
| 331 sd ta1,-32(a0) | |
| 332 sd ta0,-24(a0) | |
| 333 | |
| 334 | |
| 335 dmultu ta2,ta2 /* word 3 squared */ | |
| 336 and ta0,a2,MINUS4 /* ta0 != 0 while at least 4 words remain */ | |
| 337 mflo ta3 | |
| 338 mfhi ta2 | |
| 339 sd ta3,-16(a0) | |
| 340 sd ta2,-8(a0) | |
| 341 | |
| 342 .set noreorder | |
| 343 bgtzl ta0,.L_bn_sqr_words_loop /* another full group of 4 */ | |
| 344 ld t0,0(a1) /* delay slot (taken only): preload the next a1[0] */ | |
| 345 | |
| 346 bnezl a2,.L_bn_sqr_words_tail /* 1..3 words left over */ | |
| 347 ld t0,0(a1) /* delay slot (taken only): preload a1[0] for the tail */ | |
| 348 .set reorder | |
| 349 | |
| 350 .L_bn_sqr_words_return: | |
| 351 move v0,zero | |
| 352 jr ra | |
| 353 | |
| 354 .L_bn_sqr_words_tail: | |
| 355 dmultu t0,t0 /* first leftover word; t0 already holds a1[0] */ | |
| 356 subu a2,1 | |
| 357 mflo t1 | |
| 358 mfhi t0 | |
| 359 sd t1,0(a0) | |
| 360 sd t0,8(a0) | |
| 361 beqz a2,.L_bn_sqr_words_return | |
| 362 | |
| 363 ld t0,8(a1) /* second leftover word */ | |
| 364 dmultu t0,t0 | |
| 365 subu a2,1 | |
| 366 mflo t1 | |
| 367 mfhi t0 | |
| 368 sd t1,16(a0) | |
| 369 sd t0,24(a0) | |
| 370 beqz a2,.L_bn_sqr_words_return | |
| 371 | |
| 372 ld t0,16(a1) /* third and last possible leftover word */ | |
| 373 dmultu t0,t0 | |
| 374 mflo t1 | |
| 375 mfhi t0 | |
| 376 sd t1,32(a0) | |
| 377 sd t0,40(a0) | |
| 378 jr ra | |
| 379 END(bn_sqr_words) | |
| 380 | |
| 381 .align 5 | |
| 382 LEAF(bn_add_words) /* For each of a3 64-bit words: a0[i] = a1[i] + a2[i] + carry. Returns the final carry in v0; returns 0 at once when a3 <= 0. t8 and t9 alternate as holders of the first partial carry. */ | |
| 383 .set noreorder | |
| 384 bgtzl a3,.L_bn_add_words_proceed /* count > 0: continue */ | |
| 385 ld t0,0(a1) /* delay slot (taken only): preload a1[0] */ | |
| 386 jr ra | |
| 387 move v0,zero /* delay slot: return 0 */ | |
| 388 .set reorder | |
| 389 | |
| 390 .L_bn_add_words_proceed: | |
| 391 li MINUS4,-4 | |
| 392 and AT,a3,MINUS4 /* AT = count rounded down to a multiple of 4 */ | |
| 393 move v0,zero /* v0 = running carry */ | |
| 394 beqz AT,.L_bn_add_words_tail /* fewer than 4 words: tail only */ | |
| 395 | |
| 396 .L_bn_add_words_loop: | |
| 397 ld ta0,0(a2) | |
| 398 subu a3,4 | |
| 399 ld t1,8(a1) | |
| 400 and AT,a3,MINUS4 /* AT != 0 while at least 4 words remain; tested at the loop bottom */ | |
| 401 ld t2,16(a1) | |
| 402 PTR_ADD a2,32 /* pointers advance early, so later accesses use negative offsets */ | |
| 403 ld t3,24(a1) | |
| 404 PTR_ADD a0,32 | |
| 405 ld ta1,-24(a2) | |
| 406 PTR_ADD a1,32 | |
| 407 ld ta2,-16(a2) | |
| 408 ld ta3,-8(a2) | |
| 409 daddu ta0,t0 /* word 0: ta0 = a + b */ | |
| 410 sltu t8,ta0,t0 /* t8 = carry out of a + b */ | |
| 411 daddu t0,ta0,v0 /* add the incoming carry */ | |
| 412 sltu v0,t0,ta0 /* carry out of adding the incoming carry */ | |
| 413 sd t0,-32(a0) | |
| 414 daddu v0,t8 /* v0 = carry into the next word */ | |
| 415 | |
| 416 daddu ta1,t1 /* word 1 */ | |
| 417 sltu t9,ta1,t1 | |
| 418 daddu t1,ta1,v0 | |
| 419 sltu v0,t1,ta1 | |
| 420 sd t1,-24(a0) | |
| 421 daddu v0,t9 | |
| 422 | |
| 423 daddu ta2,t2 /* word 2 */ | |
| 424 sltu t8,ta2,t2 | |
| 425 daddu t2,ta2,v0 | |
| 426 sltu v0,t2,ta2 | |
| 427 sd t2,-16(a0) | |
| 428 daddu v0,t8 | |
| 429 | |
| 430 daddu ta3,t3 /* word 3 */ | |
| 431 sltu t9,ta3,t3 | |
| 432 daddu t3,ta3,v0 | |
| 433 sltu v0,t3,ta3 | |
| 434 sd t3,-8(a0) | |
| 435 daddu v0,t9 | |
| 436 | |
| 437 .set noreorder | |
| 438 bgtzl AT,.L_bn_add_words_loop /* another full group of 4 */ | |
| 439 ld t0,0(a1) /* delay slot (taken only): preload the next a1[0] */ | |
| 440 | |
| 441 bnezl a3,.L_bn_add_words_tail /* 1..3 words left over */ | |
| 442 ld t0,0(a1) /* delay slot (taken only): preload a1[0] for the tail */ | |
| 443 .set reorder | |
| 444 | |
| 445 .L_bn_add_words_return: | |
| 446 jr ra | |
| 447 | |
| 448 .L_bn_add_words_tail: | |
| 449 ld ta0,0(a2) /* first leftover word; t0 already holds a1[0] */ | |
| 450 daddu ta0,t0 | |
| 451 subu a3,1 | |
| 452 sltu t8,ta0,t0 | |
| 453 daddu t0,ta0,v0 | |
| 454 sltu v0,t0,ta0 | |
| 455 sd t0,0(a0) | |
| 456 daddu v0,t8 | |
| 457 beqz a3,.L_bn_add_words_return | |
| 458 | |
| 459 ld t1,8(a1) /* second leftover word */ | |
| 460 ld ta1,8(a2) | |
| 461 daddu ta1,t1 | |
| 462 subu a3,1 | |
| 463 sltu t9,ta1,t1 | |
| 464 daddu t1,ta1,v0 | |
| 465 sltu v0,t1,ta1 | |
| 466 sd t1,8(a0) | |
| 467 daddu v0,t9 | |
| 468 beqz a3,.L_bn_add_words_return | |
| 469 | |
| 470 ld t2,16(a1) /* third and last possible leftover word */ | |
| 471 ld ta2,16(a2) | |
| 472 daddu ta2,t2 | |
| 473 sltu t8,ta2,t2 | |
| 474 daddu t2,ta2,v0 | |
| 475 sltu v0,t2,ta2 | |
| 476 sd t2,16(a0) | |
| 477 daddu v0,t8 | |
| 478 jr ra | |
| 479 END(bn_add_words) | |
| 480 | |
| 481 .align 5 | |
| 482 LEAF(bn_sub_words) /* For each of a3 64-bit words: a0[i] = a1[i] - a2[i] - borrow. Returns the final borrow in v0; returns 0 at once when a3 <= 0. */ | |
| 483 .set noreorder | |
| 484 bgtzl a3,.L_bn_sub_words_proceed /* count > 0: continue */ | |
| 485 ld t0,0(a1) /* delay slot (taken only): preload a1[0] */ | |
| 486 jr ra | |
| 487 move v0,zero /* delay slot: return 0 */ | |
| 488 .set reorder | |
| 489 | |
| 490 .L_bn_sub_words_proceed: | |
| 491 li MINUS4,-4 | |
| 492 and AT,a3,MINUS4 /* AT = count rounded down to a multiple of 4 */ | |
| 493 move v0,zero /* v0 = running borrow */ | |
| 494 beqz AT,.L_bn_sub_words_tail /* fewer than 4 words: tail only */ | |
| 495 | |
| 496 .L_bn_sub_words_loop: | |
| 497 ld ta0,0(a2) | |
| 498 subu a3,4 | |
| 499 ld t1,8(a1) | |
| 500 and AT,a3,MINUS4 /* AT != 0 while at least 4 words remain; tested at the loop bottom */ | |
| 501 ld t2,16(a1) | |
| 502 PTR_ADD a2,32 /* pointers advance early, so later accesses use negative offsets */ | |
| 503 ld t3,24(a1) | |
| 504 PTR_ADD a0,32 | |
| 505 ld ta1,-24(a2) | |
| 506 PTR_ADD a1,32 | |
| 507 ld ta2,-16(a2) | |
| 508 ld ta3,-8(a2) | |
| 509 sltu t8,t0,ta0 /* word 0: t8 = 1 when a < b */ | |
| 510 dsubu t0,ta0 /* t0 = a - b */ | |
| 511 dsubu ta0,t0,v0 /* subtract the incoming borrow */ | |
| 512 sd ta0,-32(a0) | |
| 513 MOVNZ (t0,v0,t8) /* a-b != 0: borrow out is t8; a-b == 0: the incoming borrow stays in v0 */ | |
| 514 | |
| 515 sltu t9,t1,ta1 /* word 1 */ | |
| 516 dsubu t1,ta1 | |
| 517 dsubu ta1,t1,v0 | |
| 518 sd ta1,-24(a0) | |
| 519 MOVNZ (t1,v0,t9) | |
| 520 | |
| 521 | |
| 522 sltu t8,t2,ta2 /* word 2 */ | |
| 523 dsubu t2,ta2 | |
| 524 dsubu ta2,t2,v0 | |
| 525 sd ta2,-16(a0) | |
| 526 MOVNZ (t2,v0,t8) | |
| 527 | |
| 528 sltu t9,t3,ta3 /* word 3 */ | |
| 529 dsubu t3,ta3 | |
| 530 dsubu ta3,t3,v0 | |
| 531 sd ta3,-8(a0) | |
| 532 MOVNZ (t3,v0,t9) | |
| 533 | |
| 534 .set noreorder | |
| 535 bgtzl AT,.L_bn_sub_words_loop /* another full group of 4 */ | |
| 536 ld t0,0(a1) /* delay slot (taken only): preload the next a1[0] */ | |
| 537 | |
| 538 bnezl a3,.L_bn_sub_words_tail /* 1..3 words left over */ | |
| 539 ld t0,0(a1) /* delay slot (taken only): preload a1[0] for the tail */ | |
| 540 .set reorder | |
| 541 | |
| 542 .L_bn_sub_words_return: | |
| 543 jr ra | |
| 544 | |
| 545 .L_bn_sub_words_tail: | |
| 546 ld ta0,0(a2) /* first leftover word; t0 already holds a1[0] */ | |
| 547 subu a3,1 | |
| 548 sltu t8,t0,ta0 | |
| 549 dsubu t0,ta0 | |
| 550 dsubu ta0,t0,v0 | |
| 551 MOVNZ (t0,v0,t8) | |
| 552 sd ta0,0(a0) | |
| 553 beqz a3,.L_bn_sub_words_return | |
| 554 | |
| 555 ld t1,8(a1) /* second leftover word */ | |
| 556 subu a3,1 | |
| 557 ld ta1,8(a2) | |
| 558 sltu t9,t1,ta1 | |
| 559 dsubu t1,ta1 | |
| 560 dsubu ta1,t1,v0 | |
| 561 MOVNZ (t1,v0,t9) | |
| 562 sd ta1,8(a0) | |
| 563 beqz a3,.L_bn_sub_words_return | |
| 564 | |
| 565 ld t2,16(a1) /* third and last possible leftover word */ | |
| 566 ld ta2,16(a2) | |
| 567 sltu t8,t2,ta2 | |
| 568 dsubu t2,ta2 | |
| 569 dsubu ta2,t2,v0 | |
| 570 MOVNZ (t2,v0,t8) | |
| 571 sd ta2,16(a0) | |
| 572 jr ra | |
| 573 END(bn_sub_words) | |
| 574 | |
| 575 #undef MINUS4 | |
| 576 | |
| 577 .align 5 | |
| 578 LEAF(bn_div_3_words) /* a0 points at the top word of a multi-word value (the words at offsets 0, -8 and -16 are read); a2 is the word handed to bn_div_words as divisor and a1 the word multiplied by the resulting estimate. Returns a corrected quotient estimate in v0, or all ones when the top word equals a2. */ | |
| 579 .set reorder | |
| 580 move a3,a0 /* we know that bn_div_words doesn't | |
| 581 * touch a3, ta2, ta3 and preserves a2 | |
| 582 * so that we can save two arguments | |
| 583 * and return address in registers | |
| 584 * instead of stack:-) | |
| 585 */ | |
| 586 ld a0,(a3) /* a0 = top word, becomes the high dividend word */ | |
| 587 move ta2,a1 /* keep the original a1 in ta2 across the call */ | |
| 588 ld a1,-8(a3) /* a1 = next word, becomes the low dividend word */ | |
| 589 bne a0,a2,.L_bn_div_3_words_proceed | |
| 590 li v0,-1 /* top word equals the divisor word: return all ones */ | |
| 591 jr ra | |
| 592 .L_bn_div_3_words_proceed: | |
| 593 move ta3,ra /* park the return address in ta3; no stack frame is used */ | |
| 594 bal bn_div_words /* v0 = quotient, v1 = remainder */ | |
| 595 move ra,ta3 | |
| 596 dmultu ta2,v0 /* hi:lo = saved a1 * quotient estimate */ | |
| 597 ld t2,-16(a3) /* third word of the value */ | |
| 598 move ta0,zero | |
| 599 mfhi t1 | |
| 600 mflo t0 | |
| 601 sltu t8,t1,v1 /* t8 set when the product's high word is below the remainder: estimate accepted */ | |
| 602 .L_bn_div_3_words_inner_loop: /* NOTE(review): looks like the usual estimate-correction loop (lower v0 while the product t1:t0 exceeds remainder:t2) - confirm against the C reference */ | |
| 603 bnez t8,.L_bn_div_3_words_inner_loop_done | |
| 604 sgeu AT,t2,t0 | |
| 605 seq t9,t1,v1 | |
| 606 and AT,t9 /* AT = (t1 == v1) && (t2 >= t0): product does not exceed, stop */ | |
| 607 sltu t3,t0,ta2 /* borrow for the 128-bit subtraction below */ | |
| 608 daddu v1,a2 /* remainder += divisor word */ | |
| 609 dsubu t1,t3 | |
| 610 dsubu t0,ta2 /* product -= saved a1 */ | |
| 611 sltu t8,t1,v1 | |
| 612 sltu ta0,v1,a2 /* remainder addition wrapped around */ | |
| 613 or t8,ta0 | |
| 614 .set noreorder | |
| 615 beqzl AT,.L_bn_div_3_words_inner_loop | |
| 616 dsubu v0,1 /* delay slot (taken only): decrement the estimate */ | |
| 617 .set reorder | |
| 618 .L_bn_div_3_words_inner_loop_done: | |
| 619 jr ra | |
| 620 END(bn_div_3_words) | |
| 621 | |
| 622 .align 5 | |
| 623 LEAF(bn_div_words) /* Divides the 128-bit value a0:a1 (a0 = high word) by a2. Returns the 64-bit quotient in v0, built from two 32-bit digits, and leaves the remainder in v1. Returns -1 when a2 == 0. a2 is normalised internally and shifted back before returning; a3, ta2 and ta3 are not touched. */ | |
| 624 .set noreorder | |
| 625 bnezl a2,.L_bn_div_words_proceed | |
| 626 move v1,zero /* delay slot (taken only): v1 = 0 seeds the shift counter */ | |
| 627 jr ra | |
| 628 li v0,-1 /* I'd rather signal div-by-zero | |
| 629 * which can be done with 'break 7' */ | |
| 630 | |
| 631 .L_bn_div_words_proceed: | |
| 632 bltz a2,.L_bn_div_words_body /* top bit already set: no normalisation needed */ | |
| 633 move t9,v1 /* delay slot: t9 = 0, the shift count */ | |
| 634 dsll a2,1 /* shift the divisor left until its top bit is set */ | |
| 635 bgtz a2,.-4 /* loops back to the dsll above */ | |
| 636 addu t9,1 /* delay slot: one more shift counted */ | |
| 637 | |
| 638 .set reorder | |
| 639 negu t1,t9 /* variable shifts use the low 6 bits, so -t9 acts as 64-t9 */ | |
| 640 li t2,-1 | |
| 641 dsll t2,t1 /* mask of the top t9 bits */ | |
| 642 and t2,a0 /* bits of the high word that the shift would lose */ | |
| 643 dsrl AT,a1,t1 /* top t9 bits of the low word, to be moved into the high word */ | |
| 644 .set noreorder | |
| 645 bnezl t2,.+8 | |
| 646 break 6 /* signal overflow */ | |
| 647 .set reorder | |
| 648 dsll a0,t9 /* shift the dividend by the same amount as the divisor */ | |
| 649 dsll a1,t9 | |
| 650 or a0,AT | |
| 651 | |
| 652 #define QT ta0 | |
| 653 #define HH ta1 | |
| 654 #define DH v1 | |
| 655 .L_bn_div_words_body: | |
| 656 dsrl DH,a2,32 /* DH = upper 32 bits of the divisor */ | |
| 657 sgeu AT,a0,a2 | |
| 658 .set noreorder | |
| 659 bnezl AT,.+8 | |
| 660 dsubu a0,a2 /* delay slot (taken only): high word >= divisor, so reduce it */ | |
| 661 .set reorder | |
| 662 | |
| 663 li QT,-1 | |
| 664 dsrl HH,a0,32 /* HH = upper 32 bits of the current high word */ | |
| 665 dsrl QT,32 /* q=0xffffffff */ | |
| 666 beq DH,HH,.L_bn_div_words_skip_div1 /* upper halves equal: keep the maximal digit, skip the divide */ | |
| 667 ddivu zero,a0,DH | |
| 668 mflo QT /* first digit estimate */ | |
| 669 .L_bn_div_words_skip_div1: | |
| 670 dmultu a2,QT /* t1:t0 = divisor * digit estimate */ | |
| 671 dsll t3,a0,32 | |
| 672 dsrl AT,a1,32 | |
| 673 or t3,AT /* t3 = low half of a0 joined with high half of a1 */ | |
| 674 mflo t0 | |
| 675 mfhi t1 | |
| 676 .L_bn_div_words_inner_loop1: /* while t1:t0 exceeds HH:t3, lower the digit and subtract the divisor from the product */ | |
| 677 sltu t2,t3,t0 | |
| 678 seq t8,HH,t1 | |
| 679 sltu AT,HH,t1 | |
| 680 and t2,t8 | |
| 681 sltu v0,t0,a2 /* borrow for the product subtraction */ | |
| 682 or AT,t2 /* AT = product is too large */ | |
| 683 .set noreorder | |
| 684 beqz AT,.L_bn_div_words_inner_loop1_done | |
| 685 dsubu t1,v0 /* delay slot: runs on exit too; t1 is not read after the loop */ | |
| 686 dsubu t0,a2 | |
| 687 b .L_bn_div_words_inner_loop1 | |
| 688 dsubu QT,1 /* delay slot: digit estimate was one too high */ | |
| 689 .set reorder | |
| 690 .L_bn_div_words_inner_loop1_done: | |
| 691 | |
| 692 dsll a1,32 /* expose the remaining 32 dividend bits */ | |
| 693 dsubu a0,t3,t0 /* partial remainder after the first digit */ | |
| 694 dsll v0,QT,32 /* first digit goes into the upper half of the quotient */ | |
| 695 | |
| 696 li QT,-1 | |
| 697 dsrl HH,a0,32 | |
| 698 dsrl QT,32 /* q=0xffffffff */ | |
| 699 beq DH,HH,.L_bn_div_words_skip_div2 | |
| 700 ddivu zero,a0,DH | |
| 701 mflo QT /* second digit estimate */ | |
| 702 .L_bn_div_words_skip_div2: | |
| 703 #undef DH | |
| 704 dmultu a2,QT | |
| 705 dsll t3,a0,32 | |
| 706 dsrl AT,a1,32 | |
| 707 or t3,AT | |
| 708 mflo t0 | |
| 709 mfhi t1 | |
| 710 .L_bn_div_words_inner_loop2: /* same correction as the first loop; v1 is free for reuse as the borrow since DH is no longer needed */ | |
| 711 sltu t2,t3,t0 | |
| 712 seq t8,HH,t1 | |
| 713 sltu AT,HH,t1 | |
| 714 and t2,t8 | |
| 715 sltu v1,t0,a2 | |
| 716 or AT,t2 | |
| 717 .set noreorder | |
| 718 beqz AT,.L_bn_div_words_inner_loop2_done | |
| 719 dsubu t1,v1 | |
| 720 dsubu t0,a2 | |
| 721 b .L_bn_div_words_inner_loop2 | |
| 722 dsubu QT,1 | |
| 723 .set reorder | |
| 724 .L_bn_div_words_inner_loop2_done: | |
| 725 #undef HH | |
| 726 | |
| 727 dsubu a0,t3,t0 /* final remainder, still shifted by t9 */ | |
| 728 or v0,QT /* second digit goes into the lower half of the quotient */ | |
| 729 dsrl v1,a0,t9 /* v1 contains remainder if anybody wants it */ | |
| 730 dsrl a2,t9 /* restore a2 */ | |
| 731 jr ra | |
| 732 #undef QT | |
| 733 END(bn_div_words) | |
| 734 | |
| 735 #define a_0 t0 /* register aliases used by bn_mul_comba8 below: a_N and b_N hold the words a[N] and b[N] */ | |
| 736 #define a_1 t1 | |
| 737 #define a_2 t2 | |
| 738 #define a_3 t3 | |
| 739 #define b_0 ta0 | |
| 740 #define b_1 ta1 | |
| 741 #define b_2 ta2 | |
| 742 #define b_3 ta3 | |
| 743 | |
| 744 #define a_4 s0 /* s0-s5 are stored to the stack frame by bn_mul_comba8 before being loaded */ | |
| 745 #define a_5 s2 | |
| 746 #define a_6 s4 | |
| 747 #define a_7 a1 /* once we load a[7] we don't need a anymore */ | |
| 748 #define b_4 s1 | |
| 749 #define b_5 s3 | |
| 750 #define b_6 s5 | |
| 751 #define b_7 a2 /* once we load b[7] we don't need b anymore */ | |
| 752 | |
| 753 #define t_1 t8 /* t_1 / t_2 receive the low / high half of each product */ | |
| 754 #define t_2 t9 | |
| 755 | |
| 756 #define c_1 v0 /* c_1..c_3 form the rotating three-word column accumulator */ | |
| 757 #define c_2 v1 | |
| 758 #define c_3 a3 | |
| 759 | |
| 760 #define FRAME_SIZE 48 /* bytes subtracted from sp: six 8-byte slots for s0-s5 */ | |
| 761 | |
| 762 .align 5 | |
| 763 LEAF(bn_mul_comba8) | |
| 764 .set noreorder | |
| 765 PTR_SUB sp,FRAME_SIZE | |
| 766 .frame sp,64,ra | |
| 767 .set reorder | |
| 768 ld a_0,0(a1) /* If compiled with -mips3 option on | |
| 769 * R5000 box assembler barks on this | |
| 770 * line with "shouldn't have mult/div | |
| 771 * as last instruction in bb (R10K | |
| 772 * bug)" warning. If anybody out there | |
| 773 * has a clue about how to circumvent | |
| 774 * this do send me a note. | |
| 775 * <appro@fy.chalmers.se> | |
| 776 */ | |
| 777 ld b_0,0(a2) | |
| 778 ld a_1,8(a1) | |
| 779 ld a_2,16(a1) | |
| 780 ld a_3,24(a1) | |
| 781 ld b_1,8(a2) | |
| 782 ld b_2,16(a2) | |
| 783 ld b_3,24(a2) | |
| 784 dmultu a_0,b_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */ | |
| 785 sd s0,0(sp) | |
| 786 sd s1,8(sp) | |
| 787 sd s2,16(sp) | |
| 788 sd s3,24(sp) | |
| 789 sd s4,32(sp) | |
| 790 sd s5,40(sp) | |
| 791 mflo c_1 | |
| 792 mfhi c_2 | |
| 793 | |
| 794 dmultu a_0,b_1 /* mul_add_c(a[0],b[1],c2,c3,c1); */ | |
| 795 ld a_4,32(a1) | |
| 796 ld a_5,40(a1) | |
| 797 ld a_6,48(a1) | |
| 798 ld a_7,56(a1) | |
| 799 ld b_4,32(a2) | |
| 800 ld b_5,40(a2) | |
| 801 mflo t_1 | |
| 802 mfhi t_2 | |
| 803 daddu c_2,t_1 | |
| 804 sltu AT,c_2,t_1 | |
| 805 daddu c_3,t_2,AT | |
| 806 dmultu a_1,b_0 /* mul_add_c(a[1],b[0],c2,c3,c1); */ | |
| 807 ld b_6,48(a2) | |
| 808 ld b_7,56(a2) | |
| 809 sd c_1,0(a0) /* r[0]=c1; */ | |
| 810 mflo t_1 | |
| 811 mfhi t_2 | |
| 812 daddu c_2,t_1 | |
| 813 sltu AT,c_2,t_1 | |
| 814 daddu t_2,AT | |
| 815 daddu c_3,t_2 | |
| 816 sltu c_1,c_3,t_2 | |
| 817 sd c_2,8(a0) /* r[1]=c2; */ | |
| 818 | |
| 819 dmultu a_2,b_0 /* mul_add_c(a[2],b[0],c3,c1,c2); */ | |
| 820 mflo t_1 | |
| 821 mfhi t_2 | |
| 822 daddu c_3,t_1 | |
| 823 sltu AT,c_3,t_1 | |
| 824 daddu t_2,AT | |
| 825 daddu c_1,t_2 | |
| 826 dmultu a_1,b_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */ | |
| 827 mflo t_1 | |
| 828 mfhi t_2 | |
| 829 daddu c_3,t_1 | |
| 830 sltu AT,c_3,t_1 | |
| 831 daddu t_2,AT | |
| 832 daddu c_1,t_2 | |
| 833 sltu c_2,c_1,t_2 | |
| 834 dmultu a_0,b_2 /* mul_add_c(a[0],b[2],c3,c1,c2); */ | |
| 835 mflo t_1 | |
| 836 mfhi t_2 | |
| 837 daddu c_3,t_1 | |
| 838 sltu AT,c_3,t_1 | |
| 839 daddu t_2,AT | |
| 840 daddu c_1,t_2 | |
| 841 sltu AT,c_1,t_2 | |
| 842 daddu c_2,AT | |
| 843 sd c_3,16(a0) /* r[2]=c3; */ | |
| 844 | |
| 845 dmultu a_0,b_3 /* mul_add_c(a[0],b[3],c1,c2,c3); */ | |
| 846 mflo t_1 | |
| 847 mfhi t_2 | |
| 848 daddu c_1,t_1 | |
| 849 sltu AT,c_1,t_1 | |
| 850 daddu t_2,AT | |
| 851 daddu c_2,t_2 | |
| 852 sltu c_3,c_2,t_2 | |
| 853 dmultu a_1,b_2 /* mul_add_c(a[1],b[2],c1,c2,c3); */ | |
| 854 mflo t_1 | |
| 855 mfhi t_2 | |
| 856 daddu c_1,t_1 | |
| 857 sltu AT,c_1,t_1 | |
| 858 daddu t_2,AT | |
| 859 daddu c_2,t_2 | |
| 860 sltu AT,c_2,t_2 | |
| 861 daddu c_3,AT | |
| 862 dmultu a_2,b_1 /* mul_add_c(a[2],b[1],c1,c2,c3); */ | |
| 863 mflo t_1 | |
| 864 mfhi t_2 | |
| 865 daddu c_1,t_1 | |
| 866 sltu AT,c_1,t_1 | |
| 867 daddu t_2,AT | |
| 868 daddu c_2,t_2 | |
| 869 sltu AT,c_2,t_2 | |
| 870 daddu c_3,AT | |
| 871 dmultu a_3,b_0 /* mul_add_c(a[3],b[0],c1,c2,c3); */ | |
| 872 mflo t_1 | |
| 873 mfhi t_2 | |
| 874 daddu c_1,t_1 | |
| 875 sltu AT,c_1,t_1 | |
| 876 daddu t_2,AT | |
| 877 daddu c_2,t_2 | |
| 878 sltu AT,c_2,t_2 | |
| 879 daddu c_3,AT | |
| 880 sd c_1,24(a0) /* r[3]=c1; */ | |
| 881 | |
| 882 dmultu a_4,b_0 /* mul_add_c(a[4],b[0],c2,c3,c1); */ | |
| 883 mflo t_1 | |
| 884 mfhi t_2 | |
| 885 daddu c_2,t_1 | |
| 886 sltu AT,c_2,t_1 | |
| 887 daddu t_2,AT | |
| 888 daddu c_3,t_2 | |
| 889 sltu c_1,c_3,t_2 | |
| 890 dmultu a_3,b_1 /* mul_add_c(a[3],b[1],c2,c3,c1); */ | |
| 891 mflo t_1 | |
| 892 mfhi t_2 | |
| 893 daddu c_2,t_1 | |
| 894 sltu AT,c_2,t_1 | |
| 895 daddu t_2,AT | |
| 896 daddu c_3,t_2 | |
| 897 sltu AT,c_3,t_2 | |
| 898 daddu c_1,AT | |
| 899 dmultu a_2,b_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */ | |
| 900 mflo t_1 | |
| 901 mfhi t_2 | |
| 902 daddu c_2,t_1 | |
| 903 sltu AT,c_2,t_1 | |
| 904 daddu t_2,AT | |
| 905 daddu c_3,t_2 | |
| 906 sltu AT,c_3,t_2 | |
| 907 daddu c_1,AT | |
| 908 dmultu a_1,b_3 /* mul_add_c(a[1],b[3],c2,c3,c1); */ | |
| 909 mflo t_1 | |
| 910 mfhi t_2 | |
| 911 daddu c_2,t_1 | |
| 912 sltu AT,c_2,t_1 | |
| 913 daddu t_2,AT | |
| 914 daddu c_3,t_2 | |
| 915 sltu AT,c_3,t_2 | |
| 916 daddu c_1,AT | |
| 917 dmultu a_0,b_4 /* mul_add_c(a[0],b[4],c2,c3,c1); */ | |
| 918 mflo t_1 | |
| 919 mfhi t_2 | |
| 920 daddu c_2,t_1 | |
| 921 sltu AT,c_2,t_1 | |
| 922 daddu t_2,AT | |
| 923 daddu c_3,t_2 | |
| 924 sltu AT,c_3,t_2 | |
| 925 daddu c_1,AT | |
| 926 sd c_2,32(a0) /* r[4]=c2; */ | |
| 927 | |
| 928 dmultu a_0,b_5 /* mul_add_c(a[0],b[5],c3,c1,c2); */ | |
| 929 mflo t_1 | |
| 930 mfhi t_2 | |
| 931 daddu c_3,t_1 | |
| 932 sltu AT,c_3,t_1 | |
| 933 daddu t_2,AT | |
| 934 daddu c_1,t_2 | |
| 935 sltu c_2,c_1,t_2 | |
| 936 dmultu a_1,b_4 /* mul_add_c(a[1],b[4],c3,c1,c2); */ | |
| 937 mflo t_1 | |
| 938 mfhi t_2 | |
| 939 daddu c_3,t_1 | |
| 940 sltu AT,c_3,t_1 | |
| 941 daddu t_2,AT | |
| 942 daddu c_1,t_2 | |
| 943 sltu AT,c_1,t_2 | |
| 944 daddu c_2,AT | |
| 945 dmultu a_2,b_3 /* mul_add_c(a[2],b[3],c3,c1,c2); */ | |
| 946 mflo t_1 | |
| 947 mfhi t_2 | |
| 948 daddu c_3,t_1 | |
| 949 sltu AT,c_3,t_1 | |
| 950 daddu t_2,AT | |
| 951 daddu c_1,t_2 | |
| 952 sltu AT,c_1,t_2 | |
| 953 daddu c_2,AT | |
| 954 dmultu a_3,b_2 /* mul_add_c(a[3],b[2],c3,c1,c2); */ | |
| 955 mflo t_1 | |
| 956 mfhi t_2 | |
| 957 daddu c_3,t_1 | |
| 958 sltu AT,c_3,t_1 | |
| 959 daddu t_2,AT | |
| 960 daddu c_1,t_2 | |
| 961 sltu AT,c_1,t_2 | |
| 962 daddu c_2,AT | |
| 963 dmultu a_4,b_1 /* mul_add_c(a[4],b[1],c3,c1,c2); */ | |
| 964 mflo t_1 | |
| 965 mfhi t_2 | |
| 966 daddu c_3,t_1 | |
| 967 sltu AT,c_3,t_1 | |
| 968 daddu t_2,AT | |
| 969 daddu c_1,t_2 | |
| 970 sltu AT,c_1,t_2 | |
| 971 daddu c_2,AT | |
| 972 dmultu a_5,b_0 /* mul_add_c(a[5],b[0],c3,c1,c2); */ | |
| 973 mflo t_1 | |
| 974 mfhi t_2 | |
| 975 daddu c_3,t_1 | |
| 976 sltu AT,c_3,t_1 | |
| 977 daddu t_2,AT | |
| 978 daddu c_1,t_2 | |
| 979 sltu AT,c_1,t_2 | |
| 980 daddu c_2,AT | |
| 981 sd c_3,40(a0) /* r[5]=c3; */ | |
| 982 | |
| 983 dmultu a_6,b_0 /* mul_add_c(a[6],b[0],c1,c2,c3); */ | |
| 984 mflo t_1 | |
| 985 mfhi t_2 | |
| 986 daddu c_1,t_1 | |
| 987 sltu AT,c_1,t_1 | |
| 988 daddu t_2,AT | |
| 989 daddu c_2,t_2 | |
| 990 sltu c_3,c_2,t_2 | |
| 991 dmultu a_5,b_1 /* mul_add_c(a[5],b[1],c1,c2,c3); */ | |
| 992 mflo t_1 | |
| 993 mfhi t_2 | |
| 994 daddu c_1,t_1 | |
| 995 sltu AT,c_1,t_1 | |
| 996 daddu t_2,AT | |
| 997 daddu c_2,t_2 | |
| 998 sltu AT,c_2,t_2 | |
| 999 daddu c_3,AT | |
| 1000 dmultu a_4,b_2 /* mul_add_c(a[4],b[2],c1,c2,c3); */ | |
| 1001 mflo t_1 | |
| 1002 mfhi t_2 | |
| 1003 daddu c_1,t_1 | |
| 1004 sltu AT,c_1,t_1 | |
| 1005 daddu t_2,AT | |
| 1006 daddu c_2,t_2 | |
| 1007 sltu AT,c_2,t_2 | |
| 1008 daddu c_3,AT | |
| 1009 dmultu a_3,b_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */ | |
| 1010 mflo t_1 | |
| 1011 mfhi t_2 | |
| 1012 daddu c_1,t_1 | |
| 1013 sltu AT,c_1,t_1 | |
| 1014 daddu t_2,AT | |
| 1015 daddu c_2,t_2 | |
| 1016 sltu AT,c_2,t_2 | |
| 1017 daddu c_3,AT | |
| 1018 dmultu a_2,b_4 /* mul_add_c(a[2],b[4],c1,c2,c3); */ | |
| 1019 mflo t_1 | |
| 1020 mfhi t_2 | |
| 1021 daddu c_1,t_1 | |
| 1022 sltu AT,c_1,t_1 | |
| 1023 daddu t_2,AT | |
| 1024 daddu c_2,t_2 | |
| 1025 sltu AT,c_2,t_2 | |
| 1026 daddu c_3,AT | |
| 1027 dmultu a_1,b_5 /* mul_add_c(a[1],b[5],c1,c2,c3); */ | |
| 1028 mflo t_1 | |
| 1029 mfhi t_2 | |
| 1030 daddu c_1,t_1 | |
| 1031 sltu AT,c_1,t_1 | |
| 1032 daddu t_2,AT | |
| 1033 daddu c_2,t_2 | |
| 1034 sltu AT,c_2,t_2 | |
| 1035 daddu c_3,AT | |
| 1036 dmultu a_0,b_6 /* mul_add_c(a[0],b[6],c1,c2,c3); */ | |
| 1037 mflo t_1 | |
| 1038 mfhi t_2 | |
| 1039 daddu c_1,t_1 | |
| 1040 sltu AT,c_1,t_1 | |
| 1041 daddu t_2,AT | |
| 1042 daddu c_2,t_2 | |
| 1043 sltu AT,c_2,t_2 | |
| 1044 daddu c_3,AT | |
| 1045 sd c_1,48(a0) /* r[6]=c1; */ | |
| 1046 | |
| 1047 dmultu a_0,b_7 /* mul_add_c(a[0],b[7],c2,c3,c1); */ | |
| 1048 mflo t_1 | |
| 1049 mfhi t_2 | |
| 1050 daddu c_2,t_1 | |
| 1051 sltu AT,c_2,t_1 | |
| 1052 daddu t_2,AT | |
| 1053 daddu c_3,t_2 | |
| 1054 sltu c_1,c_3,t_2 | |
| 1055 dmultu a_1,b_6 /* mul_add_c(a[1],b[6],c2,c3,c1); */ | |
| 1056 mflo t_1 | |
| 1057 mfhi t_2 | |
| 1058 daddu c_2,t_1 | |
| 1059 sltu AT,c_2,t_1 | |
| 1060 daddu t_2,AT | |
| 1061 daddu c_3,t_2 | |
| 1062 sltu AT,c_3,t_2 | |
| 1063 daddu c_1,AT | |
| 1064 dmultu a_2,b_5 /* mul_add_c(a[2],b[5],c2,c3,c1); */ | |
| 1065 mflo t_1 | |
| 1066 mfhi t_2 | |
| 1067 daddu c_2,t_1 | |
| 1068 sltu AT,c_2,t_1 | |
| 1069 daddu t_2,AT | |
| 1070 daddu c_3,t_2 | |
| 1071 sltu AT,c_3,t_2 | |
| 1072 daddu c_1,AT | |
| 1073 dmultu a_3,b_4 /* mul_add_c(a[3],b[4],c2,c3,c1); */ | |
| 1074 mflo t_1 | |
| 1075 mfhi t_2 | |
| 1076 daddu c_2,t_1 | |
| 1077 sltu AT,c_2,t_1 | |
| 1078 daddu t_2,AT | |
| 1079 daddu c_3,t_2 | |
| 1080 sltu AT,c_3,t_2 | |
| 1081 daddu c_1,AT | |
| 1082 dmultu a_4,b_3 /* mul_add_c(a[4],b[3],c2,c3,c1); */ | |
| 1083 mflo t_1 | |
| 1084 mfhi t_2 | |
| 1085 daddu c_2,t_1 | |
| 1086 sltu AT,c_2,t_1 | |
| 1087 daddu t_2,AT | |
| 1088 daddu c_3,t_2 | |
| 1089 sltu AT,c_3,t_2 | |
| 1090 daddu c_1,AT | |
| 1091 dmultu a_5,b_2 /* mul_add_c(a[5],b[2],c2,c3,c1); */ | |
| 1092 mflo t_1 | |
| 1093 mfhi t_2 | |
| 1094 daddu c_2,t_1 | |
| 1095 sltu AT,c_2,t_1 | |
| 1096 daddu t_2,AT | |
| 1097 daddu c_3,t_2 | |
| 1098 sltu AT,c_3,t_2 | |
| 1099 daddu c_1,AT | |
| 1100 dmultu a_6,b_1 /* mul_add_c(a[6],b[1],c2,c3,c1); */ | |
| 1101 mflo t_1 | |
| 1102 mfhi t_2 | |
| 1103 daddu c_2,t_1 | |
| 1104 sltu AT,c_2,t_1 | |
| 1105 daddu t_2,AT | |
| 1106 daddu c_3,t_2 | |
| 1107 sltu AT,c_3,t_2 | |
| 1108 daddu c_1,AT | |
| 1109 dmultu a_7,b_0 /* mul_add_c(a[7],b[0],c2,c3,c1); */ | |
| 1110 mflo t_1 | |
| 1111 mfhi t_2 | |
| 1112 daddu c_2,t_1 | |
| 1113 sltu AT,c_2,t_1 | |
| 1114 daddu t_2,AT | |
| 1115 daddu c_3,t_2 | |
| 1116 sltu AT,c_3,t_2 | |
| 1117 daddu c_1,AT | |
| 1118 sd c_2,56(a0) /* r[7]=c2; */ | |
| 1119 | |
| 1120 dmultu a_7,b_1 /* mul_add_c(a[7],b[1],c3,c1,c2); */ | |
| 1121 mflo t_1 | |
| 1122 mfhi t_2 | |
| 1123 daddu c_3,t_1 | |
| 1124 sltu AT,c_3,t_1 | |
| 1125 daddu t_2,AT | |
| 1126 daddu c_1,t_2 | |
| 1127 sltu c_2,c_1,t_2 | |
| 1128 dmultu a_6,b_2 /* mul_add_c(a[6],b[2],c3,c1,c2); */ | |
| 1129 mflo t_1 | |
| 1130 mfhi t_2 | |
| 1131 daddu c_3,t_1 | |
| 1132 sltu AT,c_3,t_1 | |
| 1133 daddu t_2,AT | |
| 1134 daddu c_1,t_2 | |
| 1135 sltu AT,c_1,t_2 | |
| 1136 daddu c_2,AT | |
| 1137 dmultu a_5,b_3 /* mul_add_c(a[5],b[3],c3,c1,c2); */ | |
| 1138 mflo t_1 | |
| 1139 mfhi t_2 | |
| 1140 daddu c_3,t_1 | |
| 1141 sltu AT,c_3,t_1 | |
| 1142 daddu t_2,AT | |
| 1143 daddu c_1,t_2 | |
| 1144 sltu AT,c_1,t_2 | |
| 1145 daddu c_2,AT | |
| 1146 dmultu a_4,b_4 /* mul_add_c(a[4],b[4],c3,c1,c2); */ | |
| 1147 mflo t_1 | |
| 1148 mfhi t_2 | |
| 1149 daddu c_3,t_1 | |
| 1150 sltu AT,c_3,t_1 | |
| 1151 daddu t_2,AT | |
| 1152 daddu c_1,t_2 | |
| 1153 sltu AT,c_1,t_2 | |
| 1154 daddu c_2,AT | |
| 1155 dmultu a_3,b_5 /* mul_add_c(a[3],b[5],c3,c1,c2); */ | |
| 1156 mflo t_1 | |
| 1157 mfhi t_2 | |
| 1158 daddu c_3,t_1 | |
| 1159 sltu AT,c_3,t_1 | |
| 1160 daddu t_2,AT | |
| 1161 daddu c_1,t_2 | |
| 1162 sltu AT,c_1,t_2 | |
| 1163 daddu c_2,AT | |
| 1164 dmultu a_2,b_6 /* mul_add_c(a[2],b[6],c3,c1,c2); */ | |
| 1165 mflo t_1 | |
| 1166 mfhi t_2 | |
| 1167 daddu c_3,t_1 | |
| 1168 sltu AT,c_3,t_1 | |
| 1169 daddu t_2,AT | |
| 1170 daddu c_1,t_2 | |
| 1171 sltu AT,c_1,t_2 | |
| 1172 daddu c_2,AT | |
| 1173 dmultu a_1,b_7 /* mul_add_c(a[1],b[7],c3,c1,c2); */ | |
| 1174 mflo t_1 | |
| 1175 mfhi t_2 | |
| 1176 daddu c_3,t_1 | |
| 1177 sltu AT,c_3,t_1 | |
| 1178 daddu t_2,AT | |
| 1179 daddu c_1,t_2 | |
| 1180 sltu AT,c_1,t_2 | |
| 1181 daddu c_2,AT | |
| 1182 sd c_3,64(a0) /* r[8]=c3; */ | |
| 1183 | |
| 1184 dmultu a_2,b_7 /* mul_add_c(a[2],b[7],c1,c2,c3); */ | |
| 1185 mflo t_1 | |
| 1186 mfhi t_2 | |
| 1187 daddu c_1,t_1 | |
| 1188 sltu AT,c_1,t_1 | |
| 1189 daddu t_2,AT | |
| 1190 daddu c_2,t_2 | |
| 1191 sltu c_3,c_2,t_2 | |
| 1192 dmultu a_3,b_6 /* mul_add_c(a[3],b[6],c1,c2,c3); */ | |
| 1193 mflo t_1 | |
| 1194 mfhi t_2 | |
| 1195 daddu c_1,t_1 | |
| 1196 sltu AT,c_1,t_1 | |
| 1197 daddu t_2,AT | |
| 1198 daddu c_2,t_2 | |
| 1199 sltu AT,c_2,t_2 | |
| 1200 daddu c_3,AT | |
| 1201 dmultu a_4,b_5 /* mul_add_c(a[4],b[5],c1,c2,c3); */ | |
| 1202 mflo t_1 | |
| 1203 mfhi t_2 | |
| 1204 daddu c_1,t_1 | |
| 1205 sltu AT,c_1,t_1 | |
| 1206 daddu t_2,AT | |
| 1207 daddu c_2,t_2 | |
| 1208 sltu AT,c_2,t_2 | |
| 1209 daddu c_3,AT | |
| 1210 dmultu a_5,b_4 /* mul_add_c(a[5],b[4],c1,c2,c3); */ | |
| 1211 mflo t_1 | |
| 1212 mfhi t_2 | |
| 1213 daddu c_1,t_1 | |
| 1214 sltu AT,c_1,t_1 | |
| 1215 daddu t_2,AT | |
| 1216 daddu c_2,t_2 | |
| 1217 sltu AT,c_2,t_2 | |
| 1218 daddu c_3,AT | |
| 1219 dmultu a_6,b_3 /* mul_add_c(a[6],b[3],c1,c2,c3); */ | |
| 1220 mflo t_1 | |
| 1221 mfhi t_2 | |
| 1222 daddu c_1,t_1 | |
| 1223 sltu AT,c_1,t_1 | |
| 1224 daddu t_2,AT | |
| 1225 daddu c_2,t_2 | |
| 1226 sltu AT,c_2,t_2 | |
| 1227 daddu c_3,AT | |
| 1228 dmultu a_7,b_2 /* mul_add_c(a[7],b[2],c1,c2,c3); */ | |
| 1229 mflo t_1 | |
| 1230 mfhi t_2 | |
| 1231 daddu c_1,t_1 | |
| 1232 sltu AT,c_1,t_1 | |
| 1233 daddu t_2,AT | |
| 1234 daddu c_2,t_2 | |
| 1235 sltu AT,c_2,t_2 | |
| 1236 daddu c_3,AT | |
| 1237 sd c_1,72(a0) /* r[9]=c1; */ | |
| 1238 | |
| 1239 dmultu a_7,b_3 /* mul_add_c(a[7],b[3],c2,c3,c1); */ | |
| 1240 mflo t_1 | |
| 1241 mfhi t_2 | |
| 1242 daddu c_2,t_1 | |
| 1243 sltu AT,c_2,t_1 | |
| 1244 daddu t_2,AT | |
| 1245 daddu c_3,t_2 | |
| 1246 sltu c_1,c_3,t_2 | |
| 1247 dmultu a_6,b_4 /* mul_add_c(a[6],b[4],c2,c3,c1); */ | |
| 1248 mflo t_1 | |
| 1249 mfhi t_2 | |
| 1250 daddu c_2,t_1 | |
| 1251 sltu AT,c_2,t_1 | |
| 1252 daddu t_2,AT | |
| 1253 daddu c_3,t_2 | |
| 1254 sltu AT,c_3,t_2 | |
| 1255 daddu c_1,AT | |
| 1256 dmultu a_5,b_5 /* mul_add_c(a[5],b[5],c2,c3,c1); */ | |
| 1257 mflo t_1 | |
| 1258 mfhi t_2 | |
| 1259 daddu c_2,t_1 | |
| 1260 sltu AT,c_2,t_1 | |
| 1261 daddu t_2,AT | |
| 1262 daddu c_3,t_2 | |
| 1263 sltu AT,c_3,t_2 | |
| 1264 daddu c_1,AT | |
| 1265 dmultu a_4,b_6 /* mul_add_c(a[4],b[6],c2,c3,c1); */ | |
| 1266 mflo t_1 | |
| 1267 mfhi t_2 | |
| 1268 daddu c_2,t_1 | |
| 1269 sltu AT,c_2,t_1 | |
| 1270 daddu t_2,AT | |
| 1271 daddu c_3,t_2 | |
| 1272 sltu AT,c_3,t_2 | |
| 1273 daddu c_1,AT | |
| 1274 dmultu a_3,b_7 /* mul_add_c(a[3],b[7],c2,c3,c1); */ | |
| 1275 mflo t_1 | |
| 1276 mfhi t_2 | |
| 1277 daddu c_2,t_1 | |
| 1278 sltu AT,c_2,t_1 | |
| 1279 daddu t_2,AT | |
| 1280 daddu c_3,t_2 | |
| 1281 sltu AT,c_3,t_2 | |
| 1282 daddu c_1,AT | |
| 1283 sd c_2,80(a0) /* r[10]=c2; */ | |
| 1284 | |
| 1285 dmultu a_4,b_7 /* mul_add_c(a[4],b[7],c3,c1,c2); */ | |
| 1286 mflo t_1 | |
| 1287 mfhi t_2 | |
| 1288 daddu c_3,t_1 | |
| 1289 sltu AT,c_3,t_1 | |
| 1290 daddu t_2,AT | |
| 1291 daddu c_1,t_2 | |
| 1292 sltu c_2,c_1,t_2 | |
| 1293 dmultu a_5,b_6 /* mul_add_c(a[5],b[6],c3,c1,c2); */ | |
| 1294 mflo t_1 | |
| 1295 mfhi t_2 | |
| 1296 daddu c_3,t_1 | |
| 1297 sltu AT,c_3,t_1 | |
| 1298 daddu t_2,AT | |
| 1299 daddu c_1,t_2 | |
| 1300 sltu AT,c_1,t_2 | |
| 1301 daddu c_2,AT | |
| 1302 dmultu a_6,b_5 /* mul_add_c(a[6],b[5],c3,c1,c2); */ | |
| 1303 mflo t_1 | |
| 1304 mfhi t_2 | |
| 1305 daddu c_3,t_1 | |
| 1306 sltu AT,c_3,t_1 | |
| 1307 daddu t_2,AT | |
| 1308 daddu c_1,t_2 | |
| 1309 sltu AT,c_1,t_2 | |
| 1310 daddu c_2,AT | |
| 1311 dmultu a_7,b_4 /* mul_add_c(a[7],b[4],c3,c1,c2); */ | |
| 1312 mflo t_1 | |
| 1313 mfhi t_2 | |
| 1314 daddu c_3,t_1 | |
| 1315 sltu AT,c_3,t_1 | |
| 1316 daddu t_2,AT | |
| 1317 daddu c_1,t_2 | |
| 1318 sltu AT,c_1,t_2 | |
| 1319 daddu c_2,AT | |
| 1320 sd c_3,88(a0) /* r[11]=c3; */ | |
| 1321 | |
| 1322 dmultu a_7,b_5 /* mul_add_c(a[7],b[5],c1,c2,c3); */ | |
| 1323 mflo t_1 | |
| 1324 mfhi t_2 | |
| 1325 daddu c_1,t_1 | |
| 1326 sltu AT,c_1,t_1 | |
| 1327 daddu t_2,AT | |
| 1328 daddu c_2,t_2 | |
| 1329 sltu c_3,c_2,t_2 | |
| 1330 dmultu a_6,b_6 /* mul_add_c(a[6],b[6],c1,c2,c3); */ | |
| 1331 mflo t_1 | |
| 1332 mfhi t_2 | |
| 1333 daddu c_1,t_1 | |
| 1334 sltu AT,c_1,t_1 | |
| 1335 daddu t_2,AT | |
| 1336 daddu c_2,t_2 | |
| 1337 sltu AT,c_2,t_2 | |
| 1338 daddu c_3,AT | |
| 1339 dmultu a_5,b_7 /* mul_add_c(a[5],b[7],c1,c2,c3); */ | |
| 1340 mflo t_1 | |
| 1341 mfhi t_2 | |
| 1342 daddu c_1,t_1 | |
| 1343 sltu AT,c_1,t_1 | |
| 1344 daddu t_2,AT | |
| 1345 daddu c_2,t_2 | |
| 1346 sltu AT,c_2,t_2 | |
| 1347 daddu c_3,AT | |
| 1348 sd c_1,96(a0) /* r[12]=c1; */ | |
| 1349 | |
| 1350 dmultu a_6,b_7 /* mul_add_c(a[6],b[7],c2,c3,c1); */ | |
| 1351 mflo t_1 | |
| 1352 mfhi t_2 | |
| 1353 daddu c_2,t_1 | |
| 1354 sltu AT,c_2,t_1 | |
| 1355 daddu t_2,AT | |
| 1356 daddu c_3,t_2 | |
| 1357 sltu c_1,c_3,t_2 | |
| 1358 dmultu a_7,b_6 /* mul_add_c(a[7],b[6],c2,c3,c1); */ | |
| 1359 mflo t_1 | |
| 1360 mfhi t_2 | |
| 1361 daddu c_2,t_1 | |
| 1362 sltu AT,c_2,t_1 | |
| 1363 daddu t_2,AT | |
| 1364 daddu c_3,t_2 | |
| 1365 sltu AT,c_3,t_2 | |
| 1366 daddu c_1,AT | |
| 1367 sd c_2,104(a0) /* r[13]=c2; */ | |
| 1368 | |
| 1369 dmultu a_7,b_7 /* mul_add_c(a[7],b[7],c3,c1,c2); */ | |
| 1370 ld s0,0(sp) | |
| 1371 ld s1,8(sp) | |
| 1372 ld s2,16(sp) | |
| 1373 ld s3,24(sp) | |
| 1374 ld s4,32(sp) | |
| 1375 ld s5,40(sp) | |
| 1376 mflo t_1 | |
| 1377 mfhi t_2 | |
| 1378 daddu c_3,t_1 | |
| 1379 sltu AT,c_3,t_1 | |
| 1380 daddu t_2,AT | |
| 1381 daddu c_1,t_2 | |
| 1382 sd c_3,112(a0) /* r[14]=c3; */ | |
| 1383 sd c_1,120(a0) /* r[15]=c1; */ | |
| 1384 | |
| 1385 PTR_ADD sp,FRAME_SIZE | |
| 1386 | |
| 1387 jr ra | |
| 1388 END(bn_mul_comba8) | |
| 1389 /* bn_mul_comba4: 4x4-word comba multiply. Loads a[0..3] through a1 and b[0..3] through a2 (8-byte words) and stores the eight product words at 0..56(a0). c_1/c_2/c_3 rotate as the three-word column accumulator; t_1/t_2 receive LO/HI of each dmultu; AT holds the carry of the preceding add. */ | |
| 1390 .align 5 | |
| 1391 LEAF(bn_mul_comba4) | |
| 1392 .set reorder | |
| 1393 ld a_0,0(a1) | |
| 1394 ld b_0,0(a2) | |
| 1395 ld a_1,8(a1) | |
| 1396 ld a_2,16(a1) | |
| 1397 dmultu a_0,b_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */ | |
| 1398 ld a_3,24(a1) /* remaining operand loads are issued after the multiply has been started */ | |
| 1399 ld b_1,8(a2) | |
| 1400 ld b_2,16(a2) | |
| 1401 ld b_3,24(a2) | |
| 1402 mflo c_1 | |
| 1403 mfhi c_2 | |
| 1404 sd c_1,0(a0) /* r[0]=c1; */ | |
| 1405 /* column 1: accumulator order is (c2,c3,c1) */ | |
| 1406 dmultu a_0,b_1 /* mul_add_c(a[0],b[1],c2,c3,c1); */ | |
| 1407 mflo t_1 | |
| 1408 mfhi t_2 | |
| 1409 daddu c_2,t_1 | |
| 1410 sltu AT,c_2,t_1 /* AT = 1 if the add above wrapped (sum < addend) */ | |
| 1411 daddu c_3,t_2,AT /* c3 is set here (hi + carry), not accumulated */ | |
| 1412 dmultu a_1,b_0 /* mul_add_c(a[1],b[0],c2,c3,c1); */ | |
| 1413 mflo t_1 | |
| 1414 mfhi t_2 | |
| 1415 daddu c_2,t_1 | |
| 1416 sltu AT,c_2,t_1 | |
| 1417 daddu t_2,AT /* fold the low-word carry into the high word */ | |
| 1418 daddu c_3,t_2 | |
| 1419 sltu c_1,c_3,t_2 /* c1 is set (not accumulated) to the carry out of c3 */ | |
| 1420 sd c_2,8(a0) /* r[1]=c2; */ | |
| 1421 /* column 2: accumulator order is (c3,c1,c2) */ | |
| 1422 dmultu a_2,b_0 /* mul_add_c(a[2],b[0],c3,c1,c2); */ | |
| 1423 mflo t_1 | |
| 1424 mfhi t_2 | |
| 1425 daddu c_3,t_1 | |
| 1426 sltu AT,c_3,t_1 | |
| 1427 daddu t_2,AT | |
| 1428 daddu c_1,t_2 /* no carry test after this add; c2 is first set by the next product. NOTE(review): relies on c1 being at most 1 here so the add cannot wrap - confirm */ | |
| 1429 dmultu a_1,b_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */ | |
| 1430 mflo t_1 | |
| 1431 mfhi t_2 | |
| 1432 daddu c_3,t_1 | |
| 1433 sltu AT,c_3,t_1 | |
| 1434 daddu t_2,AT | |
| 1435 daddu c_1,t_2 | |
| 1436 sltu c_2,c_1,t_2 /* c2 is set (not accumulated) to the carry out of c1 */ | |
| 1437 dmultu a_0,b_2 /* mul_add_c(a[0],b[2],c3,c1,c2); */ | |
| 1438 mflo t_1 | |
| 1439 mfhi t_2 | |
| 1440 daddu c_3,t_1 | |
| 1441 sltu AT,c_3,t_1 | |
| 1442 daddu t_2,AT | |
| 1443 daddu c_1,t_2 | |
| 1444 sltu AT,c_1,t_2 | |
| 1445 daddu c_2,AT /* from here on the top word accumulates carries */ | |
| 1446 sd c_3,16(a0) /* r[2]=c3; */ | |
| 1447 /* column 3: accumulator order is (c1,c2,c3) */ | |
| 1448 dmultu a_0,b_3 /* mul_add_c(a[0],b[3],c1,c2,c3); */ | |
| 1449 mflo t_1 | |
| 1450 mfhi t_2 | |
| 1451 daddu c_1,t_1 | |
| 1452 sltu AT,c_1,t_1 | |
| 1453 daddu t_2,AT | |
| 1454 daddu c_2,t_2 | |
| 1455 sltu c_3,c_2,t_2 /* first product of the column: c3 is set, not accumulated */ | |
| 1456 dmultu a_1,b_2 /* mul_add_c(a[1],b[2],c1,c2,c3); */ | |
| 1457 mflo t_1 | |
| 1458 mfhi t_2 | |
| 1459 daddu c_1,t_1 | |
| 1460 sltu AT,c_1,t_1 | |
| 1461 daddu t_2,AT | |
| 1462 daddu c_2,t_2 | |
| 1463 sltu AT,c_2,t_2 | |
| 1464 daddu c_3,AT | |
| 1465 dmultu a_2,b_1 /* mul_add_c(a[2],b[1],c1,c2,c3); */ | |
| 1466 mflo t_1 | |
| 1467 mfhi t_2 | |
| 1468 daddu c_1,t_1 | |
| 1469 sltu AT,c_1,t_1 | |
| 1470 daddu t_2,AT | |
| 1471 daddu c_2,t_2 | |
| 1472 sltu AT,c_2,t_2 | |
| 1473 daddu c_3,AT | |
| 1474 dmultu a_3,b_0 /* mul_add_c(a[3],b[0],c1,c2,c3); */ | |
| 1475 mflo t_1 | |
| 1476 mfhi t_2 | |
| 1477 daddu c_1,t_1 | |
| 1478 sltu AT,c_1,t_1 | |
| 1479 daddu t_2,AT | |
| 1480 daddu c_2,t_2 | |
| 1481 sltu AT,c_2,t_2 | |
| 1482 daddu c_3,AT | |
| 1483 sd c_1,24(a0) /* r[3]=c1; */ | |
| 1484 /* column 4: accumulator order is (c2,c3,c1) */ | |
| 1485 dmultu a_3,b_1 /* mul_add_c(a[3],b[1],c2,c3,c1); */ | |
| 1486 mflo t_1 | |
| 1487 mfhi t_2 | |
| 1488 daddu c_2,t_1 | |
| 1489 sltu AT,c_2,t_1 | |
| 1490 daddu t_2,AT | |
| 1491 daddu c_3,t_2 | |
| 1492 sltu c_1,c_3,t_2 /* first product of the column: c1 is set, not accumulated */ | |
| 1493 dmultu a_2,b_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */ | |
| 1494 mflo t_1 | |
| 1495 mfhi t_2 | |
| 1496 daddu c_2,t_1 | |
| 1497 sltu AT,c_2,t_1 | |
| 1498 daddu t_2,AT | |
| 1499 daddu c_3,t_2 | |
| 1500 sltu AT,c_3,t_2 | |
| 1501 daddu c_1,AT | |
| 1502 dmultu a_1,b_3 /* mul_add_c(a[1],b[3],c2,c3,c1); */ | |
| 1503 mflo t_1 | |
| 1504 mfhi t_2 | |
| 1505 daddu c_2,t_1 | |
| 1506 sltu AT,c_2,t_1 | |
| 1507 daddu t_2,AT | |
| 1508 daddu c_3,t_2 | |
| 1509 sltu AT,c_3,t_2 | |
| 1510 daddu c_1,AT | |
| 1511 sd c_2,32(a0) /* r[4]=c2; */ | |
| 1512 /* column 5: accumulator order is (c3,c1,c2) */ | |
| 1513 dmultu a_2,b_3 /* mul_add_c(a[2],b[3],c3,c1,c2); */ | |
| 1514 mflo t_1 | |
| 1515 mfhi t_2 | |
| 1516 daddu c_3,t_1 | |
| 1517 sltu AT,c_3,t_1 | |
| 1518 daddu t_2,AT | |
| 1519 daddu c_1,t_2 | |
| 1520 sltu c_2,c_1,t_2 /* first product of the column: c2 is set, not accumulated */ | |
| 1521 dmultu a_3,b_2 /* mul_add_c(a[3],b[2],c3,c1,c2); */ | |
| 1522 mflo t_1 | |
| 1523 mfhi t_2 | |
| 1524 daddu c_3,t_1 | |
| 1525 sltu AT,c_3,t_1 | |
| 1526 daddu t_2,AT | |
| 1527 daddu c_1,t_2 | |
| 1528 sltu AT,c_1,t_2 | |
| 1529 daddu c_2,AT | |
| 1530 sd c_3,40(a0) /* r[5]=c3; */ | |
| 1531 /* last product: only two result words remain, so no third accumulator word is maintained */ | |
| 1532 dmultu a_3,b_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */ | |
| 1533 mflo t_1 | |
| 1534 mfhi t_2 | |
| 1535 daddu c_1,t_1 | |
| 1536 sltu AT,c_1,t_1 | |
| 1537 daddu t_2,AT | |
| 1538 daddu c_2,t_2 /* top word of the result: no carry-out is computed */ | |
| 1539 sd c_1,48(a0) /* r[6]=c1; */ | |
| 1540 sd c_2,56(a0) /* r[7]=c2; */ | |
| 1541 | |
| 1542 jr ra | |
| 1543 END(bn_mul_comba4) | |
| 1544 | |
| 1545 #undef a_4 | |
| 1546 #undef a_5 | |
| 1547 #undef a_6 | |
| 1548 #undef a_7 | |
| 1549 #define a_4 b_0 | |
| 1550 #define a_5 b_1 | |
| 1551 #define a_6 b_2 | |
| 1552 #define a_7 b_3 | |
| 1553 /* bn_sqr_comba8: 8-word comba squaring. Loads a[0..7] through a1 (a[4..7] are kept in the b_0..b_3 register aliases redefined above; no b[] operand is loaded) and stores the 16 result words at 0..120(a0). c_1/c_2/c_3 rotate as the three-word column accumulator, t_1/t_2 receive LO/HI of each dmultu, AT and a2 are scratch. Every cross product (mul_add_c2) is accumulated by adding the 128-bit product (t_2:t_1) TWICE with a carry test after every add. The earlier form doubled (t_2:t_1) by shifting and then did "t_2 += carry", which wraps when the doubled high word is all ones and silently drops a carry (the CVE-2014-3570 class of defect). HI of a 64x64 product is at most 2^64-2, so "t_2 + AT" below can never wrap. */ | |
| 1554 .align 5 | |
| 1555 LEAF(bn_sqr_comba8) | |
| 1556 .set reorder | |
| 1557 ld a_0,0(a1) | |
| 1558 ld a_1,8(a1) | |
| 1559 ld a_2,16(a1) | |
| 1560 ld a_3,24(a1) | |
| 1561 | |
| 1562 dmultu a_0,a_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */ | |
| 1563 ld a_4,32(a1) | |
| 1564 ld a_5,40(a1) | |
| 1565 ld a_6,48(a1) | |
| 1566 ld a_7,56(a1) | |
| 1567 mflo c_1 | |
| 1568 mfhi c_2 | |
| 1569 sd c_1,0(a0) /* r[0]=c1; */ | |
| 1570 /* column 1 (c2,c3,c1): c3 and c1 are set here, not accumulated */ | |
| 1571 dmultu a_0,a_1 /* mul_add_c2(a[0],b[1],c2,c3,c1); */ | |
| 1572 mflo t_1 | |
| 1573 mfhi t_2 | |
| 1574 daddu c_2,t_1 /* first add of the product */ | |
| 1575 sltu AT,c_2,t_1 | |
| 1576 daddu c_3,t_2,AT /* c3 = hi + carry, cannot wrap */ | |
| 1577 daddu c_2,t_1 /* second add of the same product */ | |
| 1578 sltu AT,c_2,t_1 | |
| 1579 daddu t_2,AT | |
| 1580 daddu c_3,t_2 | |
| 1581 sltu c_1,c_3,t_2 /* c1 = carry out of c3 */ | |
| 1582 sd c_2,8(a0) /* r[1]=c2; */ | |
| 1583 /* column 2 (c3,c1,c2) */ | |
| 1584 dmultu a_2,a_0 /* mul_add_c2(a[2],b[0],c3,c1,c2); */ | |
| 1585 mflo t_1 | |
| 1586 mfhi t_2 | |
| 1587 daddu c_3,t_1 /* first add of the product */ | |
| 1588 sltu AT,c_3,t_1 | |
| 1589 daddu a2,t_2,AT /* a2 = hi + carry; t_2 is kept intact for the second add */ | |
| 1590 daddu c_1,a2 | |
| 1591 sltu c_2,c_1,a2 /* first product of the column: c2 is set, not accumulated */ | |
| 1592 daddu c_3,t_1 /* second add of the same product */ | |
| 1593 sltu AT,c_3,t_1 | |
| 1594 daddu t_2,AT | |
| 1595 daddu c_1,t_2 | |
| 1596 sltu AT,c_1,t_2 | |
| 1597 daddu c_2,AT | |
| 1598 dmultu a_1,a_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */ | |
| 1599 mflo t_1 | |
| 1600 mfhi t_2 | |
| 1601 daddu c_3,t_1 | |
| 1602 sltu AT,c_3,t_1 | |
| 1603 daddu t_2,AT | |
| 1604 daddu c_1,t_2 | |
| 1605 sltu AT,c_1,t_2 | |
| 1606 daddu c_2,AT | |
| 1607 sd c_3,16(a0) /* r[2]=c3; */ | |
| 1608 /* column 3 (c1,c2,c3) */ | |
| 1609 dmultu a_0,a_3 /* mul_add_c2(a[0],b[3],c1,c2,c3); */ | |
| 1610 mflo t_1 | |
| 1611 mfhi t_2 | |
| 1612 daddu c_1,t_1 | |
| 1613 sltu AT,c_1,t_1 | |
| 1614 daddu a2,t_2,AT | |
| 1615 daddu c_2,a2 | |
| 1616 sltu c_3,c_2,a2 | |
| 1617 daddu c_1,t_1 | |
| 1618 sltu AT,c_1,t_1 | |
| 1619 daddu t_2,AT | |
| 1620 daddu c_2,t_2 | |
| 1621 sltu AT,c_2,t_2 | |
| 1622 daddu c_3,AT | |
| 1623 dmultu a_1,a_2 /* mul_add_c2(a[1],b[2],c1,c2,c3); */ | |
| 1624 mflo t_1 | |
| 1625 mfhi t_2 | |
| 1626 daddu c_1,t_1 /* first add of the product */ | |
| 1627 sltu AT,c_1,t_1 | |
| 1628 daddu a2,t_2,AT | |
| 1629 daddu c_2,a2 | |
| 1630 sltu AT,c_2,a2 | |
| 1631 daddu c_3,AT /* later products of a column accumulate into the top word */ | |
| 1632 daddu c_1,t_1 /* second add of the same product */ | |
| 1633 sltu AT,c_1,t_1 | |
| 1634 daddu t_2,AT | |
| 1635 daddu c_2,t_2 | |
| 1636 sltu AT,c_2,t_2 | |
| 1637 daddu c_3,AT | |
| 1638 sd c_1,24(a0) /* r[3]=c1; */ | |
| 1639 /* column 4 (c2,c3,c1) */ | |
| 1640 dmultu a_4,a_0 /* mul_add_c2(a[4],b[0],c2,c3,c1); */ | |
| 1641 mflo t_1 | |
| 1642 mfhi t_2 | |
| 1643 daddu c_2,t_1 | |
| 1644 sltu AT,c_2,t_1 | |
| 1645 daddu a2,t_2,AT | |
| 1646 daddu c_3,a2 | |
| 1647 sltu c_1,c_3,a2 | |
| 1648 daddu c_2,t_1 | |
| 1649 sltu AT,c_2,t_1 | |
| 1650 daddu t_2,AT | |
| 1651 daddu c_3,t_2 | |
| 1652 sltu AT,c_3,t_2 | |
| 1653 daddu c_1,AT | |
| 1654 dmultu a_3,a_1 /* mul_add_c2(a[3],b[1],c2,c3,c1); */ | |
| 1655 mflo t_1 | |
| 1656 mfhi t_2 | |
| 1657 daddu c_2,t_1 | |
| 1658 sltu AT,c_2,t_1 | |
| 1659 daddu a2,t_2,AT | |
| 1660 daddu c_3,a2 | |
| 1661 sltu AT,c_3,a2 | |
| 1662 daddu c_1,AT | |
| 1663 daddu c_2,t_1 | |
| 1664 sltu AT,c_2,t_1 | |
| 1665 daddu t_2,AT | |
| 1666 daddu c_3,t_2 | |
| 1667 sltu AT,c_3,t_2 | |
| 1668 daddu c_1,AT | |
| 1669 dmultu a_2,a_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */ | |
| 1670 mflo t_1 | |
| 1671 mfhi t_2 | |
| 1672 daddu c_2,t_1 | |
| 1673 sltu AT,c_2,t_1 | |
| 1674 daddu t_2,AT | |
| 1675 daddu c_3,t_2 | |
| 1676 sltu AT,c_3,t_2 | |
| 1677 daddu c_1,AT | |
| 1678 sd c_2,32(a0) /* r[4]=c2; */ | |
| 1679 /* column 5 (c3,c1,c2) */ | |
| 1680 dmultu a_0,a_5 /* mul_add_c2(a[0],b[5],c3,c1,c2); */ | |
| 1681 mflo t_1 | |
| 1682 mfhi t_2 | |
| 1683 daddu c_3,t_1 | |
| 1684 sltu AT,c_3,t_1 | |
| 1685 daddu a2,t_2,AT | |
| 1686 daddu c_1,a2 | |
| 1687 sltu c_2,c_1,a2 | |
| 1688 daddu c_3,t_1 | |
| 1689 sltu AT,c_3,t_1 | |
| 1690 daddu t_2,AT | |
| 1691 daddu c_1,t_2 | |
| 1692 sltu AT,c_1,t_2 | |
| 1693 daddu c_2,AT | |
| 1694 dmultu a_1,a_4 /* mul_add_c2(a[1],b[4],c3,c1,c2); */ | |
| 1695 mflo t_1 | |
| 1696 mfhi t_2 | |
| 1697 daddu c_3,t_1 | |
| 1698 sltu AT,c_3,t_1 | |
| 1699 daddu a2,t_2,AT | |
| 1700 daddu c_1,a2 | |
| 1701 sltu AT,c_1,a2 | |
| 1702 daddu c_2,AT | |
| 1703 daddu c_3,t_1 | |
| 1704 sltu AT,c_3,t_1 | |
| 1705 daddu t_2,AT | |
| 1706 daddu c_1,t_2 | |
| 1707 sltu AT,c_1,t_2 | |
| 1708 daddu c_2,AT | |
| 1709 dmultu a_2,a_3 /* mul_add_c2(a[2],b[3],c3,c1,c2); */ | |
| 1710 mflo t_1 | |
| 1711 mfhi t_2 | |
| 1712 daddu c_3,t_1 | |
| 1713 sltu AT,c_3,t_1 | |
| 1714 daddu a2,t_2,AT | |
| 1715 daddu c_1,a2 | |
| 1716 sltu AT,c_1,a2 | |
| 1717 daddu c_2,AT | |
| 1718 daddu c_3,t_1 | |
| 1719 sltu AT,c_3,t_1 | |
| 1720 daddu t_2,AT | |
| 1721 daddu c_1,t_2 | |
| 1722 sltu AT,c_1,t_2 | |
| 1723 daddu c_2,AT | |
| 1724 sd c_3,40(a0) /* r[5]=c3; */ | |
| 1725 /* column 6 (c1,c2,c3) */ | |
| 1726 dmultu a_6,a_0 /* mul_add_c2(a[6],b[0],c1,c2,c3); */ | |
| 1727 mflo t_1 | |
| 1728 mfhi t_2 | |
| 1729 daddu c_1,t_1 | |
| 1730 sltu AT,c_1,t_1 | |
| 1731 daddu a2,t_2,AT | |
| 1732 daddu c_2,a2 | |
| 1733 sltu c_3,c_2,a2 | |
| 1734 daddu c_1,t_1 | |
| 1735 sltu AT,c_1,t_1 | |
| 1736 daddu t_2,AT | |
| 1737 daddu c_2,t_2 | |
| 1738 sltu AT,c_2,t_2 | |
| 1739 daddu c_3,AT | |
| 1740 dmultu a_5,a_1 /* mul_add_c2(a[5],b[1],c1,c2,c3); */ | |
| 1741 mflo t_1 | |
| 1742 mfhi t_2 | |
| 1743 daddu c_1,t_1 | |
| 1744 sltu AT,c_1,t_1 | |
| 1745 daddu a2,t_2,AT | |
| 1746 daddu c_2,a2 | |
| 1747 sltu AT,c_2,a2 | |
| 1748 daddu c_3,AT | |
| 1749 daddu c_1,t_1 | |
| 1750 sltu AT,c_1,t_1 | |
| 1751 daddu t_2,AT | |
| 1752 daddu c_2,t_2 | |
| 1753 sltu AT,c_2,t_2 | |
| 1754 daddu c_3,AT | |
| 1755 dmultu a_4,a_2 /* mul_add_c2(a[4],b[2],c1,c2,c3); */ | |
| 1756 mflo t_1 | |
| 1757 mfhi t_2 | |
| 1758 daddu c_1,t_1 | |
| 1759 sltu AT,c_1,t_1 | |
| 1760 daddu a2,t_2,AT | |
| 1761 daddu c_2,a2 | |
| 1762 sltu AT,c_2,a2 | |
| 1763 daddu c_3,AT | |
| 1764 daddu c_1,t_1 | |
| 1765 sltu AT,c_1,t_1 | |
| 1766 daddu t_2,AT | |
| 1767 daddu c_2,t_2 | |
| 1768 sltu AT,c_2,t_2 | |
| 1769 daddu c_3,AT | |
| 1770 dmultu a_3,a_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */ | |
| 1771 mflo t_1 | |
| 1772 mfhi t_2 | |
| 1773 daddu c_1,t_1 | |
| 1774 sltu AT,c_1,t_1 | |
| 1775 daddu t_2,AT | |
| 1776 daddu c_2,t_2 | |
| 1777 sltu AT,c_2,t_2 | |
| 1778 daddu c_3,AT | |
| 1779 sd c_1,48(a0) /* r[6]=c1; */ | |
| 1780 /* column 7 (c2,c3,c1) */ | |
| 1781 dmultu a_0,a_7 /* mul_add_c2(a[0],b[7],c2,c3,c1); */ | |
| 1782 mflo t_1 | |
| 1783 mfhi t_2 | |
| 1784 daddu c_2,t_1 | |
| 1785 sltu AT,c_2,t_1 | |
| 1786 daddu a2,t_2,AT | |
| 1787 daddu c_3,a2 | |
| 1788 sltu c_1,c_3,a2 | |
| 1789 daddu c_2,t_1 | |
| 1790 sltu AT,c_2,t_1 | |
| 1791 daddu t_2,AT | |
| 1792 daddu c_3,t_2 | |
| 1793 sltu AT,c_3,t_2 | |
| 1794 daddu c_1,AT | |
| 1795 dmultu a_1,a_6 /* mul_add_c2(a[1],b[6],c2,c3,c1); */ | |
| 1796 mflo t_1 | |
| 1797 mfhi t_2 | |
| 1798 daddu c_2,t_1 | |
| 1799 sltu AT,c_2,t_1 | |
| 1800 daddu a2,t_2,AT | |
| 1801 daddu c_3,a2 | |
| 1802 sltu AT,c_3,a2 | |
| 1803 daddu c_1,AT | |
| 1804 daddu c_2,t_1 | |
| 1805 sltu AT,c_2,t_1 | |
| 1806 daddu t_2,AT | |
| 1807 daddu c_3,t_2 | |
| 1808 sltu AT,c_3,t_2 | |
| 1809 daddu c_1,AT | |
| 1810 dmultu a_2,a_5 /* mul_add_c2(a[2],b[5],c2,c3,c1); */ | |
| 1811 mflo t_1 | |
| 1812 mfhi t_2 | |
| 1813 daddu c_2,t_1 | |
| 1814 sltu AT,c_2,t_1 | |
| 1815 daddu a2,t_2,AT | |
| 1816 daddu c_3,a2 | |
| 1817 sltu AT,c_3,a2 | |
| 1818 daddu c_1,AT | |
| 1819 daddu c_2,t_1 | |
| 1820 sltu AT,c_2,t_1 | |
| 1821 daddu t_2,AT | |
| 1822 daddu c_3,t_2 | |
| 1823 sltu AT,c_3,t_2 | |
| 1824 daddu c_1,AT | |
| 1825 dmultu a_3,a_4 /* mul_add_c2(a[3],b[4],c2,c3,c1); */ | |
| 1826 mflo t_1 | |
| 1827 mfhi t_2 | |
| 1828 daddu c_2,t_1 | |
| 1829 sltu AT,c_2,t_1 | |
| 1830 daddu a2,t_2,AT | |
| 1831 daddu c_3,a2 | |
| 1832 sltu AT,c_3,a2 | |
| 1833 daddu c_1,AT | |
| 1834 daddu c_2,t_1 | |
| 1835 sltu AT,c_2,t_1 | |
| 1836 daddu t_2,AT | |
| 1837 daddu c_3,t_2 | |
| 1838 sltu AT,c_3,t_2 | |
| 1839 daddu c_1,AT | |
| 1840 sd c_2,56(a0) /* r[7]=c2; */ | |
| 1841 /* column 8 (c3,c1,c2) */ | |
| 1842 dmultu a_7,a_1 /* mul_add_c2(a[7],b[1],c3,c1,c2); */ | |
| 1843 mflo t_1 | |
| 1844 mfhi t_2 | |
| 1845 daddu c_3,t_1 | |
| 1846 sltu AT,c_3,t_1 | |
| 1847 daddu a2,t_2,AT | |
| 1848 daddu c_1,a2 | |
| 1849 sltu c_2,c_1,a2 | |
| 1850 daddu c_3,t_1 | |
| 1851 sltu AT,c_3,t_1 | |
| 1852 daddu t_2,AT | |
| 1853 daddu c_1,t_2 | |
| 1854 sltu AT,c_1,t_2 | |
| 1855 daddu c_2,AT | |
| 1856 dmultu a_6,a_2 /* mul_add_c2(a[6],b[2],c3,c1,c2); */ | |
| 1857 mflo t_1 | |
| 1858 mfhi t_2 | |
| 1859 daddu c_3,t_1 | |
| 1860 sltu AT,c_3,t_1 | |
| 1861 daddu a2,t_2,AT | |
| 1862 daddu c_1,a2 | |
| 1863 sltu AT,c_1,a2 | |
| 1864 daddu c_2,AT | |
| 1865 daddu c_3,t_1 | |
| 1866 sltu AT,c_3,t_1 | |
| 1867 daddu t_2,AT | |
| 1868 daddu c_1,t_2 | |
| 1869 sltu AT,c_1,t_2 | |
| 1870 daddu c_2,AT | |
| 1871 dmultu a_5,a_3 /* mul_add_c2(a[5],b[3],c3,c1,c2); */ | |
| 1872 mflo t_1 | |
| 1873 mfhi t_2 | |
| 1874 daddu c_3,t_1 | |
| 1875 sltu AT,c_3,t_1 | |
| 1876 daddu a2,t_2,AT | |
| 1877 daddu c_1,a2 | |
| 1878 sltu AT,c_1,a2 | |
| 1879 daddu c_2,AT | |
| 1880 daddu c_3,t_1 | |
| 1881 sltu AT,c_3,t_1 | |
| 1882 daddu t_2,AT | |
| 1883 daddu c_1,t_2 | |
| 1884 sltu AT,c_1,t_2 | |
| 1885 daddu c_2,AT | |
| 1886 dmultu a_4,a_4 /* mul_add_c(a[4],b[4],c3,c1,c2); */ | |
| 1887 mflo t_1 | |
| 1888 mfhi t_2 | |
| 1889 daddu c_3,t_1 | |
| 1890 sltu AT,c_3,t_1 | |
| 1891 daddu t_2,AT | |
| 1892 daddu c_1,t_2 | |
| 1893 sltu AT,c_1,t_2 | |
| 1894 daddu c_2,AT | |
| 1895 sd c_3,64(a0) /* r[8]=c3; */ | |
| 1896 /* column 9 (c1,c2,c3) */ | |
| 1897 dmultu a_2,a_7 /* mul_add_c2(a[2],b[7],c1,c2,c3); */ | |
| 1898 mflo t_1 | |
| 1899 mfhi t_2 | |
| 1900 daddu c_1,t_1 | |
| 1901 sltu AT,c_1,t_1 | |
| 1902 daddu a2,t_2,AT | |
| 1903 daddu c_2,a2 | |
| 1904 sltu c_3,c_2,a2 | |
| 1905 daddu c_1,t_1 | |
| 1906 sltu AT,c_1,t_1 | |
| 1907 daddu t_2,AT | |
| 1908 daddu c_2,t_2 | |
| 1909 sltu AT,c_2,t_2 | |
| 1910 daddu c_3,AT | |
| 1911 dmultu a_3,a_6 /* mul_add_c2(a[3],b[6],c1,c2,c3); */ | |
| 1912 mflo t_1 | |
| 1913 mfhi t_2 | |
| 1914 daddu c_1,t_1 | |
| 1915 sltu AT,c_1,t_1 | |
| 1916 daddu a2,t_2,AT | |
| 1917 daddu c_2,a2 | |
| 1918 sltu AT,c_2,a2 | |
| 1919 daddu c_3,AT | |
| 1920 daddu c_1,t_1 | |
| 1921 sltu AT,c_1,t_1 | |
| 1922 daddu t_2,AT | |
| 1923 daddu c_2,t_2 | |
| 1924 sltu AT,c_2,t_2 | |
| 1925 daddu c_3,AT | |
| 1926 dmultu a_4,a_5 /* mul_add_c2(a[4],b[5],c1,c2,c3); */ | |
| 1927 mflo t_1 | |
| 1928 mfhi t_2 | |
| 1929 daddu c_1,t_1 | |
| 1930 sltu AT,c_1,t_1 | |
| 1931 daddu a2,t_2,AT | |
| 1932 daddu c_2,a2 | |
| 1933 sltu AT,c_2,a2 | |
| 1934 daddu c_3,AT | |
| 1935 daddu c_1,t_1 | |
| 1936 sltu AT,c_1,t_1 | |
| 1937 daddu t_2,AT | |
| 1938 daddu c_2,t_2 | |
| 1939 sltu AT,c_2,t_2 | |
| 1940 daddu c_3,AT | |
| 1941 sd c_1,72(a0) /* r[9]=c1; */ | |
| 1942 /* column 10 (c2,c3,c1) */ | |
| 1943 dmultu a_7,a_3 /* mul_add_c2(a[7],b[3],c2,c3,c1); */ | |
| 1944 mflo t_1 | |
| 1945 mfhi t_2 | |
| 1946 daddu c_2,t_1 | |
| 1947 sltu AT,c_2,t_1 | |
| 1948 daddu a2,t_2,AT | |
| 1949 daddu c_3,a2 | |
| 1950 sltu c_1,c_3,a2 | |
| 1951 daddu c_2,t_1 | |
| 1952 sltu AT,c_2,t_1 | |
| 1953 daddu t_2,AT | |
| 1954 daddu c_3,t_2 | |
| 1955 sltu AT,c_3,t_2 | |
| 1956 daddu c_1,AT | |
| 1957 dmultu a_6,a_4 /* mul_add_c2(a[6],b[4],c2,c3,c1); */ | |
| 1958 mflo t_1 | |
| 1959 mfhi t_2 | |
| 1960 daddu c_2,t_1 | |
| 1961 sltu AT,c_2,t_1 | |
| 1962 daddu a2,t_2,AT | |
| 1963 daddu c_3,a2 | |
| 1964 sltu AT,c_3,a2 | |
| 1965 daddu c_1,AT | |
| 1966 daddu c_2,t_1 | |
| 1967 sltu AT,c_2,t_1 | |
| 1968 daddu t_2,AT | |
| 1969 daddu c_3,t_2 | |
| 1970 sltu AT,c_3,t_2 | |
| 1971 daddu c_1,AT | |
| 1972 dmultu a_5,a_5 /* mul_add_c(a[5],b[5],c2,c3,c1); */ | |
| 1973 mflo t_1 | |
| 1974 mfhi t_2 | |
| 1975 daddu c_2,t_1 | |
| 1976 sltu AT,c_2,t_1 | |
| 1977 daddu t_2,AT | |
| 1978 daddu c_3,t_2 | |
| 1979 sltu AT,c_3,t_2 | |
| 1980 daddu c_1,AT | |
| 1981 sd c_2,80(a0) /* r[10]=c2; */ | |
| 1982 /* column 11 (c3,c1,c2) */ | |
| 1983 dmultu a_4,a_7 /* mul_add_c2(a[4],b[7],c3,c1,c2); */ | |
| 1984 mflo t_1 | |
| 1985 mfhi t_2 | |
| 1986 daddu c_3,t_1 | |
| 1987 sltu AT,c_3,t_1 | |
| 1988 daddu a2,t_2,AT | |
| 1989 daddu c_1,a2 | |
| 1990 sltu c_2,c_1,a2 | |
| 1991 daddu c_3,t_1 | |
| 1992 sltu AT,c_3,t_1 | |
| 1993 daddu t_2,AT | |
| 1994 daddu c_1,t_2 | |
| 1995 sltu AT,c_1,t_2 | |
| 1996 daddu c_2,AT | |
| 1997 dmultu a_5,a_6 /* mul_add_c2(a[5],b[6],c3,c1,c2); */ | |
| 1998 mflo t_1 | |
| 1999 mfhi t_2 | |
| 2000 daddu c_3,t_1 | |
| 2001 sltu AT,c_3,t_1 | |
| 2002 daddu a2,t_2,AT | |
| 2003 daddu c_1,a2 | |
| 2004 sltu AT,c_1,a2 | |
| 2005 daddu c_2,AT | |
| 2006 daddu c_3,t_1 | |
| 2007 sltu AT,c_3,t_1 | |
| 2008 daddu t_2,AT | |
| 2009 daddu c_1,t_2 | |
| 2010 sltu AT,c_1,t_2 | |
| 2011 daddu c_2,AT | |
| 2012 sd c_3,88(a0) /* r[11]=c3; */ | |
| 2013 /* column 12 (c1,c2,c3) */ | |
| 2014 dmultu a_7,a_5 /* mul_add_c2(a[7],b[5],c1,c2,c3); */ | |
| 2015 mflo t_1 | |
| 2016 mfhi t_2 | |
| 2017 daddu c_1,t_1 | |
| 2018 sltu AT,c_1,t_1 | |
| 2019 daddu a2,t_2,AT | |
| 2020 daddu c_2,a2 | |
| 2021 sltu c_3,c_2,a2 | |
| 2022 daddu c_1,t_1 | |
| 2023 sltu AT,c_1,t_1 | |
| 2024 daddu t_2,AT | |
| 2025 daddu c_2,t_2 | |
| 2026 sltu AT,c_2,t_2 | |
| 2027 daddu c_3,AT | |
| 2028 dmultu a_6,a_6 /* mul_add_c(a[6],b[6],c1,c2,c3); */ | |
| 2029 mflo t_1 | |
| 2030 mfhi t_2 | |
| 2031 daddu c_1,t_1 | |
| 2032 sltu AT,c_1,t_1 | |
| 2033 daddu t_2,AT | |
| 2034 daddu c_2,t_2 | |
| 2035 sltu AT,c_2,t_2 | |
| 2036 daddu c_3,AT | |
| 2037 sd c_1,96(a0) /* r[12]=c1; */ | |
| 2038 /* column 13 (c2,c3,c1) */ | |
| 2039 dmultu a_6,a_7 /* mul_add_c2(a[6],b[7],c2,c3,c1); */ | |
| 2040 mflo t_1 | |
| 2041 mfhi t_2 | |
| 2042 daddu c_2,t_1 | |
| 2043 sltu AT,c_2,t_1 | |
| 2044 daddu a2,t_2,AT | |
| 2045 daddu c_3,a2 | |
| 2046 sltu c_1,c_3,a2 | |
| 2047 daddu c_2,t_1 | |
| 2048 sltu AT,c_2,t_1 | |
| 2049 daddu t_2,AT | |
| 2050 daddu c_3,t_2 | |
| 2051 sltu AT,c_3,t_2 | |
| 2052 daddu c_1,AT | |
| 2053 sd c_2,104(a0) /* r[13]=c2; */ | |
| 2054 /* last product: only two result words remain, so no third accumulator word is maintained */ | |
| 2055 dmultu a_7,a_7 /* mul_add_c(a[7],b[7],c3,c1,c2); */ | |
| 2056 mflo t_1 | |
| 2057 mfhi t_2 | |
| 2058 daddu c_3,t_1 | |
| 2059 sltu AT,c_3,t_1 | |
| 2060 daddu t_2,AT | |
| 2061 daddu c_1,t_2 | |
| 2062 sd c_3,112(a0) /* r[14]=c3; */ | |
| 2063 sd c_1,120(a0) /* r[15]=c1; */ | |
| 2064 | |
| 2065 jr ra | |
| 2066 END(bn_sqr_comba8) | |
| 2067 /* bn_sqr_comba4: 4-word comba squaring. Loads a[0..3] through a1 and stores the eight result words at 0..56(a0). c_1/c_2/c_3 rotate as the three-word column accumulator, t_1/t_2 receive LO/HI of each dmultu, AT and a2 are scratch. Every cross product (mul_add_c2) is accumulated by adding the 128-bit product (t_2:t_1) TWICE with a carry test after every add. The earlier form doubled (t_2:t_1) by shifting and then did "t_2 += carry", which wraps when the doubled high word is all ones and silently drops a carry (the CVE-2014-3570 class of defect). HI of a 64x64 product is at most 2^64-2, so "t_2 + AT" below can never wrap. */ | |
| 2068 .align 5 | |
| 2069 LEAF(bn_sqr_comba4) | |
| 2070 .set reorder | |
| 2071 ld a_0,0(a1) | |
| 2072 ld a_1,8(a1) | |
| 2073 ld a_2,16(a1) | |
| 2074 ld a_3,24(a1) | |
| 2075 dmultu a_0,a_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */ | |
| 2076 mflo c_1 | |
| 2077 mfhi c_2 | |
| 2078 sd c_1,0(a0) /* r[0]=c1; */ | |
| 2079 /* column 1 (c2,c3,c1): c3 and c1 are set here, not accumulated */ | |
| 2080 dmultu a_0,a_1 /* mul_add_c2(a[0],b[1],c2,c3,c1); */ | |
| 2081 mflo t_1 | |
| 2082 mfhi t_2 | |
| 2083 daddu c_2,t_1 /* first add of the product */ | |
| 2084 sltu AT,c_2,t_1 | |
| 2085 daddu c_3,t_2,AT /* c3 = hi + carry, cannot wrap */ | |
| 2086 daddu c_2,t_1 /* second add of the same product */ | |
| 2087 sltu AT,c_2,t_1 | |
| 2088 daddu t_2,AT | |
| 2089 daddu c_3,t_2 | |
| 2090 sltu c_1,c_3,t_2 /* c1 = carry out of c3 */ | |
| 2091 sd c_2,8(a0) /* r[1]=c2; */ | |
| 2092 /* column 2 (c3,c1,c2) */ | |
| 2093 dmultu a_2,a_0 /* mul_add_c2(a[2],b[0],c3,c1,c2); */ | |
| 2094 mflo t_1 | |
| 2095 mfhi t_2 | |
| 2096 daddu c_3,t_1 /* first add of the product */ | |
| 2097 sltu AT,c_3,t_1 | |
| 2098 daddu a2,t_2,AT /* a2 = hi + carry; t_2 is kept intact for the second add */ | |
| 2099 daddu c_1,a2 | |
| 2100 sltu c_2,c_1,a2 /* first product of the column: c2 is set, not accumulated */ | |
| 2101 daddu c_3,t_1 /* second add of the same product */ | |
| 2102 sltu AT,c_3,t_1 | |
| 2103 daddu t_2,AT | |
| 2104 daddu c_1,t_2 | |
| 2105 sltu AT,c_1,t_2 | |
| 2106 daddu c_2,AT | |
| 2107 dmultu a_1,a_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */ | |
| 2108 mflo t_1 | |
| 2109 mfhi t_2 | |
| 2110 daddu c_3,t_1 | |
| 2111 sltu AT,c_3,t_1 | |
| 2112 daddu t_2,AT | |
| 2113 daddu c_1,t_2 | |
| 2114 sltu AT,c_1,t_2 | |
| 2115 daddu c_2,AT | |
| 2116 sd c_3,16(a0) /* r[2]=c3; */ | |
| 2117 /* column 3 (c1,c2,c3) */ | |
| 2118 dmultu a_0,a_3 /* mul_add_c2(a[0],b[3],c1,c2,c3); */ | |
| 2119 mflo t_1 | |
| 2120 mfhi t_2 | |
| 2121 daddu c_1,t_1 | |
| 2122 sltu AT,c_1,t_1 | |
| 2123 daddu a2,t_2,AT | |
| 2124 daddu c_2,a2 | |
| 2125 sltu c_3,c_2,a2 | |
| 2126 daddu c_1,t_1 | |
| 2127 sltu AT,c_1,t_1 | |
| 2128 daddu t_2,AT | |
| 2129 daddu c_2,t_2 | |
| 2130 sltu AT,c_2,t_2 | |
| 2131 daddu c_3,AT | |
| 2132 dmultu a_1,a_2 /* mul_add_c2(a[1],b[2],c1,c2,c3); */ | |
| 2133 mflo t_1 | |
| 2134 mfhi t_2 | |
| 2135 daddu c_1,t_1 /* first add of the product */ | |
| 2136 sltu AT,c_1,t_1 | |
| 2137 daddu a2,t_2,AT | |
| 2138 daddu c_2,a2 | |
| 2139 sltu AT,c_2,a2 | |
| 2140 daddu c_3,AT /* later products of a column accumulate into the top word */ | |
| 2141 daddu c_1,t_1 /* second add of the same product */ | |
| 2142 sltu AT,c_1,t_1 | |
| 2143 daddu t_2,AT | |
| 2144 daddu c_2,t_2 | |
| 2145 sltu AT,c_2,t_2 | |
| 2146 daddu c_3,AT | |
| 2147 sd c_1,24(a0) /* r[3]=c1; */ | |
| 2148 /* column 4 (c2,c3,c1) */ | |
| 2149 dmultu a_3,a_1 /* mul_add_c2(a[3],b[1],c2,c3,c1); */ | |
| 2150 mflo t_1 | |
| 2151 mfhi t_2 | |
| 2152 daddu c_2,t_1 | |
| 2153 sltu AT,c_2,t_1 | |
| 2154 daddu a2,t_2,AT | |
| 2155 daddu c_3,a2 | |
| 2156 sltu c_1,c_3,a2 | |
| 2157 daddu c_2,t_1 | |
| 2158 sltu AT,c_2,t_1 | |
| 2159 daddu t_2,AT | |
| 2160 daddu c_3,t_2 | |
| 2161 sltu AT,c_3,t_2 | |
| 2162 daddu c_1,AT | |
| 2163 dmultu a_2,a_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */ | |
| 2164 mflo t_1 | |
| 2165 mfhi t_2 | |
| 2166 daddu c_2,t_1 | |
| 2167 sltu AT,c_2,t_1 | |
| 2168 daddu t_2,AT | |
| 2169 daddu c_3,t_2 | |
| 2170 sltu AT,c_3,t_2 | |
| 2171 daddu c_1,AT | |
| 2172 sd c_2,32(a0) /* r[4]=c2; */ | |
| 2173 /* column 5 (c3,c1,c2) */ | |
| 2174 dmultu a_2,a_3 /* mul_add_c2(a[2],b[3],c3,c1,c2); */ | |
| 2175 mflo t_1 | |
| 2176 mfhi t_2 | |
| 2177 daddu c_3,t_1 | |
| 2178 sltu AT,c_3,t_1 | |
| 2179 daddu a2,t_2,AT | |
| 2180 daddu c_1,a2 | |
| 2181 sltu c_2,c_1,a2 | |
| 2182 daddu c_3,t_1 | |
| 2183 sltu AT,c_3,t_1 | |
| 2184 daddu t_2,AT | |
| 2185 daddu c_1,t_2 | |
| 2186 sltu AT,c_1,t_2 | |
| 2187 daddu c_2,AT | |
| 2188 sd c_3,40(a0) /* r[5]=c3; */ | |
| 2189 /* last product: only two result words remain, so no third accumulator word is maintained */ | |
| 2190 dmultu a_3,a_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */ | |
| 2191 mflo t_1 | |
| 2192 mfhi t_2 | |
| 2193 daddu c_1,t_1 | |
| 2194 sltu AT,c_1,t_1 | |
| 2195 daddu t_2,AT | |
| 2196 daddu c_2,t_2 | |
| 2197 sd c_1,48(a0) /* r[6]=c1; */ | |
| 2198 sd c_2,56(a0) /* r[7]=c2; */ | |
| 2199 | |
| 2200 jr ra | |
| 2201 END(bn_sqr_comba4) | |
| OLD | NEW |