| OLD | NEW |
| (Empty) |
| 1 #!/usr/bin/env perl | |
| 2 # | |
| 3 # ==================================================================== | |
| 4 # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL | |
| 5 # project. | |
| 6 # | |
| 7 # Rights for redistribution and usage in source and binary forms are | |
| 8 # granted according to the OpenSSL license. Warranty of any kind is | |
| 9 # disclaimed. | |
| 10 # ==================================================================== | |
| 11 | |
| 12 | |
| 13 # July 1999 | |
| 14 # | |
| 15 # This is drop-in MIPS III/IV ISA replacement for crypto/bn/bn_asm.c. | |
| 16 # | |
| 17 # The module is designed to work with either of the "new" MIPS ABI(5), | |
| 18 # namely N32 or N64, offered by IRIX 6.x. It's not meant to work under | |
| 19 # IRIX 5.x not only because it doesn't support new ABIs but also | |
| 20 # because 5.x kernels put R4x00 CPU into 32-bit mode and all those | |
| 21 # 64-bit instructions (daddu, dmultu, etc.) found below gonna only | |
| 22 # cause illegal instruction exception:-( | |
| 23 # | |
| 24 # In addition the code depends on preprocessor flags set up by MIPSpro | |
| 25 # compiler driver (either as or cc) and therefore (probably?) can't be | |
| 26 # compiled by the GNU assembler. GNU C driver manages fine though... | |
| 27 # I mean as long as -mmips-as is specified or is the default option, | |
| 28 # because then it simply invokes /usr/bin/as which in turn takes | |
| 29 # perfect care of the preprocessor definitions. Another neat feature | |
| 30 # offered by the MIPSpro assembler is an optimization pass. This gave | |
| 31 # me the opportunity to have the code looking more regular as all those | |
| 32 # architecture dependent instruction rescheduling details were left to | |
| 33 # the assembler. Cool, huh? | |
| 34 # | |
| 35 # Performance improvement is astonishing! 'apps/openssl speed rsa dsa' | |
| 36 # goes way over 3 times faster! | |
| 37 # | |
| 38 # <appro@fy.chalmers.se> | |
| 39 | |
| 40 # October 2010 | |
| 41 # | |
| 42 # Adapt the module even for 32-bit ABIs and other OSes. The former was | |
| 43 # achieved by mechanical replacement of 64-bit arithmetic instructions | |
| 44 # such as dmultu, daddu, etc. with their 32-bit counterparts and | |
| 45 # adjusting offsets denoting multiples of BN_ULONG. Above mentioned | |
| 46 # >3x performance improvement naturally does not apply to 32-bit code | |
| 47 # [because there is no instruction 32-bit compiler can't use], one | |
| 48 # has to be content with 40-85% improvement depending on benchmark and | |
| 49 # key length, more for longer keys. | |
| 50 | |
| 51 $flavour = shift; # 1st argument: ABI flavour, tested below against /64|n32/ and /nubi/ | |
| 52 while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} # skip arguments until one looks like a file name (name.ext) | |
| 53 if ($output) { open(STDOUT,'>',$output) or die "can't open $output: $!"; } # no file name found: keep the inherited STDOUT; a failed open is now fatal instead of silent | |
| 54 | |
| 55 if ($flavour =~ /64|n32/i) { # flavour name contains "64" or "n32": use the doubleword instruction forms | |
| 56 $LD="ld"; # load one bignum word | |
| 57 $ST="sd"; # store one bignum word | |
| 58 $MULTU="dmultu"; # unsigned multiply, result read back with mflo/mfhi | |
| 59 $DIVU="ddivu"; # unsigned divide, quotient read back with mflo | |
| 60 $ADDU="daddu"; | |
| 61 $SUBU="dsubu"; | |
| 62 $SRL="dsrl"; | |
| 63 $SLL="dsll"; | |
| 64 $BNSZ=8; # size of one bignum word in bytes; scales every array offset below | |
| 65 $PTR_ADD="daddu"; # pointer arithmetic on $a0..$a2 and $sp | |
| 66 $PTR_SUB="dsubu"; | |
| 67 $SZREG=8; # size of one stack slot in the register save frames | |
| 68 $REG_S="sd"; # spill a register to its frame slot | |
| 69 $REG_L="ld"; # reload a register from its frame slot | |
| 70 } else { # every other flavour: same roles, 32-bit instruction forms and 4-byte words | |
| 71 $LD="lw"; | |
| 72 $ST="sw"; | |
| 73 $MULTU="multu"; | |
| 74 $DIVU="divu"; | |
| 75 $ADDU="addu"; | |
| 76 $SUBU="subu"; | |
| 77 $SRL="srl"; | |
| 78 $SLL="sll"; | |
| 79 $BNSZ=4; | |
| 80 $PTR_ADD="addu"; | |
| 81 $PTR_SUB="subu"; | |
| 82 $SZREG=4; | |
| 83 $REG_S="sw"; | |
| 84 $REG_L="lw"; | |
| 85 $code=".set mips2\n"; # 32-bit output only: $code starts with this directive, everything else is appended after it | |
| 86 } | |
| 87 | |
| 88 # Below is N32/64 register layout used in the original module. | |
| 89 # | |
| 90 ($zero,$at,$v0,$v1)=map("\$$_",(0..3)); # each name holds the literal text "$<number>" that is interpolated into the assembly | |
| 91 ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11)); | |
| 92 ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25)); | |
| 93 ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23)); | |
| 94 ($gp,$sp,$fp,$ra)=map("\$$_",(28..31)); | |
| 95 ($ta0,$ta1,$ta2,$ta3)=($a4,$a5,$a6,$a7); # $ta0..$ta3 are second names for registers 8..11 | |
| 96 # | |
| 97 # No special adaptation is required for O32. NUBI on the other hand | |
| 98 # is treated by saving/restoring ($v1,$t0..$t3). | |
| 99 | |
| 100 $gp=$v1 if ($flavour =~ /nubi/i); # under NUBI the "$gp" slot in the prologues/epilogues therefore saves and restores register 3 | |
| 101 | |
| 102 $minus4=$v1; # register 3 also holds the constant -4 used to mask a word count down to a multiple of four | |
| 103 | |
| 104 $code.=<<___; # identification strings, then the bn_mul_add_words entry stub: branch to the _internal body when $a2 > 0, otherwise return 0 ($v0 is zeroed in the branch delay slot) | |
| 105 .rdata | |
| 106 .asciiz "mips3.s, Version 1.2" | |
| 107 .asciiz "MIPS II/III/IV ISA artwork by Andy Polyakov <appro\@fy.chalmers.se>" | |
| 108 | |
| 109 .text | |
| 110 .set noat | |
| 111 | |
| 112 .align 5 | |
| 113 .globl bn_mul_add_words | |
| 114 .ent bn_mul_add_words | |
| 115 bn_mul_add_words: | |
| 116 .set noreorder | |
| 117 bgtz $a2,bn_mul_add_words_internal | |
| 118 move $v0,$zero | |
| 119 jr $ra | |
| 120 move $a0,$v0 | |
| 121 .end bn_mul_add_words | |
| 122 | |
| 123 .align 5 | |
| 124 .ent bn_mul_add_words_internal | |
| 125 bn_mul_add_words_internal: | |
| 126 ___ | |
| 127 $code.=<<___ if ($flavour =~ /nubi/i); # NUBI only: open a 6-slot frame and spill $ra, $t3..$t0 and $gp (which names $v1 under NUBI) | |
| 128 .frame $sp,6*$SZREG,$ra | |
| 129 .mask 0x8000f008,-$SZREG | |
| 130 .set noreorder | |
| 131 $PTR_SUB $sp,6*$SZREG | |
| 132 $REG_S $ra,5*$SZREG($sp) | |
| 133 $REG_S $t3,4*$SZREG($sp) | |
| 134 $REG_S $t2,3*$SZREG($sp) | |
| 135 $REG_S $t1,2*$SZREG($sp) | |
| 136 $REG_S $t0,1*$SZREG($sp) | |
| 137 $REG_S $gp,0*$SZREG($sp) | |
| 138 ___ | |
| 139 $code.=<<___; # body: each word at ($a1) is multiplied by $a3, added to the word at ($a0) plus the carry in $v0, and stored back to ($a0); four words per loop pass while ($a2 & -4) is non-zero, then up to three single-word tail steps | |
| 140 .set reorder | |
| 141 li $minus4,-4 | |
| 142 and $ta0,$a2,$minus4 | |
| 143 $LD $t0,0($a1) | |
| 144 beqz $ta0,.L_bn_mul_add_words_tail | |
| 145 | |
| 146 .L_bn_mul_add_words_loop: | |
| 147 $MULTU $t0,$a3 | |
| 148 $LD $t1,0($a0) | |
| 149 $LD $t2,$BNSZ($a1) | |
| 150 $LD $t3,$BNSZ($a0) | |
| 151 $LD $ta0,2*$BNSZ($a1) | |
| 152 $LD $ta1,2*$BNSZ($a0) | |
| 153 $ADDU $t1,$v0 | |
| 154 sltu $v0,$t1,$v0 # All manuals say it "compares 32-bit | |
| 155 # values", but it seems to work fine | |
| 156 # even on 64-bit registers. | |
| 157 mflo $at | |
| 158 mfhi $t0 | |
| 159 $ADDU $t1,$at | |
| 160 $ADDU $v0,$t0 | |
| 161 $MULTU $t2,$a3 | |
| 162 sltu $at,$t1,$at | |
| 163 $ST $t1,0($a0) | |
| 164 $ADDU $v0,$at | |
| 165 | |
| 166 $LD $ta2,3*$BNSZ($a1) | |
| 167 $LD $ta3,3*$BNSZ($a0) | |
| 168 $ADDU $t3,$v0 | |
| 169 sltu $v0,$t3,$v0 | |
| 170 mflo $at | |
| 171 mfhi $t2 | |
| 172 $ADDU $t3,$at | |
| 173 $ADDU $v0,$t2 | |
| 174 $MULTU $ta0,$a3 | |
| 175 sltu $at,$t3,$at | |
| 176 $ST $t3,$BNSZ($a0) | |
| 177 $ADDU $v0,$at | |
| 178 | |
| 179 subu $a2,4 | |
| 180 $PTR_ADD $a0,4*$BNSZ | |
| 181 $PTR_ADD $a1,4*$BNSZ | |
| 182 $ADDU $ta1,$v0 | |
| 183 sltu $v0,$ta1,$v0 | |
| 184 mflo $at | |
| 185 mfhi $ta0 | |
| 186 $ADDU $ta1,$at | |
| 187 $ADDU $v0,$ta0 | |
| 188 $MULTU $ta2,$a3 | |
| 189 sltu $at,$ta1,$at | |
| 190 $ST $ta1,-2*$BNSZ($a0) | |
| 191 $ADDU $v0,$at | |
| 192 | |
| 193 | |
| 194 and $ta0,$a2,$minus4 | |
| 195 $ADDU $ta3,$v0 | |
| 196 sltu $v0,$ta3,$v0 | |
| 197 mflo $at | |
| 198 mfhi $ta2 | |
| 199 $ADDU $ta3,$at | |
| 200 $ADDU $v0,$ta2 | |
| 201 sltu $at,$ta3,$at | |
| 202 $ST $ta3,-$BNSZ($a0) | |
| 203 $ADDU $v0,$at | |
| 204 .set noreorder | |
| 205 bgtzl $ta0,.L_bn_mul_add_words_loop | |
| 206 $LD $t0,0($a1) | |
| 207 | |
| 208 beqz $a2,.L_bn_mul_add_words_return | |
| 209 nop | |
| 210 | |
| 211 .L_bn_mul_add_words_tail: | |
| 212 .set reorder | |
| 213 $LD $t0,0($a1) | |
| 214 $MULTU $t0,$a3 | |
| 215 $LD $t1,0($a0) | |
| 216 subu $a2,1 | |
| 217 $ADDU $t1,$v0 | |
| 218 sltu $v0,$t1,$v0 | |
| 219 mflo $at | |
| 220 mfhi $t0 | |
| 221 $ADDU $t1,$at | |
| 222 $ADDU $v0,$t0 | |
| 223 sltu $at,$t1,$at | |
| 224 $ST $t1,0($a0) | |
| 225 $ADDU $v0,$at | |
| 226 beqz $a2,.L_bn_mul_add_words_return | |
| 227 | |
| 228 $LD $t0,$BNSZ($a1) | |
| 229 $MULTU $t0,$a3 | |
| 230 $LD $t1,$BNSZ($a0) | |
| 231 subu $a2,1 | |
| 232 $ADDU $t1,$v0 | |
| 233 sltu $v0,$t1,$v0 | |
| 234 mflo $at | |
| 235 mfhi $t0 | |
| 236 $ADDU $t1,$at | |
| 237 $ADDU $v0,$t0 | |
| 238 sltu $at,$t1,$at | |
| 239 $ST $t1,$BNSZ($a0) | |
| 240 $ADDU $v0,$at | |
| 241 beqz $a2,.L_bn_mul_add_words_return | |
| 242 | |
| 243 $LD $t0,2*$BNSZ($a1) | |
| 244 $MULTU $t0,$a3 | |
| 245 $LD $t1,2*$BNSZ($a0) | |
| 246 $ADDU $t1,$v0 | |
| 247 sltu $v0,$t1,$v0 | |
| 248 mflo $at | |
| 249 mfhi $t0 | |
| 250 $ADDU $t1,$at | |
| 251 $ADDU $v0,$t0 | |
| 252 sltu $at,$t1,$at | |
| 253 $ST $t1,2*$BNSZ($a0) | |
| 254 $ADDU $v0,$at | |
| 255 | |
| 256 .L_bn_mul_add_words_return: | |
| 257 .set noreorder | |
| 258 ___ | |
| 259 $code.=<<___ if ($flavour =~ /nubi/i); # NUBI only: reload $t3..$t0 and $gp, then release the 6-slot frame | |
| 260 $REG_L $t3,4*$SZREG($sp) | |
| 261 $REG_L $t2,3*$SZREG($sp) | |
| 262 $REG_L $t1,2*$SZREG($sp) | |
| 263 $REG_L $t0,1*$SZREG($sp) | |
| 264 $REG_L $gp,0*$SZREG($sp) | |
| 265 $PTR_ADD $sp,6*$SZREG | |
| 266 ___ | |
| 267 $code.=<<___; # return from bn_mul_add_words_internal with the final carry in $v0 (copied to $a0 in the delay slot); then the bn_mul_words entry stub, which returns 0 unless $a2 > 0 | |
| 268 jr $ra | |
| 269 move $a0,$v0 | |
| 270 .end bn_mul_add_words_internal | |
| 271 | |
| 272 .align 5 | |
| 273 .globl bn_mul_words | |
| 274 .ent bn_mul_words | |
| 275 bn_mul_words: | |
| 276 .set noreorder | |
| 277 bgtz $a2,bn_mul_words_internal | |
| 278 move $v0,$zero | |
| 279 jr $ra | |
| 280 move $a0,$v0 | |
| 281 .end bn_mul_words | |
| 282 | |
| 283 .align 5 | |
| 284 .ent bn_mul_words_internal | |
| 285 bn_mul_words_internal: | |
| 286 ___ | |
| 287 $code.=<<___ if ($flavour =~ /nubi/i); # NUBI only: open a 6-slot frame and spill $ra, $t3..$t0 and $gp (which names $v1 under NUBI) | |
| 288 .frame $sp,6*$SZREG,$ra | |
| 289 .mask 0x8000f008,-$SZREG | |
| 290 .set noreorder | |
| 291 $PTR_SUB $sp,6*$SZREG | |
| 292 $REG_S $ra,5*$SZREG($sp) | |
| 293 $REG_S $t3,4*$SZREG($sp) | |
| 294 $REG_S $t2,3*$SZREG($sp) | |
| 295 $REG_S $t1,2*$SZREG($sp) | |
| 296 $REG_S $t0,1*$SZREG($sp) | |
| 297 $REG_S $gp,0*$SZREG($sp) | |
| 298 ___ | |
| 299 $code.=<<___; # body: the low half of (word at ($a1)) * $a3 plus the carry in $v0 is stored to ($a0); the high half plus the carry-out becomes the next $v0; four words per loop pass while ($a2 & -4) is non-zero, then up to three tail words | |
| 300 .set reorder | |
| 301 li $minus4,-4 | |
| 302 and $ta0,$a2,$minus4 | |
| 303 $LD $t0,0($a1) | |
| 304 beqz $ta0,.L_bn_mul_words_tail | |
| 305 | |
| 306 .L_bn_mul_words_loop: | |
| 307 $MULTU $t0,$a3 | |
| 308 $LD $t2,$BNSZ($a1) | |
| 309 $LD $ta0,2*$BNSZ($a1) | |
| 310 $LD $ta2,3*$BNSZ($a1) | |
| 311 mflo $at | |
| 312 mfhi $t0 | |
| 313 $ADDU $v0,$at | |
| 314 sltu $t1,$v0,$at | |
| 315 $MULTU $t2,$a3 | |
| 316 $ST $v0,0($a0) | |
| 317 $ADDU $v0,$t1,$t0 | |
| 318 | |
| 319 subu $a2,4 | |
| 320 $PTR_ADD $a0,4*$BNSZ | |
| 321 $PTR_ADD $a1,4*$BNSZ | |
| 322 mflo $at | |
| 323 mfhi $t2 | |
| 324 $ADDU $v0,$at | |
| 325 sltu $t3,$v0,$at | |
| 326 $MULTU $ta0,$a3 | |
| 327 $ST $v0,-3*$BNSZ($a0) | |
| 328 $ADDU $v0,$t3,$t2 | |
| 329 | |
| 330 mflo $at | |
| 331 mfhi $ta0 | |
| 332 $ADDU $v0,$at | |
| 333 sltu $ta1,$v0,$at | |
| 334 $MULTU $ta2,$a3 | |
| 335 $ST $v0,-2*$BNSZ($a0) | |
| 336 $ADDU $v0,$ta1,$ta0 | |
| 337 | |
| 338 and $ta0,$a2,$minus4 | |
| 339 mflo $at | |
| 340 mfhi $ta2 | |
| 341 $ADDU $v0,$at | |
| 342 sltu $ta3,$v0,$at | |
| 343 $ST $v0,-$BNSZ($a0) | |
| 344 $ADDU $v0,$ta3,$ta2 | |
| 345 .set noreorder | |
| 346 bgtzl $ta0,.L_bn_mul_words_loop | |
| 347 $LD $t0,0($a1) | |
| 348 | |
| 349 beqz $a2,.L_bn_mul_words_return | |
| 350 nop | |
| 351 | |
| 352 .L_bn_mul_words_tail: | |
| 353 .set reorder | |
| 354 $LD $t0,0($a1) | |
| 355 $MULTU $t0,$a3 | |
| 356 subu $a2,1 | |
| 357 mflo $at | |
| 358 mfhi $t0 | |
| 359 $ADDU $v0,$at | |
| 360 sltu $t1,$v0,$at | |
| 361 $ST $v0,0($a0) | |
| 362 $ADDU $v0,$t1,$t0 | |
| 363 beqz $a2,.L_bn_mul_words_return | |
| 364 | |
| 365 $LD $t0,$BNSZ($a1) | |
| 366 $MULTU $t0,$a3 | |
| 367 subu $a2,1 | |
| 368 mflo $at | |
| 369 mfhi $t0 | |
| 370 $ADDU $v0,$at | |
| 371 sltu $t1,$v0,$at | |
| 372 $ST $v0,$BNSZ($a0) | |
| 373 $ADDU $v0,$t1,$t0 | |
| 374 beqz $a2,.L_bn_mul_words_return | |
| 375 | |
| 376 $LD $t0,2*$BNSZ($a1) | |
| 377 $MULTU $t0,$a3 | |
| 378 mflo $at | |
| 379 mfhi $t0 | |
| 380 $ADDU $v0,$at | |
| 381 sltu $t1,$v0,$at | |
| 382 $ST $v0,2*$BNSZ($a0) | |
| 383 $ADDU $v0,$t1,$t0 | |
| 384 | |
| 385 .L_bn_mul_words_return: | |
| 386 .set noreorder | |
| 387 ___ | |
| 388 $code.=<<___ if ($flavour =~ /nubi/i); # NUBI only: reload $t3..$t0 and $gp, then release the 6-slot frame | |
| 389 $REG_L $t3,4*$SZREG($sp) | |
| 390 $REG_L $t2,3*$SZREG($sp) | |
| 391 $REG_L $t1,2*$SZREG($sp) | |
| 392 $REG_L $t0,1*$SZREG($sp) | |
| 393 $REG_L $gp,0*$SZREG($sp) | |
| 394 $PTR_ADD $sp,6*$SZREG | |
| 395 ___ | |
| 396 $code.=<<___; # return from bn_mul_words_internal with the final carry in $v0 (copied to $a0 in the delay slot); then the bn_sqr_words entry stub, which returns 0 unless $a2 > 0 | |
| 397 jr $ra | |
| 398 move $a0,$v0 | |
| 399 .end bn_mul_words_internal | |
| 400 | |
| 401 .align 5 | |
| 402 .globl bn_sqr_words | |
| 403 .ent bn_sqr_words | |
| 404 bn_sqr_words: | |
| 405 .set noreorder | |
| 406 bgtz $a2,bn_sqr_words_internal | |
| 407 move $v0,$zero | |
| 408 jr $ra | |
| 409 move $a0,$v0 | |
| 410 .end bn_sqr_words | |
| 411 | |
| 412 .align 5 | |
| 413 .ent bn_sqr_words_internal | |
| 414 bn_sqr_words_internal: | |
| 415 ___ | |
| 416 $code.=<<___ if ($flavour =~ /nubi/i); # NUBI only: open a 6-slot frame and spill $ra, $t3..$t0 and $gp (which names $v1 under NUBI) | |
| 417 .frame $sp,6*$SZREG,$ra | |
| 418 .mask 0x8000f008,-$SZREG | |
| 419 .set noreorder | |
| 420 $PTR_SUB $sp,6*$SZREG | |
| 421 $REG_S $ra,5*$SZREG($sp) | |
| 422 $REG_S $t3,4*$SZREG($sp) | |
| 423 $REG_S $t2,3*$SZREG($sp) | |
| 424 $REG_S $t1,2*$SZREG($sp) | |
| 425 $REG_S $t0,1*$SZREG($sp) | |
| 426 $REG_S $gp,0*$SZREG($sp) | |
| 427 ___ | |
| 428 $code.=<<___; # body: each word at ($a1) is multiplied by itself and the low and high halves are stored to two consecutive words at ($a0), so $a0 advances twice as fast as $a1; four inputs per loop pass while ($a2 & -4) is non-zero, then up to three tail words | |
| 429 .set reorder | |
| 430 li $minus4,-4 | |
| 431 and $ta0,$a2,$minus4 | |
| 432 $LD $t0,0($a1) | |
| 433 beqz $ta0,.L_bn_sqr_words_tail | |
| 434 | |
| 435 .L_bn_sqr_words_loop: | |
| 436 $MULTU $t0,$t0 | |
| 437 $LD $t2,$BNSZ($a1) | |
| 438 $LD $ta0,2*$BNSZ($a1) | |
| 439 $LD $ta2,3*$BNSZ($a1) | |
| 440 mflo $t1 | |
| 441 mfhi $t0 | |
| 442 $ST $t1,0($a0) | |
| 443 $ST $t0,$BNSZ($a0) | |
| 444 | |
| 445 $MULTU $t2,$t2 | |
| 446 subu $a2,4 | |
| 447 $PTR_ADD $a0,8*$BNSZ | |
| 448 $PTR_ADD $a1,4*$BNSZ | |
| 449 mflo $t3 | |
| 450 mfhi $t2 | |
| 451 $ST $t3,-6*$BNSZ($a0) | |
| 452 $ST $t2,-5*$BNSZ($a0) | |
| 453 | |
| 454 $MULTU $ta0,$ta0 | |
| 455 mflo $ta1 | |
| 456 mfhi $ta0 | |
| 457 $ST $ta1,-4*$BNSZ($a0) | |
| 458 $ST $ta0,-3*$BNSZ($a0) | |
| 459 | |
| 460 | |
| 461 $MULTU $ta2,$ta2 | |
| 462 and $ta0,$a2,$minus4 | |
| 463 mflo $ta3 | |
| 464 mfhi $ta2 | |
| 465 $ST $ta3,-2*$BNSZ($a0) | |
| 466 $ST $ta2,-$BNSZ($a0) | |
| 467 | |
| 468 .set noreorder | |
| 469 bgtzl $ta0,.L_bn_sqr_words_loop | |
| 470 $LD $t0,0($a1) | |
| 471 | |
| 472 beqz $a2,.L_bn_sqr_words_return | |
| 473 nop | |
| 474 | |
| 475 .L_bn_sqr_words_tail: | |
| 476 .set reorder | |
| 477 $LD $t0,0($a1) | |
| 478 $MULTU $t0,$t0 | |
| 479 subu $a2,1 | |
| 480 mflo $t1 | |
| 481 mfhi $t0 | |
| 482 $ST $t1,0($a0) | |
| 483 $ST $t0,$BNSZ($a0) | |
| 484 beqz $a2,.L_bn_sqr_words_return | |
| 485 | |
| 486 $LD $t0,$BNSZ($a1) | |
| 487 $MULTU $t0,$t0 | |
| 488 subu $a2,1 | |
| 489 mflo $t1 | |
| 490 mfhi $t0 | |
| 491 $ST $t1,2*$BNSZ($a0) | |
| 492 $ST $t0,3*$BNSZ($a0) | |
| 493 beqz $a2,.L_bn_sqr_words_return | |
| 494 | |
| 495 $LD $t0,2*$BNSZ($a1) | |
| 496 $MULTU $t0,$t0 | |
| 497 mflo $t1 | |
| 498 mfhi $t0 | |
| 499 $ST $t1,4*$BNSZ($a0) | |
| 500 $ST $t0,5*$BNSZ($a0) | |
| 501 | |
| 502 .L_bn_sqr_words_return: | |
| 503 .set noreorder | |
| 504 ___ | |
| 505 $code.=<<___ if ($flavour =~ /nubi/i); # NUBI only: reload $t3..$t0 and $gp, then release the 6-slot frame | |
| 506 $REG_L $t3,4*$SZREG($sp) | |
| 507 $REG_L $t2,3*$SZREG($sp) | |
| 508 $REG_L $t1,2*$SZREG($sp) | |
| 509 $REG_L $t0,1*$SZREG($sp) | |
| 510 $REG_L $gp,0*$SZREG($sp) | |
| 511 $PTR_ADD $sp,6*$SZREG | |
| 512 ___ | |
| 513 $code.=<<___; # return from bn_sqr_words_internal ($v0 still holds the 0 set by the entry stub); then the bn_add_words entry stub, which returns 0 unless the count in $a3 is > 0 | |
| 514 jr $ra | |
| 515 move $a0,$v0 | |
| 516 | |
| 517 .end bn_sqr_words_internal | |
| 518 | |
| 519 .align 5 | |
| 520 .globl bn_add_words | |
| 521 .ent bn_add_words | |
| 522 bn_add_words: | |
| 523 .set noreorder | |
| 524 bgtz $a3,bn_add_words_internal | |
| 525 move $v0,$zero | |
| 526 jr $ra | |
| 527 move $a0,$v0 | |
| 528 .end bn_add_words | |
| 529 | |
| 530 .align 5 | |
| 531 .ent bn_add_words_internal | |
| 532 bn_add_words_internal: | |
| 533 ___ | |
| 534 $code.=<<___ if ($flavour =~ /nubi/i); # NUBI only: open a 6-slot frame and spill $ra, $t3..$t0 and $gp (which names $v1 under NUBI) | |
| 535 .frame $sp,6*$SZREG,$ra | |
| 536 .mask 0x8000f008,-$SZREG | |
| 537 .set noreorder | |
| 538 $PTR_SUB $sp,6*$SZREG | |
| 539 $REG_S $ra,5*$SZREG($sp) | |
| 540 $REG_S $t3,4*$SZREG($sp) | |
| 541 $REG_S $t2,3*$SZREG($sp) | |
| 542 $REG_S $t1,2*$SZREG($sp) | |
| 543 $REG_S $t0,1*$SZREG($sp) | |
| 544 $REG_S $gp,0*$SZREG($sp) | |
| 545 ___ | |
| 546 $code.=<<___; # body: word at ($a0) = word at ($a1) + word at ($a2) + carry, with the carry kept in $v0 and both partial carries detected with sltu; four words per loop pass while ($a3 & -4) is non-zero, then up to three tail words | |
| 547 .set reorder | |
| 548 li $minus4,-4 | |
| 549 and $at,$a3,$minus4 | |
| 550 $LD $t0,0($a1) | |
| 551 beqz $at,.L_bn_add_words_tail | |
| 552 | |
| 553 .L_bn_add_words_loop: | |
| 554 $LD $ta0,0($a2) | |
| 555 subu $a3,4 | |
| 556 $LD $t1,$BNSZ($a1) | |
| 557 and $at,$a3,$minus4 | |
| 558 $LD $t2,2*$BNSZ($a1) | |
| 559 $PTR_ADD $a2,4*$BNSZ | |
| 560 $LD $t3,3*$BNSZ($a1) | |
| 561 $PTR_ADD $a0,4*$BNSZ | |
| 562 $LD $ta1,-3*$BNSZ($a2) | |
| 563 $PTR_ADD $a1,4*$BNSZ | |
| 564 $LD $ta2,-2*$BNSZ($a2) | |
| 565 $LD $ta3,-$BNSZ($a2) | |
| 566 $ADDU $ta0,$t0 | |
| 567 sltu $t8,$ta0,$t0 | |
| 568 $ADDU $t0,$ta0,$v0 | |
| 569 sltu $v0,$t0,$ta0 | |
| 570 $ST $t0,-4*$BNSZ($a0) | |
| 571 $ADDU $v0,$t8 | |
| 572 | |
| 573 $ADDU $ta1,$t1 | |
| 574 sltu $t9,$ta1,$t1 | |
| 575 $ADDU $t1,$ta1,$v0 | |
| 576 sltu $v0,$t1,$ta1 | |
| 577 $ST $t1,-3*$BNSZ($a0) | |
| 578 $ADDU $v0,$t9 | |
| 579 | |
| 580 $ADDU $ta2,$t2 | |
| 581 sltu $t8,$ta2,$t2 | |
| 582 $ADDU $t2,$ta2,$v0 | |
| 583 sltu $v0,$t2,$ta2 | |
| 584 $ST $t2,-2*$BNSZ($a0) | |
| 585 $ADDU $v0,$t8 | |
| 586 | |
| 587 $ADDU $ta3,$t3 | |
| 588 sltu $t9,$ta3,$t3 | |
| 589 $ADDU $t3,$ta3,$v0 | |
| 590 sltu $v0,$t3,$ta3 | |
| 591 $ST $t3,-$BNSZ($a0) | |
| 592 $ADDU $v0,$t9 | |
| 593 | |
| 594 .set noreorder | |
| 595 bgtzl $at,.L_bn_add_words_loop | |
| 596 $LD $t0,0($a1) | |
| 597 | |
| 598 beqz $a3,.L_bn_add_words_return | |
| 599 nop | |
| 600 | |
| 601 .L_bn_add_words_tail: | |
| 602 .set reorder | |
| 603 $LD $t0,0($a1) | |
| 604 $LD $ta0,0($a2) | |
| 605 $ADDU $ta0,$t0 | |
| 606 subu $a3,1 | |
| 607 sltu $t8,$ta0,$t0 | |
| 608 $ADDU $t0,$ta0,$v0 | |
| 609 sltu $v0,$t0,$ta0 | |
| 610 $ST $t0,0($a0) | |
| 611 $ADDU $v0,$t8 | |
| 612 beqz $a3,.L_bn_add_words_return | |
| 613 | |
| 614 $LD $t1,$BNSZ($a1) | |
| 615 $LD $ta1,$BNSZ($a2) | |
| 616 $ADDU $ta1,$t1 | |
| 617 subu $a3,1 | |
| 618 sltu $t9,$ta1,$t1 | |
| 619 $ADDU $t1,$ta1,$v0 | |
| 620 sltu $v0,$t1,$ta1 | |
| 621 $ST $t1,$BNSZ($a0) | |
| 622 $ADDU $v0,$t9 | |
| 623 beqz $a3,.L_bn_add_words_return | |
| 624 | |
| 625 $LD $t2,2*$BNSZ($a1) | |
| 626 $LD $ta2,2*$BNSZ($a2) | |
| 627 $ADDU $ta2,$t2 | |
| 628 sltu $t8,$ta2,$t2 | |
| 629 $ADDU $t2,$ta2,$v0 | |
| 630 sltu $v0,$t2,$ta2 | |
| 631 $ST $t2,2*$BNSZ($a0) | |
| 632 $ADDU $v0,$t8 | |
| 633 | |
| 634 .L_bn_add_words_return: | |
| 635 .set noreorder | |
| 636 ___ | |
| 637 $code.=<<___ if ($flavour =~ /nubi/i); # NUBI only: reload $t3..$t0 and $gp, then release the 6-slot frame | |
| 638 $REG_L $t3,4*$SZREG($sp) | |
| 639 $REG_L $t2,3*$SZREG($sp) | |
| 640 $REG_L $t1,2*$SZREG($sp) | |
| 641 $REG_L $t0,1*$SZREG($sp) | |
| 642 $REG_L $gp,0*$SZREG($sp) | |
| 643 $PTR_ADD $sp,6*$SZREG | |
| 644 ___ | |
| 645 $code.=<<___; # return from bn_add_words_internal with the final carry in $v0 (copied to $a0 in the delay slot); then the bn_sub_words entry stub, which returns 0 unless the count in $a3 is > 0 | |
| 646 jr $ra | |
| 647 move $a0,$v0 | |
| 648 | |
| 649 .end bn_add_words_internal | |
| 650 | |
| 651 .align 5 | |
| 652 .globl bn_sub_words | |
| 653 .ent bn_sub_words | |
| 654 bn_sub_words: | |
| 655 .set noreorder | |
| 656 bgtz $a3,bn_sub_words_internal | |
| 657 move $v0,$zero | |
| 658 jr $ra | |
| 659 move $a0,$zero | |
| 660 .end bn_sub_words | |
| 661 | |
| 662 .align 5 | |
| 663 .ent bn_sub_words_internal | |
| 664 bn_sub_words_internal: | |
| 665 ___ | |
| 666 $code.=<<___ if ($flavour =~ /nubi/i); # NUBI only: open a 6-slot frame and spill $ra, $t3..$t0 and $gp (which names $v1 under NUBI) | |
| 667 .frame $sp,6*$SZREG,$ra | |
| 668 .mask 0x8000f008,-$SZREG | |
| 669 .set noreorder | |
| 670 $PTR_SUB $sp,6*$SZREG | |
| 671 $REG_S $ra,5*$SZREG($sp) | |
| 672 $REG_S $t3,4*$SZREG($sp) | |
| 673 $REG_S $t2,3*$SZREG($sp) | |
| 674 $REG_S $t1,2*$SZREG($sp) | |
| 675 $REG_S $t0,1*$SZREG($sp) | |
| 676 $REG_S $gp,0*$SZREG($sp) | |
| 677 ___ | |
| 678 $code.=<<___; # body: word at ($a0) = word at ($a1) - word at ($a2) - borrow, with the borrow kept in $v0 (sltu catches the first borrow, sgtu the one caused by subtracting the incoming borrow); four words per loop pass while ($a3 & -4) is non-zero, then up to three tail words | |
| 679 .set reorder | |
| 680 li $minus4,-4 | |
| 681 and $at,$a3,$minus4 | |
| 682 $LD $t0,0($a1) | |
| 683 beqz $at,.L_bn_sub_words_tail | |
| 684 | |
| 685 .L_bn_sub_words_loop: | |
| 686 $LD $ta0,0($a2) | |
| 687 subu $a3,4 | |
| 688 $LD $t1,$BNSZ($a1) | |
| 689 and $at,$a3,$minus4 | |
| 690 $LD $t2,2*$BNSZ($a1) | |
| 691 $PTR_ADD $a2,4*$BNSZ | |
| 692 $LD $t3,3*$BNSZ($a1) | |
| 693 $PTR_ADD $a0,4*$BNSZ | |
| 694 $LD $ta1,-3*$BNSZ($a2) | |
| 695 $PTR_ADD $a1,4*$BNSZ | |
| 696 $LD $ta2,-2*$BNSZ($a2) | |
| 697 $LD $ta3,-$BNSZ($a2) | |
| 698 sltu $t8,$t0,$ta0 | |
| 699 $SUBU $ta0,$t0,$ta0 | |
| 700 $SUBU $t0,$ta0,$v0 | |
| 701 sgtu $v0,$t0,$ta0 | |
| 702 $ST $t0,-4*$BNSZ($a0) | |
| 703 $ADDU $v0,$t8 | |
| 704 | |
| 705 sltu $t9,$t1,$ta1 | |
| 706 $SUBU $ta1,$t1,$ta1 | |
| 707 $SUBU $t1,$ta1,$v0 | |
| 708 sgtu $v0,$t1,$ta1 | |
| 709 $ST $t1,-3*$BNSZ($a0) | |
| 710 $ADDU $v0,$t9 | |
| 711 | |
| 712 | |
| 713 sltu $t8,$t2,$ta2 | |
| 714 $SUBU $ta2,$t2,$ta2 | |
| 715 $SUBU $t2,$ta2,$v0 | |
| 716 sgtu $v0,$t2,$ta2 | |
| 717 $ST $t2,-2*$BNSZ($a0) | |
| 718 $ADDU $v0,$t8 | |
| 719 | |
| 720 sltu $t9,$t3,$ta3 | |
| 721 $SUBU $ta3,$t3,$ta3 | |
| 722 $SUBU $t3,$ta3,$v0 | |
| 723 sgtu $v0,$t3,$ta3 | |
| 724 $ST $t3,-$BNSZ($a0) | |
| 725 $ADDU $v0,$t9 | |
| 726 | |
| 727 .set noreorder | |
| 728 bgtzl $at,.L_bn_sub_words_loop | |
| 729 $LD $t0,0($a1) | |
| 730 | |
| 731 beqz $a3,.L_bn_sub_words_return | |
| 732 nop | |
| 733 | |
| 734 .L_bn_sub_words_tail: | |
| 735 .set reorder | |
| 736 $LD $t0,0($a1) | |
| 737 $LD $ta0,0($a2) | |
| 738 subu $a3,1 | |
| 739 sltu $t8,$t0,$ta0 | |
| 740 $SUBU $ta0,$t0,$ta0 | |
| 741 $SUBU $t0,$ta0,$v0 | |
| 742 sgtu $v0,$t0,$ta0 | |
| 743 $ST $t0,0($a0) | |
| 744 $ADDU $v0,$t8 | |
| 745 beqz $a3,.L_bn_sub_words_return | |
| 746 | |
| 747 $LD $t1,$BNSZ($a1) | |
| 748 subu $a3,1 | |
| 749 $LD $ta1,$BNSZ($a2) | |
| 750 sltu $t9,$t1,$ta1 | |
| 751 $SUBU $ta1,$t1,$ta1 | |
| 752 $SUBU $t1,$ta1,$v0 | |
| 753 sgtu $v0,$t1,$ta1 | |
| 754 $ST $t1,$BNSZ($a0) | |
| 755 $ADDU $v0,$t9 | |
| 756 beqz $a3,.L_bn_sub_words_return | |
| 757 | |
| 758 $LD $t2,2*$BNSZ($a1) | |
| 759 $LD $ta2,2*$BNSZ($a2) | |
| 760 sltu $t8,$t2,$ta2 | |
| 761 $SUBU $ta2,$t2,$ta2 | |
| 762 $SUBU $t2,$ta2,$v0 | |
| 763 sgtu $v0,$t2,$ta2 | |
| 764 $ST $t2,2*$BNSZ($a0) | |
| 765 $ADDU $v0,$t8 | |
| 766 | |
| 767 .L_bn_sub_words_return: | |
| 768 .set noreorder | |
| 769 ___ | |
| 770 $code.=<<___ if ($flavour =~ /nubi/i); # NUBI only: reload $t3..$t0 and $gp, then release the 6-slot frame | |
| 771 $REG_L $t3,4*$SZREG($sp) | |
| 772 $REG_L $t2,3*$SZREG($sp) | |
| 773 $REG_L $t1,2*$SZREG($sp) | |
| 774 $REG_L $t0,1*$SZREG($sp) | |
| 775 $REG_L $gp,0*$SZREG($sp) | |
| 776 $PTR_ADD $sp,6*$SZREG | |
| 777 ___ | |
| 778 $code.=<<___; # return from bn_sub_words_internal with the final borrow in $v0; then the bn_div_3_words entry stub: the pointer argument moves to $a3, $a0 is loaded from ($a3) and $a1 from -$BNSZ($a3); returns -1 when the loaded $a0 equals $a2, otherwise branches to the _internal body | |
| 779 jr $ra | |
| 780 move $a0,$v0 | |
| 781 .end bn_sub_words_internal | |
| 782 | |
| 783 .align 5 | |
| 784 .globl bn_div_3_words | |
| 785 .ent bn_div_3_words | |
| 786 bn_div_3_words: | |
| 787 .set noreorder | |
| 788 move $a3,$a0 # we know that bn_div_words does not | |
| 789 # touch $a3, $ta2, $ta3 and preserves $a2 | |
| 790 # so that we can save two arguments | |
| 791 # and return address in registers | |
| 792 # instead of stack:-) | |
| 793 | |
| 794 $LD $a0,($a3) | |
| 795 move $ta2,$a1 | |
| 796 bne $a0,$a2,bn_div_3_words_internal | |
| 797 $LD $a1,-$BNSZ($a3) | |
| 798 li $v0,-1 | |
| 799 jr $ra | |
| 800 move $a0,$v0 | |
| 801 .end bn_div_3_words | |
| 802 | |
| 803 .align 5 | |
| 804 .ent bn_div_3_words_internal | |
| 805 bn_div_3_words_internal: | |
| 806 ___ | |
| 807 $code.=<<___ if ($flavour =~ /nubi/i); # NUBI only: open a 6-slot frame and spill $ra, $t3..$t0 and $gp (which names $v1 under NUBI) | |
| 808 .frame $sp,6*$SZREG,$ra | |
| 809 .mask 0x8000f008,-$SZREG | |
| 810 .set noreorder | |
| 811 $PTR_SUB $sp,6*$SZREG | |
| 812 $REG_S $ra,5*$SZREG($sp) | |
| 813 $REG_S $t3,4*$SZREG($sp) | |
| 814 $REG_S $t2,3*$SZREG($sp) | |
| 815 $REG_S $t1,2*$SZREG($sp) | |
| 816 $REG_S $t0,1*$SZREG($sp) | |
| 817 $REG_S $gp,0*$SZREG($sp) | |
| 818 ___ | |
| 819 $code.=<<___; # body: keeps the return address in $ta3 around a call to bn_div_words_internal, multiplies the saved second argument ($ta2) by the returned $v0, then runs a correction loop that may decrement $v0 (the decrement sits in a branch-likely delay slot, so it only executes when the loop repeats) | |
| 820 .set reorder | |
| 821 move $ta3,$ra | |
| 822 bal bn_div_words_internal | |
| 823 move $ra,$ta3 | |
| 824 $MULTU $ta2,$v0 | |
| 825 $LD $t2,-2*$BNSZ($a3) | |
| 826 move $ta0,$zero | |
| 827 mfhi $t1 | |
| 828 mflo $t0 | |
| 829 sltu $t8,$t1,$a1 | |
| 830 .L_bn_div_3_words_inner_loop: | |
| 831 bnez $t8,.L_bn_div_3_words_inner_loop_done | |
| 832 sgeu $at,$t2,$t0 | |
| 833 seq $t9,$t1,$a1 | |
| 834 and $at,$t9 | |
| 835 sltu $t3,$t0,$ta2 | |
| 836 $ADDU $a1,$a2 | |
| 837 $SUBU $t1,$t3 | |
| 838 $SUBU $t0,$ta2 | |
| 839 sltu $t8,$t1,$a1 | |
| 840 sltu $ta0,$a1,$a2 | |
| 841 or $t8,$ta0 | |
| 842 .set noreorder | |
| 843 beqzl $at,.L_bn_div_3_words_inner_loop | |
| 844 $SUBU $v0,1 | |
| 845 .set reorder | |
| 846 .L_bn_div_3_words_inner_loop_done: | |
| 847 .set noreorder | |
| 848 ___ | |
| 849 $code.=<<___ if ($flavour =~ /nubi/i); # NUBI only: reload $t3..$t0 and $gp, then release the 6-slot frame | |
| 850 $REG_L $t3,4*$SZREG($sp) | |
| 851 $REG_L $t2,3*$SZREG($sp) | |
| 852 $REG_L $t1,2*$SZREG($sp) | |
| 853 $REG_L $t0,1*$SZREG($sp) | |
| 854 $REG_L $gp,0*$SZREG($sp) | |
| 855 $PTR_ADD $sp,6*$SZREG | |
| 856 ___ | |
| 857 $code.=<<___; # return from bn_div_3_words_internal with the result in $v0 (copied to $a0 in the delay slot); then the bn_div_words entry stub: branches to the _internal body when the divisor $a2 is non-zero, otherwise returns -1 | |
| 858 jr $ra | |
| 859 move $a0,$v0 | |
| 860 .end bn_div_3_words_internal | |
| 861 | |
| 862 .align 5 | |
| 863 .globl bn_div_words | |
| 864 .ent bn_div_words | |
| 865 bn_div_words: | |
| 866 .set noreorder | |
| 867 bnez $a2,bn_div_words_internal | |
| 868 li $v0,-1 # I would rather signal div-by-zero | |
| 869 # which can be done with 'break 7' | |
| 870 jr $ra | |
| 871 move $a0,$v0 | |
| 872 .end bn_div_words | |
| 873 | |
| 874 .align 5 | |
| 875 .ent bn_div_words_internal | |
| 876 bn_div_words_internal: | |
| 877 ___ | |
| 878 $code.=<<___ if ($flavour =~ /nubi/i); # NUBI only: open a 6-slot frame and spill $ra, $t3..$t0 and $gp (which names $v1 under NUBI) | |
| 879 .frame $sp,6*$SZREG,$ra | |
| 880 .mask 0x8000f008,-$SZREG | |
| 881 .set noreorder | |
| 882 $PTR_SUB $sp,6*$SZREG | |
| 883 $REG_S $ra,5*$SZREG($sp) | |
| 884 $REG_S $t3,4*$SZREG($sp) | |
| 885 $REG_S $t2,3*$SZREG($sp) | |
| 886 $REG_S $t1,2*$SZREG($sp) | |
| 887 $REG_S $t0,1*$SZREG($sp) | |
| 888 $REG_S $gp,0*$SZREG($sp) | |
| 889 ___ | |
| 890 $code.=<<___; # normalisation: unless the divisor $a2 already has its top bit set, shift it left until it does, counting the shifts in $t9; raise 'break 6' if the bits about to be shifted out of $a0 are non-zero; then shift $a0 and $a1 left by $t9, moving the top bits of $a1 into $a0 | |
| 891 move $v1,$zero | |
| 892 bltz $a2,.L_bn_div_words_body | |
| 893 move $t9,$v1 | |
| 894 $SLL $a2,1 | |
| 895 bgtz $a2,.-4 | |
| 896 addu $t9,1 | |
| 897 | |
| 898 .set reorder | |
| 899 negu $t1,$t9 | |
| 900 li $t2,-1 | |
| 901 $SLL $t2,$t1 | |
| 902 and $t2,$a0 | |
| 903 $SRL $at,$a1,$t1 | |
| 904 .set noreorder | |
| 905 bnezl $t2,.+8 | |
| 906 break 6 # signal overflow | |
| 907 .set reorder | |
| 908 $SLL $a0,$t9 | |
| 909 $SLL $a1,$t9 | |
| 910 or $a0,$at | |
| 911 ___ | |
| 912 $QT=$ta0; # half-word quotient estimate for the current round | |
| 913 $HH=$ta1; # upper half of the current high dividend word $a0 | |
| 914 $DH=$v1; # upper half of the normalised divisor $a2 | |
| 915 $code.=<<___; # two rounds, each yielding half a word of quotient: estimate $QT with $DIVU (or all-ones in the low half when $DH equals $HH), then an inner loop decrements $QT while the product $a2*$QT is too large; the halves are combined in $v0 and the remainder is shifted back down by $t9 | |
| 916 .L_bn_div_words_body: | |
| 917 $SRL $DH,$a2,4*$BNSZ # bits | |
| 918 sgeu $at,$a0,$a2 | |
| 919 .set noreorder | |
| 920 bnezl $at,.+8 | |
| 921 $SUBU $a0,$a2 | |
| 922 .set reorder | |
| 923 | |
| 924 li $QT,-1 | |
| 925 $SRL $HH,$a0,4*$BNSZ # bits | |
| 926 $SRL $QT,4*$BNSZ # q=0xffffffff | |
| 927 beq $DH,$HH,.L_bn_div_words_skip_div1 | |
| 928 $DIVU $zero,$a0,$DH | |
| 929 mflo $QT | |
| 930 .L_bn_div_words_skip_div1: | |
| 931 $MULTU $a2,$QT | |
| 932 $SLL $t3,$a0,4*$BNSZ # bits | |
| 933 $SRL $at,$a1,4*$BNSZ # bits | |
| 934 or $t3,$at | |
| 935 mflo $t0 | |
| 936 mfhi $t1 | |
| 937 .L_bn_div_words_inner_loop1: | |
| 938 sltu $t2,$t3,$t0 | |
| 939 seq $t8,$HH,$t1 | |
| 940 sltu $at,$HH,$t1 | |
| 941 and $t2,$t8 | |
| 942 sltu $v0,$t0,$a2 | |
| 943 or $at,$t2 | |
| 944 .set noreorder | |
| 945 beqz $at,.L_bn_div_words_inner_loop1_done | |
| 946 $SUBU $t1,$v0 | |
| 947 $SUBU $t0,$a2 | |
| 948 b .L_bn_div_words_inner_loop1 | |
| 949 $SUBU $QT,1 | |
| 950 .set reorder | |
| 951 .L_bn_div_words_inner_loop1_done: | |
| 952 | |
| 953 $SLL $a1,4*$BNSZ # bits | |
| 954 $SUBU $a0,$t3,$t0 | |
| 955 $SLL $v0,$QT,4*$BNSZ # bits | |
| 956 | |
| 957 li $QT,-1 | |
| 958 $SRL $HH,$a0,4*$BNSZ # bits | |
| 959 $SRL $QT,4*$BNSZ # q=0xffffffff | |
| 960 beq $DH,$HH,.L_bn_div_words_skip_div2 | |
| 961 $DIVU $zero,$a0,$DH | |
| 962 mflo $QT | |
| 963 .L_bn_div_words_skip_div2: | |
| 964 $MULTU $a2,$QT | |
| 965 $SLL $t3,$a0,4*$BNSZ # bits | |
| 966 $SRL $at,$a1,4*$BNSZ # bits | |
| 967 or $t3,$at | |
| 968 mflo $t0 | |
| 969 mfhi $t1 | |
| 970 .L_bn_div_words_inner_loop2: | |
| 971 sltu $t2,$t3,$t0 | |
| 972 seq $t8,$HH,$t1 | |
| 973 sltu $at,$HH,$t1 | |
| 974 and $t2,$t8 | |
| 975 sltu $v1,$t0,$a2 | |
| 976 or $at,$t2 | |
| 977 .set noreorder | |
| 978 beqz $at,.L_bn_div_words_inner_loop2_done | |
| 979 $SUBU $t1,$v1 | |
| 980 $SUBU $t0,$a2 | |
| 981 b .L_bn_div_words_inner_loop2 | |
| 982 $SUBU $QT,1 | |
| 983 .set reorder | |
| 984 .L_bn_div_words_inner_loop2_done: | |
| 985 | |
| 986 $SUBU $a0,$t3,$t0 | |
| 987 or $v0,$QT | |
| 988 $SRL $v1,$a0,$t9 # $v1 contains remainder if anybody wants it | |
| 989 $SRL $a2,$t9 # restore $a2 | |
| 990 | |
| 991 .set noreorder | |
| 992 move $a1,$v1 | |
| 993 ___ | |
| 994 $code.=<<___ if ($flavour =~ /nubi/i); # NUBI only: reload $t3..$t0 and $gp, then release the frame; $gp names $v1 here, so the remainder survives only in the $a1 copy made just above | |
| 995 $REG_L $t3,4*$SZREG($sp) | |
| 996 $REG_L $t2,3*$SZREG($sp) | |
| 997 $REG_L $t1,2*$SZREG($sp) | |
| 998 $REG_L $t0,1*$SZREG($sp) | |
| 999 $REG_L $gp,0*$SZREG($sp) | |
| 1000 $PTR_ADD $sp,6*$SZREG | |
| 1001 ___ | |
| 1002 $code.=<<___; # return the quotient in $v0 (copied to $a0 in the delay slot) | |
| 1003 jr $ra | |
| 1004 move $a0,$v0 | |
| 1005 .end bn_div_words_internal | |
| 1006 ___ | |
| 1007 undef $HH; undef $QT; undef $DH; # these aliases were only meant for bn_div_words | |
| 1008 | |
| 1009 ($a_0,$a_1,$a_2,$a_3)=($t0,$t1,$t2,$t3); # registers that hold a[0..3] in the comba code below | |
| 1010 ($b_0,$b_1,$b_2,$b_3)=($ta0,$ta1,$ta2,$ta3); # registers that hold b[0..3] | |
| 1011 | |
| 1012 ($a_4,$a_5,$a_6,$a_7)=($s0,$s2,$s4,$a1); # once we load a[7], no use for $a1 | |
| 1013 ($b_4,$b_5,$b_6,$b_7)=($s1,$s3,$s5,$a2); # once we load b[7], no use for $a2 | |
| 1014 | |
| 1015 ($t_1,$t_2,$c_1,$c_2,$c_3)=($t8,$t9,$v0,$v1,$a3); # $t_1/$t_2 receive mflo/mfhi of each product; $c_1..$c_3 are the three accumulator words named c1..c3 in the mul_add_c comments | |
| 1016 | |
| 1017 $code.=<<___; | |
| 1018 | |
| 1019 .align 5 | |
| 1020 .globl bn_mul_comba8 | |
| 1021 .ent bn_mul_comba8 | |
| 1022 bn_mul_comba8: | |
| 1023 .set noreorder | |
| 1024 ___ | |
| 1025 $code.=<<___ if ($flavour =~ /nubi/i); | |
| 1026 .frame $sp,12*$SZREG,$ra | |
| 1027 .mask 0x803ff008,-$SZREG | |
| 1028 $PTR_SUB $sp,12*$SZREG | |
| 1029 $REG_S $ra,11*$SZREG($sp) | |
| 1030 $REG_S $s5,10*$SZREG($sp) | |
| 1031 $REG_S $s4,9*$SZREG($sp) | |
| 1032 $REG_S $s3,8*$SZREG($sp) | |
| 1033 $REG_S $s2,7*$SZREG($sp) | |
| 1034 $REG_S $s1,6*$SZREG($sp) | |
| 1035 $REG_S $s0,5*$SZREG($sp) | |
| 1036 $REG_S $t3,4*$SZREG($sp) | |
| 1037 $REG_S $t2,3*$SZREG($sp) | |
| 1038 $REG_S $t1,2*$SZREG($sp) | |
| 1039 $REG_S $t0,1*$SZREG($sp) | |
| 1040 $REG_S $gp,0*$SZREG($sp) | |
| 1041 ___ | |
| 1042 $code.=<<___ if ($flavour !~ /nubi/i); | |
| 1043 .frame $sp,6*$SZREG,$ra | |
| 1044 .mask 0x003f0000,-$SZREG | |
| 1045 $PTR_SUB $sp,6*$SZREG | |
| 1046 $REG_S $s5,5*$SZREG($sp) | |
| 1047 $REG_S $s4,4*$SZREG($sp) | |
| 1048 $REG_S $s3,3*$SZREG($sp) | |
| 1049 $REG_S $s2,2*$SZREG($sp) | |
| 1050 $REG_S $s1,1*$SZREG($sp) | |
| 1051 $REG_S $s0,0*$SZREG($sp) | |
| 1052 ___ | |
| 1053 $code.=<<___; | |
| 1054 | |
| 1055 .set reorder | |
| 1056 $LD $a_0,0($a1) # If compiled with -mips3 option on | |
| 1057 # R5000 box assembler barks on this | |
| 1058 # 1ine with "should not have mult/div | |
| 1059 # as last instruction in bb (R10K | |
| 1060 # bug)" warning. If anybody out there | |
| 1061 # has a clue about how to circumvent | |
| 1062 # this do send me a note. | |
| 1063 # <appro\@fy.chalmers.se> | |
| 1064 | |
| 1065 $LD $b_0,0($a2) | |
| 1066 $LD $a_1,$BNSZ($a1) | |
| 1067 $LD $a_2,2*$BNSZ($a1) | |
| 1068 $MULTU $a_0,$b_0 # mul_add_c(a[0],b[0],c1,c2,c3); | |
| 1069 $LD $a_3,3*$BNSZ($a1) | |
| 1070 $LD $b_1,$BNSZ($a2) | |
| 1071 $LD $b_2,2*$BNSZ($a2) | |
| 1072 $LD $b_3,3*$BNSZ($a2) | |
| 1073 mflo $c_1 | |
| 1074 mfhi $c_2 | |
| 1075 | |
| 1076 $LD $a_4,4*$BNSZ($a1) | |
| 1077 $LD $a_5,5*$BNSZ($a1) | |
| 1078 $MULTU $a_0,$b_1 # mul_add_c(a[0],b[1],c2,c3,c1); | |
| 1079 $LD $a_6,6*$BNSZ($a1) | |
| 1080 $LD $a_7,7*$BNSZ($a1) | |
| 1081 $LD $b_4,4*$BNSZ($a2) | |
| 1082 $LD $b_5,5*$BNSZ($a2) | |
| 1083 mflo $t_1 | |
| 1084 mfhi $t_2 | |
| 1085 $ADDU $c_2,$t_1 | |
| 1086 sltu $at,$c_2,$t_1 | |
| 1087 $MULTU $a_1,$b_0 # mul_add_c(a[1],b[0],c2,c3,c1); | |
| 1088 $ADDU $c_3,$t_2,$at | |
| 1089 $LD $b_6,6*$BNSZ($a2) | |
| 1090 $LD $b_7,7*$BNSZ($a2) | |
| 1091 $ST $c_1,0($a0) # r[0]=c1; | |
| 1092 mflo $t_1 | |
| 1093 mfhi $t_2 | |
| 1094 $ADDU $c_2,$t_1 | |
| 1095 sltu $at,$c_2,$t_1 | |
| 1096 $MULTU $a_2,$b_0 # mul_add_c(a[2],b[0],c3,c1,c2); | |
| 1097 $ADDU $t_2,$at | |
| 1098 $ADDU $c_3,$t_2 | |
| 1099 sltu $c_1,$c_3,$t_2 | |
| 1100 $ST $c_2,$BNSZ($a0) # r[1]=c2; | |
| 1101 | |
| 1102 mflo $t_1 | |
| 1103 mfhi $t_2 | |
| 1104 $ADDU $c_3,$t_1 | |
| 1105 sltu $at,$c_3,$t_1 | |
| 1106 $MULTU $a_1,$b_1 # mul_add_c(a[1],b[1],c3,c1,c2); | |
| 1107 $ADDU $t_2,$at | |
| 1108 $ADDU $c_1,$t_2 | |
| 1109 mflo $t_1 | |
| 1110 mfhi $t_2 | |
| 1111 $ADDU $c_3,$t_1 | |
| 1112 sltu $at,$c_3,$t_1 | |
| 1113 $MULTU $a_0,$b_2 # mul_add_c(a[0],b[2],c3,c1,c2); | |
| 1114 $ADDU $t_2,$at | |
| 1115 $ADDU $c_1,$t_2 | |
| 1116 sltu $c_2,$c_1,$t_2 | |
| 1117 mflo $t_1 | |
| 1118 mfhi $t_2 | |
| 1119 $ADDU $c_3,$t_1 | |
| 1120 sltu $at,$c_3,$t_1 | |
| 1121 $MULTU $a_0,$b_3 # mul_add_c(a[0],b[3],c1,c2,c3); | |
| 1122 $ADDU $t_2,$at | |
| 1123 $ADDU $c_1,$t_2 | |
| 1124 sltu $at,$c_1,$t_2 | |
| 1125 $ADDU $c_2,$at | |
| 1126 $ST $c_3,2*$BNSZ($a0) # r[2]=c3; | |
| 1127 | |
| 1128 mflo $t_1 | |
| 1129 mfhi $t_2 | |
| 1130 $ADDU $c_1,$t_1 | |
| 1131 sltu $at,$c_1,$t_1 | |
| 1132 $MULTU $a_1,$b_2 # mul_add_c(a[1],b[2],c1,c2,c3); | |
| 1133 $ADDU $t_2,$at | |
| 1134 $ADDU $c_2,$t_2 | |
| 1135 sltu $c_3,$c_2,$t_2 | |
| 1136 mflo $t_1 | |
| 1137 mfhi $t_2 | |
| 1138 $ADDU $c_1,$t_1 | |
| 1139 sltu $at,$c_1,$t_1 | |
| 1140 $MULTU $a_2,$b_1 # mul_add_c(a[2],b[1],c1,c2,c3); | |
| 1141 $ADDU $t_2,$at | |
| 1142 $ADDU $c_2,$t_2 | |
| 1143 sltu $at,$c_2,$t_2 | |
| 1144 $ADDU $c_3,$at | |
| 1145 mflo $t_1 | |
| 1146 mfhi $t_2 | |
| 1147 $ADDU $c_1,$t_1 | |
| 1148 sltu $at,$c_1,$t_1 | |
| 1149 $MULTU $a_3,$b_0 # mul_add_c(a[3],b[0],c1,c2,c3); | |
| 1150 $ADDU $t_2,$at | |
| 1151 $ADDU $c_2,$t_2 | |
| 1152 sltu $at,$c_2,$t_2 | |
| 1153 $ADDU $c_3,$at | |
| 1154 mflo $t_1 | |
| 1155 mfhi $t_2 | |
| 1156 $ADDU $c_1,$t_1 | |
| 1157 sltu $at,$c_1,$t_1 | |
| 1158 $MULTU $a_4,$b_0 # mul_add_c(a[4],b[0],c2,c3,c1); | |
| 1159 $ADDU $t_2,$at | |
| 1160 $ADDU $c_2,$t_2 | |
| 1161 sltu $at,$c_2,$t_2 | |
| 1162 $ADDU $c_3,$at | |
| 1163 $ST $c_1,3*$BNSZ($a0) # r[3]=c1; | |
| 1164 | |
| 1165 mflo $t_1 | |
| 1166 mfhi $t_2 | |
| 1167 $ADDU $c_2,$t_1 | |
| 1168 sltu $at,$c_2,$t_1 | |
| 1169 $MULTU $a_3,$b_1 # mul_add_c(a[3],b[1],c2,c3,c1); | |
| 1170 $ADDU $t_2,$at | |
| 1171 $ADDU $c_3,$t_2 | |
| 1172 sltu $c_1,$c_3,$t_2 | |
| 1173 mflo $t_1 | |
| 1174 mfhi $t_2 | |
| 1175 $ADDU $c_2,$t_1 | |
| 1176 sltu $at,$c_2,$t_1 | |
| 1177 $MULTU $a_2,$b_2 # mul_add_c(a[2],b[2],c2,c3,c1); | |
| 1178 $ADDU $t_2,$at | |
| 1179 $ADDU $c_3,$t_2 | |
| 1180 sltu $at,$c_3,$t_2 | |
| 1181 $ADDU $c_1,$at | |
| 1182 mflo $t_1 | |
| 1183 mfhi $t_2 | |
| 1184 $ADDU $c_2,$t_1 | |
| 1185 sltu $at,$c_2,$t_1 | |
| 1186 $MULTU $a_1,$b_3 # mul_add_c(a[1],b[3],c2,c3,c1); | |
| 1187 $ADDU $t_2,$at | |
| 1188 $ADDU $c_3,$t_2 | |
| 1189 sltu $at,$c_3,$t_2 | |
| 1190 $ADDU $c_1,$at | |
| 1191 mflo $t_1 | |
| 1192 mfhi $t_2 | |
| 1193 $ADDU $c_2,$t_1 | |
| 1194 sltu $at,$c_2,$t_1 | |
| 1195 $MULTU $a_0,$b_4 # mul_add_c(a[0],b[4],c2,c3,c1); | |
| 1196 $ADDU $t_2,$at | |
| 1197 $ADDU $c_3,$t_2 | |
| 1198 sltu $at,$c_3,$t_2 | |
| 1199 $ADDU $c_1,$at | |
| 1200 mflo $t_1 | |
| 1201 mfhi $t_2 | |
| 1202 $ADDU $c_2,$t_1 | |
| 1203 sltu $at,$c_2,$t_1 | |
| 1204 $MULTU $a_0,$b_5 # mul_add_c(a[0],b[5],c3,c1,c2); | |
| 1205 $ADDU $t_2,$at | |
| 1206 $ADDU $c_3,$t_2 | |
| 1207 sltu $at,$c_3,$t_2 | |
| 1208 $ADDU $c_1,$at | |
| 1209 $ST $c_2,4*$BNSZ($a0) # r[4]=c2; | |
| 1210 | |
| 1211 mflo $t_1 | |
| 1212 mfhi $t_2 | |
| 1213 $ADDU $c_3,$t_1 | |
| 1214 sltu $at,$c_3,$t_1 | |
| 1215 $MULTU $a_1,$b_4 # mul_add_c(a[1],b[4],c3,c1,c2); | |
| 1216 $ADDU $t_2,$at | |
| 1217 $ADDU $c_1,$t_2 | |
| 1218 sltu $c_2,$c_1,$t_2 | |
| 1219 mflo $t_1 | |
| 1220 mfhi $t_2 | |
| 1221 $ADDU $c_3,$t_1 | |
| 1222 sltu $at,$c_3,$t_1 | |
| 1223 $MULTU $a_2,$b_3 # mul_add_c(a[2],b[3],c3,c1,c2); | |
| 1224 $ADDU $t_2,$at | |
| 1225 $ADDU $c_1,$t_2 | |
| 1226 sltu $at,$c_1,$t_2 | |
| 1227 $ADDU $c_2,$at | |
| 1228 mflo $t_1 | |
| 1229 mfhi $t_2 | |
| 1230 $ADDU $c_3,$t_1 | |
| 1231 sltu $at,$c_3,$t_1 | |
| 1232 $MULTU $a_3,$b_2 # mul_add_c(a[3],b[2],c3,c1,c2); | |
| 1233 $ADDU $t_2,$at | |
| 1234 $ADDU $c_1,$t_2 | |
| 1235 sltu $at,$c_1,$t_2 | |
| 1236 $ADDU $c_2,$at | |
| 1237 mflo $t_1 | |
| 1238 mfhi $t_2 | |
| 1239 $ADDU $c_3,$t_1 | |
| 1240 sltu $at,$c_3,$t_1 | |
| 1241 $MULTU $a_4,$b_1 # mul_add_c(a[4],b[1],c3,c1,c2); | |
| 1242 $ADDU $t_2,$at | |
| 1243 $ADDU $c_1,$t_2 | |
| 1244 sltu $at,$c_1,$t_2 | |
| 1245 $ADDU $c_2,$at | |
| 1246 mflo $t_1 | |
| 1247 mfhi $t_2 | |
| 1248 $ADDU $c_3,$t_1 | |
| 1249 sltu $at,$c_3,$t_1 | |
| 1250 $MULTU $a_5,$b_0 # mul_add_c(a[5],b[0],c3,c1,c2); | |
| 1251 $ADDU $t_2,$at | |
| 1252 $ADDU $c_1,$t_2 | |
| 1253 sltu $at,$c_1,$t_2 | |
| 1254 $ADDU $c_2,$at | |
| 1255 mflo $t_1 | |
| 1256 mfhi $t_2 | |
| 1257 $ADDU $c_3,$t_1 | |
| 1258 sltu $at,$c_3,$t_1 | |
| 1259 $MULTU $a_6,$b_0 # mul_add_c(a[6],b[0],c1,c2,c3); | |
| 1260 $ADDU $t_2,$at | |
| 1261 $ADDU $c_1,$t_2 | |
| 1262 sltu $at,$c_1,$t_2 | |
| 1263 $ADDU $c_2,$at | |
| 1264 $ST $c_3,5*$BNSZ($a0) # r[5]=c3; | |
| 1265 | |
| 1266 mflo $t_1 | |
| 1267 mfhi $t_2 | |
| 1268 $ADDU $c_1,$t_1 | |
| 1269 sltu $at,$c_1,$t_1 | |
| 1270 $MULTU $a_5,$b_1 # mul_add_c(a[5],b[1],c1,c2,c3); | |
| 1271 $ADDU $t_2,$at | |
| 1272 $ADDU $c_2,$t_2 | |
| 1273 sltu $c_3,$c_2,$t_2 | |
| 1274 mflo $t_1 | |
| 1275 mfhi $t_2 | |
| 1276 $ADDU $c_1,$t_1 | |
| 1277 sltu $at,$c_1,$t_1 | |
| 1278 $MULTU $a_4,$b_2 # mul_add_c(a[4],b[2],c1,c2,c3); | |
| 1279 $ADDU $t_2,$at | |
| 1280 $ADDU $c_2,$t_2 | |
| 1281 sltu $at,$c_2,$t_2 | |
| 1282 $ADDU $c_3,$at | |
| 1283 mflo $t_1 | |
| 1284 mfhi $t_2 | |
| 1285 $ADDU $c_1,$t_1 | |
| 1286 sltu $at,$c_1,$t_1 | |
| 1287 $MULTU $a_3,$b_3 # mul_add_c(a[3],b[3],c1,c2,c3); | |
| 1288 $ADDU $t_2,$at | |
| 1289 $ADDU $c_2,$t_2 | |
| 1290 sltu $at,$c_2,$t_2 | |
| 1291 $ADDU $c_3,$at | |
| 1292 mflo $t_1 | |
| 1293 mfhi $t_2 | |
| 1294 $ADDU $c_1,$t_1 | |
| 1295 sltu $at,$c_1,$t_1 | |
| 1296 $MULTU $a_2,$b_4 # mul_add_c(a[2],b[4],c1,c2,c3); | |
| 1297 $ADDU $t_2,$at | |
| 1298 $ADDU $c_2,$t_2 | |
| 1299 sltu $at,$c_2,$t_2 | |
| 1300 $ADDU $c_3,$at | |
| 1301 mflo $t_1 | |
| 1302 mfhi $t_2 | |
| 1303 $ADDU $c_1,$t_1 | |
| 1304 sltu $at,$c_1,$t_1 | |
| 1305 $MULTU $a_1,$b_5 # mul_add_c(a[1],b[5],c1,c2,c3); | |
| 1306 $ADDU $t_2,$at | |
| 1307 $ADDU $c_2,$t_2 | |
| 1308 sltu $at,$c_2,$t_2 | |
| 1309 $ADDU $c_3,$at | |
| 1310 mflo $t_1 | |
| 1311 mfhi $t_2 | |
| 1312 $ADDU $c_1,$t_1 | |
| 1313 sltu $at,$c_1,$t_1 | |
| 1314 $MULTU $a_0,$b_6 # mul_add_c(a[0],b[6],c1,c2,c3); | |
| 1315 $ADDU $t_2,$at | |
| 1316 $ADDU $c_2,$t_2 | |
| 1317 sltu $at,$c_2,$t_2 | |
| 1318 $ADDU $c_3,$at | |
| 1319 mflo $t_1 | |
| 1320 mfhi $t_2 | |
| 1321 $ADDU $c_1,$t_1 | |
| 1322 sltu $at,$c_1,$t_1 | |
| 1323 $MULTU $a_0,$b_7 # mul_add_c(a[0],b[7],c2,c3,c1); | |
| 1324 $ADDU $t_2,$at | |
| 1325 $ADDU $c_2,$t_2 | |
| 1326 sltu $at,$c_2,$t_2 | |
| 1327 $ADDU $c_3,$at | |
| 1328 $ST $c_1,6*$BNSZ($a0) # r[6]=c1; | |
| 1329 | |
| 1330 mflo $t_1 | |
| 1331 mfhi $t_2 | |
| 1332 $ADDU $c_2,$t_1 | |
| 1333 sltu $at,$c_2,$t_1 | |
| 1334 $MULTU $a_1,$b_6 # mul_add_c(a[1],b[6],c2,c3,c1); | |
| 1335 $ADDU $t_2,$at | |
| 1336 $ADDU $c_3,$t_2 | |
| 1337 sltu $c_1,$c_3,$t_2 | |
| 1338 mflo $t_1 | |
| 1339 mfhi $t_2 | |
| 1340 $ADDU $c_2,$t_1 | |
| 1341 sltu $at,$c_2,$t_1 | |
| 1342 $MULTU $a_2,$b_5 # mul_add_c(a[2],b[5],c2,c3,c1); | |
| 1343 $ADDU $t_2,$at | |
| 1344 $ADDU $c_3,$t_2 | |
| 1345 sltu $at,$c_3,$t_2 | |
| 1346 $ADDU $c_1,$at | |
| 1347 mflo $t_1 | |
| 1348 mfhi $t_2 | |
| 1349 $ADDU $c_2,$t_1 | |
| 1350 sltu $at,$c_2,$t_1 | |
| 1351 $MULTU $a_3,$b_4 # mul_add_c(a[3],b[4],c2,c3,c1); | |
| 1352 $ADDU $t_2,$at | |
| 1353 $ADDU $c_3,$t_2 | |
| 1354 sltu $at,$c_3,$t_2 | |
| 1355 $ADDU $c_1,$at | |
| 1356 mflo $t_1 | |
| 1357 mfhi $t_2 | |
| 1358 $ADDU $c_2,$t_1 | |
| 1359 sltu $at,$c_2,$t_1 | |
| 1360 $MULTU $a_4,$b_3 # mul_add_c(a[4],b[3],c2,c3,c1); | |
| 1361 $ADDU $t_2,$at | |
| 1362 $ADDU $c_3,$t_2 | |
| 1363 sltu $at,$c_3,$t_2 | |
| 1364 $ADDU $c_1,$at | |
| 1365 mflo $t_1 | |
| 1366 mfhi $t_2 | |
| 1367 $ADDU $c_2,$t_1 | |
| 1368 sltu $at,$c_2,$t_1 | |
| 1369 $MULTU $a_5,$b_2 # mul_add_c(a[5],b[2],c2,c3,c1); | |
| 1370 $ADDU $t_2,$at | |
| 1371 $ADDU $c_3,$t_2 | |
| 1372 sltu $at,$c_3,$t_2 | |
| 1373 $ADDU $c_1,$at | |
| 1374 mflo $t_1 | |
| 1375 mfhi $t_2 | |
| 1376 $ADDU $c_2,$t_1 | |
| 1377 sltu $at,$c_2,$t_1 | |
| 1378 $MULTU $a_6,$b_1 # mul_add_c(a[6],b[1],c2,c3,c1); | |
| 1379 $ADDU $t_2,$at | |
| 1380 $ADDU $c_3,$t_2 | |
| 1381 sltu $at,$c_3,$t_2 | |
| 1382 $ADDU $c_1,$at | |
| 1383 mflo $t_1 | |
| 1384 mfhi $t_2 | |
| 1385 $ADDU $c_2,$t_1 | |
| 1386 sltu $at,$c_2,$t_1 | |
| 1387 $MULTU $a_7,$b_0 # mul_add_c(a[7],b[0],c2,c3,c1); | |
| 1388 $ADDU $t_2,$at | |
| 1389 $ADDU $c_3,$t_2 | |
| 1390 sltu $at,$c_3,$t_2 | |
| 1391 $ADDU $c_1,$at | |
| 1392 mflo $t_1 | |
| 1393 mfhi $t_2 | |
| 1394 $ADDU $c_2,$t_1 | |
| 1395 sltu $at,$c_2,$t_1 | |
| 1396 $MULTU $a_7,$b_1 # mul_add_c(a[7],b[1],c3,c1,c2); | |
| 1397 $ADDU $t_2,$at | |
| 1398 $ADDU $c_3,$t_2 | |
| 1399 sltu $at,$c_3,$t_2 | |
| 1400 $ADDU $c_1,$at | |
| 1401 $ST $c_2,7*$BNSZ($a0) # r[7]=c2; | |
| 1402 | |
| 1403 mflo $t_1 | |
| 1404 mfhi $t_2 | |
| 1405 $ADDU $c_3,$t_1 | |
| 1406 sltu $at,$c_3,$t_1 | |
| 1407 $MULTU $a_6,$b_2 # mul_add_c(a[6],b[2],c3,c1,c2); | |
| 1408 $ADDU $t_2,$at | |
| 1409 $ADDU $c_1,$t_2 | |
| 1410 sltu $c_2,$c_1,$t_2 | |
| 1411 mflo $t_1 | |
| 1412 mfhi $t_2 | |
| 1413 $ADDU $c_3,$t_1 | |
| 1414 sltu $at,$c_3,$t_1 | |
| 1415 $MULTU $a_5,$b_3 # mul_add_c(a[5],b[3],c3,c1,c2); | |
| 1416 $ADDU $t_2,$at | |
| 1417 $ADDU $c_1,$t_2 | |
| 1418 sltu $at,$c_1,$t_2 | |
| 1419 $ADDU $c_2,$at | |
| 1420 mflo $t_1 | |
| 1421 mfhi $t_2 | |
| 1422 $ADDU $c_3,$t_1 | |
| 1423 sltu $at,$c_3,$t_1 | |
| 1424 $MULTU $a_4,$b_4 # mul_add_c(a[4],b[4],c3,c1,c2); | |
| 1425 $ADDU $t_2,$at | |
| 1426 $ADDU $c_1,$t_2 | |
| 1427 sltu $at,$c_1,$t_2 | |
| 1428 $ADDU $c_2,$at | |
| 1429 mflo $t_1 | |
| 1430 mfhi $t_2 | |
| 1431 $ADDU $c_3,$t_1 | |
| 1432 sltu $at,$c_3,$t_1 | |
| 1433 $MULTU $a_3,$b_5 # mul_add_c(a[3],b[5],c3,c1,c2); | |
| 1434 $ADDU $t_2,$at | |
| 1435 $ADDU $c_1,$t_2 | |
| 1436 sltu $at,$c_1,$t_2 | |
| 1437 $ADDU $c_2,$at | |
| 1438 mflo $t_1 | |
| 1439 mfhi $t_2 | |
| 1440 $ADDU $c_3,$t_1 | |
| 1441 sltu $at,$c_3,$t_1 | |
| 1442 $MULTU $a_2,$b_6 # mul_add_c(a[2],b[6],c3,c1,c2); | |
| 1443 $ADDU $t_2,$at | |
| 1444 $ADDU $c_1,$t_2 | |
| 1445 sltu $at,$c_1,$t_2 | |
| 1446 $ADDU $c_2,$at | |
| 1447 mflo $t_1 | |
| 1448 mfhi $t_2 | |
| 1449 $ADDU $c_3,$t_1 | |
| 1450 sltu $at,$c_3,$t_1 | |
| 1451 $MULTU $a_1,$b_7 # mul_add_c(a[1],b[7],c3,c1,c2); | |
| 1452 $ADDU $t_2,$at | |
| 1453 $ADDU $c_1,$t_2 | |
| 1454 sltu $at,$c_1,$t_2 | |
| 1455 $ADDU $c_2,$at | |
| 1456 mflo $t_1 | |
| 1457 mfhi $t_2 | |
| 1458 $ADDU $c_3,$t_1 | |
| 1459 sltu $at,$c_3,$t_1 | |
| 1460 $MULTU $a_2,$b_7 # mul_add_c(a[2],b[7],c1,c2,c3); | |
| 1461 $ADDU $t_2,$at | |
| 1462 $ADDU $c_1,$t_2 | |
| 1463 sltu $at,$c_1,$t_2 | |
| 1464 $ADDU $c_2,$at | |
| 1465 $ST $c_3,8*$BNSZ($a0) # r[8]=c3; | |
| 1466 | |
| 1467 mflo $t_1 | |
| 1468 mfhi $t_2 | |
| 1469 $ADDU $c_1,$t_1 | |
| 1470 sltu $at,$c_1,$t_1 | |
| 1471 $MULTU $a_3,$b_6 # mul_add_c(a[3],b[6],c1,c2,c3); | |
| 1472 $ADDU $t_2,$at | |
| 1473 $ADDU $c_2,$t_2 | |
| 1474 sltu $c_3,$c_2,$t_2 | |
| 1475 mflo $t_1 | |
| 1476 mfhi $t_2 | |
| 1477 $ADDU $c_1,$t_1 | |
| 1478 sltu $at,$c_1,$t_1 | |
| 1479 $MULTU $a_4,$b_5 # mul_add_c(a[4],b[5],c1,c2,c3); | |
| 1480 $ADDU $t_2,$at | |
| 1481 $ADDU $c_2,$t_2 | |
| 1482 sltu $at,$c_2,$t_2 | |
| 1483 $ADDU $c_3,$at | |
| 1484 mflo $t_1 | |
| 1485 mfhi $t_2 | |
| 1486 $ADDU $c_1,$t_1 | |
| 1487 sltu $at,$c_1,$t_1 | |
| 1488 $MULTU $a_5,$b_4 # mul_add_c(a[5],b[4],c1,c2,c3); | |
| 1489 $ADDU $t_2,$at | |
| 1490 $ADDU $c_2,$t_2 | |
| 1491 sltu $at,$c_2,$t_2 | |
| 1492 $ADDU $c_3,$at | |
| 1493 mflo $t_1 | |
| 1494 mfhi $t_2 | |
| 1495 $ADDU $c_1,$t_1 | |
| 1496 sltu $at,$c_1,$t_1 | |
| 1497 $MULTU $a_6,$b_3 # mul_add_c(a[6],b[3],c1,c2,c3); | |
| 1498 $ADDU $t_2,$at | |
| 1499 $ADDU $c_2,$t_2 | |
| 1500 sltu $at,$c_2,$t_2 | |
| 1501 $ADDU $c_3,$at | |
| 1502 mflo $t_1 | |
| 1503 mfhi $t_2 | |
| 1504 $ADDU $c_1,$t_1 | |
| 1505 sltu $at,$c_1,$t_1 | |
| 1506 $MULTU $a_7,$b_2 # mul_add_c(a[7],b[2],c1,c2,c3); | |
| 1507 $ADDU $t_2,$at | |
| 1508 $ADDU $c_2,$t_2 | |
| 1509 sltu $at,$c_2,$t_2 | |
| 1510 $ADDU $c_3,$at | |
| 1511 mflo $t_1 | |
| 1512 mfhi $t_2 | |
| 1513 $ADDU $c_1,$t_1 | |
| 1514 sltu $at,$c_1,$t_1 | |
| 1515 $MULTU $a_7,$b_3 # mul_add_c(a[7],b[3],c2,c3,c1); | |
| 1516 $ADDU $t_2,$at | |
| 1517 $ADDU $c_2,$t_2 | |
| 1518 sltu $at,$c_2,$t_2 | |
| 1519 $ADDU $c_3,$at | |
| 1520 $ST $c_1,9*$BNSZ($a0) # r[9]=c1; | |
| 1521 | |
| 1522 mflo $t_1 | |
| 1523 mfhi $t_2 | |
| 1524 $ADDU $c_2,$t_1 | |
| 1525 sltu $at,$c_2,$t_1 | |
| 1526 $MULTU $a_6,$b_4 # mul_add_c(a[6],b[4],c2,c3,c1); | |
| 1527 $ADDU $t_2,$at | |
| 1528 $ADDU $c_3,$t_2 | |
| 1529 sltu $c_1,$c_3,$t_2 | |
| 1530 mflo $t_1 | |
| 1531 mfhi $t_2 | |
| 1532 $ADDU $c_2,$t_1 | |
| 1533 sltu $at,$c_2,$t_1 | |
| 1534 $MULTU $a_5,$b_5 # mul_add_c(a[5],b[5],c2,c3,c1); | |
| 1535 $ADDU $t_2,$at | |
| 1536 $ADDU $c_3,$t_2 | |
| 1537 sltu $at,$c_3,$t_2 | |
| 1538 $ADDU $c_1,$at | |
| 1539 mflo $t_1 | |
| 1540 mfhi $t_2 | |
| 1541 $ADDU $c_2,$t_1 | |
| 1542 sltu $at,$c_2,$t_1 | |
| 1543 $MULTU $a_4,$b_6 # mul_add_c(a[4],b[6],c2,c3,c1); | |
| 1544 $ADDU $t_2,$at | |
| 1545 $ADDU $c_3,$t_2 | |
| 1546 sltu $at,$c_3,$t_2 | |
| 1547 $ADDU $c_1,$at | |
| 1548 mflo $t_1 | |
| 1549 mfhi $t_2 | |
| 1550 $ADDU $c_2,$t_1 | |
| 1551 sltu $at,$c_2,$t_1 | |
| 1552 $MULTU $a_3,$b_7 # mul_add_c(a[3],b[7],c2,c3,c1); | |
| 1553 $ADDU $t_2,$at | |
| 1554 $ADDU $c_3,$t_2 | |
| 1555 sltu $at,$c_3,$t_2 | |
| 1556 $ADDU $c_1,$at | |
| 1557 mflo $t_1 | |
| 1558 mfhi $t_2 | |
| 1559 $ADDU $c_2,$t_1 | |
| 1560 sltu $at,$c_2,$t_1 | |
| 1561 $MULTU $a_4,$b_7 # mul_add_c(a[4],b[7],c3,c1,c2); | |
| 1562 $ADDU $t_2,$at | |
| 1563 $ADDU $c_3,$t_2 | |
| 1564 sltu $at,$c_3,$t_2 | |
| 1565 $ADDU $c_1,$at | |
| 1566 $ST $c_2,10*$BNSZ($a0) # r[10]=c2; | |
| 1567 | |
| 1568 mflo $t_1 | |
| 1569 mfhi $t_2 | |
| 1570 $ADDU $c_3,$t_1 | |
| 1571 sltu $at,$c_3,$t_1 | |
| 1572 $MULTU $a_5,$b_6 # mul_add_c(a[5],b[6],c3,c1,c2); | |
| 1573 $ADDU $t_2,$at | |
| 1574 $ADDU $c_1,$t_2 | |
| 1575 sltu $c_2,$c_1,$t_2 | |
| 1576 mflo $t_1 | |
| 1577 mfhi $t_2 | |
| 1578 $ADDU $c_3,$t_1 | |
| 1579 sltu $at,$c_3,$t_1 | |
| 1580 $MULTU $a_6,$b_5 # mul_add_c(a[6],b[5],c3,c1,c2); | |
| 1581 $ADDU $t_2,$at | |
| 1582 $ADDU $c_1,$t_2 | |
| 1583 sltu $at,$c_1,$t_2 | |
| 1584 $ADDU $c_2,$at | |
| 1585 mflo $t_1 | |
| 1586 mfhi $t_2 | |
| 1587 $ADDU $c_3,$t_1 | |
| 1588 sltu $at,$c_3,$t_1 | |
| 1589 $MULTU $a_7,$b_4 # mul_add_c(a[7],b[4],c3,c1,c2); | |
| 1590 $ADDU $t_2,$at | |
| 1591 $ADDU $c_1,$t_2 | |
| 1592 sltu $at,$c_1,$t_2 | |
| 1593 $ADDU $c_2,$at | |
| 1594 mflo $t_1 | |
| 1595 mfhi $t_2 | |
| 1596 $ADDU $c_3,$t_1 | |
| 1597 sltu $at,$c_3,$t_1 | |
| 1598 $MULTU $a_7,$b_5 # mul_add_c(a[7],b[5],c1,c2,c3); | |
| 1599 $ADDU $t_2,$at | |
| 1600 $ADDU $c_1,$t_2 | |
| 1601 sltu $at,$c_1,$t_2 | |
| 1602 $ADDU $c_2,$at | |
| 1603 $ST $c_3,11*$BNSZ($a0) # r[11]=c3; | |
| 1604 | |
| 1605 mflo $t_1 | |
| 1606 mfhi $t_2 | |
| 1607 $ADDU $c_1,$t_1 | |
| 1608 sltu $at,$c_1,$t_1 | |
| 1609 $MULTU $a_6,$b_6 # mul_add_c(a[6],b[6],c1,c2,c3); | |
| 1610 $ADDU $t_2,$at | |
| 1611 $ADDU $c_2,$t_2 | |
| 1612 sltu $c_3,$c_2,$t_2 | |
| 1613 mflo $t_1 | |
| 1614 mfhi $t_2 | |
| 1615 $ADDU $c_1,$t_1 | |
| 1616 sltu $at,$c_1,$t_1 | |
| 1617 $MULTU $a_5,$b_7 # mul_add_c(a[5],b[7],c1,c2,c3); | |
| 1618 $ADDU $t_2,$at | |
| 1619 $ADDU $c_2,$t_2 | |
| 1620 sltu $at,$c_2,$t_2 | |
| 1621 $ADDU $c_3,$at | |
| 1622 mflo $t_1 | |
| 1623 mfhi $t_2 | |
| 1624 $ADDU $c_1,$t_1 | |
| 1625 sltu $at,$c_1,$t_1 | |
| 1626 $MULTU $a_6,$b_7 # mul_add_c(a[6],b[7],c2,c3,c1); | |
| 1627 $ADDU $t_2,$at | |
| 1628 $ADDU $c_2,$t_2 | |
| 1629 sltu $at,$c_2,$t_2 | |
| 1630 $ADDU $c_3,$at | |
| 1631 $ST $c_1,12*$BNSZ($a0) # r[12]=c1; | |
| 1632 | |
| 1633 mflo $t_1 | |
| 1634 mfhi $t_2 | |
| 1635 $ADDU $c_2,$t_1 | |
| 1636 sltu $at,$c_2,$t_1 | |
| 1637 $MULTU $a_7,$b_6 # mul_add_c(a[7],b[6],c2,c3,c1); | |
| 1638 $ADDU $t_2,$at | |
| 1639 $ADDU $c_3,$t_2 | |
| 1640 sltu $c_1,$c_3,$t_2 | |
| 1641 mflo $t_1 | |
| 1642 mfhi $t_2 | |
| 1643 $ADDU $c_2,$t_1 | |
| 1644 sltu $at,$c_2,$t_1 | |
| 1645 $MULTU $a_7,$b_7 # mul_add_c(a[7],b[7],c3,c1,c2); | |
| 1646 $ADDU $t_2,$at | |
| 1647 $ADDU $c_3,$t_2 | |
| 1648 sltu $at,$c_3,$t_2 | |
| 1649 $ADDU $c_1,$at | |
| 1650 $ST $c_2,13*$BNSZ($a0) # r[13]=c2; | |
| 1651 | |
| 1652 mflo $t_1 | |
| 1653 mfhi $t_2 | |
| 1654 $ADDU $c_3,$t_1 | |
| 1655 sltu $at,$c_3,$t_1 | |
| 1656 $ADDU $t_2,$at | |
| 1657 $ADDU $c_1,$t_2 | |
| 1658 $ST $c_3,14*$BNSZ($a0) # r[14]=c3; | |
| 1659 $ST $c_1,15*$BNSZ($a0) # r[15]=c1; | |
| 1660 | |
| 1661 .set noreorder | |
| 1662 ___ | |
| 1663 $code.=<<___ if ($flavour =~ /nubi/i); | |
| 1664 $REG_L $s5,10*$SZREG($sp) | |
| 1665 $REG_L $s4,9*$SZREG($sp) | |
| 1666 $REG_L $s3,8*$SZREG($sp) | |
| 1667 $REG_L $s2,7*$SZREG($sp) | |
| 1668 $REG_L $s1,6*$SZREG($sp) | |
| 1669 $REG_L $s0,5*$SZREG($sp) | |
| 1670 $REG_L $t3,4*$SZREG($sp) | |
| 1671 $REG_L $t2,3*$SZREG($sp) | |
| 1672 $REG_L $t1,2*$SZREG($sp) | |
| 1673 $REG_L $t0,1*$SZREG($sp) | |
| 1674 $REG_L $gp,0*$SZREG($sp) | |
| 1675 jr $ra | |
| 1676 $PTR_ADD $sp,12*$SZREG | |
| 1677 ___ | |
| 1678 $code.=<<___ if ($flavour !~ /nubi/i); | |
| 1679 $REG_L $s5,5*$SZREG($sp) | |
| 1680 $REG_L $s4,4*$SZREG($sp) | |
| 1681 $REG_L $s3,3*$SZREG($sp) | |
| 1682 $REG_L $s2,2*$SZREG($sp) | |
| 1683 $REG_L $s1,1*$SZREG($sp) | |
| 1684 $REG_L $s0,0*$SZREG($sp) | |
| 1685 jr $ra | |
| 1686 $PTR_ADD $sp,6*$SZREG | |
| 1687 ___ | |
# bn_mul_comba4: the heredocs from here down to ".end bn_mul_comba4" append
# the assembly text of this routine to $code.  As emitted below, the routine
# loads four words a[0..3] addressed through $a1 and four words b[0..3]
# addressed through $a2, and stores eight result words r[0..7] through $a0.
# This first heredoc also carries the ".end" directive that closes the
# preceding bn_mul_comba8, followed by the alignment, export and entry
# directives and the label for bn_mul_comba4.
| 1688 $code.=<<___; | |
| 1689 .end bn_mul_comba8 | |
| 1690 | |
| 1691 .align 5 | |
| 1692 .globl bn_mul_comba4 | |
| 1693 .ent bn_mul_comba4 | |
| 1694 bn_mul_comba4: | |
| 1695 ___ | |
# NUBI flavours only: allocate a 6*$SZREG frame and save $ra, $t3..$t0 and
# $gp in it.  No prologue is emitted for any other flavour.
| 1696 $code.=<<___ if ($flavour =~ /nubi/i); | |
| 1697 .frame $sp,6*$SZREG,$ra | |
| 1698 .mask 0x8000f008,-$SZREG | |
| 1699 .set noreorder | |
| 1700 $PTR_SUB $sp,6*$SZREG | |
| 1701 $REG_S $ra,5*$SZREG($sp) | |
| 1702 $REG_S $t3,4*$SZREG($sp) | |
| 1703 $REG_S $t2,3*$SZREG($sp) | |
| 1704 $REG_S $t1,2*$SZREG($sp) | |
| 1705 $REG_S $t0,1*$SZREG($sp) | |
| 1706 $REG_S $gp,0*$SZREG($sp) | |
| 1707 ___ | |
# Main body, common to all flavours.  Each partial product a[i]*b[j] (named
# in the trailing mul_add_c comment on its $MULTU line) is read back with
# mflo/mfhi into $t_1/$t_2 and added into the accumulators $c_1,$c_2,$c_3.
# Every $ADDU whose carry is needed is followed by sltu, which records the
# carry-out (sum < addend) in $at or directly in the next accumulator.
# The $MULTU for the next product is written between the low-word and the
# high-word additions of the previous product, so a mul_add_c comment
# describes the product consumed by the mflo/mfhi pair that follows it.
# A result word is stored through $a0 once its column has been accumulated.
# NOTE(review): the body runs under ".set reorder", so the assembler may
# presumably reschedule these instructions; confirm against the toolchain.
| 1708 $code.=<<___; | |
| 1709 .set reorder | |
| 1710 $LD $a_0,0($a1) | |
| 1711 $LD $b_0,0($a2) | |
| 1712 $LD $a_1,$BNSZ($a1) | |
| 1713 $LD $a_2,2*$BNSZ($a1) | |
| 1714 $MULTU $a_0,$b_0 # mul_add_c(a[0],b[0],c1,c2,c3); | |
| 1715 $LD $a_3,3*$BNSZ($a1) | |
| 1716 $LD $b_1,$BNSZ($a2) | |
| 1717 $LD $b_2,2*$BNSZ($a2) | |
| 1718 $LD $b_3,3*$BNSZ($a2) | |
| 1719 mflo $c_1 | |
| 1720 mfhi $c_2 | |
| 1721 $ST $c_1,0($a0) | |
| 1722 | |
| 1723 $MULTU $a_0,$b_1 # mul_add_c(a[0],b[1],c2,c3,c1); | |
| 1724 mflo $t_1 | |
| 1725 mfhi $t_2 | |
| 1726 $ADDU $c_2,$t_1 | |
| 1727 sltu $at,$c_2,$t_1 | |
| 1728 $MULTU $a_1,$b_0 # mul_add_c(a[1],b[0],c2,c3,c1); | |
| 1729 $ADDU $c_3,$t_2,$at | |
| 1730 mflo $t_1 | |
| 1731 mfhi $t_2 | |
| 1732 $ADDU $c_2,$t_1 | |
| 1733 sltu $at,$c_2,$t_1 | |
| 1734 $MULTU $a_2,$b_0 # mul_add_c(a[2],b[0],c3,c1,c2); | |
| 1735 $ADDU $t_2,$at | |
| 1736 $ADDU $c_3,$t_2 | |
| 1737 sltu $c_1,$c_3,$t_2 | |
| 1738 $ST $c_2,$BNSZ($a0) | |
| 1739 | |
| 1740 mflo $t_1 | |
| 1741 mfhi $t_2 | |
| 1742 $ADDU $c_3,$t_1 | |
| 1743 sltu $at,$c_3,$t_1 | |
| 1744 $MULTU $a_1,$b_1 # mul_add_c(a[1],b[1],c3,c1,c2); | |
| 1745 $ADDU $t_2,$at | |
| 1746 $ADDU $c_1,$t_2 | |
| 1747 mflo $t_1 | |
| 1748 mfhi $t_2 | |
| 1749 $ADDU $c_3,$t_1 | |
| 1750 sltu $at,$c_3,$t_1 | |
| 1751 $MULTU $a_0,$b_2 # mul_add_c(a[0],b[2],c3,c1,c2); | |
| 1752 $ADDU $t_2,$at | |
| 1753 $ADDU $c_1,$t_2 | |
| 1754 sltu $c_2,$c_1,$t_2 | |
| 1755 mflo $t_1 | |
| 1756 mfhi $t_2 | |
| 1757 $ADDU $c_3,$t_1 | |
| 1758 sltu $at,$c_3,$t_1 | |
| 1759 $MULTU $a_0,$b_3 # mul_add_c(a[0],b[3],c1,c2,c3); | |
| 1760 $ADDU $t_2,$at | |
| 1761 $ADDU $c_1,$t_2 | |
| 1762 sltu $at,$c_1,$t_2 | |
| 1763 $ADDU $c_2,$at | |
| 1764 $ST $c_3,2*$BNSZ($a0) | |
| 1765 | |
| 1766 mflo $t_1 | |
| 1767 mfhi $t_2 | |
| 1768 $ADDU $c_1,$t_1 | |
| 1769 sltu $at,$c_1,$t_1 | |
| 1770 $MULTU $a_1,$b_2 # mul_add_c(a[1],b[2],c1,c2,c3); | |
| 1771 $ADDU $t_2,$at | |
| 1772 $ADDU $c_2,$t_2 | |
| 1773 sltu $c_3,$c_2,$t_2 | |
| 1774 mflo $t_1 | |
| 1775 mfhi $t_2 | |
| 1776 $ADDU $c_1,$t_1 | |
| 1777 sltu $at,$c_1,$t_1 | |
| 1778 $MULTU $a_2,$b_1 # mul_add_c(a[2],b[1],c1,c2,c3); | |
| 1779 $ADDU $t_2,$at | |
| 1780 $ADDU $c_2,$t_2 | |
| 1781 sltu $at,$c_2,$t_2 | |
| 1782 $ADDU $c_3,$at | |
| 1783 mflo $t_1 | |
| 1784 mfhi $t_2 | |
| 1785 $ADDU $c_1,$t_1 | |
| 1786 sltu $at,$c_1,$t_1 | |
| 1787 $MULTU $a_3,$b_0 # mul_add_c(a[3],b[0],c1,c2,c3); | |
| 1788 $ADDU $t_2,$at | |
| 1789 $ADDU $c_2,$t_2 | |
| 1790 sltu $at,$c_2,$t_2 | |
| 1791 $ADDU $c_3,$at | |
| 1792 mflo $t_1 | |
| 1793 mfhi $t_2 | |
| 1794 $ADDU $c_1,$t_1 | |
| 1795 sltu $at,$c_1,$t_1 | |
| 1796 $MULTU $a_3,$b_1 # mul_add_c(a[3],b[1],c2,c3,c1); | |
| 1797 $ADDU $t_2,$at | |
| 1798 $ADDU $c_2,$t_2 | |
| 1799 sltu $at,$c_2,$t_2 | |
| 1800 $ADDU $c_3,$at | |
| 1801 $ST $c_1,3*$BNSZ($a0) | |
| 1802 | |
| 1803 mflo $t_1 | |
| 1804 mfhi $t_2 | |
| 1805 $ADDU $c_2,$t_1 | |
| 1806 sltu $at,$c_2,$t_1 | |
| 1807 $MULTU $a_2,$b_2 # mul_add_c(a[2],b[2],c2,c3,c1); | |
| 1808 $ADDU $t_2,$at | |
| 1809 $ADDU $c_3,$t_2 | |
| 1810 sltu $c_1,$c_3,$t_2 | |
| 1811 mflo $t_1 | |
| 1812 mfhi $t_2 | |
| 1813 $ADDU $c_2,$t_1 | |
| 1814 sltu $at,$c_2,$t_1 | |
| 1815 $MULTU $a_1,$b_3 # mul_add_c(a[1],b[3],c2,c3,c1); | |
| 1816 $ADDU $t_2,$at | |
| 1817 $ADDU $c_3,$t_2 | |
| 1818 sltu $at,$c_3,$t_2 | |
| 1819 $ADDU $c_1,$at | |
| 1820 mflo $t_1 | |
| 1821 mfhi $t_2 | |
| 1822 $ADDU $c_2,$t_1 | |
| 1823 sltu $at,$c_2,$t_1 | |
| 1824 $MULTU $a_2,$b_3 # mul_add_c(a[2],b[3],c3,c1,c2); | |
| 1825 $ADDU $t_2,$at | |
| 1826 $ADDU $c_3,$t_2 | |
| 1827 sltu $at,$c_3,$t_2 | |
| 1828 $ADDU $c_1,$at | |
| 1829 $ST $c_2,4*$BNSZ($a0) | |
| 1830 | |
| 1831 mflo $t_1 | |
| 1832 mfhi $t_2 | |
| 1833 $ADDU $c_3,$t_1 | |
| 1834 sltu $at,$c_3,$t_1 | |
| 1835 $MULTU $a_3,$b_2 # mul_add_c(a[3],b[2],c3,c1,c2); | |
| 1836 $ADDU $t_2,$at | |
| 1837 $ADDU $c_1,$t_2 | |
| 1838 sltu $c_2,$c_1,$t_2 | |
| 1839 mflo $t_1 | |
| 1840 mfhi $t_2 | |
| 1841 $ADDU $c_3,$t_1 | |
| 1842 sltu $at,$c_3,$t_1 | |
| 1843 $MULTU $a_3,$b_3 # mul_add_c(a[3],b[3],c1,c2,c3); | |
| 1844 $ADDU $t_2,$at | |
| 1845 $ADDU $c_1,$t_2 | |
| 1846 sltu $at,$c_1,$t_2 | |
| 1847 $ADDU $c_2,$at | |
| 1848 $ST $c_3,5*$BNSZ($a0) | |
| 1849 | |
| 1850 mflo $t_1 | |
| 1851 mfhi $t_2 | |
| 1852 $ADDU $c_1,$t_1 | |
| 1853 sltu $at,$c_1,$t_1 | |
| 1854 $ADDU $t_2,$at | |
| 1855 $ADDU $c_2,$t_2 | |
| 1856 $ST $c_1,6*$BNSZ($a0) | |
| 1857 $ST $c_2,7*$BNSZ($a0) | |
| 1858 | |
| 1859 .set noreorder | |
| 1860 ___ | |
# NUBI flavours only: reload $t3..$t0 and $gp and release the 6*$SZREG
# frame.  $ra was stored by the prologue but is not reloaded here.
| 1861 $code.=<<___ if ($flavour =~ /nubi/i); | |
| 1862 $REG_L $t3,4*$SZREG($sp) | |
| 1863 $REG_L $t2,3*$SZREG($sp) | |
| 1864 $REG_L $t1,2*$SZREG($sp) | |
| 1865 $REG_L $t0,1*$SZREG($sp) | |
| 1866 $REG_L $gp,0*$SZREG($sp) | |
| 1867 $PTR_ADD $sp,6*$SZREG | |
| 1868 ___ | |
# Return, common to all flavours.  ".set noreorder" from the end of the main
# body is still in effect, so the nop is emitted literally right after the jr.
| 1869 $code.=<<___; | |
| 1870 jr $ra | |
| 1871 nop | |
| 1872 .end bn_mul_comba4 | |
| 1873 ___ | |
| 1874 | |
# Rebind the names used for a[4..7] to the registers that hold b[0..3] in
# the multiplication routines.  bn_sqr_comba8 (next) loads a[4]..a[7]
# through $a1 into these registers.
# NOTE(review): presumably this lets the squaring routine get by without
# the extra callee-saved registers bn_mul_comba8 spills; confirm.
| 1875 ($a_4,$a_5,$a_6,$a_7)=($b_0,$b_1,$b_2,$b_3); | |
| 1876 | |
| 1877 $code.=<<___; | |
| 1878 | |
| 1879 .align 5 | |
| 1880 .globl bn_sqr_comba8 | |
| 1881 .ent bn_sqr_comba8 | |
| 1882 bn_sqr_comba8: | |
| 1883 ___ | |
| 1884 $code.=<<___ if ($flavour =~ /nubi/i); | |
| 1885 .frame $sp,6*$SZREG,$ra | |
| 1886 .mask 0x8000f008,-$SZREG | |
| 1887 .set noreorder | |
| 1888 $PTR_SUB $sp,6*$SZREG | |
| 1889 $REG_S $ra,5*$SZREG($sp) | |
| 1890 $REG_S $t3,4*$SZREG($sp) | |
| 1891 $REG_S $t2,3*$SZREG($sp) | |
| 1892 $REG_S $t1,2*$SZREG($sp) | |
| 1893 $REG_S $t0,1*$SZREG($sp) | |
| 1894 $REG_S $gp,0*$SZREG($sp) | |
| 1895 ___ | |
| 1896 $code.=<<___; | |
| 1897 .set reorder | |
| 1898 $LD $a_0,0($a1) | |
| 1899 $LD $a_1,$BNSZ($a1) | |
| 1900 $LD $a_2,2*$BNSZ($a1) | |
| 1901 $LD $a_3,3*$BNSZ($a1) | |
| 1902 | |
| 1903 $MULTU $a_0,$a_0 # mul_add_c(a[0],b[0],c1,c2,c3); | |
| 1904 $LD $a_4,4*$BNSZ($a1) | |
| 1905 $LD $a_5,5*$BNSZ($a1) | |
| 1906 $LD $a_6,6*$BNSZ($a1) | |
| 1907 $LD $a_7,7*$BNSZ($a1) | |
| 1908 mflo $c_1 | |
| 1909 mfhi $c_2 | |
| 1910 $ST $c_1,0($a0) | |
| 1911 | |
| 1912 $MULTU $a_0,$a_1 # mul_add_c2(a[0],b[1],c2,c3,c1); | |
| 1913 mflo $t_1 | |
| 1914 mfhi $t_2 | |
| 1915 slt $c_1,$t_2,$zero | |
| 1916 $SLL $t_2,1 | |
| 1917 $MULTU $a_2,$a_0 # mul_add_c2(a[2],b[0],c3,c1,c2); | |
| 1918 slt $a2,$t_1,$zero | |
| 1919 $ADDU $t_2,$a2 | |
| 1920 $SLL $t_1,1 | |
| 1921 $ADDU $c_2,$t_1 | |
| 1922 sltu $at,$c_2,$t_1 | |
| 1923 $ADDU $c_3,$t_2,$at | |
| 1924 $ST $c_2,$BNSZ($a0) | |
| 1925 | |
| 1926 mflo $t_1 | |
| 1927 mfhi $t_2 | |
| 1928 slt $c_2,$t_2,$zero | |
| 1929 $SLL $t_2,1 | |
| 1930 $MULTU $a_1,$a_1 # mul_add_c(a[1],b[1],c3,c1,c2); | |
| 1931 slt $a2,$t_1,$zero | |
| 1932 $ADDU $t_2,$a2 | |
| 1933 $SLL $t_1,1 | |
| 1934 $ADDU $c_3,$t_1 | |
| 1935 sltu $at,$c_3,$t_1 | |
| 1936 $ADDU $t_2,$at | |
| 1937 $ADDU $c_1,$t_2 | |
| 1938 sltu $at,$c_1,$t_2 | |
| 1939 $ADDU $c_2,$at | |
| 1940 mflo $t_1 | |
| 1941 mfhi $t_2 | |
| 1942 $ADDU $c_3,$t_1 | |
| 1943 sltu $at,$c_3,$t_1 | |
| 1944 $MULTU $a_0,$a_3 # mul_add_c2(a[0],b[3],c1,c2,c3); | |
| 1945 $ADDU $t_2,$at | |
| 1946 $ADDU $c_1,$t_2 | |
| 1947 sltu $at,$c_1,$t_2 | |
| 1948 $ADDU $c_2,$at | |
| 1949 $ST $c_3,2*$BNSZ($a0) | |
| 1950 | |
| 1951 mflo $t_1 | |
| 1952 mfhi $t_2 | |
| 1953 slt $c_3,$t_2,$zero | |
| 1954 $SLL $t_2,1 | |
| 1955 $MULTU $a_1,$a_2 # mul_add_c2(a[1],b[2],c1,c2,c3); | |
| 1956 slt $a2,$t_1,$zero | |
| 1957 $ADDU $t_2,$a2 | |
| 1958 $SLL $t_1,1 | |
| 1959 $ADDU $c_1,$t_1 | |
| 1960 sltu $at,$c_1,$t_1 | |
| 1961 $ADDU $t_2,$at | |
| 1962 $ADDU $c_2,$t_2 | |
| 1963 sltu $at,$c_2,$t_2 | |
| 1964 $ADDU $c_3,$at | |
| 1965 mflo $t_1 | |
| 1966 mfhi $t_2 | |
| 1967 slt $at,$t_2,$zero | |
| 1968 $ADDU $c_3,$at | |
| 1969 $MULTU $a_4,$a_0 # mul_add_c2(a[4],b[0],c2,c3,c1); | |
| 1970 $SLL $t_2,1 | |
| 1971 slt $a2,$t_1,$zero | |
| 1972 $ADDU $t_2,$a2 | |
| 1973 $SLL $t_1,1 | |
| 1974 $ADDU $c_1,$t_1 | |
| 1975 sltu $at,$c_1,$t_1 | |
| 1976 $ADDU $t_2,$at | |
| 1977 $ADDU $c_2,$t_2 | |
| 1978 sltu $at,$c_2,$t_2 | |
| 1979 $ADDU $c_3,$at | |
| 1980 $ST $c_1,3*$BNSZ($a0) | |
| 1981 | |
| 1982 mflo $t_1 | |
| 1983 mfhi $t_2 | |
| 1984 slt $c_1,$t_2,$zero | |
| 1985 $SLL $t_2,1 | |
| 1986 $MULTU $a_3,$a_1 # mul_add_c2(a[3],b[1],c2,c3,c1); | |
| 1987 slt $a2,$t_1,$zero | |
| 1988 $ADDU $t_2,$a2 | |
| 1989 $SLL $t_1,1 | |
| 1990 $ADDU $c_2,$t_1 | |
| 1991 sltu $at,$c_2,$t_1 | |
| 1992 $ADDU $t_2,$at | |
| 1993 $ADDU $c_3,$t_2 | |
| 1994 sltu $at,$c_3,$t_2 | |
| 1995 $ADDU $c_1,$at | |
| 1996 mflo $t_1 | |
| 1997 mfhi $t_2 | |
| 1998 slt $at,$t_2,$zero | |
| 1999 $ADDU $c_1,$at | |
| 2000 $MULTU $a_2,$a_2 # mul_add_c(a[2],b[2],c2,c3,c1); | |
| 2001 $SLL $t_2,1 | |
| 2002 slt $a2,$t_1,$zero | |
| 2003 $ADDU $t_2,$a2 | |
| 2004 $SLL $t_1,1 | |
| 2005 $ADDU $c_2,$t_1 | |
| 2006 sltu $at,$c_2,$t_1 | |
| 2007 $ADDU $t_2,$at | |
| 2008 $ADDU $c_3,$t_2 | |
| 2009 sltu $at,$c_3,$t_2 | |
| 2010 $ADDU $c_1,$at | |
| 2011 mflo $t_1 | |
| 2012 mfhi $t_2 | |
| 2013 $ADDU $c_2,$t_1 | |
| 2014 sltu $at,$c_2,$t_1 | |
| 2015 $MULTU $a_0,$a_5 # mul_add_c2(a[0],b[5],c3,c1,c2); | |
| 2016 $ADDU $t_2,$at | |
| 2017 $ADDU $c_3,$t_2 | |
| 2018 sltu $at,$c_3,$t_2 | |
| 2019 $ADDU $c_1,$at | |
| 2020 $ST $c_2,4*$BNSZ($a0) | |
| 2021 | |
| 2022 mflo $t_1 | |
| 2023 mfhi $t_2 | |
| 2024 slt $c_2,$t_2,$zero | |
| 2025 $SLL $t_2,1 | |
| 2026 $MULTU $a_1,$a_4 # mul_add_c2(a[1],b[4],c3,c1,c2); | |
| 2027 slt $a2,$t_1,$zero | |
| 2028 $ADDU $t_2,$a2 | |
| 2029 $SLL $t_1,1 | |
| 2030 $ADDU $c_3,$t_1 | |
| 2031 sltu $at,$c_3,$t_1 | |
| 2032 $ADDU $t_2,$at | |
| 2033 $ADDU $c_1,$t_2 | |
| 2034 sltu $at,$c_1,$t_2 | |
| 2035 $ADDU $c_2,$at | |
| 2036 mflo $t_1 | |
| 2037 mfhi $t_2 | |
| 2038 slt $at,$t_2,$zero | |
| 2039 $ADDU $c_2,$at | |
| 2040 $MULTU $a_2,$a_3 # mul_add_c2(a[2],b[3],c3,c1,c2); | |
| 2041 $SLL $t_2,1 | |
| 2042 slt $a2,$t_1,$zero | |
| 2043 $ADDU $t_2,$a2 | |
| 2044 $SLL $t_1,1 | |
| 2045 $ADDU $c_3,$t_1 | |
| 2046 sltu $at,$c_3,$t_1 | |
| 2047 $ADDU $t_2,$at | |
| 2048 $ADDU $c_1,$t_2 | |
| 2049 sltu $at,$c_1,$t_2 | |
| 2050 $ADDU $c_2,$at | |
| 2051 mflo $t_1 | |
| 2052 mfhi $t_2 | |
| 2053 slt $at,$t_2,$zero | |
| 2054 $MULTU $a_6,$a_0 # mul_add_c2(a[6],b[0],c1,c2,c3); | |
| 2055 $ADDU $c_2,$at | |
| 2056 $SLL $t_2,1 | |
| 2057 slt $a2,$t_1,$zero | |
| 2058 $ADDU $t_2,$a2 | |
| 2059 $SLL $t_1,1 | |
| 2060 $ADDU $c_3,$t_1 | |
| 2061 sltu $at,$c_3,$t_1 | |
| 2062 $ADDU $t_2,$at | |
| 2063 $ADDU $c_1,$t_2 | |
| 2064 sltu $at,$c_1,$t_2 | |
| 2065 $ADDU $c_2,$at | |
| 2066 $ST $c_3,5*$BNSZ($a0) | |
| 2067 | |
| 2068 mflo $t_1 | |
| 2069 mfhi $t_2 | |
| 2070 slt $c_3,$t_2,$zero | |
| 2071 $SLL $t_2,1 | |
| 2072 $MULTU $a_5,$a_1 # mul_add_c2(a[5],b[1],c1,c2,c3); | |
| 2073 slt $a2,$t_1,$zero | |
| 2074 $ADDU $t_2,$a2 | |
| 2075 $SLL $t_1,1 | |
| 2076 $ADDU $c_1,$t_1 | |
| 2077 sltu $at,$c_1,$t_1 | |
| 2078 $ADDU $t_2,$at | |
| 2079 $ADDU $c_2,$t_2 | |
| 2080 sltu $at,$c_2,$t_2 | |
| 2081 $ADDU $c_3,$at | |
| 2082 mflo $t_1 | |
| 2083 mfhi $t_2 | |
| 2084 slt $at,$t_2,$zero | |
| 2085 $ADDU $c_3,$at | |
| 2086 $MULTU $a_4,$a_2 # mul_add_c2(a[4],b[2],c1,c2,c3); | |
| 2087 $SLL $t_2,1 | |
| 2088 slt $a2,$t_1,$zero | |
| 2089 $ADDU $t_2,$a2 | |
| 2090 $SLL $t_1,1 | |
| 2091 $ADDU $c_1,$t_1 | |
| 2092 sltu $at,$c_1,$t_1 | |
| 2093 $ADDU $t_2,$at | |
| 2094 $ADDU $c_2,$t_2 | |
| 2095 sltu $at,$c_2,$t_2 | |
| 2096 $ADDU $c_3,$at | |
| 2097 mflo $t_1 | |
| 2098 mfhi $t_2 | |
| 2099 slt $at,$t_2,$zero | |
| 2100 $ADDU $c_3,$at | |
| 2101 $MULTU $a_3,$a_3 # mul_add_c(a[3],b[3],c1,c2,c3); | |
| 2102 $SLL $t_2,1 | |
| 2103 slt $a2,$t_1,$zero | |
| 2104 $ADDU $t_2,$a2 | |
| 2105 $SLL $t_1,1 | |
| 2106 $ADDU $c_1,$t_1 | |
| 2107 sltu $at,$c_1,$t_1 | |
| 2108 $ADDU $t_2,$at | |
| 2109 $ADDU $c_2,$t_2 | |
| 2110 sltu $at,$c_2,$t_2 | |
| 2111 $ADDU $c_3,$at | |
| 2112 mflo $t_1 | |
| 2113 mfhi $t_2 | |
| 2114 $ADDU $c_1,$t_1 | |
| 2115 sltu $at,$c_1,$t_1 | |
| 2116 $MULTU $a_0,$a_7 # mul_add_c2(a[0],b[7],c2,c3,c1); | |
| 2117 $ADDU $t_2,$at | |
| 2118 $ADDU $c_2,$t_2 | |
| 2119 sltu $at,$c_2,$t_2 | |
| 2120 $ADDU $c_3,$at | |
| 2121 $ST $c_1,6*$BNSZ($a0) | |
| 2122 | |
| 2123 mflo $t_1 | |
| 2124 mfhi $t_2 | |
| 2125 slt $c_1,$t_2,$zero | |
| 2126 $SLL $t_2,1 | |
| 2127 $MULTU $a_1,$a_6 # mul_add_c2(a[1],b[6],c2,c3,c1); | |
| 2128 slt $a2,$t_1,$zero | |
| 2129 $ADDU $t_2,$a2 | |
| 2130 $SLL $t_1,1 | |
| 2131 $ADDU $c_2,$t_1 | |
| 2132 sltu $at,$c_2,$t_1 | |
| 2133 $ADDU $t_2,$at | |
| 2134 $ADDU $c_3,$t_2 | |
| 2135 sltu $at,$c_3,$t_2 | |
| 2136 $ADDU $c_1,$at | |
| 2137 mflo $t_1 | |
| 2138 mfhi $t_2 | |
| 2139 slt $at,$t_2,$zero | |
| 2140 $ADDU $c_1,$at | |
| 2141 $MULTU $a_2,$a_5 # mul_add_c2(a[2],b[5],c2,c3,c1); | |
| 2142 $SLL $t_2,1 | |
| 2143 slt $a2,$t_1,$zero | |
| 2144 $ADDU $t_2,$a2 | |
| 2145 $SLL $t_1,1 | |
| 2146 $ADDU $c_2,$t_1 | |
| 2147 sltu $at,$c_2,$t_1 | |
| 2148 $ADDU $t_2,$at | |
| 2149 $ADDU $c_3,$t_2 | |
| 2150 sltu $at,$c_3,$t_2 | |
| 2151 $ADDU $c_1,$at | |
| 2152 mflo $t_1 | |
| 2153 mfhi $t_2 | |
| 2154 slt $at,$t_2,$zero | |
| 2155 $ADDU $c_1,$at | |
| 2156 $MULTU $a_3,$a_4 # mul_add_c2(a[3],b[4],c2,c3,c1); | |
| 2157 $SLL $t_2,1 | |
| 2158 slt $a2,$t_1,$zero | |
| 2159 $ADDU $t_2,$a2 | |
| 2160 $SLL $t_1,1 | |
| 2161 $ADDU $c_2,$t_1 | |
| 2162 sltu $at,$c_2,$t_1 | |
| 2163 $ADDU $t_2,$at | |
| 2164 $ADDU $c_3,$t_2 | |
| 2165 sltu $at,$c_3,$t_2 | |
| 2166 $ADDU $c_1,$at | |
| 2167 mflo $t_1 | |
| 2168 mfhi $t_2 | |
| 2169 slt $at,$t_2,$zero | |
| 2170 $ADDU $c_1,$at | |
| 2171 $MULTU $a_7,$a_1 # mul_add_c2(a[7],b[1],c3,c1,c2); | |
| 2172 $SLL $t_2,1 | |
| 2173 slt $a2,$t_1,$zero | |
| 2174 $ADDU $t_2,$a2 | |
| 2175 $SLL $t_1,1 | |
| 2176 $ADDU $c_2,$t_1 | |
| 2177 sltu $at,$c_2,$t_1 | |
| 2178 $ADDU $t_2,$at | |
| 2179 $ADDU $c_3,$t_2 | |
| 2180 sltu $at,$c_3,$t_2 | |
| 2181 $ADDU $c_1,$at | |
| 2182 $ST $c_2,7*$BNSZ($a0) | |
| 2183 | |
| 2184 mflo $t_1 | |
| 2185 mfhi $t_2 | |
| 2186 slt $c_2,$t_2,$zero | |
| 2187 $SLL $t_2,1 | |
| 2188 $MULTU $a_6,$a_2 # mul_add_c2(a[6],b[2],c3,c1,c2); | |
| 2189 slt $a2,$t_1,$zero | |
| 2190 $ADDU $t_2,$a2 | |
| 2191 $SLL $t_1,1 | |
| 2192 $ADDU $c_3,$t_1 | |
| 2193 sltu $at,$c_3,$t_1 | |
| 2194 $ADDU $t_2,$at | |
| 2195 $ADDU $c_1,$t_2 | |
| 2196 sltu $at,$c_1,$t_2 | |
| 2197 $ADDU $c_2,$at | |
| 2198 mflo $t_1 | |
| 2199 mfhi $t_2 | |
| 2200 slt $at,$t_2,$zero | |
| 2201 $ADDU $c_2,$at | |
| 2202 $MULTU $a_5,$a_3 # mul_add_c2(a[5],b[3],c3,c1,c2); | |
| 2203 $SLL $t_2,1 | |
| 2204 slt $a2,$t_1,$zero | |
| 2205 $ADDU $t_2,$a2 | |
| 2206 $SLL $t_1,1 | |
| 2207 $ADDU $c_3,$t_1 | |
| 2208 sltu $at,$c_3,$t_1 | |
| 2209 $ADDU $t_2,$at | |
| 2210 $ADDU $c_1,$t_2 | |
| 2211 sltu $at,$c_1,$t_2 | |
| 2212 $ADDU $c_2,$at | |
| 2213 mflo $t_1 | |
| 2214 mfhi $t_2 | |
| 2215 slt $at,$t_2,$zero | |
| 2216 $ADDU $c_2,$at | |
| 2217 $MULTU $a_4,$a_4 # mul_add_c(a[4],b[4],c3,c1,c2); | |
| 2218 $SLL $t_2,1 | |
| 2219 slt $a2,$t_1,$zero | |
| 2220 $ADDU $t_2,$a2 | |
| 2221 $SLL $t_1,1 | |
| 2222 $ADDU $c_3,$t_1 | |
| 2223 sltu $at,$c_3,$t_1 | |
| 2224 $ADDU $t_2,$at | |
| 2225 $ADDU $c_1,$t_2 | |
| 2226 sltu $at,$c_1,$t_2 | |
| 2227 $ADDU $c_2,$at | |
| 2228 mflo $t_1 | |
| 2229 mfhi $t_2 | |
| 2230 $ADDU $c_3,$t_1 | |
| 2231 sltu $at,$c_3,$t_1 | |
| 2232 $MULTU $a_2,$a_7 # mul_add_c2(a[2],b[7],c1,c2,c3); | |
| 2233 $ADDU $t_2,$at | |
| 2234 $ADDU $c_1,$t_2 | |
| 2235 sltu $at,$c_1,$t_2 | |
| 2236 $ADDU $c_2,$at | |
| 2237 $ST $c_3,8*$BNSZ($a0) | |
| 2238 | |
| 2239 mflo $t_1 | |
| 2240 mfhi $t_2 | |
| 2241 slt $c_3,$t_2,$zero | |
| 2242 $SLL $t_2,1 | |
| 2243 $MULTU $a_3,$a_6 # mul_add_c2(a[3],b[6],c1,c2,c3); | |
| 2244 slt $a2,$t_1,$zero | |
| 2245 $ADDU $t_2,$a2 | |
| 2246 $SLL $t_1,1 | |
| 2247 $ADDU $c_1,$t_1 | |
| 2248 sltu $at,$c_1,$t_1 | |
| 2249 $ADDU $t_2,$at | |
| 2250 $ADDU $c_2,$t_2 | |
| 2251 sltu $at,$c_2,$t_2 | |
| 2252 $ADDU $c_3,$at | |
| 2253 mflo $t_1 | |
| 2254 mfhi $t_2 | |
| 2255 slt $at,$t_2,$zero | |
| 2256 $ADDU $c_3,$at | |
| 2257 $MULTU $a_4,$a_5 # mul_add_c2(a[4],b[5],c1,c2,c3); | |
| 2258 $SLL $t_2,1 | |
| 2259 slt $a2,$t_1,$zero | |
| 2260 $ADDU $t_2,$a2 | |
| 2261 $SLL $t_1,1 | |
| 2262 $ADDU $c_1,$t_1 | |
| 2263 sltu $at,$c_1,$t_1 | |
| 2264 $ADDU $t_2,$at | |
| 2265 $ADDU $c_2,$t_2 | |
| 2266 sltu $at,$c_2,$t_2 | |
| 2267 $ADDU $c_3,$at | |
| 2268 mflo $t_1 | |
| 2269 mfhi $t_2 | |
| 2270 slt $at,$t_2,$zero | |
| 2271 $ADDU $c_3,$at | |
| 2272 $MULTU $a_7,$a_3 # mul_add_c2(a[7],b[3],c2,c3,c1); | |
| 2273 $SLL $t_2,1 | |
| 2274 slt $a2,$t_1,$zero | |
| 2275 $ADDU $t_2,$a2 | |
| 2276 $SLL $t_1,1 | |
| 2277 $ADDU $c_1,$t_1 | |
| 2278 sltu $at,$c_1,$t_1 | |
| 2279 $ADDU $t_2,$at | |
| 2280 $ADDU $c_2,$t_2 | |
| 2281 sltu $at,$c_2,$t_2 | |
| 2282 $ADDU $c_3,$at | |
| 2283 $ST $c_1,9*$BNSZ($a0) | |
| 2284 | |
| 2285 mflo $t_1 | |
| 2286 mfhi $t_2 | |
| 2287 slt $c_1,$t_2,$zero | |
| 2288 $SLL $t_2,1 | |
| 2289 $MULTU $a_6,$a_4 # mul_add_c2(a[6],b[4],c2,c3,c1); | |
| 2290 slt $a2,$t_1,$zero | |
| 2291 $ADDU $t_2,$a2 | |
| 2292 $SLL $t_1,1 | |
| 2293 $ADDU $c_2,$t_1 | |
| 2294 sltu $at,$c_2,$t_1 | |
| 2295 $ADDU $t_2,$at | |
| 2296 $ADDU $c_3,$t_2 | |
| 2297 sltu $at,$c_3,$t_2 | |
| 2298 $ADDU $c_1,$at | |
| 2299 mflo $t_1 | |
| 2300 mfhi $t_2 | |
| 2301 slt $at,$t_2,$zero | |
| 2302 $ADDU $c_1,$at | |
| 2303 $MULTU $a_5,$a_5 # mul_add_c(a[5],b[5],c2,c3,c1); | |
| 2304 $SLL $t_2,1 | |
| 2305 slt $a2,$t_1,$zero | |
| 2306 $ADDU $t_2,$a2 | |
| 2307 $SLL $t_1,1 | |
| 2308 $ADDU $c_2,$t_1 | |
| 2309 sltu $at,$c_2,$t_1 | |
| 2310 $ADDU $t_2,$at | |
| 2311 $ADDU $c_3,$t_2 | |
| 2312 sltu $at,$c_3,$t_2 | |
| 2313 $ADDU $c_1,$at | |
| 2314 mflo $t_1 | |
| 2315 mfhi $t_2 | |
| 2316 $ADDU $c_2,$t_1 | |
| 2317 sltu $at,$c_2,$t_1 | |
| 2318 $MULTU $a_4,$a_7 # mul_add_c2(a[4],b[7],c3,c1,c2); | |
| 2319 $ADDU $t_2,$at | |
| 2320 $ADDU $c_3,$t_2 | |
| 2321 sltu $at,$c_3,$t_2 | |
| 2322 $ADDU $c_1,$at | |
| 2323 $ST $c_2,10*$BNSZ($a0) | |
| 2324 | |
| 2325 mflo $t_1 | |
| 2326 mfhi $t_2 | |
| 2327 slt $c_2,$t_2,$zero | |
| 2328 $SLL $t_2,1 | |
| 2329 $MULTU $a_5,$a_6 # mul_add_c2(a[5],b[6],c3,c1,c2); | |
| 2330 slt $a2,$t_1,$zero | |
| 2331 $ADDU $t_2,$a2 | |
| 2332 $SLL $t_1,1 | |
| 2333 $ADDU $c_3,$t_1 | |
| 2334 sltu $at,$c_3,$t_1 | |
| 2335 $ADDU $t_2,$at | |
| 2336 $ADDU $c_1,$t_2 | |
| 2337 sltu $at,$c_1,$t_2 | |
| 2338 $ADDU $c_2,$at | |
| 2339 mflo $t_1 | |
| 2340 mfhi $t_2 | |
| 2341 slt $at,$t_2,$zero | |
| 2342 $ADDU $c_2,$at | |
| 2343 $MULTU $a_7,$a_5 # mul_add_c2(a[7],b[5],c1,c2,c3); | |
| 2344 $SLL $t_2,1 | |
| 2345 slt $a2,$t_1,$zero | |
| 2346 $ADDU $t_2,$a2 | |
| 2347 $SLL $t_1,1 | |
| 2348 $ADDU $c_3,$t_1 | |
| 2349 sltu $at,$c_3,$t_1 | |
| 2350 $ADDU $t_2,$at | |
| 2351 $ADDU $c_1,$t_2 | |
| 2352 sltu $at,$c_1,$t_2 | |
| 2353 $ADDU $c_2,$at | |
| 2354 $ST $c_3,11*$BNSZ($a0) | |
| 2355 | |
| 2356 mflo $t_1 | |
| 2357 mfhi $t_2 | |
| 2358 slt $c_3,$t_2,$zero | |
| 2359 $SLL $t_2,1 | |
| 2360 $MULTU $a_6,$a_6 # mul_add_c(a[6],b[6],c1,c2,c3); | |
| 2361 slt $a2,$t_1,$zero | |
| 2362 $ADDU $t_2,$a2 | |
| 2363 $SLL $t_1,1 | |
| 2364 $ADDU $c_1,$t_1 | |
| 2365 sltu $at,$c_1,$t_1 | |
| 2366 $ADDU $t_2,$at | |
| 2367 $ADDU $c_2,$t_2 | |
| 2368 sltu $at,$c_2,$t_2 | |
| 2369 $ADDU $c_3,$at | |
| 2370 mflo $t_1 | |
| 2371 mfhi $t_2 | |
| 2372 $ADDU $c_1,$t_1 | |
| 2373 sltu $at,$c_1,$t_1 | |
| 2374 $MULTU $a_6,$a_7 # mul_add_c2(a[6],b[7],c2,c3,c1); | |
| 2375 $ADDU $t_2,$at | |
| 2376 $ADDU $c_2,$t_2 | |
| 2377 sltu $at,$c_2,$t_2 | |
| 2378 $ADDU $c_3,$at | |
| 2379 $ST $c_1,12*$BNSZ($a0) | |
| 2380 | |
| 2381 mflo $t_1 | |
| 2382 mfhi $t_2 | |
| 2383 slt $c_1,$t_2,$zero | |
| 2384 $SLL $t_2,1 | |
| 2385 $MULTU $a_7,$a_7 # mul_add_c(a[7],b[7],c3,c1,c2); | |
| 2386 slt $a2,$t_1,$zero | |
| 2387 $ADDU $t_2,$a2 | |
| 2388 $SLL $t_1,1 | |
| 2389 $ADDU $c_2,$t_1 | |
| 2390 sltu $at,$c_2,$t_1 | |
| 2391 $ADDU $t_2,$at | |
| 2392 $ADDU $c_3,$t_2 | |
| 2393 sltu $at,$c_3,$t_2 | |
| 2394 $ADDU $c_1,$at | |
| 2395 $ST $c_2,13*$BNSZ($a0) | |
| 2396 | |
| 2397 mflo $t_1 | |
| 2398 mfhi $t_2 | |
| 2399 $ADDU $c_3,$t_1 | |
| 2400 sltu $at,$c_3,$t_1 | |
| 2401 $ADDU $t_2,$at | |
| 2402 $ADDU $c_1,$t_2 | |
| 2403 $ST $c_3,14*$BNSZ($a0) | |
| 2404 $ST $c_1,15*$BNSZ($a0) | |
| 2405 | |
| 2406 .set noreorder | |
| 2407 ___ | |
# NUBI-only epilogue of bn_sqr_comba8 (appended only when $flavour matches
# /nubi/i).  Reloads $t3,$t2,$t1,$t0 and $gp from stack slots 4..0 and then
# releases the 6*$SZREG frame.  Slot 5 is not reloaded here.
# NOTE(review): the matching prologue lies outside this part of the file;
# presumably it mirrors the bn_sqr_comba4 prologue and keeps $ra in slot 5 --
# confirm.
| 2408 $code.=<<___ if ($flavour =~ /nubi/i); | |
| 2409 $REG_L $t3,4*$SZREG($sp) | |
| 2410 $REG_L $t2,3*$SZREG($sp) | |
| 2411 $REG_L $t1,2*$SZREG($sp) | |
| 2412 $REG_L $t0,1*$SZREG($sp) | |
| 2413 $REG_L $gp,0*$SZREG($sp) | |
| 2414 $PTR_ADD $sp,6*$SZREG | |
| 2415 ___ | |
# Unconditional text shared by two routines:
#   - the return of bn_sqr_comba8 ("jr $ra" followed by a nop; the
#     preceding heredoc ends with ".set noreorder", so the nop is the
#     instruction that follows the jump) and its closing .end directive;
#   - the entry directives of bn_sqr_comba4: .align 5, the exported symbol,
#     the .ent marker and the label itself.
| 2416 $code.=<<___; | |
| 2417 jr $ra | |
| 2418 nop | |
| 2419 .end bn_sqr_comba8 | |
| 2420 | |
| 2421 .align 5 | |
| 2422 .globl bn_sqr_comba4 | |
| 2423 .ent bn_sqr_comba4 | |
| 2424 bn_sqr_comba4: | |
| 2425 ___ | |
# NUBI-only prologue of bn_sqr_comba4 (appended only when $flavour matches
# /nubi/i).  Declares a frame of 6*$SZREG bytes, allocates it, and stores
# $ra,$t3,$t2,$t1,$t0,$gp in slots 5..0.
# The .mask value 0x8000f008 has bits 31, 15..12 and 3 set.
# NOTE(review): presumably those bits are the register numbers that $ra,
# $t0..$t3 and $gp expand to for this flavour; the variable definitions are
# outside this part of the file -- confirm.
| 2426 $code.=<<___ if ($flavour =~ /nubi/i); | |
| 2427 .frame $sp,6*$SZREG,$ra | |
| 2428 .mask 0x8000f008,-$SZREG | |
| 2429 .set noreorder | |
| 2430 $PTR_SUB $sp,6*$SZREG | |
| 2431 $REG_S $ra,5*$SZREG($sp) | |
| 2432 $REG_S $t3,4*$SZREG($sp) | |
| 2433 $REG_S $t2,3*$SZREG($sp) | |
| 2434 $REG_S $t1,2*$SZREG($sp) | |
| 2435 $REG_S $t0,1*$SZREG($sp) | |
| 2436 $REG_S $gp,0*$SZREG($sp) | |
| 2437 ___ | |
# bn_sqr_comba4 body: r[0..7] = a[0..3]^2, computed column by column
# (comba).  r is addressed through $a0, a through $a1.  Every column is
# summed in a three-word accumulator whose register roles rotate through
# ($c_1,$c_2,$c_3); the low word is stored to r[] when the column is done.
#
# Off-diagonal products a[i]*a[j] (i != j) count twice.  They used to be
# doubled by shifting HI:LO left by one bit, after which the low-word carry
# was added into the doubled high word before that word was accumulated.
# The doubled high word can be all ones, so that addition could wrap to
# zero and silently drop a carry, yielding a wrong square (CVE-2014-3570).
# Such products are now added to the accumulator twice instead: HI of a
# single product never exceeds 2^W-2, so "HI + carry" cannot wrap.
#
# NOTE(review): bn_sqr_comba8 earlier in this file still uses the
# shift-based doubling and needs the same treatment.

# $sqr_add_c2->($acc0,$acc1,$acc2,$warm,$an,$bn,$note)
#   Appends code for ($acc2,$acc1,$acc0) += 2*HI:LO, where HI:LO is the
#   result of the multiplication issued before the call.
#   $acc0..$acc2  low, middle and top accumulator registers;
#   $warm         false for the first term of a column: $acc2 then still
#                 holds the word just stored to r[] and is initialised
#                 instead of accumulated;
#   $an,$bn       operands of the *next* multiplication, issued as soon as
#                 HI:LO have been read;
#   $note         comment attached to that multiplication.
#   Clobbers $t_1, $t_2 and $at.
my $sqr_add_c2 = sub {
    my ($acc0, $acc1, $acc2, $warm, $an, $bn, $note) = @_;

    # Low word: add LO twice, keeping both carries.  The first carry is
    # merged with HI in $at, the second one with HI in $t_2; neither sum
    # can overflow.  $acc1 receives the first of the two high parts.
    $code .= <<___;
	mflo	$t_1
	mfhi	$t_2
	$ADDU	$acc0,$t_1
	sltu	$at,$acc0,$t_1
	$MULTU	$an,$bn		# $note
	$ADDU	$acc0,$t_1
	$ADDU	$at,$t_2
	sltu	$t_1,$acc0,$t_1
	$ADDU	$acc1,$at
	$ADDU	$t_2,$t_1
___
    # First term of a column: the carry out of $acc1 initialises $acc2.
    $code .= <<___ if (!$warm);
	sltu	$acc2,$acc1,$at
	$ADDU	$acc1,$t_2
	sltu	$t_2,$acc1,$t_2
	$ADDU	$acc2,$t_2
___
    # Later terms: both carries out of $acc1 are accumulated into $acc2.
    $code .= <<___ if ($warm);
	sltu	$at,$acc1,$at
	$ADDU	$acc1,$t_2
	$ADDU	$acc2,$at
	sltu	$t_2,$acc1,$t_2
	$ADDU	$acc2,$t_2
___
    return;
};

# Column 0: a[0]^2.  LO is r[0], HI seeds the low word of column 1.
$code.=<<___;
	.set	reorder
	$LD	$a_0,0($a1)
	$LD	$a_1,$BNSZ($a1)
	$MULTU	$a_0,$a_0		# mul_add_c(a[0],b[0],c1,c2,c3);
	$LD	$a_2,2*$BNSZ($a1)
	$LD	$a_3,3*$BNSZ($a1)
	mflo	$c_1
	mfhi	$c_2
	$ST	$c_1,0($a0)

___

# Column 1: 2*a[0]*a[1] on top of $c_2.  $c_3 and $c_1 hold nothing yet,
# so both are initialised here: $c_3 = HI + first low-word carry, then
# HI + second low-word carry is added and the carry out becomes $c_1.
$code.=<<___;
	$MULTU	$a_0,$a_1		# mul_add_c2(a[0],b[1],c2,c3,c1);
	mflo	$t_1
	mfhi	$t_2
	$ADDU	$c_2,$t_1
	sltu	$at,$c_2,$t_1
	$MULTU	$a_2,$a_0		# mul_add_c2(a[2],b[0],c3,c1,c2);
	$ADDU	$c_2,$t_1
	$ADDU	$c_3,$t_2,$at
	sltu	$t_1,$c_2,$t_1
	$ADDU	$t_2,$t_1
	$ADDU	$c_3,$t_2
	sltu	$c_1,$c_3,$t_2
	$ST	$c_2,$BNSZ($a0)

___

# Column 2: 2*a[2]*a[0] + a[1]^2, accumulator ($c_3,$c_1,$c_2).
$sqr_add_c2->($c_3, $c_1, $c_2, 0,
              $a_1, $a_1, 'mul_add_c(a[1],b[1],c3,c1,c2);');
$code.=<<___;
	mflo	$t_1
	mfhi	$t_2
	$ADDU	$c_3,$t_1
	sltu	$at,$c_3,$t_1
	$MULTU	$a_0,$a_3		# mul_add_c2(a[0],b[3],c1,c2,c3);
	$ADDU	$t_2,$at
	$ADDU	$c_1,$t_2
	sltu	$at,$c_1,$t_2
	$ADDU	$c_2,$at
	$ST	$c_3,2*$BNSZ($a0)

___

# Column 3: 2*a[0]*a[3] + 2*a[1]*a[2], accumulator ($c_1,$c_2,$c_3).
$sqr_add_c2->($c_1, $c_2, $c_3, 0,
              $a_1, $a_2, 'mul_add_c2(a[1],b[2],c1,c2,c3);');
$sqr_add_c2->($c_1, $c_2, $c_3, 1,
              $a_3, $a_1, 'mul_add_c2(a[3],b[1],c2,c3,c1);');
$code.=<<___;
	$ST	$c_1,3*$BNSZ($a0)

___

# Column 4: 2*a[3]*a[1] + a[2]^2, accumulator ($c_2,$c_3,$c_1).
$sqr_add_c2->($c_2, $c_3, $c_1, 0,
              $a_2, $a_2, 'mul_add_c(a[2],b[2],c2,c3,c1);');
$code.=<<___;
	mflo	$t_1
	mfhi	$t_2
	$ADDU	$c_2,$t_1
	sltu	$at,$c_2,$t_1
	$MULTU	$a_2,$a_3		# mul_add_c2(a[2],b[3],c3,c1,c2);
	$ADDU	$t_2,$at
	$ADDU	$c_3,$t_2
	sltu	$at,$c_3,$t_2
	$ADDU	$c_1,$at
	$ST	$c_2,4*$BNSZ($a0)

___

# Column 5: 2*a[2]*a[3], accumulator ($c_3,$c_1,$c_2).
$sqr_add_c2->($c_3, $c_1, $c_2, 0,
              $a_3, $a_3, 'mul_add_c(a[3],b[3],c1,c2,c3);');

# Column 6: a[3]^2 on top of ($c_1,$c_2); these are the last two words, so
# no further carry is tracked.
$code.=<<___;
	$ST	$c_3,5*$BNSZ($a0)

	mflo	$t_1
	mfhi	$t_2
	$ADDU	$c_1,$t_1
	sltu	$at,$c_1,$t_1
	$ADDU	$t_2,$at
	$ADDU	$c_2,$t_2
	$ST	$c_1,6*$BNSZ($a0)
	$ST	$c_2,7*$BNSZ($a0)

	.set	noreorder
___
# NUBI-only epilogue of bn_sqr_comba4 (appended only when $flavour matches
# /nubi/i).  Reloads $t3,$t2,$t1,$t0 and $gp from stack slots 4..0 and then
# releases the 6*$SZREG frame allocated by the prologue.
# The prologue also stores $ra in slot 5, but it is not reloaded here.
# NOTE(review): presumably safe because this routine makes no calls and so
# never overwrites $ra -- confirm against the register assignments, which
# are outside this part of the file.
| 2571 $code.=<<___ if ($flavour =~ /nubi/i); | |
| 2572 $REG_L $t3,4*$SZREG($sp) | |
| 2573 $REG_L $t2,3*$SZREG($sp) | |
| 2574 $REG_L $t1,2*$SZREG($sp) | |
| 2575 $REG_L $t0,1*$SZREG($sp) | |
| 2576 $REG_L $gp,0*$SZREG($sp) | |
| 2577 $PTR_ADD $sp,6*$SZREG | |
| 2578 ___ | |
# Return from bn_sqr_comba4 and close its .ent/.end bracket.  The body
# heredoc above ends with ".set noreorder", so the nop written after
# "jr $ra" is emitted exactly where it is placed, directly behind the jump.
| 2579 $code.=<<___; | |
| 2580 jr $ra | |
| 2581 nop | |
| 2582 .end bn_sqr_comba4 | |
| 2583 ___ | |
# Write the accumulated assembly text to STDOUT.  Output is buffered, so a
# failed write (full disk, broken pipe) may only be reported by close();
# check it so that a truncated assembly file is never mistaken for a
# successful run.
print $code;
close STDOUT or die "error closing STDOUT: $!";
| OLD | NEW |